Example #1
def get_start_num(ori_img, diff_map, temp_fname, darknet_model, frcnn_model):
    save_format_try_image(ori_img, diff_map, temp_fname)
    yolo_input, frcnn_input = get_yolo_image(temp_fname), get_frcnn_image(
        temp_fname)

    list_boxes = darknet_model(yolo_input)
    yolo_results = post_process(list_boxes)
    yolo_num = len(yolo_results)

    frcnn_results, scores, _ = frcnn_model(
        img=[frcnn_input],
        img_metas=[[{
            'filename': '',
            'ori_filename': '',
            'ori_shape': (500, 500, 3),
            'img_shape': (800, 800, 3),
            'pad_shape': (800, 800, 3),
            'scale_factor': np.array([1.6, 1.6, 1.6, 1.6]),
            'flip': False,
            'flip_direction': None,
            'img_norm_cfg': {
                'mean': np.array([123.675, 116.28, 103.53]),
                'std': np.array([58.395, 57.12, 57.375]),
                'to_rgb': True
            }
        }]],
        return_loss=False,
        rescale=False)
    frcnn_results = np.concatenate(frcnn_results)
    frcnn_num = np.sum(frcnn_results[:, 4] > 0.3)
    return yolo_num, frcnn_num, yolo_results, frcnn_results
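A note on the hard-coded img_metas above: the scale_factor of 1.6 is just the resize ratio between ori_shape (500) and img_shape (800). A tiny sanity check, hypothetical and not part of the original:

import numpy as np

# scale_factor repeats the resize ratio for (x1, y1, x2, y2); 800 / 500 == 1.6
ori_h, ori_w = 500, 500   # 'ori_shape'
img_h, img_w = 800, 800   # 'img_shape'
scale_factor = np.array([img_w / ori_w, img_h / ori_h,
                         img_w / ori_w, img_h / ori_h])
assert np.allclose(scale_factor, 1.6)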
Example #2
    def predict(self, image):

        org_image = np.copy(image)
        org_h, org_w, _ = org_image.shape
        original_image_size = org_image.shape[:2]

        image_data = utils.image_preporcess(image,
                                            [self.input_size, self.input_size])
        image_data = image_data[np.newaxis, ...]

        detections = self.sess.run(self.det,
                                   feed_dict={self.input_data: image_data})
        detections = utils.post_process(detections, original_image_size,
                                        [cfg.input_image_h, cfg.input_image_w],
                                        cfg.down_ratio, cfg.score_threshold)

        bboxes = []
        scores = [0]
        classes = [0]
        if cfg.use_nms:
            cls_in_img = list(set(detections[:, 5]))
            results = []
            for c in cls_in_img:
                cls_mask = (detections[:, 5] == c)
                classified_det = detections[cls_mask]
                classified_bboxes = classified_det[:, :4]
                classified_scores = classified_det[:, 4]
                inds = utils.py_nms(classified_bboxes,
                                    classified_scores,
                                    max_boxes=50,
                                    iou_thresh=0.5)
                results.extend(classified_det[inds])
            results = np.asarray(results)
            if len(results) != 0:
                bboxes = results[:, 0:4]
                scores = results[:, 4]
                classes = results[:, 5]
                #bboxes_draw_on_img(original_image, classes, scores, bboxes, class_names)
        else:
            bboxes = detections[:, 0:4]
            scores = detections[:, 4]
            classes = detections[:, 5]
        #bboxes_draw_on_img(original_image, classes, scores, bboxes, class_names)

        return bboxes, scores, classes
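utils.py_nms itself is not shown in this example. A minimal pure-NumPy sketch of the greedy NMS it presumably implements (boxes as [x1, y1, x2, y2] arrays, returning the kept indices):

import numpy as np

def py_nms_sketch(boxes, scores, max_boxes=50, iou_thresh=0.5):
    # Greedy non-maximum suppression; a sketch, not the original utils.py_nms.
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0 and len(keep) < max_boxes:
        i = order[0]
        keep.append(i)
        # IoU of the current best box against all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thresh]  # drop overlapping boxes
    return np.array(keep, dtype=int)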
Example #3
            cnt += len(preds)
            if cnt % opt.display_freq == 0:
                print('[' + str(cnt) + '/' + str(len(dataset)) + ']' +
                      ' Time: %.2f' % (time.time() - iter_start_time))
            for k in range(len(preds)):
                ls.append((ids[k], preds[k]))
            iter_start_time = time.time()

        groups = groupby(ls, key=lambda x: x[0])
        for id, group in groups:
            arrayls = []
            for item in group:
                arrayls.append(item[1])
            output.write(
                post_process(pred=np.array(arrayls, dtype='float32'),
                             id=id,
                             opt=opt))

    os.system('gzip ' +
              os.path.join(model.save_dir, "test", 'pred' + timestr + '.txt'))

    # Release references so the memory can be freed
    save_dir = model.save_dir
    model.cpu()
    model = None
    dataset = None
    lr_loader = None

    print('Prediction Saved in ' +
          os.path.join(save_dir, "test", 'pred' + timestr + '.txt.gz'))
    print('>>Submit Now: (large/small/no)?')
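One detail worth flagging in this example: itertools.groupby only merges consecutive equal keys, so the (id, pred) list must already be ordered by id, as it is here because predictions are appended in loader order. A tiny demonstration of the sort-first requirement, on hypothetical data:

from itertools import groupby

ls = [(2, 'a'), (1, 'b'), (1, 'c')]
ls.sort(key=lambda x: x[0])  # groupby needs consecutive equal keys
grouped = {k: [v for _, v in g] for k, g in groupby(ls, key=lambda x: x[0])}
assert grouped == {1: ['b', 'c'], 2: ['a']}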
Example #4
def train():
    # define dataset
    num_train_imgs = len(open(cfg.train_data_file, 'r').readlines())
    num_train_batch = int(math.ceil(float(num_train_imgs) / cfg.batch_size))
    num_test_imgs = len(open(cfg.test_data_file, 'r').readlines())
    num_test_batch = int(math.ceil(float(num_test_imgs) / 1))

    train_dataset = tf.data.TextLineDataset(cfg.train_data_file)
    train_dataset = train_dataset.shuffle(num_train_imgs)
    train_dataset = train_dataset.batch(cfg.batch_size)
    train_dataset = train_dataset.map(lambda x: tf.py_func(get_data, inp=[x, True],
                                                           Tout=[tf.float32, tf.float32, tf.float32, tf.float32,
                                                                 tf.float32, tf.float32, tf.int32, tf.int32]),
                                      num_parallel_calls=6)
    train_dataset = train_dataset.prefetch(3)

    test_dataset = tf.data.TextLineDataset(cfg.test_data_file)
    test_dataset = test_dataset.batch(1)
    test_dataset = test_dataset.map(lambda x: tf.py_func(get_data, inp=[x, False],
                                                         Tout=[tf.float32, tf.float32, tf.float32, tf.float32,
                                                               tf.float32, tf.float32, tf.int32, tf.int32]),
                                    num_parallel_calls=1)
    test_dataset = test_dataset.prefetch(1)

    iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
    trainset_init_op = iterator.make_initializer(train_dataset)
    testset_init_op = iterator.make_initializer(test_dataset)

    input_data, batch_hm, batch_wh, batch_reg, batch_reg_mask, batch_ind, batch_img_size, batch_id = iterator.get_next()
    input_data.set_shape([None, None, None, 3])
    batch_hm.set_shape([None, None, None, None])
    batch_wh.set_shape([None, None, None])
    batch_reg.set_shape([None, None, None])
    batch_reg_mask.set_shape([None, None])
    batch_ind.set_shape([None, None])
    batch_img_size.set_shape([None, None])
    batch_id.set_shape([None])

    # training flag
    is_training = tf.placeholder(dtype=tf.bool, name='is_training')

    # define model and loss
    model = CenterNet(input_data, is_training, "dla_34")
    hm = model.pred_hm
    wh = model.pred_wh
    reg = model.pred_reg
    from utils.decode import decode
    det = decode(hm, wh, reg, K=cfg.max_objs)

    with tf.variable_scope('loss'):
        # hm_loss, wh_loss, reg_loss = model.compute_loss(batch_hm, batch_wh, batch_reg, batch_reg_mask, batch_ind)
        hm_loss, wh_loss, reg_loss = model.compute_loss_pcl(batch_hm, batch_wh, batch_reg, batch_reg_mask, batch_ind)
        total_loss = hm_loss + wh_loss + reg_loss

    # define train op
    if cfg.lr_type == "CosineAnnealing":
        learning_rate = 0.0001
        global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
        # warmup_steps = tf.constant(cfg.warm_up_epochs * num_train_batch, dtype=tf.float64, name='warmup_steps')
        # train_steps = tf.constant(cfg.epochs * num_train_batch, dtype=tf.float64, name='train_steps')
        # learning_rate = tf.cond(
        #     pred=global_step < warmup_steps,
        #     true_fn=lambda: global_step / warmup_steps * cfg.init_lr,
        #     false_fn=lambda: cfg.end_lr + 0.5 * (cfg.init_lr - cfg.end_lr) *
        #                      (1 + tf.cos(
        #                          (global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
        # )
        global_step_update = tf.assign_add(global_step, 1.0)

        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies([optimizer, global_step_update]):
                train_op = tf.no_op()

    else:
        global_step = tf.Variable(0, trainable=False)
        if cfg.lr_type == "exponential":
            learning_rate = 0.0001
            # learning_rate = tf.train.exponential_decay(cfg.lr,
            #                                            global_step,
            #                                            cfg.lr_decay_steps,
            #                                            cfg.lr_decay_rate,
            #                                            staircase=True)
        elif cfg.lr_type == "piecewise":
            learning_rate = 0.0001
            # learning_rate = tf.train.piecewise_constant(global_step, cfg.lr_boundaries, cfg.lr_piecewise)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step=global_step)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    with tf.Session() as sess:
        with tf.name_scope('summary'):
            tf.summary.scalar("learning_rate", learning_rate)
            tf.summary.scalar("hm_loss", hm_loss)
            tf.summary.scalar("wh_loss", wh_loss)
            tf.summary.scalar("reg_loss", reg_loss)
            tf.summary.scalar("total_loss", total_loss)

            logdir = "./log_dla/"
            if os.path.exists(logdir): shutil.rmtree(logdir)
            os.mkdir(logdir)
            write_op = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)

        # train
        sess.run(tf.global_variables_initializer())
        # if cfg.dla_pretrain:
        #     saver.restore(sess, './checkpoint/centernet_test_loss=3.1386.ckpt-79')
        for epoch in range(1, 1 + cfg.epochs):
            pbar = tqdm(range(num_train_batch))
            train_epoch_loss, test_epoch_loss = [], []
            sess.run(trainset_init_op)
            for i in pbar:
                _, summary, train_step_loss, global_step_val, _hm_loss, _wh_loss, _reg_loss = sess.run(
                    [train_op, write_op, total_loss, global_step, hm_loss, wh_loss, reg_loss],
                    feed_dict={is_training: True})
                train_epoch_loss.append(train_step_loss)
                summary_writer.add_summary(summary, global_step_val)
                pbar.set_description("train loss: %.2f" % train_step_loss)
                if i % 20 == 0:
                    print("train loss: %.2f hm_loss: %.2f wh_loss:%2f reg_loss:%2f learning_rate:%f" % (train_step_loss, _hm_loss, _wh_loss, _reg_loss, learning_rate) )
            print("begining test")
            sess.run(testset_init_op)
            val_preds = []
            # for j in range(num_test_batch ):
            #     test_step_loss = sess.run(total_loss, feed_dict={is_training: False})
            #     test_epoch_loss.append(test_step_loss)
            train_epoch_loss = np.mean(train_epoch_loss)
            # train_epoch_loss, test_epoch_loss = np.mean(train_epoch_loss), np.mean(test_epoch_loss)
            ckpt_file = "./checkpoint/centernet_train_loss=%.4f.ckpt" % train_epoch_loss
            log_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            print("=> Epoch: %2d Time: %s Train loss: %.2f Saving %s ..."
                  % (epoch, log_time, train_epoch_loss, ckpt_file))
            saver.save(sess, ckpt_file, global_step=epoch)

            if epoch % cfg.eval_epoch == 0 and epoch > 0:
                sess.run(testset_init_op)
                for j in range(num_test_batch):
                    detections, _batch_img_size, _batch_id = sess.run([det, batch_img_size, batch_id], feed_dict={is_training: False})
                    ori_h = _batch_img_size[0][1]
                    ori_w = _batch_img_size[0][0]
                    detect_post = post_process(detections, (ori_h, ori_w), [cfg.input_image_h, cfg.input_image_w],
                                               cfg.down_ratio, cfg.score_threshold)
                    id = _batch_id[0]
                    detect_per_img = get_preds_gpu(detect_post, id)
                    val_preds.extend(detect_per_img)
                rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
                info = ""
                gt_dict = parse_gt_rec(cfg.test_data_file, [cfg.input_image_h, cfg.input_image_w], cfg.letterbox_resize)
                for ii in range(cfg.num_classes):
                    from utils.utils import voc_eval
                    npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=cfg.score_threshold,
                                                       use_07_metric=cfg.use_voc_07_metric)
                    info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(ii, rec, prec, ap)
                    rec_total.update(rec, npos)
                    prec_total.update(prec, nd)
                    ap_total.update(ap, 1)
                mAP = ap_total.average
                info += 'EVAL: Recall: {:.4f}, Precision: {:.4f}, mAP: {:.4f}\n'.format(
                    rec_total.average, prec_total.average, mAP)
                print(info)
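The CosineAnnealing branch above has its schedule commented out and pins the rate at 0.0001. For reference, a plain-Python rendering of the commented-out warmup-then-cosine formula (parameter names are assumptions mirroring the cfg fields):

import math

def cosine_lr_sketch(global_step, warmup_steps, train_steps, init_lr, end_lr):
    # Linear warmup, then cosine decay from init_lr down to end_lr.
    if global_step < warmup_steps:
        return global_step / warmup_steps * init_lr
    progress = (global_step - warmup_steps) / (train_steps - warmup_steps)
    return end_lr + 0.5 * (init_lr - end_lr) * (1 + math.cos(progress * math.pi))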
Example #5
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)

    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path='cls_preds.csv',
                                 phase='filtered_test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))
    else:
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path=config.data.sample_submission_path,
                                 phase='test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))

    model.to(config.device)
    model.eval()

    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                if config.data.num_classes == 4:
                    for cls in range(preds.shape[0]):
                        mask = preds[cls, :, :]
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls + 1}"
                        predictions.append([name, rle])
                else:  # == 5
                    for cls in range(1, 5):
                        mask = preds[cls, :, :]
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls}"
                        predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions,
                      columns=['ImageId_ClassId', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
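mask2rle is not defined in this example; the usual Kaggle-style run-length encoder (column-major traversal, 1-indexed starts) it presumably corresponds to is sketched below:

import numpy as np

def mask2rle_sketch(img):
    # Column-major run-length encoding: 'start length start length ...'
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]  # convert run ends into run lengths
    return ' '.join(str(x) for x in runs)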
Example #6
def ensemble():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # parameters and configs
    # ------------------------------------------------------------------------------------------------------------
    config_paths320 = [
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold0.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold1.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold2.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold3.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold4.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold0.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold1.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold2.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold3.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold4.yml',
    ]
    config_paths384 = [
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold0.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold1.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold2.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold3.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold4.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold0.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold1.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold2.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold3.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold4.yml',
    ]
    LABEL_THRESHOLDS = [0.68, 0.69, 0.69, 0.67]
    MASK_THRESHOLDS = [0.31, 0.36, 0.31, 0.34]
    MIN_SIZES = [7500, 10000, 7500, 7500]
    WEIGHTS = [0.5, 0.5]
    # ------------------------------------------------------------------------------------------------------------
    #
    # ------------------------------------------------------------------------------------------------------------
    config = load_config('config/base_config.yml')

    def get_model_and_loader(config_paths):
        config = load_config(config_paths[0])

        models = []
        for c in config_paths:
            models.append(load_model(c))

        model = MultiSegModels(models)

        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
        return model, testloader

    model320, loader320 = get_model_and_loader(config_paths320)
    model384, loader384 = get_model_and_loader(config_paths384)

    predictions = []
    with torch.no_grad():
        for (batch_fnames320,
             batch_images320), (batch_fnames384, batch_images384) in tqdm(
                 zip(loader320, loader384)):
            batch_images320 = batch_images320.to(config.device)
            batch_images384 = batch_images384.to(config.device)

            batch_preds320 = predict_batch(model320,
                                           batch_images320,
                                           tta=config.test.tta)
            batch_preds384 = predict_batch(model384,
                                           batch_images384,
                                           tta=config.test.tta)

            batch_preds320 = resize_batch_images(batch_preds320, SUB_HEIGHT,
                                                 SUB_WIDTH)
            batch_preds384 = resize_batch_images(batch_preds384, SUB_HEIGHT,
                                                 SUB_WIDTH)
            batch_preds = batch_preds320 * \
                WEIGHTS[0] + batch_preds384 * WEIGHTS[1]

            batch_labels320 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds320)),
                1).view(batch_preds320.shape[0], -1)
            batch_labels384 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds384)),
                1).view(batch_preds384.shape[0], -1)
            batch_labels = batch_labels320 * \
                WEIGHTS[0] + batch_labels384 * WEIGHTS[1]

            for fname, preds, labels in zip(batch_fnames320, batch_preds,
                                            batch_labels):
                for cls in range(4):
                    if labels[cls] <= LABEL_THRESHOLDS[cls]:
                        pred = np.zeros(preds[cls, :, :].shape)
                    else:
                        pred, _ = post_process(preds[cls, :, :],
                                               MASK_THRESHOLDS[cls],
                                               MIN_SIZES[cls],
                                               height=SUB_HEIGHT,
                                               width=SUB_WIDTH)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    sub_df = pd.DataFrame(predictions,
                          columns=['Image_Label', 'EncodedPixels'])

    sample_submission = pd.read_csv(config.data.sample_submission_path)
    df_merged = pd.merge(sample_submission,
                         sub_df,
                         on='Image_Label',
                         how='left')
    df_merged.fillna('', inplace=True)
    df_merged['EncodedPixels'] = df_merged['EncodedPixels_y']
    df_merged = df_merged[['Image_Label', 'EncodedPixels']]

    df_merged.to_csv("submission.csv", index=False)

    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/'
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/'
    else:
        config.work_dir = '.'
    df_merged.to_csv(config.work_dir + '/submission.csv', index=False)
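post_process in these segmentation examples thresholds a probability map and drops small connected components. A minimal sketch of that behaviour, assuming OpenCV connected components (the originals also accept height/width arguments not reproduced here):

import cv2
import numpy as np

def post_process_sketch(probability, threshold, min_size):
    # Binarize, then keep only connected components larger than min_size.
    mask = (probability > threshold).astype(np.uint8)
    num_component, component = cv2.connectedComponents(mask)
    predictions = np.zeros(probability.shape, np.float32)
    num = 0
    for c in range(1, num_component):  # label 0 is background
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num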
Example #7
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path='cls_preds.csv',
            phase='filtered_test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    else:
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))

    model = load_model(config_file_seg)

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                for cls in range(preds.shape[0]):
                    pred, _ = post_process(
                        preds[cls, :, :],
                        config.test.best_threshold,
                        min_sizes[cls],
                        height=config.transforms.test.Resize.height,
                        width=config.transforms.test.Resize.width)
                    pred = cv2.resize(pred, (SUB_WIDTH, SUB_HEIGHT))
                    pred = (pred > 0.5).astype(int)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions, columns=['Image_Label', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
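The resize-then-rebinarize step above is needed because cv2.resize interpolates: a binary mask comes back with fractional edge values, which the > 0.5 comparison cleans up. A quick illustration:

import cv2
import numpy as np

mask = np.zeros((4, 4), np.float32)
mask[1:3, 1:3] = 1.0
resized = cv2.resize(mask, (8, 8))            # default bilinear interpolation
assert ((resized > 0) & (resized < 1)).any()  # fractional edge values appear
binary = (resized > 0.5).astype(int)          # rebinarize, as in the example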
Example #8
def train():
    # define dataset
    num_train_imgs = len(open(cfg.train_data_file, 'r').readlines())
    num_train_batch = int(math.ceil(float(num_train_imgs) / cfg.batch_size))
    num_test_imgs = len(open(cfg.test_data_file, 'r').readlines())
    num_test_batch = int(math.ceil(float(num_test_imgs) / 1))

    train_dataset = tf.data.TextLineDataset(cfg.train_data_file)
    train_dataset = train_dataset.shuffle(num_train_imgs)
    train_dataset = train_dataset.batch(cfg.batch_size)
    train_dataset = train_dataset.map(lambda x: tf.py_func(
        get_data,
        inp=[x, True],
        Tout=[
            tf.float32, tf.float32, tf.float32, tf.float32,
            tf.float32, tf.float32, tf.int32, tf.int32
        ]),
                                      num_parallel_calls=6)
    train_dataset = train_dataset.prefetch(3)

    test_dataset = tf.data.TextLineDataset(cfg.test_data_file)
    test_dataset = test_dataset.batch(1)
    test_dataset = test_dataset.map(lambda x: tf.py_func(
        get_data,
        inp=[x, False],
        Tout=[
            tf.float32, tf.float32, tf.float32, tf.float32,
            tf.float32, tf.float32, tf.int32, tf.int32
        ]),
                                    num_parallel_calls=1)
    test_dataset = test_dataset.prefetch(1)

    iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)
    trainset_init_op = iterator.make_initializer(train_dataset)
    testset_init_op = iterator.make_initializer(test_dataset)

    input_data, hm, wh, reg, reg_mask, ind, img_size, id = iterator.get_next()

    batch_input_data = []
    batch_hm = []
    batch_wh = []
    batch_reg = []
    batch_reg_mask = []
    batch_ind = []
    batch_img_size = []
    batch_id = []

    for i in range(cfg.NUM_GPU):
        start = i * (cfg.batch_size // cfg.NUM_GPU)
        end = (i + 1) * (cfg.batch_size // cfg.NUM_GPU)

        single_input_data = input_data[start:end, :, :, :]
        single_hm = hm[start:end, :, :, :]
        single_wh = wh[start:end, :, :]
        single_reg = reg[start:end, :, :]
        single_reg_mask = reg_mask[start:end, :]
        single_ind = ind[start:end, :]
        single_img_size = img_size[start:end, :]
        single_id = id[start:end, :]

        batch_input_data.append(single_input_data)
        batch_hm.append(single_hm)
        batch_wh.append(single_wh)
        batch_reg.append(single_reg)
        batch_reg_mask.append(single_reg_mask)
        batch_ind.append(single_ind)
        batch_img_size.append(single_img_size)
        batch_id.append(single_id)

        batch_input_data[i].set_shape([None, None, None, 3])
        batch_hm[i].set_shape([None, None, None, None])
        batch_wh[i].set_shape([None, None, None])
        batch_reg[i].set_shape([None, None, None])
        batch_reg_mask[i].set_shape([None, None])
        batch_ind[i].set_shape([None, None])
        batch_img_size[i].set_shape([None, None])
        batch_id[i].set_shape([None])

    # define model and loss
    with tf.device('/cpu:0'):
        tower_grads = []
        hm_loss = []
        wh_loss = []
        reg_loss = []
        total_loss = []
        hm_pred_list = []
        wh_pred_list = []
        reg_pred_list = []
        pred_det = []
        # training flag
        is_training = tf.placeholder(dtype=tf.bool, name='is_training')
        # with tf.variable_scope(tf.get_variable_scope()):
        with tf.variable_scope(tf.get_variable_scope(),
                               reuse=tf.AUTO_REUSE) as scope:
            for i in range(cfg.NUM_GPU):
                print("current gpu is", i)
                with tf.device('/gpu:%d' % i):
                    model = CenterNet(batch_input_data[i], is_training,
                                      "dla_34")
                    hm_pred = model.pred_hm
                    wh_pred = model.pred_wh
                    reg_pred = model.pred_reg
                    hm_pred_list.append(hm_pred)
                    wh_pred_list.append(wh_pred)
                    reg_pred_list.append(reg_pred)
                    det = decode(hm_pred_list[i],
                                 wh_pred_list[i],
                                 reg_pred_list[i],
                                 K=cfg.max_objs)
                    pred_det.append(det)
                    with tf.variable_scope('loss'):
                        l2_loss = tf.losses.get_regularization_loss()
                        # hm_loss[i], wh_loss[i], reg_loss[i] = model.compute_loss(batch_hm[i], batch_wh[i], batch_reg[i], batch_reg_mask[i], batch_ind[i])
                        hm_loss_single, wh_loss_single, reg_loss_single = model.compute_loss(
                            batch_hm[i], batch_wh[i], batch_reg[i],
                            batch_reg_mask[i], batch_ind[i])
                        hm_loss.append(hm_loss_single)
                        wh_loss.append(wh_loss_single)
                        reg_loss.append(reg_loss_single)
                        total_loss_single = (hm_loss[i] + wh_loss[i]
                                             + reg_loss[i] + l2_loss)
                        total_loss.append(total_loss_single)
                    # define train op
                    global_step = tf.Variable(0, trainable=False)
                    if cfg.lr_type == "exponential":
                        learning_rate = tf.train.exponential_decay(
                            cfg.lr,
                            global_step,
                            cfg.lr_decay_steps,
                            cfg.lr_decay_rate,
                            staircase=True)
                    elif cfg.lr_type == "piecewise":
                        learning_rate = tf.train.piecewise_constant(
                            global_step, cfg.lr_boundaries, cfg.lr_piecewise)
                    optimizer = tf.train.AdamOptimizer(learning_rate)
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                    with tf.control_dependencies(update_ops):
                        grads = optimizer.compute_gradients(total_loss[i])
                        clip_grad_var = [
                            gv if gv[0] is None else
                            [tf.clip_by_norm(gv[0], 100.), gv[1]]
                            for gv in grads
                        ]
                        tower_grads.append(clip_grad_var)
            last_loss = tf.reduce_mean(total_loss)
            if len(tower_grads) > 1:
                clip_grad_var = sum_gradients(tower_grads)
            else:
                clip_grad_var = tower_grads[0]
            train_op = optimizer.apply_gradients(clip_grad_var,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:
                with tf.name_scope('summary'):
                    tf.summary.scalar("learning_rate", learning_rate)
                    tf.summary.scalar("hm_loss", tf.reduce_mean(hm_loss))
                    tf.summary.scalar("wh_loss", tf.reduce_mean(wh_loss))
                    tf.summary.scalar("reg_loss", tf.reduce_mean(reg_loss))
                    tf.summary.scalar("total_loss", tf.reduce_mean(total_loss))

                    logdir = "./log_dla/"
                    if os.path.exists(logdir): shutil.rmtree(logdir)
                    os.mkdir(logdir)
                    write_op = tf.summary.merge_all()
                    summary_writer = tf.summary.FileWriter(logdir,
                                                           graph=sess.graph)
                sess.run(tf.global_variables_initializer())
                if cfg.pre_train:
                    saver.restore(
                        sess,
                        './checkpoint/centernet_train_epoch_loss=313.7357.ckpt-6'
                    )
                for epoch in range(1, 1 + cfg.epochs):
                    pbar = tqdm(range(num_train_batch))
                    train_epoch_loss, test_epoch_loss = [], []
                    sess.run(trainset_init_op)
                    for i in pbar:
                        _, summary, train_step_loss, global_step_val = sess.run(
                            [train_op, write_op, last_loss, global_step],
                            feed_dict={is_training: True})
                        train_epoch_loss.append(train_step_loss)
                        summary_writer.add_summary(summary, global_step_val)
                        pbar.set_description("train loss: %.2f" %
                                             train_step_loss)
                    # sess.run(testset_init_op)
                    # for j in range(num_test_batch ):
                    #     test_step_loss = sess.run(last_loss, feed_dict={is_training: False})
                    #     test_epoch_loss.append(test_step_loss)
                    # train_epoch_loss, test_epoch_loss = np.mean(train_epoch_loss), np.mean(test_epoch_loss)
                    train_epoch_loss = np.mean(train_epoch_loss)
                    ckpt_file = "./checkpoint/centernet_train_epoch_loss=%.4f.ckpt" % train_epoch_loss
                    log_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                             time.localtime(time.time()))
                    print(
                        "=> Epoch: %2d Time: %s Train loss: %.2f  Saving %s ..."
                        % (epoch, log_time, train_epoch_loss, ckpt_file))
                    saver.save(sess, ckpt_file, global_step=epoch)

                    if epoch % cfg.eval_epoch == 0 and epoch > 0:
                        print("begining test")
                        sess.run(testset_init_op)
                        val_preds = []
                        for j in tqdm(range(num_test_batch)):
                            detections, _batch_img_size, _batch_id = sess.run(
                                [pred_det[0], img_size, id],
                                feed_dict={is_training: False})
                            # print("detecttiion is", detections)
                            # print("_batch_img_size is", _batch_img_size)
                            # print("id is", _batch_id)
                            ori_h = _batch_img_size[0][1]
                            ori_w = _batch_img_size[0][0]
                            detect_post = post_process(
                                detections, (ori_h, ori_w),
                                [cfg.input_image_h, cfg.input_image_w],
                                cfg.down_ratio, cfg.score_threshold)
                            id_test = _batch_id[0]
                            detect_per_img = get_preds_gpu(
                                detect_post, id_test)
                            val_preds.extend(detect_per_img)
                        rec_total, prec_total, ap_total = (
                            AverageMeter(), AverageMeter(), AverageMeter())
                        info = ""
                        gt_dict = parse_gt_rec(
                            cfg.test_data_file,
                            [cfg.input_image_h, cfg.input_image_w],
                            cfg.letterbox_resize)
                        for ii in range(cfg.num_classes):
                            from utils.utils import voc_eval
                            npos, nd, rec, prec, ap = voc_eval(
                                gt_dict,
                                val_preds,
                                ii,
                                iou_thres=cfg.score_threshold,
                                use_07_metric=cfg.use_voc_07_metric)
                            info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(
                                ii, rec, prec, ap)
                            rec_total.update(rec, npos)
                            prec_total.update(prec, nd)
                            ap_total.update(ap, 1)
                        mAP = ap_total.average
                        info += 'EVAL: Recall: {:.4f}, Precision: {:.4f}, mAP: {:.4f}\n'.format(
                            rec_total.average, prec_total.average, mAP)
                        print(info)
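sum_gradients is not shown in this example. The standard multi-tower averaging it presumably performs (one gradient list per GPU, averaged per variable) can be sketched as:

import tensorflow as tf

def sum_gradients_sketch(tower_grads):
    # tower_grads: one [(grad, var), ...] list per GPU; average grads per var.
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grad_and_vars if g is not None]
        if not grads:
            averaged.append(grad_and_vars[0])
            continue
        grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
        averaged.append((grad, grad_and_vars[0][1]))
    return averaged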
Example #9
runner = SupervisedRunner(
    model=tta_model,
    device=get_device())
for i, test_batch in enumerate(tqdm.tqdm(loaders['test'])):
    test_batch = test_batch[0].cuda()
    runner_out = runner.predict_batch(
        {"features": test_batch})['logits']
    gc.collect()
    for j, batch in enumerate(runner_out):  # j avoids shadowing the outer loop index
        for probability in batch:
            probability = probability.cpu().detach().numpy()
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(
                    525, 350), interpolation=cv2.INTER_LINEAR)
            predict, num_predict = post_process(
                sigmoid(probability),
                class_params[f"{image_id % 4}"][0],
                class_params[f"{image_id % 4}"][1])
            if num_predict == 0:
                encoded_pixels.append('')
            else:
                r = mask2rle(predict)
                encoded_pixels.append(r)
            image_id += 1


del model
gc.collect()
torch.cuda.empty_cache()

assert len(encoded_pixels) == 14792
sub['EncodedPixels'] = encoded_pixels
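sigmoid here is presumably the usual logistic helper defined elsewhere in the notebook; note also that class_params is keyed by the stringified class index, hence the f-string on image_id % 4. For completeness, a plain-NumPy version:

import numpy as np

def sigmoid(x):
    # Standard logistic function; assumed equivalent to the notebook's helper.
    return 1.0 / (1.0 + np.exp(-x))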
Example #10
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)

    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        batch_size=config.train.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
    )

    # create segmentation model with pre-trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    all_dice = {}
    min_sizes = [100, 300, 500, 750, 1000, 1500, 2000, 3000]
    for min_size in min_sizes:
        all_dice[min_size] = {}
        for cls in range(config.data.num_classes):
            all_dice[min_size][cls] = []

    with torch.no_grad():
        for i, (batch_images, batch_masks) in enumerate(tqdm(validloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            batch_masks = batch_masks.cpu().numpy()

            for masks, preds in zip(batch_masks, batch_preds):
                for cls in range(config.data.num_classes):
                    for min_size in min_sizes:
                        pred, _ = post_process(preds[cls, :, :],
                                               config.test.best_threshold,
                                               min_size)
                        mask = masks[cls, :, :]
                        all_dice[min_size][cls].append(dice_score(pred, mask))

    for cls in range(config.data.num_classes):
        for min_size in min_sizes:
            all_dice[min_size][cls] = sum(all_dice[min_size][cls]) / len(
                all_dice[min_size][cls])
            dict_to_json(all_dice, config.work_dir + '/threshold_search.json')
            if config.data.num_classes == 4:
                defect_class = cls + 1
            else:
                defect_class = cls
            print('average dice score for class{} for min_size {}: {}'.format(
                defect_class, min_size, all_dice[min_size][cls]))
Example #11
    model.load_state_dict(torch.load(cfg.load_model))
    model.eval()
    dataloader = DataLoader(ImageFolder(cfg.test_dir), batch_size=1, shuffle=False, num_workers=1)
    # Assign a distinct color to each class name
    hsv_tuples = [(x / len(cfg.class_name), 1., 1.) for x in range(len(cfg.class_name))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
    imgs = []  # image file paths
    img_detections = []  # detection results for each image
    for img_paths, input_imgs in dataloader:
        input_imgs = input_imgs.cuda()
        with torch.no_grad():
            a = time.time()
            hm, wh, offset = model(input_imgs)
            print("forward耗时:  ", (time.time()-a)*1000)
            results = post_process(hm, wh, offset, 50)
        imgs.extend(img_paths)
        img_detections.extend(results)

    for path, detections in zip(imgs, img_detections):
        img = Image.open(path)
        w, h = img.size
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf', size=16)
        if detections is not None:
            # First rescale coordinates from the network input size to the max(w, h) scale
            detections[:, :4] *= max(h, w) / cfg.input_size
            # 如果h<w,则是一个宽边图,需要在y轴上减去(w - h) / 2,下同
            if h < w:
                detections[:, 1:4:2] -= (w - h) / 2
            else:
                detections[:, 0:3:2] -= (h - w) / 2
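The example breaks off after undoing the letterbox padding. The full inverse transform it is performing (scale back to max(w, h), shift off the padding, and, as an added assumption not shown above, clip to the image bounds) can be sketched as:

import numpy as np

def unmap_letterbox_sketch(detections, w, h, input_size):
    # detections[:, :4] are (x1, y1, x2, y2) in network-input coordinates.
    detections[:, :4] *= max(h, w) / input_size
    if h < w:  # padded top/bottom: shift y coordinates
        detections[:, 1:4:2] -= (w - h) / 2
    else:      # padded left/right: shift x coordinates
        detections[:, 0:3:2] -= (h - w) / 2
    # Clipping is an assumption, not shown in the original snippet.
    detections[:, 0:3:2] = np.clip(detections[:, 0:3:2], 0, w)
    detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, h)
    return detections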
Example #12
def validation(config_file_seg):

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    config = load_config(config_file_seg)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir

    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        img_size=(config.data.height, config.data.width),
        batch_size=config.test.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
    )

    model = load_model(config_file_seg)

    min_sizes = np.arange(0, 20000, 5000)
    label_thresholds = [0.6, 0.7, 0.8]
    mask_thresholds = [0.2, 0.3, 0.4]
    all_dice = np.zeros(
        (4, len(label_thresholds), len(mask_thresholds), len(min_sizes)))
    count = 0

    with torch.no_grad():
        for i, (batch_images, batch_masks) in enumerate(tqdm(validloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            batch_labels = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds)),
                1).view(batch_preds.shape[0], -1)

            batch_masks = batch_masks.cpu().numpy()
            batch_labels = batch_labels.cpu().numpy()

            batch_masks = resize_batch_images(batch_masks, SUB_HEIGHT,
                                              SUB_WIDTH)
            batch_preds = resize_batch_images(batch_preds, SUB_HEIGHT,
                                              SUB_WIDTH)

            for labels, masks, preds in zip(batch_labels, batch_masks,
                                            batch_preds):
                for cls in range(config.data.num_classes):
                    for i, label_th in enumerate(label_thresholds):
                        for j, mask_th in enumerate(mask_thresholds):
                            for k, min_size in enumerate(min_sizes):
                                if labels[cls] <= label_th:
                                    pred = np.zeros(preds[cls, :, :].shape)
                                else:
                                    pred, _ = post_process(preds[cls, :, :],
                                                           mask_th,
                                                           min_size,
                                                           height=SUB_HEIGHT,
                                                           width=SUB_WIDTH)
                                mask = masks[cls, :, :]

                                dice = dice_score(pred, mask)
                                all_dice[cls, i, j, k] += dice
                count += 1

    all_dice = all_dice / (count)
    np.save('all_dice', all_dice)

    parameters = {}
    parameters['label_thresholds'] = []
    parameters['mask_thresholds'] = []
    parameters['min_sizes'] = []
    parameters['dice'] = []
    cv_score = 0

    for cls in range(4):
        i, j, k = np.where((all_dice[cls] == all_dice[cls].max()))
        parameters['label_thresholds'].append(float(label_thresholds[i[0]]))
        parameters['mask_thresholds'].append(float(mask_thresholds[j[0]]))
        parameters['min_sizes'].append(int(min_sizes[k[0]]))
        parameters['dice'].append(float(all_dice[cls].max()))
        cv_score += all_dice[cls].max() / 4

    print('cv_score:', cv_score)
    dict_to_json(parameters, config.work_dir + '/parameters.json')
    print(pd.DataFrame(parameters))
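dice_score, the metric driving this grid search, is not defined in the example. The standard binary dice it presumably computes:

import numpy as np

def dice_score_sketch(pred, target, eps=1e-7):
    # 2|A ∩ B| / (|A| + |B|); eps guards the empty-vs-empty case.
    pred = np.asarray(pred, dtype=bool)
    target = np.asarray(target, dtype=bool)
    inter = np.logical_and(pred, target).sum()
    return (2.0 * inter + eps) / (pred.sum() + target.sum() + eps)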
Example #13
def run_seg(config_dir):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config_root = Path(config_dir) / 'seg'
    config_paths = [config_root / p for p in os.listdir(config_root)]
    base_config_paths = [
        Path(config_dir) / p for p in os.listdir(config_dir) if 'yml' in p
    ]
    config = load_config(base_config_paths[0])

    models = []
    for c in config_paths:
        models.append(load_model(c))

    model = MultiSegModels(models)

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path='cls_preds.csv',
                                 phase='filtered_test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))
    else:
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path=config.data.sample_submission_path,
                                 phase='test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                for cls in range(preds.shape[0]):
                    mask = preds[cls, :, :]
                    mask, num = post_process(mask, config.test.best_threshold,
                                             min_sizes[cls])
                    rle = mask2rle(mask)
                    name = fname + f"_{cls + 1}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    sub_df = pd.DataFrame(predictions,
                          columns=['ImageId_ClassId', 'EncodedPixels'])

    sample_submission = pd.read_csv(config.data.sample_submission_path)
    df_merged = pd.merge(sample_submission,
                         sub_df,
                         on='ImageId_ClassId',
                         how='left')
    df_merged.fillna('', inplace=True)
    df_merged['EncodedPixels'] = df_merged['EncodedPixels_y']
    df_merged = df_merged[['ImageId_ClassId', 'EncodedPixels']]

    df_merged.to_csv("submission.csv", index=False)
    df_merged.to_csv(KAGGLE_WORK_DIR + "/submission.csv", index=False)
Example #14
class_names = read_class_names(cfg.classes_file)
img_names = os.listdir(
    '/home/pcl/tf_work/TF_CenterNet/VOC/test/VOCdevkit/VOC2007/JPEGImages')
for img_name in img_names:
    img_path = '/home/pcl/tf_work/TF_CenterNet/VOC/test/VOCdevkit/VOC2007/JPEGImages/' + img_name
    print(img_path)
    original_image = cv2.imread(img_path)
    original_image_size = original_image.shape[:2]
    image_data = image_preporcess(np.copy(original_image),
                                  [cfg.input_image_h, cfg.input_image_w])
    image_data = image_data[np.newaxis, ...]

    t0 = time.time()
    detections = sess.run(det, feed_dict={inputs: image_data})
    detections = post_process(detections, original_image_size,
                              [cfg.input_image_h, cfg.input_image_w],
                              cfg.down_ratio, cfg.score_threshold)
    print('Inference took %.1f ms (%.2f fps)' %
          ((time.time() - t0) * 1000, 1 / (time.time() - t0)))
    if cfg.use_nms:
        cls_in_img = list(set(detections[:, 5]))
        results = []
        for c in cls_in_img:
            cls_mask = (detections[:, 5] == c)
            classified_det = detections[cls_mask]
            classified_bboxes = classified_det[:, :4]
            classified_scores = classified_det[:, 4]
            inds = py_nms(classified_bboxes,
                          classified_scores,
                          max_boxes=50,
                          iou_thresh=0.5)
Example #15
def update_one_model(ori_img, diff_map, temp_fname, best_temp_fname,
                     darknet_model, frcnn_model, flag, start_yolo_num,
                     start_frcnn_num, dest_num, rate, gt_bboxes):
    print("Updating %s..." % flag)
    # generate bbox grad mask
    grad_mask = np.zeros((500, 500, 3), dtype=np.float64)  # np.float is removed in modern NumPy
    for bbox in gt_bboxes:
        x1, y1, x2, y2 = bbox
        x1_ = max(0, int(x1 - (x2 - x1) * 0.1))
        x2_ = min(499, int(x2 + (x2 - x1) * 0.1))
        y1_ = max(0, int(y1 - (y2 - y1) * 0.1))
        y2_ = min(499, int(y2 + (y2 - y1) * 0.1))
        cv2.rectangle(grad_mask, (x1_, y1_), (x2_, y2_), (255, 255, 255), -1)
    grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape(
        (1, 3, 500, 500))

    step = 0
    max_steps_num = 200 if flag == 'frcnn' else 50
    best_yolo_num = start_yolo_num
    best_frcnn_num = start_frcnn_num
    min_yolo_loss = float('inf')
    min_frcnn_loss = float('inf')
    min_creterion = float('inf')
    best_diff_map = None
    gradient = np.zeros((1, 3, 500, 500), dtype=np.float64)

    relu = torch.nn.ReLU()
    while (step < max_steps_num):
        save_format_try_image(ori_img, diff_map, temp_fname)
        yolo_input, frcnn_input = get_yolo_image(temp_fname), get_frcnn_image(
            temp_fname)

        yolo_input.requires_grad = True
        list_boxes = darknet_model(yolo_input)
        yolo_results = post_process(list_boxes)
        yolo_num = len(yolo_results)

        boxes_0 = list_boxes[0].view(3, 85, -1)
        loss_0 = torch.sum(relu(boxes_0[:, 4, :]))
        boxes_1 = list_boxes[1].view(3, 85, -1)
        loss_1 = torch.sum(relu(boxes_1[:, 4, :]))
        boxes_2 = list_boxes[2].view(3, 85, -1)
        loss_2 = torch.sum(relu(boxes_2[:, 4, :]))
        yolo_loss = loss_0 + loss_1 + loss_2

        frcnn_input.requires_grad = True
        frcnn_results, scores, _ = frcnn_model(
            img=[frcnn_input],
            img_metas=[[{
                'filename': '',
                'ori_filename': '',
                'ori_shape': (500, 500, 3),
                'img_shape': (800, 800, 3),
                'pad_shape': (800, 800, 3),
                'scale_factor': np.array([1.6, 1.6, 1.6, 1.6]),
                'flip': False,
                'flip_direction': None,
                'img_norm_cfg': {
                    'mean': np.array([123.675, 116.28, 103.53]),
                    'std': np.array([58.395, 57.12, 57.375]),
                    'to_rgb': True
                }
            }]],
            return_loss=False,
            rescale=False)

        frcnn_results = np.concatenate(frcnn_results)
        frcnn_loss = torch.sum(relu(scores[:, :-1] - 0.049))
        frcnn_num = np.sum(frcnn_results[:, 4] > 0.3)

        # # get gt bboxes
        # gt_bboxes = []
        # h = w = 500
        # for yolo_bbox in yolo_results:
        #     x1, y1, x2, y2 = yolo_bbox[:4]
        #     x1, x2 = int(x1*w), int(x2*w)
        #     y1, y2 = int(y1*h), int(y2*h)
        #     gt_bboxes.append([x1-x2//2, y1-y2//2, x1+x2//2, y1+y2//2])
        # for frcnn_bbox in frcnn_results:
        #     if(frcnn_bbox[-1] > 0.3):
        #         x1, y1, x2, y2 = [int(x/1.6) for x in frcnn_bbox[:4]]
        #         gt_bboxes.append([x1,y1,x2,y2])
        # # generate bbox grad mask
        # grad_mask = np.zeros((500,500,3), dtype=np.float)
        # for bbox in gt_bboxes:
        #     x1, y1, x2, y2 = bbox
        #     cv2.rectangle(grad_mask, (x1,y1), (x2,y2), (255,255,255), -1)
        # grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape((1,3,500,500))

        if (step == 0):
            epoch_criterion = float(yolo_num) / start_yolo_num + float(
                frcnn_num) / start_frcnn_num

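        # detection counts dominate the criterion (weight 10000); the differentiable
        # loss of the model currently being attacked breaks ties between equal counts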
        #criterion = yolo_num if flag == 'yolo' else frcnn_num
        criterion = 10000 * (min(1.,
                                 float(yolo_num) / start_yolo_num) +
                             min(1.,
                                 float(frcnn_num) / start_frcnn_num)) + (
                                     yolo_loss
                                     if flag == 'yolo' else frcnn_loss)
        if (criterion < min_criterion):
            min_criterion = criterion
            min_frcnn_loss = frcnn_loss
            min_yolo_loss = yolo_loss
            best_yolo_num = yolo_num
            best_frcnn_num = frcnn_num
            best_diff_map = diff_map.copy()
            copyfile(temp_fname, best_temp_fname)

        # check rate; fname (the original image path) is presumably a module-level global
        patch_number, area_rate = get_cd_score(fname, best_temp_fname)
        print(
            "%d @ [%d,%d,  %d,%d  --> %d] f_loss=%g y_loss=%g min_f_loss=%g min_y_loss=%g, best patch=%d rate=%g limit=%.2f"
            % (step, yolo_num, frcnn_num, best_yolo_num, best_frcnn_num,
               dest_num, frcnn_loss, yolo_loss, min_frcnn_loss, min_yolo_loss,
               patch_number, area_rate, 100. - rate))
        if (((yolo_num == 0 and flag == 'yolo') or
             (frcnn_num == 0 and flag == 'frcnn')) and area_rate < 0.02
                and patch_number <= 10):
            break

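        # backprop through YOLO: its input is 608x608, so resize the gradient back to
        # the 500x500 canvas (HWC for mmcv.imresize, then back to NCHW)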
        darknet_model.zero_grad()
        yolo_loss.backward(retain_graph=False)
        yolo_d_grad = yolo_input.grad.data.cpu().numpy().reshape(
            (1, 3, 608, 608))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad[0], 0, 1), 1, 2)
        yolo_d_grad = mmcv.imresize(yolo_d_grad, (500, 500))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad, 1, 2), 0,
                                  1).reshape((1, 3, 500, 500))

        frcnn_model.zero_grad()
        frcnn_loss.backward(retain_graph=False)
        frcnn_d_grad = frcnn_input.grad.data.cpu().numpy().reshape(
            (1, 3, 800, 800))

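        # rescale each channel by its normalization std (over 255) to bring the FRCNN
        # gradient into the same range as the YOLO gradient before blending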
        frcnn_d_grad[:, 0, :, :] = frcnn_d_grad[:, 0, :, :] * (58.395 / 255.)
        frcnn_d_grad[:, 1, :, :] = frcnn_d_grad[:, 1, :, :] * (57.12 / 255.)
        frcnn_d_grad[:, 2, :, :] = frcnn_d_grad[:, 2, :, :] * (57.375 / 255.)
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad[0], 0, 1), 1, 2)
        frcnn_d_grad = mmcv.imresize(frcnn_d_grad, (500, 500))
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad, 1, 2), 0,
                                   1).reshape((1, 3, 500, 500))
        #frcnn_d_norm = np.linalg.norm(frcnn_d_grad, ord=2, axis=1).reshape(500,500)
        #frcnn_d_norm = (frcnn_d_norm - np.min(frcnn_d_norm)) / (np.max(frcnn_d_norm) - np.min(frcnn_d_norm))
        #frcnn_weight = np.repeat(frcnn_d_norm.reshape(1,1,500,500), 3, axis=1)
        #frcnn_d_grad = np.multiply(frcnn_weight, frcnn_d_grad)
        frcnn_d_grad = normalize(frcnn_d_grad.reshape(3, -1), axis=1).reshape(
            (1, 3, 500, 500))
        frcnn_d_grad = frcnn_d_grad * 10

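        # blend the two gradients: 95% YOLO when attacking YOLO, 90% FRCNN otherwise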
        if (flag == 'yolo'):
            alpha = 0.95
        else:
            alpha = 0.1
        gradient = (1. - alpha) * frcnn_d_grad + alpha * yolo_d_grad
        #if(flag == 'frcnn'):
        #    gradient = 0.9 * gradient + 0.1 * grad
        #else:
        #    gradient = grad

        loss = yolo_loss if flag == 'yolo' else frcnn_loss
        if (flag == 'yolo'):
            step_size = 0.02
        else:
            if (loss > 10):
                step_size = 0.2  #0.1 + 0.3*(float(loss)-10.)/(start_loss-10.)
            elif (loss > 5):
                step_size = 0.2
            else:
                step_size = 0.01
        step_size = step_size * (1. - float(step) / max_steps_num)

        #get_loss_on_grad_and_stepsize(gradient, step_size, ori_img, diff_map, temp_fname, darknet_model, frcnn_model)
        gradient = step_size * gradient
        diff_map -= gradient

        # check area rate
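        # keep only the strongest 1% of perturbed pixels (99th percentile) inside the
        # bbox mask so the patch area stays small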
        diff_map[grad_mask == 0] = 0
        diff_map_change = np.sum(np.abs(diff_map), axis=1)
        high_thresh = np.percentile(diff_map_change, 99)
        gray_mask = ((diff_map_change > high_thresh) * 255.).astype(np.uint8)

        gray_mask = gray_mask.reshape(500, 500)
        diff_map[0, 0, :, :][gray_mask == 0] = 0
        diff_map[0, 1, :, :][gray_mask == 0] = 0
        diff_map[0, 2, :, :][gray_mask == 0] = 0

        # check connected parts' number
        save_format_try_image(ori_img, diff_map, temp_fname)
        cd_map = get_cd_map(fname, temp_fname)
        labels = measure.label(cd_map, background=0, connectivity=2)
        label_num = np.max(labels)

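        # enforce the patch budget: keep only the 10 largest connected components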
        if (label_num > 10):
            areas = [np.sum(labels == i) for i in range(1, label_num + 1)]
            label_ids = list(range(1, label_num + 1))
            areas, label_ids = zip(*sorted(zip(areas, label_ids)))

            for i in label_ids[:-10]:
                #gray_mask[labels==i] = 0
                diff_map[0, 0, :, :][labels == i] = 0
                diff_map[0, 1, :, :][labels == i] = 0
                diff_map[0, 2, :, :][labels == i] = 0

        #kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3, 3))
        #gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_CLOSE, kernel)
        #gray_mask = gray_mask.reshape(500,500)
        #diff_map[0,0,:,:][gray_mask == 0] = 0
        #diff_map[0,1,:,:][gray_mask == 0] = 0
        #diff_map[0,2,:,:][gray_mask == 0] = 0
        #see = check_image(diff_map)
        #cv2.imwrite('check/%03d_region.jpg' % step, see)
        #cv2.imwrite('check/%03d_region_filter.jpg' % step, cv2.medianBlur(see, 3))
        step += 1

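    # first return value is True when the best detection ratios failed to improve on
    # the step-0 baseline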
    return float(best_yolo_num) / start_yolo_num + float(
        best_frcnn_num) / start_frcnn_num >= epoch_criterion, best_diff_map
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


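# grid-search the sigmoid threshold t and minimum component size ms for each of the
# four classes, scoring every (t, ms) pair by mean Dice against the validation masks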
class_params = {}
for class_id in range(4):
    print(class_id)
    attempts = []
    for t in range(0, 100, 5):
        t /= 100
        for ms in tqdm.tqdm([0, 100, 1200, 5000, 10000],
                            desc=f'{class_id+1}/4; t={t:.2f}'):
            masks = []
            for i in tqdm.tqdm(range(class_id, len(probabilities), 4)):
                probability = probabilities[i]
                predict, num_predict = post_process(
                    sigmoid(probability), t, ms)
                masks.append(predict)
            gc.collect()
            d = []
            for i, j in zip(masks, valid_masks[class_id::4]):
                if (i.sum() == 0) & (j.sum() == 0):
                    d.append(1)
                else:
                    d.append(dice(i, j))
            gc.collect()
            attempts.append((t, ms, np.mean(d)))
    gc.collect()
    attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])

    attempts_df = attempts_df.sort_values('dice', ascending=False)
    print(attempts_df.head())
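
The grid search above and the ensemble() function below both assume a segmentation helper post_process(probability, threshold, min_size) that binarizes a probability mask and drops small connected components (the ensemble() variant additionally takes height and width). A minimal sketch of such a helper; the signature, return values, and cv2-based implementation here are assumptions, not the original code:

import cv2
import numpy as np


def post_process(probability, threshold, min_size):
    # binarize the mask, then keep only connected components whose pixel
    # area exceeds min_size; also return how many components survive
    # (sketch under assumed semantics -- the original helper may differ)
    mask = (probability > threshold).astype(np.uint8)
    num_component, component = cv2.connectedComponents(mask)
    predictions = np.zeros(probability.shape, np.float32)
    num = 0
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num
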
def ensemble():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # parameters and configs
    # ------------------------------------------------------------------------------------------------------------
    config_paths320 = [
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold0.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold1.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold2.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold3.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold4.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold0.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold1.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold2.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold3.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold4.yml',
    ]
    # thresholds per class; see their use in the post-processing loop below
    # LABEL_THRESHOLDS = [0.68, 0.69, 0.69, 0.67]
    # MASK_THRESHOLDS = [0.31, 0.36, 0.31, 0.34]
    LABEL_THRESHOLDS = [0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.50]
    MASK_THRESHOLDS = [0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31, 0.31]
    # MIN_SIZES = [7500, 7500, 7500, 7500, 7500, 7500, 7500, 7500]
    MIN_SIZES = [0, 0, 0, 0, 0, 0, 0, 0]
    WEIGHTS = [0.5, 0.5]  # currently unused; intended for multi-resolution ensembling
    # ------------------------------------------------------------------------------------------------------------
    #
    # ------------------------------------------------------------------------------------------------------------
    config = load_config('config/base_config.yml')

    # load the models for evaluation
    def get_model_and_loader(config_paths):
        config = load_config(config_paths[0])
        models = []
        for c in config_paths:
            models.append(load_model(c))

        model = MultiSegModels(models)

        print(config.data.test_dir)

        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test)
        )
        return model, testloader

    model320, loader320 = get_model_and_loader(config_paths320)

    predictions = []

    with torch.no_grad():
        for (batch_fnames320, batch_images320) in tqdm(loader320):
            batch_images320 = batch_images320.to(config.device)
            print(batch_images320.size())
            batch_preds320 = predict_batch(
                model320, batch_images320, tta=config.test.tta)

            #resize the images from multi resolution models
            batch_preds320 = resize_batch_images(
                batch_preds320, SUB_HEIGHT, SUB_WIDTH)
            batch_preds = batch_preds320
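
            # image-level class scores: global max-pool over each sigmoid mask gives
            # one probability per class, used below to zero out empty classes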
            batch_labels320 = torch.nn.functional.adaptive_max_pool2d(torch.sigmoid(
                torch.Tensor(batch_preds320)), 1).view(batch_preds320.shape[0], -1)
            #print(batch_labels320)

            # TODO: weight batch_labels (e.g. with WEIGHTS) once a second model group is added
            batch_labels = batch_labels320

            print("batch_preds",batch_preds.shape)
            print("batch_labels",batch_labels.size())


            for fname, preds, labels in zip(batch_fnames320, batch_preds, batch_labels):
                print("ad", labels.size())
                for cls in range(8):
                    if labels[cls] <= LABEL_THRESHOLDS[cls]:
                        # class score below threshold: emit an empty mask
                        pred = np.zeros(preds[cls, :, :].shape)
                        print("setting 0", cls)
                    else:
                        if cls == 7:
                            print("ok")
                        #print("probability", preds[cls, :, :])

                        pred, _ = post_process(
                            preds[cls, :, :], MASK_THRESHOLDS[cls],
                            MIN_SIZES[cls], height=SUB_HEIGHT, width=SUB_WIDTH)
                        cls_name = INV_CLASSES[cls]
                        print(fname)
                    dump_name = ('results/masks/experiment1/' + fname +
                                 'class_' + str(cls) + '.jpg')
                    print(dump_name)
                    # cast to uint8 so cv2.imwrite handles the float mask reliably
                    cv2.imwrite(dump_name, (pred * 255).astype(np.uint8))