def test_cluster_det(model, cfg, logger):
    """Evaluate a cluster-detection model on the test set.

    Loads an optional checkpoint, runs a single-GPU evaluation pass, logs
    per-iteration and overall loss, and (when ``cfg.save_output`` is set)
    dumps the flattened per-proposal output probabilities to an ``.npz``
    file next to the checkpoint name in ``cfg.work_dir``.
    """
    if cfg.load_from:
        load_checkpoint(model, cfg.load_from)

    # propagate model kwargs onto the test-data config before building it
    for key, val in cfg.model['kwargs'].items():
        setattr(cfg.test_data, key, val)
    dataset = build_dataset(cfg.test_data)
    processor = build_processor(cfg.stage)

    losses = []
    output_probs = []

    # multi-GPU evaluation is not supported
    if cfg.gpus != 1:
        raise NotImplementedError

    data_loader = build_dataloader(dataset,
                                   processor,
                                   cfg.batch_size_per_gpu,
                                   cfg.workers_per_gpu,
                                   train=False)

    model = MMDataParallel(model, device_ids=range(cfg.gpus))
    if cfg.cuda:
        model.cuda()

    model.eval()
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            output, loss = model(data, return_loss=True)
            losses.append(loss.item())
            if i % cfg.log_config.interval == 0:
                if dataset.ignore_meta:
                    logger.info('[Test] Iter {}/{}'.format(
                        i, len(data_loader)))
                else:
                    logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                        i, len(data_loader), loss))
            if cfg.save_output:
                # keep a flat numpy copy of this batch's probabilities
                prob = output.view(-1).data.cpu().numpy()
                output_probs.append(prob)

    if not dataset.ignore_meta:
        avg_loss = sum(losses) / len(losses)
        logger.info('[Test] Overall Loss {:.4f}'.format(avg_loss))

    if cfg.save_output:
        fn = os.path.basename(cfg.load_from)
        opath = os.path.join(cfg.work_dir, fn[:fn.rfind('.pth')] + '.npz')
        meta = {
            'tot_inst_num': dataset.inst_num,
            'proposal_folders': cfg.test_data.proposal_folders,
        }
        print('dump output to {}'.format(opath))
        np.savez_compressed(opath,
                            data=np.concatenate(output_probs).ravel(),
                            meta=meta)
# Example #2 (scraper artifact — commented out so the file parses as Python)
def get_data(model, output_path, cfg, is_test=False, num_runs=1):
    """Extract per-proposal features and IoP values and write them to disk.

    Runs `model` in inference mode over the train (or test) data loader for
    `num_runs` passes and writes one line per proposal to `output_path`:
    the feature vector followed by its IoP value, space-separated.

    Args:
        model: network that returns ``(features, label, iop)`` when called
            with ``return_loss=False, return_data=True``.
        output_path: destination text file path.
        cfg: config providing data, loader, and GPU settings.
        is_test: if True use ``cfg.test_data``, else ``cfg.train_data``.
        num_runs: number of full passes over the data loader.
    """
    data_cfg = cfg.test_data if is_test else cfg.train_data
    dataset = build_dataset(data_cfg)
    processor = build_processor(cfg.stage)

    data_loader = build_dataloader(dataset,
                                   processor,
                                   cfg.batch_size_per_gpu,
                                   cfg.workers_per_gpu,
                                   train=False)

    model = MMDataParallel(model, device_ids=range(cfg.gpus))
    if cfg.cuda:
        model.cuda()

    all_feas = []
    all_iops = []
    for k in range(num_runs):
        for i, data in enumerate(data_loader):
            print('\t running ' + str(k) + 'th run, ' + str(i) + 'th batch')
            with torch.no_grad():
                # label is returned by the model but not needed here
                hist_std_fea, _label, iop = model(data,
                                                  return_loss=False,
                                                  return_data=True)
                fea = hist_std_fea.cpu().numpy()
                # move iop to CPU once per batch instead of once per row
                iop_cpu = iop.cpu()
                for j in range(fea.shape[0]):
                    all_feas.append(fea[j].tolist())
                    all_iops.append(float(iop_cpu[j]))

    print('in total we have ' + str(len(all_iops)) + ' proposals')
    lines = []
    for fea, iop in zip(all_feas, all_iops):
        tokens = [str(f) for f in fea]
        tokens.append(str(iop))
        lines.append(' '.join(tokens) + '\n')

    # context manager guarantees the file is closed even if writing fails
    with open(output_path, 'w') as f:
        f.writelines(lines)
def train_cluster_det(model, cfg, logger):
    """Train a cluster-detection model, dispatching to the distributed or
    single-machine training loop based on ``cfg.distributed``."""
    # copy model kwargs onto the train-data config before building it
    for key, val in cfg.model['kwargs'].items():
        setattr(cfg.train_data, key, val)

    dataset = build_dataset(cfg.train_data)
    processor = build_processor(cfg.stage)
    loader = build_dataloader(dataset,
                              processor,
                              cfg.batch_size_per_gpu,
                              cfg.workers_per_gpu,
                              train=True,
                              shuffle=True)
    data_loaders = [loader]

    # select the training backend
    trainer = _dist_train if cfg.distributed else _single_train
    trainer(model, data_loaders, cfg)
# Example #4 (scraper artifact — commented out so the file parses as Python)
def train_cluster(model, cfg, logger, batch_processor):
    """Train a clustering model with the given batch processor.

    Requires a labeled dataset; raises if ``label_path`` was not supplied.
    """
    # copy model kwargs onto the train-data config before building it
    for key, val in cfg.model['kwargs'].items():
        setattr(cfg.train_data, key, val)

    dataset = build_dataset(cfg.train_data)
    assert not dataset.ignore_label, 'Please specify label_path for training'

    processor = build_processor(cfg.stage)
    loader = build_dataloader(dataset,
                              processor,
                              cfg.batch_size_per_gpu,
                              cfg.workers_per_gpu,
                              train=True,
                              shuffle=True)
    data_loaders = [loader]

    # select the training backend
    trainer = _dist_train if cfg.distributed else _single_train
    trainer(model, data_loaders, batch_processor, cfg)
def _single_train(model, dataset1, dataset2, processor, cfg, logger):
    """Jointly train three models on the same batches with mutual
    KL-divergence distillation between their outputs.

    Args:
        model: list of three modules (conv3d, dsgcn, histogram-std per the
            log messages below).
        dataset1: training dataset; iterated with shuffling.
        dataset2: second dataset; a loader is built for it but never
            iterated in this function.
        processor: batch processor passed to the dataloaders.
        cfg: config providing gpus, batch sizes, optimizer settings,
            total_iterations, log interval and work_dir.
        logger: logger for per-iteration loss reporting.
    """
    model = [
        MMDataParallel(item, device_ids=range(cfg.gpus)).cuda()
        for item in model
    ]
    optimizer = [build_optimizer(item, cfg.optimizer) for item in model]

    iter_num = 0
    model1, model2, model3 = model[0], model[1], model[2]
    optimizer1, optimizer2, optimizer3 = optimizer[0], optimizer[1], optimizer[
        2]
    # NOTE(review): data_loaders2 is built but never used below — confirm
    # whether dataset2 evaluation was intended here.
    data_loaders2 = build_dataloader(dataset2,
                                     processor,
                                     cfg.batch_size_per_gpu,
                                     cfg.workers_per_gpu,
                                     train=False,
                                     shuffle=False)
    data_loaders1 = build_dataloader(dataset1,
                                     processor,
                                     cfg.batch_size_per_gpu,
                                     cfg.workers_per_gpu,
                                     train=True,
                                     shuffle=True)
    lr_scheduler1 = torch.optim.lr_scheduler.StepLR(optimizer1, step_size=30)
    lr_scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer2, step_size=30)
    lr_scheduler3 = torch.optim.lr_scheduler.StepLR(optimizer3, step_size=30)
    while iter_num < cfg.total_iterations:
        # NOTE(review): scheduler.step() is called once per *epoch* (outer
        # loop) and before any optimizer.step(); recent PyTorch versions
        # warn about this ordering — confirm it matches the intended decay.
        lr_scheduler1.step()
        lr_scheduler2.step()
        lr_scheduler3.step()
        for i, data_batch in enumerate(data_loaders1):
            iter_num += 1
            model1.train()
            model2.train()
            model3.train()
            model1.zero_grad()
            model2.zero_grad()
            model3.zero_grad()
            # each model returns (logits, per-sample loss); losses are
            # averaged over the batch
            x1, loss1 = model1(data_batch, return_loss=True)
            loss1 = torch.mean(loss1)
            x2, loss2 = model2(data_batch, return_loss=True)
            loss2 = torch.mean(loss2)
            x3, loss3 = model3(data_batch, return_loss=True)
            loss3 = torch.mean(loss3)
            logger.info('[Iteration] {}, lr {}'.format(
                iter_num,
                lr_scheduler1.get_lr()[0]))
            logger.info('[Train] [Conv3d] Loss {:.4f}'.format(loss1))
            logger.info('[Train] [dsgcn] Loss {:.4f}'.format(loss2))
            logger.info('[Train] [histgram_std] Loss {:.4f}'.format(loss3))

            # periodically checkpoint all three models
            if iter_num % cfg.log_config.interval == 0:
                with open(
                        cfg.work_dir + "/cnn_model_iter_" + str(iter_num) +
                        ".pth", 'wb') as to_save1:
                    torch.save(model1.module, to_save1)
                with open(
                        cfg.work_dir + "/dsgcn_model_iter_" + str(iter_num) +
                        ".pth", 'wb') as to_save2:
                    torch.save(model2.module, to_save2)
                with open(
                        cfg.work_dir + "/histstd_model_iter_" + str(iter_num) +
                        ".pth", 'wb') as to_save3:
                    torch.save(model3.module, to_save3)

            # mutual distillation: KL(model_a || detached model_b),
            # normalized by batch size. The third positional argument
            # (False) is the deprecated size_average flag of F.kl_div,
            # i.e. summed (unreduced-mean) KL.
            loss12 = F.kl_div(F.log_softmax(x1, dim=1), x2.detach(),
                              False) / x1.shape[0]
            loss13 = F.kl_div(F.log_softmax(x1, dim=1), x3.detach(),
                              False) / x1.shape[0]
            # model1 is distilled from both peers
            loss1 = loss1 + loss12 + loss13
            loss1.backward()
            optimizer1.step()
            loss21 = F.kl_div(F.log_softmax(x2, dim=1), x1.detach(),
                              False) / x2.shape[0]
            loss23 = F.kl_div(F.log_softmax(x2, dim=1), x3.detach(),
                              False) / x2.shape[0]
            # NOTE(review): loss21 is computed but never added to loss2
            # (only loss23 is) — confirm whether this asymmetry is intended.
            loss2 = loss2 + loss23
            loss2.backward()
            optimizer2.step()
            loss31 = F.kl_div(F.log_softmax(x3, dim=1), x1.detach(),
                              False) / x3.shape[0]
            loss32 = F.kl_div(F.log_softmax(x3, dim=1), x2.detach(),
                              False) / x3.shape[0]
            # NOTE(review): loss31 is likewise unused — confirm intent.
            loss3 = loss3 + loss32
            loss3.backward()
            optimizer3.step()
def test_cluster_mall(model1, cfg, logger):
    """Evaluate a cluster model on the mall test set and report IoP quality.

    Loads the model from ``cfg.load_from1`` (the ``model1`` parameter is
    immediately shadowed and otherwise unused), runs a single-GPU eval
    pass, plots a ROC curve and an estimated-IoP box plot, and dumps
    estimated/ground-truth IoP values to JSON files in ``cfg.work_dir``.
    """
    # NOTE(review): the model1 argument is never used — the model is
    # loaded fresh from cfg.load_from1. Confirm that is intended.
    model = torch.load(cfg.load_from1)

    # merge kwargs of both model configs into the test-data config
    for k, v in cfg.model1['kwargs'].items():
        setattr(cfg.test_data, k, v)
    for k, v in cfg.model2['kwargs'].items():
        setattr(cfg.test_data, k, v)
    setattr(cfg.test_data, 'phase', 'test')
    dataset = build_dataset_mall(cfg.test_data)
    processor = build_processor(cfg.stage)

    losses = []
    output_probs = []
    IoP_GT = []
    # NOTE(review): IoP_binary_GT is never populated or read below.
    IoP_binary_GT = []
    num_impure_pro = 0
    if cfg.gpus == 1:
        data_loader = build_dataloader(dataset,
                                       processor,
                                       cfg.batch_size_per_gpu,
                                       cfg.workers_per_gpu,
                                       train=False)

        model = MMDataParallel(model, device_ids=range(cfg.gpus))
        if cfg.cuda:
            model.cuda()
        output_IoP_loss = []
        model.eval()
        for i, data in enumerate(data_loader):
            with torch.no_grad():
                output, loss = model(data, return_loss=True)
                losses += [loss.item()]
                # count proposals whose ground-truth label (last element of
                # the batch tuple) is 0, i.e. impure proposals
                num_impure_pro += (data[-1] == 0).nonzero().shape[0]
                if i % cfg.log_config.interval == 0:
                    logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                        i, len(data_loader), loss))
                if cfg.save_output:
                    # keep the positive-class probability per proposal
                    output = output[:, 1]
                    output = output.view(-1)
                    output_probs.append(output.tolist())
                    IoP_GT.append(data[-1].tolist())
    else:
        raise NotImplementedError
    # flatten the per-batch lists; several parallel representations are kept
    # (python list, numpy array, torch tensor) for the calls below
    output_probs1 = [iop for item in output_probs for iop in item]
    output_probs = np.array([iop for item in output_probs for iop in item])
    IoP_GT0 = [iop for item in IoP_GT for iop in item]
    IoP_GT = np.array([iop for item in IoP_GT for iop in item])
    output_probs = torch.from_numpy(output_probs)
    IoP_GT1 = torch.from_numpy(IoP_GT)
    #HistgramStd.eval_batch_new(output_probs, IoP_GT1, 'BCE')
    output_probs2 = np.array(output_probs1)
    # plot roc curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        IoP_GT, output_probs2)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.title('ROC')
    plt.plot(false_positive_rate,
             true_positive_rate,
             'b',
             label='AUC = %0.4f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('TPR')
    plt.xlabel('FPR')
    plt.draw()
    plt.savefig(cfg.work_dir + '/ROC.jpg')
    plt.close()

    # plot IoP distribution curve
    # split predicted probabilities by ground-truth class (0 vs 1)
    pos01 = np.where((IoP_GT1 == 0))
    iop_01 = output_probs2[pos01]
    pos02 = np.where((IoP_GT1 == 1))
    iop_02 = output_probs2[pos02]
    if cfg.save_output:
        plt.figure(1)
        plt.subplot(1, 1, 1)
        plt.boxplot([iop_01.tolist(), iop_02.tolist()], notch=True)

        x_tricks = np.array([1, 2])
        plt.xticks(x_tricks)
        plt.grid(axis='y')
        plt.draw()
        plt.savefig(cfg.work_dir + '/Estimated_IoP.jpg')
        plt.close()

    # map each dataset node (by file basename) to its estimated IoP;
    # assumes dataset.lst order matches the loader's output order --
    # TODO confirm (only holds for a non-shuffled loader)
    estimated_iop_dict = {}
    for i, node in enumerate(dataset.lst):
        node_name = node.split('/')[-1]
        estimated_iop = output_probs1[i]
        estimated_iop_dict[node_name] = estimated_iop
    with open(cfg.work_dir + '/Estimated_IoP_eval_dict.json', 'w') as f:
        json.dump(estimated_iop_dict, f)
    with open(cfg.work_dir + '/Estimated_IoP_eval.json', 'w') as f:
        json.dump(output_probs1, f)
    with open(cfg.work_dir + '/GT_IoP_eval.json', 'w') as f:
        json.dump(IoP_GT0, f)
def test_cluster_seg(model, cfg, logger):
    """Evaluate a segmentation (GCN-S) model, de-overlap proposals into
    cluster labels with and without its outlier scores, and evaluate.

    Reads precomputed proposal IoU scores from ``cfg.pred_iou_score``,
    sanity-checks them against the dataset, predicts per-vertex outlier
    scores, then runs de-overlap twice (with / without the outlier scores)
    and reports metrics when ground-truth labels are available.
    """
    assert osp.isfile(cfg.pred_iou_score)

    if cfg.load_from:
        logger.info('load pretrained model from: {}'.format(cfg.load_from))
        load_checkpoint(model, cfg.load_from, strict=True, logger=logger)

    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)

    # NOTE(review): cfg.pred_iou_score (asserted above) and
    # cfg.pred_iop_score (used here) are distinct config keys — confirm
    # both are meant to exist.
    setattr(cfg.test_data, 'pred_iop_score', cfg.pred_iop_score)

    dataset = build_dataset(cfg.test_data)
    processor = build_processor(cfg.stage)

    inst_num = dataset.inst_num

    # read pred_scores from file and do sanity check
    d = np.load(cfg.pred_iou_score, allow_pickle=True)
    pred_scores = d['data']
    meta = d['meta'].item()
    assert inst_num == meta['tot_inst_num'], '{} vs {}'.format(
        inst_num, meta['tot_inst_num'])

    # verify the proposal list on disk matches the dataset's proposal list,
    # so pred_scores align index-for-index with dataset.tot_lst
    proposals = [fn_node for fn_node, _ in dataset.tot_lst]
    _proposals = []
    fn_node_pattern = '*_node.npz'
    for proposal_folder in meta['proposal_folders']:
        fn_clusters = sorted(
            glob.glob(osp.join(proposal_folder, fn_node_pattern)))
        _proposals.extend([fn_node for fn_node in fn_clusters])
    assert proposals == _proposals, '{} vs {}'.format(len(proposals),
                                                      len(_proposals))

    losses = []
    pred_outlier_scores = []
    stats = {'mean': []}

    if cfg.gpus == 1:
        data_loader = build_dataloader(dataset,
                                       processor,
                                       cfg.test_batch_size_per_gpu,
                                       cfg.workers_per_gpu,
                                       train=False)

        model = MMDataParallel(model, device_ids=range(cfg.gpus))
        if cfg.cuda:
            model.cuda()

        model.eval()
        for i, data in enumerate(data_loader):
            with torch.no_grad():
                output, loss = model(data, return_loss=True)
                losses += [loss.item()]
                if i % cfg.log_config.interval == 0:
                    if dataset.ignore_label:
                        logger.info('[Test] Iter {}/{}'.format(
                            i, len(data_loader)))
                    else:
                        logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                            i, len(data_loader), loss))
                if cfg.save_output:
                    # softmax over classes; keep the positive (outlier)
                    # class probability per vertex
                    output = F.softmax(output, dim=1)
                    output = output[:, 1, :]
                    scores = output.data.cpu().numpy()
                    pred_outlier_scores.extend(list(scores))
                    stats['mean'] += [scores.mean()]
    else:
        raise NotImplementedError

    if not dataset.ignore_label:
        avg_loss = sum(losses) / len(losses)
        logger.info('[Test] Overall Loss {:.4f}'.format(avg_loss))

    scores_mean = 1. * sum(stats['mean']) / len(stats['mean'])
    logger.info('mean of pred_outlier_scores: {:.4f}'.format(scores_mean))

    # save predicted scores
    if cfg.save_output:
        if cfg.load_from:
            fn = osp.basename(cfg.load_from)
        else:
            fn = 'random'
        opath = osp.join(cfg.work_dir, fn[:fn.rfind('.pth')] + '.npz')
        meta = {
            'tot_inst_num': inst_num,
            'proposal_folders': cfg.test_data.proposal_folders,
        }
        logger.info('dump pred_outlier_scores ({}) to {}'.format(
            len(pred_outlier_scores), opath))
        np.savez_compressed(opath, data=pred_outlier_scores, meta=meta)

    # post-process: map each proposal node file to its outlier scores.
    # Assumes dataset.lst order matches the loader's output order --
    # TODO confirm (only holds for a non-shuffled loader).
    outlier_scores = {
        fn_node: outlier_score
        for (fn_node,
             _), outlier_score in zip(dataset.lst, pred_outlier_scores)
    }

    # de-overlap (w gcn-s)
    pred_labels_w_seg = deoverlap(pred_scores,
                                  proposals,
                                  inst_num,
                                  cfg.th_pos,
                                  cfg.th_iou,
                                  outlier_scores=outlier_scores,
                                  th_outlier=cfg.th_outlier,
                                  keep_outlier=cfg.keep_outlier)

    # de-overlap (wo gcn-s)
    pred_labels_wo_seg = deoverlap(pred_scores, proposals, inst_num,
                                   cfg.th_pos, cfg.th_iou)

    # save predicted labels
    if cfg.save_output:
        ofn_meta_w_seg = osp.join(cfg.work_dir, 'pred_labels_w_seg.txt')
        ofn_meta_wo_seg = osp.join(cfg.work_dir, 'pred_labels_wo_seg.txt')
        print('save predicted labels to {} and {}'.format(
            ofn_meta_w_seg, ofn_meta_wo_seg))
        pred_idx2lb_w_seg = list2dict(pred_labels_w_seg, ignore_value=-1)
        pred_idx2lb_wo_seg = list2dict(pred_labels_wo_seg, ignore_value=-1)
        write_meta(ofn_meta_w_seg, pred_idx2lb_w_seg, inst_num=inst_num)
        write_meta(ofn_meta_wo_seg, pred_idx2lb_wo_seg, inst_num=inst_num)

    # evaluation
    if not dataset.ignore_label:
        gt_labels = dataset.labels
        print('==> evaluation (with gcn-s)')
        for metric in cfg.metrics:
            evaluate(gt_labels, pred_labels_w_seg, metric)
        print('==> evaluation (without gcn-s)')
        for metric in cfg.metrics:
            evaluate(gt_labels, pred_labels_wo_seg, metric)
def test_cluster_det(model, cfg, logger):
    """Evaluate a cluster-detection (GCN-D) model on the test set.

    Optionally dumps predicted proposal scores to an ``.npz`` file, then
    de-overlaps proposals into flat cluster labels, optionally saves them,
    and evaluates against ground truth when labels are available.
    """
    if cfg.load_from:
        logger.info('load pretrained model from: {}'.format(cfg.load_from))
        load_checkpoint(model, cfg.load_from, strict=True, logger=logger)

    # propagate model kwargs onto the test-data config before building it
    for key, val in cfg.model['kwargs'].items():
        setattr(cfg.test_data, key, val)
    dataset = build_dataset(cfg.test_data)
    processor = build_processor(cfg.stage)

    losses = []
    pred_scores = []

    # multi-GPU evaluation is not supported
    if cfg.gpus != 1:
        raise NotImplementedError

    data_loader = build_dataloader(dataset,
                                   processor,
                                   cfg.test_batch_size_per_gpu,
                                   cfg.workers_per_gpu,
                                   train=False)

    model = MMDataParallel(model, device_ids=range(cfg.gpus))
    if cfg.cuda:
        model.cuda()

    model.eval()
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            output, loss = model(data, return_loss=True)
            losses.append(loss.item())
            if i % cfg.log_config.interval == 0:
                if dataset.ignore_label:
                    logger.info('[Test] Iter {}/{}'.format(
                        i, len(data_loader)))
                else:
                    logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                        i, len(data_loader), loss))
            if cfg.save_output:
                # flat numpy copy of this batch's predicted scores
                pred_scores.append(output.view(-1).data.cpu().numpy())

    if not dataset.ignore_label:
        avg_loss = sum(losses) / len(losses)
        logger.info('[Test] Overall Loss {:.4f}'.format(avg_loss))

    # save predicted scores
    if cfg.save_output:
        fn = os.path.basename(cfg.load_from) if cfg.load_from else 'random'
        opath = os.path.join(cfg.work_dir, fn[:fn.rfind('.pth')] + '.npz')
        meta = {
            'tot_inst_num': dataset.inst_num,
            'proposal_folders': cfg.test_data.proposal_folders,
        }
        print('dump pred_score to {}'.format(opath))
        pred_scores = np.concatenate(pred_scores).ravel()
        np.savez_compressed(opath, data=pred_scores, meta=meta)

    # de-overlap proposals into flat per-instance labels
    proposals = [fn_node for fn_node, _ in dataset.lst]
    pred_labels = deoverlap(pred_scores, proposals, dataset.inst_num,
                            cfg.th_pos, cfg.th_iou)

    # save predicted labels
    if cfg.save_output:
        ofn_meta = os.path.join(cfg.work_dir, 'pred_labels.txt')
        print('save predicted labels to {}'.format(ofn_meta))
        write_meta(ofn_meta,
                   list2dict(pred_labels, ignore_value=-1),
                   inst_num=dataset.inst_num)

    # evaluation
    if not dataset.ignore_label:
        print('==> evaluation')
        gt_labels = dataset.labels
        for metric in cfg.metrics:
            evaluate(gt_labels, pred_labels, metric)