Example #1
import logging
import os
import time

import torch
from torch.utils.data import DataLoader

# Project-local SCOT modules (import paths assumed from the repository layout)
from model import scot_CAM, geometry, evaluation, util
from data import download


def run(datapath,
        benchmark,
        backbone,
        thres,
        alpha,
        hyperpixel,
        logpath,
        args,
        beamsearch=False,
        model=None,
        dataloader=None):
    r"""Runs Semantic Correspondence as an Optimal Transport Problem"""

    # 1. Logging initialization
    if not os.path.isdir('logs'):
        os.mkdir('logs')
    if not beamsearch:
        logfile = 'logs/{}_{}_{}_{}_exp{}-{}_e{}_m{}_{}_{}'.format(
            benchmark, backbone, args.split, args.sim, args.exp1, args.exp2,
            args.eps, args.classmap, args.cam, args.hyperpixel)
        print(logfile)
        util.init_logger(logfile)
        util.log_args(args)

    # 2. Evaluation benchmark initialization
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if dataloader is None:
        download.download_dataset(os.path.abspath(datapath), benchmark)
        split = args.split
        dset = download.load_dataset(benchmark, datapath, thres, device, split,
                                     args.cam)
        dataloader = DataLoader(dset, batch_size=1, num_workers=0)

    # 3. Model initialization
    if model is None:
        model = scot_CAM.SCOT_CAM(backbone, hyperpixel, benchmark, device,
                                  args.cam)
    else:
        model.hyperpixel_ids = util.parse_hyperpixel(hyperpixel)

    # 4. Evaluator initialization
    evaluator = evaluation.Evaluator(benchmark, device)

    zero_pcks = 0
    srcpt_list = []
    trgpt_list = []
    time_list = []
    PCK_list = []
    for idx, data in enumerate(dataloader):
        threshold = 0.0

        # a) Retrieve images and adjust their sizes to avoid large numbers of hyperpixels
        data['src_img'], data['src_kps'], data['src_intratio'] = util.resize(
            data['src_img'], data['src_kps'][0])
        data['trg_img'], data['trg_kps'], data['trg_intratio'] = util.resize(
            data['trg_img'], data['trg_kps'][0])
        src_size = data['src_img'].size()
        trg_size = data['trg_img'].size()

        if len(args.cam) > 0:
            data['src_mask'] = util.resize_mask(data['src_mask'], src_size)
            data['trg_mask'] = util.resize_mask(data['trg_mask'], trg_size)
            data['src_bbox'] = util.get_bbox_mask(data['src_mask'],
                                                  thres=threshold).to(device)
            data['trg_bbox'] = util.get_bbox_mask(data['trg_mask'],
                                                  thres=threshold).to(device)
        else:
            data['src_mask'] = None
            data['trg_mask'] = None

        data['alpha'] = alpha
        tic = time.time()

        # b) Feed a pair of images to Hyperpixel Flow model
        with torch.no_grad():
            confidence_ts, src_box, trg_box = model(
                data['src_img'], data['trg_img'], args.sim, args.exp1,
                args.exp2, args.eps, args.classmap, data['src_bbox'],
                data['trg_bbox'], data['src_mask'], data['trg_mask'], backbone)
            # Count distinct matched target candidates vs. total source candidates
            conf, trg_indices = torch.max(confidence_ts, dim=1)
            unique, inv = torch.unique(trg_indices,
                                       sorted=False,
                                       return_inverse=True)
            trgpt_list.append(len(unique))
            srcpt_list.append(len(confidence_ts))

        # c) Predict key-points & evaluate performance
        prd_kps = geometry.predict_kps(src_box, trg_box, data['src_kps'],
                                       confidence_ts)
        toc = time.time()
        time_list.append(toc - tic)
        pair_pck = evaluator.evaluate(prd_kps, data)
        PCK_list.append(pair_pck)
        if pair_pck == 0:
            zero_pcks += 1

        # d) Log results
        if not beamsearch:
            evaluator.log_result(idx, data=data)

    if beamsearch:
        return (sum(evaluator.eval_buf['pck']) /
                len(evaluator.eval_buf['pck'])) * 100.
    else:
        logging.info('source points:' +
                     str(sum(srcpt_list) * 1.0 / len(srcpt_list)))
        logging.info('target points:' +
                     str(sum(trgpt_list) * 1.0 / len(trgpt_list)))
        logging.info('avg running time:' +
                     str(sum(time_list) / len(time_list)))
        evaluator.log_result(len(dataloader.dataset), data=None, average=True)
        logging.info('Total Number of 0.00 pck images:' + str(zero_pcks))
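For reference, a minimal sketch of calling Example #1's run directly rather than through the CLI shown in Example #3. The args namespace only needs the attributes the function actually reads (split, sim, exp1, exp2, eps, classmap, cam, hyperpixel); every value below is an illustrative assumption, not a project default.

from types import SimpleNamespace

# All values here are illustrative assumptions, not the project's defaults.
args = SimpleNamespace(split='test', sim='OTGeo', exp1=1.0, exp2=0.5,
                       eps=0.05, classmap=1, cam='',
                       hyperpixel='(2,22,24,25,27,28,29)')

run(datapath='../Datasets_SCOT', benchmark='pfpascal', backbone='resnet101',
    thres='img', alpha=0.05, hyperpixel=args.hyperpixel, logpath='',
    args=args, beamsearch=False)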
Example #2
import os

import numpy as np
import torch
from torch.utils.data import DataLoader

# Project-local SCOT modules (import paths assumed); TSSDataset,
# create_file_path and write_flo_file are project helpers used below.
from model import scot_CAM, geometry, util


def run(datapath,
        benchmark,
        backbone,
        thres,
        alpha,
        hyperpixel,
        logpath,
        args,
        model=None,
        dataloader=None):
    r"""Runs Semantic Correspondence as an Optimal Transport Problem"""

    # 1. Logging initialization
    if not os.path.isdir('logs'):
        os.mkdir('logs')
    logfile = 'logs/{}_{}_{}_{}_exp{}-{}_e{}_m{}_{}_{}'.format(
        benchmark, backbone, args.split, args.sim, args.exp1, args.exp2,
        args.eps, args.classmap, args.cam, args.hyperpixel)
    print(logfile)
    util.init_logger(logfile)
    util.log_args(args)

    # 2. Evaluation benchmark initialization
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if dataloader is None:
        csv_file = 'test_pairs_tss.csv'
        dataset = TSSDataset(csv_file=os.path.join(datapath, benchmark,
                                                   csv_file),
                             dataset_path=os.path.join(datapath, benchmark))
        dataloader = DataLoader(dataset, batch_size=1, num_workers=0)

    # 3. Model initialization
    if model is None:
        model = scot_CAM.SCOT_CAM(backbone, hyperpixel, benchmark, device,
                                  args.cam)
    else:
        model.hyperpixel_ids = util.parse_hyperpixel(hyperpixel)

    for idx, data in enumerate(dataloader):

        # a) Retrieve images and adjust their sizes to avoid large numbers of hyperpixels
        if args.classmap in [0, 1]:
            src_img, src_size, src_size2, src_ratio = util.resize_TSS(
                data['src_img'])
            trg_img, trg_size, trg_size2, trg_ratio = util.resize_TSS(
                data['trg_img'])

            src_mask, trg_mask, src_bbox, trg_bbox = None, None, None, None

        data['alpha'] = alpha

        # b) Feed a pair of images to Hyperpixel Flow model
        with torch.no_grad():
            # target meshgrids --> source matching points
            confidence_ts, trg_box, src_box = model(trg_img, src_img, args.sim,
                                                    args.exp1, args.exp2,
                                                    args.eps, args.classmap,
                                                    trg_bbox, src_bbox,
                                                    trg_mask, src_mask,
                                                    backbone)

        # c) Image Grids and write flow to files
        h_tgt = int(trg_size[1].data.cpu().numpy())
        w_tgt = int(trg_size[2].data.cpu().numpy())
        grid_x_np, grid_y_np = np.meshgrid(range(1, w_tgt + 1),
                                           range(1, h_tgt + 1))
        grid_x = torch.tensor(grid_x_np).view(1, -1).cuda()
        grid_y = torch.tensor(grid_y_np).view(1, -1).cuda()
        trg_kps = torch.cat((grid_x, grid_y),
                            0).type(torch.cuda.FloatTensor)  # 2xwh
        trg_kps *= trg_ratio
        n_points = trg_kps.size(1)
        n_itr = int(n_points / 10000)

        # Predict matches in chunks of 10,000 grid points to bound peak memory
        prd_kps = torch.zeros_like(trg_kps)
        for i in range(0, n_itr + 1):
            s = i * 10000
            t = min(n_points, (i + 1) * 10000)
            if s >= t:
                break
            trg_part = trg_kps[:, s:t].contiguous().clone()
            prd_part = geometry.predict_kps(trg_box, src_box, trg_part,
                                            confidence_ts)
            prd_kps[:, s:t] = prd_part

        def pointsToGrid(x, h_tgt=h_tgt, w_tgt=w_tgt):
            # Reshape flattened (2, h*w) points into a (1, h, w, 2) grid
            return x.contiguous().view(1, 2, h_tgt,
                                       w_tgt).transpose(1, 2).transpose(2, 3)

        prd_grid = pointsToGrid(prd_kps).squeeze(0)  # hxwx2
        prd_grid /= src_ratio
        disp_x = prd_grid[:, :, 0].data.cpu().numpy() - grid_x_np
        disp_y = prd_grid[:, :, 1].data.cpu().numpy() - grid_y_np
        flow = np.concatenate(
            (np.expand_dims(disp_x, 2), np.expand_dims(disp_y, 2)), 2)

        flow_path = os.path.join(args.datapath, 'TSS/result_CAM',
                                 data['flow_path'][0])
        create_file_path(flow_path)
        write_flo_file(flow, flow_path)

        print(idx + 1, '/', len(dataloader.dataset))
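Example #2 relies on two helpers that are not shown, create_file_path and write_flo_file. Below is a minimal sketch of plausible implementations, assuming the standard Middlebury .flo format (a 202021.25 magic float, int32 width and height, then interleaved float32 (u, v) displacements); the project's actual helpers may differ.

import os
import numpy as np


def create_file_path(path):
    # Create the parent directory of `path` if it does not exist yet
    os.makedirs(os.path.dirname(path), exist_ok=True)


def write_flo_file(flow, path):
    # Write an (H, W, 2) flow array in the Middlebury .flo format
    h, w = flow.shape[:2]
    with open(path, 'wb') as f:
        np.float32(202021.25).tofile(f)    # magic number ('PIEH')
        np.int32(w).tofile(f)
        np.int32(h).tofile(f)
        flow.astype(np.float32).tofile(f)  # row-major, interleaved (u, v)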
Example #3
    # NOTE: the snippet begins mid-way through the argument definitions; the
    # flag below is presumably --exp2, inferred from the args.exp2 usage in
    # Example #1 (its default value is not shown).
    parser.add_argument('--exp2',
                        type=float,
                        help='exponential factor on final OT scores')
    parser.add_argument('--eps',
                        type=float,
                        default=0.05,
                        help='epsilon for Sinkhorn Regularization')
    parser.add_argument(
        '--classmap',
        type=int,
        default=1,
        help='class activation map: 0 for none, 1 for using CAM')
    parser.add_argument(
        '--cam',
        type=str,
        default='',
        help='activation map folder, empty for end2end computation')

    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    run(datapath=args.datapath,
        benchmark=args.dataset,
        backbone=args.backbone,
        thres=args.thres,
        alpha=args.alpha,
        hyperpixel=args.hyperpixel,
        logpath=args.logpath,
        args=args,
        beamsearch=False)

    util.log_args(args)
Example #4
File: evaluate.py Project: juhongm999/hpf
import datetime
import os

import torch
from torch.utils.data import DataLoader

# Project-local HPF modules (import paths assumed from the repository layout)
from model import hpflow, geometry, evaluation, util
from data import download


def run(datapath,
        benchmark,
        backbone,
        thres,
        alpha,
        hyperpixel,
        logpath,
        beamsearch,
        model=None,
        dataloader=None,
        visualize=False):
    r"""Runs Hyperpixel Flow framework"""

    # 1. Logging initialization
    if not os.path.isdir('logs'):
        os.mkdir('logs')
    if not beamsearch:
        cur_datetime = datetime.datetime.now().__format__('_%m%d_%H%M%S')
        logfile = os.path.join('logs', logpath + cur_datetime + '.log')
        util.init_logger(logfile)
        util.log_args(args)  # `args` is read from the module-level CLI parser
        if visualize:
            os.mkdir(logfile + 'vis')

    # 2. Evaluation benchmark initialization
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if dataloader is None:
        download.download_dataset(os.path.abspath(datapath), benchmark)
        split = 'val' if beamsearch else 'test'
        dset = download.load_dataset(benchmark, datapath, thres, device, split)
        dataloader = DataLoader(dset, batch_size=1, num_workers=0)

    # 3. Model initialization
    if model is None:
        model = hpflow.HyperpixelFlow(backbone, hyperpixel, benchmark, device)
    else:
        model.hyperpixel_ids = util.parse_hyperpixel(hyperpixel)

    # 4. Evaluator initialization
    evaluator = evaluation.Evaluator(benchmark, device)

    for idx, data in enumerate(dataloader):

        # a) Retrieve images and adjust their sizes to avoid large numbers of hyperpixels
        data['src_img'], data['src_kps'], data['src_intratio'] = util.resize(
            data['src_img'], data['src_kps'][0])
        data['trg_img'], data['trg_kps'], data['trg_intratio'] = util.resize(
            data['trg_img'], data['trg_kps'][0])
        data['alpha'] = alpha

        # b) Feed a pair of images to Hyperpixel Flow model
        with torch.no_grad():
            confidence_ts, src_box, trg_box = model(data['src_img'],
                                                    data['trg_img'])

        # c) Predict key-points & evaluate performance
        prd_kps = geometry.predict_kps(src_box, trg_box, data['src_kps'],
                                       confidence_ts)
        evaluator.evaluate(prd_kps, data)

        # d) Log results
        if not beamsearch:
            evaluator.log_result(idx, data=data)
        if visualize:  # requires beamsearch=False, since `logfile` is set above
            vispath = os.path.join(
                logfile + 'vis', '%03d_%s_%s' %
                (idx, data['src_imname'][0], data['trg_imname'][0]))
            util.visualize_prediction(data['src_kps'].t().cpu(),
                                      prd_kps.t().cpu(), data['src_img'],
                                      data['trg_img'], vispath)
    if beamsearch:
        return (sum(evaluator.eval_buf['pck']) /
                len(evaluator.eval_buf['pck'])) * 100.
    else:
        evaluator.log_result(len(dataloader.dataset), data=None, average=True)
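When beamsearch is True, run returns the mean PCK over the validation split as a percentage, which is what a layer-search driver would consume. A hypothetical probe call (all argument values are illustrative assumptions):

# Hypothetical beam-search probe: score one candidate hyperpixel layer set on
# the validation split. Every value below is an illustrative assumption.
val_pck = run(datapath='../Datasets_HPF', benchmark='pfpascal',
              backbone='resnet101', thres='img', alpha=0.1,
              hyperpixel='(2, 17, 21, 22, 25, 26, 28)', logpath='',
              beamsearch=True)
print('candidate mean PCK: %.2f' % val_pck)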