Example #1
def test_image_hred_model(config):
    db = vg(config, 'train')
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)

    net = ImageHREDModel(config)
    net.train()
    for name, param in net.named_parameters():
        print(name, param.size())

    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        img_feats, txt_feats = net(sent_inds, sent_msks, None, images)
        print('images', images.size())
        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        loss = net.forward_loss(img_feats, txt_feats)
        print(loss)
        metrics, caches = net.evaluate(img_feats, txt_feats)
        print(metrics)
        break
Example #2
def dump_image_features(config):
    output_dir = osp.join(config.data_dir, 'vg', 'global_features')
    maybe_create(output_dir)

    db = vg(config)
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=1,
                        shuffle=False,
                        num_workers=0,
                        collate_fn=caption_collate_fn)
    net = ImageEncoder(config)
    if config.cuda:
        net = net.cuda()
    net.eval()
    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        if config.cuda:
            images = images.cuda()
        indices = batched['image_inds']
        image_index = int(indices[0])
        output_path = osp.join(output_dir, str(image_index).zfill(12) + '.npy')
        features = net(images).squeeze().cpu().data.numpy()
        assert (len(features) == 2048)
        pickle_save(output_path, features)
        print(cnt, image_index)
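For downstream use, the dumped features can be read back from the same zero-padded paths. A minimal sketch, assuming the pickle_save helper (not shown here) writes a plain pickled NumPy array to the .npy path; the loader name is hypothetical:

import os.path as osp
import pickle

def load_image_feature(output_dir, image_index):
    # Hypothetical counterpart to dump_image_features above: rebuild the
    # zero-padded filename and unpickle the 2048-d global feature vector.
    path = osp.join(output_dir, str(image_index).zfill(12) + '.npy')
    with open(path, 'rb') as fid:
        features = pickle.load(fid)
    assert len(features) == 2048
    return features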
Example #3
def test_region_grounding_model(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)

    net = RegionGroundingModel(config)
    if config.pretrained is not None:
        pretrained_path = osp.join(config.data_dir,
                                   'caches/region_grounding_ckpts',
                                   config.pretrained + '.pkl')
        states = torch.load(pretrained_path,
                            map_location=lambda storage, loc: storage)
        net.load_state_dict(states['state_dict'], strict=False)
    net.train()
    for name, param in net.named_parameters():
        print(name, param.size())

    for cnt, batched in enumerate(loader):
        scene_inds = batched['scene_inds'].long()
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        region_feats = batched['region_feats'].float()
        region_clses = batched['region_clses'].long()
        region_masks = batched['region_masks'].float()
        img_feats, masked_feats, txt_feats, subspace_masks, sample_logits, sample_indices = \
            net(scene_inds, sent_inds, sent_msks, None, None, None, region_feats, region_clses, region_masks, config.explore_mode)
        if config.instance_dim > 1:
            print(sample_indices[0])
        # print('sample_logits', sample_logits.size())
        # print('sample_indices', sample_indices.size())
        txt_masks = txt_feats.new_ones(txt_feats.size(0), txt_feats.size(1))
        losses = net.final_loss(img_feats, masked_feats, region_masks,
                                txt_feats, txt_masks, sample_logits,
                                sample_indices)
        print('losses', losses.size(), torch.mean(losses))

        if config.subspace_alignment_mode > 0:
            metrics, cache_results = net.evaluate(masked_feats, region_masks,
                                                  txt_feats)
        else:
            metrics, cache_results = net.evaluate(img_feats, region_masks,
                                                  txt_feats)
        print('metrics', metrics)
        print('txt_feats', txt_feats.size())
        print('img_feats', img_feats.size())

        break
Example #4
def create_text_reference_html(config):
    config_html = HTML()
    config_table = config_html.table(border='1')
    testdb = vg(config, 'val')
    for i in range(len(testdb.scenedb)):
        scene = testdb.scenedb[i]
        image_index = scene['image_index']
        path = "http://www.cs.virginia.edu/~ft3ex/data/language_vision/val_image_htmls/%d.html"%image_index
        r = config_table.tr
        c = r.td()
        c.a('%04d'%i, href='%s'%path)
    html_file = open('reference.html', 'w')
    print(config_table, file=html_file)
    html_file.close()
Example #5
def create_img_reference_html(config):
    config_html = HTML()
    config_table = config_html.table(border='1')
    testdb = vg(config, 'test')
    for i in range(len(testdb.scenedb)):
        scene = testdb.scenedb[i]
        image_index = scene['image_index']
        img_path  = "http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg"%image_index
        html_path = "http://www.cs.virginia.edu/~ft3ex/data/language_vision/test_image_htmls/%d.html"%image_index
        c = config_table.tr
        a = c.a(href='%s'%html_path)
        a.img(src='%s'%img_path, height='150')
    html_file = open('img_reference.html', 'w')
    print(config_table, file=html_file)
    html_file.close()
Example #6
def test_image_encoder(config):
    db = vg(config, 'test')
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)

    net = ImageEncoder(config)
    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        print('images', images.size())
        feats = net(images)
        print('features', feats.size())
        break
Example #7
def test_caption_loader(config):
    db = vg(config, 'train')
    # db = coco(config, 'train')
    loaddb = caption_loader(db)
    output_dir = osp.join(config.model_dir, 'test_caption_dataloader')
    maybe_create(output_dir)

    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)

    start = time()
    plt.switch_backend('agg')
    for cnt, batched in enumerate(loader):
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        images = batched['images'].float()
        captions = batched['captions']
        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        print('images', images.size())
        for i in range(config.batch_size):
            color = cv2.imread(
                db.color_path_from_index(batched['image_inds'][i]),
                cv2.IMREAD_COLOR)
            out_path = osp.join(output_dir,
                                '%d.png' % batched['image_inds'][i])
            fig = plt.figure(figsize=(32, 16))
            for j in range(min(config.max_turns, 10)):
                plt.subplot(2, 5, j + 1)
                plt.title(
                    captions[i][j] + '\n' +
                    ' '.join([str(x.data.item())
                              for x in sent_inds[i, j]]) + '\n' +
                    ' '.join([str(x.data.item()) for x in sent_msks[i, j]]))
                plt.imshow(color[:, :, ::-1])
                plt.axis('off')
            fig.savefig(out_path, bbox_inches='tight')
            plt.close(fig)
        print('------------------')
        if cnt == 2:
            break
    print("Time", time() - start)
Example #8
def test_region_model(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)

    net = RegionModel(config)
    net.train()

    for name, param in net.named_parameters():
        print(name, param.size())

    for cnt, batched in enumerate(loader):
        start = time()
        scene_inds = batched['scene_inds'].long()[:config.batch_size]
        sent_inds = batched['sent_inds'].long()[:config.batch_size]
        sent_msks = batched['sent_msks'].long()[:config.batch_size]
        region_feats = batched['region_feats'].float()[:config.batch_size]
        region_clses = batched['region_clses'].long()[:config.batch_size]
        region_masks = batched['region_masks'].float()[:config.batch_size]
        src_region_feats = batched['region_feats'].float()[config.batch_size:2 * config.batch_size]
        src_region_clses = batched['region_clses'].long()[config.batch_size:2 * config.batch_size]
        src_region_masks = batched['region_masks'].float()[config.batch_size:2 * config.batch_size]

        img_feats, masked_feats, txt_feats, subspace_masks, sample_logits, sample_indices = \
            net(scene_inds, sent_inds, sent_msks,
            src_region_feats, src_region_clses, src_region_masks,
            region_feats, region_clses, region_masks,
            config.explore_mode)
        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        if config.subspace_alignment_mode > 0:
            print('masked_feats', masked_feats.size())
            print('subspace_masks', subspace_masks.size())
        if config.instance_dim > 1:
            print('sample_logits', sample_logits.size())
            print('sample_indices', sample_indices.size())
        print('time:', time() - start)
        break
Example #9
def main_rnn(config):
    testdb = vg(config, 'test')
    trainer = RegionGroundingTrainer(config)
    with open(
            osp.join(testdb.cache_dir,
                     'img_features/vg_rnn_1280_img_features.pkl'),
            'rb') as fid:
        data_ = pickle.load(fid)
        all_img_feats = data_['feats']
        all_img_masks = data_['masks']

    all_img_feats = torch.from_numpy(all_img_feats).float()
    all_img_masks = torch.from_numpy(all_img_masks).float()
    if config.cuda:
        all_img_feats = all_img_feats.cuda()
        all_img_masks = all_img_masks.cuda()
    print('all_img_feats', all_img_feats.size())
    print('all_img_masks', all_img_masks.size())

    count = 0
    all_captions = []
    while count < 10:
        print('Please input the query:\n')
        query = input()
        r, top5_img_inds = trainer.net.demo_step(query, all_captions,
                                                 all_img_feats, all_img_masks,
                                                 testdb)
        top5_imgs = []
        for x in top5_img_inds:
            cur_img = cv2.imread(testdb.color_path_from_index(x),
                                 cv2.IMREAD_COLOR)
            cur_img, _, _ = create_squared_image(cur_img)
            cur_img = cv2.resize(cur_img, (500, 500))
            top5_imgs.append(cur_img)
        fig = plt.figure(figsize=(32, 8))
        plt.suptitle(query, fontsize=20)
        for i in range(len(top5_imgs)):
            cur_img = top5_imgs[i]
            plt.subplot(1, 5, i + 1)
            plt.imshow(cur_img[:, :, ::-1].astype(np.uint8))
            plt.axis('off')
        plt.show()
        count += 1
        print('turn:', count)
Example #10
def check_region_clses(config):
    db = vg(config, 'train')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)

    min_index = 1000000
    max_index = -1
    for cnt, batched in enumerate(loader):
        region_clses = batched['region_clses'].long()
        min_index = min(min_index, torch.min(region_clses).item())
        max_index = max(max_index, torch.max(region_clses).item())
        if cnt % 1000 == 0:
            print('iter:', cnt)
    print('min_index', min_index)
    print('max_index', max_index)
Example #11
def test_image_model(config):
    db = vg(config, 'test')
    loaddb = caption_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=caption_collate_fn)

    net = ImageModel(config)
    for cnt, batched in enumerate(loader):
        images = batched['images'].float()
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        img_feats, txt_feats = net(sent_inds, sent_msks, None, images)
        print('images', images.size())
        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        break
Example #12
def test_grounding_loss(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)

    net = RegionModel(config)
    criterion = GroundingLoss(config)
    for cnt, batched in enumerate(loader):
        scene_inds = batched['scene_inds'].long()[:config.batch_size]
        sent_inds = batched['sent_inds'].long()[:config.batch_size]
        sent_msks = batched['sent_msks'].long()[:config.batch_size]
        region_feats = batched['region_feats'].float()[:config.batch_size]
        region_clses = batched['region_clses'].long()[:config.batch_size]
        region_masks = batched['region_masks'].float()[:config.batch_size]
        src_region_feats = batched['region_feats'].float()[config.batch_size:2 * config.batch_size]
        src_region_clses = batched['region_clses'].long()[config.batch_size:2 * config.batch_size]
        src_region_masks = batched['region_masks'].float()[config.batch_size:2 * config.batch_size]

        img_feats, masked_feats, txt_feats, subspace_masks, sample_logits, sample_indices = \
            net(scene_inds, sent_inds, sent_msks,
            src_region_feats, src_region_clses, src_region_masks,
            region_feats, region_clses, region_masks,
            config.explore_mode)
        masked_feats = img_feats
        sim1 = criterion.compute_batch_mutual_similarity(
            masked_feats, region_masks, txt_feats)
        sim2 = criterion.debug_compute_batch_mutual_similarity(
            masked_feats, region_masks, txt_feats)
        print('sim1', sim1.size())
        print('sim2', sim2.size())
        print('diff', torch.sum(torch.abs(sim1 - sim2)))
        txt_masks = txt_feats.new_ones(txt_feats.size(0), txt_feats.size(1))
        losses = criterion.forward_loss(masked_feats, region_masks, txt_feats,
                                        txt_masks, config.loss_reduction_mode)
        print('losses', losses.size())
        break
Example #13
def test_paragraph_model(config):
    db = vg(config, 'test')
    loaddb = paragraph_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=paragraph_collate_fn)

    net = ParagraphModel(config)
    net.train()

    for name, param in net.named_parameters():
        print(name, param.size())

    for cnt, batched in enumerate(loader):
        start = time()
        scene_inds = batched['scene_inds'].long()[:config.batch_size]
        sent_inds = batched['sent_inds'].long()[:config.batch_size]
        sent_msks = batched['sent_msks'].long()[:config.batch_size]
        region_feats = batched['region_feats'].float()[:config.batch_size]
        region_clses = batched['region_clses'].long()[:config.batch_size]
        region_masks = batched['region_masks'].float()[:config.batch_size]

        img_feats, txt_feats = net(sent_inds, sent_msks, region_feats,
                                   region_clses, region_masks)
        losses = net.loss(img_feats, region_masks, txt_feats.unsqueeze(1))
        print('losses', losses.size(), torch.mean(losses))
        metrics, cache_results = net.evaluate(img_feats, region_masks,
                                              txt_feats.unsqueeze(1))
        print('metrics', metrics)

        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        print('region_feats', region_feats.size())
        print('region_clses', region_clses.size())
        print('region_masks', region_masks.size())

        print('img_feats', img_feats.size())
        print('txt_feats', txt_feats.size())
        print('time:', time() - start)
        break
Example #14
def test_text_encoder(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)

    net = TextEncoder(config)
    for cnt, batched in enumerate(loader):
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].float()
        bsize, slen, fsize = sent_inds.size()
        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        f1, f2, h = net(sent_inds.view(bsize * slen, fsize),
                        sent_msks.view(bsize * slen, fsize))
        print(f1.size(), f2.size(), h.size())
        break
Example #15
def test_region_encoder(config):
    db = vg(config, 'test')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=3 * config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)

    net = RegionEncoder(config)
    for cnt, batched in enumerate(loader):
        region_feats = batched['region_feats'].float()
        region_clses = batched['region_clses'].long()
        print('region_feats', region_feats.size())
        print('region_clses', region_clses.size())
        img_feats, masked_feats, mm = net(region_feats, region_clses)
        print('img_feats', img_feats.size())
        if config.subspace_alignment_mode > 0:
            print('masked_feats', masked_feats.size())
            print('mm', mm.size())
        break
Example #16
def create_html_per_image(config):
    testdb = vg(config, 'test')
    image_folder_name = 'test_image_htmls'
    maybe_create(image_folder_name)
    for i in range(len(testdb.scenedb)):
        scene = testdb.scenedb[i]
        all_meta_regions = [scene['regions'][x] for x in sorted(list(scene['regions'].keys()))]
        captions = [x['caption'] for x in all_meta_regions[:config.max_turns]]
        image_index = scene['image_index']
        text = '\n'.join(captions)
        path = "http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg"%image_index
        config_html = HTML()
        config_table = config_html.table(border='1')
        r = config_table.tr
        c1 = r.td()
        c1.img(src='%s'%path, height='700')
        c2 = r.td()
        for j in range(len(captions)):
            c2.p(captions[j])
        html_file = open(osp.join(image_folder_name, '%d.html'%image_index), 'w')
        print(config_table, file=html_file)
        html_file.close()
        print(i)
Example #17
def test_model(config):
    testdb = vg(config, 'test')
    trainer = RegionGroundingTrainer(config)
    trainer.test(testdb)
Example #18
def test_response_gen(config):
    s = time()
    db = vg(config)
    html_folder_name = 'template_htmls'
    maybe_create(html_folder_name)

    with open('candidates.json', 'r') as fp:
        candidates = json.load(fp)

    captioner = RelativeCaptioner(db)

    for k, v in candidates.items():
        target_scene = db.scenedb[v['src']]
        decoy_scenes = [db.scenedb[x] for x in v['top5']]
        unmention_candidates = captioner.collect_unmentioned_candidates(
            target_scene, decoy_scenes)
        captions_1 = []
        if len(unmention_candidates) > 0:
            cap1 = caption_1(
                unmention_candidates[np.random.randint(
                    0, len(unmention_candidates))], target_scene)
            if cap1 is not None:
                captions_1.append(cap1)
            cap2 = caption_2(
                unmention_candidates[np.random.randint(
                    0, len(unmention_candidates))], target_scene)
            if cap2 is not None:
                captions_1.append(cap2)
            cap3 = caption_3(
                unmention_candidates[np.random.randint(
                    0, len(unmention_candidates))], target_scene)
            if cap3 is not None:
                captions_1.append(cap3)
                # print(cap3)
        captions_2 = []
        mention_candidates = captioner.collect_mentioned_candidates(
            target_scene, decoy_scenes)
        if len(mention_candidates) > 0:
            cap1 = caption_1(
                mention_candidates[np.random.randint(0,
                                                     len(mention_candidates))],
                target_scene)
            if cap1 is not None:
                captions_2.append(cap1)
            cap2 = caption_2(
                mention_candidates[np.random.randint(0,
                                                     len(mention_candidates))],
                target_scene)
            if cap2 is not None:
                captions_2.append(cap2)
            cap3 = caption_3(
                mention_candidates[np.random.randint(0,
                                                     len(mention_candidates))],
                target_scene)
            if cap3 is not None:
                captions_2.append(cap3)
                # print(cap3)
            cap4 = caption_4(
                mention_candidates[np.random.randint(0,
                                                     len(mention_candidates))],
                target_scene)
            if cap4 is not None:
                captions_2.append(cap4)
            cap5 = caption_5(
                mention_candidates[np.random.randint(0,
                                                     len(mention_candidates))],
                target_scene)
            if cap5 is not None:
                captions_2.append(cap5)
                # print(cap5)
        # query_path = "http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg"%v['src']
        # top5_paths = ["http://www.cs.virginia.edu/~ft3ex/data/vg/VG_100K/%d.jpg"%x for x in v['top5']]
        query_path = "file:///Users/fuwentan/datasets/vg/VG_100K/%d.jpg" % v[
            'src']
        top5_paths = [
            "file:///Users/fuwentan/datasets/vg/VG_100K/%d.jpg" % x
            for x in v['top5']
        ]

        config_html = HTML()
        config_table = config_html.table(border='1')
        r1 = config_table.tr
        c1 = r1.td(colspan="2")
        for j in range(len(captions_1)):
            c1.p(captions_1[j])
        c2 = r1.td()
        c2.img(src='%s' % query_path, height='200')
        c3 = r1.td(colspan="2")
        for j in range(len(captions_2)):
            c3.p(captions_2[j])

        r2 = config_table.tr
        for j in range(5):
            c2_r2_c = r2.td()
            c2_r2_c.img(src='%s' % top5_paths[j], height='200')

        html_file = open(
            osp.join(html_folder_name, '%d_%d.html' % (v['src'], v['turn'])),
            'w')
        print(config_table, file=html_file)
        html_file.close()
        print(k)
Example #19
elif args.vg_dataset == "vg6":
    vg_version = "1600-400-400"
    vg_split = imdb_vg_name
    set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS',
        '[0.25, 0.5, 1, 2, 4]'
    ]
elif args.vg_dataset == "vg_bm":
    vg_version = "150-50-50"
    vg_split = imdb_vg_name
    set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS',
        '[0.25, 0.5, 1, 2, 4]'
    ]

imdb_vg = vg(vg_version, vg_split)


def bbox_proposal_fast(obj_prob, att_prob, rois):

    batch_size = obj_prob.size(0)

    # get the top obj cls, excluding the background class.
    max_obj_prob, max_obj_clss = obj_prob[:, :, 1:].max(2)
    # get the top att cls, excluding the background class.
    max_att_prob, max_att_clss = att_prob[:, :, 1:].max(2)
    # get the top rel cls, excluding the background class.
    # max_rel_scores, max_rel_ind = rel_prob[:, :, 1:].max(2)

    # compute the final score, B x N
    obj_att_scores = max_obj_prob * max_att_prob
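The fragment above stops after fusing the per-box object and attribute confidences into a single B x N score map. A typical next step is to keep the top-scoring proposals per image; the sketch below is an illustration of that selection under the assumed shapes (obj_att_scores: B x N, rois: B x N x 4), not the original function body:

import torch

def topk_proposals(obj_att_scores, rois, k=10):
    # Select the k highest-scoring boxes per image.
    scores, inds = torch.topk(obj_att_scores, k, dim=1)  # B x k
    # Gather the matching boxes: expand the indices to B x k x 4.
    boxes = torch.gather(rois, 1, inds.unsqueeze(-1).expand(-1, -1, rois.size(-1)))
    return scores, boxes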
Example #20
def test_region_loader(config):
    db = vg(config, 'train')
    # db = coco(config, 'train')
    loaddb = region_loader(db)
    loader = DataLoader(loaddb,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers,
                        collate_fn=region_collate_fn)

    output_dir = osp.join(config.model_dir, 'test_region_loader')
    maybe_create(output_dir)

    start = time()
    plt.switch_backend('agg')
    for cnt, batched in enumerate(loader):
        print('scene_inds', batched['scene_inds'])
        sent_inds = batched['sent_inds'].long()
        sent_msks = batched['sent_msks'].long()
        widths = batched['widths']
        heights = batched['heights']

        captions = batched['captions']
        region_boxes = batched['region_boxes'].float()
        region_feats = batched['region_feats'].float()
        region_clses = batched['region_clses'].long()
        region_masks = batched['region_masks'].long()

        print('sent_inds', sent_inds.size())
        print('sent_msks', sent_msks.size())
        print('region_boxes', region_boxes.size())
        print('region_feats', region_feats.size())
        print('region_clses', region_clses.size())
        print('region_masks', region_masks.size())
        print('clses', torch.min(region_clses), torch.max(region_clses))
        print('widths', widths)
        print('heights', heights)

        for i in range(len(sent_inds)):
            # print('####')
            # print(len(captions), len(captions[0]))
            entry = {}
            image_index = batched['image_inds'][i]
            entry['width'] = widths[i]
            entry['height'] = heights[i]
            nr = torch.sum(region_masks[i])
            entry['region_boxes'] = xyxys_to_xywhs(
                region_boxes[i, :nr].cpu().data.numpy())

            color = cv2.imread(db.color_path_from_index(image_index),
                               cv2.IMREAD_COLOR)
            color, _, _ = create_squared_image(color)

            out_path = osp.join(output_dir, '%d.png' % image_index)
            layouts = db.render_regions_as_output(
                entry,
                bg=cv2.resize(
                    color,
                    (config.visu_size[0], config.visu_size[0]))[:, :, ::-1])

            fig = plt.figure(figsize=(32, 16))
            for j in range(min(14, len(layouts))):
                plt.subplot(3, 5, j + 1)
                if j < config.max_turns:
                    plt.title(
                        captions[i][j] + '\n' +
                        ' '.join([str(x.data.item())
                                  for x in sent_inds[i, j]]) + '\n' +
                        ' '.join([str(x.data.item())
                                  for x in sent_msks[i, j]]))
                plt.imshow(layouts[j].astype(np.uint8))
                plt.axis('off')
            plt.subplot(3, 5, 15)
            plt.imshow(color[:, :, ::-1])
            plt.axis('off')
            fig.savefig(out_path, bbox_inches='tight')
            plt.close(fig)

        print('------------------')
        if cnt == 2:
            break
    print("Time", time() - start)
Example #21
def overfit_model(config):
    valdb = vg(config, 'val')
    valdb.scenedb = valdb.scenedb[:31]
    trainer = ImageHREDTrainer(config)
    trainer.train(valdb, valdb, valdb)
Example #22
def train_model(config):
    traindb = vg(config, 'train')
    valdb = vg(config, 'val')
    trainer = ImageHREDTrainer(config)
    trainer.train(traindb, valdb, valdb)
Example #23
def overfit_model(config):
    valdb = vg(config, 'val')
    valdb.scenedb = valdb.scenedb[:31]
    trainer = RegionGroundingTrainer(config)
    trainer.train(valdb, valdb, valdb)
Example #24
# Set up coco_2015_<split>
for year in ['2015']:
  for split in ['test', 'test-dev']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up vg_<split>
# for version in ['1600-400-20']:
#     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
#         name = 'vg_{}_{}'.format(version,split)
#         __sets[name] = (lambda split=split, version=version: vg(version, split))
for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']:
    for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']:
        name = 'vg_{}_{}'.format(version,split)
        __sets[name] = (lambda split=split, version=version: vg(version, split))
        
# Set up ImageNet.
for split in ['train', 'val', 'val1', 'val2', 'test']:
    name = 'imagenet_{}'.format(split)
    devkit_path = 'data/imagenet/ILSVRC/devkit'
    data_path = 'data/imagenet/ILSVRC'
    __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path))

for split in ['train', 'val', 'test']:
    name = 'wider_face_{}'.format(split)
    __sets[name] = (lambda split=split: wider_face(split))

for split in ['train', 'val', 'test']:
    name = 'MI3_{}'.format(split)
    __sets[name] = (lambda split=split: mi3(split))
Example #25
def test_vg_dataset(config):
    s = time()
    db = vg(config, 'train')
Example #26
def dump_trained_features(config):
    traindb = vg(config, 'train')
    trainer = RegionGroundingTrainer(config)
    trainer.test(traindb)
Example #27
def test_model(config):
    testdb = vg(config, 'test')
    trainer = ImageHREDTrainer(config)
    trainer.test(testdb)
Example #28
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

__sets = {}
from datasets.vg import vg
from datasets.clevr import clevr
from datasets.visualgenome import visualgenome
from datasets.vrd import vrd

import numpy as np

# Set up vg_<split>
for split in ['train', 'validation', 'test']:
    name = 'visual_genome_{}'.format(split)
    __sets[name] = (lambda split=split: vg(split))

# Set up clevr_<split>
for split in ['train', 'val', 'test']:
    name = 'clevr_{}'.format(split)
    __sets[name] = (lambda split=split: clevr(split))

# Set up vg_<split>
for split in ['train', 'validation', 'test']:
    name = 'visualgenome_{}'.format(split)
    __sets[name] = (lambda split=split: visualgenome(split))

# Set up vrd_<split>
for split in ['train', 'validation', 'test']:
    name = 'vrd_{}'.format(split)
    __sets[name] = (lambda split=split: vrd(split))
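Note the split=split (and version=version, year=year) default arguments in every registered lambda. This is deliberate: a plain lambda: vg(split) would close over the loop variable, so every registry entry would end up constructing the dataset for the last split in the list. Binding the value as a default argument freezes it at definition time. A minimal standalone sketch of the difference:

# Late binding: both closures see the final value of s.
bad = {s: (lambda: s) for s in ['train', 'test']}
print(bad['train']())   # prints 'test'

# Default-argument binding captures the value of s at definition time.
good = {s: (lambda s=s: s) for s in ['train', 'test']}
print(good['train']())  # prints 'train'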
Example #29
def dump_trained_features(config):
    traindb = vg(config, 'train')
    trainer = ImageHREDTrainer(config)
    trainer.test(traindb)
Example #30
# Set up vg_<split>
# for version in ['1600-400-20']:
#     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
#         name = 'vg_{}_{}'.format(version,split)
#         __sets[name] = (lambda split=split, version=version: vg(version, split))
for version in [
        '150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450',
        '1600-400-20'
]:
    for split in [
            'minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val',
            'test'
    ]:
        name = 'vg_{}_{}'.format(version, split)
        __sets[name] = (
            lambda split=split, version=version: vg(version, split))

# Set up ImageNet.
for split in ['train', 'val', 'val1', 'val2', 'test']:
    name = 'imagenet_{}'.format(split)
    devkit_path = 'data/imagenet/ILSVRC/devkit'
    data_path = 'data/imagenet/ILSVRC'
    __sets[name] = (lambda split=split, devkit_path=devkit_path,
                    data_path=data_path: imagenet(split, devkit_path, data_path))


def get_imdb(name):
    """Get an imdb (image database) by name."""
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()
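With the registry populated, get_imdb resolves a dataset name to a constructed instance in a single lookup. A usage sketch; the key must match one of the names registered above, e.g.:

imdb = get_imdb('vg_150-50-50_train')  # equivalent to vg('150-50-50', 'train')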
Example #31
# Set up coco_2015_<split>
for year in ['2015']:
  for split in ['test', 'test-dev']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up vg_<split>
# for version in ['1600-400-20']:
#     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
#         name = 'vg_{}_{}'.format(version,split)
#         __sets[name] = (lambda split=split, version=version: vg(version, split))
for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']:
    for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']:
        name = 'vg_{}_{}'.format(version,split)
        __sets[name] = (lambda split=split, version=version: vg(version, split))
        
# Set up ImageNet.
for split in ['train', 'val', 'val1', 'val2', 'test']:
    name = 'imagenet_{}'.format(split)
    devkit_path = 'data/imagenet/ILSVRC/devkit'
    data_path = 'data/imagenet/ILSVRC'
    __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path))

def get_imdb(name):
  """Get an imdb (image database) by name."""
  if name not in __sets:
    raise KeyError('Unknown dataset: {}'.format(name))
  return __sets[name]()