# Set up the fields and datasets used when the test options select 'coco'.
if opt_test.dataset == 'coco':
    # Image field backed by the detections HDF5 file under coco_root.
    image_field = ImageDetectionsField(detections_path=os.path.join(
        coco_root, 'coco_detections.hdf5'),
                                       load_in_tmp=False)

    # Control-sequence field: uses the same detections file plus the object
    # class list; sequences use fix_length=20 and padding_idx=-1.
    det_field = COCOControlSequenceField(
        detections_path=os.path.join(coco_root, 'coco_detections.hdf5'),
        classes_path=os.path.join(coco_root, 'object_class_list.txt'),
        pad_init=False,
        padding_idx=-1,
        all_boxes=False,
        fix_length=20)

    # Caption field configured with '<bos>'/'<eos>' tokens and fix_length=20.
    text_field = TextField(init_token='<bos>',
                           eos_token='<eos>',
                           lower=True,
                           remove_punctuation=True,
                           fix_length=20)

    # Dataset wired to the three fields above; annotation and id roots both
    # point at <coco_root>/annotations.
    dataset = COCOEntities(image_field,
                           det_field,
                           text_field,
                           img_root='',
                           ann_root=os.path.join(coco_root, 'annotations'),
                           entities_file=os.path.join(coco_root,
                                                      'coco_entities.json'),
                           id_root=os.path.join(coco_root, 'annotations'))

    # NOTE(review): the call below opens as COCOEntities(image_field,
    # det_field, RawField(), ...) but then continues with flickr_root-based
    # keyword arguments at a shallower indent. It looks like two separate
    # snippets were spliced together and the lines in between are missing —
    # confirm against the original script before relying on this call.
    test_dataset = COCOEntities(image_field,
                                det_field,
                                RawField(),
    detections_path=os.path.join(flickr_root, 'flickr30k_detections.hdf5'),
    classes_path=os.path.join(flickr_root, 'object_class_list.txt'),
    img_shapes_path=os.path.join(flickr_root, 'flickr_img_shapes.json'),
    precomp_glove_path=os.path.join(flickr_root, 'object_class_glove.pkl'),
    verb_idx_path=os.path.join(flickr_root, 'flickr_verb_idx.json'),
    idx_vs_path=os.path.join(flickr_root, 'idx_2_vs_flickr.json'),
    cap_verb_path=os.path.join(flickr_root, 'cap_2_verb_flickr.json'),
    cap_classes_path=os.path.join(flickr_root, 'cap_2_classes_flickr.json'),
    idx_v_og_path=os.path.join(flickr_root, 'idx_2_v_og_flickr.json'),
    vocab_list_path=os.path.join(flickr_root, 'vocab_tv_flickr.json'),
    fix_length=10,
    visual=False)

# Caption field configured with '<bos>'/'<eos>' tokens and fix_length=20.
text_field = TextField(
    init_token='<bos>',
    eos_token='<eos>',
    lower=True,
    remove_punctuation=True,
    fix_length=20,
)

# Flickr dataset built from the image field, a RawField for the text slot and
# the detection field; annotations are read from
# <flickr_root>/flickr30k_annotations.json with verb_filter enabled.
test_dataset = FlickrEntities(
    image_field,
    RawField(),
    det_field,
    img_root='',
    ann_file=os.path.join(flickr_root, 'flickr30k_annotations.json'),
    entities_root=flickr_entities_root,
    verb_filter=True,
)

# `splits` unpacks into (train, val, test); test_dataset is rebound to the
# third element, then wrapped in a DictionaryDataset with 'image' as the third
# argument (presumably the grouping key — confirm against DictionaryDataset).
train_dataset, val_dataset, test_dataset = test_dataset.splits
test_dataset = DictionaryDataset(
    test_dataset.examples,
    test_dataset.fields,
    'image',
)
# Example #3
# 0
# Learning-rate schedule options.
parser.add_argument(
    '--step_size',
    default=3,
    type=int,
    help='learning rate schedule step size',
)
parser.add_argument(
    '--gamma',
    default=0.8,
    type=float,
    help='learning rate schedule gamma',
)

# Integer switches selecting the inputs of the two LSTMs.
parser.add_argument(
    '--h2_first_lstm',
    default=1,
    type=int,
    help='h2 as input to the first lstm',
)
parser.add_argument(
    '--img_second_lstm',
    default=0,
    type=int,
    help='img vector as input to the second lstm',
)

# Boolean flags (off unless given on the command line) for the RL variants.
parser.add_argument(
    '--sample_rl',
    action='store_true',
    help='reinforcement learning with cider optimization',
)
parser.add_argument(
    '--sample_rl_nw',
    action='store_true',
    help='reinforcement learning with cider + nw optimization',
)

# Parse the command line and echo the resulting namespace.
opt = parser.parse_args()
print(opt)

# Image field backed by the detections HDF5 file under coco_root.
image_field = ImageDetectionsField(
    detections_path=os.path.join(coco_root, 'coco_detections.hdf5'),
    load_in_tmp=False,
)

# Control-sequence field: same detections file plus the object class list.
det_field = COCOControlSequenceField(
    detections_path=os.path.join(coco_root, 'coco_detections.hdf5'),
    classes_path=os.path.join(coco_root, 'object_class_list.txt'),
    pad_init=False,
    padding_idx=-1,
    all_boxes=False,
    fix_length=20,
)

# Caption field configured with '<bos>'/'<eos>' tokens and fix_length=20.
text_field = TextField(
    init_token='<bos>',
    eos_token='<eos>',
    lower=True,
    remove_punctuation=True,
    fix_length=20,
)

# Dataset wired to the three fields above; annotation and id roots both point
# at <coco_root>/annotations.
dataset = COCOEntities(
    image_field,
    det_field,
    text_field,
    img_root='',
    ann_root=os.path.join(coco_root, 'annotations'),
    entities_file=os.path.join(coco_root, 'coco_entities.json'),
    id_root=os.path.join(coco_root, 'annotations'),
)

# Keep the first two splits (the third is discarded) and build the text
# vocabulary from both, passing min_freq=5.
train_dataset, val_dataset, _ = dataset.splits
text_field.build_vocab(train_dataset, val_dataset, min_freq=5)

test_dataset = COCOEntities(image_field, det_field, RawField(),
                            img_root='',
                            ann_root=os.path.join(coco_root, 'annotations'),
                            entities_file=os.path.join(coco_root, 'coco_entities.json'),
                            id_root=os.path.join(coco_root, 'annotations'),