# NOTE(review): the line below holds several statements run together (their
# original line breaks and indentation are lost) and it ends part-way through
# a call, so which statements are nested under the `if` cannot be confirmed
# from this text alone.
#
# COCO setup, entered when opt_test.dataset == 'coco'. Every path is joined
# onto coco_root. In order, the statements bind:
#   image_field  - ImageDetectionsField reading 'coco_detections.hdf5'
#                  (load_in_tmp=False).
#   det_field    - COCOControlSequenceField reading the same detections file
#                  plus 'object_class_list.txt' (pad_init=False,
#                  padding_idx=-1, all_boxes=False, fix_length=20).
#   text_field   - TextField with '<bos>' / '<eos>' as init and eos tokens,
#                  lower=True, remove_punctuation=True, fix_length=20.
#   dataset      - COCOEntities(image_field, det_field, text_field, ...) with
#                  img_root='', ann_root and id_root both pointing at
#                  'annotations', and entities_file 'coco_entities.json'.
#   test_dataset - a second COCOEntities that passes RawField() where
#                  `dataset` passed text_field; the rest of its arguments are
#                  cut off here.
if opt_test.dataset == 'coco': image_field = ImageDetectionsField(detections_path=os.path.join( coco_root, 'coco_detections.hdf5'), load_in_tmp=False) det_field = COCOControlSequenceField( detections_path=os.path.join(coco_root, 'coco_detections.hdf5'), classes_path=os.path.join(coco_root, 'object_class_list.txt'), pad_init=False, padding_idx=-1, all_boxes=False, fix_length=20) text_field = TextField(init_token='<bos>', eos_token='<eos>', lower=True, remove_punctuation=True, fix_length=20) dataset = COCOEntities(image_field, det_field, text_field, img_root='', ann_root=os.path.join(coco_root, 'annotations'), entities_file=os.path.join(coco_root, 'coco_entities.json'), id_root=os.path.join(coco_root, 'annotations')) test_dataset = COCOEntities(image_field, det_field, RawField(),
# NOTE(review): the line below starts part-way through a call (the callee and
# any earlier arguments are not visible) and holds several statements run
# together with their original line breaks lost.
#
# 1. The leading keyword arguments finish that unseen call. All file paths are
#    joined onto flickr_root: detections HDF5, object class list, image
#    shapes JSON, class GloVe pickle, and the verb / caption / vocab JSON
#    index files; the call ends with fix_length=10, visual=False.
#    Presumably its result is the `det_field` used further down - TODO confirm.
# 2. text_field: TextField with '<bos>' / '<eos>' tokens, lower=True,
#    remove_punctuation=True, fix_length=20.
# 3. test_dataset: FlickrEntities(image_field, RawField(), det_field, ...)
#    with img_root='', ann_file 'flickr30k_annotations.json' under
#    flickr_root, entities_root=flickr_entities_root, verb_filter=True.
#    NOTE(review): positional order is (image_field, RawField(), det_field);
#    confirm this matches the FlickrEntities signature.
# 4. `.splits` is unpacked into train_dataset, val_dataset, test_dataset,
#    which rebinds test_dataset from the full dataset to its third split.
# 5. test_dataset is rebound once more to a DictionaryDataset built from that
#    split's examples and fields, with 'image' as the third argument
#    (presumably the field used as the dictionary key - TODO confirm).
detections_path=os.path.join(flickr_root, 'flickr30k_detections.hdf5'), classes_path=os.path.join(flickr_root, 'object_class_list.txt'), img_shapes_path=os.path.join(flickr_root, 'flickr_img_shapes.json'), precomp_glove_path=os.path.join(flickr_root, 'object_class_glove.pkl'), verb_idx_path=os.path.join(flickr_root, 'flickr_verb_idx.json'), idx_vs_path=os.path.join(flickr_root, 'idx_2_vs_flickr.json'), cap_verb_path=os.path.join(flickr_root, 'cap_2_verb_flickr.json'), cap_classes_path=os.path.join(flickr_root, 'cap_2_classes_flickr.json'), idx_v_og_path=os.path.join(flickr_root, 'idx_2_v_og_flickr.json'), vocab_list_path=os.path.join(flickr_root, 'vocab_tv_flickr.json'), fix_length=10, visual=False) text_field = TextField(init_token='<bos>', eos_token='<eos>', lower=True, remove_punctuation=True, fix_length=20) test_dataset = FlickrEntities(image_field, RawField(), det_field, img_root='', ann_file=os.path.join( flickr_root, 'flickr30k_annotations.json'), entities_root=flickr_entities_root, verb_filter=True) train_dataset, val_dataset, test_dataset = test_dataset.splits test_dataset = DictionaryDataset(test_dataset.examples, test_dataset.fields, 'image')
# NOTE(review): the line below holds several statements run together (their
# original line breaks are lost) and it ends part-way through a call.
#
# 1. Registers command-line options on `parser`:
#      --step_size        int,   default 3    (learning rate schedule step size)
#      --gamma            float, default 0.8  (learning rate schedule gamma)
#      --h2_first_lstm    int,   default 1    (h2 as input to the first lstm)
#      --img_second_lstm  int,   default 0    (img vector as input to the
#                                              second lstm)
#      --sample_rl        store_true flag     (RL with cider optimization)
#      --sample_rl_nw     store_true flag     (RL with cider + nw optimization)
# 2. Parses the arguments into `opt` and prints the parsed namespace.
# 3. Builds the COCO fields, with every path joined onto coco_root:
#      image_field - ImageDetectionsField over 'coco_detections.hdf5'
#                    (load_in_tmp=False).
#      det_field   - COCOControlSequenceField over the same detections file
#                    plus 'object_class_list.txt' (pad_init=False,
#                    padding_idx=-1, all_boxes=False, fix_length=20).
#      text_field  - TextField with '<bos>' / '<eos>' tokens, lower=True,
#                    remove_punctuation=True, fix_length=20.
# 4. dataset: COCOEntities(image_field, det_field, text_field, ...) with
#    img_root='', ann_root and id_root at 'annotations', and entities_file
#    'coco_entities.json'.
# 5. `dataset.splits` is unpacked into train_dataset and val_dataset; the
#    third element is discarded.
# 6. text_field.build_vocab is called on the train and val splits with
#    min_freq=5.
# 7. test_dataset: a second COCOEntities that passes RawField() where
#    `dataset` passed text_field; the call is cut off after id_root.
parser.add_argument('--step_size', default=3, type=int, help='learning rate schedule step size') parser.add_argument('--gamma', default=0.8, type=float, help='learning rate schedule gamma') parser.add_argument('--h2_first_lstm', default=1, type=int, help='h2 as input to the first lstm') parser.add_argument('--img_second_lstm', default=0, type=int, help='img vector as input to the second lstm') parser.add_argument('--sample_rl', action='store_true', help='reinforcement learning with cider optimization') parser.add_argument('--sample_rl_nw', action='store_true', help='reinforcement learning with cider + nw optimization') opt = parser.parse_args() print(opt) image_field = ImageDetectionsField(detections_path=os.path.join(coco_root, 'coco_detections.hdf5'), load_in_tmp=False) det_field = COCOControlSequenceField(detections_path=os.path.join(coco_root, 'coco_detections.hdf5'), classes_path=os.path.join(coco_root, 'object_class_list.txt'), pad_init=False, padding_idx=-1, all_boxes=False, fix_length=20) text_field = TextField(init_token='<bos>', eos_token='<eos>', lower=True, remove_punctuation=True, fix_length=20) dataset = COCOEntities(image_field, det_field, text_field, img_root='', ann_root=os.path.join(coco_root, 'annotations'), entities_file=os.path.join(coco_root, 'coco_entities.json'), id_root=os.path.join(coco_root, 'annotations')) train_dataset, val_dataset, _ = dataset.splits text_field.build_vocab(train_dataset, val_dataset, min_freq=5) test_dataset = COCOEntities(image_field, det_field, RawField(), img_root='', ann_root=os.path.join(coco_root, 'annotations'), entities_file=os.path.join(coco_root, 'coco_entities.json'), id_root=os.path.join(coco_root, 'annotations'),