# Assumed imports for this excerpt; the dataset/field/model classes
# (FlickrEntities, COCOEntities, DictionaryDataset, PairedDataset, S_SSP,
# ControllableCaptioningModel, DataLoader, evaluation, ...) come from the
# surrounding project and are not shown here.
import os

import torch
from torch.nn import NLLLoss
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR

device = torch.device('cuda')  # assumed; the models below are moved to the GPU

# Caption pre-processing field (assumed reconstruction: the original statement
# was truncated to "fix_length=20)"; the <bos> token and 20-step length match
# their use in ControllableCaptioningModel below).
text_field = TextField(init_token='<bos>', eos_token='<eos>', lower=True,
                       remove_punctuation=True, fix_length=20)

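# Flickr30k Entities dataset: images with region-grounded captions, built from
# the pre-extracted detections, the annotation file, and the entities root.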
test_dataset = FlickrEntities(image_field,
                              RawField(),
                              det_field,
                              img_root='',
                              ann_file=os.path.join(
                                  flickr_root, 'flickr30k_annotations.json'),
                              entities_root=flickr_entities_root,
                              verb_filter=True)

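# Split the data, then regroup the train and test examples by image
# (DictionaryDataset keyed on 'image'), so each batch entry carries one image
# together with all of its examples.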
train_dataset, val_dataset, test_dataset = test_dataset.splits
test_dataset = DictionaryDataset(test_dataset.examples, test_dataset.fields,
                                 'image')
dataloader_test = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             num_workers=opt.nb_workers)
train_dataset = DictionaryDataset(train_dataset.examples, train_dataset.fields,
                                  'image')
dataloader_train = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              num_workers=opt.nb_workers)

os.makedirs(opt.checkpoint_path, exist_ok=True)

# Define the region-sorting model.
re_sort_net = S_SSP().cuda()

# Optionally restore a trained model for evaluation.
if opt.eval:
    pass  # checkpoint loading omitted in this excerpt

# Example 2

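# COCO Entities dataset: COCO captions with noun chunks grounded in detected
# regions, built from the COCO annotations plus the coco_entities.json file.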
test_dataset = COCOEntities(image_field, det_field, RawField(),
                            img_root='',
                            ann_root=os.path.join(coco_root, 'annotations'),
                            entities_file=os.path.join(coco_root, 'coco_entities.json'),
                            id_root=os.path.join(coco_root, 'annotations'),
                            filtering=True)

# Only the validation split of this filtered dataset is used below; train_dataset
# comes from an earlier, unfiltered COCOEntities instance not shown in this excerpt.
_, val_dataset, _ = test_dataset.splits

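# For RL-based sampling, keep raw (untokenized) training captions and pre-build
# a CIDEr scorer over them to serve as the sequence-level reward.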
if opt.sample_rl or opt.sample_rl_nw:
    train_dataset.fields['text'] = RawField()
    train_dataset_raw = PairedDataset(train_dataset.examples,
                                      {'image': image_field, 'detection': det_field,
                                       'text': RawField()})
    ref_caps_train = list(train_dataset_raw.text)
    cider_train = evaluation.Cider(evaluation.PTBTokenizer.tokenize(ref_caps_train))

dataloader_train = DataLoader(train_dataset, batch_size=opt.batch_size,
                              shuffle=False, num_workers=opt.nb_workers)

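# Validation captions stay raw so generated sentences can be scored against them.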
val_dataset.fields['text'] = RawField()
dataloader_val = DataLoader(val_dataset, batch_size=16, num_workers=opt.nb_workers)

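# Captioning model: max caption length 20 (matching fix_length), vocabulary size,
# and the <bos> index; the two flags toggle architectural variants of the
# two-layer LSTM decoder.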
model = ControllableCaptioningModel(20, len(text_field.vocab), text_field.vocab.stoi['<bos>'],
                                    h2_first_lstm=opt.h2_first_lstm, img_second_lstm=opt.img_second_lstm).to(device)

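# Optimization: Adam with step-wise LR decay; NLL loss for word prediction plus a
# separate NLL loss for the region-switching gate, ignoring unlabeled targets (-1).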
optim = Adam(model.parameters(), lr=opt.lr)
scheduler = StepLR(optim, step_size=opt.step_size, gamma=opt.gamma)
loss_fn = NLLLoss()
loss_fn_gate = NLLLoss(ignore_index=-1)

start_epoch = 0
best_cider = 0.  # best validation CIDEr seen so far
patience = 0     # epochs without improvement, for early stopping