def test(setup_data, input_seqs=None, test_style=ROM_STYLE):
    if input_seqs is None:
        _, input_rems_text = get_data(train=False, test_style=test_style)
    else:
        input_rems_text = input_seqs

    # append the target style token to every input sequence
    slen = len(input_rems_text)
    for i in xrange(slen):
        input_rems_text[i].append(test_style)

    _, _, enc_tok_text, _ = tokenize_text(
        input_rems_text,
        idx_to_word=setup_data['enc_idx_to_word'],
        word_to_idx=setup_data['enc_word_to_idx'])
    enc_padded_text = pad_text(enc_tok_text)

    dlen = enc_padded_text.shape[0]
    num_batch = dlen / BATCH_SIZE
    if dlen % BATCH_SIZE != 0:
        num_batch += 1

    res = []
    for i in xrange(num_batch):
        dec_tensor = generate(
            setup_data['enc'], setup_data['dec'],
            enc_padded_text[i * BATCH_SIZE:(i + 1) * BATCH_SIZE])
        res.append(dec_tensor)

    all_text = []
    res = np.concatenate(res, axis=0)
    for row in res:
        utok = untokenize(row, setup_data['dec_idx_to_word'], to_text=True)
        all_text.append(utok)
    return all_text
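# `pad_text` is defined elsewhere in this repo; the sketch below is only a
# minimal illustration of the padding behaviour its callers above rely on
# (a 2-D array of token ids, padded to the longest sequence). Using 0 as the
# pad index is an assumption, not a confirmed repo constant.
import numpy as np

def pad_text_sketch(tok_seqs, pad_idx=0):
    # tok_seqs: list of lists of token ids, one list per sequence
    max_len = max(len(seq) for seq in tok_seqs)
    padded = np.full((len(tok_seqs), max_len), pad_idx, dtype=np.int64)
    for i, seq in enumerate(tok_seqs):
        padded[i, :len(seq)] = seq
    return padded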
def train():
    input_text, input_rems_text = get_data(train=True)

    # decoder vocabulary (target sentences), capped at 20k words
    dec_idx_to_word, dec_word_to_idx, dec_tok_text, dec_bias = tokenize_text(
        input_text, lower_case=True, vsize=20000)
    dec_padded_text = pad_text(dec_tok_text)
    dec_vocab_size = len(dec_idx_to_word)

    # encoder vocabulary (input word sequences)
    enc_idx_to_word, enc_word_to_idx, enc_tok_text, _ = tokenize_text(
        input_rems_text)
    enc_padded_text = pad_text(enc_tok_text)
    enc_vocab_size = len(enc_idx_to_word)

    dec_text_tensor = torch.tensor(dec_padded_text, requires_grad=False)
    if cuda:
        dec_text_tensor = dec_text_tensor.cuda(device=device)

    enc, dec = build_model(enc_vocab_size, dec_vocab_size, dec_bias=dec_bias)
    enc_optim, dec_optim, lossfunc = build_trainers(enc, dec)

    num_batches = enc_padded_text.shape[0] / BATCH_SIZE
    sm_loss = None
    enc.train()
    dec.train()

    for epoch in xrange(0, 13):
        print "Starting New Epoch: %d" % epoch

        # reshuffle the training pairs at the start of every epoch
        order = np.arange(enc_padded_text.shape[0])
        np.random.shuffle(order)
        enc_padded_text = enc_padded_text[order]
        dec_text_tensor.data = dec_text_tensor.data[order]

        for i in xrange(num_batches):
            s = i * BATCH_SIZE
            e = (i + 1) * BATCH_SIZE
            _, enc_pp, dec_pp, enc_lengths = make_packpadded(
                s, e, enc_padded_text, dec_text_tensor)

            enc.zero_grad()
            dec.zero_grad()

            hid = enc.initHidden(BATCH_SIZE)
            out_enc, hid_enc = enc.forward(enc_pp, hid, enc_lengths)
            # merge the forward and backward states of the bidirectional encoder
            hid_enc = torch.cat([hid_enc[0, :, :], hid_enc[1, :, :]],
                                dim=1).unsqueeze(0)

            # teacher forcing: feed the target sequence shifted by one position
            out_dec, hid_dec, attn = dec.forward(dec_pp[:, :-1], hid_enc, out_enc)

            # loss expects (batch, vocab, seq_len)
            out_perm = out_dec.permute(0, 2, 1)
            loss = lossfunc(out_perm, dec_pp[:, 1:])

            # exponentially smoothed loss for logging
            if sm_loss is None:
                sm_loss = loss.data
            else:
                sm_loss = sm_loss * 0.95 + 0.05 * loss.data

            loss.backward()
            clip_grad_value_(enc_optim.param_groups[0]['params'], 5.0)
            clip_grad_value_(dec_optim.param_groups[0]['params'], 5.0)
            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                print "Epoch: %.3f" % (i / float(num_batches) + epoch,), "Loss:", sm_loss
                print "GEN:", untokenize(torch.argmax(out_dec, dim=2)[0, :], dec_idx_to_word)
                print "GT:", untokenize(dec_pp[0, :], dec_idx_to_word)
                print "IN:", untokenize(enc_pp[0, :], enc_idx_to_word)
                print torch.argmax(attn[0], dim=1)
                print "--------------"

        save_state(enc, dec, enc_optim, dec_optim, dec_idx_to_word,
                   dec_word_to_idx, enc_idx_to_word, enc_word_to_idx, epoch)
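# `make_packpadded` is defined elsewhere in this repo. The sketch below is a
# hedged guess at the batch preparation its usage above implies: slice the
# shuffled data, sort the slice by descending non-pad length so the encoder can
# call nn.utils.rnn.pack_padded_sequence, and keep the decoder rows aligned.
# The pad index 0 is an assumption.
def make_packpadded_sketch(s, e, enc_padded_text, dec_text_tensor, pad_idx=0):
    enc_batch = torch.tensor(enc_padded_text[s:e], requires_grad=False)
    if cuda:
        enc_batch = enc_batch.cuda(device=device)
    lengths = (enc_batch != pad_idx).sum(dim=1)
    lengths, order = torch.sort(lengths, descending=True)
    enc_batch = enc_batch[order]
    dec_batch = dec_text_tensor[s:e][order]
    return order, enc_batch, dec_batch, lengths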
def test(setup_data, test_folder=None, test_images=None):
    enc = setup_data['enc']
    dec = setup_data['dec']
    cnn = setup_data['cnn']
    trans = setup_data['trans']
    loaded_state = setup_data['loaded_state']
    s2s_data = setup_data['s2s_data']
    dec_vocab_size = len(loaded_state['dec_idx_to_word'])

    if test_folder is not None:
        # load images from folder
        img_reader = get_image_reader(test_folder, trans, BATCH_SIZE)
        using_images = True
    elif test_images is not None:
        # load images from memory
        img_reader = torch.utils.data.DataLoader(
            ImageNetLoader(test_images, trans), batch_size=BATCH_SIZE,
            shuffle=False, num_workers=1, pin_memory=True)
        using_images = True
    else:
        # load precomputed image features from dataset
        feats, filenames, sents = get_data(train=False)
        feats_tensor = torch.tensor(feats, requires_grad=False)
        if cuda:
            feats_tensor = feats_tensor.cuda(device=device)
        img_reader = TestIterator(feats_tensor, sents)
        using_images = False

    all_text = []
    for input, text_data in img_reader:
        if using_images:
            if cuda:
                input = input.cuda(device=device)
            with torch.no_grad():
                batch_feats_tensor = cnn(input)
        else:
            batch_feats_tensor = input

        dec_tensor = generate(enc, dec, batch_feats_tensor)

        untok = []
        for i in xrange(dec_tensor.shape[0]):
            untok.append(untokenize(dec_tensor[i],
                                    loaded_state['dec_idx_to_word'],
                                    to_text=False))

        text = s2s.test(s2s_data, untok)
        for i in range(len(text)):
            if using_images:
                # text data is filenames
                print "FN :", text_data[i]
            else:
                # text data is ground truth sentences
                print "GT :", text_data[i]
            print "DET:", ' '.join(untok[i])
            print "ROM:", text[i], "\n"
        all_text.extend(text)
    return all_text
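# `TestIterator` is defined elsewhere in this repo. A minimal sketch of the
# behaviour its usage above implies: yield (feature batch, matching ground-truth
# sentences) in BATCH_SIZE chunks over the precomputed features.
class TestIteratorSketch(object):
    def __init__(self, feats_tensor, sents, batch_size=BATCH_SIZE):
        self.feats = feats_tensor
        self.sents = sents
        self.batch_size = batch_size

    def __iter__(self):
        for s in range(0, self.feats.shape[0], self.batch_size):
            e = s + self.batch_size
            yield self.feats[s:e], self.sents[s:e]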
def train():
    feats, filenames, sents = get_data(train=True)

    dec_idx_to_word, dec_word_to_idx, dec_tok_text, dec_bias = tokenize_text(sents)
    dec_padded_text = pad_text(dec_tok_text)
    dec_vocab_size = len(dec_idx_to_word)

    enc, dec = build_model(dec_vocab_size, dec_bias)
    enc_optim, dec_optim, lossfunc = build_trainers(enc, dec)

    feats_tensor = torch.tensor(feats, requires_grad=False)
    dec_text_tensor = torch.tensor(dec_padded_text, requires_grad=False)
    if cuda:
        feats_tensor = feats_tensor.cuda(device=device)
        dec_text_tensor = dec_text_tensor.cuda(device=device)

    num_batches = feats.shape[0] / BATCH_SIZE
    sm_loss = None
    enc.train()
    dec.train()

    for epoch in xrange(0, 13):
        print "Starting New Epoch: %d" % epoch

        # reshuffle the data each epoch; drop the old device copies first so the
        # re-uploaded, reordered tensors do not double GPU memory usage
        order = np.arange(feats.shape[0])
        np.random.shuffle(order)
        del feats_tensor, dec_text_tensor
        if cuda:
            torch.cuda.empty_cache()
        feats_tensor = torch.tensor(feats[order], requires_grad=False)
        dec_text_tensor = torch.tensor(dec_padded_text[order], requires_grad=False)
        if cuda:
            feats_tensor = feats_tensor.cuda(device=device)
            dec_text_tensor = dec_text_tensor.cuda(device=device)

        for i in xrange(num_batches):
            s = i * BATCH_SIZE
            e = (i + 1) * BATCH_SIZE

            enc.zero_grad()
            dec.zero_grad()

            # project the image features to the decoder's initial hidden state
            hid_enc = enc.forward(feats_tensor[s:e]).unsqueeze(0)
            # teacher forcing: feed the target sequence shifted by one position
            out_dec, hid_dec = dec.forward(dec_text_tensor[s:e, :-1], hid_enc)

            # loss expects (batch, vocab, seq_len)
            out_perm = out_dec.permute(0, 2, 1)
            loss = lossfunc(out_perm, dec_text_tensor[s:e, 1:])

            # exponentially smoothed loss for logging
            if sm_loss is None:
                sm_loss = loss.data
            else:
                sm_loss = sm_loss * 0.95 + 0.05 * loss.data

            loss.backward()
            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                print "Epoch: %.3f" % (i / float(num_batches) + epoch,), "Loss:", sm_loss
                print "GEN:", untokenize(torch.argmax(out_dec, dim=2)[0, :], dec_idx_to_word)
                print "GT:", untokenize(dec_text_tensor[s, :], dec_idx_to_word)
                print "--------------"

        save_state(enc, dec, enc_optim, dec_optim, dec_idx_to_word,
                   dec_word_to_idx, epoch)
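# `build_model` / `build_trainers` are defined elsewhere in this repo. The sketch
# below shows one architecture consistent with how they are used above: a linear
# encoder projecting precomputed CNN features to the decoder's initial hidden
# state, and a GRU decoder over word embeddings whose output bias is seeded from
# `dec_bias`. FEAT_SIZE / HID_SIZE / EMB_SIZE are illustrative placeholders, not
# the repo's actual hyperparameters, and raw logits are returned on the
# assumption of a cross-entropy style loss.
import torch.nn as nn

FEAT_SIZE, HID_SIZE, EMB_SIZE = 2048, 512, 256  # placeholder sizes

class DecoderSketch(nn.Module):
    def __init__(self, vocab_size, dec_bias=None):
        super(DecoderSketch, self).__init__()
        self.emb = nn.Embedding(vocab_size, EMB_SIZE)
        self.gru = nn.GRU(EMB_SIZE, HID_SIZE, batch_first=True)
        self.out = nn.Linear(HID_SIZE, vocab_size)
        if dec_bias is not None:
            # bias the output layer toward frequent words
            self.out.bias.data = torch.tensor(dec_bias, dtype=torch.float)

    def forward(self, tokens, hid):
        emb = self.emb(tokens)             # (B, T, EMB_SIZE)
        out, hid = self.gru(emb, hid)      # (B, T, HID_SIZE)
        return self.out(out), hid          # (B, T, vocab_size)

def build_model_sketch(dec_vocab_size, dec_bias):
    enc = nn.Linear(FEAT_SIZE, HID_SIZE)   # (B, FEAT_SIZE) -> (B, HID_SIZE)
    dec = DecoderSketch(dec_vocab_size, dec_bias)
    if cuda:
        enc = enc.cuda(device=device)
        dec = dec.cuda(device=device)
    return enc, dec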
def test(setup_data, test_folder=None, test_images=None):
    enc = setup_data['enc']
    dec = setup_data['dec']
    cnn = setup_data['cnn']
    trans = setup_data['trans']
    loaded_state = setup_data['loaded_state']
    s2s_data = setup_data['s2s_data']
    dec_vocab_size = len(loaded_state['dec_idx_to_word'])

    k = 0
    id_captions = []

    if test_folder is not None:
        # load images from folder
        img_reader = get_image_reader(test_folder, trans, BATCH_SIZE)
        using_images = True
    elif test_images is not None:
        # load images from memory
        img_reader = torch.utils.data.DataLoader(
            ImageNetLoader(test_images, trans), batch_size=BATCH_SIZE,
            shuffle=False, num_workers=1, pin_memory=True)
        using_images = True
    else:
        # load precomputed image features from dataset
        feats, filenames, sents = get_data(train=False)
        feats_tensor = torch.tensor(feats, requires_grad=False)
        if cuda:
            feats_tensor = feats_tensor.cuda(device=device)
        img_reader = TestIterator(feats_tensor, sents)
        using_images = False

    all_text = []
    for input, text_data in img_reader:
        if using_images:
            if cuda:
                input = input.cuda(device=device)
            with torch.no_grad():
                batch_feats_tensor = cnn(input)
        else:
            batch_feats_tensor = input

        dec_tensor = generate(enc, dec, batch_feats_tensor)

        untok = []
        for i in range(dec_tensor.shape[0]):
            untok.append(untokenize(dec_tensor[i],
                                    loaded_state['dec_idx_to_word'],
                                    to_text=False))

        text = test_s2s(s2s_data, untok)
        for i in range(len(text)):
            # map the COCO val2014 filename back to its numeric image id
            filenames[k] = filenames[k].replace('COCO_val2014_', '')
            filenames[k] = filenames[k].replace('.jpg', '')
            j = {
                "image_id": int(filenames[k]),
                "caption": text[i],
                "words": ' '.join(untok[i])
            }
            id_captions.append(j)
            k += 1
        all_text.extend(text)

    with open('results/captions_val2014_' + test_model_fname +
              seq_to_seq_test_model_fname + '_results.json', 'w') as outfile:
        json.dump(id_captions, outfile)
    pandas.DataFrame(id_captions).to_csv(
        'results/captions_val2014_' + test_model_fname +
        seq_to_seq_test_model_fname + '_results.csv', index=False)
    return all_text
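# Hypothetical follow-up, not part of this repo: scoring the results JSON written
# above with the standard COCO caption metrics via pycocotools + pycocoevalcap
# (the coco-caption toolkit). Both file paths are placeholders.
def evaluate_coco_sketch(ann_file, res_file):
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap
    coco = COCO(ann_file)              # e.g. annotations/captions_val2014.json
    coco_res = coco.loadRes(res_file)  # the *_results.json written above
    coco_eval = COCOEvalCap(coco, coco_res)
    coco_eval.params['image_id'] = coco_res.getImgIds()
    coco_eval.evaluate()
    return coco_eval.eval              # metric name -> score (BLEU, METEOR, CIDEr, ...)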