def test_get_moses_multi_bleu():
    hypotheses = [
        "The brown fox jumps over the dog 笑",
        "The brown fox jumps over the dog 2 笑"
    ]
    references = [
        "The quick brown fox jumps over the lazy dog 笑",
        "The quick brown fox jumps over the lazy dog 笑"
    ]
    result = get_moses_multi_bleu(hypotheses, references, lowercase=False)
    np.testing.assert_almost_equal(result, 46.51, decimal=2)
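# The snippets below all call torchnlp's wrapper around the Moses multi-bleu.perl
# script. A minimal sketch of the expected call, assuming the pytorch-nlp package
# is installed: hypotheses and references are parallel lists of detokenized
# sentence strings, and the score is returned on the 0-100 scale used by
# multi-bleu.perl (46.51 for the pair of sentences in the test above).
from torchnlp.metrics import get_moses_multi_bleu

hypotheses = ["The brown fox jumps over the dog 笑"]
references = ["The quick brown fox jumps over the lazy dog 笑"]
score = get_moses_multi_bleu(hypotheses, references, lowercase=False)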
def calculate_bleu(src, trg, corpus_level=False, weights=(0.25, 0.25, 0.25, 0.25),
                   use_torchnlp=True):
    # src = [[sent words1], [sent words2], ...], trg = [sent words]
    if not use_torchnlp:
        if not corpus_level:
            score = bleu_score.sentence_bleu(src, trg, weights=weights)
        else:
            score = bleu_score.corpus_bleu(src, trg, weights=weights)
    else:
        score = get_moses_multi_bleu(src, trg, lowercase=True)
    return score
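# The two branches above expect differently shaped arguments: the non-torchnlp
# path (presumably NLTK's bleu_score module) scores token lists, while the
# torchnlp path scores whitespace-joined sentence strings and passes src as the
# hypotheses. A hedged usage sketch with made-up sentences:
nltk_score = calculate_bleu(
    src=[["the", "quick", "brown", "fox"]],   # reference token lists
    trg=["the", "brown", "fox"],              # hypothesis tokens
    use_torchnlp=False)
moses_score = calculate_bleu(
    src=["the brown fox"],                    # hypothesis sentence strings
    trg=["the quick brown fox"],              # reference sentence strings
    use_torchnlp=True)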
def evaluateRandomly(encoder, decoder, n, data_pairs):
    bleu_score_total = 0
    for i in range(n):
        pair = random.choice(data_pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        bleu_score_total += get_moses_multi_bleu([output_sentence], [pair[1]],
                                                 lowercase=True)
        print('<', output_sentence)
        print('')
    bleu_score_avg = bleu_score_total / n
    return bleu_score_avg
def evaluate(self, model, data):
    """ Evaluate a model on given dataset and return performance.

    Args:
        model (seq2seq.models): model to evaluate
        data (seq2seq.dataset.dataset.Dataset): dataset to evaluate against

    Returns:
        loss (float): loss of the given model on the given dataset
    """
    model.eval()

    loss = self.loss
    loss.reset()
    match = 0
    total = 0

    device = None if torch.cuda.is_available() else -1
    batch_iterator = torchtext.data.BucketIterator(
        dataset=data, batch_size=self.batch_size,
        sort=True, sort_key=lambda x: len(x.src),
        device=device, train=False)
    tgt_vocab = data.fields[seq2seq.tgt_field_name].vocab
    pad = tgt_vocab.stoi[data.fields[seq2seq.tgt_field_name].pad_token]

    hypotheses, references = [], []
    with torch.no_grad():
        for batch in batch_iterator:
            # obtain input variables and input lengths
            # shape: (bs, lens) and (bs,)
            input_variables, input_lengths = getattr(batch, seq2seq.src_field_name)
            # obtain target sentences
            # shape: (bs, lens)
            target_variables = getattr(batch, seq2seq.tgt_field_name)

            decoder_outputs, decoder_hidden, other = model(
                input_variables, input_lengths.tolist(), target_variables)

            # Evaluation
            seqlist = other['sequence']
            for step, step_output in enumerate(decoder_outputs):
                target = target_variables[:, step + 1]
                loss.eval_batch(
                    step_output.view(target_variables.size(0), -1), target)

                non_padding = target.ne(pad)
                correct = seqlist[step].view(-1).eq(target).masked_select(
                    non_padding).sum().item()
                match += correct
                total += non_padding.sum().item()

            # decode predicted and reference token ids into sentence strings
            # so they can be scored with get_moses_multi_bleu
            predicted_ids = torch.cat(seqlist, dim=1)
            target_ids = target_variables[:, 1:]
            for pred, tgt in zip(predicted_ids.tolist(), target_ids.tolist()):
                hypotheses.append(' '.join(
                    tgt_vocab.itos[idx] for idx in pred if idx != pad))
                references.append(' '.join(
                    tgt_vocab.itos[idx] for idx in tgt if idx != pad))

    if total == 0:
        accuracy = float('nan')
    else:
        accuracy = match / total

    bleu = get_moses_multi_bleu(hypotheses, references)
    return loss.get_loss(), accuracy, bleu
def bleu(tar, pred):
    """Calculates Moses BLEU given two arrays of str tokens."""
    # get_moses_multi_bleu expects (hypotheses, references) as sentence strings
    tar, pred = ' '.join(tar), ' '.join(pred)
    return get_moses_multi_bleu([pred], [tar])
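# A hedged usage sketch of the helper above, with made-up token arrays; it
# returns the Moses BLEU of a single hypothesis against a single reference.
reference_tokens = ["the", "quick", "brown", "fox"]
predicted_tokens = ["the", "brown", "fox"]
print(bleu(reference_tokens, predicted_tokens))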
model.zero_grad()
# reference.append(" ".join(sentence))
cap_target = prepare_sequence(sentence, word_to_ix)
cap_pred = model(cap_target, features[i])
cap_pred = cap_pred.view(-1, vocab_size)
maxval, maxidx = torch.max(cap_pred, dim=-1)
# map each predicted index back to its word
sent = []
key_list = list(word_to_ix.keys())
for k in range(maxidx.shape[0]):
    sent.append(key_list[maxidx[k]])
# hypothesis.append(" ".join(sent))
# print(" ".join(sent))
# print(" ".join(sentence))
# get_moses_multi_bleu expects lists of sentence strings
te = get_moses_multi_bleu([" ".join(sent)], [" ".join(sentence)], lowercase=True)
# print(te)
if te > ma:
    ma = te
to = to + ma
# print(ma)
to = to / 30
print(to)
# print(get_moses_multi_bleu(hypothesis, reference, lowercase=True))
er = 0
pr = 0
hypothesis = []
reference = []
# print(features.shape)
avlad = vladmodel(features[i])
# print(avlad.shape)
avlad = avlad.view(-1)
cap_pred = model(cap_target, avlad)
cap_pred = cap_pred.view(-1, vocab_size)
maxval, maxidx = torch.max(cap_pred, dim=-1)
sent = []
key_list = list(word_to_ix.keys())
for k in range(maxidx.shape[0]):
    sent.append(key_list[maxidx[k]])
hypothesis.append(" ".join(sent))

print('bleu-score')
print(get_moses_multi_bleu(hypothesis, reference, lowercase=True))

### Validation set evaluation ###
datapath = 'video_data/'
cap_val = np.load(datapath + 'captionsDev.npy', encoding='bytes')
cap_val.shape
feat_val = np.load('vgg_feat_val.npy')
val_feat = feat_val[:, :, :500, :, :]
val_feat.shape
features_val = torch.Tensor(val_feat).cuda()

def prepare_sequence(seq, to_ix):
    # convert a sequence of words into a tensor of vocabulary indices
    idxs = []
    for w in seq:
        idxs.append(to_ix[w])
    return torch.tensor(idxs, dtype=torch.long)
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    track_blue = []

    # Build up dataset
    s_train, s_test = load_from_big_file('obama_speech', g_sequence_len)
    # idx_to_word: List of id to word
    # word_to_idx: Dictionary mapping word to id
    idx_to_word, word_to_idx = fetch_vocab(s_train, s_train, s_test)
    # input_seq, target_seq = prepare_data(DATA_GERMAN, DATA_ENGLISH, word_to_idx)
    global VOCAB_SIZE
    VOCAB_SIZE = len(idx_to_word)
    save_vocab(CHECKPOINT_PATH + 'metadata.data', idx_to_word, word_to_idx,
               VOCAB_SIZE, g_emb_dim, g_hidden_dim)
    print('VOCAB SIZE:', VOCAB_SIZE)

    # Define Networks
    generator = Generator(VOCAB_SIZE, g_emb_dim, g_hidden_dim, opt.cuda)
    if opt.cuda:
        generator = generator.cuda()

    # Generate toy data using target lstm
    print('Generating data ...')
    # Generate samples either from sentences file or lstm
    # Sentences file will be structured input sentences
    # LSTM based is BOG approach
    generate_real_data('obama_speech', BATCH_SIZE, GENERATED_NUM, idx_to_word,
                       word_to_idx, POSITIVE_FILE, TEST_FILE)
    # generate_samples(target_lstm, BATCH_SIZE, GENERATED_NUM, POSITIVE_FILE, idx_to_word)
    # generate_samples(target_lstm, BATCH_SIZE, 10, TEST_FILE, idx_to_word)

    # Create Test data iterator for testing
    test_iter = GenDataIter(TEST_FILE, BATCH_SIZE)
    # test_predict(generator, test_iter, idx_to_word, train_mode=True)

    # Load data from file
    gen_data_iter = GenDataIter(POSITIVE_FILE, BATCH_SIZE)

    lines = read_file(POSITIVE_FILE)
    refrences = []
    for line in lines:
        phrase = []
        for char in line:
            phrase.append(idx_to_word[char])
        refrences.append(' '.join(phrase))
        # refrences.append(phrase)

    # Pretrain Generator using MLE
    gen_criterion = nn.NLLLoss(size_average=False)
    gen_optimizer = optim.Adam(generator.parameters())
    if opt.cuda:
        gen_criterion = gen_criterion.cuda()
    print('Pretrain with MLE ...')
    for epoch in range(PRE_EPOCH_NUM):
        loss = train_epoch(generator, gen_data_iter, gen_criterion, gen_optimizer)
        print('Epoch [%d] Model Loss: %f' % (epoch, loss))
        sys.stdout.flush()
        generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)

        if track_training:
            lines = read_file(EVAL_FILE)
            hypotheses = []
            for line in lines:
                phrase = []
                for char in line:
                    phrase.append(idx_to_word[char])
                hypotheses.append(' '.join(phrase))
                # hypotheses.append(phrase)
            bleu_score = get_moses_multi_bleu(hypotheses, refrences, lowercase=True)
            track_blue.append(bleu_score)
            print(track_blue)

    torch.save(generator.state_dict(), CHECKPOINT_PATH + 'generator_mle.model')
    track_blue = np.array(track_blue)
    np.save(ROOT_PATH + 'track_blue_mle3.npy', track_blue)
    plt.plot(track_blue)
    plt.show()
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    track_blue = []

    # Build up dataset
    s_train, s_test = load_from_big_file('obama_speech', g_sequence_len)
    # idx_to_word: List of id to word
    # word_to_idx: Dictionary mapping word to id
    idx_to_word, word_to_idx = fetch_vocab(s_train, s_train, s_test)
    # input_seq, target_seq = prepare_data(DATA_GERMAN, DATA_ENGLISH, word_to_idx)
    global VOCAB_SIZE
    VOCAB_SIZE = len(idx_to_word)
    save_vocab(CHECKPOINT_PATH + 'metadata.data', idx_to_word, word_to_idx,
               VOCAB_SIZE, g_emb_dim, g_hidden_dim)
    print('VOCAB SIZE:', VOCAB_SIZE)

    # Define Networks
    generator = Generator(VOCAB_SIZE, g_emb_dim, g_hidden_dim, opt.cuda)
    discriminator = Discriminator(d_num_class, VOCAB_SIZE, d_emb_dim,
                                  d_filter_sizes, d_num_filters, d_dropout)
    if opt.cuda:
        generator = generator.cuda()
        discriminator = discriminator.cuda()

    # Generate toy data using target lstm
    print('Generating data ...')
    # Generate samples either from sentences file or lstm
    # Sentences file will be structured input sentences
    # LSTM based is BOG approach
    generate_real_data('obama_speech', BATCH_SIZE, GENERATED_NUM, idx_to_word,
                       word_to_idx, POSITIVE_FILE, TEST_FILE)
    # generate_samples(target_lstm, BATCH_SIZE, GENERATED_NUM, POSITIVE_FILE, idx_to_word)
    # generate_samples(target_lstm, BATCH_SIZE, 10, TEST_FILE, idx_to_word)

    # Create Test data iterator for testing
    test_iter = GenDataIter(TEST_FILE, BATCH_SIZE)
    # test_predict(generator, test_iter, idx_to_word, train_mode=True)

    # Load data from file
    gen_data_iter = GenDataIter(POSITIVE_FILE, BATCH_SIZE)

    lines = read_file(POSITIVE_FILE)
    refrences = []
    for line in lines:
        phrase = []
        for char in line:
            phrase.append(idx_to_word[char])
        refrences.append(' '.join(phrase))
        # refrences.append(phrase)

    # Pretrain Generator using MLE
    gen_criterion = nn.NLLLoss(size_average=False)
    gen_optimizer = optim.Adam(generator.parameters())
    if opt.cuda:
        gen_criterion = gen_criterion.cuda()
    print('Pretrain with MLE ...')
    for epoch in range(PRE_EPOCH_NUM):
        loss = train_epoch(generator, gen_data_iter, gen_criterion, gen_optimizer)
        print('Epoch [%d] Model Loss: %f' % (epoch, loss))
        sys.stdout.flush()
        generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)

        if track_training:
            lines = read_file(EVAL_FILE)
            hypotheses = []
            for line in lines:
                phrase = []
                for char in line:
                    phrase.append(idx_to_word[char])
                hypotheses.append(' '.join(phrase))
                # hypotheses.append(phrase)
            bleu_score = get_moses_multi_bleu(hypotheses, refrences, lowercase=True)
            track_blue.append(bleu_score)
            print(track_blue)

        # generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
        # eval_iter = GenDataIter(EVAL_FILE, BATCH_SIZE)
        # loss = eval_epoch(target_lstm, eval_iter, gen_criterion)
        # print('Epoch [%d] True Loss: %f' % (epoch, loss))

    # Pretrain Discriminator
    dis_criterion = nn.NLLLoss(size_average=False)
    dis_optimizer = optim.Adam(discriminator.parameters())
    if opt.cuda:
        dis_criterion = dis_criterion.cuda()
    print('Pretrain Discriminator ...')
    for epoch in range(5):
        generate_samples(generator, BATCH_SIZE, GENERATED_NUM, NEGATIVE_FILE)
        dis_data_iter = DisDataIter(POSITIVE_FILE, NEGATIVE_FILE, BATCH_SIZE)
        for _ in range(3):
            loss = train_epoch(discriminator, dis_data_iter, dis_criterion, dis_optimizer)
            print('Epoch [%d], loss: %f' % (epoch, loss))
            # sys.stdout.flush()

    # Adversarial Training
    rollout = Rollout(generator, 0.8)
    print('#####################################################')
    print('Start Adversarial Training...\n')
    gen_gan_loss = GANLoss()
    gen_gan_optm = optim.Adam(generator.parameters())
    if opt.cuda:
        gen_gan_loss = gen_gan_loss.cuda()
    gen_criterion = nn.NLLLoss(size_average=False)
    if opt.cuda:
        gen_criterion = gen_criterion.cuda()
    dis_criterion = nn.NLLLoss(size_average=False)
    dis_optimizer = optim.Adam(discriminator.parameters())
    if opt.cuda:
        dis_criterion = dis_criterion.cuda()

    for total_batch in range(TOTAL_BATCH):
        ## Train the generator for one step
        for it in range(1):
            samples = generator.sample(BATCH_SIZE, g_sequence_len)
            # construct the input to the generator: add zeros before samples
            # and delete the last column
            zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
            if samples.is_cuda:
                zeros = zeros.cuda()
            inputs = Variable(torch.cat([zeros, samples.data], dim=1)[:, :-1].contiguous())
            targets = Variable(samples.data).contiguous().view((-1,))
            # calculate the reward
            rewards = rollout.get_reward(samples, 16, discriminator)
            rewards = Variable(torch.Tensor(rewards))
            rewards = torch.exp(rewards).contiguous().view((-1,))
            if opt.cuda:
                rewards = rewards.cuda()
            prob = generator.forward(inputs)
            # print('SHAPE: ', prob.shape, targets.shape, rewards.shape)
            loss = gen_gan_loss(prob, targets, rewards)
            gen_gan_optm.zero_grad()
            loss.backward()
            gen_gan_optm.step()
            # print('GEN PRED DIM: ', prob.shape)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            # generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
            # eval_iter = GenDataIter(EVAL_FILE, BATCH_SIZE)
            # loss = eval_epoch(target_lstm, eval_iter, gen_criterion)
            # print('Batch [%d] True Loss: %f' % (total_batch, loss))

            # predictions = torch.max(prob, dim=1)[1]
            # predictions = predictions.view(BATCH_SIZE, -1)
            # # print('PRED SHAPE:', predictions.shape)
            # for each_sen in list(predictions):
            #     print('Training Output:', generate_sentence_from_id(idx_to_word, each_sen, DEBUG_FILE))
            # # test_predict(generator, test_iter, idx_to_word, train_mode=True)

            loss_gen = eval_epoch(generator, gen_data_iter, gen_criterion)
            print('Epoch [%d] Model Loss: %f' % (total_batch, loss_gen))
            generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
            # show_some_generated_sequences(idx_to_word, 10, EVAL_FILE)
            sys.stdout.flush()

            if track_training:
                lines = read_file(EVAL_FILE)
                hypotheses = []
                for line in lines:
                    phrase = []
                    for char in line:
                        phrase.append(idx_to_word[char])
                    hypotheses.append(' '.join(phrase))
                    # hypotheses.append(phrase)
                bleu_score = get_moses_multi_bleu(hypotheses, refrences, lowercase=True)
                track_blue.append(bleu_score)
                print(track_blue)

            torch.save(generator.state_dict(), CHECKPOINT_PATH + 'generator_seqgan.model')
            torch.save(discriminator.state_dict(), CHECKPOINT_PATH + 'discriminator_seqgan.model')

        rollout.update_params()

        for _ in range(4):
            generate_samples(generator, BATCH_SIZE, GENERATED_NUM, NEGATIVE_FILE)
            dis_data_iter = DisDataIter(POSITIVE_FILE, NEGATIVE_FILE, BATCH_SIZE)
            for _ in range(2):
                loss = train_epoch(discriminator, dis_data_iter, dis_criterion, dis_optimizer)

    track_blue = np.array(track_blue)
    np.save(ROOT_PATH + 'track_blue_seqgan2.npy', track_blue)
    plt.plot(track_blue)
    plt.show()
all_scores_bleu = []
all_stds_bleu = []
for iteration in range(1, 11):
    checkpoint_path = f'trial_1/checkpoint/text_gan_{str(iteration*10000).zfill(6)}_model.pth'
    checkpoint = torch.load(checkpoint_path)
    net_g.load_state_dict(checkpoint['tg'])
    net_g.cuda()
    net_g.eval()

    fake_texts = []
    for i in range(100):
        noise = torch.randn(32, 128).cuda()
        g_text_latent = net_g(noise)
        g_captions = net_t_ae.generate(g_text_latent)
        fake_texts += g_captions

    # p, r, f = b_score(fake_texts, real_texts, bert="bert-base-uncased", verbose=True)
    # print(f.mean().item())
    # all_scores_bert.append(f.mean().item())

    fake_whole_texts = ''
    for t in fake_texts:
        fake_whole_texts += t
    score = get_moses_multi_bleu([fake_whole_texts], [real_whole_texts], lowercase=True)
    all_scores_bleu.append(score)

print(all_scores_bert, all_scores_bleu)