def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, args):
    """
    Max Likelihood Pretraining for the generator
    """
    num_data = len(real_data_samples)
    total_loss = 0
    for i in range(0, num_data, args.g_bsz):
        inp, target = helpers.prepare_generator_batch(
            real_data_samples[i:i + args.g_bsz],
            start_letter=args.start_letter, gpu=args.cuda)
        gen_opt.zero_grad()
        loss = gen.batchNLLLoss(inp, target)
        loss.backward()
        gen_opt.step()

        total_loss += loss.item()  # loss.data[0] is deprecated since PyTorch 0.4

        if (i / args.g_bsz) % ceil(ceil(num_data / float(args.g_bsz)) / 10.) == 0:  # roughly every 10% of an epoch
            print('.', end='')
            sys.stdout.flush()

    # each loss in a batch is loss per sample
    total_loss = total_loss / ceil(num_data / float(args.g_bsz)) / args.max_seq_len

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval, args)

    return oracle_loss, total_loss
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):  # pretrains on real data
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE],
                                                          start_letter=START_LETTER, gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()

            if (i / BATCH_SIZE) % ceil(
                    ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f' % (total_loss, oracle_loss))
def train_generator_MLE(gen, gen_opt, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE],
                                                          start_letter=START_LETTER, gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()

            if (i / BATCH_SIZE) % ceil(
                    ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # Generate LSTM samples
        path = 'output/MSE-{}.samples'.format(epoch)
        generateSamples(gen, path)

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        print(' average_train_NLL = %.4f' % (total_loss))
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print("epoch %d:" % (epoch + 1), end=" ")
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE],
                                                          start_letter=START_LETTER, gpu=CUDA)
            # inp is what the generator is fed; target is the real text. The loss
            # against target is what optimizes the generator.
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()

            # ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) rounds up to the total number m
            # of batches; splitting those m batches into 10 chunks, i tells us whether
            # we have just crossed a 10% boundary of the epoch
            if (i / BATCH_SIZE) % ceil(ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample; the batch loss is accumulated per
        # word over seq_len steps, so dividing by MAX_SEQ_LEN gives a per-token loss
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL on the generator's own samples
        # (measures how well the generator imitates the oracle)
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print("average_train_NLL=%.4f, oracle_sample_NLL=%.4f" % (total_loss, oracle_loss))
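# All the MLE variants above rely on helpers.prepare_generator_batch. Below is a
# minimal sketch of what it appears to do, inferred from the example tensors in
# the commented variant further down (an assumption, not the repo's confirmed
# implementation): inp is target shifted right by one position with start_letter
# prepended, so predicting target[t] from inp[:t + 1] is next-token prediction.
import torch

def prepare_generator_batch_sketch(samples, start_letter=0, gpu=False):
    batch_size, seq_len = samples.size()
    inp = torch.zeros(batch_size, seq_len, dtype=torch.long)
    inp[:, 0] = start_letter              # every input sequence begins with the start token
    inp[:, 1:] = samples[:, :seq_len - 1]  # then the sample, shifted right by one
    target = samples.clone()               # the target is the sample itself
    if gpu:
        inp, target = inp.cuda(), target.cuda()
    return inp, target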
def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """
    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # 64 works best
        inp, target = helpers.prepare_generator_batch(s, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                               start_letter=START_LETTER, gpu=CUDA)

    print(' oracle_sample_NLL = %.4f' % oracle_loss)
def train_generator_PG(gen, gen_opt, dis, train_iter, num_batches):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """
    global pg_count
    global best_advbleu
    pg_count += 1
    num_sentences = 0
    total_loss = 0
    rollout = Rollout(gen, update_learning_rate)

    for i, data in enumerate(train_iter):
        if i == num_batches:
            break
        src_data_wrap = data.source
        ans = data.answer[0]
        # tgt_data = data.target[0].permute(1, 0)
        passage = src_data_wrap[0].permute(1, 0)

        if CUDA:
            scr_data = data.source[0].to(device)  # lengths x batch_size
            scr_lengths = data.source[1].to(device)
            ans = ans.to(device)
            ans_p = ans.permute(1, 0)
            src_data_wrap = (scr_data, scr_lengths, ans)
            passage = passage.to(device)
            passage = (passage, ans_p)

        num_sentences += scr_data.size(1)

        with torch.no_grad():
            samples, _ = gen.sample(src_data_wrap)  # 64 batch_size works best
        rewards = rollout.get_reward(samples, passage, src_data_wrap, rollout_size, dis,
                                     src_rev, rev, train_ref, tgt_pad)
        inp, target = helpers.prepare_generator_batch(samples, gpu=CUDA)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(src_data_wrap, inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()
        total_loss += pg_loss.item()  # detach before accumulating so the graph is freed
        rollout.update_params()  # TODO: DON'T KNOW WHY

    gen.eval()
    # print("Set gen to {0} mode".format('train' if model.decoder.dropout.training else 'eval'))
    valid_bleu = evaluation.evalModel(gen, val_iter, pg_count, rev, src_special, tgt_special, tgt_ref, src_rev)
    print('Validation bleu-4 = %g' % (valid_bleu * 100))
    if valid_bleu > best_advbleu:
        best_advbleu = valid_bleu
        torch.save(gen.state_dict(), 'advparams.pkl')
        print('save model')
    # train_bleu = evaluation.evalModel(gen, train_iter)
    # print('training bleu = %g' % (train_bleu * 100))
    gen.train()

    print("\npg_loss on %d batches : %.4f" % (i + 1, total_loss / num_batches))
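# The Rollout object above is defined elsewhere. Below is a minimal sketch of
# the Monte-Carlo rollout idea behind rollout.get_reward, assuming it follows
# the standard SeqGAN recipe (the real call here takes extra reference/vocab
# arguments that a faithful version would need): for each prefix of a sampled
# sequence, finish it rollout_size times with the generator and average the
# discriminator scores to obtain a per-timestep reward.
# gen.complete_from_prefix is a hypothetical helper, not part of this repo.
import torch

def rollout_reward_sketch(gen, dis, samples, rollout_size):
    batch_size, seq_len = samples.size()
    rewards = torch.zeros(batch_size, seq_len)
    for t in range(1, seq_len + 1):
        for _ in range(rollout_size):
            # complete each length-t prefix out to the full sequence length
            completed = gen.complete_from_prefix(samples[:, :t], seq_len)  # hypothetical
            rewards[:, t - 1] += dis.batchClassify(completed)
    return rewards / rollout_size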
def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    # Train the generator with policy gradients, using the reward from the discriminator
    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # a sample of 64 sequences
        inp, target = helpers.prepare_generator_batch(s, start_letter=START_LETTER, gpu=CUDA)
        reward = dis.batchClassify(target)  # the discriminator probability serves as the reward

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, reward)
        pg_loss.backward()
        gen_opt.step()

    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                               start_letter=START_LETTER, gpu=CUDA)
    print('oracle_sample_NLL=%.4f' % oracle_loss)
def train_generator_PG(gen, gen_opt, dis, num_batches):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """
    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # 64 works best
        inp, target = helpers.prepare_generator_batch(s, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()
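# A minimal sketch (an assumption, not the repo's confirmed implementation) of
# the gen.batchPGLoss used by the policy-gradient variants above: the REINFORCE
# objective -E[reward * log p(sampled sequence)], with the discriminator score
# acting as a whole-sequence reward.
import torch

def batch_pg_loss_sketch(log_probs, target, rewards):
    # log_probs: batch x seq_len x vocab, log-softmax outputs of the generator
    # target:    batch x seq_len sampled token ids (LongTensor)
    # rewards:   batch discriminator scores for the sampled sequences
    chosen = log_probs.gather(2, target.unsqueeze(2)).squeeze(2)  # batch x seq_len
    return -(chosen.sum(dim=1) * rewards).mean()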
def train_generator_PG(gen, gen_opt, dis, batch_size, episodes, num_batches, Sample_Size=20):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """
    for batch in range(num_batches):
        random_sample_index = np.random.choice(len(episodes), Sample_Size, replace=False)
        random_episodes = episodes[random_sample_index]
        s, condition = gen.sample(random_episodes, idx_BOC)  # 64 works best
        inp, target = helpers.prepare_generator_batch(s, gpu=CUDA)
        rewards = dis.batchClassify(target, condition)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards, condition)
        pg_loss.backward()
        gen_opt.step()
        print("PG Loss = %f" % pg_loss.item())  # .data[0] is deprecated since PyTorch 0.4
def train_generator_PG(gen, gen_opt, validation_data_samples, dis, num_batches, _id=0):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """
    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # 64 works best
        inp, target = helpers.prepare_generator_batch(s, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()

    # Generate LSTM samples
    path = 'output/ADV-{}.samples'.format(_id)
    generateSamples(gen, path)
def calculatePPL(gen, testpath):
    testset = loadData(testpath)
    testset_tensor = torch.tensor(testset)
    length = []
    with open(testpath, 'r') as fin:
        for line in fin:
            length.append(getLength(line))
    length = np.array(length)

    nll_all = []
    TEST_SIZE = testset_tensor.shape[0]
    for i in tqdm(range(0, TEST_SIZE)):
        inp, target = helpers.prepare_generator_batch(testset_tensor[i:i + 1],
                                                      start_letter=START_LETTER, gpu=CUDA)
        nll = gen.batchNLLLoss(inp, target)
        nll_all.append(nll.item())
    nll_all = np.array(nll_all)
    # per-sentence perplexity 2 ** (NLL / length), averaged over the test set
    return np.mean(2 ** (nll_all / length))
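# Hypothetical usage of calculatePPL; the path 'data/test.txt' is illustrative.
# The base-2 exponentiation assumes gen.batchNLLLoss returns the NLL summed
# over the sentence, so dividing by the token length gives per-token NLL before
# converting to perplexity.
ppl = calculatePPL(gen, 'data/test.txt')
print('test perplexity = %.2f' % ppl)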
def train_generator_PG(gen, gen_opt, dis, oracle, args):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for args.g_steps batches.
    """
    sample_buf = torch.zeros(args.g_bsz * args.max_seq_len, args.vocab_size)
    if args.cuda:
        sample_buf = sample_buf.cuda()
    for batch in range(args.g_steps):
        s = gen.sample(args.g_bsz)
        inp, target = helpers.prepare_generator_batch(s, start_letter=args.start_letter, gpu=args.cuda)

        # get reward from oracle
        # s_oh = helpers.get_oh(s, sample_buf)
        # rewards = dis.batchClassify(Variable(s_oh))
        rewards = oracle.batchLL(inp, target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()
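# The commented-out reward path above calls helpers.get_oh. A minimal sketch,
# assuming it one-hot encodes the sampled token ids into the pre-allocated
# sample_buf so the discriminator can consume them (an assumption; get_oh's
# real behavior is not shown in this file):
import torch

def get_oh_sketch(s, sample_buf):
    # s: batch x seq_len token ids (LongTensor)
    # sample_buf: (batch * seq_len) x vocab_size, reused across calls
    sample_buf.zero_()
    sample_buf.scatter_(1, s.reshape(-1, 1), 1)  # one 1 per flattened token position
    return sample_buf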
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE],  # real_data_samples: (10000, 20)
                                                          start_letter=START_LETTER,
                                                          gpu=CUDA)
            # inp: (32, 20), e.g.
            #   [[   0,   87, 4410, 3560, 1699, 3485, 1407, 4982, 3391, 1144, 2960, 3784,
            #     2351, 3609,   92, 3391, 2187,  168, 4767, 4973], ...]
            # target: (32, 20), e.g.
            #   [[  87, 4410, 3560, 1699, 3485, 1407, 4982, 3391, 1144, 2960, 3784, 2351,
            #     3609,   92, 3391, 2187,  168, 4767, 4973,  619], ...]
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()

            if (i / BATCH_SIZE) % ceil(
                    ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f' % (total_loss, oracle_loss))
pretrained_dis_path = "seq30_b64_dim_200_v12028_mlep400, advp20_posnef_10700_dis.pth"
pretrained_oracle_path = "seq30_b64_dim_200_v12028_mlep400, advp20_posnef_10700_oracle.pth"

# load pre-trained model
model_dic = torch.load("seq30_b64_dim_200_v12028_mlep100_advp_20_posneg_10700.pth")
oracle.load_state_dict(model_dic['oracle'])
oracle_opt = optim.Adam(oracle.parameters(), lr=1e-2)
out, hid = oracle.forward(inp, hidden_mat)

total_loss = 0
for i in range(0, BATCH_SIZE * 1000, BATCH_SIZE):  # 300, 800, 100000
    batch = getbatch(real_data, i, i + BATCH_SIZE)
    inp, target = helpers.prepare_generator_batch(batch, start_letter=START_LETTER, gpu=CUDA)
    oracle_opt.zero_grad()
    loss = oracle.batchNLLLoss(inp, target)
    loss.backward()
    oracle_opt.step()

    total_loss += loss.item()

    if i % (BATCH_SIZE * 10) == 0:
        # each loss in a batch is loss per sample; normalize into a separate
        # variable so the running total is not overwritten every iteration
        avg_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN
        print("loss:", avg_loss)

print("save oracle")
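# getbatch is not defined in this snippet; a minimal sketch, assuming it simply
# slices the sample matrix (an assumption, not the repo's confirmed helper):
def getbatch_sketch(real_data, start, end):
    return real_data[start:end]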
def train_generator_MLE(gen, gen_opt, train_iter, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    best_bleu = 0
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1))
        total_loss = 0
        num_words = 0
        report_loss = 0
        report_num = 0
        for i, data in enumerate(train_iter):
            tgt_data = data.target[0]
            src_data_wrap = data.source
            ans = data.answer[0]

            if CUDA:
                scr_data = data.source[0].to(device)
                scr_lengths = data.source[1].to(device)
                ans = ans.to(device)
                src_data_wrap = (scr_data, scr_lengths, ans)

            tgt_lengths = data.target[1]
            tgt_lengths = torch.LongTensor(tgt_lengths)
            num_words += tgt_lengths.sum().item()

            tgt_data = tgt_data.permute(1, 0)  # --> batch x length
            inp, target = helpers.prepare_generator_batch(tgt_data, gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(src_data_wrap, inp, target)  # inp is the decoder input, target the decoder target
            loss.div(tgt_data.size(1)).backward()
            # loss.backward()
            gen_opt.step()

            report_loss += loss.item()
            report_num += tgt_data.size(1)
            total_loss += loss.item()

            # if i % 20 == -1 % 20:
            #     print(("inter loss = %.4f") % (report_loss / report_num))
            #     report_loss = 0
            #     report_num = 0

        loss_perword = total_loss / num_words
        train_ppl = math.exp(min(loss_perword, 100))
        print('loss = %.4f' % (total_loss / len(train_iter.dataset)))
        print('ppl = %.4f' % train_ppl)

        # evaluate BLEU scores on the validation data
        # if epoch % 5 == -1 % 5:
        gen.eval()
        # print("Set gen to {0} mode".format('train' if model.decoder.dropout.training else 'eval'))
        valid_bleu = evaluation.evalModel(gen, val_iter, epoch, rev, src_special, tgt_special, tgt_ref, src_rev)
        print('Validation bleu-4 = %g' % (valid_bleu * 100))
        if valid_bleu > best_bleu:
            best_bleu = valid_bleu
            torch.save(gen.state_dict(), 'params.pkl')
            print('save ' + str(epoch + 1) + ' epoch model')
        gen_opt.updateLearningRate(valid_bleu)
        # train_bleu = evaluation.evalModel(gen, train_iter)
        # print('training bleu = %g' % (train_bleu * 100))
        gen.train()
def train_generator_MLE(gen, gen_opt, episodes, valid_episodes, batch_size, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1))
        sys.stdout.flush()
        total_loss = 0
        total_size = 0
        print("train:")
        print("len(episodes) = %d" % len(episodes))
        for batch_idx in range(0, len(episodes), batch_size):
            personas_your = get_persona_batch(episodes[batch_idx:batch_idx + batch_size], 1)
            personas_partner = get_persona_batch(episodes[batch_idx:batch_idx + batch_size], 0)
            turn_batch_list = get_dialog_batches(episodes[batch_idx:batch_idx + batch_size])
            inp, target = helpers.prepare_generator_batch(turn_batch_list, gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target, personas_your, personas_partner)
            loss.backward()
            gen_opt.step()
            print("epoch: %d, batch_idx: %d, loss per sample = %f"
                  % (epoch + 1, batch_idx, loss.item() / turn_batch_list.size(0) / turn_batch_list.size(1)))
            # total_loss += loss.item()
            # total_size += turn_batch_list.size(0)

        if epoch % 5 == 0:
            print("valid:")
            for batch_idx in range(0, len(valid_episodes), batch_size):
                personas_your = get_persona_batch(valid_episodes[batch_idx:batch_idx + batch_size], 1)
                personas_partner = get_persona_batch(valid_episodes[batch_idx:batch_idx + batch_size], 0)
                turn_batch_list = get_dialog_batches(valid_episodes[batch_idx:batch_idx + batch_size])
                inp, target = helpers.prepare_generator_batch(turn_batch_list, gpu=CUDA)
                with torch.no_grad():  # validation: no backward pass, so skip graph construction
                    loss = gen.batchNLLLoss(inp, target, personas_your, personas_partner)
                print("epoch: %d, batch_idx: %d, loss per sample = %f"
                      % (epoch + 1, batch_idx, loss.item() / turn_batch_list.size(0) / turn_batch_list.size(1)))
            true_loss = loss_fn(true_out, torch.zeros([data.batch_size]).type(torch.cuda.FloatTensor))
            fake_out = dis.batchClassify(fake_tgt_data, (passage, ans))  # hidden is None here
            fake_loss = loss_fn(fake_out, torch.ones([data.batch_size]).type(torch.cuda.FloatTensor))
            loss = true_loss + fake_loss
            loss.backward()
            dis_optimizer.step()
        else:
            gen.train()
            real_sample = tgt_data
            real_length = data.target[1]

            with torch.no_grad():
                samples, _ = gen.sample(src_data_wrap)  # 64 batch_size works best
            rewards = rollout.get_reward(samples, (passage, ans), src_data_wrap, rollout_size, dis,
                                         src_rev, rev, train_ref, tgt_pad)
            inp, target = helpers.prepare_generator_batch(samples, gpu=CUDA)

            gen_optimizer.zero_grad()
            pg_loss = gen.batchPGLoss(src_data_wrap, inp, target, rewards)
            pg_loss.backward()
            gen_optimizer.step()
            rollout.update_params()  # TODO: DON'T KNOW WHY

        gen.eval()
        # print("Set gen to {0} mode".format('train' if model.decoder.dropout.training else 'eval'))
        valid_bleu = evaluation.evalModel(gen, val_iter, pg_count, rev, src_special, tgt_special, tgt_ref, src_rev)
        print('Validation bleu-4 = %g' % (valid_bleu * 100))

        # print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        # # TRAIN GENERATOR
        # print('\nAdversarial Training Generator : ', end='')