def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    """
    Train the generator with policy gradients for `num_batches` batches,
    using the discriminator's classification score on each generated
    sequence as the reward, then report the oracle NLL of fresh samples.
    """
    for _ in range(num_batches):
        samples = gen.sample(BATCH_SIZE * 2)  # 64 works best
        inp, target = helpers.prepare_generator_batch(
            samples, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        # one policy-gradient step on this batch
        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(
        gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
        start_letter=START_LETTER, gpu=CUDA)
    print(' oracle_sample_NLL = %.4f' % oracle_loss)
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator.

    Runs `epochs` passes over `real_data_samples` in BATCH_SIZE chunks,
    taking one NLL-minimising optimiser step per chunk, and after each
    epoch prints the average per-sample train NLL together with the
    oracle NLL of sequences sampled from the generator.
    """
    num_batches = ceil(POS_NEG_SAMPLES / float(BATCH_SIZE))
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()

        total_loss = 0
        for start in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            chunk = real_data_samples[start:start + BATCH_SIZE]
            inp, target = helpers.prepare_generator_batch(
                chunk, start_letter=START_LETTER, gpu=CUDA)

            gen_opt.zero_grad()
            nll = gen.batchNLLLoss(inp, target)
            nll.backward()
            gen_opt.step()
            total_loss += nll.data.item()

            # progress dot roughly every 10% of an epoch
            if (start / BATCH_SIZE) % ceil(num_batches / 10.) == 0:
                print('.', end='')
                sys.stdout.flush()

        # each batch loss is summed over the sequence, so normalise by the
        # batch count and the sequence length to get loss per sample
        total_loss = total_loss / num_batches / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(
            gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
            start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f'
              % (total_loss, oracle_loss))
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """Maximum-likelihood pretraining of the generator on real sequences."""
    for epoch in range(epochs):
        print("epoch %d:" % (epoch + 1), end=" ")
        sys.stdout.flush()
        total_loss = 0
        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            # inp is the generator's input sequence, target the real text;
            # the NLL of target under the generator drives the update.
            inp, target = helpers.prepare_generator_batch(
                real_data_samples[i:i + BATCH_SIZE],
                start_letter=START_LETTER, gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()
            total_loss += loss.data.item()
            # ceil(POS_NEG_SAMPLES / BATCH_SIZE) is the number of batches;
            # split into ten parts to emit a dot every ~10% of the epoch.
            tenth = ceil(ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.)
            if (i / BATCH_SIZE) % tenth == 0:
                print('.', end='')
                sys.stdout.flush()
        # batch losses are accumulated per word over the sequence, so
        # dividing by MAX_SEQ_LEN yields the loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN
        # score the generator's own samples under the oracle (measures the
        # generator's ability to produce oracle-like sequences)
        oracle_loss = helpers.batchwise_oracle_nll(
            gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
            start_letter=START_LETTER, gpu=CUDA)
        print("average_train_NLL=%.4f,oracle_sample_NLL=%.4f"
              % (total_loss, oracle_loss))
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, args):
    """
    Max Likelihood Pretraining for the generator (one pass over the data).

    Args:
        gen: generator model exposing batchNLLLoss.
        gen_opt: optimiser over the generator's parameters.
        oracle: oracle model used to score generator samples.
        real_data_samples: indexable collection of real training sequences.
        args: namespace providing g_bsz, start_letter, cuda, max_seq_len
            and num_eval.

    Returns:
        (oracle_loss, total_loss): oracle NLL of generator samples and the
        average per-sample training NLL.
    """
    num_data = len(real_data_samples)
    num_batches = ceil(num_data / float(args.g_bsz))
    total_loss = 0
    for i in range(0, num_data, args.g_bsz):
        inp, target = helpers.prepare_generator_batch(
            real_data_samples[i:i + args.g_bsz],
            start_letter=args.start_letter, gpu=args.cuda)
        gen_opt.zero_grad()
        loss = gen.batchNLLLoss(inp, target)
        loss.backward()
        gen_opt.step()
        # FIX: `loss.data[0]` indexes a 0-dim tensor and raises IndexError
        # on PyTorch >= 0.4; `loss.item()` is the supported scalar accessor
        # (and matches the `.item()` usage in the sibling MLE variants).
        total_loss += loss.item()

        if (i / args.g_bsz) % ceil(num_batches / 10.) == 0:  # roughly every 10% of an epoch
            print('.', end='')
            sys.stdout.flush()

    # each batch loss is summed over the sequence, so normalise by batch
    # count and sequence length to get loss per sample
    total_loss = total_loss / num_batches / args.max_seq_len

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval, args)
    return oracle_loss, total_loss
def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    """
    Policy-gradient training of the generator, using the discriminator's
    classification probability on each generated sequence as the reward.
    """
    for _ in range(num_batches):
        # a double-size batch of generated sequences
        samples = gen.sample(BATCH_SIZE * 2)
        inp, target = helpers.prepare_generator_batch(
            samples, start_letter=START_LETTER, gpu=CUDA)
        # the discriminator's probability serves as the reward signal
        reward = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, reward)
        pg_loss.backward()
        gen_opt.step()

    # score fresh generator samples under the oracle
    oracle_loss = helpers.batchwise_oracle_nll(
        gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
        start_letter=START_LETTER, gpu=CUDA)
    print('oracle_sample_NLL=%.4f' % oracle_loss)
def train_generator_MLE(gen, gen_opt, oracle, dataloader, epochs):
    """
    Max Likelihood Pretraining for the generator.

    Iterates over `dataloader.train` in BATCH_SIZE chunks for `epochs`
    epochs, and per epoch records average train NLL, oracle sample NLL
    and wall-clock time to stdout and the module-level `log` list.
    """
    for epoch in range(epochs):
        start_time = time.time()
        print('epoch %d/%d : ' % (epoch + 1, epochs), end='')
        sys.stdout.flush()

        total_loss = 0
        select_samples = dataloader.train
        for offset in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            chunk = select_samples[offset:offset + BATCH_SIZE]
            inp, target = split_data(chunk, BATCH_SIZE)

            gen_opt.zero_grad()
            nll = gen.batchNLLLoss(inp, target)
            nll.backward()
            gen_opt.step()
            total_loss += nll.data.item()

            # progress dot roughly every 10% of an epoch
            if (offset / BATCH_SIZE) % ceil(ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(
            gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
            start_letter=START_LETTER, gpu=CUDA)

        msg = ' average_train_NLL = %.4f, oracle_sample_NLL = %.4f, time = %.2f' % (
            total_loss, oracle_loss, time.time() - start_time)
        log.append(msg)
        print(msg)
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator.

    Each BATCH_SIZE slice of `real_data_samples` is converted into
    (inp, target) pairs — e.g. for samples of shape (10000, 20) each is
    (32, 20), with inp beginning at START_LETTER and target being inp
    shifted left by one token — and the generator minimises the NLL of
    target.
    """
    per_tenth = ceil(ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.)
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()

        total_loss = 0
        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(
                real_data_samples[i:i + BATCH_SIZE],
                start_letter=START_LETTER,
                gpu=CUDA)

            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()
            total_loss += loss.data.item()

            if (i / BATCH_SIZE) % per_tenth == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(
            gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
            start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f'
              % (total_loss, oracle_loss))
def train_generator_PG_rollout(gen, gen_opt, oracle, dis, num_batches):
    """
    Policy-gradient generator training with a Monte-Carlo roll-out policy.

    At each timestep t the current prefix is completed MC_SIZE times via
    gen.mc, each completion is scored by the discriminator, and the
    averaged policy-gradient loss drives one optimiser step before the
    prefix is extended by one sampled token.
    """
    for _ in range(num_batches):
        seq, hidden = gen.sample_rollout_init(BATCH_SIZE * 2)
        for t in range(1, MAX_SEQ_LEN):
            # average the PG loss over MC_SIZE Monte-Carlo completions
            avg_loss = 0
            for _mc in range(MC_SIZE):
                inp, target = gen.mc(seq, t, gpu=CUDA)
                rewards = dis.batchClassify(target)
                avg_loss += gen.batchPGLoss(inp, target, rewards)
            avg_loss /= MC_SIZE

            gen_opt.zero_grad()
            avg_loss.backward()
            gen_opt.step()

            # extend the prefix with the next sampled token
            seq, hidden = gen.sample_rollout(BATCH_SIZE * 2, seq, hidden, t)

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(
        gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
        start_letter=START_LETTER, gpu=CUDA)
    print(' oracle_sample_NLL = %.4f' % oracle_loss)
# gen.load_state_dict(torch.load(pretrained_gen_path)) # PRETRAIN DISCRIMINATOR print('\nStarting Discriminator Training...') dis_optimizer = optim.Adagrad(dis.parameters()) train_discriminator(dis, dis_optimizer, oracle_samples, gen, oracle, 50, 3) torch.save(dis.state_dict(), pretrained_dis_path) # dis.load_state_dict(torch.load(pretrained_dis_path)) # ADVERSARIAL TRAINING print('\nStarting Adversarial Training...') oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN, start_letter=START_LETTER, gpu=CUDA) print('\nInitial Oracle Sample Loss : %.4f' % oracle_loss) for epoch in range(ADV_TRAIN_EPOCHS): print('\n--------\nEPOCH %d\n--------' % (epoch + 1)) # TRAIN GENERATOR print('\nAdversarial Training Generator : ', end='') sys.stdout.flush() train_generator_PG(gen, gen_optimizer, oracle, dis, 1) # TRAIN DISCRIMINATOR print('\nAdversarial Training Discriminator : ') train_discriminator(dis, dis_optimizer, oracle_samples, gen, oracle, 5,
# PRETRAIN DISCRIMINATOR dis_optimizer = optim.Adagrad(dis.parameters()) if args.pre_d_load is not None: print("Load pretrained D") dis.load_state_dict(torch.load(args.pre_d_load)) else: print('\nStarting Discriminator Training...') train_discriminator(dis, dis_optimizer, oracle_samples, gen, oracle, args.d_pre_steps, args.d_pre_epochs, args) if args.pre_d_save is not None: torch.save(dis.state_dict(), args.pre_d_save) # ADVERSARIAL TRAINING print('\nStarting Adversarial Training...') oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval, args) print('\nInitial Oracle Sample Loss : %.4f' % oracle_loss) for epoch in range(args.mle_epochs, args.mle_epochs + args.adv_epochs): print('\n--------\nEPOCH %d\n--------' % (epoch + 1)) # TRAIN GENERATOR print('\nAdversarial Training Generator : ', end='') sys.stdout.flush() train_generator_PG(gen, gen_optimizer, dis, oracle, args) # sample from generator and compute oracle NLL oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval, args) print(' oracle_sample_NLL = %.4f' % oracle_loss) logger.scalar_summary("oracle_loss", oracle_loss, epoch + 1) # TRAIN DISCRIMINATOR