import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_

# generate_samples, dis_data_loader, recurrent_func, loss_func, and
# get_rewards are project-local helpers assumed to be imported elsewhere
# in this module.


def pretrain_discriminator(model_dict, optimizer_dict, scheduler_dict,
                           dis_dataloader_params, vocab_size, positive_file,
                           negative_file, batch_size, epochs, use_cuda=False,
                           temperature=1.0):
    discriminator = model_dict["discriminator"]
    d_optimizer = optimizer_dict["discriminator"]
    d_lr_scheduler = scheduler_dict["discriminator"]

    # Generate negative examples with the current generator, then build a
    # dataloader that mixes them with the positive (real) examples.
    generate_samples(model_dict, negative_file, batch_size, use_cuda,
                     temperature)
    dis_dataloader_params["positive_filepath"] = positive_file
    dis_dataloader_params["negative_filepath"] = negative_file
    dataloader = dis_data_loader(**dis_dataloader_params)

    # CrossEntropyLoss combines log-softmax with negative log-likelihood.
    cross_entropy = nn.CrossEntropyLoss()
    if use_cuda:
        cross_entropy = cross_entropy.cuda()

    for epoch in range(epochs):
        for i, sample in enumerate(dataloader):
            d_optimizer.zero_grad()
            # Each sample from the dataloader is one batch: data is
            # batch_size x seq_len, label is batch_size.
            data, label = sample["data"], sample["label"]
            if use_cuda:
                data = data.cuda()
                label = label.cuda()
            outs = discriminator(data)
            # l2_loss regularizes only the weights of the final linear
            # layer (not the CNN feature extractor) to reduce overfitting.
            loss = cross_entropy(outs["score"], label.view(-1)) \
                + discriminator.l2_loss()
            loss.backward()
            d_optimizer.step()
            if i == 63:
                # Log the loss once per epoch at a fixed batch index.
                print("Pre-Discriminator loss: {:.5f}".format(loss.item()))
        # Step the scheduler once per epoch, after the optimizer updates.
        d_lr_scheduler.step()

    model_dict["discriminator"] = discriminator
    optimizer_dict["discriminator"] = d_optimizer
    scheduler_dict["discriminator"] = d_lr_scheduler
    return model_dict, optimizer_dict, scheduler_dict
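
# A minimal setup sketch, assuming the commented-out hints from an earlier
# draft of this function (optim.Adam for the optimizer, StepLR for the
# scheduler). The lr, step_size, gamma, and epochs values below are
# hypothetical placeholders, not taken from this repo; only the dict keys
# match the code above.
def _example_pretrain_setup(generator, discriminator, dis_dataloader_params,
                            vocab_size, positive_file, negative_file,
                            batch_size):
    import torch.optim as optim
    # generate_samples() needs the generator, so both models go in the dict.
    model_dict = {"generator": generator, "discriminator": discriminator}
    d_optimizer = optim.Adam(discriminator.parameters(), lr=1e-4)
    optimizer_dict = {"discriminator": d_optimizer}
    scheduler_dict = {
        "discriminator": optim.lr_scheduler.StepLR(d_optimizer,
                                                   step_size=200, gamma=0.95)
    }
    return pretrain_discriminator(model_dict, optimizer_dict, scheduler_dict,
                                  dis_dataloader_params, vocab_size,
                                  positive_file, negative_file, batch_size,
                                  epochs=5)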
def adversarial_train(model_dict, optimizer_dict, scheduler_dict,
                      dis_dataloader_params, vocab_size, pos_file, neg_file,
                      batch_size, gen_train_num=1, dis_train_epoch=5,
                      dis_train_num=3, max_norm=5.0, rollout_num=4,
                      use_cuda=False, temperature=1.0, epoch=1,
                      tot_epoch=100):
    """
    Get all the models, optimizers, and schedulers.
    """
    generator = model_dict["generator"]
    discriminator = model_dict["discriminator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    d_optimizer = optimizer_dict["discriminator"]
    # Only the manager and worker gradients are cleared here; the
    # discriminator optimizer is zeroed inside its own loop below.
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    d_lr_scheduler = scheduler_dict["discriminator"]

    # Adversarial training for the generator.
    for _ in range(gen_train_num):
        m_optimizer.zero_grad()
        w_optimizer.zero_grad()
        # Run the generator in adversarial mode and collect everything the
        # Manager and Worker losses need.
        adv_rets = recurrent_func("adv")(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]

        # Monte Carlo rollouts score each generated prefix.
        rewards = get_rewards(model_dict, gen_token, rollout_num, use_cuda)
        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, vocab_size,
                                         use_cuda)

        # torch.autograd.grad returns the gradients instead of accumulating
        # them into .grad, so assign them explicitly before clipping and
        # stepping (the original discarded these return values, leaving
        # .grad empty). retain_graph keeps the shared forward graph alive
        # for the second call.
        m_params = list(manager.parameters())
        w_params = list(worker.parameters())
        m_grads = torch.autograd.grad(m_loss, m_params, retain_graph=True)
        for param, grad in zip(m_params, m_grads):
            param.grad = grad
        w_grads = torch.autograd.grad(w_loss, w_params)
        for param, grad in zip(w_params, w_grads):
            param.grad = grad
        clip_grad_norm_(m_params, max_norm)
        clip_grad_norm_(w_params, max_norm)
        m_optimizer.step()
        w_optimizer.step()
        # Step the schedulers after the optimizer updates.
        m_lr_scheduler.step()
        w_lr_scheduler.step()
        print("Adv-Manager loss: {:.5f} Adv-Worker loss: {:.5f}".format(
            m_loss.item(), w_loss.item()))

        del adv_rets, real_goal, all_goal, prediction, delta_feature, \
            delta_feature_for_worker, gen_token, rewards

    # Adversarial training for the discriminator.
    #
    # for d-steps do
    #     Use current G (theta_m, theta_w) to generate negative examples
    #     and combine with given positive examples S.
    #     Train discriminator D_phi for k epochs by Eq. (2).
    # end for
    for n in range(dis_train_epoch):
        generate_samples(model_dict, neg_file, batch_size, use_cuda,
                         temperature)
        dis_dataloader_params["positive_filepath"] = pos_file
        dis_dataloader_params["negative_filepath"] = neg_file
        dataloader = dis_data_loader(**dis_dataloader_params)

        cross_entropy = nn.CrossEntropyLoss()
        if use_cuda:
            cross_entropy = cross_entropy.cuda()

        for _ in range(dis_train_num):
            for i, sample in enumerate(dataloader):
                data, label = sample["data"], sample["label"]
                if use_cuda:
                    data = data.cuda(non_blocking=True)
                    label = label.cuda(non_blocking=True)
                outs = discriminator(data)
                loss = cross_entropy(outs["score"], label.view(-1)) \
                    + discriminator.l2_loss()
                d_optimizer.zero_grad()
                loss.backward()
                d_optimizer.step()
            # Step the scheduler once per pass over the data.
            d_lr_scheduler.step()
        print("{}/{} Adv-Discriminator loss: {:.5f}".format(
            n, dis_train_epoch, loss.item()))

    # Save all changes.
    model_dict["discriminator"] = discriminator
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator
    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer
    optimizer_dict["discriminator"] = d_optimizer
    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler
    scheduler_dict["discriminator"] = d_lr_scheduler
    return model_dict, optimizer_dict, scheduler_dict
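
# An end-to-end driver sketch, assuming the usual SeqGAN/LeakGAN schedule:
# pre-train the discriminator, then alternate generator and discriminator
# updates once per adversarial epoch. tot_epoch=100 mirrors
# adversarial_train's default; the rest of the wiring (and a generator
# pre-training step, omitted here) is an assumption about how this file is
# driven, not code taken from the repo.
def _example_training_run(model_dict, optimizer_dict, scheduler_dict,
                          dis_dataloader_params, vocab_size, pos_file,
                          neg_file, batch_size, use_cuda=False,
                          tot_epoch=100):
    model_dict, optimizer_dict, scheduler_dict = pretrain_discriminator(
        model_dict, optimizer_dict, scheduler_dict, dis_dataloader_params,
        vocab_size, pos_file, neg_file, batch_size, epochs=5,
        use_cuda=use_cuda)
    for epoch in range(tot_epoch):
        model_dict, optimizer_dict, scheduler_dict = adversarial_train(
            model_dict, optimizer_dict, scheduler_dict,
            dis_dataloader_params, vocab_size, pos_file, neg_file,
            batch_size, use_cuda=use_cuda, epoch=epoch, tot_epoch=tot_epoch)
    return model_dict, optimizer_dict, scheduler_dict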