args.lr, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=True) if args.resume: print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) criterion_bce = nn.BCELoss() criterion_cel = nn.CrossEntropyLoss() best_prec1 = 0 best_pred_y = [] best_gt_y = [] global_step = 0 total_steps = args.grl_rampup_epochs * len(source_loader) def train(epoch): model.train() global global_step for batch_idx, (batch_s, batch_t) in enumerate(zip(source_loader, target_loader)): adjust_learning_rate(optimizer, epoch, batch_idx, len(source_loader))
def main(): # parse arguments args = parseArgs() assert args.mode in ['train', 'test'] if args.mode == 'test': assert os.path.isfile(args.checkpointspath) # some necessary preparations checkDir(cfg.BACKUP_DIR) logger_handle = Logger(cfg.LOGFILEPATH.get(args.mode)) start_epoch = 1 end_epoch = cfg.NUM_EPOCHS + 1 use_cuda = torch.cuda.is_available() # define the dataset dataset = ImageDataset(rootdir=cfg.ROOTDIR, imagesize=cfg.IMAGE_SIZE, img_norm_info=cfg.IMAGE_NORM_INFO) dataloader = torch.utils.data.DataLoader(dataset, batch_size=cfg.BATCH_SIZE, shuffle=True) # define the loss function loss_func = nn.BCELoss() if use_cuda: loss_func = loss_func.cuda() # define the model net_g = Generator(cfg) net_d = Discriminator(cfg) if use_cuda: net_g = net_g.cuda() net_d = net_d.cuda() # define the optimizer optimizer_g = buildOptimizer(net_g.parameters(), cfg.OPTIMIZER_CFG['generator']) optimizer_d = buildOptimizer(net_d.parameters(), cfg.OPTIMIZER_CFG['discriminator']) # load the checkpoints if args.checkpointspath: checkpoints = loadCheckpoints(args.checkpointspath, logger_handle) net_d.load_state_dict(checkpoints['net_d']) net_g.load_state_dict(checkpoints['net_g']) optimizer_g.load_state_dict(checkpoints['optimizer_g']) optimizer_d.load_state_dict(checkpoints['optimizer_d']) start_epoch = checkpoints['epoch'] + 1 else: net_d.apply(weightsNormalInit) net_g.apply(weightsNormalInit) # define float tensor FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor # train the model if args.mode == 'train': for epoch in range(start_epoch, end_epoch): logger_handle.info('Start epoch %s...' % epoch) for batch_idx, imgs in enumerate(dataloader): imgs = imgs.type(FloatTensor) z = torch.randn(imgs.size(0), cfg.NUM_LATENT_DIMS, 1, 1).type(FloatTensor) imgs_g = net_g(z) # --train generator optimizer_g.zero_grad() labels = FloatTensor(imgs_g.size(0), 1).fill_(1.0) loss_g = loss_func(net_d(imgs_g), labels) loss_g.backward() optimizer_g.step() # --train discriminator optimizer_d.zero_grad() labels = FloatTensor(imgs_g.size(0), 1).fill_(1.0) loss_real = loss_func(net_d(imgs), labels) labels = FloatTensor(imgs_g.size(0), 1).fill_(0.0) loss_fake = loss_func(net_d(imgs_g.detach()), labels) loss_d = loss_real + loss_fake loss_d.backward() optimizer_d.step() # --print infos logger_handle.info('Epoch %s/%s, Batch %s/%s, Loss_G %f, Loss_D %f' % (epoch, cfg.NUM_EPOCHS, batch_idx+1, len(dataloader), loss_g.item(), loss_d.item())) # --save checkpoints if epoch % cfg.SAVE_INTERVAL == 0 or epoch == cfg.NUM_EPOCHS: state_dict = { 'epoch': epoch, 'net_d': net_d.state_dict(), 'net_g': net_g.state_dict(), 'optimizer_g': optimizer_g.state_dict(), 'optimizer_d': optimizer_d.state_dict() } savepath = os.path.join(cfg.BACKUP_DIR, 'epoch_%s.pth' % epoch) saveCheckpoints(state_dict, savepath, logger_handle) save_image(imgs_g.data[:25], os.path.join(cfg.BACKUP_DIR, 'images_epoch_%s.png' % epoch), nrow=5, normalize=True) # test the model else: z = torch.randn(cfg.BATCH_SIZE, cfg.NUM_LATENT_DIMS, 1, 1).type(FloatTensor) net_g.eval() imgs_g = net_g(z) save_image(imgs_g.data[:25], 'images.png', nrow=5, normalize=True)
# Apply the weights_init() function to randomly initialize all
# weights to mean=0.0, stddev=0.2
netG.apply(weights_init)
# Print the model.
print(netG)

# Create the discriminator.
netD = Discriminator(params).to(device)
# Apply the weights_init() function to randomly initialize all
# weights to mean=0.0, stddev=0.2
netD.apply(weights_init)
# Print the model.
print(netD)

# Binary Cross Entropy loss function.
criterion = nn.BCELoss()

fixed_noise = torch.randn(64, params['nz'], 1, 1, device=device)

real_label = 1
fake_label = 0

# Optimizer for the discriminator.
optimizerD = optim.Adam(netD.parameters(), lr=params['lr'], betas=(params['beta1'], 0.999))
# Optimizer for the generator.
optimizerG = optim.Adam(netG.parameters(), lr=params['lr'], betas=(params['beta1'], 0.999))
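A minimal sketch of the training iteration this setup is normally followed by; the `dataloader` name and the discriminator's output shape are assumptions, not taken from the snippet above:

for real_images, _ in dataloader:
    real_images = real_images.to(device)
    b_size = real_images.size(0)

    # Update the discriminator on a real batch and a fake batch.
    netD.zero_grad()
    labels = torch.full((b_size,), real_label, dtype=torch.float, device=device)
    errD_real = criterion(netD(real_images).view(-1), labels)
    noise = torch.randn(b_size, params['nz'], 1, 1, device=device)
    fake_images = netG(noise)
    labels.fill_(fake_label)
    errD_fake = criterion(netD(fake_images.detach()).view(-1), labels)
    (errD_real + errD_fake).backward()
    optimizerD.step()

    # Update the generator: fake images are labelled "real" for its loss.
    netG.zero_grad()
    labels.fill_(real_label)
    errG = criterion(netD(fake_images).view(-1), labels)
    errG.backward()
    optimizerG.step()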
def __init__(self, weight=None, size_average=True):
    super(BCELoss2d, self).__init__()
    self.bce_loss = nn.BCELoss(weight, size_average)
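The snippet above shows only the constructor; a typical forward pass for such a 2-D BCE wrapper flattens the probability and target maps before handing them to nn.BCELoss. A minimal sketch, assuming the module receives sigmoid probabilities and same-shaped targets:

def forward(self, probs, targets):
    # Flatten N x C x H x W maps so the stored nn.BCELoss sees matching 1-D tensors.
    probs_flat = probs.view(-1)
    targets_flat = targets.view(-1)
    return self.bce_loss(probs_flat, targets_flat)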
def train_LSTM(net, train_loader, val_loader, batch_size=50, save_model=True): # loss and optimization functions lr = 0.001 criterion = nn.BCELoss() optimizer = torch.optim.Adam(net.parameters(), lr=lr) # training params epochs = 4 # 3-4 is approx where I noticed the validation loss stop decreasing counter = 0 print_every = 100 clip = 5 # gradient clipping device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # move model to GPU, if available if (net.train_on_gpu): print("Using CUDA device: ", device) net = net.to(device) #net = net.to(device) net.train() # train for some number of epochs for e in range(epochs): # initialize hidden state h = net.init_hidden(batch_size) # batch loop for inputs, labels in train_loader: counter += 1 #if (net.train_on_gpu): # inputs, labels = inputs.cuda(), labels.cuda() inputs = inputs.to(device) labels = labels.to(device) # Creating new variables for the hidden state, otherwise # we'd backprop through the entire training history h = tuple([each.data for each in h]) # zero accumulated gradients net.zero_grad() # get the output from the model inputs = inputs.type(torch.LongTensor).to(device) output, h = net(inputs, h) # calculate the loss and perform backprop loss = criterion(output.squeeze(), labels.float()) loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. nn.utils.clip_grad_norm_(net.parameters(), clip) optimizer.step() # loss stats if counter % print_every == 0: # Get validation loss val_h = net.init_hidden(batch_size) val_losses = [] net.eval() for inputs, labels in val_loader: # Creating new variables for the hidden state, otherwise # we'd backprop through the entire training history val_h = tuple([each.data for each in val_h]) if (net.train_on_gpu): inputs, labels = inputs.cuda(), labels.cuda() inputs = inputs.type(torch.LongTensor).to(device) output, val_h = net(inputs, val_h) val_loss = criterion(output.squeeze(), labels.float()) val_losses.append(val_loss.item()) net.train() print("Epoch: {}/{}...".format(e + 1, epochs), "Step: {}...".format(counter), "Loss: {:.6f}...".format(loss.item()), "Val Loss: {:.6f}".format(np.mean(val_losses))) if save_model: model_fp = os.path.join(os.curdir, os.pardir, "models", "lstm_trained") torch.save(net.state_dict(), model_fp)
def train(netD, netG, opt): # data preparation pls_input = torch.FloatTensor(opt.batchSize, 1, opt.pulseLen) ac_input = torch.FloatTensor(opt.batchSize, opt.acSize) noise = torch.FloatTensor(opt.batchSize, opt.nz) fixed_noise = torch.FloatTensor(opt.batchSize, nz).normal_(0, 1) label = torch.FloatTensor(opt.batchSize) real_label = 1 fake_label = 0 if opt.cuda: netD.cuda() netG.cuda() criterion.cuda() pls_input, label = pls_input.cuda(), label.cuda() noise, fixed_noise = noise.cuda(), fixed_noise.cuda() ac_input = ac_input.cuda() pls_input = Variable(pls_input) ac_input = Variable(ac_input) label = Variable(label) noise = Variable(noise) fixed_noise = Variable(fixed_noise) # cost criterion criterion = nn.BCELoss() # normal gan #criterion = nn.MSELoss() # lsgan # setup optimizer optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) if opt.dataset in ['nick', 'jenny']: # folder dataset dataset, ac_dataset = load_data(opt.dataroot, num_files=5) else: raise dataset = torch.from_numpy(dataset) ac_dataset = torch.from_numpy(ac_dataset) train_dataset = torch.utils.data.TensorDataset(dataset,ac_dataset) dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=opt.batchSize, shuffle=True, num_workers=int(opt.workers)) for epoch in range(opt.niter): for i, data in enumerate(dataloader, 0): ################################# # (1) Updata D network: maximize log(D(x)) + log(1 - D(G(z))) ################################# # train with real netD.zero_grad() real_pls_cpu, ac_feats_cpu = data if real_pls_cpu.size(0) != opt.batchSize: continue batch_size = real_pls_cpu.size(0) pls_input.data.copy_(real_pls_cpu) ac_input.data.copy_(ac_feats_cpu) label.data.fill_(real_label) output = netD(pls_input, ac_input) errD_real = criterion(output, label) errD_real.backward() D_x = output.data.mean() # train with fake noise.data.resize_(batch_size, nz) noise.data.normal_(0, 1) fake = netG(noise, ac_input) label.data.fill_(fake_label) output = netD(fake.detach(), ac_input) errD_fake = criterion(output, label) errD_fake.backward() D_G_z1 = output.data.mean() errD = errD_real + errD_fake optimizerD.step() ############################ # (2) Update G network: maximize log(D(G(z))) ############################ netG.zero_grad() label.data.fill_(real_label) # fake labels are real for generator cost output = netD(fake, ac_input) errG = criterion(output, label) errG.backward() D_G_z2 = output.data.mean() optimizerG.step() print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f' %(epoch, opt.niter, i, len(dataloader), errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2)) if i % 100 == 0: fake = netG(fixed_noise, ac_input) fake = fake.data.cpu().numpy() fake = fake.reshape(batch_size, -1) real_data = real_pls_cpu.numpy() plot_feats(real_data, fake, epoch, i, opt.outf) # do checkpointing torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' %(opt.outf, epoch)) torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' %(opt.outf, epoch))
discriminator.load_state_dict(torch.load('discriminator_sngan-30.pth'))
discriminator.eval()

generator = torch.nn.DataParallel(model_sngan.Generator().to(device), device_ids=gpu_ids)
generator = generator.cuda()
generator.load_state_dict(torch.load('generator_sngan-30.pth'))
generator.eval()

steganalyzer = torch.nn.DataParallel(model_sngan.Steganalyzer().to(device), device_ids=gpu_ids)
steganalyzer = steganalyzer.cuda()
steganalyzer.load_state_dict(torch.load('steganalyzer_sngan_trained.pth'))
steganalyzer.eval()

d_criterion = nn.BCELoss().to(device)
s_criterion = nn.BCELoss().to(device)
g_criterion = nn.BCELoss().to(device)
q_criterion = nn.L1Loss().to(device)

d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=d_learning_rate)
g_optimizer = torch.optim.Adam(generator.parameters(), lr=g_learning_rate)

print(
    'Training started!============================================================================================='
)
start = time.time()

for epoch in range(num_epoch):
    for i, (audio, _) in enumerate(dataloader):
        num_audio = audio.size(0)
        real_audio = audio.to(device)
        real_label = torch.ones(num_audio).reshape(batch_size, 1).to(device)
discriminator = Discriminator(bsize, embed_dim, encod_dim, embed_dim_policy,
                              encod_dim_policy, numlabel, recom_length - 1, 2).to(device)
print("The generator is:")
print(generator)
print("The agent is:")
print(agent)
print("The discriminator is:")
print(discriminator)

# Loss for test
with torch.no_grad():
    loss_fn_target = nn.CrossEntropyLoss()
    loss_fn_reward = nn.BCELoss()
    loss_fn_target.size_average = False
    loss_fn_target.to(device)
    loss_fn_reward.size_average = False
    loss_fn_reward.to(device)

trainSample, validSample, testSample = sampleSplit(
    trainindex, validindex, testindex, Seqlist, numlabel, recom_length - 1)  # , warm_up = 0)

# Pretrain generator only
val_acc_best, val_preck_best, val_rewd_best, val_loss_best = None, None, None, None
print('\n--------------------------------------------')
print("Pretrain Generator with given recommendation")
print('--------------------------------------------')
dataloader['val'] = DataLoader(dataset['val'], batch_size=16, shuffle=True, num_workers=0)
print('OK.')
# print(len(dataset['train']))
# print(len(dataloader['train']))

print('Creating network ... ', end='')
net = networks.NetFC(nodes=[dataset['train'].get_input_size(), 600, 100, 1], dropout=dropout)
print('OK.')

print('Creating loss and optimizer ... ', end='')
criterion = nn.BCELoss(size_average=False)
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
print('OK.')

cuda = torch.cuda.is_available()
if not cuda:
    print("No GPU found!")
if cuda:
    print('Use GPU ... ', end='')
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs! ", end='')
        net = nn.DataParallel(net)
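Note that `size_average=False` is deprecated in current PyTorch releases; if this snippet is run on a recent version, the equivalent summed-reduction construction would be:

criterion = nn.BCELoss(reduction='sum')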
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

netD = net.D(inputChannelSize, outputChannelSize, ndf, 3)
netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

netG.train()
netD.train()

# Set up the loss functions
criterionBCE = nn.BCELoss()
criterionMSE = nn.MSELoss()

# Pre-allocate tensors
real_A = torch.Tensor(opt.batchSize, inputChannelSize, opt.imageSize, opt.imageSize)
real_B = torch.Tensor(opt.batchSize, outputChannelSize, opt.imageSize, opt.imageSize)
fake_B = torch.Tensor(opt.batchSize, outputChannelSize, opt.imageSize, opt.imageSize)
real_AB = torch.Tensor(
    opt.batchSize, outputChannelSize + inputChannelSize * opt.condition_GAN,
    opt.imageSize, opt.imageSize)
fake_AB = torch.Tensor(
    opt.batchSize, outputChannelSize + inputChannelSize * opt.condition_GAN,
    opt.imageSize, opt.imageSize)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = F.leaky_relu(self.fc1(x), negative_slope=0.2)
        x = F.leaky_relu(self.fc2(x), negative_slope=0.2)
        return torch.sigmoid(self.fc3(x))


generator = Generator(Z_DIM, 256, X_SIZE)
discriminator = Discriminator(X_SIZE, 256)

d_optim = optim.Adam(discriminator.parameters(), lr=0.0002, betas=[0.5, 0.999])
g_optim = optim.Adam(generator.parameters(), lr=0.0002)

picture_out_count = 0
BCELoss = nn.BCELoss()

dataloader = torch.utils.data.DataLoader(
    datasets.MNIST(
        "./data/mnist",
        train=True,
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True)

for epoch in range(200):
    for i, (imgs, _) in enumerate(dataloader):
def gail(config): env = gym.make(config['env']) # collect trajectories if config['train_expert']: expert_policy = TrainExpertPolicy(env) _, expert_masks, _, expert_obs, expert_actions, _ = collect_samples( env, expert_policy, config['expert_trajs_num']) # save with open(config['env'] + '_expert_trajs.pkl', 'wb') as f: pickle.dump([expert_masks, expert_obs, expert_actions], f) else: with open(config['env'] + '_expert_trajs.pkl', 'rb') as f: expert_masks, expert_obs, expert_actions = pickle.load(f) # make expert trajs into tensors expert_obs_tensor = list_of_list_into_tensor(expert_obs).float() expert_actions_tensor = list_of_list_into_tensor(expert_actions)[ ..., None].float() # Initialize Networks disc = Discriminator(env.observation_space.shape[0], 1) policy = Policy(env.observation_space.shape[0], env.action_space.n) value_net = Value(env.observation_space.shape[0]) # Initialize loss modules disc_loss_fn = nn.BCELoss(reduction='none') # initialize optimizer optimizer_disc = optim.Adam(disc.parameters(), lr=1e-2) optimizer_p = optim.Adam(policy.parameters(), lr=1e-2) optimizer_v = optim.Adam(value_net.parameters(), lr=1e-2) start_time = time.time() # Training loop for i in count(1): # run policy and get samples _, gen_masks, gen_log_probs, gen_obs, \ gen_actions, _ = collect_samples(env, policy, n_episodes=config['gen_trajs_num']) # discriminator loss g = disc(gen_obs, gen_actions) * gen_masks e = disc(expert_obs_tensor, expert_actions_tensor) disc_loss = (disc_loss_fn(g, torch.ones(*g.size()))*gen_masks).sum(axis=1).mean() +\ (disc_loss_fn(e, torch.zeros(*e.size()))*expert_masks).sum(axis=1).mean() optimizer_disc.zero_grad() disc_loss.backward() # evaluate reward under current discriminator reward = evaluate_reward(disc, gen_obs, gen_actions) # update policy net values = value_net(gen_obs) advantage, returns = generalized_advantage(reward, gen_masks, values) optimizer_p.zero_grad() loss_p = (gen_log_probs * advantage).sum(axis=1).mean() # no minus sign loss_p.backward(retain_graph=True) optimizer_p.step() # Update value net optimizer_v.zero_grad() loss_v = F.mse_loss(values, returns, reduction='none').sum(axis=1).mean() loss_v.backward() optimizer_v.step() print(i) # evaluate current policy under the true reward function if i % 5 == 0: eval_rewards, _, _, _, _, \ gen_actions = collect_samples(env, policy, n_episodes=5) eval_rewards = eval_rewards.sum(axis=1) print(eval_rewards.detach().numpy()) if eval_rewards.mean().item() > env.spec.reward_threshold: print("Model converged in", time.time() - start_time) _, _, rewards, masks, _, log_probs = \ collect_samples(env, policy, 1, render=True) break
}

train_loader = get_trajectory_tensor_dataset(**train_dataset_args)
val_loader = get_trajectory_tensor_dataset(**val_dataset_args)
test_loader = get_trajectory_tensor_dataset(**test_dataset_args)

# ########## SET UP MODEL ########## #
encoder = CNN_2D_1D_Encoder(**encoder_args).to(DEVICE)
decoder = CNN_1D_Trajectory_Tensor_Classifier(**decoder_args).to(DEVICE)

params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(params, lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
loss_function = nn.BCELoss()

# ########## TRAIN AND EVALUATE ########## #
best_ap = 0
for epoch in range(NUM_EPOCHS):
    print("----------- EPOCH " + str(epoch) + " -----------")

    trainer_args = {
        "encoder": encoder,
        "decoder": decoder,
        "device": DEVICE,
        "train_loader": train_loader,
        "optimizer": optimizer,
        "loss_function": loss_function,
        "debug_mode": DEBUG_MODE,
def __init__(self):
    super(VAELoss, self).__init__()
    self.bce = nn.BCELoss(reduction='sum')
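Only the constructor is shown; the usual forward pass for a VAE loss of this form adds the analytic KL divergence of a diagonal Gaussian posterior to the summed BCE reconstruction term. A minimal sketch (argument names are assumptions):

def forward(self, recon_x, x, mu, logvar):
    # Summed binary cross-entropy between decoder probabilities and the input.
    recon_loss = self.bce(recon_x, x)
    # KL divergence between N(mu, sigma^2) and the standard normal prior.
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kld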
def train_fake_image_gan(self): bce_loss = nn.BCELoss() l1_loss = nn.L1Loss() for epoch in range(self.epochs): for sample in self.fake_image_data_loader: sentences = sample['sentences'] matched_images = sample['matched_images'] unmatched_images = sample['unmatched_images'] sentences = torch.tensor( sentences, requires_grad=False).cuda(device=cuda0) matched_images = torch.tensor( matched_images, requires_grad=False).cuda(device=cuda0) unmatched_images = torch.tensor( unmatched_images, requires_grad=False).cuda(device=cuda0) real_labels = torch.ones( matched_images.size(0)).cuda(device=cuda0) fake_labels = torch.zeros( matched_images.size(0)).cuda(device=cuda0) # 更新判别器 for param in self.image_generator.parameters(): param.requires_grad = False for param in self.fake_image_discriminator.parameters(): param.requires_grad = True self.fake_image_discriminator_optimizer.zero_grad() # 计算损失 fake_images = self.image_generator(sentences) matched_scores = self.fake_image_discriminator( matched_images, sentences) unmatched_scores = self.fake_image_discriminator( unmatched_images, sentences) fake_scores = self.fake_image_discriminator( fake_images, sentences) matched_loss = bce_loss(matched_scores, real_labels) unmatched_loss = bce_loss(unmatched_scores, fake_labels) fake_loss = bce_loss(fake_scores, fake_labels) discriminator_loss = matched_loss + unmatched_loss + fake_loss # 损失反向传递 discriminator_loss.backward() self.fake_image_discriminator_optimizer.step() # 更新生成器 for param in self.image_generator.parameters(): param.requires_grad = True for param in self.fake_image_discriminator.parameters(): param.requires_grad = False self.image_generator_optimizer.zero_grad() fake_images = self.image_generator(sentences) fake_scores = self.fake_image_discriminator( fake_images, sentences) fake_loss = bce_loss(fake_scores, real_labels) generator_loss = fake_loss + self.l1_coef * l1_loss( fake_images, matched_images) generator_loss.backward() self.image_generator_optimizer.step() print("Epoch: %d, generator_loss= %f, discriminator_loss= %f" % (epoch, generator_loss.data, discriminator_loss.data)) val_images, val_sentences = load_validation_set(self.arguments) val_sentences = torch.tensor(val_sentences[:10], requires_grad=False).cuda() val_images = torch.tensor(val_images[:10], requires_grad=False).cuda() fake_images = self.image_generator(val_sentences) index = 0 for val_image, fake_image in zip(val_images, fake_images): save_image(val_image, self.arguments['synthetic_image_path'], "val_" + str(index) + ".jpg") save_image(fake_image, self.arguments['synthetic_image_path'], "fake_" + str(index) + ".jpg") index = index + 1
def train(self): """Train the model by iterating through the dataset num_epoch times, printing the duration per epoch """ lr = 0.0002 batch_size = 1000 num_epochs = 10 # Labels for real data: # - for discriminator, this is real images # - for generator this is what we wanted the discriminator output to be real_samples_labels = torch.ones((batch_size, 1), device=GPU_DEVICE) # Init loss functions loss_function = nn.BCELoss() gen_losses = [] disc_losses = [] # total data is dataset * num_epochs # Load train data train_set = self.data() train_loader = torch.utils.data.DataLoader( train_set, batch_size=batch_size, shuffle=True ) self.generator.model.eval() self.discriminator.model.train() # Labels for generated data, all 0 generated_samples_labels = torch.zeros((batch_size, 1), device=GPU_DEVICE) # Load optimizer optimizer_discriminator = torch.optim.Adam( self.discriminator.parameters(), lr=lr, ) self.generator.model.train() self.discriminator.model.eval() # total data is batch_size * num_epochs # Load optimizer optimizer_generator = torch.optim.Adam( self.generator.parameters(), lr=lr, ) start = timeit.default_timer() # Repeat num_epoch times for epoch in range(num_epochs): for n, (images, labels) in enumerate(train_loader): # Iterate through dataset if GPU_DEVICE: images = images.cuda() # Data for training the discriminator latent_space_samples = self.latent_input(batch_size) generated_samples = self.generator(latent_space_samples) # label inputs as real, fake all_samples = torch.cat((images, generated_samples)) all_samples_labels = torch.cat( (real_samples_labels, generated_samples_labels) ) # Training the discriminator self.discriminator.zero_grad() output_discriminator = self.discriminator(all_samples) loss_discriminator = loss_function( output_discriminator, all_samples_labels ) loss_discriminator.backward() optimizer_discriminator.step() disc_losses.append(float(loss_discriminator)) # Data for training the generator latent_space_samples = self.latent_input(batch_size) # Training the generator self.generator.zero_grad() generated_samples = self.generator(latent_space_samples) output_discriminator_generated = self.discriminator(generated_samples) loss_generator = loss_function( output_discriminator_generated, real_samples_labels ) loss_generator.backward() optimizer_generator.step() gen_losses.append(float(loss_generator)) if epoch % (x := 10) == 0: # Show loss print(f"Epoch: {epoch} Loss D.: {loss_discriminator}") print(f"Epoch: {epoch} Loss G.: {loss_generator}") print(timeit.default_timer() - start) start = timeit.default_timer()
def train_fake_sentence_gan(self): bce_loss = nn.BCELoss() l1_loss = nn.L1Loss() for epoch in range(self.epochs): for sample in self.fake_sentence_data_loader: images = sample['images'] matched_sentences = sample['matched_sentences'] unmatched_sentences = sample['unmatched_sentences'] images = torch.tensor(images, requires_grad=False).cuda(device=cuda1) matched_sentences = torch.tensor( matched_sentences, requires_grad=False).cuda(device=cuda1) unmatched_sentences = torch.tensor( unmatched_sentences, requires_grad=False).cuda(device=cuda1) real_labels = torch.ones(images.size(0)).cuda(device=cuda1) fake_labels = torch.zeros(images.size(0)).cuda(device=cuda1) # 更新判别器 self.fake_sentence_discriminator_optimizer.zero_grad() # 计算损失 # lengths = [len(matched_sentence) for matched_sentence in matched_sentences] fake_sentences = self.sentence_generator(images) matched_scores = self.fake_sentence_discriminator( images, matched_sentences) unmatched_scores = self.fake_sentence_discriminator( images, unmatched_sentences) fake_scores = self.fake_sentence_discriminator( images, fake_sentences) matched_loss = bce_loss(matched_scores, real_labels) unmatched_loss = bce_loss(unmatched_scores, fake_labels) fake_loss = bce_loss(fake_scores, fake_labels) discriminator_loss = matched_loss + unmatched_loss + fake_loss # 损失反向传递 discriminator_loss.backward() self.fake_sentence_discriminator_optimizer.step() # 更新生成器 self.sentence_generator_optimizer.zero_grad() fake_sentences = self.sentence_generator(images) fake_scores = self.fake_sentence_discriminator( images, fake_sentences) fake_loss = bce_loss(fake_scores, real_labels) # generator_loss = fake_loss + self.l1_coef * l1_loss(fake_sentences, matched_sentences) generator_loss = fake_loss generator_loss.backward() self.sentence_generator_optimizer.step() print("Epoch: %d, generator_loss= %f, discriminator_loss= %f" % (epoch, generator_loss.data, discriminator_loss.data)) val_images, val_sentences = load_validation_set(self.arguments) val_sentences = torch.tensor(val_sentences[:10], requires_grad=False).cuda() val_images = torch.tensor(val_images[:10], requires_grad=False).cuda() fake_sentences = self.sentence_generator(val_images) fake_sentences = fake_sentences.cpu().numpy() fake_sentences = convert_indexes2sentence( self.arguments['idx2word'], fake_sentences) save_sentence(val_sentences, fake_sentences, self.arguments['synthetic_sentence_path'])
def train_net(net, epochs=5, batch_size=1, lr=0.1, val_percent=0.1, save_cp=True, gpu=False, img_scale=0.5): dir_img = '/home/lixiaoxing/data/DRIVE/train/' dir_mask = '/home/lixiaoxing/data/DRIVE/trainannot/' dir_checkpoint = 'checkpoints/' if os.path.exists(dir_checkpoint) is False: os.makedirs(dir_checkpoint) ids = get_ids(dir_img) ids = split_ids(ids) iddataset = split_train_val(ids, val_percent) print(''' Starting training: Epochs: {} Batch size: {} Learning rate: {} Training size: {} Validation size: {} Checkpoints: {} CUDA: {} '''.format(epochs, batch_size, lr, len(iddataset['train']), len(iddataset['val']), str(save_cp), str(gpu))) N_train = len(iddataset['train']) #optimizer = optim.SGD(net.parameters(), # lr=lr, # momentum=0.9, # weight_decay=0.0005) optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=1e-5) criterion = nn.BCELoss() #criterion = DiceCoeff() for epoch in range(epochs): print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) net.train() # reset the generators train = get_imgs_and_masks_y(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks_y(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 for i, b in enumerate(batch(train, batch_size)): imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() masks_pred = net(imgs) masks_probs_flat = masks_pred.view(-1) true_masks_flat = true_masks.view(-1) #print(masks_pred.shape, true_masks.shape) loss = criterion(masks_probs_flat, true_masks_flat) epoch_loss += loss.item() #print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) if 1: val_dice = eval_net(net, val, gpu) print('Validation Dice Coeff: {}'.format(val_dice)) if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) print('Checkpoint {} saved !'.format(epoch + 1))
def train_main(): # Learning 1: New layers model = PretrainedNet().to(device) params = filter(lambda p: p.requires_grad, model.parameters()) optimizer = optim.SGD(params, lr=0.01) criterion = nn.BCELoss() print(model) batch_size = 25 train_loader = get_train_loader(batch_size) validation_loader = get_validation_loader(batch_size) log = get_tensorboard('pretrained') epochs = 10 start_time = datetime.now() for epoch in range(1, epochs + 1): train(model, train_loader, criterion, optimizer, epoch, log) with torch.no_grad(): print('\nValidation:') evaluate(model, validation_loader, criterion, epoch, log) end_time = datetime.now() print('Total training time: {}.'.format(end_time - start_time)) torch.save(model.state_dict(), model_file) print('Wrote model to', model_file) # Learning 2: Fine-tuning log = get_tensorboard('finetuned') for name, layer in model.vgg_features.named_children(): note = ' ' for param in layer.parameters(): note = '-' if int(name) >= 24: param.requires_grad = True note = '+' print(name, note, layer, len(param)) params = filter(lambda p: p.requires_grad, model.parameters()) # optimizer = optim.SGD(model.parameters(), lr=1e-3) optimizer = optim.RMSprop(params, lr=1e-5) criterion = nn.BCELoss() print(model) prev_epochs = epoch epochs = 20 start_time = datetime.now() for epoch in range(1, epochs + 1): train(model, train_loader, criterion, optimizer, prev_epochs + epoch, log) with torch.no_grad(): print('\nValidation:') evaluate(model, validation_loader, criterion, prev_epochs + epoch, log) end_time = datetime.now() print('Total training time: {}.'.format(end_time - start_time)) torch.save(model.state_dict(), model_file_ft) print('Wrote finetuned model to', model_file_ft)
        self.dense.add_module("linear", nn.Linear(6144, 1))
        self.dense.add_module("sigmoid", nn.Sigmoid())

    def forward(self, x):
        x = self.embedding(x).transpose(1, 2)
        x = self.conv(x)
        y = self.dense(x)
        return y


model = Net()


# Accuracy metric
def accuracy(y_pred, y_true):
    y_pred = torch.where(y_pred > 0.5,
                         torch.ones_like(y_pred, dtype=torch.float32),
                         torch.zeros_like(y_pred, dtype=torch.float32))
    acc = torch.mean(1 - torch.abs(y_true - y_pred))
    return acc


model.to('cuda:4')
model.compile(loss_func=nn.BCELoss(),
              optimizer=torch.optim.Adagrad(model.parameters(), lr=0.02),
              metrics_dict={"accuracy": accuracy},
              device='cuda:4')

# Training sometimes fails to converge; it may take several attempts.
dfhistory = model.fit(10, dl_train, dl_val=dl_test, log_step_freq=200)
def main():
    # Create the D & G networks
    D = Discriminator().to(device)
    G = Generator().to(device)
    print(D)
    print(G)

    # Initialize weights
    D.apply(weights_init)
    G.apply(weights_init)

    # Define the loss function
    criterion = nn.BCELoss()

    # Define the optimizers
    D_optim = optim.Adam(D.parameters(), lr=0.0002, betas=(0.5, 0.999))
    G_optim = optim.Adam(G.parameters(), lr=0.0002, betas=(0.5, 0.999))

    Tensor = torch.cuda.FloatTensor if device else torch.FloatTensor

    for epoch in range(num_epoch):
        for i, (imgs, _) in enumerate(dataloader):
            # Create ground-truth label tensors
            valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
            fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)

            # Convert imgs to a Tensor
            real_imgs = Variable(imgs.type(Tensor))

            # # # # # # Train the generator # # # # #
            G_optim.zero_grad()

            # Sample a noise vector
            z = Variable(
                Tensor(np.random.normal(0, 1, (imgs.shape[0], latent_size))))

            # Generate fake images
            fake_imgs = G(z)

            # G loss
            loss_G = criterion(D(fake_imgs), valid)

            # Optimize G
            loss_G.backward()
            G_optim.step()

            # # # # # # Train the discriminator # # # # #
            D_optim.zero_grad()

            # D loss
            real_loss = criterion(D(real_imgs), valid)
            fake_loss = criterion(D(fake_imgs.detach()), fake)
            loss_D = (real_loss + fake_loss) / 2

            # Optimize D
            loss_D.backward()
            D_optim.step()

            # Print training progress
            print(
                'Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}'.
                format(epoch, num_epoch, i + 1, len(dataloader),
                       loss_D.item(), loss_G.item()))

        # Save images
        save_image(fake_imgs.data[:25],
                   'images/{0:03d}.png'.format(epoch + 1),
                   nrow=5,
                   normalize=True)
def BCE_loss(x, target):
    return nn.BCELoss()(x.squeeze(2), target)
def main(args): skill_dataset = SkillDataset(args.train_path) # TODO : add collator function to make it batch-wise padding train_skill_loader = torch.utils.data.DataLoader( skill_dataset, batch_size=args.batch_size, shuffle=False, num_workers=2) model = SkillLSTM(2 * args.input_size, args.hidden_size, args.input_size) # the last argument is output_size model.train() # Get optimizer and scheduler can be used if given in paper optimizer = optim.SGD(model.parameters(), args.lr) # Get loss function loss_fn = nn.BCELoss() loss_list = [] # training preparation steps_till_eval = args.eval_steps epoch = 0 while epoch != args.num_epochs: epoch += 1 epoch_loss = 0 print("Entering epoch number") with torch.enable_grad(), tqdm( total=len(skill_dataset)) as progress_bar: for (student, student_orig) in train_skill_loader: # Setup forward optimizer.zero_grad() # Forward print("Starting forward pass") lengths = [int(q[0]) for q in student_orig] batch_out = model( student, lengths) # (batch_size, num_ques, skill_size) loss = loss_fn(batch_out, student[:, :, args.input_size:]) epoch_loss = epoch_loss + loss.item() # Backward print("Starting backward pass") loss.backward() nn.utils.clip_grad_norm_( model.parameters(), args.max_grad_norm) # To tackle exploding gradients optimizer.step() # Log info progress_bar.update(args.batch_size) progress_bar.set_postfix(epoch=epoch, BCELoss=loss) steps_till_eval -= args.batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint print("Evaluating the trained model") res_rmse, res_auc, res_r2 = evaluate(model) print("RMSE is : {}".format(res_rmse)) print("AUC is : {}".format(res_auc)) print("R2 is : {}".format(res_r2)) model.train() save_checkpoint(model, args.save_path) print("Evaluation complete") loss_list.append(epoch_loss) print("Epoch loss is : {}".format(epoch_loss)) # output plot for loss visualization plt.figure(loss_list) plt.savefig(args.save_loss_plot)
def loss(self, x, target):
    return nn.BCELoss()(x, target)
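Both small wrappers above assume `x` already holds probabilities in [0, 1], i.e. the model ends in a sigmoid. If the model emitted raw logits instead, a numerically safer alternative (not what these snippets do) would be:

def loss_from_logits(self, logits, target):
    # Fuses the sigmoid and the BCE term in one numerically stable call.
    return nn.BCEWithLogitsLoss()(logits, target)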
def __init__(self, config, dataset): super(S3Rec, self).__init__(config, dataset) # load parameters info self.n_layers = config['n_layers'] self.n_heads = config['n_heads'] self.hidden_size = config['hidden_size'] # same as embedding_size self.inner_size = config[ 'inner_size'] # the dimensionality in feed-forward layer self.hidden_dropout_prob = config['hidden_dropout_prob'] self.attn_dropout_prob = config['attn_dropout_prob'] self.hidden_act = config['hidden_act'] self.layer_norm_eps = config['layer_norm_eps'] self.FEATURE_FIELD = config['item_attribute'] self.FEATURE_LIST = self.FEATURE_FIELD + config['LIST_SUFFIX'] self.train_stage = config['train_stage'] # pretrain or finetune self.pre_model_path = config[ 'pre_model_path'] # We need this for finetune self.mask_ratio = config['mask_ratio'] self.aap_weight = config['aap_weight'] self.mip_weight = config['mip_weight'] self.map_weight = config['map_weight'] self.sp_weight = config['sp_weight'] self.initializer_range = config['initializer_range'] self.loss_type = config['loss_type'] # load dataset info self.n_items = dataset.item_num + 1 # for mask token self.mask_token = self.n_items - 1 self.n_features = dataset.num( self.FEATURE_FIELD) - 1 # we don't need padding self.item_feat = dataset.get_item_feature() # define layers and loss # modules shared by pre-training stage and fine-tuning stage self.item_embedding = nn.Embedding(self.n_items, self.hidden_size, padding_idx=0) self.position_embedding = nn.Embedding(self.max_seq_length, self.hidden_size) self.feature_embedding = nn.Embedding(self.n_features, self.hidden_size, padding_idx=0) self.trm_encoder = TransformerEncoder( n_layers=self.n_layers, n_heads=self.n_heads, hidden_size=self.hidden_size, inner_size=self.inner_size, hidden_dropout_prob=self.hidden_dropout_prob, attn_dropout_prob=self.attn_dropout_prob, hidden_act=self.hidden_act, layer_norm_eps=self.layer_norm_eps) self.LayerNorm = nn.LayerNorm(self.hidden_size, eps=self.layer_norm_eps) self.dropout = nn.Dropout(self.hidden_dropout_prob) # modules for pretrain # add unique dense layer for 4 losses respectively self.aap_norm = nn.Linear(self.hidden_size, self.hidden_size) self.mip_norm = nn.Linear(self.hidden_size, self.hidden_size) self.map_norm = nn.Linear(self.hidden_size, self.hidden_size) self.sp_norm = nn.Linear(self.hidden_size, self.hidden_size) self.loss_fct = nn.BCELoss(reduction='none') # modules for finetune if self.loss_type == 'BPR' and self.train_stage == 'finetune': self.loss_fct = BPRLoss() elif self.loss_type == 'CE' and self.train_stage == 'finetune': self.loss_fct = nn.CrossEntropyLoss() elif self.train_stage == 'finetune': raise NotImplementedError( "Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization assert self.train_stage in ['pretrain', 'finetune'] if self.train_stage == 'pretrain': self.apply(self._init_weights) else: # load pretrained model for finetune pretrained = torch.load(self.pre_model_path) self.logger.info('Load pretrained model from', self.pre_model_path) self.load_state_dict(pretrained['state_dict'])
def main(): torch.manual_seed(options['seed']) os.environ['CUDA_VISIBLE_DEVICES'] = options['gpu'] use_gpu = torch.cuda.is_available() if options['use_cpu']: use_gpu = False feat_dim = 2 if 'cnn' in options['model'] else 512 options.update( { 'feat_dim': feat_dim, 'use_gpu': use_gpu } ) if use_gpu: print("Currently using GPU: {}".format(options['gpu'])) cudnn.benchmark = True torch.cuda.manual_seed_all(options['seed']) else: print("Currently using CPU") dataset = datasets.create(options['dataset'], **options) out_dataset = datasets.create(options['out_dataset'], **options) trainloader, testloader = dataset.trainloader, dataset.testloader outloader = out_dataset.testloader options.update( { 'num_classes': dataset.num_classes } ) print("Creating model: {}".format(options['model'])) if 'cnn' in options['model']: net = ConvNet(num_classes=dataset.num_classes) else: if options['cs']: net = resnet34ABN(num_classes=dataset.num_classes, num_bns=2) else: net = ResNet34(dataset.num_classes) if options['cs']: print("Creating GAN") nz = options['nz'] netG = gan.Generator32(1, nz, 64, 3) # ngpu, nz, ngf, nc netD = gan.Discriminator32(1, 3, 64) # ngpu, nc, ndf fixed_noise = torch.FloatTensor(64, nz, 1, 1).normal_(0, 1) criterionD = nn.BCELoss() Loss = importlib.import_module('loss.'+options['loss']) criterion = getattr(Loss, options['loss'])(**options) if use_gpu: net = nn.DataParallel(net, device_ids=[i for i in range(len(options['gpu'].split(',')))]).cuda() criterion = criterion.cuda() if options['cs']: netG = nn.DataParallel(netG, device_ids=[i for i in range(len(options['gpu'].split(',')))]).cuda() netD = nn.DataParallel(netD, device_ids=[i for i in range(len(options['gpu'].split(',')))]).cuda() fixed_noise.cuda() model_path = os.path.join(options['outf'], 'models', options['dataset']) file_name = '{}_{}_{}_{}_{}'.format(options['model'], options['dataset'], options['loss'], str(options['weight_pl']), str(options['cs'])) if options['eval']: net, criterion = load_networks(net, model_path, file_name, criterion=criterion) results = test(net, criterion, testloader, outloader, epoch=0, **options) print("Acc (%): {:.3f}\t AUROC (%): {:.3f}\t OSCR (%): {:.3f}\t".format(results['ACC'], results['AUROC'], results['OSCR'])) return params_list = [{'params': net.parameters()}, {'params': criterion.parameters()}] optimizer = torch.optim.Adam(params_list, lr=options['lr']) if options['cs']: optimizerD = torch.optim.Adam(netD.parameters(), lr=options['gan_lr'], betas=(0.5, 0.999)) optimizerG = torch.optim.Adam(netG.parameters(), lr=options['gan_lr'], betas=(0.5, 0.999)) if options['stepsize'] > 0: scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 90, 120]) start_time = time.time() score_now = 0.0 for epoch in range(options['max_epoch']): print("==> Epoch {}/{}".format(epoch+1, options['max_epoch'])) if options['cs']: train_cs(net, netD, netG, criterion, criterionD, optimizer, optimizerD, optimizerG, trainloader, epoch=epoch, **options) train(net, criterion, optimizer, trainloader, epoch=epoch, **options) if options['eval_freq'] > 0 and (epoch+1) % options['eval_freq'] == 0 or (epoch+1) == options['max_epoch']: print("==> Test") results = test(net, criterion, testloader, outloader, epoch=epoch, **options) print("Acc (%): {:.3f}\t AUROC (%): {:.3f}\t OSCR (%): {:.3f}\t".format(results['ACC'], results['AUROC'], results['OSCR'])) save_networks(net, model_path, file_name, criterion=criterion) if options['cs']: save_GAN(netG, netD, model_path, file_name) fake = netG(fixed_noise) GAN_path = 
os.path.join(model_path, 'samples') mkdir_if_missing(GAN_path) vutils.save_image(fake.data, '%s/gan_samples_epoch_%03d.png'%(GAN_path, epoch), normalize=True) if options['stepsize'] > 0: scheduler.step() elapsed = round(time.time() - start_time) elapsed = str(datetime.timedelta(seconds=elapsed)) print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
    train=True, download=True, transform=transform),
    batch_size=batch_size, shuffle=True)

# network
G = generator(64)  # CHANGED
D = discriminator(64)
G.weight_init(mean=0.0, std=0.02)
D.weight_init(mean=0.0, std=0.02)
G.cuda()
D.cuda()

# Binary Cross Entropy loss
BCE_loss = nn.BCELoss()

# Adam optimizer
G_optimizer = optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.999))
D_optimizer = optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.999))

# results save folder
root = 'MNIST_cDCGAN_results/'
model = 'MNIST_cDCGAN_'
os.makedirs('MNIST_cDCGAN_results', exist_ok=True)
os.makedirs('MNIST_cDCGAN_results/Fixed_results', exist_ok=True)
os.makedirs('MNIST_cDCGAN_results/Losses', exist_ok=True)

total_store = []
mean_inception_store = []
std_inception_store = []
load_epoch = 0
if os.path.exists(models_dir):
    for file in os.listdir(models_dir):
        if file.startswith("sup_net_epoch_"):
            load_epoch = max(load_epoch, int(file.split('_')[3].split('.')[0]))
    if load_epoch > 0:
        load_filename = 'sup_net_epoch_{}.pth'.format(load_epoch)
        print('Loading model {}'.format(load_filename))
        load_path = os.path.join(models_dir, load_filename)
        sup_net.load_state_dict(torch.load(load_path))
else:
    os.makedirs(models_dir)

sup_net.cuda()

bce = nn.BCELoss()
l1 = nn.L1Loss()
mse = nn.MSELoss()
optimizer = optim.Adam(sup_net.parameters(), lr=args.lr)


def estimate_metrics(pred, random_query, binary_target, sup_net, switch_vec):
    query_pred = torch.gather(pred, 1, random_query.view(-1, 1)).squeeze(1)
    num_s = torch.tensor(np.sum(switch_vec).item(), dtype=torch.float32).to(device)
    # ipdb.set_trace()
    _, class_pred = torch.max(pred, dim=1)
    binary_pred = class_pred.eq(random_query).type(torch.cuda.LongTensor)
def main(): main_arg_parser = argparse.ArgumentParser(description="options") main_arg_parser.add_argument("-e,", "--epochs", type=int, default=150) main_arg_parser.add_argument("-lr", "--learning-rate", type=float, default=0.001) main_arg_parser.add_argument("--weight-decay", help="L2 regularization coefficient", type=float, default=0) main_arg_parser.add_argument("--orthoreg", action="store_true") main_arg_parser.add_argument("--cuda", action="store_true") main_arg_parser.add_argument( "--test-set-size", help="proportion of dataset to allocate as test set [0..1]", type=float, default=0.1) main_arg_parser.add_argument( "--aflw-path", help="path to aflw dir (should contain aflw_{12,14}.t7)", default="EX2_data/aflw") main_arg_parser.add_argument( "--voc-path", help= "path to VOC2007 directory (should contain JPEGImages, Imagesets dirs)", default="EX2_data/VOC2007") main_arg_parser.add_argument("--batch-size", type=int, default=64) # submitted convergence plot obtained from visdom using this flag (everything else default) main_arg_parser.add_argument("--visdom-plot", action="store_true") main_arg_parser.add_argument("--seed", help="random seed for torch", type=int, default=42) main_arg_parser.add_argument("--continue-from", help="checkpoint to continue from") args = main_arg_parser.parse_args() np.random.seed(args.seed) torch.manual_seed(args.seed) # cuda only if asked and exists cuda = args.cuda and torch.cuda.is_available() if cuda: torch.cuda.manual_seed(args.seed) print("Using CUDA!") else: import time print( "Not using CUDA. Add --cuda or this may take a while. Have a moment to hit ctrl-c" ) #time.sleep(3) if args.visdom_plot: plotter = utils.VisdomLinePlotter('12Net Loss') else: plotter = None # load data dataset = load_12net_data(args.aflw_path, args.voc_path) # data is ordered by class, so shuffle and split to test/train indices_shuffled = list(torch.randperm(len(dataset))) first_test_index = int((1 - args.test_set_size) * len(indices_shuffled)) # we keep lists of indices as the test/train division. This respects torch's seed # and we can sample out of these separate lists at test and train train_subset = indices_shuffled[:first_test_index] test_subset = indices_shuffled[first_test_index:] # train and test loss_criterion = nn.BCELoss() net = Net12() optimizer = Adam(net.parameters(), args.learning_rate, weight_decay=args.weight_decay) if args.continue_from: print("continuing from {}".format(args.continue_from)) loaded = torch.load(args.continue_from) net.load_state_dict(loaded['state_dict']) optimizer.load_state_dict(loaded['optimizer']) if cuda: net.cuda() if args.epochs > 0: train(net, loss_criterion, dataset, optimizer, plotter=plotter, epochs=args.epochs, train_subset=train_subset, test_subset=test_subset, batch_size=args.batch_size, cuda=cuda, orthoreg=args.orthoreg) precisions, recalls = calc_precision_recall(net, loss_criterion, dataset, test_subset, batch_size=args.batch_size, cuda=cuda) if args.visdom_plot: import visdom viz = visdom.Visdom() viz.line(X=np.array(recalls), Y=np.array(precisions), opts=dict(title="Precision-Recall Curve", xlabel="Recall", ylabel="Precision"), env="main") # find first threshold below 99% recall for idx in range(len(recalls)): if recalls[idx] < 0.99: break best_index = idx - 1 # one before we dropped below 99% print( "threshold {} to get recall >99% ({}). 
Resulting precision {}".format( best_index / len(recalls), recalls[best_index], precisions[best_index])) torch.save( { 'state_dict': net.state_dict(), 'optimizer': optimizer.state_dict(), }, "q1_checkpoint.pth.tar")
def train_iters(ae_model, dis_model): train_data_loader = non_pair_data_loader( batch_size=args.batch_size, id_bos=args.id_bos, id_eos=args.id_eos, id_unk=args.id_unk, max_sequence_length=args.max_sequence_length, vocab_size=args.vocab_size) train_data_loader.create_batches(args.train_file_list, args.train_label_list, if_shuffle=True) add_log("Start train process.") ae_model.train() dis_model.train() ae_optimizer = NoamOpt( ae_model.src_embed[0].d_model, 1, 2000, torch.optim.Adam(ae_model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)) dis_optimizer = torch.optim.Adam(dis_model.parameters(), lr=0.0001) ae_criterion = get_cuda( LabelSmoothing(size=args.vocab_size, padding_idx=args.id_pad, smoothing=0.1)) dis_criterion = nn.BCELoss(size_average=True) for epoch in range(200): print('-' * 94) epoch_start_time = time.time() for it in range(train_data_loader.num_batch): batch_sentences, tensor_labels, \ tensor_src, tensor_src_mask, tensor_tgt, tensor_tgt_y, \ tensor_tgt_mask, tensor_ntokens = train_data_loader.next_batch() # Forward pass latent, out = ae_model.forward(tensor_src, tensor_tgt, tensor_src_mask, tensor_tgt_mask) # Loss calculation loss_rec = ae_criterion( out.contiguous().view(-1, out.size(-1)), tensor_tgt_y.contiguous().view(-1)) / tensor_ntokens.data ae_optimizer.optimizer.zero_grad() loss_rec.backward() ae_optimizer.step() # Classifier dis_lop = dis_model.forward(to_var(latent.clone())) loss_dis = dis_criterion(dis_lop, tensor_labels) dis_optimizer.zero_grad() loss_dis.backward() dis_optimizer.step() if it % 200 == 0: add_log( '| epoch {:3d} | {:5d}/{:5d} batches | rec loss {:5.4f} | dis loss {:5.4f} |' .format(epoch, it, train_data_loader.num_batch, loss_rec, loss_dis)) print(id2text_sentence(tensor_tgt_y[0], args.id_to_word)) generator_text = ae_model.greedy_decode( latent, max_len=args.max_sequence_length, start_id=args.id_bos) print(id2text_sentence(generator_text[0], args.id_to_word)) add_log('| end of epoch {:3d} | time: {:5.2f}s |'.format( epoch, (time.time() - epoch_start_time))) # Save model torch.save(ae_model.state_dict(), args.current_save_path + 'ae_model_params.pkl') torch.save(dis_model.state_dict(), args.current_save_path + 'dis_model_params.pkl') return