def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] #+ (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB( data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ ) model = SentenceVAE( vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) if torch.cuda.is_available(): model = model.cuda() if args.tensorboard_logging: writer = SummaryWriter(os.path.join('./',args.logdir, expierment_name(args,ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join('./',args.save_model_path,'VAE', ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/(1+np.exp(-k*(step-x0)))) elif anneal_function == 'linear': return min(1, step/x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) NLL_w_avg = NLL_loss/torch.sum(length).float() # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight,NLL_w_avg print(model) optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader( dataset=datasets[split], batch_size=args.batch_size, shuffle=split=='train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available() ) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight,NLL_w_avg = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss)/batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing # Avoid the .cat error !!! #print(loss.data) #print(tracker['ELBO']) loss_data = torch.tensor([loss.data.item()]) tracker['ELBO'] = torch.cat((tracker['ELBO'], loss_data)) #Orig: tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data),1) if args.tensorboard_logging: writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, NLL-word-Loss %9.4f" %(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight,NLL_w_avg)) #split = 'invalid' #JUST TO DEBUG!!! if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) #ERROR HERE!!! tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} if not os.path.exists(os.path.join('./dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('./dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train' and epoch %10 ==0 : checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s"%checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() curBest = 1000000 for split in splits: datasets[split] = Mixed(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, totalIterations, split): if (split != 'train'): return 1 elif anneal_function == 'identity': return 1 elif anneal_function == 'linear': return 1.005 * float(step) / totalIterations elif anneal_function == 'sigmoid': return (1 / (1 + math.exp(-8 * (float(step) / totalIterations)))) elif anneal_function == 'tanh': return math.tanh(4 * (float(step) / totalIterations)) elif anneal_function == 'linear_capped': #print(float(step)*30/totalIterations) return min(1.0, float(step) * 5 / totalIterations) elif anneal_function == 'cyclic': quantile = int(totalIterations / 5) remainder = int(step % quantile) midPoint = int(quantile / 2) if (remainder > midPoint): return 1 else: return float(remainder) / midPoint else: return 1 ReconLoss = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, totalIterations, split): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence #print((1 + logv - mean.pow(2) - logv.exp()).size()) KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) #print(KL_loss.size()) KL_weight = kl_anneal_function(anneal_function, step, totalIterations, split) return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor tensor2 = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor tensor3 = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor tensor4 = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 stop = False Z = [] L = [] for epoch in range(args.epochs): if (stop): break for split in splits: if (split == 'test'): z_data = [] domain_label = [] z_bool = False domain_label_bool = False if (stop): break data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) totalIterations = (int(len(datasets[split]) / args.batch_size) + 1) * args.epochs tracker = defaultdict(tensor) tracker2 = defaultdict(tensor2) tracker3 = defaultdict(tensor3) tracker4 = defaultdict(tensor4) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): # if(iteration > 400): # break batch_size = batch['input'].size(0) labels = batch['label'] for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) if (split == 'test'): if (z_bool == False): z_bool = True domain_label = labels.tolist() z_data = z else: domain_label += labels.tolist() #print(domain_label) z_data = torch.cat((z_data, z), 0) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, totalIterations, split) if split == 'train': #KL_loss_thresholded = torch.clamp(KL_loss, min=6.0) loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data)) tracker2['KL_loss'] = torch.cat( (tracker2['KL_loss'], KL_loss.data)) tracker3['Recon_loss'] = torch.cat( (tracker3['Recon_loss'], recon_loss.data)) tracker4['Perplexity'] = torch.cat( (tracker4['Perplexity'], torch.exp(recon_loss.data / batch_size))) if args.tensorboard_logging: writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): logger.info( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data[0], recon_loss.data[0] / batch_size, KL_loss.data[0] / batch_size, KL_weight)) if (split == 'test'): Z = z_data L = domain_label if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['negELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/NegELBO" % split.upper(), torch.mean(tracker['negELBO']), epoch) writer.add_scalar("%s-Epoch/KL_loss" % split.upper(), torch.mean(tracker2['KL_loss']) / batch_size, epoch) writer.add_scalar( "%s-Epoch/Recon_loss" % split.upper(), torch.mean(tracker3['Recon_loss']) / batch_size, epoch) writer.add_scalar("%s-Epoch/Perplexity" % split.upper(), torch.mean(tracker4['Perplexity']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': if (torch.mean(tracker['negELBO']) < curBest): curBest = torch.mean(tracker['negELBO']) else: stop = True dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps_32_0', ts)): os.makedirs('dumps_32_0/' + ts) with open( os.path.join('dumps_32_0/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint # if split == 'train': # checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) # torch.save(model.state_dict(), checkpoint_path) # logger.info("Model saved at %s"%checkpoint_path) Z = Z.data.cpu().numpy() print(Z.shape) beforeTSNE = TSNE(random_state=20150101).fit_transform(Z) scatter(beforeTSNE, L, [0, 1, 2], (5, 5), 'latent discoveries') plt.savefig('mixed_tsne' + args.anneal_function + '.png', dpi=120)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def sigmoid(step): x = step - 6569.5 if x < 0: a = np.exp(x) res = (a / (1 + a)) else: res = (1 / (1 + np.exp(-x))) return float(res) def frange_cycle_linear(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5): L = np.ones(n_iter) * stop period = n_iter / n_cycle step = (stop - start) / (period * ratio) # linear schedule for c in range(n_cycle): v, i = start, 0 while v <= stop and (int(i + c * period) < n_iter): L[int(i + c * period)] = v v += step i += 1 return L n_iter = 0 for epoch in range(args.epochs): split = 'train' data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) for iteration, batch in enumerate(data_loader): n_iter += 1 print("Total no of iterations = " + str(n_iter)) L = frange_cycle_linear(n_iter) def kl_anneal_function(anneal_function, step): if anneal_function == 'identity': return 1 if anneal_function == 'sigmoid': return sigmoid(step) if anneal_function == 'cyclic': return float(L[step]) ReconLoss = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, split='train'): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) if split == 'train': KL_weight = kl_anneal_function(anneal_function, step) else: KL_weight = 1 return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, split) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data)) if args.tensorboard_logging: writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): logger.info( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data[0], recon_loss.data[0] / batch_size, KL_loss.data[0] / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['negELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/NegELBO" % split.upper(), torch.mean(tracker['negELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s" % checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) log_file = open("res.txt", "a") log_file.write(expierment_name(args, ts)) log_file.write("\n") graph_file = open("elbo-graph.txt", "a") graph_file.write(expierment_name(args, ts)) graph_file.write("\n") model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) elif anneal_function == "softplus": return min(1, np.log(1 + np.exp(k * step))) elif anneal_function == "no": return 1 NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 val_lowest_elbo = 5000 val_accu_epoch = 0 val_min_epoch = 0 split_elbo = {"train": [], "valid": []} if args.test: split_elbo["test"] = [] split_loss = {"train": [], "valid": []} if args.test: split_loss["test"] = [] for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) if split != 'train': KL_weight = 1.0 loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data)) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data[0], NLL_loss.data[0] / batch_size, KL_loss.data[0] / batch_size, KL_weight)) split_loss[split].append([ loss.data[0], NLL_loss.data[0] / batch_size, KL_loss.data[0] / batch_size ]) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) split_elbo[split].append([torch.mean(tracker["ELBO"])]) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path) if split == 'valid': if torch.mean(tracker['ELBO']) < val_lowest_elbo: val_lowest_elbo = torch.mean(tracker['ELBO']) val_accu_epoch = 0 val_min_epoch = epoch else: val_accu_epoch += 1 if val_accu_epoch >= 3: if not args.test: exp_str = "" exp_str += "train_ELBO={}\n".format( split_elbo["train"][val_min_epoch]) exp_str += "valid_ELBO={}\n".format( split_elbo["valid"][val_min_epoch]) exp_str += "==========\n" log_file.write(exp_str) log_file.close() print(exp_str) graph_file.write("ELBO\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[0]) line += "\n" graph_file.write(line) graph_file.write("NLL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[1]) line += "\n" graph_file.write(line) graph_file.write("KL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[2]) line += "\n" graph_file.write(line) graph_file.close() exit() elif split == 'test' and val_accu_epoch >= 3: exp_str = "" exp_str += "train_ELBO={}\n".format( split_elbo["train"][val_min_epoch]) exp_str += "valid_ELBO={}\n".format( split_elbo["valid"][val_min_epoch]) exp_str += "test_ELBO={}\n".format( split_elbo["test"][val_min_epoch]) exp_str += "==========\n" log_file.write(exp_str) log_file.close() print(exp_str) graph_file.write("ELBO\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[0]) line += "\n" for s in splits: for i in split_elbo[s]: line += "{},".format(i[0]) line += "\n" graph_file.write(line) graph_file.write("NLL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[1]) line += "\n" graph_file.write(line) graph_file.write("KL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[2]) line += "\n" graph_file.write(line) graph_file.close() exit() if epoch == args.epochs - 1: exp_str = "" exp_str += "train_ELBO={}\n".format( split_elbo["train"][val_min_epoch]) exp_str += "valid_ELBO={}\n".format( split_elbo["valid"][val_min_epoch]) if args.test: exp_str += "test_ELBO={}\n".format( split_elbo["test"][val_min_epoch]) exp_str += "==========\n" log_file.write(exp_str) log_file.close() print(exp_str) graph_file.write("ELBO\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[0]) line += "\n" graph_file.write(line) graph_file.write("NLL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[1]) line += "\n" graph_file.write(line) graph_file.write("KL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[2]) line += "\n" graph_file.write(line) graph_file.close() exit()
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: if args.dataset == 'ptb': Dataset = PTB elif args.dataset == 'twitter': Dataset = PoliticianTweets else: print("Invalid dataset. Exiting") exit() datasets[split] = Dataset( data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ ) model = SentenceVAE( vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) # if args.from_file != "": # model = torch.load(args.from_file) # if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter(os.path.join(args.logdir, experiment_name(args,ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) if 'sigmoid' in args.anneal_function and args.dataset=='ptb': linspace = np.linspace(-5,5,13160) # 13160 = number of training examples in ptb elif 'sigmoid' in args.anneal_function and args.dataset=='twitter': linspace = np.linspace(-5, 5, 25190) #6411/25190? = number of training examples in short version of twitter def kl_anneal_function(anneal_function, step, param_dict=None): if anneal_function == 'identity': return 1 elif anneal_function == 'sigmoid' or anneal_function=='sigmoid_klt': s = 1/(len(linspace)) return(float((1)/(1+np.exp(-param_dict['ag']*(linspace[step]))))) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, param_dict=None): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) if args.anneal_function == 'sigmoid_klt': if float(KL_loss)/args.batch_size < param_dict['kl_threshold']: # print("KL_loss of %s is below threshold %s. Returning this threshold instead"%(float(KL_loss)/args.batch_size,param_dict['kl_threshold'])) KL_loss = to_var(torch.Tensor([param_dict['kl_threshold']*args.batch_size])) KL_weight = kl_anneal_function(anneal_function, step, {'ag': args.anneal_aggression}) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader( dataset=datasets[split], batch_size=args.batch_size, shuffle=split=='train', num_workers=0, pin_memory=torch.cuda.is_available() ) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) if split == 'train' and batch_size != args.batch_size: print("WARNING: Found different batch size\nargs.batch_size= %s, input_size=%s"%(args.batch_size, batch_size)) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, {'kl_threshold': args.kl_threshold}) loss = (NLL_loss + KL_weight * KL_loss)/batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # print(step) # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data)) if args.tensorboard_logging: writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL_Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) # print("Step %s: %s"%(epoch*len(data_loader) + iteration, KL_weight)) writer.add_scalar("%s/KL_Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): logger.info("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" %(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) logger.info("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s"%checkpoint_path) torch.save(model, f"model-{args.dataset}-{ts}.pickle")
def main(args): # Load the vocab with open(args.data_dir+'/ptb.vocab.json', 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) # Initialize semantic loss sl = Semantic_Loss() datasets = OrderedDict() for split in splits: datasets[split] = PTB( data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ ) params = dict( vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) model = SentenceVAE(**params) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f: json.dump(params, f, indent=4) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/(1+np.exp(-k*(step-x0)))) elif anneal_function == 'linear': return min(1, step/x0) def perplexity_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/ 1+np.exp(-k*(step-x0))) elif anneal_function == 'linear': return min(1, (step/x0)) NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum') def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0, \ batch_perplexity, perplexity_anneal_function): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).item()].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) # Perplexity perp_loss = batch_perplexity perp_weight = perplexity_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): # Keep track of epoch loss epoch_loss = [] for split in splits: data_loader = DataLoader( dataset=datasets[split], batch_size=args.batch_size, shuffle=split=='train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available() ) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() batch_t_start = None for iteration, batch in enumerate(data_loader): if batch_t_start: batch_run_time = time.time() - batch_t_start # print("Batch run time: " + str(batch_run_time)) batch_t_start = time.time() batch_size = batch['input_sequence'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Get the original sentences in this batch batch_sentences = idx2word(batch['input_sequence'], i2w=i2w, pad_idx=w2i['<pad>']) # Remove the first tag batch_sentences = [x.replace("<sos>", "") for x in batch_sentences] # Forward pass (logp, mean, logv, z), states = model(**batch) # Choose some random pairs of samples within the batch # to get latent representations for batch_index_pairs = list(itertools.combinations(np.arange(batch_size), 2)) random.shuffle(batch_index_pairs) batch_index_pairs = batch_index_pairs[:args.perplexity_samples_per_batch] batch_perplexity = [] # If we start the perplexity start_perplexity = epoch > 10 # If we should have perplexity loss if start_perplexity and args.perplexity_loss: # For each pair, get the intermediate representations in the latent space for index_pair in batch_index_pairs: with torch.no_grad(): z1_hidden = states['z'][index_pair[0]].cpu() z2_hidden = states['z'][index_pair[1]].cpu() z_hidden = to_var(torch.from_numpy(interpolate(start=z1_hidden, end=z2_hidden, steps=1)).float()) if args.rnn_type == "lstm": with torch.no_grad(): z1_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[0]] z2_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[1]] z_cell_states = \ to_var(torch.from_numpy(interpolate(start=z1_cell_state, end=z2_cell_state, steps=1)).float()) samples, _ = model.inference(z=z_hidden, z_cell_state=z_cell_states) else: samples, _ = model.inference(z=z_hidden, z_cell_state=None) # Check interpolated sentences interpolated_sentences = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']) # For each sentence, get the perplexity and show it perplexities = [] for sentence in interpolated_sentences: perplexities.append(sl.get_perplexity(sentence)) avg_sample_perplexity = sum(perplexities) / len(perplexities) batch_perplexity.append(avg_sample_perplexity) # Calculate batch perplexity avg_batch_perplexity = sum(batch_perplexity) / len(batch_perplexity) # loss calculation NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, \ args.k, args.x0, avg_batch_perplexity, perplexity_anneal_function) loss = ((NLL_loss + KL_weight * KL_loss) / batch_size) + (perp_loss * perp_weight) else: # Epochs < X, so train without perplexity # loss calculation NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, \ args.k, args.x0, 0, perplexity_anneal_function) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # Turn model back into train, since inference changed to eval if split == 'train': model.train() else: model.eval() # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # Add loss epoch_loss.append(loss.item()) # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, Perp-loss %9.4f, Perp-weight %6.3f" % (split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size, KL_loss.item()/batch_size, KL_weight, perp_loss, perp_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, tracker['ELBO'].mean())) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, x1, x2): if anneal_function == 'identity': return 1 elif anneal_function == 'linear': return min(1, step / x1) elif anneal_function == 'logistic': return float(1 / (1 + np.exp(-x2 * (step - x1)))) elif anneal_function == 'cyclic_log': return float(1 / (1 + np.exp(-x2 * ((step % (3 * x1)) - x1)))) elif anneal_function == 'cyclic_lin': return min(1, (step % (3 * x1)) / x1) ReconLoss = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, x1, x2): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).item()].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, x1, x2) return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 early_stopping = EarlyStopping(history=10) for epoch in range(args.epochs): early_stopping_flag = False for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) # tracker = defaultdict(tensor) tracker = defaultdict(list) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.x1, args.x2) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['negELBO'].append(loss.item()) if args.tensorboard_logging: writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.item(), epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): # print(step) # logger.info("Step = %d"%step) logger.info( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), recon_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) # tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) # print(z.data.shape) tracker['z'].append(z.data.tolist()) mean_loss = sum(tracker['negELBO']) / len(tracker['negELBO']) logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, mean_loss)) # print(mean_loss) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/NegELBO" % split.upper(), mean_loss, epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'] } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) if (args.early_stopping): if (early_stopping.check(mean_loss)): early_stopping_flag = True # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s" % checkpoint_path) if (early_stopping_flag): print("Early stopping trigerred. Training stopped...") break
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO, filename=os.path.join(args.logdir, experiment_name(args, ts) + ".log")) logger = logging.getLogger(__name__) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) total_step = int(args.epochs * 42000.0 / args.batch_size) def kl_anneal_function(anneal_function, step): if anneal_function == 'half': return 0.5 if anneal_function == 'identity': return 1 if anneal_function == 'double': return 2 if anneal_function == 'quadra': return 4 if anneal_function == 'sigmoid': return 1 / (1 + np.exp((0.5 * total_step - step) / 200)) if anneal_function == 'monotonic': beta = step * 4 / total_step if beta > 1: beta = 1.0 return beta if anneal_function == 'cyclical': t = total_step / 4 beta = 4 * (step % t) / t if beta > 1: beta = 1.0 return beta ReconLoss = torch.nn.NLLLoss(reduction='sum', ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).item()].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step) return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 train_loss = [] test_loss = [] for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(list) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing # tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data)) tracker["negELBO"].append(loss.item()) tracker["recon_loss"].append(recon_loss.item() / batch_size) tracker["KL_Loss"].append(KL_loss.item() / batch_size) tracker["KL_Weight"].append(KL_weight) if args.tensorboard_logging: writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.item(), epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): logger.info( "\tStep\t%s\t%04d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), recon_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight)) print( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), recon_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'].append(z.data.tolist()) logger.info( "\tEpoch\t%s\t%02d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f" % (split.upper(), epoch, args.epochs, sum(tracker['negELBO']) / len(tracker['negELBO']), 1.0 * sum(tracker['recon_loss']) / len(tracker['recon_loss']), 1.0 * sum(tracker['KL_Loss']) / len(tracker['KL_Loss']), 1.0 * sum(tracker['KL_Weight']) / len(tracker['KL_Weight']))) print("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, sum(tracker['negELBO']) / len(tracker['negELBO']))) if args.tensorboard_logging: writer.add_scalar( "%s-Epoch/NegELBO" % split.upper(), 1.0 * sum(tracker['negELBO']) / len(tracker['negELBO']), epoch) writer.add_scalar( "%s-Epoch/recon_loss" % split.upper(), 1.0 * sum(tracker['recon_loss']) / len(tracker['recon_loss']), epoch) writer.add_scalar( "%s-Epoch/KL_Loss" % split.upper(), 1.0 * sum(tracker['KL_Loss']) / len(tracker['KL_Loss']), epoch) writer.add_scalar( "%s-Epoch/KL_Weight" % split.upper(), 1.0 * sum(tracker['KL_Weight']) / len(tracker['KL_Weight']), epoch) if split == 'train': train_loss.append(1.0 * sum(tracker['negELBO']) / len(tracker['negELBO'])) else: test_loss.append(1.0 * sum(tracker['negELBO']) / len(tracker['negELBO'])) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'] } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path) sns.set(style="whitegrid") df = pd.DataFrame() df["train"] = train_loss df["test"] = test_loss ax = sns.lineplot(data=df, legend=False) ax.set(xlabel='Epoch', ylabel='Loss') plt.legend(title='Split', loc='upper right', labels=['Train', 'Test']) plt.savefig(os.path.join(args.logdir, experiment_name(args, ts) + ".png"), transparent=True, dpi=300)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: # datasets[split] = BGoogle( # data_dir=args.data_dir, # split=split, # create_data=args.create_data, # batch_size=args.batch_size , # max_sequence_length=args.max_sequence_length, # min_occ=args.min_occ # ) datasets[split] = Amazon(data_dir=args.data_dir, split=split, create_data=args.create_data, batch_size=args.batch_size, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) tokenizer = TweetTokenizer(preserve_case=False) vocab_file = "amazon.vocab.json" with open(os.path.join(args.data_dir, vocab_file), 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) # save_model_path = os.path.join(args.save_model_path, ts) save_model_path = args.save_model_path if not os.path.exists(save_model_path): os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 save_mode = True last_ELBO = 1e32 for epoch in range(args.epochs): print("+" * 20) # f_test_example(model, tokenizer, w2i, i2w) for split in splits: # data_loader = DataLoader( # dataset=datasets[split], # batch_size=args.batch_size, # shuffle=split=='train', # num_workers=cpu_count(), # pin_memory=torch.cuda.is_available() # ) batch_size = args.batch_size tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() # for iteration, batch in enumerate(data_loader): iteration = 0 iteration_total = datasets[split].batch_num print("batch_num", iteration_total) for input_batch_tensor, target_batch_tensor, length_batch_tensor in datasets[ split]: if torch.is_tensor(input_batch_tensor): input_batch_tensor = to_var(input_batch_tensor) if torch.is_tensor(target_batch_tensor): target_batch_tensor = to_var(target_batch_tensor) if torch.is_tensor(length_batch_tensor): length_batch_tensor = to_var(length_batch_tensor) # batch_size = batch['input'].size(0) # for k, v in batch.items(): # if torch.is_tensor(v): # batch[k] = to_var(v) # Forward pass # logp, mean, logv, z = model(batch['input'], batch['length']) logp, mean, logv, z = model(input_batch_tensor, length_batch_tensor) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn( logp, target_batch_tensor, length_batch_tensor, mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 iteration += 1 # bookkeepeing # print("elbo", tracker['ELBO']) # print("loss", loss) if iteration == 0: tracker['ELBO'] = loss.data tracker['ELBO'] = tracker['ELBO'].view(1) else: tracker['ELBO'] = torch.cat( (tracker['ELBO'], loss.view(1))) if args.tensorboard_logging: # print(loss.data) writer.add_scalar("%s/ELBO" % split.upper(), loss.data.item(), epoch * iteration_total + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.data.item() / batch_size, epoch * iteration_total + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.data.item() / batch_size, epoch * iteration_total + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * iteration_total + iteration) if iteration % args.print_every == 0 or iteration + 1 == iteration_total: print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, iteration_total - 1, loss.data.item(), NLL_loss.data.item() / batch_size, KL_loss.data.item() / batch_size, KL_weight)) # if split == 'valid': # if 'target_sents' not in tracker: # tracker['target_sents'] = list() # tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) # # print("z", tracker['z'], z) # tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) # break print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) cur_ELBO = torch.mean(tracker['ELBO']) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), cur_ELBO, epoch) if split == "valid": if cur_ELBO < last_ELBO: save_mode = True else: save_mode = False last_ELBO = cur_ELBO # save a dump of all sentences and the encoded latent space # if split == 'valid': # dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} # if not os.path.exists(os.path.join('dumps', ts)): # os.makedirs('dumps/'+ts) # with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: # json.dump(dump,dump_file) # save checkpoint if split == 'train': # checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) checkpoint_path = os.path.join(save_model_path, "best.pytorch") if save_mode == True: torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) total_steps = (len(datasets["train"]) // args.batch_size) * args.epochs print("Train dataset size", total_steps) def kl_anneal_function(anneal_function, step): if anneal_function == 'identity': return 1 if anneal_function == 'linear': if args.warmup is None: return 1 - (total_steps - step) / total_steps else: warmup_steps = (total_steps / args.epochs) * args.warmup return 1 - (warmup_steps - step ) / warmup_steps if step < warmup_steps else 1.0 ReconLoss = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step) return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['negELBO'] = torch.cat( (tracker['negELBO'], loss.data.unsqueeze(0))) if args.tensorboard_logging: neg_elbo = (recon_loss + KL_loss) / batch_size writer.add_scalar("%s/Negative_ELBO" % split.upper(), neg_elbo.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): logger.info( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data[0], recon_loss.data[0] / batch_size, KL_loss.data[0] / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['negELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/NegELBO" % split.upper(), torch.mean(tracker['negELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s" % checkpoint_path) if args.num_samples: torch.cuda.empty_cache() model.eval() with torch.no_grad(): print(f"Generating {args.num_samples} samples") generations, _ = model.inference(n=args.num_samples) vocab = datasets["train"].i2w print( "Sampled latent codes from z ~ N(0, I), generated sentences:") for i, generation in enumerate(generations, start=1): sentence = [vocab[str(word.item())] for word in generation] print(f"{i}:", " ".join(sentence))
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.localtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ, use_bert=args. False) model = SentenceVAE(alphabet_size=datasets['train'].alphabet_size, vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) print("Saving model to directory: " + save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) def word_weight_function(step, k, x0): return float(1 / (1 + np.exp(-k * (step - x0)))) NLL = torch.nn.NLLLoss(reduction='sum', ignore_index=datasets['train'].pad_idx) def loss_fn(def_logp, word_logp, def_target, def_length, word_target, word_length, mean, logv): # cut-off unnecessary padding from target definition, and flatten def_target = def_target[:, :torch.max(def_length).item()].contiguous( ).view(-1) def_logp = def_logp.view(-1, def_logp.size(2)) # Negative Log Likelihood def_NLL_loss = NLL(def_logp, def_target) # cut off padding for words word_target = word_target[:, :torch.max(word_length).item( )].contiguous().view(-1) word_logp = word_logp.view(-1, word_logp.size(2)) # Word NLL word_NLL_loss = NLL(word_logp, word_target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) return def_NLL_loss, word_NLL_loss, KL_loss def get_weights(anneal_function, step, k, x0): # for logistic function, k = growth rate KL_weight = kl_anneal_function(anneal_function, step, k, x0) word_weight = word_weight_function(step, k, x0) return {'def': 1, 'word': word_weight, 'kl': KL_weight} optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model = model.train() else: model = model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass [def_logp, word_logp], mean, logv, z = model(batch['input'], batch['length'], batch['word_length']) # loss calculation def_NLL_loss, word_NLL_loss, KL_loss = loss_fn( def_logp, word_logp, batch['target'], batch['length'], batch['word'], batch['word_length'], mean, logv) weights = get_weights(args.anneal_function, step, args.k, args.x0) loss = (weights['def'] * def_NLL_loss + weights['word'] * word_NLL_loss + weights['kl'] * KL_loss) / batch_size mean_logv = torch.mean(logv) # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['ELBO'] = torch.cat( (tracker['ELBO'], loss.detach().unsqueeze(0))) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch * len(data_loader) + iteration) writer.add_scalar("%s/Def NLL Loss" % split.upper(), def_NLL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/Word NLL Loss" % split.upper(), word_NLL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), weights['kl'], epoch * len(data_loader) + iteration) writer.add_scalar("%s/Word Weight" % split.upper(), weights['word'], epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): print( "%s Batch %04d/%i, Loss %9.4f, Def NLL-Loss %9.4f, Word NLL-Loss %9.4f Word-Weight %6.3f, KL-Loss %9.4f, KL-Weight %6.3f KL-VAL %9.4f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), def_NLL_loss.item() / batch_size, word_NLL_loss.item() / batch_size, weights['word'], KL_loss.item() / batch_size, weights['kl'], mean_logv)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'], i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) ptb = PTB(vocab_file=args.vocab_file, train_file=args.train_file, train_with_vocab=False, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) datasets = PTBDataset(ptb) print('done preprocessing data') model = SentenceVAE(vocab_size=datasets.vocab_size, sos_idx=datasets.sos_idx, eos_idx=datasets.eos_idx, pad_idx=datasets.pad_idx, unk_idx=datasets.unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) model.ptb = ptb if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets.pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten #target = target[:, :torch.max(length).data[0]].contiguous().view(-1) target = target[:, :torch.max(length).data].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 model.train() split = 'train' for epoch in range(args.epochs): data_loader = DataLoader(dataset=datasets, batch_size=args.batch_size, shuffle=True, num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(list) for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass #logp, mean, logv, z = model(batch['input'], batch['length']) logp, mean, logv, z, encoder_last = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['ELBO'].append(loss.data.cpu().numpy().tolist()) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.data, epoch * len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.data / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.data / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data, NLL_loss.data / batch_size, KL_loss.data / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets.get_i2w(), pad_idx=datasets.pad_idx) tracker['z'].append(z.data) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, np.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), np.mean(tracker['ELBO']), epoch) ''' # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z']} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) ''' # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) joblib.dump(model.cpu(), checkpoint_path) print("Model saved at %s" % checkpoint_path) if torch.cuda.is_available(): model.cuda()
else: weights = torch.FloatTensor(embedding.syn0) model = SentenceVAE(weights.size(0), sos_idx, eos_idx, pad_idx, training=True).to(device) def init_weights(m): if type(m) == torch.nn.Linear: torch.nn.init.xavier_uniform(m.weight) m.bias.data.fill_(0) model.apply(init_weights) model.emb = nn.Embedding.from_pretrained(weights) optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=1e-3) def kl_anneal_function(step): k = args.k x0 = args.x0 weight = float(1 / (1 + np.exp(-k * (step - x0)))) return weight criterion = torch.nn.NLLLoss(size_average=False, ignore_index=pad_idx) def loss_function(reconx, x, mu, logvar, step):
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) print("Loading Vocab", args.vocab_path) vocab = WordVocab.load_vocab(args.vocab_path) print("Vocab Size: ", len(vocab)) print("Loading Train Dataset", args.train_dataset) train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.max_sequence_length, corpus_lines=args.corpus_lines, on_memory=args.on_memory) print("Loading Test Dataset", args.test_dataset) test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.max_sequence_length, on_memory=args.on_memory) \ if args.test_dataset is not None else None print("Creating Dataloader") train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \ if test_dataset is not None else None splits = ['train', 'test'] data_loaders = { 'train': train_data_loader, 'test': test_data_loader } model = SentenceVAE( vocab_size=len(vocab), sos_idx=vocab.sos_index, eos_idx=vocab.eos_index, pad_idx=vocab.pad_index, unk_idx=vocab.unk_index, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args,ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path) if not os.path.exists(save_model_path): os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/(1+np.exp(-k*(step-x0)))) elif anneal_function == 'linear': return min(1, step/x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=vocab.pad_index) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = data_loaders[split] tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() correct = 0 close = 0 total = 0 for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['raw_length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['raw_length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss)/batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 correct += logp.argmax(dim=1).eq(batch['target']).sum().item() close += torch.mul(logp.argmax(dim=1).ge(batch["target"]-10), logp.argmax(dim=1).le(batch["target"]+10)).sum().item() total += batch['target'].nelement() # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.view(1,))) if args.tensorboard_logging: writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" %(split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size, KL_loss.item()/batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['raw'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f, acc %f, clo %f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']), correct/total, close/total)) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s"%checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] datasets = OrderedDict() for split in splits: datasets[split] = PoetryDataset( data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional, condition_size=7) if torch.cuda.is_available(): model = model.cuda() if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def calculate_bleu_scores(original, decoded): reference = original.split(' ') hypothesis = decoded.split(' ') return nltk.translate.bleu_score.sentence_bleu([reference], hypothesis) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length)].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): total_BLEU_score = 0 for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=0, pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model( batch['input'], batch['length'], condition=batch['category'].float()) # logp, mean, logv, z = model(batch['input'], batch['length'], condition=None) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['ELBO'] = torch.cat( (tracker['ELBO'], loss.data.unsqueeze(0))) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data.item(), NLL_loss.data.item() / batch_size, KL_loss.data.item() / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) # Calculate BLEU score decoded = torch.argmax(logp, dim=-1) for i in range(decoded.shape[0]): decoded_poem = idx2word( [decoded[i]], i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)[0] original_poem = idx2word( [batch['target'].data[i]], i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)[0] total_BLEU_score += calculate_bleu_scores( original_poem, decoded_poem) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) if split == 'valid': print("Average BLEU {}".format(total_BLEU_score / decoded.shape[0])) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)