def main(args):
    with open(args.data_dir + '/poems.vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    model = SentenceVAE(vocab_size=len(w2i),
                        sos_idx=w2i['<sos>'],
                        eos_idx=w2i['<eos>'],
                        pad_idx=w2i['<pad>'],
                        unk_idx=w2i['<unk>'],
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional,
                        condition_size=0)

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(
        torch.load(args.load_checkpoint, map_location=torch.device('cpu')))
    print("Model loaded from %s" % (args.load_checkpoint))

    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()

    # Unconditional samples from the prior
    samples, z = model.inference(n=args.num_samples)
    print('----------SAMPLES----------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')

    # while True:
    #     samples, z = model.inference(n=1, condition=torch.Tensor([[1, 0, 0, 0, 0, 0, 0]]).cuda())
    #     poem = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>'])[0]
    #     if 'love' in poem:
    #         breakpoint()

    # Interpolation between two random points in latent space
    z1 = torch.randn([args.latent_size]).numpy()
    z2 = torch.randn([args.latent_size]).numpy()
    z = to_var(
        torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())
    # samples, _ = model.inference(z=z, condition=torch.Tensor([[1, 0, 0, 0, 0, 0, 0]] * 10).cuda())
    samples, _ = model.inference(z=z)
    print('-------INTERPOLATION-------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
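# The inference scripts in this file rely on two small helpers, `to_var` and
# `interpolate`, that are not shown here. The definitions below are a minimal
# sketch of what they are assumed to do (move tensors to the GPU when one is
# available, and linearly interpolate between two latent vectors); the exact
# implementations in the original utils module may differ.
import numpy as np
import torch


def to_var(x):
    # Assumed helper: move a tensor to the GPU if available, otherwise return it unchanged.
    if torch.cuda.is_available():
        x = x.cuda()
    return x


def interpolate(start, end, steps):
    # Assumed helper: linear interpolation between two latent vectors,
    # returning `steps + 2` rows (both endpoints included).
    interpolation = np.zeros((start.shape[0], steps + 2))
    for dim, (s, e) in enumerate(zip(start, end)):
        interpolation[dim] = np.linspace(s, e, steps + 2)
    return interpolation.T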
def main(args):
    with open(args.data_dir + '/ptb.vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    model = SentenceVAE(vocab_size=len(w2i),
                        sos_idx=w2i['<sos>'],
                        eos_idx=w2i['<eos>'],
                        pad_idx=w2i['<pad>'],
                        unk_idx=w2i['<unk>'],
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(torch.load(args.load_checkpoint))
    print("Model loaded from %s" % args.load_checkpoint)

    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()

    # samples, z = model.inference(n=args.num_samples)
    # print('----------SAMPLES----------')
    # print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')

    # z_ = torch.randn([args.latent_size]).numpy()
    # input_sent = "the n stock specialist firms on the big board floor the buyers and sellers of last resort who were criticized after the n crash once again could n't handle the selling pressure"
    input_sent = "looking for a job was one of the most anxious periods of my life and is for most people"
    batch_input = torch.LongTensor([[w2i[i] for i in input_sent.split()]]).cuda()
    batch_len = torch.LongTensor([len(input_sent.split())]).cuda()
    input_mean = model(batch_input, batch_len, output_mean=True)
    z_ = input_mean.cpu().detach().numpy()
    print(z_.shape)

    # z2 = torch.randn([args.latent_size]).numpy()
    # Traverse each latent dimension around the encoded sentence
    for i in range(args.latent_size):
        print(f"-------Dimension {i}------")
        z1, z2 = z_.copy(), z_.copy()
        z1[i] -= 0.5
        z2[i] += 0.5
        z = to_var(
            torch.from_numpy(interpolate(start=z1, end=z2, steps=5)).float())
        samples, _ = model.inference(z=z)
        print('-------INTERPOLATION-------')
        print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
def main(args):
    with open(args.data_dir + '/vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']
    a2i, i2a = vocab['a2i'], vocab['i2a']

    model = SentenceVAE(
        vocab_size=len(w2i),
        alphabet_size=len(a2i),
        sos_idx=w2i['<sos>'],
        eos_idx=w2i['<eos>'],
        pad_idx=w2i['<pad>'],
        unk_idx=w2i['<unk>'],
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(torch.load(args.load_checkpoint))
    print("Model loaded from %s" % (args.load_checkpoint))

    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()

    samples, z = model.inference(n=args.num_samples)
    print('----------SAMPLES----------')
    print(*idx2defandword(samples, i2w=i2w, i2a=i2a, pad_idx=w2i['<pad>']), sep='\n')

    z1 = torch.randn([args.latent_size]).numpy()
    z2 = torch.randn([args.latent_size]).numpy()
    z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())
    samples, _ = model.inference(z=z)
    print('-------INTERPOLATION-------')
    print(*idx2defandword(samples, i2w=i2w, i2a=i2a, pad_idx=w2i['<pad>']), sep='\n')
def load_vae_model_from_args(args):
    with open(args.data_dir + '/ptb.vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    model = SentenceVAE(
        vocab_size=len(w2i),
        sos_idx=w2i['<sos>'],
        eos_idx=w2i['<eos>'],
        pad_idx=w2i['<pad>'],
        unk_idx=w2i['<unk>'],
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )
    tokenizer = DefaultTokenizer()

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(torch.load(args.load_checkpoint))
    print("Model loaded from %s" % args.load_checkpoint)

    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()

    return {
        'model': model,
        'tokenizer': tokenizer,
        'w2i': w2i,
        'i2w': i2w,
    }
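# A hypothetical usage sketch for `load_vae_model_from_args`: build an argparse
# Namespace with the fields the loader expects, restore the checkpoint, and decode
# a few samples from the prior. The paths and hyperparameter values below are
# illustrative assumptions, not the defaults of the original script; `idx2word`
# is assumed to come from the same utils module used elsewhere in this file.
from argparse import Namespace

import torch


def sample_from_checkpoint():
    args = Namespace(
        data_dir='data',                           # directory containing ptb.vocab.json (assumed)
        load_checkpoint='checkpoints/E9.pytorch',  # hypothetical checkpoint path
        max_sequence_length=60,
        embedding_size=300,
        rnn_type='gru',
        hidden_size=256,
        word_dropout=0.0,
        embedding_dropout=0.5,
        latent_size=16,
        num_layers=1,
        bidirectional=False,
    )
    loaded = load_vae_model_from_args(args)
    model, w2i, i2w = loaded['model'], loaded['w2i'], loaded['i2w']
    with torch.no_grad():
        samples, _ = model.inference(n=5)
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')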
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: if args.dataset == 'ptb': Dataset = PTB elif args.dataset == 'twitter': Dataset = PoliticianTweets else: print("Invalid dataset. Exiting") exit() datasets[split] = Dataset( data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ ) model = SentenceVAE( vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) # if args.from_file != "": # model = torch.load(args.from_file) # if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter(os.path.join(args.logdir, experiment_name(args,ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) if 'sigmoid' in args.anneal_function and args.dataset=='ptb': linspace = np.linspace(-5,5,13160) # 13160 = number of training examples in ptb elif 'sigmoid' in args.anneal_function and args.dataset=='twitter': linspace = np.linspace(-5, 5, 25190) #6411/25190? = number of training examples in short version of twitter def kl_anneal_function(anneal_function, step, param_dict=None): if anneal_function == 'identity': return 1 elif anneal_function == 'sigmoid' or anneal_function=='sigmoid_klt': s = 1/(len(linspace)) return(float((1)/(1+np.exp(-param_dict['ag']*(linspace[step]))))) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, param_dict=None): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) if args.anneal_function == 'sigmoid_klt': if float(KL_loss)/args.batch_size < param_dict['kl_threshold']: # print("KL_loss of %s is below threshold %s. 
Returning this threshold instead"%(float(KL_loss)/args.batch_size,param_dict['kl_threshold'])) KL_loss = to_var(torch.Tensor([param_dict['kl_threshold']*args.batch_size])) KL_weight = kl_anneal_function(anneal_function, step, {'ag': args.anneal_aggression}) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader( dataset=datasets[split], batch_size=args.batch_size, shuffle=split=='train', num_workers=0, pin_memory=torch.cuda.is_available() ) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) if split == 'train' and batch_size != args.batch_size: print("WARNING: Found different batch size\nargs.batch_size= %s, input_size=%s"%(args.batch_size, batch_size)) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, {'kl_threshold': args.kl_threshold}) loss = (NLL_loss + KL_weight * KL_loss)/batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # print(step) # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data)) if args.tensorboard_logging: writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL_Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) # print("Step %s: %s"%(epoch*len(data_loader) + iteration, KL_weight)) writer.add_scalar("%s/KL_Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): logger.info("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" %(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) logger.info("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s"%checkpoint_path) torch.save(model, f"model-{args.dataset}-{ts}.pickle")
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) print("Loading Vocab", args.vocab_path) vocab = WordVocab.load_vocab(args.vocab_path) print("Vocab Size: ", len(vocab)) print("Loading Train Dataset", args.train_dataset) train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.max_sequence_length, corpus_lines=args.corpus_lines, on_memory=args.on_memory) print("Loading Test Dataset", args.test_dataset) test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.max_sequence_length, on_memory=args.on_memory) \ if args.test_dataset is not None else None print("Creating Dataloader") train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \ if test_dataset is not None else None splits = ['train', 'test'] data_loaders = { 'train': train_data_loader, 'test': test_data_loader } model = SentenceVAE( vocab_size=len(vocab), sos_idx=vocab.sos_index, eos_idx=vocab.eos_index, pad_idx=vocab.pad_index, unk_idx=vocab.unk_index, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args,ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path) if not os.path.exists(save_model_path): os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/(1+np.exp(-k*(step-x0)))) elif anneal_function == 'linear': return min(1, step/x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=vocab.pad_index) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = data_loaders[split] tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() correct = 0 close = 0 total = 0 for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['raw_length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['raw_length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss)/batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 correct += logp.argmax(dim=1).eq(batch['target']).sum().item() close += torch.mul(logp.argmax(dim=1).ge(batch["target"]-10), 
logp.argmax(dim=1).le(batch["target"]+10)).sum().item() total += batch['target'].nelement() # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.view(1,))) if args.tensorboard_logging: writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" %(split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size, KL_loss.item()/batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['raw'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f, acc %f, clo %f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']), correct/total, close/total)) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s"%checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) log_file = open("res.txt", "a") log_file.write(expierment_name(args, ts)) log_file.write("\n") graph_file = open("elbo-graph.txt", "a") graph_file.write(expierment_name(args, ts)) graph_file.write("\n") model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) elif anneal_function == "softplus": return min(1, np.log(1 + np.exp(k * step))) elif anneal_function == "no": return 1 NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 val_lowest_elbo = 5000 val_accu_epoch = 0 val_min_epoch = 0 split_elbo = {"train": [], "valid": []} if args.test: split_elbo["test"] = [] split_loss = {"train": [], "valid": []} if args.test: split_loss["test"] = [] for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) if split != 'train': KL_weight = 1.0 loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step 
+= 1 # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data)) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data[0], NLL_loss.data[0] / batch_size, KL_loss.data[0] / batch_size, KL_weight)) split_loss[split].append([ loss.data[0], NLL_loss.data[0] / batch_size, KL_loss.data[0] / batch_size ]) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) split_elbo[split].append([torch.mean(tracker["ELBO"])]) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path) if split == 'valid': if torch.mean(tracker['ELBO']) < val_lowest_elbo: val_lowest_elbo = torch.mean(tracker['ELBO']) val_accu_epoch = 0 val_min_epoch = epoch else: val_accu_epoch += 1 if val_accu_epoch >= 3: if not args.test: exp_str = "" exp_str += "train_ELBO={}\n".format( split_elbo["train"][val_min_epoch]) exp_str += "valid_ELBO={}\n".format( split_elbo["valid"][val_min_epoch]) exp_str += "==========\n" log_file.write(exp_str) log_file.close() print(exp_str) graph_file.write("ELBO\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[0]) line += "\n" graph_file.write(line) graph_file.write("NLL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[1]) line += "\n" graph_file.write(line) graph_file.write("KL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[2]) line += "\n" graph_file.write(line) graph_file.close() exit() elif split == 'test' and val_accu_epoch >= 3: exp_str = "" exp_str += "train_ELBO={}\n".format( split_elbo["train"][val_min_epoch]) exp_str += "valid_ELBO={}\n".format( split_elbo["valid"][val_min_epoch]) exp_str += "test_ELBO={}\n".format( split_elbo["test"][val_min_epoch]) exp_str += "==========\n" log_file.write(exp_str) log_file.close() print(exp_str) graph_file.write("ELBO\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[0]) line += "\n" for s in splits: for i in split_elbo[s]: line += "{},".format(i[0]) line += "\n" graph_file.write(line) 
graph_file.write("NLL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[1]) line += "\n" graph_file.write(line) graph_file.write("KL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[2]) line += "\n" graph_file.write(line) graph_file.close() exit() if epoch == args.epochs - 1: exp_str = "" exp_str += "train_ELBO={}\n".format( split_elbo["train"][val_min_epoch]) exp_str += "valid_ELBO={}\n".format( split_elbo["valid"][val_min_epoch]) if args.test: exp_str += "test_ELBO={}\n".format( split_elbo["test"][val_min_epoch]) exp_str += "==========\n" log_file.write(exp_str) log_file.close() print(exp_str) graph_file.write("ELBO\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[0]) line += "\n" graph_file.write(line) graph_file.write("NLL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[1]) line += "\n" graph_file.write(line) graph_file.write("KL\n") line = "" for s in splits: for i in split_loss[s]: line += "{},".format(i[2]) line += "\n" graph_file.write(line) graph_file.close() exit()
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def sigmoid(step): x = step - 6569.5 if x < 0: a = np.exp(x) res = (a / (1 + a)) else: res = (1 / (1 + np.exp(-x))) return float(res) def frange_cycle_linear(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5): L = np.ones(n_iter) * stop period = n_iter / n_cycle step = (stop - start) / (period * ratio) # linear schedule for c in range(n_cycle): v, i = start, 0 while v <= stop and (int(i + c * period) < n_iter): L[int(i + c * period)] = v v += step i += 1 return L n_iter = 0 for epoch in range(args.epochs): split = 'train' data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) for iteration, batch in enumerate(data_loader): n_iter += 1 print("Total no of iterations = " + str(n_iter)) L = frange_cycle_linear(n_iter) def kl_anneal_function(anneal_function, step): if anneal_function == 'identity': return 1 if anneal_function == 'sigmoid': return sigmoid(step) if anneal_function == 'cyclic': return float(L[step]) ReconLoss = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, split='train'): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) if split == 'train': KL_weight = kl_anneal_function(anneal_function, step) else: KL_weight = 1 return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], 
batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, split) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data)) if args.tensorboard_logging: writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): logger.info( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data[0], recon_loss.data[0] / batch_size, KL_loss.data[0] / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['negELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/NegELBO" % split.upper(), torch.mean(tracker['negELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s" % checkpoint_path)
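# A small, standalone sketch of the cyclical KL-annealing schedule used above
# (`frange_cycle_linear`): within each cycle the KL weight ramps linearly from
# 0 to 1 over the first half of the cycle and then holds at 1. This is a
# re-implementation for illustration only; the parameter values in the demo are
# assumptions, not the training script's settings.
import numpy as np


def frange_cycle_linear(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5):
    L = np.ones(n_iter) * stop
    period = n_iter / n_cycle
    step = (stop - start) / (period * ratio)  # slope of the linear ramp inside each cycle
    for c in range(n_cycle):
        v, i = start, 0
        while v <= stop and int(i + c * period) < n_iter:
            L[int(i + c * period)] = v
            v += step
            i += 1
    return L


if __name__ == '__main__':
    schedule = frange_cycle_linear(n_iter=1000, n_cycle=4, ratio=0.5)
    # Print the KL weight at a few steps to show the ramp-and-hold pattern.
    for s in (0, 100, 124, 125, 250, 375, 499):
        print(f"step {s:4d}: KL weight = {schedule[s]:.3f}")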
def main(arguments):
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Logistics
    parser.add_argument("--cuda", help="CUDA id to use", type=int, default=0)
    parser.add_argument("--seed", help="Random seed", type=int, default=19)
    parser.add_argument("--use_pytorch", help="1 to use PyTorch", type=int, default=1)
    parser.add_argument("--out_dir", help="Dir to write preds to", type=str, default='')
    parser.add_argument("--log_file", help="File to log to", type=str)
    parser.add_argument("--load_data", help="0 to read data from scratch", type=int, default=1)

    # Task options
    parser.add_argument("--tasks", help="Tasks to evaluate on, as a comma separated list", type=str)
    parser.add_argument("--max_seq_len", help="Max sequence length", type=int, default=40)

    # Model options
    parser.add_argument("--ckpt_path", help="Path to ckpt to load", type=str,
                        default=PATH_PREFIX + 'ckpts/svae/glue_svae/best.mdl')
    parser.add_argument("--vocab_path", help="Path to vocab to use", type=str,
                        default=PATH_PREFIX + 'processed_data/svae/glue_v2/vocab.json')
    parser.add_argument("--model", help="Model type (vae or ae)", type=str, default='vae')
    parser.add_argument("--embedding_size", help="Word emb dim", type=int, default=300)
    parser.add_argument("--word_dropout", help="Word dropout rate", type=float, default=0.5)
    parser.add_argument("--hidden_size", help="RNN size", type=int, default=512)
    parser.add_argument("--latent_size", help="Latent vector dim", type=int, default=16)
    parser.add_argument("--num_layers", help="Number of encoder layers", type=int, default=1)
    parser.add_argument("--bidirectional", help="1 for bidirectional", type=bool, default=False)
    parser.add_argument("--rnn_type", help="Type of rnn", type=str, choices=['rnn', 'gru'], default='gru')
    parser.add_argument("--batch_size", help="Batch size to use", type=int, default=64)

    # Classifier options
    parser.add_argument("--cls_batch_size", help="Classifier batch size to use", type=int, default=64)

    args = parser.parse_args(arguments)

    logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)
    if args.log_file:
        fileHandler = logging.FileHandler(args.log_file)
        logging.getLogger().addHandler(fileHandler)
    logging.info(args)

    # define senteval params
    params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': args.use_pytorch, 'kfold': 10,
                       'max_seq_len': args.max_seq_len, 'batch_size': args.batch_size,
                       'load_data': args.load_data, 'seed': args.seed}
    params_senteval['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': args.cls_batch_size,
                                     'tenacity': 5, 'epoch_size': 4, 'cudaEfficient': True}

    # Load the sentence encoder (VAE or AE) model
    vocab = json.load(open(args.vocab_path, 'r'))
    args.denoise = False
    args.prob_swap, args.prob_drop = 0.0, 0.0
    if args.model == 'vae':
        model = SentenceVAE(args, vocab['w2i'],
                            #sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'],
                            #max_sequence_length=args.max_seq_len,
                            embedding_size=args.embedding_size,
                            rnn_type=args.rnn_type,
                            hidden_size=args.hidden_size,
                            word_dropout=args.word_dropout,
                            latent_size=args.latent_size,
                            num_layers=args.num_layers,
                            bidirectional=args.bidirectional)
    elif args.model == 'ae':
        model = SentenceAE(args, vocab['w2i'],
                           embedding_size=args.embedding_size,
                           rnn_type=args.rnn_type,
                           hidden_size=args.hidden_size,
                           word_dropout=args.word_dropout,
                           latent_size=args.latent_size,
                           num_layers=args.num_layers,
                           bidirectional=args.bidirectional)
    model.load_state_dict(torch.load(args.ckpt_path))
    model = model.cuda()
    model.eval()
    params_senteval['model'] = model

    # Do SentEval stuff
    se = senteval.engine.SE(params_senteval, batcher, prepare)
    tasks = get_tasks(args.tasks)
    results = se.eval(tasks)
    if args.out_dir:
        write_results(results, args.out_dir)
    if not args.log_file:
        print(results)
    else:
        logging.info(results)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) RANDOM_SEED = 42 dataset = load_dataset("yelp_polarity", split="train") TRAIN_SIZE = len(dataset) - 2_000 VALID_SIZE = 1_000 TEST_SIZE = 1_000 train_test_split = dataset.train_test_split(train_size=TRAIN_SIZE, seed=RANDOM_SEED) train_dataset = train_test_split["train"] test_val_dataset = train_test_split["test"].train_test_split( train_size=VALID_SIZE, test_size=TEST_SIZE, seed=RANDOM_SEED) val_dataset, test_dataset = test_val_dataset["train"], test_val_dataset[ "test"] tokenizer = AutoTokenizer.from_pretrained(args.model_name, use_fast=True) datasets = OrderedDict() datasets['train'] = TextDataset(train_dataset, tokenizer, args.max_sequence_length, not args.disable_sent_tokenize) datasets['valid'] = TextDataset(val_dataset, tokenizer, args.max_sequence_length, not args.disable_sent_tokenize) if args.test: datasets['text'] = TextDataset(test_dataset, tokenizer, args.max_sequence_length, not args.disable_sent_tokenize) print( f"Loading {args.model_name} model. Setting {args.trainable_layers} trainable layers." ) encoder = AutoModel.from_pretrained(args.model_name, return_dict=True) if not args.train_embeddings: for p in encoder.embeddings.parameters(): p.requires_grad = False encoder_layers = encoder.encoder.layer if args.trainable_layers > len(encoder_layers): warnings.warn( f"You are asking to train {args.trainable_layers} layers, but this model has only {len(encoder_layers)}" ) for layer in range(len(encoder_layers) - args.trainable_layers): for p in encoder_layers[layer].parameters(): p.requires_grad = False params = dict(vocab_size=datasets['train'].vocab_size, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional, max_sequence_length=args.max_sequence_length) model = SentenceVAE(encoder=encoder, tokenizer=tokenizer, **params) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f: json.dump(params, f, indent=4) with open(os.path.join(save_model_path, 'train_args.json'), 'w') as f: json.dump(vars(args), f, indent=4) def kl_anneal_function(anneal_function, step, k, x0): if step <= x0: return args.initial_kl_weight if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0 - 2500)))) elif anneal_function == 'linear': return min(1, step / x0) NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum') def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).item()].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight params = [{ 'params': model.encoder.parameters(), 'lr': args.encoder_learning_rate }, { 'params': [ 
*model.decoder_rnn.parameters(), *model.hidden2mean.parameters(), *model.hidden2logv.parameters(), *model.latent2hidden.parameters(), *model.outputs2vocab.parameters() ] }] optimizer = torch.optim.Adam(params, lr=args.learning_rate, weight_decay=args.weight_decay) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=(split == 'train'), num_workers=cpu_count(), pin_memory=torch.cuda.is_available(), collate_fn=DataCollator(tokenizer)) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['attention_mask'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['ELBO'] = torch.cat( (tracker['ELBO'], loss.data.view(1, -1)), dim=0) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch * len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), NLL_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].tolist(), tokenizer=tokenizer) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, tracker['ELBO'].mean())) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences, the encoded latent space and generated sequences if split == 'valid': samples, _ = model.inference(z=tracker['z']) generated_sents = idx2word(samples.tolist(), tokenizer) sents = [{ 'original': target, 'generated': generated } for target, generated in zip(tracker['target_sents'], generated_sents)] dump = {'sentences': sents, 'z': tracker['z'].tolist()} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file, indent=3) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: # datasets[split] = BGoogle( # data_dir=args.data_dir, # split=split, # create_data=args.create_data, # batch_size=args.batch_size , # max_sequence_length=args.max_sequence_length, # min_occ=args.min_occ # ) datasets[split] = Amazon(data_dir=args.data_dir, split=split, create_data=args.create_data, batch_size=args.batch_size, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) tokenizer = TweetTokenizer(preserve_case=False) vocab_file = "amazon.vocab.json" with open(os.path.join(args.data_dir, vocab_file), 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) # save_model_path = os.path.join(args.save_model_path, ts) save_model_path = args.save_model_path if not os.path.exists(save_model_path): os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 save_mode = True last_ELBO = 1e32 for epoch in range(args.epochs): print("+" * 20) # f_test_example(model, tokenizer, w2i, i2w) for split in splits: # data_loader = DataLoader( # dataset=datasets[split], # batch_size=args.batch_size, # shuffle=split=='train', # num_workers=cpu_count(), # pin_memory=torch.cuda.is_available() # ) batch_size = args.batch_size tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() # for iteration, batch in enumerate(data_loader): iteration = 0 iteration_total = datasets[split].batch_num print("batch_num", iteration_total) for input_batch_tensor, target_batch_tensor, length_batch_tensor in datasets[ split]: if torch.is_tensor(input_batch_tensor): input_batch_tensor = to_var(input_batch_tensor) if torch.is_tensor(target_batch_tensor): target_batch_tensor = to_var(target_batch_tensor) if torch.is_tensor(length_batch_tensor): 
length_batch_tensor = to_var(length_batch_tensor) # batch_size = batch['input'].size(0) # for k, v in batch.items(): # if torch.is_tensor(v): # batch[k] = to_var(v) # Forward pass # logp, mean, logv, z = model(batch['input'], batch['length']) logp, mean, logv, z = model(input_batch_tensor, length_batch_tensor) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn( logp, target_batch_tensor, length_batch_tensor, mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 iteration += 1 # bookkeepeing # print("elbo", tracker['ELBO']) # print("loss", loss) if iteration == 0: tracker['ELBO'] = loss.data tracker['ELBO'] = tracker['ELBO'].view(1) else: tracker['ELBO'] = torch.cat( (tracker['ELBO'], loss.view(1))) if args.tensorboard_logging: # print(loss.data) writer.add_scalar("%s/ELBO" % split.upper(), loss.data.item(), epoch * iteration_total + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.data.item() / batch_size, epoch * iteration_total + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.data.item() / batch_size, epoch * iteration_total + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * iteration_total + iteration) if iteration % args.print_every == 0 or iteration + 1 == iteration_total: print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, iteration_total - 1, loss.data.item(), NLL_loss.data.item() / batch_size, KL_loss.data.item() / batch_size, KL_weight)) # if split == 'valid': # if 'target_sents' not in tracker: # tracker['target_sents'] = list() # tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) # # print("z", tracker['z'], z) # tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) # break print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) cur_ELBO = torch.mean(tracker['ELBO']) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), cur_ELBO, epoch) if split == "valid": if cur_ELBO < last_ELBO: save_mode = True else: save_mode = False last_ELBO = cur_ELBO # save a dump of all sentences and the encoded latent space # if split == 'valid': # dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} # if not os.path.exists(os.path.join('dumps', ts)): # os.makedirs('dumps/'+ts) # with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: # json.dump(dump,dump_file) # save checkpoint if split == 'train': # checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) checkpoint_path = os.path.join(save_model_path, "best.pytorch") if save_mode == True: torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.localtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ, use_bert=args. False) model = SentenceVAE(alphabet_size=datasets['train'].alphabet_size, vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) print("Saving model to directory: " + save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) def word_weight_function(step, k, x0): return float(1 / (1 + np.exp(-k * (step - x0)))) NLL = torch.nn.NLLLoss(reduction='sum', ignore_index=datasets['train'].pad_idx) def loss_fn(def_logp, word_logp, def_target, def_length, word_target, word_length, mean, logv): # cut-off unnecessary padding from target definition, and flatten def_target = def_target[:, :torch.max(def_length).item()].contiguous( ).view(-1) def_logp = def_logp.view(-1, def_logp.size(2)) # Negative Log Likelihood def_NLL_loss = NLL(def_logp, def_target) # cut off padding for words word_target = word_target[:, :torch.max(word_length).item( )].contiguous().view(-1) word_logp = word_logp.view(-1, word_logp.size(2)) # Word NLL word_NLL_loss = NLL(word_logp, word_target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) return def_NLL_loss, word_NLL_loss, KL_loss def get_weights(anneal_function, step, k, x0): # for logistic function, k = growth rate KL_weight = kl_anneal_function(anneal_function, step, k, x0) word_weight = word_weight_function(step, k, x0) return {'def': 1, 'word': word_weight, 'kl': KL_weight} optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model = model.train() else: model = model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass [def_logp, word_logp], mean, logv, z = model(batch['input'], batch['length'], batch['word_length']) # loss calculation def_NLL_loss, word_NLL_loss, KL_loss = loss_fn( def_logp, word_logp, batch['target'], batch['length'], batch['word'], 
batch['word_length'], mean, logv) weights = get_weights(args.anneal_function, step, args.k, args.x0) loss = (weights['def'] * def_NLL_loss + weights['word'] * word_NLL_loss + weights['kl'] * KL_loss) / batch_size mean_logv = torch.mean(logv) # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['ELBO'] = torch.cat( (tracker['ELBO'], loss.detach().unsqueeze(0))) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch * len(data_loader) + iteration) writer.add_scalar("%s/Def NLL Loss" % split.upper(), def_NLL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/Word NLL Loss" % split.upper(), word_NLL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), weights['kl'], epoch * len(data_loader) + iteration) writer.add_scalar("%s/Word Weight" % split.upper(), weights['word'], epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): print( "%s Batch %04d/%i, Loss %9.4f, Def NLL-Loss %9.4f, Word NLL-Loss %9.4f Word-Weight %6.3f, KL-Loss %9.4f, KL-Weight %6.3f KL-VAL %9.4f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), def_NLL_loss.item() / batch_size, word_NLL_loss.item() / batch_size, weights['word'], KL_loss.item() / batch_size, weights['kl'], mean_logv)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'], i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) ptb = PTB(vocab_file=args.vocab_file, train_file=args.train_file, train_with_vocab=False, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) datasets = PTBDataset(ptb) print('done preprocessing data') model = SentenceVAE(vocab_size=datasets.vocab_size, sos_idx=datasets.sos_idx, eos_idx=datasets.eos_idx, pad_idx=datasets.pad_idx, unk_idx=datasets.unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) model.ptb = ptb if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1 / (1 + np.exp(-k * (step - x0)))) elif anneal_function == 'linear': return min(1, step / x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets.pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten #target = target[:, :torch.max(length).data[0]].contiguous().view(-1) target = target[:, :torch.max(length).data].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 model.train() split = 'train' for epoch in range(args.epochs): data_loader = DataLoader(dataset=datasets, batch_size=args.batch_size, shuffle=True, num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(list) for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass #logp, mean, logv, z = model(batch['input'], batch['length']) logp, mean, logv, z, encoder_last = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # backward + optimization optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['ELBO'].append(loss.data.cpu().numpy().tolist()) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.data, epoch * len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.data / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.data / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( 
data_loader): print( "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data, NLL_loss.data / batch_size, KL_loss.data / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets.get_i2w(), pad_idx=datasets.pad_idx) tracker['z'].append(z.data) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, np.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), np.mean(tracker['ELBO']), epoch) ''' # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z']} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) ''' # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) joblib.dump(model.cpu(), checkpoint_path) print("Model saved at %s" % checkpoint_path) if torch.cuda.is_available(): model.cuda()
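# ---------------------------------------------------------------------------
# Self-contained sketch of the two KL-weight schedules used by
# kl_anneal_function() above; the k and x0 constants below are illustrative
# defaults, not values taken from the training runs.
import numpy as np


def kl_weight(anneal_function, step, k=0.0025, x0=2500):
    if anneal_function == 'logistic':
        # smooth ramp from ~0 to ~1, centred at step x0
        return float(1 / (1 + np.exp(-k * (step - x0))))
    elif anneal_function == 'linear':
        # straight ramp that saturates at 1 once step >= x0
        return min(1.0, step / x0)


for step in (0, 1000, 2500, 5000, 10000):
    print(step, kl_weight('logistic', step), kl_weight('linear', step))
# ---------------------------------------------------------------------------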
split=split, create_data=args.create_data, max_sequence_length=60) # vocab_size = datasets['train'].vocab_size sos_idx = datasets['train'].sos_idx eos_idx = datasets['train'].eos_idx pad_idx = datasets['train'].pad_idx embedding = KeyedVectors.load('model/pretrained_embedding') if args.cuda: weights = torch.FloatTensor(embedding.syn0).cuda() else: weights = torch.FloatTensor(embedding.syn0) model = SentenceVAE(weights.size(0), sos_idx, eos_idx, pad_idx, training=True).to(device) def init_weights(m): if type(m) == torch.nn.Linear: torch.nn.init.xavier_uniform_(m.weight) m.bias.data.fill_(0) model.apply(init_weights) model.emb = nn.Embedding.from_pretrained(weights) optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=1e-3)
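# ---------------------------------------------------------------------------
# Minimal sketch of the pretrained-embedding wiring used above.  The random
# matrix stands in for KeyedVectors(...).vectors (.syn0 on old gensim); note
# that nn.Embedding.from_pretrained() freezes the weights by default, which is
# why the optimizer filters on requires_grad.
import torch
import torch.nn as nn

pretrained = torch.randn(10000, 300)             # stand-in for the loaded word vectors
emb = nn.Embedding.from_pretrained(pretrained)   # frozen: requires_grad=False
head = nn.Linear(300, 2)                         # stand-in for the rest of the model
model = nn.Sequential(emb, head)

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=1e-3)
# Only the Linear layer ends up in the optimizer; the embedding stays fixed.
print(sum(p.numel() for g in optimizer.param_groups for p in g['params']))
# ---------------------------------------------------------------------------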
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) with open(args.data_dir + '/ptb.vocab.json', 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] with open(os.path.join(args.save_model_path, 'model_params.json'), 'r') as f: params = json.load(f) model = SentenceVAE(**params) model.load_state_dict(torch.load(args.load_checkpoint)) print("Model loaded from %s" % args.load_checkpoint) if torch.cuda.is_available(): model = model.cuda() print(model) with torch.no_grad(): input_sent = "the n stock specialist firms on the big board floor the buyers and sellers of last resort who were criticized after the n crash once again could n't handle the selling pressure" batch_input = torch.LongTensor([[w2i[i] for i in input_sent.split()]]).cuda() batch_len = torch.LongTensor([len(input_sent.split())]).cuda() input_mean = model(batch_input, batch_len, output_mean=True) data_loader = DataLoader(dataset=datasets["train"], batch_size=args.batch_size, shuffle=False, num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) print('---------CALCULATING NEAREST SENTENCES--------') sim = [] all_sentences = [] for iteration, batch in enumerate(data_loader): for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) all_sentences.append(batch['input']) # Forward pass mean = model(batch['input'], batch['length'], output_mean=True) batch_sim = torch.abs(mean - input_mean) sim.append(batch_sim) sim = torch.cat(sim, dim=0) _, most_similar_per_dim = torch.topk(-sim, k=20, dim=0) most_similar_per_dim = most_similar_per_dim.transpose(0, 1) all_sentences = torch.cat(all_sentences, dim=0) for dim, i in enumerate(most_similar_per_dim): sentences = torch.index_select(all_sentences, dim=0, index=i) print(f"{dim=}") print(*idx2word(sentences, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
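# ---------------------------------------------------------------------------
# Toy sketch of the per-dimension nearest-sentence search above: rank a pool
# of encoded sentences by |mean_d - query_d| separately for every latent
# dimension d, using torch.topk on the negated distances.
import torch

pool = torch.randn(500, 16)                  # stand-in for encoder means of 500 sentences
query = torch.randn(1, 16)                   # stand-in for the encoded input sentence

dist = torch.abs(pool - query)               # (500, 16) per-dimension distances
_, nearest = torch.topk(-dist, k=5, dim=0)   # smallest distance == largest negated distance
nearest = nearest.transpose(0, 1)            # (16, 5): five closest sentences per dimension
print(nearest.shape)
# ---------------------------------------------------------------------------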
def main(args): with open(args.data_dir+'/ptb.vocab.json', 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] model = SentenceVAE( vocab_size=len(w2i), sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'], unk_idx=w2i['<unk>'], max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) if not os.path.exists(args.load_checkpoint): raise FileNotFoundError(args.load_checkpoint) model.load_state_dict(torch.load(args.load_checkpoint)) print("Model loaded from %s"%(args.load_checkpoint)) if torch.cuda.is_available(): model = model.cuda() model.eval() # samples, z = model.inference(n=args.num_samples) # print('----------SAMPLES----------') # print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') # z1 = torch.randn([args.latent_size]).numpy() # z2 = torch.randn([args.latent_size]).numpy() # z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float()) # samples, _ = model.inference(z=z) # print('-------INTERPOLATION-------') # print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') print('-------Encode ... Decode-------') datasets = PTB( data_dir=args.data_dir, split="valid", create_data=False, max_sequence_length=args.max_sequence_length, min_occ=1 ) data_loader = DataLoader(dataset=datasets, batch_size=2, shuffle='valid',num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) print("*"*10) print(*idx2word(batch['input'], i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') logp, mean, logv, z = model(batch['input'], batch['length']) print("+"*10) samples, z = model.inference(z=z) print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') if iteration == 0: break
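# ---------------------------------------------------------------------------
# Several of these scripts interpolate between two latent codes with an
# imported interpolate() helper (commented out above).  A minimal linear
# interpolation sketch that is assumed, not verified, to mirror that helper's
# behaviour:
import numpy as np


def interpolate(start, end, steps):
    # Returns steps + 2 points including both endpoints, shape (steps + 2, latent_size).
    grid = np.zeros((start.shape[0], steps + 2))
    for dim, (s, e) in enumerate(zip(start, end)):
        grid[dim] = np.linspace(s, e, steps + 2)
    return grid.T


z1, z2 = np.random.randn(16), np.random.randn(16)
print(interpolate(z1, z2, steps=8).shape)    # (10, 16)
# ---------------------------------------------------------------------------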
def main(args): # Load the vocab with open(args.data_dir+'/ptb.vocab.json', 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) # Initialize semantic loss sl = Semantic_Loss() datasets = OrderedDict() for split in splits: datasets[split] = PTB( data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ ) params = dict( vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) model = SentenceVAE(**params) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f: json.dump(params, f, indent=4) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/(1+np.exp(-k*(step-x0)))) elif anneal_function == 'linear': return min(1, step/x0) def perplexity_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/ 1+np.exp(-k*(step-x0))) elif anneal_function == 'linear': return min(1, (step/x0)) NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum') def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0, \ batch_perplexity, perplexity_anneal_function): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).item()].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) # Perplexity perp_loss = batch_perplexity perp_weight = perplexity_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): # Keep track of epoch loss epoch_loss = [] for split in splits: data_loader = DataLoader( dataset=datasets[split], batch_size=args.batch_size, shuffle=split=='train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available() ) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() batch_t_start = None for iteration, batch in enumerate(data_loader): if batch_t_start: batch_run_time = time.time() - batch_t_start # print("Batch run time: " + str(batch_run_time)) batch_t_start = time.time() batch_size = batch['input_sequence'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Get the original sentences in this batch batch_sentences = 
idx2word(batch['input_sequence'], i2w=i2w, pad_idx=w2i['<pad>']) # Remove the first tag batch_sentences = [x.replace("<sos>", "") for x in batch_sentences] # Forward pass (logp, mean, logv, z), states = model(**batch) # Choose some random pairs of samples within the batch # to get latent representations for batch_index_pairs = list(itertools.combinations(np.arange(batch_size), 2)) random.shuffle(batch_index_pairs) batch_index_pairs = batch_index_pairs[:args.perplexity_samples_per_batch] batch_perplexity = [] # If we start the perplexity start_perplexity = epoch > 10 # If we should have perplexity loss if start_perplexity and args.perplexity_loss: # For each pair, get the intermediate representations in the latent space for index_pair in batch_index_pairs: with torch.no_grad(): z1_hidden = states['z'][index_pair[0]].cpu() z2_hidden = states['z'][index_pair[1]].cpu() z_hidden = to_var(torch.from_numpy(interpolate(start=z1_hidden, end=z2_hidden, steps=1)).float()) if args.rnn_type == "lstm": with torch.no_grad(): z1_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[0]] z2_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[1]] z_cell_states = \ to_var(torch.from_numpy(interpolate(start=z1_cell_state, end=z2_cell_state, steps=1)).float()) samples, _ = model.inference(z=z_hidden, z_cell_state=z_cell_states) else: samples, _ = model.inference(z=z_hidden, z_cell_state=None) # Check interpolated sentences interpolated_sentences = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']) # For each sentence, get the perplexity and show it perplexities = [] for sentence in interpolated_sentences: perplexities.append(sl.get_perplexity(sentence)) avg_sample_perplexity = sum(perplexities) / len(perplexities) batch_perplexity.append(avg_sample_perplexity) # Calculate batch perplexity avg_batch_perplexity = sum(batch_perplexity) / len(batch_perplexity) # loss calculation NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, \ args.k, args.x0, avg_batch_perplexity, perplexity_anneal_function) loss = ((NLL_loss + KL_weight * KL_loss) / batch_size) + (perp_loss * perp_weight) else: # Epochs < X, so train without perplexity # loss calculation NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, \ args.k, args.x0, 0, perplexity_anneal_function) loss = (NLL_loss + KL_weight * KL_loss) / batch_size # Turn model back into train, since inference changed to eval if split == 'train': model.train() else: model.eval() # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # Add loss epoch_loss.append(loss.item()) # bookkeepeing tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0) if args.tensorboard_logging: writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, Perp-loss %9.4f, Perp-weight %6.3f" % (split.upper(), iteration, 
len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size, KL_loss.item()/batch_size, KL_weight, perp_loss, perp_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, tracker['ELBO'].mean())) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()} if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('dumps/'+ts+'/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path)
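# ---------------------------------------------------------------------------
# The perplexity penalty above comes from an external Semantic_Loss scorer
# whose code is not shown.  One common way to score sentence perplexity is a
# pretrained GPT-2 language model; the class below is an assumption about how
# such a scorer could be built, not the repo's implementation.
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast


class PerplexityScorer:
    def __init__(self, model_name='gpt2'):
        self.tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
        self.model = GPT2LMHeadModel.from_pretrained(model_name).eval()

    @torch.no_grad()
    def get_perplexity(self, sentence):
        ids = self.tokenizer(sentence, return_tensors='pt').input_ids
        # With labels == input_ids the model returns the mean token NLL,
        # so exp() of that loss is the per-token perplexity.
        loss = self.model(ids, labels=ids).loss
        return torch.exp(loss).item()


# scorer = PerplexityScorer()
# print(scorer.get_perplexity("the cat sat on the mat"))
# ---------------------------------------------------------------------------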
action='store_true', default=False, help='enables CUDA training') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() with open(args.data_dir + '/ptb.vocab.json', 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] embedding = KeyedVectors.load('model/pretrained_embedding') weights = torch.FloatTensor(embedding.syn0) model = SentenceVAE(vocab_size=weights.size(0), sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>']) model.load_state_dict(torch.load(args.load_checkpoint)) print("Model loaded from %s" % (args.load_checkpoint)) if torch.cuda.is_available(): model = model.cuda() model.eval() print('----------SAMPLES----------') for i in range(5): sample, z = model.inference() sample = sample.cpu().numpy() print(sample)
def generate(date, epoch, sentiment, n_samples): date = date cuda2 = torch.device('cuda:0') epoch = epoch #date = "2020-Feb-26-17:47:47" #exp_descr = pd.read_csv("EXP_DESCR/" + date + ".csv") #print("Pretained: ", exp_descr['pretrained'][0]) #print("Bidirectional: ", exp_descr['Bidirectional'][0]) #epoch = str(10) #data_dir = 'data' # params = pd.read_csv("Parameters/params.csv") params = params.set_index('time') exp_descr = params.loc[date] # 2019-Dec-02-09:35:25, 60,300,256,0.3,0.5,16,False,0.001,10,False embedding_size = exp_descr["embedding_size"] hidden_size = exp_descr["hidden_size"] rnn_type = exp_descr['rnn_type'] word_dropout = exp_descr["word_dropout"] embedding_dropout = exp_descr["embedding_dropout"] latent_size = exp_descr["latent_size"] num_layers = 1 batch_size = exp_descr["batch_size"] bidirectional = bool(exp_descr["bidirectional"]) max_sequence_length = exp_descr["max_sequence_length"] back = exp_descr["back"] attribute_size = exp_descr["attr_size"] wd_type = exp_descr["word_drop_type"] num_samples = 2 save_model_path = 'bin' ptb = False if ptb == True: vocab_dir = '/ptb.vocab.json' else: vocab_dir = '/yelp_vocab.json' with open("bin/" + date + "/" + vocab_dir, 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] model = SentenceVAE(vocab_size=len(w2i), sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'], unk_idx=w2i['<unk>'], max_sequence_length=max_sequence_length, embedding_size=embedding_size, rnn_type=rnn_type, hidden_size=hidden_size, word_dropout=0, embedding_dropout=0, latent_size=latent_size, num_layers=num_layers, cuda=cuda2, bidirectional=bidirectional, attribute_size=attribute_size, word_dropout_type='static', back=back) print(model) # Results # 2019-Nov-28-13:23:06/E4-5".pytorch" load_checkpoint = "bin/" + date + "/" + "E" + str(epoch) + ".pytorch" # load_checkpoint = "bin/2019-Nov-28-12:03:44 /E0.pytorch" if not os.path.exists(load_checkpoint): raise FileNotFoundError(load_checkpoint) if torch.cuda.is_available(): model = model.cuda() device = "cuda" else: device = "cpu" model.load_state_dict( torch.load(load_checkpoint, map_location=torch.device(device))) def attr_generation(n): labels = np.random.randint(2, size=n) enc = OneHotEncoder(handle_unknown='ignore') labels = np.reshape(labels, (len(labels), 1)) enc.fit(labels) one_hot = enc.transform(labels).toarray() one_hot = one_hot.astype(np.float32) one_hot = torch.from_numpy(one_hot) return one_hot model.eval() labels = attr_generation(n=num_samples) from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer from sklearn.metrics import accuracy_score analyser = SentimentIntensityAnalyzer() def sentiment_analyzer_scores(sentence): score = analyser.polarity_scores(sentence) if score['compound'] > 0.05: return 1, 'Positive' else: return 0, 'Negative' print('----------SAMPLES----------') labels = [] generated = [] for i in range(n_samples): samples, z, l = model.inference(sentiment) s = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']) #print(sentiment_analyzer_scores(s[0])) if sentiment_analyzer_scores(s[0])[1] == sentiment: generated.append(s[0]) labels.append(sentiment_analyzer_scores(s[0])[0]) #print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') print(sum(labels)) translation = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']) return generated '''
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) total_steps = (len(datasets["train"]) // args.batch_size) * args.epochs print("Train dataset size", total_steps) def kl_anneal_function(anneal_function, step): if anneal_function == 'identity': return 1 if anneal_function == 'linear': if args.warmup is None: return 1 - (total_steps - step) / total_steps else: warmup_steps = (total_steps / args.epochs) * args.warmup return 1 - (warmup_steps - step ) / warmup_steps if step < warmup_steps else 1.0 ReconLoss = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step) return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['negELBO'] = torch.cat( (tracker['negELBO'], loss.data.unsqueeze(0))) if args.tensorboard_logging: neg_elbo = (recon_loss + KL_loss) / batch_size writer.add_scalar("%s/Negative_ELBO" % 
split.upper(), neg_elbo.data[0], epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.data[0] / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): logger.info( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.data[0], recon_loss.data[0] / batch_size, KL_loss.data[0] / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, torch.mean(tracker['negELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/NegELBO" % split.upper(), torch.mean(tracker['negELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist() } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s" % checkpoint_path) if args.num_samples: torch.cuda.empty_cache() model.eval() with torch.no_grad(): print(f"Generating {args.num_samples} samples") generations, _ = model.inference(n=args.num_samples) vocab = datasets["train"].i2w print( "Sampled latent codes from z ~ N(0, I), generated sentences:") for i, generation in enumerate(generations, start=1): sentence = [vocab[str(word.item())] for word in generation] print(f"{i}:", " ".join(sentence))
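# ---------------------------------------------------------------------------
# model.inference() above samples z ~ N(0, I) and greedily decodes it with the
# trained decoder.  A stripped-down sketch of such a decoding loop, assuming a
# batch_first GRU decoder plus the usual latent2hidden / outputs2vocab layers;
# the names, shapes, and the absence of <eos> early stopping are assumptions,
# not the repo's exact code.
import torch


def greedy_decode(decoder_rnn, latent2hidden, outputs2vocab, embedding, z,
                  sos_idx, max_len=60):
    hidden = latent2hidden(z).unsqueeze(0)            # (1, batch, hidden_size)
    token = torch.full((z.size(0), 1), sos_idx, dtype=torch.long, device=z.device)
    generated = []
    for _ in range(max_len):
        emb = embedding(token)                        # (batch, 1, embedding_size)
        output, hidden = decoder_rnn(emb, hidden)     # one decoding step
        logits = outputs2vocab(output.squeeze(1))     # (batch, vocab_size)
        token = logits.argmax(dim=-1, keepdim=True)   # greedy choice of next word
        generated.append(token)
    return torch.cat(generated, dim=1)                # (batch, max_len) word indices
# ---------------------------------------------------------------------------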
def main(args): with open(args.data_dir+'/ptb.vocab.json', 'r') as file: vocab = json.load(file) # required to map between integer-value sentences and real sentences w2i, i2w = vocab['w2i'], vocab['i2w'] # make sure our models for the VAE and Actor exist if not os.path.exists(args.load_vae): raise FileNotFoundError(args.load_vae) model = SentenceVAE( vocab_size=len(w2i), sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'], unk_idx=w2i['<unk>'], max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) model.load_state_dict( torch.load(args.load_vae, map_location=lambda storage, loc: storage)) model.eval() print("vae model loaded from %s"%(args.load_vae)) # to run in constraint mode, we need the trained generator if args.constraint_mode: if not os.path.exists(args.load_actor): raise FileNotFoundError(args.load_actor) actor = Actor( dim_z=args.latent_size, dim_model=2048, num_labels=args.n_tags) actor.load_state_dict( torch.load(args.load_actor, map_location=lambda storage, loc:storage)) actor.eval() print("actor model loaded from %s"%(args.load_actor)) if torch.cuda.is_available(): model = model.cuda() if args.constraint_mode: actor = actor.cuda() # TODO: to(self.devices) if args.sample: print('*** SAMPLE Z: ***') # get samples from the prior sample_sents, z = model.inference(n=args.num_samples) sample_sents, sample_tags = get_sents_and_tags(sample_sents, i2w, w2i) pickle_it(z.cpu().numpy(), 'samples/z_sample_n{}.pkl'.format(args.num_samples)) pickle_it(sample_sents, 'samples/sents_sample_n{}.pkl'.format(args.num_samples)) pickle_it(sample_tags, 'samples/tags_sample_n{}.pkl'.format(args.num_samples)) print(sample_sents, sep='\n') if args.constraint_mode: print('*** SAMPLE Z_PRIME: ***') # get samples from the prior, conditioned via the actor all_tags_sample_prime = [] all_sents_sample_prime = {} all_z_sample_prime = {} for i, condition in enumerate(LABELS): # binary vector denoting each of the PHRASE_TAGS labels = torch.Tensor(condition).repeat(args.num_samples, 1).cuda() # take z and manipulate using the actor to generate z_prime z_prime = actor.forward(z, labels) sample_sents_prime, z_prime = model.inference( z=z_prime, n=args.num_samples) sample_sents_prime, sample_tags_prime = get_sents_and_tags( sample_sents_prime, i2w, w2i) print('conditoned on: {}'.format(condition)) print(sample_sents_prime, sep='\n') all_tags_sample_prime.append(sample_tags_prime) all_sents_sample_prime[LABEL_NAMES[i]] = sample_sents_prime all_z_sample_prime[LABEL_NAMES[i]] = z_prime.data.cpu().numpy() pickle_it(all_tags_sample_prime, 'samples/tags_sample_prime_n{}.pkl'.format(args.num_samples)) pickle_it(all_sents_sample_prime, 'samples/sents_sample_prime_n{}.pkl'.format(args.num_samples)) pickle_it(all_z_sample_prime, 'samples/z_sample_prime_n{}.pkl'.format(args.num_samples)) if args.interpolate: # get random samples from the latent space z1 = torch.randn([args.latent_size]).numpy() z2 = torch.randn([args.latent_size]).numpy() z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=args.num_samples-2)).float()) print('*** INTERP Z: ***') interp_sents, _ = model.inference(z=z) interp_sents, interp_tags = get_sents_and_tags(interp_sents, i2w, w2i) pickle_it(z.cpu().numpy(), 'samples/z_interp_n{}.pkl'.format(args.num_samples)) pickle_it(interp_sents, 
'samples/sents_interp_n{}.pkl'.format(args.num_samples)) pickle_it(interp_tags, 'samples/tags_interp_n{}.pkl'.format(args.num_samples)) print(interp_sents, sep='\n') if args.constraint_mode: print('*** INTERP Z_PRIME: ***') all_tags_interp_prime = [] all_sents_interp_prime = {} all_z_interp_prime = {} for i, condition in enumerate(LABELS): # binary vector denoting each of the PHRASE_TAGS labels = torch.Tensor(condition).repeat(args.num_samples, 1).cuda() # z prime conditioned on this particular binary variable z_prime = actor.forward(z, labels) interp_sents_prime, z_prime = model.inference( z=z_prime, n=args.num_samples) interp_sents_prime, interp_tags_prime = get_sents_and_tags( interp_sents_prime, i2w, w2i) print('conditoned on: {}'.format(condition)) print(interp_sents_prime, sep='\n') all_tags_interp_prime.append(interp_tags_prime) all_sents_interp_prime[LABEL_NAMES[i]] = interp_sents_prime all_z_interp_prime[LABEL_NAMES[i]] = z_prime.data.cpu().numpy() pickle_it(all_tags_interp_prime, 'samples/tags_interp_prime_n{}.pkl'.format(args.num_samples)) pickle_it(all_sents_interp_prime, 'samples/sents_interp_prime_n{}.pkl'.format(args.num_samples)) pickle_it(all_z_interp_prime, 'samples/z_interp_prime_n{}.pkl'.format(args.num_samples)) import IPython; IPython.embed()
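# ---------------------------------------------------------------------------
# The Actor above maps (z, label) -> z' and is loaded from a checkpoint; only
# its constructor arguments (dim_z, dim_model, num_labels) are visible here.
# A hypothetical minimal module matching that signature, not the trained
# model's actual architecture:
import torch
import torch.nn as nn


class Actor(nn.Module):
    def __init__(self, dim_z, dim_model, num_labels):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim_z + num_labels, dim_model),
            nn.ReLU(),
            nn.Linear(dim_model, dim_z),
        )

    def forward(self, z, labels):
        # Concatenate the latent code with the binary condition vector and
        # predict an offset, so the unconditioned identity is easy to keep.
        return z + self.net(torch.cat([z, labels], dim=-1))
# ---------------------------------------------------------------------------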
def main(args): data_name = args.data_name with open(args.data_dir+data_name+'.vocab.json', 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] model = SentenceVAE( vocab_size=len(w2i), sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'], unk_idx=w2i['<unk>'], max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) if not os.path.exists(args.load_checkpoint): raise FileNotFoundError(args.load_checkpoint) model.load_state_dict(torch.load(args.load_checkpoint)) print("Model loaded from %s"%(args.load_checkpoint)) if torch.cuda.is_available(): model = model.cuda() model.eval() # samples, z = model.inference(n=args.num_samples) # print('----------SAMPLES----------') # print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') # z1 = torch.randn([args.latent_size]).numpy() # z2 = torch.randn([args.latent_size]).numpy() # z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float()) # samples, _ = model.inference(z=z) # print('-------INTERPOLATION-------') # print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') # print('-------Encode ... Decode-------') # datasets = Amazon( # data_dir=args.data_dir, # split="valid", # create_data=False, # batch_size=10, # max_sequence_length=args.max_sequence_length, # min_occ=3 # ) ### load vocab # with open(os.path.join(args.data_dir, args.vocab_file), 'r') as file: # vocab = json.load(file) # w2i, i2w = vocab['w2i'], vocab['i2w'] tokenizer = TweetTokenizer(preserve_case=False) # raw_text = "I like this!" raw_text = "DON'T CARE FOR IT. GAVE IT AS A GIFT AND THEY WERE OKAY WITH IT. JUST NOT WHAT I EXPECTED." input_text = f_raw2vec(tokenizer, raw_text, w2i, i2w) length_text = len(input_text) length_text = [length_text] print("length_text", length_text) input_tensor = torch.LongTensor(input_text) print('input_tensor', input_tensor) input_tensor = input_tensor.unsqueeze(0) if torch.is_tensor(input_tensor): input_tensor = to_var(input_tensor) length_tensor = torch.LongTensor(length_text) print("length_tensor", length_tensor) # length_tensor = length_tensor.unsqueeze(0) if torch.is_tensor(length_tensor): length_tensor = to_var(length_tensor) print("*"*10) print("->"*10, *idx2word(input_tensor, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') logp, mean, logv, z = model(input_tensor, length_tensor) # print("z", z.size(), mean_z.size()) mean = mean.unsqueeze(0) print("mean", mean) print("z", z) samples, z = model.inference(z=mean) print("<-"*10, *idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') for i in range(10): samples, z = model.inference(z=z) print("<-"*10, *idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
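# ---------------------------------------------------------------------------
# f_raw2vec() above is imported from elsewhere.  A sketch of the tokenizer-to-
# index conversion it presumably performs; the <sos>/<eos>/<unk> handling and
# the raw2idx name are assumptions, not that helper's verbatim code.
from nltk.tokenize import TweetTokenizer


def raw2idx(tokenizer, raw_text, w2i, max_len=60):
    tokens = tokenizer.tokenize(raw_text)
    ids = [w2i.get(tok, w2i['<unk>']) for tok in tokens[:max_len - 2]]
    return [w2i['<sos>']] + ids + [w2i['<eos>']]


# tokenizer = TweetTokenizer(preserve_case=False)
# print(raw2idx(tokenizer, "i like this !", w2i))
# ---------------------------------------------------------------------------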
def main(args): data_name = args.data_name with open(args.data_dir+data_name+'.vocab.json', 'r') as file: vocab = json.load(file) w2i, i2w = vocab['w2i'], vocab['i2w'] model = SentenceVAE( vocab_size=len(w2i), sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'], unk_idx=w2i['<unk>'], max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) if not os.path.exists(args.load_checkpoint): raise FileNotFoundError(args.load_checkpoint) model.load_state_dict(torch.load(args.load_checkpoint)) print("Model loaded from %s"%(args.load_checkpoint)) if torch.cuda.is_available(): model = model.cuda() model.eval() samples, z = model.inference(n=args.num_samples) print('----------SAMPLES----------') print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') z1 = torch.randn([args.latent_size]).numpy() z2 = torch.randn([args.latent_size]).numpy() z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float()) samples, _ = model.inference(z=z) print('-------INTERPOLATION-------') print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') print('-------Encode ... Decode-------') datasets = Amazon( data_dir=args.data_dir, split="valid", create_data=False, batch_size=10, max_sequence_length=args.max_sequence_length, min_occ=3 ) iteration = 0 for input_batch_tensor, target_batch_tensor, length_batch_tensor in datasets: if torch.is_tensor(input_batch_tensor): input_batch_tensor = to_var(input_batch_tensor) if torch.is_tensor(target_batch_tensor): target_batch_tensor = to_var(target_batch_tensor) if torch.is_tensor(length_batch_tensor): length_batch_tensor = to_var(length_batch_tensor) print("*"*10) print("->"*10, *idx2word(input_batch_tensor, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') logp, mean, logv, z = model(input_batch_tensor,length_batch_tensor) samples, z = model.inference(z=z) print("<-"*10, *idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n') # print("+"*10) if iteration == 0: break iteration += 1
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] + (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, x1, x2): if anneal_function == 'identity': return 1 elif anneal_function == 'linear': return min(1, step / x1) elif anneal_function == 'logistic': return float(1 / (1 + np.exp(-x2 * (step - x1)))) elif anneal_function == 'cyclic_log': return float(1 / (1 + np.exp(-x2 * ((step % (3 * x1)) - x1)))) elif anneal_function == 'cyclic_lin': return min(1, (step % (3 * x1)) / x1) ReconLoss = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, x1, x2): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).item()].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, x1, x2) return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 early_stopping = EarlyStopping(history=10) for epoch in range(args.epochs): early_stopping_flag = False for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) # tracker = defaultdict(tensor) tracker = defaultdict(list) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.x1, args.x2) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = (recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing tracker['negELBO'].append(loss.item()) if args.tensorboard_logging: 
writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.item(), epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): # print(step) # logger.info("Step = %d"%step) logger.info( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), recon_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) # tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) # print(z.data.shape) tracker['z'].append(z.data.tolist()) mean_loss = sum(tracker['negELBO']) / len(tracker['negELBO']) logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, mean_loss)) # print(mean_loss) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/NegELBO" % split.upper(), mean_loss, epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'] } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) if (args.early_stopping): if (early_stopping.check(mean_loss)): early_stopping_flag = True # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) logger.info("Model saved at %s" % checkpoint_path) if (early_stopping_flag): print("Early stopping trigerred. Training stopped...") break
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] #+ (['test'] if args.test else []) datasets = OrderedDict() for split in splits: datasets[split] = PTB( data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ ) model = SentenceVAE( vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional ) if torch.cuda.is_available(): model = model.cuda() if args.tensorboard_logging: writer = SummaryWriter(os.path.join('./',args.logdir, expierment_name(args,ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join('./',args.save_model_path,'VAE', ts) os.makedirs(save_model_path) def kl_anneal_function(anneal_function, step, k, x0): if anneal_function == 'logistic': return float(1/(1+np.exp(-k*(step-x0)))) elif anneal_function == 'linear': return min(1, step/x0) NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).data[0]].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood NLL_loss = NLL(logp, target) NLL_w_avg = NLL_loss/torch.sum(length).float() # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step, k, x0) return NLL_loss, KL_loss, KL_weight,NLL_w_avg print(model) optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor step = 0 for epoch in range(args.epochs): for split in splits: data_loader = DataLoader( dataset=datasets[split], batch_size=args.batch_size, shuffle=split=='train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available() ) tracker = defaultdict(tensor) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation NLL_loss, KL_loss, KL_weight,NLL_w_avg = loss_fn(logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0) loss = (NLL_loss + KL_weight * KL_loss)/batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing # Avoid the .cat error !!! 
#print(loss.data) #print(tracker['ELBO']) loss_data = torch.tensor([loss.data.item()]) tracker['ELBO'] = torch.cat((tracker['ELBO'], loss_data)) #Orig: tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data),1) if args.tensorboard_logging: writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration) writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration) writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration+1 == len(data_loader): print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, NLL-word-Loss %9.4f" %(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight,NLL_w_avg)) #split = 'invalid' #JUST TO DEBUG!!! if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) #ERROR HERE!!! tracker['z'] = torch.cat((tracker['z'], z.data), dim=0) print("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']))) if args.tensorboard_logging: writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()} if not os.path.exists(os.path.join('./dumps', ts)): os.makedirs('dumps/'+ts) with open(os.path.join('./dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file: json.dump(dump,dump_file) # save checkpoint if split == 'train' and epoch %10 ==0 : checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s"%checkpoint_path)
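# ---------------------------------------------------------------------------
# NLL_w_avg above is the summed reconstruction NLL divided by the number of
# target tokens; exponentiating that per-word NLL gives a word-level
# perplexity, sketched here on dummy values.
import torch

nll_sum = torch.tensor(1523.7)          # summed NLL over the batch (dummy value)
lengths = torch.tensor([18, 25, 30])    # target lengths in the batch (dummy values)

nll_per_word = nll_sum / lengths.sum().float()
perplexity = torch.exp(nll_per_word)
print(nll_per_word.item(), perplexity.item())
# ---------------------------------------------------------------------------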
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) splits = ['train', 'valid'] logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO, filename=os.path.join(args.logdir, experiment_name(args, ts) + ".log")) logger = logging.getLogger(__name__) datasets = OrderedDict() for split in splits: datasets[split] = PTB(data_dir=args.data_dir, split=split, create_data=args.create_data, max_sequence_length=args.max_sequence_length, min_occ=args.min_occ) model = SentenceVAE(vocab_size=datasets['train'].vocab_size, sos_idx=datasets['train'].sos_idx, eos_idx=datasets['train'].eos_idx, pad_idx=datasets['train'].pad_idx, unk_idx=datasets['train'].unk_idx, max_sequence_length=args.max_sequence_length, embedding_size=args.embedding_size, rnn_type=args.rnn_type, hidden_size=args.hidden_size, word_dropout=args.word_dropout, embedding_dropout=args.embedding_dropout, latent_size=args.latent_size, num_layers=args.num_layers, bidirectional=args.bidirectional) if torch.cuda.is_available(): model = model.cuda() print(model) if args.tensorboard_logging: writer = SummaryWriter( os.path.join(args.logdir, experiment_name(args, ts))) writer.add_text("model", str(model)) writer.add_text("args", str(args)) writer.add_text("ts", ts) save_model_path = os.path.join(args.save_model_path, ts) os.makedirs(save_model_path) total_step = int(args.epochs * 42000.0 / args.batch_size) def kl_anneal_function(anneal_function, step): if anneal_function == 'half': return 0.5 if anneal_function == 'identity': return 1 if anneal_function == 'double': return 2 if anneal_function == 'quadra': return 4 if anneal_function == 'sigmoid': return 1 / (1 + np.exp((0.5 * total_step - step) / 200)) if anneal_function == 'monotonic': beta = step * 4 / total_step if beta > 1: beta = 1.0 return beta if anneal_function == 'cyclical': t = total_step / 4 beta = 4 * (step % t) / t if beta > 1: beta = 1.0 return beta ReconLoss = torch.nn.NLLLoss(reduction='sum', ignore_index=datasets['train'].pad_idx) def loss_fn(logp, target, length, mean, logv, anneal_function, step): # cut-off unnecessary padding from target, and flatten target = target[:, :torch.max(length).item()].contiguous().view(-1) logp = logp.view(-1, logp.size(2)) # Negative Log Likelihood recon_loss = ReconLoss(logp, target) # KL Divergence KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()) KL_weight = kl_anneal_function(anneal_function, step) return recon_loss, KL_loss, KL_weight optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.Tensor step = 0 train_loss = [] test_loss = [] for epoch in range(args.epochs): for split in splits: data_loader = DataLoader(dataset=datasets[split], batch_size=args.batch_size, shuffle=split == 'train', num_workers=cpu_count(), pin_memory=torch.cuda.is_available()) tracker = defaultdict(list) # Enable/Disable Dropout if split == 'train': model.train() else: model.eval() for iteration, batch in enumerate(data_loader): batch_size = batch['input'].size(0) for k, v in batch.items(): if torch.is_tensor(v): batch[k] = to_var(v) # Forward pass logp, mean, logv, z = model(batch['input'], batch['length']) # loss calculation recon_loss, KL_loss, KL_weight = loss_fn( logp, batch['target'], batch['length'], mean, logv, args.anneal_function, step) if split == 'train': loss = (recon_loss + KL_weight * KL_loss) / batch_size else: # report complete elbo when validation loss = 
(recon_loss + KL_loss) / batch_size # backward + optimization if split == 'train': optimizer.zero_grad() loss.backward() optimizer.step() step += 1 # bookkeepeing # tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data)) tracker["negELBO"].append(loss.item()) tracker["recon_loss"].append(recon_loss.item() / batch_size) tracker["KL_Loss"].append(KL_loss.item() / batch_size) tracker["KL_Weight"].append(KL_weight) if args.tensorboard_logging: writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.item(), epoch * len(data_loader) + iteration) writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.item() / batch_size, epoch * len(data_loader) + iteration) writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight, epoch * len(data_loader) + iteration) if iteration % args.print_every == 0 or iteration + 1 == len( data_loader): logger.info( "\tStep\t%s\t%04d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), recon_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight)) print( "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f" % (split.upper(), iteration, len(data_loader) - 1, loss.item(), recon_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight)) if split == 'valid': if 'target_sents' not in tracker: tracker['target_sents'] = list() tracker['target_sents'] += idx2word( batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) tracker['z'].append(z.data.tolist()) logger.info( "\tEpoch\t%s\t%02d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f" % (split.upper(), epoch, args.epochs, sum(tracker['negELBO']) / len(tracker['negELBO']), 1.0 * sum(tracker['recon_loss']) / len(tracker['recon_loss']), 1.0 * sum(tracker['KL_Loss']) / len(tracker['KL_Loss']), 1.0 * sum(tracker['KL_Weight']) / len(tracker['KL_Weight']))) print("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" % (split.upper(), epoch, args.epochs, sum(tracker['negELBO']) / len(tracker['negELBO']))) if args.tensorboard_logging: writer.add_scalar( "%s-Epoch/NegELBO" % split.upper(), 1.0 * sum(tracker['negELBO']) / len(tracker['negELBO']), epoch) writer.add_scalar( "%s-Epoch/recon_loss" % split.upper(), 1.0 * sum(tracker['recon_loss']) / len(tracker['recon_loss']), epoch) writer.add_scalar( "%s-Epoch/KL_Loss" % split.upper(), 1.0 * sum(tracker['KL_Loss']) / len(tracker['KL_Loss']), epoch) writer.add_scalar( "%s-Epoch/KL_Weight" % split.upper(), 1.0 * sum(tracker['KL_Weight']) / len(tracker['KL_Weight']), epoch) if split == 'train': train_loss.append(1.0 * sum(tracker['negELBO']) / len(tracker['negELBO'])) else: test_loss.append(1.0 * sum(tracker['negELBO']) / len(tracker['negELBO'])) # save a dump of all sentences and the encoded latent space if split == 'valid': dump = { 'target_sents': tracker['target_sents'], 'z': tracker['z'] } if not os.path.exists(os.path.join('dumps', ts)): os.makedirs('dumps/' + ts) with open( os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file: json.dump(dump, dump_file) # save checkpoint if split == 'train': checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch)) torch.save(model.state_dict(), checkpoint_path) print("Model saved at %s" % checkpoint_path) sns.set(style="whitegrid") df = pd.DataFrame() df["train"] = train_loss df["test"] = test_loss ax = sns.lineplot(data=df, legend=False) ax.set(xlabel='Epoch', ylabel='Loss') 
plt.legend(title='Split', loc='upper right', labels=['Train', 'Test']) plt.savefig(os.path.join(args.logdir, experiment_name(args, ts) + ".png"), transparent=True, dpi=300)
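# ---------------------------------------------------------------------------
# Quick comparison of the 'monotonic' and 'cyclical' beta schedules defined in
# kl_anneal_function() above; total_step here is illustrative only.
total_step = 1000


def beta(anneal_function, step):
    if anneal_function == 'monotonic':
        return min(1.0, step * 4 / total_step)
    if anneal_function == 'cyclical':
        t = total_step / 4
        return min(1.0, 4 * (step % t) / t)


for step in range(0, total_step + 1, 125):
    print(step, beta('monotonic', step), beta('cyclical', step))
# ---------------------------------------------------------------------------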
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    curBest = 1000000
    for split in splits:
        datasets[split] = Mixed(data_dir=args.data_dir,
                                split=split,
                                create_data=args.create_data,
                                max_sequence_length=args.max_sequence_length,
                                min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, totalIterations, split):
        # Annealing only applies to training; validation/test always use the full KL term.
        if split != 'train':
            return 1
        elif anneal_function == 'identity':
            return 1
        elif anneal_function == 'linear':
            return 1.005 * float(step) / totalIterations
        elif anneal_function == 'sigmoid':
            return 1 / (1 + math.exp(-8 * (float(step) / totalIterations)))
        elif anneal_function == 'tanh':
            return math.tanh(4 * (float(step) / totalIterations))
        elif anneal_function == 'linear_capped':
            # reaches weight 1.0 after one fifth of all iterations, then stays there
            return min(1.0, float(step) * 5 / totalIterations)
        elif anneal_function == 'cyclic':
            # five cycles per run: ramp linearly to 1 during the first half of each
            # cycle, hold at 1 for the second half
            quantile = int(totalIterations / 5)
            remainder = int(step % quantile)
            midPoint = int(quantile / 2)
            if remainder > midPoint:
                return 1
            return float(remainder) / midPoint
        else:
            return 1

    # 'size_average=False' is deprecated; reduction='sum' gives the same summed NLL
    ReconLoss = torch.nn.NLLLoss(reduction='sum', ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, totalIterations, split):
        # cut off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood (reconstruction term)
        recon_loss = ReconLoss(logp, target)

        # KL divergence between q(z|x) = N(mean, exp(logv)) and the N(0, I) prior
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, totalIterations, split)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    tensor2 = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    tensor3 = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    tensor4 = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor

    step = 0
    stop = False
    Z = []
    L = []
    for epoch in range(args.epochs):
        if stop:
            break
        for split in splits:
            if split == 'test':
                z_data = []
                domain_label = []
                z_bool = False
                domain_label_bool = False
            if stop:
                break

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())
            totalIterations = (int(len(datasets[split]) / args.batch_size) + 1) * args.epochs

            tracker = defaultdict(tensor)
            tracker2 = defaultdict(tensor2)
            tracker3 = defaultdict(tensor3)
            tracker4 = defaultdict(tensor4)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                # if iteration > 400:
                #     break

                batch_size = batch['input'].size(0)
                labels = batch['label']

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # collect latent codes and domain labels on the test split for the t-SNE plot
                if split == 'test':
                    if not z_bool:
                        z_bool = True
                        domain_label = labels.tolist()
                        z_data = z
                    else:
                        domain_label += labels.tolist()
                        z_data = torch.cat((z_data, z), 0)

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                         batch['length'], mean, logv,
                                                         args.anneal_function, step,
                                                         totalIterations, split)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report the complete ELBO during validation/test
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping (view(1) so the scalar losses can be concatenated)
                tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data.view(1)))
                tracker2['KL_loss'] = torch.cat((tracker2['KL_loss'], KL_loss.data.view(1)))
                tracker3['Recon_loss'] = torch.cat((tracker3['Recon_loss'], recon_loss.data.view(1)))
                tracker4['Perplexity'] = torch.cat(
                    (tracker4['Perplexity'], torch.exp(recon_loss.data / batch_size).view(1)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(), recon_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Loss" % split.upper(), KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Weight" % split.upper(), KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(data_loader):
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1, loss.item(),
                           recon_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight))

                if split == 'test':
                    Z = z_data
                    L = domain_label

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['target'].data,
                                                        i2w=datasets['train'].get_i2w(),
                                                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f"
                        % (split.upper(), epoch, args.epochs, torch.mean(tracker['negELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split.upper(),
                                  torch.mean(tracker['negELBO']), epoch)
                writer.add_scalar("%s-Epoch/KL_loss" % split.upper(),
                                  torch.mean(tracker2['KL_loss']) / batch_size, epoch)
                writer.add_scalar("%s-Epoch/Recon_loss" % split.upper(),
                                  torch.mean(tracker3['Recon_loss']) / batch_size, epoch)
                writer.add_scalar("%s-Epoch/Perplexity" % split.upper(),
                                  torch.mean(tracker4['Perplexity']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                # simple early stopping: stop once the validation ELBO no longer improves
                if torch.mean(tracker['negELBO']).item() < curBest:
                    curBest = torch.mean(tracker['negELBO']).item()
                else:
                    stop = True

                dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps_32_0', ts)):
                    os.makedirs('dumps_32_0/' + ts)
                with open(os.path.join('dumps_32_0/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            # if split == 'train':
            #     checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch))
            #     torch.save(model.state_dict(), checkpoint_path)
            #     logger.info("Model saved at %s" % checkpoint_path)

    # t-SNE of the test-split latent codes, coloured by domain label
    # (only available when the script was run with --test)
    if args.test:
        Z = Z.data.cpu().numpy()
        print(Z.shape)
        beforeTSNE = TSNE(random_state=20150101).fit_transform(Z)
        scatter(beforeTSNE, L, [0, 1, 2], (5, 5), 'latent discoveries')
        plt.savefig('mixed_tsne' + args.anneal_function + '.png', dpi=120)
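The schedule branches in kl_anneal_function above are easiest to compare side by side. The following standalone sketch is an illustration only, not part of the training script: it re-derives the same formulas for a hypothetical 1000-step run and prints each schedule's KL weight at a few checkpoints.

# Standalone sketch (illustration only): compare the KL-weight schedules used in
# kl_anneal_function above over a hypothetical run of 1000 optimizer steps.
import math


def kl_weight(name, step, total_iterations):
    frac = float(step) / total_iterations
    if name == 'identity':
        return 1.0
    if name == 'linear':
        return 1.005 * frac
    if name == 'sigmoid':
        return 1.0 / (1.0 + math.exp(-8 * frac))
    if name == 'tanh':
        return math.tanh(4 * frac)
    if name == 'linear_capped':
        return min(1.0, 5 * frac)
    if name == 'cyclic':
        quantile = total_iterations // 5      # five cycles per run
        remainder = step % quantile
        mid_point = quantile // 2
        return 1.0 if remainder > mid_point else remainder / mid_point
    return 1.0


if __name__ == '__main__':
    total = 1000
    for name in ['identity', 'linear', 'sigmoid', 'tanh', 'linear_capped', 'cyclic']:
        row = [round(kl_weight(name, s, total), 3) for s in (0, 100, 250, 500, 750, 999)]
        print(name.ljust(13), row)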
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid']

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PoetryDataset(data_dir=args.data_dir,
                                        split=split,
                                        create_data=args.create_data,
                                        max_sequence_length=args.max_sequence_length,
                                        min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional,
                        condition_size=7)

    if torch.cuda.is_available():
        model = model.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    # 'size_average=False' is deprecated; reduction='sum' gives the same summed NLL
    NLL = torch.nn.NLLLoss(reduction='sum', ignore_index=datasets['train'].pad_idx)

    def calculate_bleu_scores(original, decoded):
        # sentence-level BLEU of the greedy decode against the target poem
        reference = original.split(' ')
        hypothesis = decoded.split(' ')
        return nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
        # cut off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length)].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood (reconstruction term)
        NLL_loss = NLL(logp, target)

        # KL divergence between q(z|x) = N(mean, exp(logv)) and the N(0, I) prior
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor

    step = 0
    for epoch in range(args.epochs):
        total_BLEU_score = 0
        num_valid_poems = 0

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=0,
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass, conditioned on the one-hot poem category
                logp, mean, logv, z = model(batch['input'], batch['length'],
                                            condition=batch['category'].float())
                # logp, mean, logv, z = model(batch['input'], batch['length'], condition=None)

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'],
                                                       mean, logv, args.anneal_function,
                                                       step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.unsqueeze(0)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                          % (split.upper(), iteration, len(data_loader) - 1, loss.item(),
                             NLL_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['target'].data,
                                                        i2w=datasets['train'].get_i2w(),
                                                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

                    # Calculate BLEU score of the greedy reconstruction
                    decoded = torch.argmax(logp, dim=-1)
                    for i in range(decoded.shape[0]):
                        decoded_poem = idx2word([decoded[i]],
                                                i2w=datasets['train'].get_i2w(),
                                                pad_idx=datasets['train'].pad_idx)[0]
                        original_poem = idx2word([batch['target'].data[i]],
                                                 i2w=datasets['train'].get_i2w(),
                                                 pad_idx=datasets['train'].pad_idx)[0]
                        total_BLEU_score += calculate_bleu_scores(original_poem, decoded_poem)
                    num_valid_poems += decoded.shape[0]

            print("%s Epoch %02d/%i, Mean ELBO %9.4f"
                  % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO'])))

            if split == 'valid':
                # average over all validation poems, not just the last batch
                print("Average BLEU {}".format(total_BLEU_score / num_valid_poems))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
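One caveat about calculate_bleu_scores above: nltk's unsmoothed sentence_bleu returns 0 (with a warning) whenever some higher-order n-gram has no match, which is common for short or noisy decodes. A smoothed variant is sketched below; this is an optional illustration using nltk's SmoothingFunction, not what the training loop above actually calls.

# Sketch only: smoothed sentence-level BLEU as an alternative to the unsmoothed
# scoring in calculate_bleu_scores. method1 adds a small count to zero n-gram
# matches instead of zeroing the whole score.
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction


def smoothed_bleu(original, decoded):
    reference = original.split(' ')
    hypothesis = decoded.split(' ')
    return sentence_bleu([reference], hypothesis,
                         smoothing_function=SmoothingFunction().method1)


if __name__ == '__main__':
    # hypothetical target/decode pair, for illustration only
    print(smoothed_bleu('roses are red violets are blue',
                        'roses are red and blue'))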