def load_model_and_dataset(checkpt_filename):
    """Rebuild a trained VAE and its data loader from a checkpoint file.

    Args:
        checkpt_filename: Path to a checkpoint readable by ``torch.load``
            that contains the keys ``'args'`` and ``'state_dict'``.

    Returns:
        A ``(vae, loader)`` tuple: the VAE in evaluation mode with the stored
        weights applied, and the loader built by ``setup_data_loaders``.
    """
    # The model below is always built with use_cuda=False, so keep every
    # stored tensor on the CPU. Without this a checkpoint written on a GPU
    # cannot be deserialized on a machine that has no CUDA device.
    checkpt = torch.load(checkpt_filename,
                         map_location=lambda storage, loc: storage)
    args = checkpt['args']
    state_dict = checkpt['state_dict']

    # backwards compatibility: checkpoints saved before the `conv` option
    # existed do not carry the attribute.
    if not hasattr(args, 'conv'):
        args.conv = False

    from model import VAE, setup_data_loaders

    # model
    prior_dist = dist.Normal()
    q_dist = dist.Normal()
    vae = VAE(z_dim=args.latent_dim,
              use_cuda=False,
              prior_dist=prior_dist,
              q_dist=q_dist,
              conv=args.conv)
    # strict=False tolerates keys that are missing from / extra in the
    # stored state dict.
    vae.load_state_dict(state_dict, strict=False)
    vae.eval()

    # dataset loader
    loader = setup_data_loaders(args, use_cuda=False)
    return vae, loader
def main(args):
    """Train the VAE on MNIST and dump per-epoch samples plus a loss curve.

    Reads ``seed``, ``cuda``, ``batch_size`` and ``epochs`` from ``args``.
    After every epoch 64 latent vectors are decoded and written to
    ``vae_results/sample_<epoch>.png``; once training finishes the collected
    losses are plotted to ``vae_results/vae_loss_curve.png``.
    """
    # Make sure the output folder exists.
    output_dir = './vae_results/'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Data
    torch.manual_seed(args.seed)
    if args.cuda:
        loader_options = {'num_workers': 1, 'pin_memory': True}
    else:
        loader_options = {}
    mnist_train = datasets.MNIST('./data',
                                 train=True,
                                 download=True,
                                 transform=transforms.ToTensor())
    train_loader = torch.utils.data.DataLoader(mnist_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **loader_options)

    # Model and optimizer
    if torch.cuda.is_available():
        model = VAE().cuda()
    else:
        model = VAE()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # One training pass followed by one sample dump per epoch.
    epoch_losses = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        epoch_losses.append(train(epoch, model, train_loader, optimizer))

        model.eval()
        latent = torch.randn(64, 20)
        if torch.cuda.is_available():
            latent = Variable(latent).cuda()
        else:
            latent = Variable(latent)
        decoded = model.decode(latent).cpu()
        image_path = 'vae_results/sample_' + str(epoch) + '.png'
        save_image(decoded.view(64, 1, 28, 28).data, image_path)

    plt.plot(range(len(epoch_losses)), epoch_losses, '-o')
    plt.savefig('vae_results/vae_loss_curve.png')
def load_model(path):
    """Restore a VAE from the checkpoint stored at ``path``.

    The checkpoint must hold the constructor keyword arguments under
    ``"model"`` and the weights under ``"model_state_dict"``. The returned
    model is already switched to evaluation mode.
    """
    checkpoint = torch.load(path)
    constructor_kwargs = checkpoint["model"]
    weights = checkpoint["model_state_dict"]

    vae = VAE(**constructor_kwargs)
    vae.load_state_dict(weights)
    vae.eval()
    return vae
def main() -> None:
    """Reconstruct every input sentence through the VAE with beam search.

    Each batch from ``args.input_file`` is encoded, the posterior mean is
    used as the latent code (no sampling), and the first beam entry returned
    by the searcher is decoded back to text. All hypotheses are written to
    ``args.output_file``, one per line.
    """
    tokenizer = Tokenizer(args.vocab_file)
    vocabulary_size = len(tokenizer)
    dataset = SentenceDataset(args.input_file, tokenizer=tokenizer.encode)
    loader = DataLoader(dataset,
                        args.batch_size,
                        shuffle=False,
                        collate_fn=dataset.collate_fn,
                        drop_last=False)
    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    # Dropout is disabled: this script only runs inference.
    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    weights = torch.load(args.checkpoint_file, map_location=device)
    model.load_state_dict(weights)
    model.eval()

    print('Generating sentence...')
    all_hypotheses = []
    with torch.no_grad():
        for batch in tqdm(loader):
            batch = batch.to(device)
            lengths = torch.sum(batch != tokenizer.pad_index, dim=-1)
            batch_size = batch.shape[0]

            # Decode from the posterior mean instead of a sampled z.
            mean, logvar = model.encode(batch, lengths)
            z = mean

            hidden = model.fc_hidden(z)
            hidden = hidden.view(batch_size, -1, model.dim_hidden)
            hidden = hidden.transpose(0, 1).contiguous()

            start_predictions = torch.zeros(batch_size, device=device)
            start_predictions = start_predictions.fill_(
                tokenizer.bos_index).long()
            # The searcher state carries `hidden` with its first two axes
            # swapped back again.
            start_state = {'hidden': hidden.permute(1, 0, 2)}

            predictions, log_probabilities = searcher.search(
                start_predictions, start_state, model.step)
            for beams in predictions:
                first_beam = beams[0]
                kept = first_beam[first_beam != tokenizer.eos_index]
                all_hypotheses.append(tokenizer.decode(kept.tolist()))
    print('Done')

    with open(args.output_file, 'w') as f:
        f.write('\n'.join(all_hypotheses))
def main(ARGS, device):
    """
    Prepares the datasets for training and validation, initializes the VAE
    model and runs the training (/validation) process for a given number of
    epochs. Afterwards samples are drawn from the trained model, printed,
    and the model is saved under ``trained_models/``.

    Args:
        ARGS: Parsed command-line options (data paths, model sizes,
            KL-annealing settings, number of epochs, ...).
        device: Device the embeddings and the model are moved to.
    """
    data_splits = ['train', 'val']

    datasets = {
        split: IMDB(ARGS.data_dir, split, ARGS.max_sequence_length,
                    ARGS.min_word_occ, ARGS.create_data)
        for split in data_splits
    }

    pretrained_embeddings = datasets['train'].get_pretrained_embeddings(
        ARGS.embed_dim).to(device)

    model = VAE(
        datasets['train'].vocab_size,
        ARGS.batch_size,
        device,
        pretrained_embeddings=pretrained_embeddings,
        trainset=datasets['train'],
        max_sequence_length=ARGS.max_sequence_length,
        lstm_dim=ARGS.lstm_dim,
        z_dim=ARGS.z_dim,
        embed_dim=ARGS.embed_dim,
        n_lstm_layers=ARGS.n_lstm_layers,
        kl_anneal_type=ARGS.kl_anneal_type,
        kl_anneal_x0=ARGS.kl_anneal_x0,
        kl_anneal_k=ARGS.kl_anneal_k,
        kl_fbits_lambda=ARGS.kl_fbits_lambda,
        word_keep_rate=ARGS.word_keep_rate,
    )
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters())

    print('Starting training process...')

    # The output directory is listed below to number the new model file, so
    # it has to exist already on a fresh checkout; create it if needed
    # instead of crashing in os.listdir.
    os.makedirs("trained_models", exist_ok=True)
    amount_of_files = len(os.listdir("trained_models"))

    for epoch in range(ARGS.epochs):
        elbos = run_epoch(model, datasets, device, optimizer)
        train_elbo, val_elbo = elbos
        print(
            f"[Epoch {epoch} train elbo: {train_elbo}, val_elbo: {val_elbo}]")

    # Perform inference on the trained model
    with torch.no_grad():
        model.eval()
        samples = model.inference()
        print(*idx2word(samples,
                        i2w=datasets['train'].i2w,
                        pad_idx=datasets['train'].pad_idx),
              sep='\n')

    # Files are numbered by how many entries the directory held at start-up.
    model.save(f"trained_models/{amount_of_files + 1}.model")
def main() -> None:
    """Interpolate between two user-supplied sentences in latent space.

    Both sentences are read from standard input, wrapped in BOS/EOS and
    encoded. Nine evenly spaced points on the straight line between the two
    encodings (weights 0.1 ... 0.9) are decoded with beam search and printed
    between the two original sentences.
    """
    tokenizer = Tokenizer(args.vocab_file)
    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    # Dropout is disabled: this script only runs inference.
    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    sentence1 = input('Please input sentence1: ')
    sentence2 = input('Please input sentence2: ')

    s1 = [tokenizer.bos_index
          ] + tokenizer.encode(sentence1) + [tokenizer.eos_index]
    s2 = [tokenizer.bos_index
          ] + tokenizer.encode(sentence2) + [tokenizer.eos_index]

    # Nothing here is trained, so skip building autograd graphs for the
    # encoder and for every decoding step of the search.
    with torch.no_grad():
        # Only the first value returned by encode() is used as the code.
        z1, _ = model.encode(
            torch.tensor([s1]).to(device),
            torch.tensor([len(s1)]).to(device))
        z2, _ = model.encode(
            torch.tensor([s2]).to(device),
            torch.tensor([len(s2)]).to(device))

        print("\nGenerate intermediate sentences")
        print(" %s" % sentence1)
        for r in range(1, 10):
            # Linear interpolation: weight slides from z1 towards z2.
            z = (1 - 0.1 * r) * z1 + 0.1 * r * z2
            hidden = model.fc_hidden(z)
            hidden = hidden.view(1, -1,
                                 model.dim_hidden).transpose(0, 1).contiguous()

            start_predictions = torch.zeros(1, device=device).fill_(
                tokenizer.bos_index).long()
            start_state = {'hidden': hidden.permute(1, 0, 2)}
            predictions, _ = searcher.search(start_predictions, start_state,
                                             model.step)

            # First beam entry of the single batch element, EOS stripped.
            tokens = predictions[0, 0]
            tokens = tokens[tokens != tokenizer.eos_index].tolist()
            print("[%d:%d] %s" % (10 - r, r, tokenizer.decode(tokens)))
        print(" %s" % sentence2)
class Generator(object):
    """Turns latent encodings into sentences with a pre-trained VAE."""

    def __init__(self):
        """Build the VAE, load its saved weights and freeze it for inference."""
        _, _, self.vocab = get_iterators(opt)

        vae = VAE(opt)
        vae.embedding.weight.data.copy_(self.vocab.vectors)
        self.vae = get_cuda(vae)

        checkpoint = T.load('data/saved_models/vae_model.121.pyt')
        self.vae.load_state_dict(checkpoint['vae_dict'])
        self.vae.eval()
        # Drop the reference to the loaded checkpoint once it is applied.
        del checkpoint

    def generate(self, encodings):
        """Sample one sentence per row of ``encodings``.

        Tokens are drawn one at a time from the temperature-scaled softmax of
        the model output until the end token is produced or more than 64
        words have been emitted. Special tokens are removed from the text.

        Returns:
            A list of generated sentences, in the order of ``encodings``.
        """
        special_tokens = ('<unk>', '<sos>', '<eos>', '<pad>')
        sentences = []
        for latent in encodings.numpy():
            latent = get_cuda(T.from_numpy(latent)).view((1, -1))

            # Fresh zero-initialised state pair for every sentence.
            state_h = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
            state_c = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
            decoder_state = (state_h, state_c)

            current = T.LongTensor(1, 1).fill_(
                self.vocab.stoi[opt.start_token])
            current = get_cuda(current)

            pieces = [opt.start_token + " "]
            emitted = 0
            while current[0][0].item() != self.vocab.stoi[opt.end_token]:
                with T.autograd.no_grad():
                    logit, decoder_state, _ = self.vae(None, current, latent,
                                                       decoder_state)
                distribution = F.softmax(logit[0] / TEMPERATURE, dim=1)
                current = T.multinomial(distribution, 1)
                pieces.append(self.vocab.itos[current[0][0].item()] + " ")
                emitted += 1
                # Hard cap so a sentence that never hits the end token stops.
                if emitted > 64:
                    break

            sentence = "".join(pieces)
            for marker in special_tokens:
                sentence = sentence.replace(marker, '')
            sentences.append(sentence)
        return sentences
def main(args):
    """Run MIDI separation as described by the YAML file ``args.config``.

    The config must provide the sections ``separate``, ``model`` and
    ``preprocessor``. Results go to a fresh, timestamped sub-directory of
    ``separate.save_path``.
    """
    with open(args.config, 'r') as config_file:
        # NOTE(review): yaml.load with FullLoader is kept as-is; prefer
        # yaml.safe_load if the config file can come from an untrusted source.
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    conf = config['separate']
    model_params = config['model']
    preprocess_params = config['preprocessor']

    # Every run writes into its own timestamped directory.
    date_time = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime())
    conf['save_path'] = os.path.join(conf['save_path'], date_time)
    if not os.path.isdir(conf['save_path']):
        # makedirs also creates the configured parent directory when it does
        # not exist yet; os.mkdir would raise in that case.
        os.makedirs(conf['save_path'])

    separator = MidiSeparator(
        conf['songs_path'], conf['save_path'], conf['save_reconstructed'],
        model_params['roll_dim'], model_params['time_step'],
        preprocess_params['low_crop'], preprocess_params['high_crop'],
        preprocess_params['note_num'], preprocess_params['longest'])

    # NOTE(review): no checkpoint is loaded into the model in this function;
    # presumably weights are restored inside VAE or MidiSeparator - confirm.
    model = VAE(model_params['roll_dim'], model_params['hidden_dim'],
                model_params['infor_dim'], model_params['time_step'], 12)
    model.eval()

    separator.import_midi_from_folder(model)
def main() -> None:
    """Sample sentences from the prior of a trained VAE and print them.

    ``args.sample_size`` latent vectors are drawn from a standard normal
    distribution, decoded with beam search, and the first beam entry of each
    sample is printed with EOS tokens removed.
    """
    tokenizer = Tokenizer(args.vocab_file)
    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    # Dropout is disabled: this script only runs inference.
    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    # Nothing here is trained, so skip building autograd graphs for the
    # projection and for every decoding step of the search.
    with torch.no_grad():
        z = torch.randn(args.sample_size, args.dim_latent, device=device)
        hidden = model.fc_hidden(z)
        hidden = hidden.view(args.sample_size, -1,
                             model.dim_hidden).transpose(0, 1).contiguous()

        start_predictions = torch.zeros(args.sample_size,
                                        device=device).fill_(
                                            tokenizer.bos_index).long()
        start_state = {'hidden': hidden.permute(1, 0, 2)}
        predictions, _ = searcher.search(start_predictions, start_state,
                                         model.step)

    for pred in predictions:
        tokens = pred[0]
        tokens = tokens[tokens != tokenizer.eos_index].tolist()
        print(tokenizer.decode(tokens))
class TrainingModel(object):
    """Joint trainer for a ``Ranker`` and the ``VAE`` that feeds it topics.

    Every key of ``config`` is also exposed as an attribute (``self.seed``,
    ``self.steps``, ``self.patience``, ...). The class covers VAE
    pre-training, joint pairwise training with early stopping on validation
    ndcg@10, evaluation, checkpointing and a few topic-inspection dumps.
    All statements are written so that they parse under Python 2 and 3.
    """

    def __init__(self, args, config):
        """Seed the RNGs, build data reader, models and optimizers.

        Args:
            args: Command-line options; ``m``, ``save``, ``gpu``, ``resume``
                and ``visual`` are read here.
            config: Hyper-parameter dict. It is copied onto the instance and
                additionally kept (and mutated) as ``self.config``.
        """
        self.__dict__.update(config)
        self.config = config
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        np.random.seed(self.seed)
        if use_cuda:
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
            torch.cuda.set_device(args.gpu)

        self.message = args.m
        self.data_generator = DataGenerator(self.config)
        self.vocab_size = self.data_generator.vocab_size
        self.ent_size = self.data_generator.ent_size

        self.model_name = 'IERM'
        if args.m != "":
            self.saveModeladdr = './trainModel/checkpoint_%s_%s.pkl' % (
                self.model_name, args.m)
        else:
            self.saveModeladdr = './trainModel/' + args.save

        # The VAE is handed the ranker's word and entity embeddings.
        self.model = Ranker(self.vocab_size, self.ent_size, self.config)
        self.VAE_model = VAE(self.vocab_size, self.ent_size,
                             self.model.word_emb, self.model.ent_emb,
                             self.config)
        if use_cuda:
            self.model.cuda()
            self.VAE_model.cuda()

        # Start from the pre-training learning rate when pre-training is on;
        # pretraining() resets it to config['vae_lr'] when it finishes.
        vae_lr = self.config[
            'pretrain_lr'] if config['pretrain_step'] > 0 else config['vae_lr']
        self.vae_optimizer = getOptimizer(config['vae_optim'],
                                          self.VAE_model.parameters(),
                                          lr=vae_lr,
                                          betas=(0.99, 0.99))
        self.ranker_optimizer = getOptimizer(
            config['ranker_optim'],
            self.model.parameters(),
            lr=config['ranker_lr'],
            weight_decay=config['weight_decay'])

        if args.resume and os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            self.model.load_state_dict(checkpoint['rank_state_dict'])
            self.VAE_model.load_state_dict(checkpoint['vae_state_dict'])
            self.vae_optimizer.load_state_dict(checkpoint['vae_optimizer'])
            self.ranker_optimizer.load_state_dict(checkpoint['rank_optimizer'])
        else:
            print("Creating a new model")

        self.timings = defaultdict(list)  # record the loss iterations
        self.evaluator = rank_eval()
        self.epoch = 0
        self.step = 0
        self.kl_weight = 1

        # self.writer only exists when visualisation was requested.
        if args.visual:
            self.config['visual'] = True
            self.writer = SummaryWriter('runs/' + args.m)
        else:
            self.config['visual'] = False

        self.reconstr_loss = nn.MSELoss()

    def add_values(self, iter, value_dict):
        """Log every ``key -> scalar`` pair of ``value_dict`` at step ``iter``."""
        for key in value_dict:
            self.writer.add_scalar(key, value_dict[key], iter)

    def adjust_learning_rate(self, optimizer, lr, decay_rate=.5):
        """Set the learning rate of every parameter group to ``lr * decay_rate``."""
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr * decay_rate

    def kl_anneal_function(self, anneal_function, step, k=0.0025, x0=2500):
        """Return the KL weight for ``step`` under the chosen schedule.

        Args:
            anneal_function: ``'logistic'`` or ``'linear'``.
            step: Current training step.
            k: Steepness of the logistic curve.
            x0: Midpoint of the logistic curve / length of the linear ramp.

        Raises:
            ValueError: If ``anneal_function`` names an unknown schedule
                (previously this silently returned ``None``).
        """
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        if anneal_function == 'linear':
            # Force true division: with integer arguments Python 2 would
            # floor the ratio to 0 for the whole ramp.
            return min(1, float(step) / x0)
        raise ValueError('unknown anneal function: %r' % (anneal_function, ))

    def vae_loss(self, input_qw, reconstr_w, input_qe, reconstr_e, prior_mean,
                 prior_var, posterior_mean, posterior_var, posterior_log_var):
        """Compute the VAE objective for one batch.

        The reconstruction terms compare bag-of-words vectors of the inputs
        with ``reconstr_w`` / ``reconstr_e`` (used as log-probabilities -
        they are exponentiated for the complement term). Depending on
        ``config['reconstruct']`` the word or the entity term is replaced by
        a zero tensor. The KL term has the form of the closed-form
        divergence between two diagonal Gaussians.

        Returns:
            ``(loss, KL, RL_w, RL_e)``, each summed over the batch.
        """
        # Reconstruction term
        if self.config['reconstruct'] != 'entity':
            input_qw_bow = to_bow(input_qw, self.vocab_size)
            input_qw_bow = Tensor2Varible(torch.tensor(input_qw_bow).float())
            RL_w = -torch.sum(
                input_qw_bow * reconstr_w +
                (1 - input_qw_bow) * torch.log(1 - torch.exp(reconstr_w)),
                dim=1)
        else:
            RL_w = Tensor2Varible(torch.tensor([0]).float())

        if self.config['reconstruct'] != 'word':
            input_qe_bow = to_bow(input_qe, self.ent_size)
            input_qe_bow = Tensor2Varible(torch.tensor(input_qe_bow).float())
            RL_e = -torch.sum(
                input_qe_bow * reconstr_e +
                (1 - input_qe_bow) * torch.log(1 - torch.exp(reconstr_e)),
                dim=1)
        else:
            RL_e = Tensor2Varible(torch.tensor([0]).float())

        # KL term
        # var division term
        var_division = torch.sum(posterior_var / prior_var, dim=1)
        # diff means term
        diff_means = prior_mean - posterior_mean
        diff_term = torch.sum((diff_means * diff_means) / prior_var, dim=1)
        # logvar det division term
        logvar_det_division = \
            prior_var.log().sum() - posterior_log_var.sum(dim=1)
        # combine terms
        KL = 0.5 * (var_division + diff_term - self.model.intent_num +
                    logvar_det_division)

        loss = self.kl_weight * KL + RL_w + RL_e
        return loss.sum(), KL.sum(), RL_w.sum(), RL_e.sum()

    def pretraining(self):
        """Train only the VAE for ``pretrain_step`` steps (no-op if <= 0).

        Averaged losses are reported every ``pretrain_freq`` steps. The
        process is terminated when the loss becomes NaN. Afterwards the VAE
        learning rate is set to ``config['vae_lr']``.
        """
        if self.pretrain_step <= 0:
            return

        train_start_time = time.time()
        data_reader = self.data_generator.pretrain_reader(self.pretrain_bs)
        total_loss = 0.
        total_KL_loss = 0.
        total_RLw_loss = 0.
        total_RLe_loss = 0.
        for step in range(self.pretrain_step):
            input_qw, input_qe = next(data_reader)
            topic_e, vae_loss, kl_loss, rl_w_loss, rl_e_loss = self.train_VAE(
                input_qw, input_qe)
            vae_loss.backward()
            # Clamp every gradient element to [-clip_grad, clip_grad].
            torch.nn.utils.clip_grad_value_(self.VAE_model.parameters(),
                                            self.clip_grad)
            self.vae_optimizer.step()
            vae_loss = vae_loss.data

            if torch.isnan(vae_loss):
                # A NaN loss aborts the whole run (the former `continue`
                # after exit() could never execute and was removed).
                print("Got NaN cost .. skipping")
                exit(-1)

            total_loss += vae_loss
            total_KL_loss += kl_loss
            total_RLw_loss += rl_w_loss
            total_RLe_loss += rl_e_loss

            if step != 0 and step % self.pretrain_freq == 0:
                total_loss /= self.pretrain_freq
                total_KL_loss /= self.pretrain_freq
                total_RLw_loss /= self.pretrain_freq
                total_RLe_loss /= self.pretrain_freq
                print('Step: %d\t Elapsed:%.2f' %
                      (step, time.time() - train_start_time))
                print(
                    'Pretrain VAE loss: %.3f\tKL: %.3f\tRL_w:%.3f\tRL_e:%.3f' %
                    (total_loss, total_KL_loss, total_RLw_loss,
                     total_RLe_loss))
                if self.config['visual']:
                    self.add_values(
                        step, {
                            'vae_loss': total_loss,
                            'kl_loss': total_KL_loss,
                            'rl_w_loss': total_RLw_loss,
                            'rl_e_loss': total_RLe_loss,
                            'kl_weight': self.kl_weight
                        })
                total_loss = 0.
                total_KL_loss = 0.
                total_RLw_loss = 0.
                total_RLe_loss = 0.
                print('==============================================')
                self.save_checkpoint(message=self.message + '-pretraining')

        # Persist the final pre-trained weights even when the last step did
        # not fall on a reporting boundary.
        self.save_checkpoint(message=self.message + '-pretraining')
        print('Pretraining end')
        # recovering the learning rate
        self.adjust_learning_rate(self.vae_optimizer, self.config['vae_lr'], 1)

    def trainIters(self):
        """Run joint training for up to ``self.steps`` steps.

        Batches with a NaN loss are skipped. Every ``eval_freq`` successful
        steps (unless it is -1) the model is validated; a new best ndcg@10
        saves a checkpoint and resets patience, a result not exceeding
        ``last * cost_threshold`` costs one unit of patience. Training stops
        once patience drops below zero.
        """
        self.step = 0
        train_start_time = time.time()
        patience = self.patience
        best_ndcg10 = 0.0
        last_ndcg10 = 0.0

        data_reader = self.data_generator.pair_reader(self.batch_size)
        total_loss = 0.0
        total_rank_loss = 0.
        total_vae_loss = 0.
        total_KL_loss = 0.
        total_RLw_loss = 0.
        total_RLe_loss = 0.

        for step in range(self.steps):
            out = next(data_reader)
            (input_qw, input_qe, input_dw_pos, input_de_pos, input_dw_neg,
             input_de_neg) = out
            rank_loss, vae_total_loss, KL_loss, RL_w_loss, RL_e_loss \
                = self.train(input_qw, input_qe, input_dw_pos, input_de_pos,
                             input_dw_neg, input_de_neg)

            cur_total_loss = rank_loss + vae_total_loss
            if torch.isnan(cur_total_loss):
                print("Got NaN cost .. skipping")
                continue

            # self.step counts successful updates only; `step` counts batches.
            self.step += 1
            total_loss += cur_total_loss
            total_rank_loss += rank_loss
            total_vae_loss += vae_total_loss
            total_KL_loss += KL_loss
            total_RLw_loss += RL_w_loss
            total_RLe_loss += RL_e_loss

            if self.eval_freq != -1 and self.step % self.eval_freq == 0:
                with torch.no_grad():
                    valid_performance = self.test(
                        valid_or_test='valid',
                        source=self.config['click_model'])
                current_ndcg10 = valid_performance['ndcg@10']
                if current_ndcg10 > best_ndcg10:
                    print('Got better result, save to %s' %
                          self.saveModeladdr)
                    best_ndcg10 = current_ndcg10
                    patience = self.patience
                    self.save_checkpoint(message=self.message)
                elif current_ndcg10 <= last_ndcg10 * self.cost_threshold:
                    patience -= 1
                last_ndcg10 = current_ndcg10

            if self.step % self.train_freq == 0:
                total_loss /= self.train_freq
                total_rank_loss /= self.train_freq
                total_vae_loss /= self.train_freq
                total_KL_loss /= self.train_freq
                total_RLw_loss /= self.train_freq
                total_RLe_loss /= self.train_freq
                self.timings['train'].append(total_loss)
                print('Step: %d\t Elapsed:%.2f' %
                      (step, time.time() - train_start_time))
                print(
                    'Train total loss: %.3f\tRank loss: %.3f\tVAE loss: %.3f' %
                    (total_loss, total_rank_loss, total_vae_loss))
                print('KL loss: %.3f\tRL W: %.3f\tRL E: %.3f' %
                      (total_KL_loss, total_RLw_loss, total_RLe_loss))
                print('Patience left: %d' % patience)
                if self.config['visual']:
                    # NOTE(review): 'Train vae_loss' receives the combined
                    # rank + VAE loss, not total_vae_loss - confirm intent.
                    self.add_values(
                        step, {
                            'Train vae_loss': total_loss,
                            'Train kl_loss': total_KL_loss,
                            'Train rl_w_loss': total_RLw_loss,
                            'Train rl_e_loss': total_RLe_loss,
                            'Train Rank loss': total_rank_loss
                        })
                total_loss = 0
                total_rank_loss = 0.
                total_vae_loss = 0.
                total_KL_loss = 0.
                total_RLw_loss = 0.
                total_RLe_loss = 0.

            if patience < 0:
                print('patience runs out...')
                break

        print('Patience___:  %s' % (patience, ))
        print("All done, exiting...")

    def test(self, valid_or_test, source):
        """Evaluate the ranker and return the mean of every metric.

        Args:
            valid_or_test: ``'valid'``, ``'ntcir13'``, ``'ntcir14'`` or any
                other value for the regular test set.
            source: Label type passed to the reader; forced to ``'HUMAN'``
                for the NTCIR sets.

        Returns:
            Dict mapping metric name to its mean over all queries. For every
            split except ``'valid'`` the per-document predictions are also
            written below ``./results/``.
        """
        predicted = []
        results = defaultdict(list)

        if valid_or_test == 'valid':
            is_test = False
            data_addr = self.valid_rank_addr
            data_source = self.data_generator.pointwise_reader_evaluation(
                data_addr, is_test=is_test, label_type=source)
        elif valid_or_test == 'ntcir13' or valid_or_test == 'ntcir14':
            is_test = True
            data_source = self.data_generator.pointwise_ntcir_generator(
                valid_or_test)
            source = 'HUMAN'
        else:
            is_test = True
            data_addr = self.test_rank_addr
            data_source = self.data_generator.pointwise_reader_evaluation(
                data_addr, is_test=is_test, label_type=source)

        # Wall-clock timing, like the other timers in this class
        # (time.clock() no longer exists in Python 3.8+).
        start = time.time()
        count = 0
        for out in data_source:
            (qid, dids, input_qw, input_qe, input_dw, input_de, gt_rels) = out
            # Materialise the labels: they are consumed twice below, which
            # an iterator-returning map() would not survive.
            gt_rels = [score2cutoff(source, t) for t in gt_rels]
            rels_predicted = self.predict(input_qw, input_qe, input_dw,
                                          input_de).view(-1).cpu().numpy()
            result = self.evaluator.eval(gt_rels, rels_predicted)
            for did, gt, pred in zip(dids, gt_rels, rels_predicted):
                predicted.append((qid, did, pred, gt))
            for k, v in result.items():
                results[k].append(v)
            count += 1
        elapsed = time.time() - start
        # max() guards the average against an empty reader.
        print('Elapsed:%.3f\tAvg:%.3f' % (elapsed, elapsed / max(count, 1)))

        performances = {k: np.mean(v) for k, v in results.items()}

        print('------Source: %s\tPerformance-------:' % source)
        print('Validating...' if valid_or_test == 'valid' else 'Testing')
        print('Message: %s' % self.message)
        print('Source: %s' % source)
        print(performances)

        if valid_or_test != 'valid':
            path = './results/' + self.message + '_' + valid_or_test + '_' + source
            if not os.path.exists(path):
                os.makedirs(path)
            with open('%s/%s.predicted.txt' % (path, self.model_name),
                      'w') as out_file:
                for qid, did, pred, gt in predicted:
                    out_file.write(
                        '\t'.join([qid, did, str(pred), str(gt)]) + '\n')

        return performances

    def get_text(self, input, map_fun):
        """Map ids to text up to (excluding) the first 0 and join with spaces."""
        text_list = []
        for element in input:
            # Id 0 ends the sequence (presumably padding - confirm).
            if element == 0:
                break
            text_list.append(map_fun(element))
        return ' '.join(text_list)

    def generate_beta_phi_3(self, topK=10, show_topic_limit=-1):
        """Collect the top words and entities per topic and dump them.

        Args:
            topK: Number of items requested from ``infer_topic_dis``.
            show_topic_limit: Number of topics to export; -1 exports
                ``config['intent_num']`` topics.

        Returns:
            ``(topics, topics_ents)``: dicts from topic index to the list of
            words / entities. Both are also written to
            ``./topic/<message>/topic-words.txt``.
        """
        beta, phi = self.VAE_model.infer_topic_dis(topK)
        topics = defaultdict(list)
        topics_ents = defaultdict(list)
        show_topic_num = self.config[
            'intent_num'] if show_topic_limit == -1 else show_topic_limit
        for i in range(show_topic_num):
            idxs = beta[i]
            eidxs = phi[i]
            component_words = [
                self.data_generator.id2word[idx]
                for idx in idxs.cpu().numpy()
            ]
            component_ents = [
                self.data_generator.id2ent[self.data_generator.new2old[idx]]
                for idx in eidxs.cpu().numpy()
            ]
            topics[i] = component_words
            topics_ents[i] = component_ents

        print('--------Topic-Word-------')
        # self.message holds args.m (set in __init__); using it avoids
        # depending on a module-level `args`.
        prefix = ('./topic/%s/' % self.message)
        if not os.path.exists(prefix):
            os.makedirs(prefix)
        with open(prefix + 'topic-words.txt', 'w') as outfile:
            for k in topics:
                outfile.write(str(k) + ' : ' + ' '.join(topics[k]) + '\n')
                outfile.write(str(k) + ' : ' + ' '.join(topics_ents[k]) + '\n')
        return topics, topics_ents

    def run_test_topic(self, out_file_name, topK, topicNum):
        """Write, for every test query, its three highest-weighted topics.

        NOTE(review): ``topicNum`` is accepted but not used; the number of
        reported topics is fixed at 3 - confirm whether it should drive it.
        """
        topics_words, topics_ents = self.generate_beta_phi_3(topK)
        data_addr = self.test_rank_addr
        data_source = self.data_generator.pointwise_reader_evaluation(
            data_addr, is_test=True, label_type=self.config['click_model'])
        with open(out_file_name, 'w') as out_file:
            with torch.no_grad():
                self.VAE_model.eval()
                self.model.eval()
                for i, out in enumerate(data_source):
                    (qid, dids, input_qw, input_qe, input_dw, input_de,
                     gt_rels) = out
                    theta = self.VAE_model.get_theta(input_qw, input_qe)

                    # Only the first row of each query tensor is rendered.
                    input_qw = input_qw[0]
                    input_qe = input_qe[0]
                    input_w = self.get_text(
                        input_qw, lambda w: self.data_generator.id2word[w])
                    input_e = self.get_text(
                        input_qe, lambda e: self.data_generator.id2ent[
                            self.data_generator.new2old[e]])

                    theta = theta[0].data.cpu().numpy()
                    # Indices of the three largest entries, largest first.
                    top_indices = np.argsort(theta)[::-1][:3]

                    out_file.write('Query:  %s\n' % (input_w, ))
                    out_file.write('Entity:  %s\n' % (input_e, ))
                    for j, k in enumerate(top_indices):
                        ws = topics_words[k]
                        es = topics_ents[k]
                        out_file.write('%d Word Topic %d: %s\n' %
                                       (j, k, ' '.join(ws)))
                        out_file.write('%d Entity Topic %d: %s\n' %
                                       (j, k, ' '.join(es)))

    def generate_topic_word_ent(self, out_file, topK=10):
        """Write each test query next to its VAE word/entity reconstruction.

        Args:
            out_file: Path of the text file to create.
            topK: Number of words / entities requested per query.
        """
        print('Visualizing ...')
        data_addr = self.test_rank_addr
        data_source = self.data_generator.pointwise_reader_evaluation(
            data_addr, is_test=True, label_type=self.config['click_model'])
        with open(out_file, 'w') as out_handle:
            with torch.no_grad():
                self.VAE_model.eval()
                self.model.eval()
                for i, out in enumerate(data_source):
                    # The evaluation reader yields 7 fields (see test() and
                    # run_test_topic()); unpacking only 5 raised ValueError.
                    (qid, dids, input_qw, input_qe, input_dw, input_de,
                     gt_rels) = out
                    _, word_indices, ent_indices = \
                        self.VAE_model.get_topic_words(
                            input_qw, input_qe, topK=topK)
                    word_indices = word_indices[0].data.cpu().numpy()
                    ent_indices = ent_indices[0].data.cpu().numpy()

                    input_qw = input_qw[0]
                    input_qe = input_qe[0]
                    input_w = self.get_text(
                        input_qw, lambda w: self.data_generator.id2word[w])
                    input_e = self.get_text(
                        input_qe, lambda e: self.data_generator.id2ent[
                            self.data_generator.new2old[e]])
                    reconstuct_w = self.get_text(
                        word_indices,
                        lambda w: self.data_generator.id2word[w])
                    reconstuct_e = self.get_text(
                        ent_indices, lambda e: self.data_generator.id2ent[
                            self.data_generator.new2old[e]])

                    out_handle.write('%d: Word: %s\tRecons: %s\n' %
                                     (i + 1, input_w, reconstuct_w))
                    out_handle.write('%d: Ent: %s\tRecons: %s\n' %
                                     (i + 1, input_e, reconstuct_e))

    def train_VAE(self, input_qw, input_qe):
        """Run the VAE forward and compute its loss; no backward pass here.

        Gradients are zeroed first. The caller is responsible for calling
        ``backward()`` and stepping the optimizer.

        Returns:
            ``(topic_embeddings, vae_total_loss, KL, RL_w, RL_e)`` where the
            last three are detached via ``.data``.
        """
        self.VAE_model.train()
        self.VAE_model.zero_grad()
        self.vae_optimizer.zero_grad()

        topic_embeddings, logPw, logPe, prior_mean, prior_variance,\
            poster_mu, poster_sigma, poster_log_sigma = self.VAE_model(
                input_qw, input_qe)

        vae_total_loss, KL, RL_w, RL_e = self.vae_loss(
            input_qw, logPw, input_qe, logPe, prior_mean, prior_variance,
            poster_mu, poster_sigma, poster_log_sigma)

        return topic_embeddings, vae_total_loss, KL.data, RL_w.data, RL_e.data

    def train(self, input_qw, input_qe, input_dw_pos, input_de_pos,
              input_dw_neg, input_de_neg):
        """Perform one joint update of the ranker and the VAE.

        The total loss is the pairwise hinge loss (margin 1) plus the VAE
        loss weighted by ``config['intent_lambda']`` plus the mean of the
        two auxiliary losses returned by the ranker.

        Returns:
            ``(rank_loss, vae_total_loss, KL_loss, RL_w_loss, RL_e_loss)``
            as detached values.
        """
        # Turn on training mode which enables dropout.
        self.model.train()
        self.model.zero_grad()
        self.ranker_optimizer.zero_grad()

        topic_embeddings, vae_total_loss, KL_loss, RL_w_loss, RL_e_loss = \
            self.train_VAE(input_qw, input_qe)

        score_pos, orth_loss_1 = self.model(input_qw, input_qe, input_dw_pos,
                                            input_de_pos, topic_embeddings)
        score_neg, orth_loss_2 = self.model(input_qw, input_qe, input_dw_neg,
                                            input_de_neg, topic_embeddings)

        rank_loss = torch.sum(torch.clamp(1.0 - score_pos + score_neg, min=0))
        vae_weight = self.config['intent_lambda']
        orth_loss = (orth_loss_1 + orth_loss_2) / 2
        total_loss = rank_loss + vae_weight * vae_total_loss + orth_loss
        total_loss.backward()

        ## update parameters
        # Clamp every gradient element to [-clip_grad, clip_grad].
        torch.nn.utils.clip_grad_value_(self.VAE_model.parameters(),
                                        self.clip_grad)
        torch.nn.utils.clip_grad_value_(self.model.parameters(),
                                        self.clip_grad)

        self.ranker_optimizer.step()
        self.vae_optimizer.step()

        return rank_loss.data, vae_total_loss.data, KL_loss, RL_w_loss, RL_e_loss

    def predict(self, input_qw, input_qe, input_dw, input_de):
        """Score documents for a query with both models in evaluation mode."""
        # Turn on evaluation mode which disables dropout.
        with torch.no_grad():
            self.VAE_model.eval()
            self.model.eval()
            # NOTE(review): the VAE output is used directly as the topic
            # embeddings here, whereas train_VAE unpacks eight values -
            # presumably the VAE returns something different in eval mode.
            topic_embeddings = self.VAE_model(input_qw, input_qe)
            rels_predicted, _ = self.model(input_qw, input_qe, input_dw,
                                           input_de, topic_embeddings)

        return rels_predicted

    def save_checkpoint(self, message):
        """Save model and optimizer states to ``self.saveModeladdr``.

        ``message`` is accepted for call-site compatibility but does not
        influence the file name.
        """
        filePath = os.path.join(self.saveModeladdr)
        # torch.save does not create missing directories.
        parent_dir = os.path.dirname(filePath)
        if parent_dir and not os.path.isdir(parent_dir):
            os.makedirs(parent_dir)
        torch.save(
            {
                'vae_state_dict': self.VAE_model.state_dict(),
                'rank_state_dict': self.model.state_dict(),
                'vae_optimizer': self.vae_optimizer.state_dict(),
                'rank_optimizer': self.ranker_optimizer.state_dict()
            }, filePath)

    def get_embeddings(self):
        """Pickle word, entity and topic embeddings of the ranker to disk."""
        word_embeddings = self.model.word_emb.weight.detach().cpu().numpy()
        ent_embeddings = self.model.ent_emb.weight.detach().cpu().numpy()
        topic_embeddings = self.model.topic_embedding.detach().cpu().numpy()
        print('Topic size:  %d' % topic_embeddings.shape[0])
        # Pickle data is binary: open in 'wb' and close the handle reliably.
        with open('./topic_analysis/w_e_t_embedding.pkl', 'wb') as handle:
            cPickle.dump((word_embeddings, ent_embeddings, topic_embeddings),
                         handle)
        print('saved')
        return
def main():
    """Train the sentence VAE and log per-epoch train/validation losses.

    Log records go to the console and to ``args.log_file``. The loss is the
    cross-entropy of next-token prediction plus the annealed KL term; the
    annealer advances once per training batch. The model weights are written
    to ``args.checkpoint_file`` at the end of every epoch.
    """
    logger = logging.getLogger(__name__)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    file_handler = logging.FileHandler(filename=args.log_file, mode='w')
    file_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)8s %(message)s"))
    file_handler.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    for handler in (console_handler, file_handler):
        logger.addHandler(handler)

    tokenizer = Tokenizer(args.vocab_file)
    train_dataset = SentenceDataset(args.train_file, tokenizer.encode)
    valid_dataset = SentenceDataset(args.valid_file, tokenizer.encode)
    train_loader = DataLoader(train_dataset,
                              args.batch_size,
                              shuffle=True,
                              collate_fn=train_dataset.collate_fn,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              args.batch_size,
                              shuffle=False,
                              collate_fn=valid_dataset.collate_fn,
                              drop_last=True)

    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=args.dropout,
        word_dropout=args.word_dropout,
        dropped_index=tokenizer.unk_index,
    ).to(device)

    annealer = KLAnnealer(x0=args.x0, k=args.k)
    criterion = LmCrossEntropyLoss(tokenizer.pad_index, reduction='batchmean')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.98),
                                 eps=1e-09)

    def compute_losses(batch, beta):
        """Return ``(loss, ce_loss, kl_loss)`` for one batch."""
        batch = batch.to(device)
        length = torch.sum(batch != tokenizer.pad_index, dim=-1)
        output, mean, logvar, z = model(batch, length)
        # Position t of the output is scored against token t + 1.
        ce_loss = criterion(output[:, :-1, :], batch[:, 1:])
        kl_loss = -0.5 * torch.mean(
            torch.sum(1 + logvar - mean.pow(2) - logvar.exp(), dim=-1))
        return ce_loss + beta * kl_loss, ce_loss, kl_loss

    logger.info('Start training')
    for epoch in range(args.num_epochs):
        train_loss = train_ce_loss = train_kl_loss = 0.
        valid_loss = valid_ce_loss = valid_kl_loss = 0.

        pbar = tqdm(train_loader)
        pbar.set_description("[Epoch %d/%d]" % (epoch, args.num_epochs))

        # Train
        model.train()
        for itr, s in enumerate(pbar):
            beta = annealer()
            loss, ce_loss, kl_loss = compute_losses(s, beta)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            annealer.step()

            train_loss += loss.item()
            train_ce_loss += ce_loss.item()
            train_kl_loss += kl_loss.item()
            if itr % args.print_every == 0:
                pbar.set_postfix(loss=train_loss / (itr + 1), beta=beta)

        num_train_batches = len(train_loader)
        train_loss /= num_train_batches
        train_ce_loss /= num_train_batches
        train_kl_loss /= num_train_batches

        # Valid
        model.eval()
        with torch.no_grad():
            for s in valid_loader:
                # The annealer is queried but not advanced during validation.
                beta = annealer()
                loss, ce_loss, kl_loss = compute_losses(s, beta)
                valid_loss += loss.item()
                valid_ce_loss += ce_loss.item()
                valid_kl_loss += kl_loss.item()

        num_valid_batches = len(valid_loader)
        valid_loss /= num_valid_batches
        valid_ce_loss /= num_valid_batches
        valid_kl_loss /= num_valid_batches

        summary = (
            '[Epoch %d/%d] Training loss: %.2f, CE loss: %.2f, KL loss: %.2f, Validation loss: %.2f, CE loss: %.2f, KL loss: %.2f'
            % (
                epoch,
                args.num_epochs,
                train_loss,
                train_ce_loss,
                train_kl_loss,
                valid_loss,
                valid_ce_loss,
                valid_kl_loss,
            ))
        logger.info(summary)

        torch.save(model.state_dict(), args.checkpoint_file)
class ReconstructionBERTTrainer:
    """Train and evaluate a ``VAE`` model that wraps a BERT encoder.

    Each batch is a ``(positive, negative)`` pair of tensor dictionaries.  The
    loss combines, depending on the epoch:

    * ``c_entropy``: cross entropy between ``stage_vec`` and ``data['stage']``;
    * ``hinge_loss``: ``my_loss(reconstructed_vec, graph_vec, origin_neg)``;
    * ``weight_loss``: ``||W^T W - I||`` on the model's ``reconstruction``
      weight, scaled by ``loss_lambda``;
    * ``entropy``: entropy of ``softmax(stage_vec)`` summed over the batch.

    Schedule: before ``hinge_loss_start_point`` only ``c_entropy`` is used;
    from there until ``entropy_start_point`` the hinge and weight terms are
    added; afterwards ``entropy`` is added as well.
    """

    # Learning rate used from ``hinge_loss_start_point`` onwards.
    _LATE_STAGE_LR = 0.00002

    def __init__(self,
                 bert: BERT,
                 vocab_size: int,
                 markdown_vocab_size,
                 markdown_emb_size,
                 train_dataloader: DataLoader,
                 test_dataloader: DataLoader,
                 lr: float = 1e-4,
                 betas=(0.9, 0.999),
                 weight_decay: float = 0.01,
                 warmup_steps=10000,
                 with_cuda: bool = True,
                 cuda_devices=None,
                 log_freq: int = 10,
                 pad_index=0,
                 loss_lambda=1,
                 model_path=None,
                 n_topics=50,
                 weak_supervise=False,
                 context=False,
                 markdown=False,
                 hinge_loss_start_point=20,
                 entropy_start_point=30):
        """
        :param bert: BERT model used as the encoder inside the VAE
        :param vocab_size: total word vocab size
        :param markdown_vocab_size: forwarded to ``VAE``
        :param markdown_emb_size: forwarded to ``VAE``
        :param train_dataloader: train dataset data loader
        :param test_dataloader: test dataset data loader [can be None]
        :param lr: learning rate of the SGD optimizer
        :param betas: unused (kept for interface compatibility)
        :param weight_decay: unused (kept for interface compatibility)
        :param warmup_steps: unused (kept for interface compatibility)
        :param with_cuda: train on CUDA when it is available
        :param cuda_devices: device ids handed to ``nn.DataParallel``
        :param log_freq: logging frequency of the batch iteration
        :param pad_index: stored on the trainer as ``self.pad_index``
        :param loss_lambda: weight of the ``W^T W - I`` penalty
        :param model_path: checkpoint written by :meth:`save`; its suffix
            ``.ep<N>`` is parsed to recover the last finished epoch
        :param n_topics: number of topics; also the size of the identity
            matrix in the weight penalty
        :param weak_supervise: forwarded to ``VAE``
        :param context: use information from neighbor cells
        :param markdown: forwarded to ``VAE``
        :param hinge_loss_start_point: first epoch that uses the hinge and
            weight terms (and the reduced learning rate)
        :param entropy_start_point: first epoch that adds the entropy term
        """
        self.loss_lambda = loss_lambda
        self.n_topics = n_topics
        self.weak_supervise = weak_supervise
        self.context = context
        self.markdown = markdown
        self.hinge_loss_start_point = hinge_loss_start_point
        self.entropy_start_point = entropy_start_point

        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")

        self.bert = bert
        self.model = VAE(bert,
                         vocab_size,
                         markdown_vocab_size,
                         markdown_emb_size,
                         n_topics=n_topics,
                         weak_supervise=weak_supervise,
                         context=context,
                         markdown=markdown).to(self.device)

        if model_path:
            # map_location lets a checkpoint written on a GPU be restored on
            # a CPU-only machine.
            state_dict = torch.load(
                model_path, map_location=self.device)["model_state_dict"]
            # Checkpoints written from an nn.DataParallel wrapper carry a
            # "module." prefix on every key; strip it so they load into the
            # bare model.
            prefix = "module."
            if state_dict and all(k.startswith(prefix) for k in state_dict):
                state_dict = {
                    k[len(prefix):]: v
                    for k, v in state_dict.items()
                }
            self.model.load_state_dict(state_dict)
            # save() names files "<file_path>.ep<N>".
            self.last_epoch = int(model_path.split('.')[-1][2:])
        else:
            self.last_epoch = None

        # Distributed GPU training if CUDA can detect more than 1 GPU
        if with_cuda and torch.cuda.device_count() > 1:
            print("Using %d GPUS for BERT" % torch.cuda.device_count())
            self.model = nn.DataParallel(self.model, device_ids=cuda_devices)

        self.train_data = train_dataloader
        self.test_data = test_dataloader
        self.pad_index = pad_index

        self.optim = SGD(self.model.parameters(), lr=lr, momentum=0.9)
        if (self.last_epoch is not None
                and self.last_epoch >= self.hinge_loss_start_point):
            # Resuming past the switch point: continue with the reduced rate.
            self.optim = self._late_stage_optimizer()

        self.best_loss = None
        self.updated = False
        self.log_freq = log_freq
        self.cross_entropy = nn.CrossEntropyLoss(ignore_index=0)

        print("Total Parameters:",
              sum(p.nelement() for p in self.model.parameters()))

    def _late_stage_optimizer(self):
        """Return a fresh SGD optimizer with the reduced late-stage rate."""
        return SGD(self.model.parameters(),
                   lr=self._LATE_STAGE_LR,
                   momentum=0.9)

    def _bare_model(self):
        """Return the underlying model, unwrapping ``nn.DataParallel``."""
        if isinstance(self.model, nn.DataParallel):
            return self.model.module
        return self.model

    def _to_device(self, batch):
        """Move every tensor of a batch dictionary to ``self.device``."""
        return {key: value.to(self.device) for key, value in batch.items()}

    def _forward(self, data, ndata, train):
        """Run the model on one positive/negative batch pair."""
        return self.model(data["bert_input"],
                          ndata["bert_input"],
                          data["segment_label"],
                          ndata["segment_label"],
                          data["adj_mat"],
                          ndata["adj_mat"],
                          train=train,
                          context_topic_dist=data["context_topic_vec"],
                          markdown_label=data["markdown_label"],
                          markdown_len=data["markdown_len"],
                          neg_markdown_label=ndata["markdown_label"],
                          neg_markdown_len=ndata["markdown_len"])

    def train(self, epoch):
        """Run one training epoch and return its average loss."""
        self.model.train()
        return self.iteration(epoch, self.train_data)

    def test(self, epoch):
        """Run one evaluation epoch without gradients; return average loss."""
        self.model.eval()
        with torch.no_grad():
            loss = self.iteration(epoch, self.test_data, train=False)
        return loss

    def api(self, data_loader=None):
        """Run inference and collect per-sample predictions.

        :param data_loader: loader to run on; defaults to the test loader
        :return: ``(phases, stages, stage_vecs)`` where ``phases`` and
            ``stages`` are the arg-max indices of ``topic_dist`` and
            ``stage_vec`` and ``stage_vecs`` holds the raw ``stage_vec`` rows

        Side effect: calls ``update_topic_dist`` on the loader's dataset for
        every batch.
        """
        self.model.eval()
        if data_loader is None:
            data_loader = self.test_data

        data_iter = tqdm.tqdm(enumerate(data_loader),
                              total=len(data_loader),
                              bar_format="{l_bar}{r_bar}")

        phases = []
        stages = []
        stage_vecs = []
        with torch.no_grad():
            for _, item in data_iter:
                data = self._to_device(item[0])
                ndata = self._to_device(item[1])

                _, _, _, topic_dist, stage_vec = self._forward(
                    data, ndata, train=False)
                data_loader.dataset.update_topic_dist(topic_dist, data["id"])

                phases += torch.max(topic_dist, 1)[-1].tolist()
                stages += torch.max(stage_vec, 1)[-1].tolist()
                stage_vecs += stage_vec.tolist()
        return phases, stages, stage_vecs

    def iteration(self, epoch, data_loader, train=True):
        """
        Loop over the data_loader for training or testing.

        Backward and optimizer steps only happen when ``train`` is true.

        :param epoch: current epoch index
        :param data_loader: torch.utils.data.DataLoader for iteration
        :param train: boolean value of is train or test
        :return: average loss per batch
        """
        str_code = "train" if train else "test"

        if train and epoch == self.hinge_loss_start_point:
            # Switch to the reduced learning rate once, at the start of the
            # epoch.  Rebuilding the optimizer for every batch would throw
            # the momentum buffers away after each step.
            self.optim = self._late_stage_optimizer()

        num_batches = len(data_loader)
        data_iter = tqdm.tqdm(enumerate(data_loader),
                              desc="EP_%s:%d" % (str_code, epoch),
                              total=num_batches,
                              bar_format="{l_bar}{r_bar}")

        avg_loss = 0.0
        for i, item in data_iter:
            data = self._to_device(item[0])
            ndata = self._to_device(item[1])

            reconstructed_vec, graph_vec, origin_neg, topic_dist, stage_vec = \
                self._forward(data, ndata, train=train)

            if self.context:
                data_loader.dataset.update_topic_dist(topic_dist, data["id"])

            hinge_loss = my_loss(reconstructed_vec, graph_vec, origin_neg)

            # ||W^T W - I||: pushes the reconstruction weight towards
            # orthonormal columns.  The identity is created on the training
            # device so the code also runs on CPU.
            weight = self._bare_model().reconstruction.weight
            weight_loss = torch.norm(
                torch.mm(weight.T, weight) -
                torch.eye(self.n_topics, device=self.device))

            c_entropy = self.cross_entropy(stage_vec, data['stage'])
            entropy = -1 * (F.softmax(stage_vec, dim=1) *
                            F.log_softmax(stage_vec, dim=1)).sum()

            if epoch < self.hinge_loss_start_point:
                loss = c_entropy
            elif epoch < self.entropy_start_point:
                loss = c_entropy + self.loss_lambda * weight_loss + hinge_loss
            else:
                loss = (c_entropy + entropy +
                        self.loss_lambda * weight_loss + hinge_loss)

            if train:
                self.optim.zero_grad()
                loss.backward()
                self.optim.step()

            avg_loss += loss.item()

            if i % self.log_freq == 0:
                post_fix = {
                    "epoch": epoch,
                    "iter": i,
                    "avg_loss": avg_loss / (i + 1),
                    "loss": loss.item(),
                    "cross_entropy": c_entropy.item(),
                    "entropy": entropy.item(),
                    "hinge_loss": hinge_loss.item()
                }
                data_iter.write(str(post_fix))

        # max() keeps an empty loader from raising ZeroDivisionError.
        mean_loss = avg_loss / max(num_batches, 1)
        print("EP%d_%s, avg_loss=" % (epoch, str_code), mean_loss)
        return mean_loss

    def save(self, epoch, file_path="output/bert_trained.model"):
        """
        Save the current model weights.

        :param epoch: current epoch number
        :param file_path: model output path prefix; the file written is
            ``file_path + ".ep%d" % epoch``
        :return: final_output_path
        """
        output_path = file_path + ".ep%d" % epoch
        torch.save(
            {
                'epoch': epoch,
                # Save the unwrapped weights so the checkpoint can be loaded
                # by __init__, which restores before any DataParallel wrap.
                'model_state_dict': self._bare_model().state_dict()
            }, output_path)
        print("EP:%d Model Saved on:" % epoch, output_path)
        return output_path
reparam = False for epoch in range(n_epochs): epoch_batch = 0 verbose_loss = 0 verbose_penalty = 0 verbose_batch = 0 epoch_train_elbo = 0 epoch_val_elbo = 0 # Evaluate and snapshot the model at each epoch (even before training) recs = [] mus = [] log_vars = [] with torch.no_grad(): model.eval() frame_idx = 0 for test_data in test_loader: test_data = test_data.to(device) # test_data -= mean[None, ...] rec, penalty = model(test_data) mu, log_var = model.encoder(test_data) mus.append(mu.clone().detach().cpu().numpy()) log_vars.append(log_var.clone().detach().cpu().numpy()) if reparam: latent = reparameterize(mu, log_var) else: latent = mu rec = model.decoder(latent).cpu().numpy() recs.append(rec)
def main(args):
    """Decode a MIDI file from a stored pitch vector and rhythm vector.

    Reads the YAML file at ``args.config`` and uses its ``combine``,
    ``model`` and ``preprocessor`` sections.  The VAE weights come from
    ``combine.model_path``; the pitch and rhythm inputs are loaded with
    ``np.loadtxt`` from ``combine.p_path + ".txt"`` and
    ``combine.r_path + ".txt"``.  The decoded piano roll is written through
    ``mf.numpy_to_midi`` into ``combine.save_path`` under the name
    ``<pitch source>+<rhythm source>.mid``, where each source name is the
    third-from-last ``/``-separated component of the corresponding path.
    """
    with open(args.config, 'r') as config_file:
        # NOTE(review): FullLoader can construct some Python objects; only use
        # this with trusted config files (yaml.safe_load is the strict
        # alternative).
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    conf = config['combine']
    model_params = config['model']
    preprocess_params = config['preprocessor']
    path = conf['save_path']

    model = VAE(model_params['roll_dim'], model_params['hidden_dim'],
                model_params['infor_dim'], model_params['time_step'], 12)
    # Load onto the CPU first so a GPU-written checkpoint also works on a
    # CPU-only machine; the model is moved to the GPU below when available.
    model.load_state_dict(torch.load(conf['model_path'], map_location='cpu'))
    if torch.cuda.is_available():
        print('Using: ',
              torch.cuda.get_device_name(torch.cuda.current_device()))
        device = torch.device('cuda')
        model.cuda()
    else:
        print('CPU mode')
        device = torch.device('cpu')
    model.eval()

    pitch_path = conf['p_path'] + ".txt"
    rhythm_path = conf['r_path'] + ".txt"
    pitch_name = pitch_path.split("/")[-3]
    rhythm_name = rhythm_path.split("/")[-3]
    name = pitch_name + "+" + rhythm_name + ".mid"

    pitch = np.loadtxt(pitch_path)
    print(pitch)
    rhythm = np.loadtxt(rhythm_path)
    print(rhythm)
    print("Importing " + pitch_name + " pitch and " + rhythm_name + " rhythm")

    # The inputs must live on the same device as the model weights.
    pitch = torch.from_numpy(pitch).float().to(device)
    rhythm = torch.from_numpy(rhythm).float().to(device)
    with torch.no_grad():
        recon = model.decoder(pitch, rhythm)
        recon = torch.squeeze(recon, 0)
        recon = mf._sampling(recon)
    recon = np.array(recon.cpu().detach().numpy())

    # Keep as many leading rows as the rhythm vector sums to.
    length = int(torch.sum(rhythm).item())
    recon = recon[:length]

    recon = mf.modify_pianoroll_dimentions(recon,
                                           preprocess_params['low_crop'],
                                           preprocess_params['high_crop'],
                                           "add")
    mf.numpy_to_midi(recon, 120, path, name,
                     preprocess_params['smallest_note'])
    print("combine succeed")
# print('ok2') loss.backward() #retain_graph=True optimizer.step() recon_loss_ += recon_loss.item() #+ kl_loss.item() kl_loss_ += kl_loss.item() loss_+=loss.item() # num_iter += real.size(0) # print('recon', recon_loss.item(), 'kl', kl_loss.item(), 'loss', loss.item()) RE_LOSS.append(recon_loss_ / len_loader) KL_LOSS.append(kl_loss_ / len_loader) LOSS.append(loss_ / len_loader) with torch.no_grad(): vae.eval() ff = vae.dec(fixed_noise).detach().cpu() img_list.append(vutils.make_grid(ff, padding=2, normalize=True)) print('EPOCH %d : recon_loss : %.4f , kl_loss = %.4f , loss = %.4f ' % (epoch, recon_loss_ / len_loader, kl_loss_ / len_loader, loss_ / len_loader)) ## save fig plt.axis('off') plt.imshow(np.transpose(img_list[-1], (1, 2, 0))) plt.savefig('glips.png', format='png') plt.close() ### save plot x = [i for i in range(epoch + 1)] plt.plot(x, RE_LOSS, label='RE_LOSS') plt.plot(x, KL_LOSS, label='KL_LOSS') plt.plot(x, LOSS, label='LOSS')
class Trainer(object):
    """Train and evaluate a VAE on MNIST.

    ``args`` must provide ``seed``, ``cuda``, ``batch_size``,
    ``log_interval`` and ``epochs``.  Images written during evaluation go to
    the ``results/`` directory, which must already exist.
    """

    def __init__(self, args):
        """Seed the RNGs and build the data loaders, model and optimizer."""
        self.args = args
        torch.manual_seed(self.args.seed)
        if self.args.cuda:
            torch.cuda.manual_seed(self.args.seed)

        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
        self.train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('./data',
                           train=True,
                           download=True,
                           transform=transforms.ToTensor()),
            batch_size=self.args.batch_size,
            shuffle=True,
            **kwargs)
        self.test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('./data',
                           train=False,
                           transform=transforms.ToTensor()),
            batch_size=self.args.batch_size,
            shuffle=True,
            **kwargs)

        self.model = VAE()
        if self.args.cuda:
            self.model.cuda()
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)

    def loss_function(self, recon_x, x, mu, logvar):
        """Return the VAE loss: reconstruction BCE plus scaled KL divergence.

        ``x`` is flattened to rows of 784 values before comparison with
        ``recon_x``.  The BCE is averaged over all elements; the summed KL
        term is divided by ``args.batch_size * 784`` to put it on the same
        scale.
        """
        BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784))
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        KLD /= self.args.batch_size * 784
        return BCE + KLD

    def train_one_epoch(self, epoch):
        """Run one optimisation pass over the training set and log progress."""
        train_loader = self.train_loader
        args = self.args
        self.model.train()
        train_loss = 0
        for batch_idx, (data, _) in enumerate(train_loader):
            if args.cuda:
                data = data.cuda()
            self.optimizer.zero_grad()
            recon_batch, mu, logvar = self.model(data)
            loss = self.loss_function(recon_batch, data, mu, logvar)
            loss.backward()
            # .item() replaces the pre-0.4 ``loss.data[0]``, which raises on
            # the 0-dim loss tensors of current PyTorch.
            batch_loss = loss.item()
            train_loss += batch_loss
            self.optimizer.step()
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    batch_loss / len(data)))

        print('=====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_loader.dataset)))

    def test(self, epoch):
        """Evaluate on the test set and save a reconstruction comparison.

        The first batch's inputs and reconstructions (at most 8 of each) are
        written to ``results/reconstruction_<epoch>.png``.
        """
        test_loader = self.test_loader
        args = self.args
        self.model.eval()
        test_loss = 0
        # no_grad replaces the removed ``volatile=True`` flag, which no
        # longer disables graph construction.
        with torch.no_grad():
            for i, (data, _) in enumerate(test_loader):
                if args.cuda:
                    data = data.cuda()
                recon_batch, mu, logvar = self.model(data)
                test_loss += self.loss_function(recon_batch, data, mu,
                                                logvar).item()
                if i == 0:
                    n = min(data.size(0), 8)
                    # -1 instead of args.batch_size: the batch may be shorter
                    # than the configured batch size.
                    comparison = torch.cat(
                        [data[:n],
                         recon_batch.view(-1, 1, 28, 28)[:n]])
                    fname = 'results/reconstruction_' + str(epoch) + '.png'
                    save_image(comparison.cpu(), fname, nrow=n)

        test_loss /= len(test_loader.dataset)
        print('=====> Test set loss: {:.4f}'.format(test_loss))

    def train(self):
        """Train for ``args.epochs`` epochs; test and sample after each one.

        After every epoch 64 latent vectors of size 20 are drawn from a
        standard normal, decoded, and saved to ``./results/sample_<epoch>.png``.
        """
        args = self.args
        for epoch in range(1, args.epochs + 1):
            self.train_one_epoch(epoch)
            self.test(epoch)
            sample = torch.randn(64, 20)
            if args.cuda:
                sample = sample.cuda()
            with torch.no_grad():
                sample = self.model.decode(sample).cpu()
            save_image(sample.view(64, 1, 28, 28),
                       './results/sample_' + str(epoch) + '.png')
class AudioToBodyDynamics(object):
    """
    Defines a wrapper class for training and evaluating a model.
    Inputs:
        args (argparse object): model settings
        generator (tuple DataLoader): a tuple of at least one DataLoader
        freestyle (bool): when true, a stored checkpoint is loaded and
            ``runEpoch`` only evaluates ``generator[0]``
    """

    # Architectures that are used with their default (float32) parameters.
    _FLOAT_MODELS = frozenset(
        {'AudioToJointsThree', 'AudioToJointsNonlinear', 'AudioToJoints'})
    # Architectures whose parameters are cast to float64.
    _DOUBLE_MODELS = frozenset({
        'JointsToJoints', 'LSTMToDense', 'AudioToJointsSeq2Seq', 'MDNRNN',
        'VAE'
    })

    def __init__(self, args, generator, freestyle=False):
        """Build the requested model and its Adam optimizer.

        Raises:
            ValueError: if ``args.model_name`` is not a supported model.
        """
        # TODO
        super(AudioToBodyDynamics, self).__init__()
        self.device = args.device
        self.log_frequency = args.log_frequency

        self.is_freestyle_mode = freestyle

        self.generator = generator
        self.model_name = args.model_name
        self.ident = args.ident

        input_dim, output_dim = generator[0].dataset.getDimsPerBatch()

        model_options = {
            'seq_len': args.seq_len,
            'device': args.device,
            'dropout': args.dp,
            'batch_size': args.batch_size,
            'hidden_dim': args.hidden_size,
            'input_dim': input_dim,
            'output_dim': output_dim,
            'trainable_init': args.trainable_init
        }

        if args.model_name not in (self._FLOAT_MODELS | self._DOUBLE_MODELS):
            # Fail here with a clear message instead of an AttributeError on
            # ``self.model`` a few lines further down.
            raise ValueError(f"Unknown model name: {args.model_name!r}")

        # Imported lazily, as before, so the module is only needed when a
        # model is actually built.
        import model as model_zoo
        network = getattr(model_zoo,
                          args.model_name)(model_options).to(args.device)
        if args.model_name in self._DOUBLE_MODELS:
            network = network.double()
        self.model = network

        # construct the model
        self.optim = optim.Adam(self.model.parameters(), lr=args.lr)

        # Load checkpoint model
        if self.is_freestyle_mode:
            # NOTE(review): ``model_dir`` is neither a parameter nor an
            # attribute; it is presumably a module-level name - confirm.
            path = f"{model_dir}{args.model_name}_{str(args.ident)}.pth"
            print(path)
            self.loadModelCheckpoint(path)

    # general loss function
    def buildLoss(self, predictions, targets):
        """Return the squared error summed over the last axis, then averaged."""
        square_diff = (predictions - targets)**2
        out = torch.sum(square_diff, -1, keepdim=True)
        return torch.mean(out)

    def mdn_loss(self, y, pi, mu, sigma):
        """Return the mean negative log-likelihood of ``y`` under a mixture.

        The Normal densities given by ``mu`` / ``sigma`` are weighted by
        ``pi`` and summed over axis 2 before the logarithm is taken.
        """
        m = torch.distributions.Normal(loc=mu, scale=sigma)
        loss = torch.exp(m.log_prob(y))
        loss = torch.sum(loss * pi, dim=2)
        loss = -torch.log(loss)
        return torch.mean(loss)

    # Loss function from https://github.com/pytorch/examples/blob/master/vae/main.py,
    # Appendix B of Kingma & Welling, "Auto-Encoding Variational Bayes"
    # (https://arxiv.org/abs/1312.6114)
    def vae_loss(self, targets, recon_targets, mu, logvar):
        """Return summed reconstruction BCE plus the summed KL divergence."""
        BCE = nn.functional.binary_cross_entropy(recon_targets,
                                                 targets,
                                                 reduction='sum')
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return BCE + KLD

    def saveModel(self, state_info, path):
        """Serialize ``state_info`` to ``path`` with ``torch.save``."""
        torch.save(state_info, path)

    def loadModelCheckpoint(self, path):
        """Restore model and optimizer state from a ``saveModel`` checkpoint."""
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optim.load_state_dict(checkpoint['optim_state_dict'])

    def _build_state_info(self, epoch, epoch_losses, batch_losses, val_losses):
        """Snapshot the current training state for checkpointing."""
        return {
            'epoch': epoch,
            'epoch_losses': epoch_losses,
            'batch_losses': batch_losses,
            'validation_losses': val_losses,
            'model_state_dict': self.model.state_dict(),
            'optim_state_dict': self.optim.state_dict(),
        }

    def runNetwork(self, inputs, targets):
        """
        Train on one given mfcc pose pair
        Args:
             inputs (array): [batch, seq_len, mfcc_features * 3]
             targets (array): [batch, seq_len, 19 * 2 poses]
        Returns:
             (predictions, targets) as numpy arrays, and the loss tensor
        """

        def to_numpy(x):
            # import from gpu device to cpu, convert to numpy
            return x.cpu().data.numpy()

        inputs = torch.DoubleTensor(inputs.double()).to(self.device)
        targets = torch.DoubleTensor(targets).to(self.device)

        mu = logvar = None
        if self.model_name == 'AudioToJointsSeq2Seq':
            predictions = self.model(inputs, targets)
        elif self.model_name == 'VAE':
            predictions, mu, logvar = self.model(inputs)
        else:
            predictions = self.model(inputs)

        if self.model_name == 'AudioToJointsSeq2Seq':
            loss = nn.L1Loss()(predictions.to(self.device),
                               targets.to(self.device).float())
        elif self.model_name == 'MDNRNN':
            # predictions = (pi, mu, sigma), (h, c)
            # NOTE(review): if predictions really is a nested tuple here, the
            # to_numpy(predictions) call below cannot work - confirm.
            loss = self.mdn_loss(targets, predictions[0][0],
                                 predictions[0][1], predictions[0][2])
        elif self.model_name == 'VAE':
            loss = self.vae_loss(targets, predictions, mu, logvar)
        else:
            loss = nn.L1Loss()(predictions, targets)
        return (to_numpy(predictions), to_numpy(targets)), loss

    def _evaluate(self, loader, val_losses, predictions, targets):
        """Run ``loader`` in eval mode, appending losses and outputs in place."""
        self.model.eval()
        # No parameters are updated here, so skip building autograd graphs.
        with torch.no_grad():
            for mfccs, poses in loader:
                pred_targs, val_loss = self.runNetwork(mfccs, poses)
                val_losses.append(val_loss.item())
                raw_pred = pred_targs[0]
                # Flatten (batch, seq) into one axis of 19 x 2 poses.
                pred = raw_pred.reshape(
                    int(raw_pred.shape[0] * raw_pred.shape[1]), 19, 2)
                predictions.append(pred)
                targets.append(pred_targs[1])

    def runEpoch(self):
        """Run one epoch.

        In normal mode ``generator[0]`` is used for training and
        ``generator[1]`` for validation; in freestyle mode ``generator[0]``
        is only evaluated.

        Returns:
            train_losses, val_losses, predictions, targets
        """
        train_losses = []
        val_losses = []
        predictions, targets = [], []

        if not self.is_freestyle_mode:
            self.model.train()
            for mfccs, poses in self.generator[0]:
                _, train_loss = self.runNetwork(mfccs, poses)
                self.optim.zero_grad()
                train_loss.backward()
                self.optim.step()
                train_losses.append(train_loss.item())

            # validation loss
            self._evaluate(self.generator[1], val_losses, predictions,
                           targets)
        else:
            # test or predict / play w/ model
            self._evaluate(self.generator[0], val_losses, predictions,
                           targets)

        return train_losses, val_losses, predictions, targets

    def trainModel(self, max_epochs, logfldr, model_dir):
        """Train for ``max_epochs`` epochs, checkpointing and plotting.

        Args:
            max_epochs (int): number of epochs to run
            logfldr (str): folder for the epoch log and result plot; falsy
                disables both
            model_dir (str): folder for ``<model_name>_<ident>.pth``

        Returns:
            best_train_loss, best_val_loss
        """
        # TODO
        log.debug("Training model")
        epoch_losses = []
        batch_losses = []
        val_losses = []
        i = 0
        best_train_loss, best_val_loss = float('inf'), float('inf')

        filename = None
        if logfldr:
            if logfldr[-1] != '/':
                logfldr += '/'
            filename = f'{logfldr}epoch_of_model_{str(self.ident)}.txt'
        if model_dir and model_dir[-1] != '/':
            model_dir += '/'

        for i in range(max_epochs):
            # NOTE(review): this is true for epochs 0-9 only (originally
            # ``int(i / 10) == 0``); ``i % 10 == 0`` may have been intended.
            if i < 10:
                if filename is not None:
                    with open(filename, 'w' if i == 0 else 'a+') as f:
                        f.write(f"Epoch: {i} started\n")

                # save the model
                if model_dir:
                    path = f"{model_dir}{self.model_name}_{str(self.ident)}.pth"
                    # Build the snapshot at save time: a dict created once
                    # before training keeps epoch 0 and a stale optimizer
                    # state.
                    self.saveModel(
                        self._build_state_info(i, epoch_losses, batch_losses,
                                               val_losses), path)

            iter_train, iter_val, predictions, targets = self.runEpoch()
            iter_mean = np.mean(iter_train)
            iter_val_mean = np.mean(iter_val)

            epoch_losses.append(iter_mean)
            batch_losses.extend(iter_train)
            val_losses.append(iter_val_mean)

            log.info("Epoch {} / {}".format(i, max_epochs))
            log.info(f"Training Loss : {iter_mean}")
            log.info(f"Validation Loss : {iter_val_mean}")

            best_train_loss = min(iter_mean, best_train_loss)
            best_val_loss = min(iter_val_mean, best_val_loss)

        # Visualize VAE latent space
        if self.model_name == 'VAE':
            self.vae_plot()

        self.plotResults(logfldr, epoch_losses, batch_losses, val_losses)
        path = f"{model_dir or ''}{self.model_name}_{str(self.ident)}.pth"
        self.saveModel(
            self._build_state_info(i, epoch_losses, batch_losses, val_losses),
            path)
        return best_train_loss, best_val_loss

    # plot random subset of poses in VAE latent space
    def vae_plot(self):
        """Scatter-plot sampled latent codes and draw the medoid poses.

        NOTE(review): ``self.generator`` is documented as a tuple of
        DataLoaders, yet it is unpacked here as ``(input, output)`` pairs and
        ``input`` is passed to ``model.encode`` - confirm which object this
        loop is meant to iterate.
        """
        # Row 0 is an uninitialised placeholder; sampling below starts at 1.
        z_list = torch.Tensor(1, 2)
        poses = []
        for input, output in self.generator:
            for inp in input:
                poses.append(inp)
            mu, logvar = self.model.encode(input)
            z = self.model.reparameterize(mu, logvar)
            z2 = z[:, -1, :]
            z_list = torch.cat((z_list.double(), z2.double()), 0)

        indices = np.random.randint(low=1, high=z_list.shape[0], size=1000)
        coords = np.array(
            [z_list[ind, :].detach().numpy() for ind in indices])

        # k-medoids clustering for coloring
        kmedoids = KMedoids(n_clusters=5).fit(coords)
        y_kmedoids = kmedoids.predict(coords)
        plt.scatter(coords[:, 0], coords[:, 1], c=y_kmedoids, cmap='viridis')
        plt.show()

        # Collect the pose behind every sampled point that coincides with a
        # medoid (index shifted by one for the placeholder row).
        recons = []
        for center in kmedoids.cluster_centers_:
            c = np.array(center)
            for i, coord in enumerate(coords):
                if np.array_equal(c, coord):
                    recons.append(poses[indices[i] -
                                        1].detach().numpy().reshape(19, 2))

        self.draw_poses(np.array(recons))

    # Takes in np array of poses that are each 19x2 arrays
    def draw_poses(self, poses):
        """Write one ``images/<index>.jpg`` per pose.

        All poses are shifted in place so that keypoint 8 of the first pose
        lands on (750, 800).
        """
        shift_by = np.array([750, 800]) - poses[0][8]
        poses += shift_by
        for count, pose in enumerate(poses):
            person_id = str(0) + ", " + str([0])
            canvas = draw_pose_figure(person_id, pose)
            file_name = "images/" + f"{count:05}.jpg"
            cv2.imwrite(file_name, canvas)

    def plotResults(self, logfldr, epoch_losses, batch_losses, val_losses):
        """Plot the three loss curves; save the figure when ``logfldr`` is set."""
        losses = [epoch_losses, batch_losses, val_losses]
        names = [["Epoch loss"], ["Batch loss"], ["Val loss"]]
        _, ax = plt.subplots(nrows=len(losses), ncols=1)
        for index, (series, label) in enumerate(zip(losses, names)):
            ax[index].plot(list(series), label=label)
            ax[index].legend()
        if logfldr:
            if logfldr[-1] != '/':
                logfldr += '/'
            save_filename = os.path.join(
                logfldr, f"{self.model_name}_{str(self.ident)}_results.png")
            plt.savefig(save_filename)
        plt.close()
class Experiment():
    """Train and evaluate a VAE on MNIST.

    ``args`` must provide ``batch_size``, ``n_workers``, ``device``,
    ``log_freq`` and ``epochs``.  Checkpoints and comparison images are
    written to ``./results/``, which must already exist.
    """

    def __init__(self, args):
        """Build the data loaders, the model and its Adam optimizer."""
        self.args = args

        # data
        self.train_loader = DataLoader(datasets.MNIST(
            './data',
            train=True,
            download=True,
            transform=transforms.ToTensor()),
                                       batch_size=args.batch_size,
                                       num_workers=args.n_workers,
                                       shuffle=True)
        self.test_loader = DataLoader(datasets.MNIST(
            './data',
            train=False,
            download=True,
            transform=transforms.ToTensor()),
                                      batch_size=args.batch_size,
                                      num_workers=args.n_workers,
                                      shuffle=False)

        self.model = VAE().to(args.device)
        self.loss = VAE.loss
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)

    def train(self, epoch):
        """Run one training epoch, printing the loss every ``log_freq`` batches."""
        self.model.train()
        train_loss = 0.
        n_samples = 0
        for iter_i, (data, _) in enumerate(self.train_loader, start=1):
            n_samples += len(data)
            data = data.to(self.args.device)

            self.optimizer.zero_grad()
            x_rec, mu, logvar = self.model(data)
            loss = self.loss(data, x_rec, mu, logvar)
            loss.backward()
            train_loss += loss.item()
            self.optimizer.step()

            if iter_i % self.args.log_freq == 0:
                print('Epoch {} Train [{}/{}]: LossAvg {:.4f}'.format(
                    epoch, n_samples, len(self.train_loader.dataset),
                    loss.item() / len(data)))

        print('Epoch {} Train: LossAvg {:.4f}'.format(
            epoch, train_loss / len(self.train_loader.dataset)))

    def test(self, epoch):
        """Evaluate on the test set, save a checkpoint, return a comparison.

        Returns:
            A CPU tensor holding up to 8 inputs of the first batch followed
            by their reconstructions.

        Raises:
            RuntimeError: if the test loader yields no batch.
        """
        self.model.eval()
        test_loss = 0.
        comparison = None
        with torch.no_grad():
            for iter_i, (data, _) in enumerate(self.test_loader, start=1):
                data = data.to(self.args.device)
                x_rec, mu, logvar = self.model(data)
                loss = self.loss(data, x_rec, mu, logvar)
                test_loss += loss.item()

                if iter_i == 1:
                    n = min(data.size(0), 8)
                    # -1 instead of args.batch_size: the batch may be shorter
                    # than the configured batch size.
                    comparison = torch.cat(
                        [data[:n], x_rec.view(-1, 1, 28, 28)[:n]])

        if comparison is None:
            raise RuntimeError('test loader produced no batches')

        print('Epoch {} Test: LossAvg {:.4f}'.format(
            epoch, test_loss / len(self.test_loader.dataset)))

        self.save(epoch)
        return comparison.cpu()

    def save(self, epoch):
        """Write the model weights to ``./results/checkpoint_<epoch>.pt``."""
        torch.save(self.model.state_dict(),
                   './results/checkpoint_{}.pt'.format(epoch))

    def run(self):
        """Train and test for ``args.epochs`` epochs, saving and showing images."""
        n = 8
        for epoch_i in range(1, 1 + self.args.epochs):
            self.train(epoch_i)
            comparison = self.test(epoch_i)
            save_image(comparison,
                       './results/comparison_{}.png'.format(epoch_i),
                       nrow=n)
            visualize = make_grid(comparison, nrow=n)
            show(visualize.cpu())
plt.figure(figsize=(6, 10)) rows, cols = 5, 2 for i in range(rows): plt.subplot(rows, cols, 2 * i + 1) plt.imshow(x[i].reshape(28, 28), vmin=0, vmax=1, cmap="Greys_r") plt.title("test input") plt.colorbar() plt.subplot(rows, cols, 2 * i + 2) plt.imshow(x_reconstr[i].reshape(28, 28), vmin=0, vmax=1, cmap="Greys_r") plt.title("reconstruct") plt.colorbar() plt.tight_layout() plt.show() """ check generation """ vae_model.eval() noise = torch.randn(batch_size, net_arch["n_z"], device=device) images = vae_model.generate(noise).cpu().detach() torch.save(images, gen_images_path) print(f"{gen_images_path} saved") """ check latent space, in order to do that, we need to train another VAE model with n_z=2 """ net_arch["n_z"] = 2 vae_model_2d = VAE(net_arch, lr, batch_size, device) print(vae_model_2d) if not os.path.exists(model_2d_save_path): vae_model_2d.train() n_epoch = 50 for epoch in range(n_epoch):
class Runner(object):
    """Own the VAE, its optimizer/scheduler and the train/valid/test loop."""

    def __init__(self,
                 hparams,
                 train_size: int,
                 class_weight: Optional[Tensor] = None):
        """Build model, optimizer, scheduler and summary writer.

        :param hparams: settings object; ``learning_rate``, ``eps``,
            ``weight_decay``, ``scheduler``, ``device``, ``out_device``,
            ``logdir`` and ``print_params`` are read here
        :param train_size: unused
        :param class_weight: unused
        """
        # model, criterion
        self.model = VAE()

        # optimizer and scheduler
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=hparams.learning_rate,
                                          eps=hparams.eps,
                                          weight_decay=hparams.weight_decay)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, **hparams.scheduler)
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

        # device
        device_for_summary = self.__init_device(hparams.device,
                                                hparams.out_device)

        # summary
        self.writer = SummaryWriter(logdir=hparams.logdir)
        # TODO: fill in ~~DUMMY~~INPUT~~SIZE~~
        path_summary = Path(self.writer.logdir, 'summary.txt')
        if not path_summary.exists():
            print_to_file(path_summary, summary, (self.model, (40, 11)),
                          dict(device=device_for_summary))

        # save hyperparameters
        path_hparam = Path(self.writer.logdir, 'hparams.txt')
        if not path_hparam.exists():
            print_to_file(path_hparam, hparams.print_params)

    def __init_device(self, device, out_device):
        """Place the model and loss on the requested device(s).

        :param device: ``'cpu'``, a CUDA index, a string whose last character
            is a CUDA index, or a sequence of either
        :param out_device: output device used when several GPUs are given
        :return: ``'cpu'`` or ``'cuda'``
        """
        if device == 'cpu':
            self.in_device = torch.device('cpu')
            self.out_device = torch.device('cpu')
            self.str_device = 'cpu'
            return 'cpu'

        # Normalise ``device`` to a list of CUDA indices.
        if isinstance(device, int):
            device = [device]
        elif isinstance(device, str):
            device = [int(device[-1])]
        elif not isinstance(device[0], int):  # sequence of devices
            device = [int(d[-1]) for d in device]

        self.in_device = torch.device(f'cuda:{device[0]}')

        if len(device) > 1:
            if isinstance(out_device, int):
                self.out_device = torch.device(f'cuda:{out_device}')
            else:
                self.out_device = torch.device(out_device)
            self.str_device = ', '.join([f'cuda:{d}' for d in device])

            self.model = nn.DataParallel(self.model,
                                         device_ids=device,
                                         output_device=self.out_device)
        else:
            self.out_device = self.in_device
            self.str_device = str(self.in_device)

        self.model.cuda(self.in_device)
        self.bce.cuda(self.out_device)

        return 'cuda'

    def _bare_model(self):
        """Return the underlying model, unwrapping ``nn.DataParallel``."""
        if isinstance(self.model, nn.DataParallel):
            return self.model.module
        return self.model

    # Running model for train, test and validation.
    def run(self, dataloader, mode: str, epoch: int):
        """Run one pass over ``dataloader``.

        :param mode: ``'train'`` updates the weights, ``'valid'`` only
            computes the loss, ``'test'`` first restores the checkpoint of
            ``epoch`` and collects per-sample scores
        :return: ``(avg_loss, (y, y_est, pred_prob))``; the arrays are empty
            unless ``mode == 'test'``
        """
        if mode == 'train':
            self.model.train()
        else:
            self.model.eval()

        if mode == 'test':
            # Prefer the historical "<epoch>.pt" name, then fall back to the
            # "VAE_<epoch>.pt" name that step() writes.
            path_state = Path(self.writer.logdir, f'{epoch}.pt')
            if not path_state.exists():
                path_state = Path(self.writer.logdir, f'VAE_{epoch}.pt')
            state_dict = torch.load(path_state, map_location='cpu')
            if isinstance(state_dict, tuple):
                # Older checkpoints wrapped the state dict in a 1-tuple.
                state_dict = state_dict[0]
            self._bare_model().load_state_dict(state_dict)
            path_test_result = Path(self.writer.logdir, f'test_{epoch}')
            os.makedirs(path_test_result, exist_ok=True)

        avg_loss = 0.
        y = []
        y_est = []
        pred_prob = []

        pbar = tqdm(dataloader,
                    desc=f'{mode} {epoch:3d}',
                    postfix='-',
                    dynamic_ncols=True)

        for i_batch, batch in enumerate(pbar):
            # data
            x = batch['batch_x']
            x = x.to(self.in_device)  # B, F, T

            # Gradients are only needed while training.
            with torch.set_grad_enabled(mode == 'train'):
                # forward
                reconstruct_x, mu, logvar = self.model(x)

                # loss
                BCE = self.bce(reconstruct_x,
                               x.view(-1, 440)).mean(dim=1)  # (B,)
                if mode != 'test':
                    loss = torch.mean(BCE - 0.5 * torch.sum(
                        1 + logvar - mu.pow(2) - logvar.exp(), dim=1))
                else:
                    loss = 0.

            if mode == 'train':
                # backward
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                loss = loss.item()
            elif mode == 'valid':
                loss = loss.item()
            else:
                y += batch['batch_y']
                y_est += (BCE < 0.5).int().tolist()
                pred_prob += BCE.tolist()

            pbar.set_postfix_str('')
            avg_loss += loss

        avg_loss = avg_loss / len(dataloader.dataset)

        y = np.array(y)
        y_est = np.array(y_est)
        pred_prob = np.array(pred_prob, dtype=np.float32)

        return avg_loss, (y, y_est, pred_prob)

    def step(self, valid_loss: float, epoch: int):
        """Advance the LR scheduler, log the LR and checkpoint periodically.

        :param valid_loss: validation loss fed to ``ReduceLROnPlateau``
        :param epoch: current epoch; weights are saved when it is a
            multiple of 5
        :return: test epoch or 0
        """
        self.scheduler.step(valid_loss)

        # print learning rate
        for param_group in self.optimizer.param_groups:
            self.writer.add_scalar('learning rate', param_group['lr'], epoch)

        if epoch % 5 == 0:
            # Save the plain state dict into the writer's log directory so
            # that run(mode='test') can load it back directly.
            torch.save(self._bare_model().state_dict(),
                       Path(self.writer.logdir) / f'VAE_{epoch}.pt')
        return 0
print(' bits : {:.2f}\n'.format(losses['bits'][-1])) print(' max : {:.6f}'.format(torch.max(weight_vec[-1]))) print(' max idx : {:.0f}'.format(torch.argmax(weight_vec[-1]))) print(' min : {:.6f}\n'.format(torch.min(weight_vec[-1]))) # print('sparsity : {}'.format(sum(1 for x in weight_vec[-1] if x > 0.2)) / weight_vec) # sparsity of weight vector print('*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*') # print('Epoch {}: train loss: {}'.format(epoch, loss.item())) # losses['weights'].append(weight_vec) # ================ validation ================ with torch.no_grad(): model.eval() # Put model in evaluation mode for batch_idx, local_batch in enumerate(val_loader): # Transfer to GPU local_batch = local_batch.to(device, dtype=torch.float32) # Model computations x_hat, mu, logvar, q_z, _ = model(local_batch) loss, _, _, _, _, _ = criterion( create_stft(local_batch).cpu(), x_hat.cpu(), q_z, 48, 1, model.global_step, 0.2, device, weight_vec, balance, loss_weight(epoch)) print('loading validation loss...\n') print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') print('Validation Loss: {}'.format(loss.item()))
def main():
    """Train a VAE on CPU from command-line options and return it.

    Parses the CLI arguments, builds the data loader, the model and an Adam
    optimizer, then trains for ``len(train_loader) * num_epochs`` iterations.
    A checkpoint is written to ``args.save`` every ``--log_freq`` iterations
    and once more after training, followed by the ELBO decomposition saved as
    ``<args.save>/elbo_decomposition.pth``.

    :return: the trained VAE, left in eval mode.
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-d', '--dataset', default='shapes', type=str,
                        help='dataset name', choices=['shapes', 'faces'])
    parser.add_argument('-dist', default='normal', type=str,
                        choices=['normal', 'laplace', 'flow'])
    parser.add_argument('-n', '--num-epochs', default=1, type=int,
                        help='number of training epochs')
    parser.add_argument('-b', '--batch-size', default=2048, type=int,
                        help='batch size')
    parser.add_argument('-l', '--learning-rate', default=1e-3, type=float,
                        help='learning rate')
    parser.add_argument('-z', '--latent-dim', default=10, type=int,
                        help='size of latent dimension')
    parser.add_argument('--beta', default=5, type=float,
                        help='ELBO penalty term')
    parser.add_argument('--tcvae', action='store_true')
    parser.add_argument('--exclude-mutinfo', action='store_true')
    parser.add_argument('--beta-anneal', action='store_true')
    parser.add_argument('--lambda-anneal', action='store_true')
    parser.add_argument('--mss', action='store_true',
                        help='use the improved minibatch estimator')
    parser.add_argument('--conv', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--save', default='shapes')
    parser.add_argument('--log_freq', default=200, type=int,
                        help='num iterations per log')
    args = parser.parse_args()

    # NOTE(review): --gpu is parsed but unused; everything below runs with
    # use_cuda=False.
    # torch.cuda.set_device(args.gpu)

    # data loader
    train_loader = setup_data_loaders(args, use_cuda=False)

    # setup the VAE
    # NOTE(review): -dist is parsed but ignored; both distributions are
    # always Normal regardless of the chosen value.
    prior_dist = dist.Normal()
    q_dist = dist.Normal()
    # --conv is forwarded so the trained architecture matches the `args`
    # stored in the checkpoint (the loader rebuilds the model from args.conv).
    vae = VAE(z_dim=args.latent_dim, use_cuda=False, prior_dist=prior_dist,
              q_dist=q_dist, include_mutinfo=not args.exclude_mutinfo,
              tcvae=args.tcvae, conv=args.conv, mss=args.mss)

    # setup the optimizer
    optimizer = optim.Adam(vae.parameters(), lr=args.learning_rate)

    def save_checkpoint():
        # Persist the weights together with the CLI args used to build them.
        utils.save_checkpoint(
            {'state_dict': vae.state_dict(), 'args': args}, args.save, 0)

    # training loop
    train_elbo = []
    dataset_size = len(train_loader.dataset)
    num_iterations = len(train_loader) * args.num_epochs
    iteration = 0
    # initialize loss accumulator
    elbo_running_mean = utils.running_avg_meter()
    while iteration < num_iterations:
        for x in train_loader:
            iteration += 1
            batch_time = time.time()
            vae.train()
            anneal_kl(args, vae, iteration)
            optimizer.zero_grad()
            x = Variable(x)
            obj, elbo = vae.elbo(x, dataset_size)
            # if utils.isnan(obj).any():
            #     raise ValueError('NaN spotted in objective.')
            # Maximise the objective by minimising its negation.
            obj.mean().mul(-1).backward()
            # Feed a plain float so the running average does not keep the
            # autograd history of every iteration alive.
            elbo_running_mean.update(elbo.mean().item())
            optimizer.step()

            # report training diagnostics
            if iteration % args.log_freq == 0:
                train_elbo.append(elbo_running_mean.avg)
                print(
                    '[iteration %03d] time: %.2f \tbeta %.2f \tlambda %.2f training ELBO: %.4f (%.4f)'
                    % (iteration, time.time() - batch_time, vae.beta,
                       vae.lamb, elbo_running_mean.val,
                       elbo_running_mean.avg))

                vae.eval()
                save_checkpoint()
                # eval('plot_vs_gt_' + args.dataset)(vae, train_loader.dataset,
                #     os.path.join(args.save, 'gt_vs_latent_{:05d}.png'.format(iteration)))

    # Report statistics after training
    vae.eval()
    save_checkpoint()
    dataset_loader = DataLoader(train_loader.dataset, batch_size=1000,
                                num_workers=1, shuffle=False)
    (logpx, dependence, information, dimwise_kl, analytical_cond_kl,
     marginal_entropies, joint_entropy) = elbo_decomposition(
        vae, dataset_loader)
    torch.save(
        {
            'logpx': logpx,
            'dependence': dependence,
            'information': information,
            'dimwise_kl': dimwise_kl,
            'analytical_cond_kl': analytical_cond_kl,
            'marginal_entropies': marginal_entropies,
            'joint_entropy': joint_entropy,
        },
        os.path.join(args.save, 'elbo_decomposition.pth'))
    # eval('plot_vs_gt_' + args.dataset)(vae, dataset_loader.dataset,
    #     os.path.join(args.save, 'gt_vs_latent.png'))
    return vae