def main():
    """Train a sentiment EncoderRNN on the SaDataset train/valid splits.

    Relies on module-level config (hidden_size, encoder_n_layers, dropout,
    learning_rate, epochs, device) and helpers (train, valid,
    adjust_learning_rate, save_checkpoint) defined elsewhere in this file.
    """
    # Load the vocabulary and the train/validation datasets.
    voc = Lang('data/WORDMAP.json')
    print("词库数量 " + str(voc.n_words))
    train_data = SaDataset('train', voc)
    val_data = SaDataset('valid', voc)
    # Build the model.
    encoder = EncoderRNN(voc.n_words, hidden_size, encoder_n_layers, dropout)
    # Move the model to the configured device (GPU memory if cuda, else RAM).
    encoder = encoder.to(device)
    # Build the optimizer: Adam adjusts the model parameters by gradient descent.
    print('Building optimizers ...')
    '''
    Arguments: the parameters to optimize, and the learning rate.
    '''
    optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    # Best validation accuracy seen so far.
    best_acc = 0
    epochs_since_improvement = 0
    # Iterate over training epochs.
    for epoch in range(0, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20.
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(optimizer, 0.8)
        # Train for one epoch.
        train(epoch, train_data, encoder, optimizer)
        # Evaluate on the validation set to watch for overfitting.
        val_acc, val_loss = valid(val_data, encoder)
        print('\n * ACCURACY - {acc:.3f}, LOSS - {loss:.3f}\n'.format(acc=val_acc, loss=val_loss))
        # Track whether this epoch improved the best accuracy so far.
        is_best = val_acc > best_acc
        best_acc = max(best_acc, val_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0
        # Save checkpoint
        save_checkpoint(epoch, encoder, optimizer, val_acc, is_best)
        # Reshuffle samples of both the training and validation sets.
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)
def init():
    """Assemble everything needed to train the chatbot.

    Builds the sentence pairs and vocabulary from the raw message dumps,
    constructs the encoder/decoder models and their optimizers, and — when
    LOADFILENAME is set — restores all of them from the saved checkpoint.

    Returns:
        (encoder, decoder, encoder_optimizer, decoder_optimizer,
         embedding, voc, pairs_of_sentences, checkpoint)
    """
    # --- Data preparation -------------------------------------------------
    print("\tInitialising sentences")
    print("\t\tLoading and cleaning json files")
    raw_convs = load_all_json_conv('./Dataset/messages')
    print("\t\tLoading two person convs")
    two_person_convs = get_chat_friend_and_me(raw_convs)
    print("\t\tMaking two person convs discussions")
    threads = get_discussions(two_person_convs)
    print("\t\tCreating pairs for training")
    pairs_of_sentences = make_pairs(threads)
    print(f"\t\t{len(pairs_of_sentences)} different pairs")
    print("\t\tCreating Vocabulary")
    voc = Voc()
    print("\t\tPopulating Vocabulary")
    voc.createVocFromPairs(pairs_of_sentences)
    print(f"\t\tVocabulary of : {voc.num_words} differents words")

    # --- Models and optimizers -------------------------------------------
    print('\tBuilding encoder and decoder ...')
    embedding = nn.Embedding(voc.num_words, HIDDEN_SIZE)
    encoder = EncoderRNN(HIDDEN_SIZE, embedding, ENCODER_N_LAYERS, DROPOUT)
    decoder = LuongAttnDecoderRNN(ATTN_MODEL, embedding, HIDDEN_SIZE,
                                  voc.num_words, DECODER_N_LAYERS, DROPOUT)
    enc_opt = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)
    dec_opt = optim.Adam(decoder.parameters(),
                         lr=LEARNING_RATE * DECODER_LEARNING_RATIO)

    # --- Optional checkpoint restore -------------------------------------
    checkpoint = None
    if LOADFILENAME:
        print("\t\tLoading last training")
        checkpoint = torch.load(LOADFILENAME)
        # If loading a model trained on GPU to CPU:
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        voc.__dict__ = checkpoint['voc_dict']
        print("\t\tPopulating from last training")
        embedding.load_state_dict(checkpoint['embedding'])
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])
        enc_opt.load_state_dict(checkpoint['en_opt'])
        dec_opt.load_state_dict(checkpoint['de_opt'])

    encoder = encoder.to(DEVICE)
    decoder = decoder.to(DEVICE)
    return (encoder, decoder, enc_opt, dec_opt, embedding, voc,
            pairs_of_sentences, checkpoint)
def main(opts):
    """Build vocab, encoder and policy model, then train (or just evaluate /
    produce a test submission for) a panoramic R2R seq2seq navigation agent.

    NOTE(review): relies on project-level helpers (setup, read_vocab,
    Tokenizer, EncoderRNN, the policy models, R2RPanoBatch, Evaluation,
    PanoSeq2SeqAgent/Trainer, ...) defined elsewhere in this project.
    """
    # set manual_seed and build vocab
    print(opts, flush=True)
    setup(opts, opts.seed)
    # Use GPU when available; encoder and model are moved to this device below.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Usando {device} :)")
    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(opts.remove_punctuation == 1, opts.reversed == 1,
                    vocab=vocab, encoding_length=opts.max_cap_length)
    # create language instruction encoder
    encoder_kwargs = {
        'opts': opts,
        'vocab_size': len(vocab),
        'embedding_size': opts.word_embedding_size,
        'hidden_size': opts.rnn_hidden_size,
        'padding_idx': padding_idx,
        'dropout_ratio': opts.rnn_dropout,
        'bidirectional': opts.bidirectional == 1,
        'num_layers': opts.rnn_num_layers
    }
    print('Using {} as encoder ...'.format(opts.lang_embed))
    if 'lstm' in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError('Unknown {} language embedding'.format(opts.lang_embed))
    print(encoder)
    # create policy model
    policy_model_kwargs = {
        'opts': opts,
        'img_fc_dim': opts.img_fc_dim,
        'img_fc_use_batchnorm': opts.img_fc_use_batchnorm == 1,
        'img_dropout': opts.img_dropout,
        'img_feat_input_dim': opts.img_feat_input_dim,
        'rnn_hidden_size': opts.rnn_hidden_size,
        'rnn_dropout': opts.rnn_dropout,
        'max_len': opts.max_cap_length,
        'max_navigable': opts.max_navigable
    }
    # Dispatch on the requested agent architecture.
    if opts.arch == 'regretful':
        model = Regretful(**policy_model_kwargs)
    elif opts.arch == 'self-monitoring':
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == 'speaker-baseline':
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError('Unknown {} model for seq2seq agent'.format(opts.arch))
    print(model)
    encoder = encoder.to(device)
    model = model.to(device)
    # One optimizer drives both the language encoder and the policy model.
    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)
    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(opts, model, encoder, optimizer)
    # if a secondary exp name is specified, this is useful when resuming from a previous saved
    # experiment and save to another experiment, e.g., pre-trained on synthetic data and fine-tune on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary
    feature, img_spec = load_features(opts.img_feat_dir, opts.blind)
    if opts.test_submission:
        # Submission mode: evaluate the resumed model on the test split and exit.
        assert opts.resume, 'The model was not resumed before running for submission.'
        test_env = ('test', (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                          splits=['test'], tokenizer=tok), Evaluation(['test'], opts)))
        agent_kwargs = {
            'opts': opts,
            'env': test_env[1][0],
            'results_path': "",
            'encoder': encoder,
            'model': model,
            'feedback': opts.feedback
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return
    # set up R2R environments (synthetic split replaces 'train' under data augmentation)
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 seed=opts.seed, splits=['train'], tokenizer=tok)
    else:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 seed=opts.seed, splits=['synthetic'], tokenizer=tok)
    val_craft_splits = ['craft_seen', 'craft_unseen']
    val_splits = ['val_seen', 'val_unseen']
    if opts.craft_eval:
        val_splits += val_craft_splits
    val_envs = {split: (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     splits=[split], tokenizer=tok), Evaluation([split], opts))
                for split in val_splits}
    # create agent
    agent_kwargs = {
        'opts': opts,
        'env': train_env,
        'results_path': "",
        'encoder': encoder,
        'model': model,
        'feedback': opts.feedback
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)
    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer, opts.train_iters_epoch)
    if opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return
    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)
    sys.stdout.flush()
    best_success_rate = best_success_rate if opts.resume else 0.0
    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)
        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))
            # Index 1 is the second val split — 'val_unseen' given the
            # insertion order of val_splits above (relies on dict ordering).
            success_rate_compare = success_rate[1]
            if is_experiment():
                # remember best val_seen success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare, best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(best_success_rate))
                sys.stdout.flush()
                # save the model if it is the best so far
                save_checkpoint({
                    'opts': opts,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'encoder_state_dict': encoder.state_dict(),
                    'best_success_rate': best_success_rate,
                    'optimizer': optimizer.state_dict(),
                    'max_episode_len': opts.max_episode_len,
                }, is_best, checkpoint_dir=opts.checkpoint_dir, name=opts.exp_name)
        # After the augmentation phase ends, switch back to the real train split.
        if opts.train_data_augmentation and epoch == opts.epochs_data_augmentation:
            train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     seed=opts.seed, splits=['train'], tokenizer=tok)
    print("--> Finished training")
with open("word_index_dict", "rb") as f: word_index_dict = pickle.load(f) with open("index_word_dict", "rb") as f: index_word_dict = pickle.load(f) maxlen_q, maxlen_a = 19, 19 # build the model now encoder = EncoderRNN(len(word_index_dict) + 1, 1024, 1024) #.cuda() decoder = DecoderRNN(1024, 1024, len(index_word_dict) + 2) #.cuda() attention = Attention_layer(maxlen_q + 1) #.cuda() encoder.eval() decoder.eval() attention.eval() params_encoder,params_decoder,params_attention=\ list(encoder.parameters()),list(decoder.parameters()),list(attention.parameters()) # load weights into model with open("weights/encoder", "rb") as f: weights_encoder = pickle.load(f) with open("weights/decoder", "rb") as f: weights_decoder = pickle.load(f) with open("weights/attention", "rb") as f: weights_attention = pickle.load(f) for i in range(len(params_encoder)): params_encoder[i].data = weights_encoder[i].data.cpu() for i in range(len(params_decoder)):
class VSRN(object):
    """
    rkiros/uvs model

    Joint image-text retrieval model with an auxiliary captioning branch:
    image/text encoders trained with a contrastive loss, plus an
    encoder-decoder caption model trained with a language-model loss.
    NOTE(review): ``self.logger`` is used by forward_loss/train_emb but never
    assigned here — presumably attached externally by the training script.
    """

    def __init__(self, opt):
        # tutorials/09 - Image Captioning
        # Build Models
        self.grad_clip = opt.grad_clip
        self.img_enc = EncoderImage(opt.data_name, opt.img_dim, opt.embed_size,
                                    opt.finetune, opt.cnn_type,
                                    use_abs=opt.use_abs,
                                    no_imgnorm=opt.no_imgnorm)
        self.txt_enc = EncoderText(opt.vocab_size, opt.word_dim,
                                   opt.embed_size, opt.num_layers,
                                   use_abs=opt.use_abs)
        if torch.cuda.is_available():
            self.img_enc.cuda()
            self.txt_enc.cuda()
            cudnn.benchmark = True

        ##### captioning elements
        self.encoder = EncoderRNN(
            opt.dim_vid,
            opt.dim_hidden,
            bidirectional=opt.bidirectional,
            input_dropout_p=opt.input_dropout_p,
            rnn_cell=opt.rnn_type,
            rnn_dropout_p=opt.rnn_dropout_p)
        self.decoder = DecoderRNN(
            opt.vocab_size,
            opt.max_len,
            opt.dim_hidden,
            opt.dim_word,
            input_dropout_p=opt.input_dropout_p,
            rnn_cell=opt.rnn_type,
            rnn_dropout_p=opt.rnn_dropout_p,
            bidirectional=opt.bidirectional)
        self.caption_model = S2VTAttModel(self.encoder, self.decoder)
        self.crit = utils.LanguageModelCriterion()
        self.rl_crit = utils.RewardCriterion()
        if torch.cuda.is_available():
            self.caption_model.cuda()

        # Loss and Optimizer
        self.criterion = ContrastiveLoss(margin=opt.margin,
                                         measure=opt.measure,
                                         max_violation=opt.max_violation)
        # One optimizer over every trainable sub-module (plus the CNN when
        # fine-tuning is enabled).
        params = list(self.txt_enc.parameters())
        params += list(self.img_enc.parameters())
        params += list(self.decoder.parameters())
        params += list(self.encoder.parameters())
        params += list(self.caption_model.parameters())
        if opt.finetune:
            params += list(self.img_enc.cnn.parameters())
        self.params = params
        self.optimizer = torch.optim.Adam(params, lr=opt.learning_rate)
        # Global training-iteration counter.
        self.Eiters = 0

    def calcualte_caption_loss(self, fc_feats, labels, masks):
        """Compute the captioning (language-model) loss on GPU.

        NOTE(review): method name keeps the original 'calcualte' typo to stay
        compatible with existing callers. Requires CUDA — labels/masks are
        moved with .cuda() unconditionally.
        """
        # labels = Variable(labels, volatile=False)
        # masks = Variable(masks, volatile=False)
        torch.cuda.synchronize()
        labels = labels.cuda()
        masks = masks.cuda()
        # if torch.cuda.is_available():
        # labels.cuda()
        # masks.cuda()
        seq_probs, _ = self.caption_model(fc_feats, labels, 'train')
        # Shift by one: predict token t+1 from tokens up to t.
        loss = self.crit(seq_probs, labels[:, 1:], masks[:, 1:])
        return loss

    def state_dict(self):
        # Only the retrieval encoders are checkpointed, not the caption branch.
        state_dict = [self.img_enc.state_dict(), self.txt_enc.state_dict()]
        return state_dict

    def load_state_dict(self, state_dict):
        self.img_enc.load_state_dict(state_dict[0])
        self.txt_enc.load_state_dict(state_dict[1])

    def train_start(self):
        """switch to train mode
        """
        self.img_enc.train()
        self.txt_enc.train()

    def val_start(self):
        """switch to evaluate mode
        """
        self.img_enc.eval()
        self.txt_enc.eval()

    def forward_emb(self, images, captions, lengths, volatile=False):
        """Compute the image and caption embeddings
        """
        # Set mini-batch dataset
        #images = Variable(images, volatile=volatile)
        #captions = Variable(captions, volatile=volatile)
        images = Variable(images)
        captions = Variable(captions)
        if torch.cuda.is_available():
            images = images.cuda()
            captions = captions.cuda()

        # Forward
        cap_emb = self.txt_enc(captions, lengths)
        img_emb, GCN_img_emd = self.img_enc(images)
        return img_emb, cap_emb, GCN_img_emd

    def forward_loss(self, img_emb, cap_emb, **kwargs):
        """Compute the loss given pairs of image and caption embeddings
        """
        loss = self.criterion(img_emb, cap_emb)
        # self.logger.update('Le', loss.data[0], img_emb.size(0))
        self.logger.update('Le_retrieval', loss.data[0], img_emb.size(0))
        return loss

    def train_emb(self, images, captions, lengths, ids, caption_labels,
                  caption_masks, *args):
        """One training step given images and captions.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # compute the embeddings
        img_emb, cap_emb, GCN_img_emd = self.forward_emb(images, captions, lengths)

        # calcualte captioning loss
        self.optimizer.zero_grad()
        caption_loss = self.calcualte_caption_loss(GCN_img_emd, caption_labels, caption_masks)

        # measure accuracy and record loss
        # NOTE(review): this second zero_grad is redundant — no backward pass
        # has run since the first call above.
        self.optimizer.zero_grad()
        retrieval_loss = self.forward_loss(img_emb, cap_emb)

        # Joint objective: retrieval (contrastive) + captioning loss.
        loss = retrieval_loss + caption_loss

        self.logger.update('Le_caption', caption_loss.data[0], img_emb.size(0))
        self.logger.update('Le', loss.data[0], img_emb.size(0))

        # compute gradient and do SGD step
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
def main(opts):
    """Train or evaluate a panoramic R2R seq2seq navigation agent.

    Variant of the trainer without the 'regretful' architecture and craft
    splits. NOTE(review): relies on project helpers (setup, read_vocab,
    Tokenizer, EncoderRNN, R2RPanoBatch, Evaluation, agents/trainers, ...)
    defined elsewhere in this project.
    """
    # set manual_seed and build vocab
    setup(opts, opts.seed)
    # Use GPU when available; encoder and model are moved to this device below.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(
        opts.remove_punctuation == 1,
        opts.reversed == 1,
        vocab=vocab,
        encoding_length=opts.max_cap_length,
    )

    # create language instruction encoder
    encoder_kwargs = {
        "opts": opts,
        "vocab_size": len(vocab),
        "embedding_size": opts.word_embedding_size,
        "hidden_size": opts.rnn_hidden_size,
        "padding_idx": padding_idx,
        "dropout_ratio": opts.rnn_dropout,
        "bidirectional": opts.bidirectional == 1,
        "num_layers": opts.rnn_num_layers,
    }
    print("Using {} as encoder ...".format(opts.lang_embed))
    if "lstm" in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError("Unknown {} language embedding".format(
            opts.lang_embed))
    print(encoder)

    # create policy model
    policy_model_kwargs = {
        "opts": opts,
        "img_fc_dim": opts.img_fc_dim,
        "img_fc_use_batchnorm": opts.img_fc_use_batchnorm == 1,
        "img_dropout": opts.img_dropout,
        "img_feat_input_dim": opts.img_feat_input_dim,
        "rnn_hidden_size": opts.rnn_hidden_size,
        "rnn_dropout": opts.rnn_dropout,
        "max_len": opts.max_cap_length,
        "max_navigable": opts.max_navigable,
    }
    # Dispatch on the requested agent architecture.
    if opts.arch == "self-monitoring":
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == "speaker-baseline":
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError("Unknown {} model for seq2seq agent".format(
            opts.arch))
    print(model)

    encoder = encoder.to(device)
    model = model.to(device)

    # One optimizer drives both the language encoder and the policy model.
    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)

    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(
            opts, model, encoder, optimizer)

    # if a secondary exp name is specified, this is useful when resuming from a previous saved
    # experiment and save to another experiment, e.g., pre-trained on synthetic data and fine-tune on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary

    feature, img_spec = load_features(opts.img_feat_dir)

    if opts.test_submission:
        # Submission mode: evaluate the resumed model on the test split and exit.
        assert (opts.resume
                ), "The model was not resumed before running for submission."
        test_env = (
            "test",
            (
                R2RPanoBatch(
                    opts,
                    feature,
                    img_spec,
                    batch_size=opts.batch_size,
                    splits=["test"],
                    tokenizer=tok,
                ),
                Evaluation(["test"]),
            ),
        )
        agent_kwargs = {
            "opts": opts,
            "env": test_env[1][0],
            "results_path": "",
            "encoder": encoder,
            "model": model,
            "feedback": opts.feedback,
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return

    # set up R2R environments (synthetic split replaces 'train' under data augmentation)
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(
            opts,
            feature,
            img_spec,
            batch_size=opts.batch_size,
            seed=opts.seed,
            splits=["train"],
            tokenizer=tok,
        )
    else:
        train_env = R2RPanoBatch(
            opts,
            feature,
            img_spec,
            batch_size=opts.batch_size,
            seed=opts.seed,
            splits=["synthetic"],
            tokenizer=tok,
        )

    val_envs = {
        split: (
            R2RPanoBatch(
                opts,
                feature,
                img_spec,
                batch_size=opts.batch_size,
                splits=[split],
                tokenizer=tok,
            ),
            Evaluation([split]),
        )
        for split in ["val_seen", "val_unseen"]
    }

    # create agent
    agent_kwargs = {
        "opts": opts,
        "env": train_env,
        "results_path": "",
        "encoder": encoder,
        "model": model,
        "feedback": opts.feedback,
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)

    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer,
                                 opts.train_iters_epoch)

    if opts.eval_beam or opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(
                trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return

    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)

    best_success_rate = best_success_rate if opts.resume else 0.0
    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)

        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))

            # Index 1 is the second val split ('val_unseen' per the dict
            # comprehension's insertion order above).
            success_rate_compare = success_rate[1]

            if is_experiment():
                # remember best val_seen success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare,
                                        best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(
                    best_success_rate))

                # save the model if it is the best so far
                save_checkpoint(
                    {
                        "opts": opts,
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "encoder_state_dict": encoder.state_dict(),
                        "best_success_rate": best_success_rate,
                        "optimizer": optimizer.state_dict(),
                        "max_episode_len": opts.max_episode_len,
                    },
                    is_best,
                    checkpoint_dir=opts.checkpoint_dir,
                    name=opts.exp_name,
                )

        # After the augmentation phase ends, switch back to the real train split.
        if (opts.train_data_augmentation
                and epoch == opts.epochs_data_augmentation):
            train_env = R2RPanoBatch(
                opts,
                feature,
                img_spec,
                batch_size=opts.batch_size,
                seed=opts.seed,
                splits=["train"],
                tokenizer=tok,
            )

    print("--> Finished training")
def main():
    """Train the chatbot encoder/decoder, validating and checkpointing per epoch.

    Relies on module-level config (voc, hidden_size, layer counts, dropout,
    learning_rate, start_epoch, epochs, print_every, save_every, save_dir,
    device) and helpers (train, validate, evaluate, ...) defined elsewhere.
    """
    train_loader = ChatbotDataset('train')
    val_loader = ChatbotDataset('valid')

    # Initialize word embeddings (shared by encoder and decoder).
    embedding = nn.Embedding(voc.num_words, hidden_size)

    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # Initializations
    print('Initializing ...')
    batch_time = AverageMeter()  # forward prop. + back prop. time
    losses = AverageMeter()  # loss (per word decoded)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        # Ensure dropout layers are in train mode
        encoder.train()
        decoder.train()
        start = time.time()
        # Batches
        for i in range(train_loader.__len__()):
            input_variable, lengths, target_variable, mask, max_target_len = train_loader.__getitem__(i)
            loss = train(input_variable, lengths, target_variable, mask,
                         max_target_len, encoder, decoder, encoder_optimizer,
                         decoder_optimizer)
            # Keep track of metrics
            losses.update(loss, max_target_len)
            batch_time.update(time.time() - start)
            start = time.time()
            if i % print_every == 0:
                print('[{0}] Epoch: [{1}][{2}/{3}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(timestamp(), epoch, i, len(train_loader),
                                                                      batch_time=batch_time, loss=losses))

        # One epoch's validation
        val_loss = validate(val_loader, encoder, decoder)
        print('\n * LOSS - {loss:.3f}\n'.format(loss=val_loss))

        # Initialize search module and show a few sample decodings.
        searcher = GreedySearchDecoder(encoder, decoder)
        for sentence in pick_n_valid_sentences(10):
            decoded_words = evaluate(searcher, sentence)
            print('Human: {}'.format(sentence))
            print('Bot: {}'.format(''.join(decoded_words)))

        # Save checkpoint
        if epoch % save_every == 0:
            directory = save_dir
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'epoch': epoch,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc': voc.__dict__
            }, os.path.join(directory, '{}_{}_{}.tar'.format('checkpoint', epoch, val_loss)))
def main():
    """Train an AMR-to-sentence seq2seq model (encoder + child-sum + attention
    decoder), with periodic checkpointing and loss logging.

    Command-line flags control sizes, iteration counts and checkpointing; with
    --load_checkpoint the newest ``state_*.pt`` file in the experiment
    directory is restored (models, optimizer, vocabs, hyper-parameters).

    Relies on project helpers defined elsewhere: AMR, make_vocabs,
    EncoderRNN, ChildSum, AttnDecoderRNN, tensors_from_batch, train,
    device, DROPOUT_P, MAX_LENGTH.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '--hidden_size', default=256, type=int,
        help='hidden size of encoder/decoder, also word vector size')
    ap.add_argument('--edge_size', default=20, type=int,
                    help='embedding dimension of edges')
    ap.add_argument('--n_iters', default=100000, type=int,
                    help='total number of examples to train on')
    ap.add_argument('--print_every', default=5000, type=int,
                    help='print loss info every this many training examples')
    ap.add_argument(
        '--checkpoint_every', default=10000, type=int,
        help='write out checkpoint every this many training examples')
    # BUG FIX: was type=int, which truncated any learning rate passed on the
    # command line (e.g. 0.0005 -> 0) despite the float default.
    ap.add_argument('--initial_learning_rate', default=0.001, type=float,
                    help='initial learning rate')
    ap.add_argument('--train_files',
                    default='../amr_anno_1.0/data/split/training/*',
                    help='training files.')
    ap.add_argument('--log_dir', default='./log', help='log directory')
    ap.add_argument('--exp_name', default='experiment', help='experiment name')
    ap.add_argument('--batch_size', default=5, type=int, help='batch size')
    ap.add_argument('--load_checkpoint', action='store_true',
                    help='use existing checkpoint')
    args = ap.parse_args()

    logdir = args.log_dir
    exp_dir = logdir + '/' + args.exp_name
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    # Find the newest checkpoint (largest iteration number in the filename).
    load_state_file = None
    if args.load_checkpoint:
        max_iter = 0
        state_files = glob.glob(exp_dir + '/*')
        for sf in state_files:
            # BUG FIX: parse the basename ('state_%010d.pt'), not the full
            # path — an underscore in log_dir/exp_name broke the old
            # sf.split('_')[1] parsing.
            iter_num = int(os.path.basename(sf).split('_')[-1].split('.')[0])
            if iter_num > max_iter:
                max_iter = iter_num
                load_state_file = sf

    # Create vocab from training data
    iter_num = 0
    train_files = glob.glob(args.train_files)
    train_pairs = AMR.read_AMR_files(train_files, True)
    amr_vocab, en_vocab = None, None
    state = None
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    edge_size = args.edge_size
    drop = DROPOUT_P
    mlength = MAX_LENGTH
    if load_state_file is not None:
        # Resume: hyper-parameters and vocabs come from the checkpoint so the
        # rebuilt modules match the saved weights.
        state = torch.load(load_state_file)
        iter_num = state['iter_num']
        amr_vocab = state['amr_vocab']
        en_vocab = state['en_vocab']
        hidden_size = state['hidden_size']
        edge_size = state['edge_size']
        drop = state['dropout']
        mlength = state['max_length']
        logging.info('loaded checkpoint %s', load_state_file)
    else:
        amr_vocab, en_vocab = make_vocabs(train_pairs)

    encoder = EncoderRNN(amr_vocab.n_nodes, hidden_size).to(device)
    child_sum = ChildSum(amr_vocab.n_edges, edge_size, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, en_vocab.n_words, dropout_p=drop,
                             max_length=mlength).to(device)

    # load checkpoint
    if state is not None:
        encoder.load_state_dict(state['enc_state'])
        child_sum.load_state_dict(state['sum_state'])
        decoder.load_state_dict(state['dec_state'])

    # set up optimization/loss
    params = list(encoder.parameters()) + list(child_sum.parameters()) + list(
        decoder.parameters())  # .parameters() returns generator
    optimizer = optim.Adam(params, lr=args.initial_learning_rate)
    criterion = nn.NLLLoss()

    # load checkpoint
    if state is not None:
        optimizer.load_state_dict(state['opt_state'])

    start = time.time()
    print_loss_total = 0  # Reset every args.print_every

    while iter_num < args.n_iters:
        # Shrink the batch when close to a checkpoint/print boundary so that
        # iter_num lands exactly on multiples of the intervals.
        num_samples = batch_size
        remaining = args.checkpoint_every - (iter_num % args.checkpoint_every)
        remaining2 = args.print_every - (iter_num % args.print_every)
        if remaining < batch_size:
            num_samples = remaining
        elif remaining2 < batch_size:
            num_samples = remaining2
        iter_num += num_samples

        random_pairs = random.sample(train_pairs, num_samples)
        target_snt = tensors_from_batch(en_vocab, random_pairs)
        loss = train(random_pairs, target_snt, amr_vocab, encoder, child_sum,
                     decoder, optimizer, criterion)
        print_loss_total += loss

        if iter_num % args.checkpoint_every == 0:
            state = {
                'iter_num': iter_num,
                'enc_state': encoder.state_dict(),
                'sum_state': child_sum.state_dict(),
                'dec_state': decoder.state_dict(),
                'opt_state': optimizer.state_dict(),
                'amr_vocab': amr_vocab,
                'en_vocab': en_vocab,
                'hidden_size': hidden_size,
                'edge_size': edge_size,
                'dropout': drop,
                'max_length': mlength
            }
            filename = 'state_%010d.pt' % iter_num
            save_file = exp_dir + '/' + filename
            torch.save(state, save_file)
            logging.debug('wrote checkpoint to %s', save_file)

        if iter_num % args.print_every == 0:
            print_loss_avg = print_loss_total / args.print_every
            print_loss_total = 0
            logging.info(
                'time since start:%s (iter:%d iter/n_iters:%d%%) loss_avg:%.4f',
                time.time() - start, iter_num, iter_num / args.n_iters * 100,
                print_loss_avg)
args.hidden_size, args.n_layers, args.dropout, ) decoder = LuongAttnDecoderRNN( args.attn_model, args.hidden_size, len(dataset.out_vocab[0]), args.n_layers, args.dropout, ) # Initialize optimizers and criterion # encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.learning_rate) # decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.learning_rate * decoder_learning_ratio) encoder_optimizer = optim.Adadelta(encoder.parameters()) decoder_optimizer = optim.Adadelta(decoder.parameters()) criterion = nn.CrossEntropyLoss() # Move models to GPU if args.USE_CUDA: encoder.cuda() decoder.cuda() # train(dataset, # args.batch_size, # args.n_epochs, # encoder, # decoder, # encoder_optimizer, # decoder_optimizer,
decoder_input = topi.squeeze().detach() if decoder_input.item() == EOS_token: break loss.backward() encoder_optimizer.step() decoder_optimizer.step() return loss.item() / output_len hidden_size = 256 encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device) decoder = AttenDecoderRNN(hidden_size, output_lang.n_words, max_len=MAX_LENGTH, dropout_p=0.1).to(device) lr = 0.01 encoder_optimizer = optim.SGD(encoder.parameters(), lr=lr) decoder_optimizer = optim.SGD(decoder.parameters(), lr=lr) scheduler_encoder = torch.optim.lr_scheduler.StepLR(encoder_optimizer, step_size=1, gamma=0.95) scheduler_decoder = torch.optim.lr_scheduler.StepLR(decoder_optimizer, step_size=1, gamma=0.95) criterion = nn.NLLLoss() n_iters = 1000000 training_pairs = [ tensorsFromPair(random.choice(pairs)) for i in range(n_iters) ] print_every = 100 save_every = 1000
# Configure training/optimization hyper-parameters.
clip = 50.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers (decoder learns faster by decoder_learning_ratio).
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(),
                               lr=learning_rate * decoder_learning_ratio)
# Restore optimizer state when resuming from a checkpoint.
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# If you have cuda, move the (resumed) optimizer state tensors onto the GPU.
# BUG FIX: this was unconditional before, which crashed on CPU-only machines
# when resuming (fresh optimizers have empty state, so only the resume path
# was affected).
if torch.cuda.is_available():
    for state in encoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()
    for state in decoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()
def main():
    """End-to-end chatbot pipeline on the Cornell movie-dialogs corpus:
    preprocess raw files, build vocab/pairs, construct and train the
    encoder/decoder, then build a greedy-search decoder.

    Relies on project classes (Preprocess, Dataset, EncoderRNN,
    LuongAttnDecoderRNN, Model, GreedySearchDecoder) and module globals
    (MIN_COUNT, device) defined elsewhere.
    """
    corpus_name = "cornell movie-dialogs corpus"
    corpus = os.path.join("data", corpus_name)
    printLines(os.path.join(corpus, "movie_lines.txt"))

    # Define path to new file
    datafile = os.path.join(corpus, "formatted_movie_lines.txt")
    linefile = os.path.join(corpus, "movie_lines.txt")
    conversationfile = os.path.join(corpus, "movie_conversations.txt")

    # Initialize lines dict, conversations list, and field ids
    MOVIE_LINES_FIELDS = ["lineID", "characterID", "movieID", "character", "text"]
    MOVIE_CONVERSATIONS_FIELDS = ["character1ID", "character2ID", "movieID", "utteranceIDs"]

    # Load lines and process conversations into the formatted CSV.
    preprocess = Preprocess(datafile, linefile, conversationfile,
                            MOVIE_LINES_FIELDS, MOVIE_CONVERSATIONS_FIELDS)
    preprocess.loadLines()
    preprocess.loadConversations()
    preprocess.writeCSV()

    # Load/Assemble voc and pairs
    save_dir = os.path.join("data", "save")
    dataset = Dataset(corpus, corpus_name, datafile)
    voc, pairs = dataset.loadPrepareData()
    # # Print some pairs to validate
    # print("\npairs:")
    # for pair in pairs[:10]:
    #     print(pair)

    # Trim voc and pairs
    pairs = dataset.trimRareWords(voc, pairs, MIN_COUNT)

    # Example for validation: sanity-check one small random batch.
    small_batch_size = 5
    batches = dataset.batch2TrainData(voc, [random.choice(pairs) for _ in range(small_batch_size)])
    input_variable, lengths, target_variable, mask, max_target_len = batches
    print("input_variable:", input_variable)
    print("lengths:", lengths)
    print("target_variable:", target_variable)
    print("mask:", mask)
    print("max_target_len:", max_target_len)

    # Configure models
    model_name = 'cb_model'
    attn_model = 'dot'
    #attn_model = 'general'
    #attn_model = 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch
    loadFilename = None
    checkpoint_iter = 4000
    #loadFilename = os.path.join(save_dir, model_name, corpus_name,
    #                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
    #                            '{}_checkpoint.tar'.format(checkpoint_iter))

    if loadFilename:
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings (shared between encoder and decoder).
    embedding = nn.Embedding(voc.num_words, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    teacher_forcing_ratio = 1.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    n_iteration = 4000
    print_every = 1
    save_every = 500

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers (decoder learns faster by decoder_learning_ratio).
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # Run training iterations
    print("Starting Training!")
    model = Model(dataset.batch2TrainData, teacher_forcing_ratio)
    model.trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer,
                     decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers,
                     save_dir, n_iteration, batch_size, print_every, save_every,
                     clip, corpus_name, loadFilename)

    # Set dropout layers to eval mode
    encoder.eval()
    decoder.eval()

    # Initialize search module
    searcher = GreedySearchDecoder(encoder, decoder)
def main(args):
    """Train a FactoredLSTM captioner on factual captions plus a styled
    (humorous) language corpus, alternating the two objectives each epoch.

    Args:
        args: parsed CLI namespace; must provide model_path, vocab_path,
            img_path, factual_caption_path, humorous_caption_path,
            caption_batch_size, language_batch_size, emb_dim, hidden_dim,
            factored_dim, lr_caption, lr_language, epoch_num,
            log_step_caption, log_step_language.

    Side effects:
        Creates args.model_path if missing and writes
        encoder-<epoch>.pkl / decoder-<epoch>.pkl checkpoints there.
    """
    # NOTE(review): GPU index 6 is hard-coded; consider taking it from args.
    torch.cuda.set_device(6)
    model_path = args.model_path
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Load vocabulary (pickled object supporting len()).
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    img_path = args.img_path
    factual_cap_path = args.factual_caption_path
    humorous_cap_path = args.humorous_caption_path

    # Data loaders: paired (image-message, caption) data and styled captions.
    data_loader = get_data_loader(img_path, factual_cap_path, vocab,
                                  args.caption_batch_size)
    styled_data_loader = get_styled_data_loader(humorous_cap_path, vocab,
                                                args.language_batch_size)

    # Build models.
    emb_dim = args.emb_dim
    hidden_dim = args.hidden_dim
    factored_dim = args.factored_dim
    vocab_size = len(vocab)
    encoder = EncoderRNN(voc_size=vocab_size, emb_size=emb_dim,
                         hidden_size=emb_dim)
    decoder = FactoredLSTM(emb_dim, hidden_dim, factored_dim, vocab_size)

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # Loss and optimizers: the caption objective updates everything; the
    # language objective updates only the style-factor matrices S_h*.
    criterion = masked_cross_entropy
    cap_params = list(decoder.parameters()) + list(encoder.parameters())
    lang_params = list(decoder.S_hc.parameters()) + list(decoder.S_hf.parameters()) \
        + list(decoder.S_hi.parameters()) + list(decoder.S_ho.parameters())
    optimizer_cap = torch.optim.Adam(cap_params, lr=args.lr_caption)
    optimizer_lang = torch.optim.Adam(lang_params, lr=args.lr_language)

    # Train.
    total_cap_step = len(data_loader)
    total_lang_step = len(styled_data_loader)
    epoch_num = args.epoch_num
    for epoch in range(epoch_num):
        # Factual-caption phase.
        for i, (messages, m_lengths, targets, t_lengths) in enumerate(data_loader):
            messages = to_var(messages.long())
            targets = to_var(targets.long())

            # forward, backward and optimize
            decoder.zero_grad()
            encoder.zero_grad()
            output, features = encoder(messages, list(m_lengths))
            outputs = decoder(targets, features, mode="factual")
            # Shift by one: predict token t+1 from prefix up to t.
            loss = criterion(outputs[:, 1:, :].contiguous(),
                             targets[:, 1:].contiguous(),
                             t_lengths - 1)
            loss.backward()
            optimizer_cap.step()

            # print log
            if i % args.log_step_caption == 0:
                # FIX: loss.data[0] raises IndexError on 0-dim tensors in
                # PyTorch >= 0.5; loss.item() is the supported accessor.
                print("Epoch [%d/%d], CAP, Step [%d/%d], Loss: %.4f"
                      % (epoch + 1, epoch_num, i, total_cap_step, loss.item()))
                eval_outputs(outputs, vocab)

        # Styled-language phase (no image features; humorous mode).
        for i, (captions, lengths) in enumerate(styled_data_loader):
            captions = to_var(captions.long())

            # forward, backward and optimize
            decoder.zero_grad()
            outputs = decoder(captions, mode='humorous')
            loss = criterion(outputs, captions[:, 1:].contiguous(), lengths - 1)
            loss.backward()
            optimizer_lang.step()

            # print log
            if i % args.log_step_language == 0:
                print("Epoch [%d/%d], LANG, Step [%d/%d], Loss: %.4f"
                      % (epoch + 1, epoch_num, i, total_lang_step, loss.item()))

        # Save per-epoch checkpoints.
        torch.save(decoder.state_dict(),
                   os.path.join(model_path, 'decoder-%d.pkl' % (epoch + 1, )))
        torch.save(encoder.state_dict(),
                   os.path.join(model_path, 'encoder-%d.pkl' % (epoch + 1, )))
encoder_kwargs = { 'opts': opts, 'vocab_size': len(vocab), 'embedding_size': opts.word_embedding_size, 'hidden_size': opts.rnn_hidden_size, 'padding_idx': padding_idx, 'dropout_ratio': opts.rnn_dropout, 'bidirectional': opts.bidirectional == 1, 'num_layers': opts.rnn_num_layers } # Model setup torch.no_grad() model = SelfMonitoring(**policy_model_kwargs).cuda() encoder = EncoderRNN(**encoder_kwargs).cuda() params = list(encoder.parameters()) + list(model.parameters()) optimizer = torch.optim.Adam(params, lr=opts.learning_rate) resume_training(opts, model, encoder, optimizer) model.eval() # model.device = torch.device("cpu") encoder.eval() # encoder.device = torch.device("cpu") resnet = models.resnet152(pretrained=True) resnet.eval() resnet.cuda() # Gibson setup config = parse_config('ped.yaml') def transform_img(im): ''' Prep gibson rgb input for pytorch model '''
# word embedding embedding = nn.Embedding(VOC.num_words, hp.hidden_size) encoder = EncoderRNN(hp.hidden_size, embedding, hp.n_layers, hp.dropout) decoder = LuongAttnDecoderRNN(hp.attn_model, embedding, hp.hidden_size, VOC.num_words, hp.n_layers, hp.dropout) encoder = encoder.to(device) decoder = decoder.to(device) print('Models built and ready to go!') encoder.train() decoder.train() print('Building optimizers ...') encoder_optimizer = optim.Adam(encoder.parameters(), lr=hp.lr) decoder_optimizer = optim.Adam(decoder.parameters(), lr=hp.lr * hp.decoder_learning_ratio) encoder_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( encoder_optimizer, 5) decoder_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( decoder_optimizer, 5) if loadFilename: encoder_optimizer.load_state_dict(encoder_optimizer_sd) decoder_optimizer.load_state_dict(decoder_optimizer_sd) print("Starting Training!") trainIters(hp.model_name, VOC, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, hp.n_layers, hp.n_layers, 'savedModels/checkpoint', hp.n_iteration, hp.batch_size, hp.print_every, hp.save_every, hp.clip, 'persuade', loadFilename)
def main():
    """Train an en->zh seq2seq translator with early stopping on val loss.

    Per epoch: train over all batches, validate, decay LRs after 8 stale
    epochs, stop after 20, checkpoint, sample 10 validation translations,
    then reshuffle both datasets.
    """
    # Word maps for source (English) and target (Chinese) vocabularies.
    input_lang = Lang('data/WORDMAP_en.json')
    output_lang = Lang('data/WORDMAP_zh.json')
    print("input_lang.n_words: " + str(input_lang.n_words))
    print("output_lang.n_words: " + str(output_lang.n_words))
    train_data = TranslationDataset('train')
    val_data = TranslationDataset('valid')
    # Initialize encoder & decoder models
    encoder = EncoderRNN(input_lang.n_words, hidden_size, encoder_n_layers,
                         dropout)
    decoder = LuongAttnDecoderRNN(attn_model, hidden_size,
                                  output_lang.n_words, decoder_n_layers,
                                  dropout)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # Initializations
    print('Initializing ...')
    train_batch_time = ExpoAverageMeter()  # forward prop. + back prop. time
    train_losses = ExpoAverageMeter()  # loss (per word decoded)
    val_batch_time = ExpoAverageMeter()
    val_losses = ExpoAverageMeter()
    best_loss = 100000  # sentinel: any real loss is lower
    epochs_since_improvement = 0
    # Epochs
    for epoch in range(start_epoch, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            adjust_learning_rate(encoder_optimizer, 0.8)
        # One epoch's training
        # Ensure dropout layers are in train mode
        encoder.train()
        decoder.train()
        start = time.time()
        # Batches
        for i_batch in range(len(train_data)):
            input_variable, lengths, target_variable, mask, max_target_len = train_data[
                i_batch]
            train_loss = train(input_variable, lengths, target_variable,
                               mask, max_target_len, encoder, decoder,
                               encoder_optimizer, decoder_optimizer)
            # Keep track of metrics
            train_losses.update(train_loss)
            train_batch_time.update(time.time() - start)
            start = time.time()
            # Print status
            if i_batch % print_every == 0:
                print(
                    '[{0}] Epoch: [{1}][{2}/{3}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        timestamp(), epoch, i_batch, len(train_data),
                        batch_time=train_batch_time, loss=train_losses))
        # One epoch's validation
        start = time.time()
        # Batches
        for i_batch in range(len(val_data)):
            input_variable, lengths, target_variable, mask, max_target_len = val_data[
                i_batch]
            val_loss = valid(input_variable, lengths, target_variable, mask,
                             max_target_len, encoder, decoder)
            # Keep track of metrics
            val_losses.update(val_loss)
            val_batch_time.update(time.time() - start)
            start = time.time()
            # Print status
            if i_batch % print_every == 0:
                print(
                    'Validation: [{0}/{1}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        i_batch, len(val_data), batch_time=val_batch_time,
                        loss=val_losses))
        val_loss = val_losses.avg
        print('\n * LOSS - {loss:.3f}\n'.format(loss=val_loss))
        # Check if there was an improvement (lower validation loss is better)
        is_best = val_loss < best_loss
        best_loss = min(best_loss, val_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0
        save_checkpoint(epoch, encoder, decoder, encoder_optimizer,
                        decoder_optimizer, input_lang, output_lang, val_loss,
                        is_best)
        # Initialize search module (greedy decoding on current weights) and
        # show a few sample translations from the validation set.
        searcher = GreedySearchDecoder(encoder, decoder)
        for input_sentence, target_sentence in pick_n_valid_sentences(
                input_lang, output_lang, 10):
            decoded_words = evaluate(searcher, input_sentence, input_lang,
                                     output_lang)
            print('> {}'.format(input_sentence))
            print('= {}'.format(target_sentence))
            # Chinese output: tokens concatenated without separators.
            print('< {}'.format(''.join(decoded_words)))
        # Reshuffle train and valid samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)
def train(x, y, optimizer=optim.Adam, criterion=nn.MSELoss(), n_steps=100, attn_model="general", hidden_size=128, n_layers=1, dropout=0, batch_size=50, elr=0.001, dlr=0.005, clip=50.0, print_every=10, teacher_forcing_ratio=lambda x: 1 if x < 10 else 0): # Configure training/optimization encoder_learning_rate = elr decoder_learning_ratio = dlr # Initialize models encoder = EncoderRNN(1, hidden_size, n_layers, dropout=dropout) decoder = LuongAttnDecoderRNN(attn_model, 1, hidden_size, n_layers, dropout=dropout) # Initialize optimizers and criterion encoder_optimizer = optimizer(encoder.parameters(), lr=encoder_learning_rate) decoder_optimizer = optimizer(decoder.parameters(), lr=decoder_learning_ratio) # Move models to GPU if USE_CUDA: encoder.cuda() decoder.cuda() # Begin! print_loss_total = 0 step = 0 while step < n_steps: step += 1 # Get training data for this cycle batch_idx = np.random.randint(0, x.shape[1], batch_size) input_batches, target_batches = x[:, batch_idx], y[:, batch_idx] # Run the train function loss, _ = _train(input_batches, target_batches, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, teacher_forcing_ratio=teacher_forcing_ratio(step), clip=clip) # print(np.mean(np.square((output.data.cpu().numpy() - series[-20:, batch_idx])))) # Keep track of loss print_loss_total += loss if step % print_every == 0: print_loss_avg = print_loss_total / print_every print_loss_total = 0 print_summary = '(%d %d%%) %.4f' % (step, step / n_steps * 100, print_loss_avg) print(print_summary) return encoder, decoder