import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Project-local names (Config, BPEVocab, CornellMovieDialogDataset, PrepareData,
# EncoderRNN, LuongAttnDecoderRNN, GreedySearchDecoder, evaluate_input1,
# training_iters, training_iters1) are assumed to be imported from this package.


def run_evaluation1(corpus_dir, save_dir, datafile, config_file):
    config = Config.from_json_file(config_file)
    save_dir = config.save_dir
    # Set checkpoint to load from; set to None if starting from scratch
    load_filename = os.path.join(
        save_dir, config.model_name, config.corpus_name,
        '{}-{}_{}'.format(config.encoder_n_layers, config.decoder_n_layers,
                          config.hidden_size),
        "last_checkpoint.tar")
    # If loading on the same machine the model was trained on
    checkpoint = torch.load(load_filename, map_location=lambda storage, loc: storage)
    # If loading a model trained on GPU to CPU
    # checkpoint = torch.load(load_filename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint["en"]
    decoder_sd = checkpoint["de"]
    # Optimizer states are present in the checkpoint but not needed for evaluation
    encoder_optimizer_sd = checkpoint["en_opt"]
    decoder_optimizer_sd = checkpoint["de_opt"]
    embedding_sd = checkpoint["embedding"]

    vocab = BPEVocab.from_files(config.bpe_vocab_path, config.bpe_codes_path)

    print("Building encoder and decoder ...")
    # Initialize word embeddings
    embedding = nn.Embedding(len(vocab), config.hidden_size)
    embedding.load_state_dict(embedding_sd)
    # Initialize encoder and decoder models
    encoder = EncoderRNN(config.hidden_size, embedding,
                         config.encoder_n_layers, config.dropout)
    decoder = LuongAttnDecoderRNN(config.attn_model, embedding,
                                  config.hidden_size, len(vocab),
                                  config.decoder_n_layers, config.dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    # Set dropout layers to eval mode
    encoder.eval()
    decoder.eval()

    # Initialize search module
    searcher = GreedySearchDecoder(encoder, decoder)

    # Begin chatting
    evaluate_input1(encoder, decoder, searcher, vocab)
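
# run_evaluation1 above reads a checkpoint with the keys "en", "de", "en_opt",
# "de_opt", and "embedding" ("voc_dict" is additionally read by the word-level
# pipeline below). The actual saving happens inside training_iters/training_iters1,
# so the following is only a sketch of a save call that would produce a compatible
# file; save_checkpoint is a hypothetical helper, not part of this module:
def save_checkpoint(path, encoder, decoder, encoder_optimizer, decoder_optimizer,
                    embedding, vocab):
    torch.save({
        "en": encoder.state_dict(),
        "de": decoder.state_dict(),
        "en_opt": encoder_optimizer.state_dict(),
        "de_opt": decoder_optimizer.state_dict(),
        "embedding": embedding.state_dict(),
        "voc_dict": vocab.__dict__,  # restored via vocab.__dict__ on load
    }, path)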
def run_training1(config_file, load_filename=""):
    # Read data
    config = Config.from_json_file(config_file)
    save_dir = config.save_dir
    datafile = config.datafile

    vocab = BPEVocab.from_files(config.bpe_vocab_path, config.bpe_codes_path)

    if load_filename:
        # If loading on the same machine the model was trained on
        checkpoint = torch.load(load_filename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(load_filename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint["en"]
        decoder_sd = checkpoint["de"]
        encoder_optimizer_sd = checkpoint["en_opt"]
        decoder_optimizer_sd = checkpoint["de_opt"]
        embedding_sd = checkpoint["embedding"]
        vocab.__dict__ = checkpoint["voc_dict"]

    dataset = CornellMovieDialogDataset(config, paths=[datafile], vocab=vocab)
    # TODO: try shuffle=False
    # TODO: the batched input tensor to the model is transposed (seq_len, batch);
    #       verify nothing downstream assumes (batch, seq_len)
    data_loader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True,
                             collate_fn=dataset.collate_func, drop_last=True)
    num_tokens = len(vocab)

    print("Building encoder and decoder ...")
    # Initialize word embeddings
    embedding = nn.Embedding(num_tokens, config.hidden_size).to(config.device)
    if load_filename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder and decoder models
    encoder = EncoderRNN(config.hidden_size, embedding,
                         config.encoder_n_layers, config.dropout).to(config.device)
    decoder = LuongAttnDecoderRNN(config.attn_model, embedding,
                                  config.hidden_size, num_tokens,
                                  config.decoder_n_layers,
                                  config.dropout).to(config.device)
    if load_filename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    print("Models built and ready to go.")

    #####################################
    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers
    print("Building optimizers ...")
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=config.learning_rate * config.decoder_learning_ratio)
    if load_filename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # for epoch in range(config.n_epochs):
    training_iters1(data_loader, config, vocab, encoder, decoder,
                    encoder_optimizer, decoder_optimizer, embedding,
                    save_dir, load_filename, epoch=1)
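
# Config.from_json_file suggests the config is a plain JSON file. The fields below
# are exactly those read by the functions in this module; every value is an
# illustrative assumption, not a shipped default:
EXAMPLE_CONFIG = {
    # Paths
    "save_dir": "save",
    "corpus_dir": "data/cornell_movie_dialogs",
    "datafile": "data/formatted_movie_lines.txt",
    "bpe_vocab_path": "data/bpe.vocab",
    "bpe_codes_path": "data/bpe.codes",
    # Names used to build checkpoint paths
    "model_name": "cb_model",
    "corpus_name": "cornell_movie_dialogs",
    # Data preparation (word-level pipeline)
    "MIN_COUNT": 3,
    "MAX_LENGTH": 10,
    # Model
    "attn_model": "dot",
    "hidden_size": 500,
    "encoder_n_layers": 2,
    "decoder_n_layers": 2,
    "dropout": 0.1,
    # Training
    "device": "cuda",
    "batch_size": 64,
    "learning_rate": 0.0001,
    "decoder_learning_ratio": 5.0,
    "n_epochs": 1,
}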
def run_training(config_file, load_filename=""):
    # Read data
    config = Config.from_json_file(config_file)
    save_dir = config.save_dir
    datafile = config.datafile
    corpus_dir = config.corpus_dir

    prepare_data = PrepareData(min_count=config.MIN_COUNT,
                               max_length=config.MAX_LENGTH)
    vocab, pairs = prepare_data.load_prepare_data(corpus_dir, datafile, save_dir)

    # Set checkpoint to load from; set to None if starting from scratch
    # load_filename = os.path.join(save_dir, model_name, corpus_name,
    #                              '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
    #                              '{}_checkpoint.tar'.format(checkpoint_iter))
    if load_filename:
        # If loading on the same machine the model was trained on
        checkpoint = torch.load(load_filename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(load_filename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint["en"]
        decoder_sd = checkpoint["de"]
        encoder_optimizer_sd = checkpoint["en_opt"]
        decoder_optimizer_sd = checkpoint["de_opt"]
        embedding_sd = checkpoint["embedding"]
        vocab.__dict__ = checkpoint["voc_dict"]

    print("Building encoder and decoder ...")
    # Initialize word embeddings
    embedding = nn.Embedding(vocab.num_words, config.hidden_size).to(config.device)
    if load_filename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder and decoder models
    encoder = EncoderRNN(config.hidden_size, embedding,
                         config.encoder_n_layers, config.dropout).to(config.device)
    decoder = LuongAttnDecoderRNN(config.attn_model, embedding,
                                  config.hidden_size, vocab.num_words,
                                  config.decoder_n_layers,
                                  config.dropout).to(config.device)
    if load_filename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    print("Models built and ready to go.")

    #####################################
    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers
    print("Building optimizers ...")
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=config.learning_rate * config.decoder_learning_ratio)
    if load_filename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # Run training iterations
    training_iters(config, vocab, pairs, encoder, decoder,
                   encoder_optimizer, decoder_optimizer, embedding,
                   save_dir, load_filename)
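
# Example usage, assuming a JSON config like EXAMPLE_CONFIG above has been written
# to "configs/config.json" (a hypothetical path). run_evaluation1 ignores its
# first three arguments, so placeholders are fine:
if __name__ == "__main__":
    run_training("configs/config.json")  # word-level pipeline, from scratch
    # run_training1("configs/config.json")  # BPE pipeline
    # run_training("configs/config.json", load_filename="save/.../last_checkpoint.tar")  # resume
    # run_evaluation1(None, None, None, "configs/config.json")  # interactive chat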