def __init__(self, word_vec_dim, bidir=True, rnn_cell='LSTM'):
    """Initialize the judge network's encoder.

    :param word_vec_dim: dimensionality of the input word vectors; also used
        as the encoder's hidden-state size (see below).
    :param bidir: whether the encoder RNN is bidirectional.
    :param rnn_cell: RNN cell type passed through to ``EncoderRNN``.
    """
    super().__init__()
    self.trainable = True
    self.word_vec_dim = word_vec_dim
    # Hidden state size deliberately equals the word-vector size, so encoder
    # hidden states and word vectors can be combined directly downstream.
    self.hidden_state_size = word_vec_dim
    # NOTE(review): EncoderRNN and util.weight_init are defined elsewhere in
    # the project; their exact semantics are not visible from this chunk.
    self.encoder = EncoderRNN(self.word_vec_dim, self.word_vec_dim, bidir=bidir, rnn_cell=rnn_cell)
    self.encoder.apply(util.weight_init)
def main():
    """Train the chatbot seq2seq model end to end and periodically checkpoint it.

    Loads and filters the corpus, pre-samples all training batches, builds the
    embedding/encoder/decoder and their optimizers, then runs the training loop.
    """
    data_path = './data/chatbot.txt'
    voc, pairs = loadPrepareData(data_path)
    # Drop sentence pairs containing words rarer than MIN_COUNT.
    MIN_COUNT = Config.MIN_COUNT
    pairs = trimRareWords(voc, pairs, MIN_COUNT)
    # Pre-sample one random batch per training step up front.
    training_batches = [
        batch2TrainData(voc, [random.choice(pairs) for _ in range(Config.batch_size)])
        for _ in range(Config.total_step)
    ]
    # Word embedding shared by encoder and decoder.
    embedding = nn.Embedding(voc.num_words, Config.hidden_size)
    # Build encoder and decoder.
    encoder = EncoderRNN(Config.hidden_size, embedding, Config.encoder_n_layers, Config.dropout)
    decoder = LuongAttnDecoderRNN(Config.attn_model, embedding, Config.hidden_size,
                                  voc.num_words, Config.decoder_n_layers, Config.dropout)
    # Optimizers; the decoder uses a scaled learning rate.
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=Config.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=Config.learning_rate * Config.decoder_learning_ratio)
    start_iteration = 1
    save_every = 4000  # checkpoint interval, in training steps
    for iteration in range(start_iteration, Config.total_step + 1):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
        start_time = time.time()
        # Run a training iteration with this batch.
        loss = train(input_variable, lengths, target_variable, mask, max_target_len,
                     encoder, decoder, embedding, encoder_optimizer, decoder_optimizer,
                     Config.batch_size, Config.clip)
        time_str = datetime.datetime.now().isoformat()
        log_str = "time: {}, Iteration: {}; Percent complete: {:.1f}%; loss: {:.4f}, spend_time: {:6f}".format(time_str, iteration, iteration / Config.total_step * 100, loss, time.time() - start_time)
        rainbow(log_str)
        # Save checkpoint.
        if iteration % save_every == 0:
            save_path = './save_model/'
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            torch.save({
                'iteration': iteration,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(save_path,
                            '{}_{}_model.tar'.format(iteration, 'checkpoint')))
class RNNDotJudgeNet(nn.Module):
    """Score candidate words against keywords via an RNN encoder + dot product.

    The keywords and candidates are concatenated (separated by a zero vector)
    into one sequence, encoded, and each candidate is scored by its dot product
    with the summed final encoder state. The two output channels are the score
    and its negation, giving a (good, bad) logit pair per candidate.
    """

    def __init__(self, word_vec_dim, bidir=True, rnn_cell='LSTM'):
        """
        :param word_vec_dim: word-vector size; also used as hidden-state size
            so hidden states can be dotted with candidate vectors directly.
        :param bidir: whether the encoder RNN is bidirectional.
        :param rnn_cell: RNN cell type passed through to ``EncoderRNN``.
        """
        super().__init__()
        self.trainable = True
        self.word_vec_dim = word_vec_dim
        self.hidden_state_size = word_vec_dim
        self.encoder = EncoderRNN(self.word_vec_dim, self.word_vec_dim,
                                  bidir=bidir, rnn_cell=rnn_cell)
        self.encoder.apply(util.weight_init)

    def forward(self, Ks: torch.Tensor, Cs: torch.Tensor, *args):
        """
        :param Ks, keywords used to expand: (batch_size, n_keys, word_vector_dim)
        :param Cs, candidates searched by Ks: (batch_size, n_candidates, word_vector_dim)
        :return: probs as good / bad candiates: (batch_size, n_candidates, 2)
        """
        batch_size = Ks.shape[0]
        n_candidates = Cs.shape[1]
        # BUG FIX: allocate the separator on the inputs' device/dtype. A bare
        # torch.zeros(...) lives on CPU with float32, so torch.cat would fail
        # whenever Ks/Cs are on CUDA (or use a different float dtype).
        sep = torch.zeros(batch_size, 1, self.word_vec_dim,
                          device=Ks.device, dtype=Ks.dtype)
        query_string = torch.cat(
            [Ks, sep, Cs],
            dim=1)  # (batch_size, n_keys + 1 + n_candidates, word_vector_dim)
        query_string_transposed = query_string.transpose(
            0, 1)  # (n_keys + 1 + n_candidates, batch_size, word_vector_dim)
        # Single full-length entry; lengths stay on CPU as required by packing.
        lengths = [query_string_transposed.shape[0]]
        encoder_outputs, encoder_states = self.encoder(
            query_string_transposed,
            torch.tensor(lengths).long().cpu())
        # encoder_outputs: (n_keys + 1 + n_candidates, batch_size, hidden_state_size)
        # encoder_states:  (n_layers=1, batch_size, hidden_state_size)
        # Sum over layer/direction axis, then shape for batched matmul.
        encoder_hidden = torch.sum(encoder_states[0],
                                   dim=0).view(batch_size, self.hidden_state_size, 1)
        products = torch.bmm(Cs, encoder_hidden)  # (batch_size, n_candidates, 1)
        # Negated score serves as the "bad" logit of the pair.
        rest = -1 * products
        result = torch.cat([products, rest], dim=-1)
        return result
def train(args):
    """Train an encoder / attention-decoder pair and checkpoint it to disk.

    The checkpoint bundles hyper-parameters, both vocabularies, and both
    state dicts so inference can rebuild everything from one file.
    """
    input_lang, output_lang, pairs = prepareData(args)
    print(random.choice(pairs))
    # Everything needed to reconstruct the model later travels in this dict.
    model = {
        'hidden_size': 1000,
        'dropout': 0.1,
        'input_lang': input_lang,
        'output_lang': output_lang,
        'max_length': max(input_lang.max_length, output_lang.max_length) + 2,
    }
    print('Max length: {}'.format(model['max_length']))
    enc = EncoderRNN(input_lang.n_words, model['hidden_size']).to(getDevice())
    enc.train()
    dec = AttnDecoderRNN(model['hidden_size'],
                         output_lang.n_words,
                         dropout_p=model['dropout'],
                         max_length=model['max_length']).to(getDevice())
    dec.train()
    n_iters = 30000
    # Pre-sample one random training pair per iteration.
    training_pairs = [
        tensorsFromPair(input_lang, output_lang, random.choice(pairs))
        for _ in range(n_iters)
    ]
    trainIters(training_pairs,
               enc,
               dec,
               n_iters,
               print_every=1000,
               optim=args.optim,
               learning_rate=args.learning_rate,
               max_length=model['max_length'])
    print('saving models...')
    model['encoder_state'] = enc.state_dict()
    model['decoder_state'] = dec.state_dict()
    torch.save(
        model, "data/{}_model_checkpoint.pth".format(args.phase.split('_')[-1]))
def inference(args):
    """Run the trained question parser over the validation annotations.

    Rebuilds the encoder/decoder from the saved checkpoint, translates every
    non-descriptive question into a program, and writes the results as JSON.
    """
    model = torch.load("data/sc_question_model_checkpoint.pth")
    model['encoder'] = EncoderRNN(model['input_lang'].n_words,
                                  model['hidden_size']).to(getDevice())
    model['encoder'].load_state_dict(model['encoder_state'])
    model['encoder'].eval()
    model['decoder'] = AttnDecoderRNN(model['hidden_size'],
                                      model['output_lang'].n_words,
                                      dropout_p=model['dropout'],
                                      max_length=model['max_length']).to(getDevice())
    model['decoder'].load_state_dict(model['decoder_state'])
    model['decoder'].eval()

    with open('../executor/parse_results/sc_validation.json') as f:
        anns = json.load(f)

    out = {}
    for ann in tqdm(anns):
        entry = {
            'scene_index': ann['scene_index'],
            'video_filename': ann['video_filename'],
            'questions': [],
        }
        for ann_q in ann['questions']:
            # Descriptive questions are handled elsewhere; skip them here.
            if ann_q['question_type'] == 'descriptive':
                continue
            program_pred, _ = evaluate(model['encoder'],
                                       model['decoder'],
                                       normalizeString(ann_q['question']),
                                       model['input_lang'],
                                       model['output_lang'],
                                       max_length=model['max_length'])
            # Drop the trailing end-of-sequence marker, if present.
            if program_pred[-1] == '<EOS>':
                program_pred = program_pred[:-1]
            entry['questions'].append({
                'question_program': program_pred,
                'question': ann_q['question'],
                'question_type': '{}_single_choice'.format(ann_q['question_type']),
                'question_subtype': ann_q['program'][-1],
                'program_gt': ann_q['program'],
                'answer': ann_q['answer'],
            })
        out[entry['scene_index']] = entry

    out_path = '../executor/parse_results/sc_val_reproduced.json'
    print('Writing output to {}'.format(out_path))
    with open(out_path, 'w') as fout:
        json.dump(out, fout, indent=4)
def evaluate(vocab: Vocabulary, corpus_filename: str, encoder: EncoderRNN,
             decoder: AttnDecoderRNN, max_src_length: int,
             max_tgt_length: int):
    """Greedy-decode every entry of the corpus and print each prediction."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    encoder.to(device)
    decoder.to(device)
    # Disable dropout etc. for deterministic evaluation.
    encoder.eval()
    decoder.eval()
    with torch.no_grad():
        corpus = Corpus(filename=corpus_filename,
                        max_src_length=max_src_length,
                        vocab=vocab,
                        device=device)
        loader = torch.utils.data.DataLoader(dataset=corpus, batch_size=1)
        for batch in loader:
            # [batch=1, seq] -> [seq, batch=1], the layout the models expect.
            source = batch["data"].permute(1, 0)
            encoded = encoder.encode_sequence(source)
            decoded = decoder.decode_sequence(
                encoder_outputs=encoded,
                start_symbol=corpus.characters.start_of_sequence.integer,
                max_length=max_tgt_length)
            # Greedy choice: highest-scoring symbol at each step.
            _, best = decoded.topk(k=1)
            indices = best.squeeze(dim=2).squeeze(dim=1).tolist()
            print("".join(corpus.characters[i].string for i in indices))
def run_training(*, config: argparse.Namespace) -> None:
    """Train (or continue training) an encoder/decoder pair and save both.

    :param config: parsed CLI options; must provide vocab/corpus paths,
        model hyper-parameters, training schedule, and output paths.
    """
    import pickle
    # BUG FIX: the original did pickle.load(open(...)) and leaked the file
    # handle; a context manager closes it deterministically.
    with open(config.vocab, "rb") as vocab_file:
        vocab: Vocabulary = pickle.load(vocab_file)
    device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    training_corpus = Corpus(vocab=vocab,
                             filename=config.corpus,
                             max_src_length=config.max_src_length,
                             device=device)
    if config.continue_training:
        # Resume from previously saved full-model files.
        encoder1 = torch.load(config.encoder, map_location=device)
        attn_decoder1 = torch.load(config.decoder, map_location=device)
    else:
        encoder1: EncoderRNN = EncoderRNN(input_size=len(training_corpus.characters),
                                          embedding_size=config.encoder_embedding_size,
                                          hidden_size=config.encoder_hidden_size,
                                          num_hidden_layers=config.encoder_hidden_layers).to(device=device)
        attn_decoder1 = AttnDecoderRNN(embedding_size=config.decoder_embedding_size,
                                       decoder_hidden_size=config.decoder_hidden_size,
                                       encoder_hidden_size=config.encoder_hidden_size,
                                       num_hidden_layers=config.decoder_hidden_layers,
                                       output_size=len(training_corpus.characters),
                                       dropout_p=config.decoder_dropout,
                                       max_src_length=training_corpus.word_tensor_length).to(device=device)
    train_iters(corpus=training_corpus,
                encoder=encoder1,
                decoder=attn_decoder1,
                device=device,
                n_iters=config.num_epochs,
                batch_size=config.batch_size,
                print_every=config.print_every,
                learning_rate=config.learning_rate,
                teacher_forcing_ratio=config.teacher_forcing_ratio)
    # Move to CPU before saving so the checkpoints load on machines without GPUs.
    print(f"Saving encoder to {config.encoder}...")
    torch.save(encoder1.to(device=torch.device("cpu")), config.encoder)
    print(f"Saving decoder to {config.decoder}...")
    torch.save(attn_decoder1.to(device=torch.device("cpu")), config.decoder)
def main():
    """CLI entry point: parse options, build the models, and start training."""
    parser = ArgumentParser("Train Seq2Seq Attention Model")
    parser.add_argument("-f", "--text_dir", required=True,
                        help="Path to all the full text documents")
    parser.add_argument("-s", "--summary_dir", required=False,
                        help="Path to all the summary documents")
    parser.add_argument("-o", "--output_dir", required=True,
                        help="Path to save the model")
    parser.add_argument("--hidden_units", type=int, default=256,
                        help="Number of hidden units")
    parser.add_argument("--dropout", type=float, default=0.1,
                        help="Dropout value in Attention Decoder")
    parser.add_argument("--trim_dataset", required=False, type=int,
                        help="Trim the dataset to a small number for testing purposes")
    parser.add_argument("--debug", action="store_true", required=False,
                        help="Train the model in debug mode")
    parser.add_argument("--print_every", default=1000, type=int, required=False,
                        help="Print every n iterations")
    parser.add_argument("--save_every", default=5000, required=False, type=int,
                        help="Save model every n epochs")
    parser.add_argument("-lr", "--learning_rate", default=0.001, type=float,
                        help="Learning rate")
    parser.add_argument("-n", "--n_epochs", default=500000, type=int,
                        help="Number of epochs to train for")
    args = parser.parse_args()

    corpus = DataLoader(args.text_dir, args.summary_dir)
    full_text_lang, summary_text_lang, pairs = corpus.load(trim=args.trim_dataset)

    LOGGER.info('Creating models...')
    enc = EncoderRNN(full_text_lang.n_words, args.hidden_units).to(device)
    dec = AttentionDecoderRNN(args.hidden_units, summary_text_lang.n_words,
                              args.dropout).to(device)

    train(lang_1=full_text_lang,
          lang_2=summary_text_lang,
          pairs=pairs,
          encoder=enc,
          decoder=dec,
          output_dir=args.output_dir,
          n_epochs=args.n_epochs,
          learning_rate=args.learning_rate,
          print_every=args.print_every,
          save_every=args.save_every,
          debug=args.debug)
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from preprocess import get_dataset
from utils.transformer import *
import argparse
from seq2seq import EncoderRNN, DecoderRNN, Linear

device = 'cpu'

# ***********************
# ******* IMPORTANT *****
# ***********************
# NOTE: the sizes of 1 and 2 below are for testing purposes only — do NOT
# hard-code any values in your seq2seq.py file.
torch.manual_seed(0)

encoder = EncoderRNN(hidden_size=1, input_size=1, batch_size=1)
decoder = DecoderRNN(hidden_size=1, output_size=2, batch_size=1)
dense = Linear(bidirectional=False, hidden_size_encoder=1, hidden_size_decoder=1)
dense2 = Linear(bidirectional=False, hidden_size_encoder=2, hidden_size_decoder=1)

layers = [encoder, decoder, dense]
# Collect every named submodule, skipping each module's anonymous root
# (named_modules yields the module itself under the empty name '').
layers_individual = [
    [name, module]
    for layer in layers
    for name, module in layer.named_modules()
    if name != ''
]
def train_iters(*,  # data: Data,
                corpus: Corpus,
                encoder: EncoderRNN,
                decoder: AttnDecoderRNN,
                device: torch.device,
                n_iters: int,
                batch_size: int,
                teacher_forcing_ratio: float,
                print_every: int = 1000,
                learning_rate: float = 0.01
                ) -> None:
    """Run ``n_iters`` passes of SGD training over *corpus*.

    Each outer iteration consumes the whole DataLoader; per-batch losses are
    accumulated and an average is printed every ``print_every`` iterations.
    """
    data = torch.utils.data.DataLoader(dataset=corpus, batch_size=batch_size)

    start: float = time.time()
    plot_losses: List[float] = []  # NOTE(review): never appended to below
    print_loss_total: float = 0  # Reset every print_every
    plot_loss_total: float = 0  # Reset every plot_every

    encoder_optimizer: Optimizer = SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer: Optimizer = SGD(decoder.parameters(), lr=learning_rate)

    criterion: nn.NLLLoss = nn.NLLLoss(reduction='mean')  # ignore_index=corpus.characters.pad_int)

    for iteration in range(1, n_iters + 1):  # type: int

        for batch in data:
            # DataLoader yields [batch, seq]; the models expect [seq, batch].
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)
            target_tensor: torch.Tensor = batch["labels"].permute(1, 0)

            # The final batch of an epoch may be smaller than batch_size.
            actual_batch_size: int = min(batch_size, input_tensor.shape[1])

            verify_shape(tensor=input_tensor, expected=[corpus.word_tensor_length, actual_batch_size])
            verify_shape(tensor=target_tensor, expected=[corpus.label_tensor_length, actual_batch_size])

            loss: float = train(input_tensor=input_tensor,
                                target_tensor=target_tensor,
                                encoder=encoder,
                                decoder=decoder,
                                encoder_optimizer=encoder_optimizer,
                                decoder_optimizer=decoder_optimizer,
                                criterion=criterion,
                                device=device,
                                max_src_length=corpus.word_tensor_length,
                                max_tgt_length=corpus.label_tensor_length,
                                batch_size=actual_batch_size,
                                start_of_sequence_symbol=corpus.characters.start_of_sequence.integer,
                                teacher_forcing_ratio=teacher_forcing_ratio)

            print_loss_total += loss
            plot_loss_total += loss

        # NOTE(review): placement inferred from the collapsed source — this
        # progress report appears to run once per outer iteration; confirm.
        if iteration % print_every == 0:
            print_loss_avg: float = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (time_since(since=start,
                                                    percent=iteration / n_iters),
                                         iteration,
                                         iteration / n_iters * 100,
                                         print_loss_avg))
            sys.stdout.flush()
def train(*,
          input_tensor: torch.Tensor,  # shape: [src_seq_len, batch_size]
          target_tensor: torch.Tensor,  # shape: [tgt_seq_len, batch_size]
          encoder: EncoderRNN,
          decoder: AttnDecoderRNN,
          encoder_optimizer: Optimizer,
          decoder_optimizer: Optimizer,
          criterion: nn.Module,
          device: torch.device,
          max_src_length: int,
          max_tgt_length: int,
          batch_size: int,
          start_of_sequence_symbol: int,
          teacher_forcing_ratio: float) -> float:
    """Run one optimization step over a single batch and return its loss.

    Encodes the source, decodes the full target sequence (optionally with
    teacher forcing), computes NLL loss over all steps, and applies one
    SGD update to both encoder and decoder.
    """
    # NOTE(review): max_src_length is accepted but unused in this body.
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Scalar accumulator for the batch loss.
    loss: torch.Tensor = torch.tensor(0, dtype=torch.float, device=device)  # shape: [] meaning this is a scalar

    encoder_outputs = encoder.encode_sequence(input_tensor)

    # First decoder input is the first symbol of each target sequence.
    # NOTE(review): decoder_input/decoder_hidden are only shape-checked here;
    # decode_sequence below manages its own inputs and hidden state.
    decoder_input = target_tensor[0].unsqueeze(dim=0)
    decoder_hidden = decoder.init_hidden(batch_size=batch_size, device=device)

    verify_shape(tensor=decoder_input, expected=[1, batch_size])
    verify_shape(tensor=target_tensor, expected=[max_tgt_length, batch_size])
    verify_shape(tensor=decoder_hidden, expected=[decoder.gru.num_layers, batch_size, decoder.gru.hidden_size])

    # Teacher forcing: with this probability, feed the gold target symbols
    # to the decoder instead of its own previous predictions.
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    decoder_output = decoder.decode_sequence(encoder_outputs=encoder_outputs,
                                             start_symbol=start_of_sequence_symbol,
                                             max_length=max_tgt_length,
                                             target_tensor=target_tensor if use_teacher_forcing else None)

    # NLLLoss requires predictions of shape [N, C]:
    # flatten [seq_len, batch_size, output_size] -> [seq_len*batch_size, output_size]
    predictions = decoder_output.reshape(-1, decoder.output_size)

    # Flatten [seq_len, batch_size] -> [seq_len*batch_size]
    labels = target_tensor.reshape(-1)

    loss += criterion(predictions, labels)

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()
#checkpoint = torch.load(loadFilename, map_location=torch.device('cpu')) encoder_sd = checkpoint['en'] decoder_sd = checkpoint['de'] encoder_optimizer_sd = checkpoint['en_opt'] decoder_optimizer_sd = checkpoint['de_opt'] embedding_sd = checkpoint['embedding'] voc.__dict__ = checkpoint['voc_dict'] print('Building encoder and decoder ...') # 初始化word embedding embedding = nn.Embedding(voc.num_words, hidden_size) if model_checkpoint: embedding.load_state_dict(embedding_sd) # 初始化encoder和decoder模型 encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout) decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout) if model_checkpoint: encoder.load_state_dict(encoder_sd) decoder.load_state_dict(decoder_sd) # 使用合适的设备 encoder = encoder.to(device) decoder = decoder.to(device) print('Models built and ready to go!') class GreedySearchDecoder(nn.Module): def __init__(self, encoder, decoder): super(GreedySearchDecoder, self).__init__() self.encoder = encoder self.decoder = decoder
def second_aux(sent): seen_aux = 0 for word in sent: if seen_aux: if word in ["do", "does", "don't", "doesn't"]: return word else: if word in ["do", "does", "don't", "doesn't"]: seen_aux = 1 # Where the actual running of the code happens hidden_size = int(sys.argv[6]) # Default 128 encoder1 = EncoderRNN(input_lang.n_words, hidden_size, recurrent_unit) decoder1 = DecoderRNN(hidden_size, output_lang.n_words, recurrent_unit, attn=attention, n_layers=1, dropout_p=0.1) if use_cuda: encoder1 = encoder1.cuda() decoder1 = decoder1.cuda() counter = 0 direcs_to_process = 1 lines = open(testFile, encoding='utf-8').read().strip().split('\n') test_pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines] length_sorted_pairs_dict = {} for i in range(30):
checkpoint = torch.load(loadFilename) # If loading a model trained on GPU to CPU # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu')) encoder_sd = checkpoint['encoder'] decoder_sd = checkpoint['decoder'] encoder_optimizer_sd = checkpoint['en_opt'] decoder_optimizer_sd = checkpoint['de_opt'] embedding_sd = checkpoint['embedding'] voc.__dict__ = checkpoint['voc_dict'] embedding = nn.Embedding(voc.num_words, Config.hidden_size) embedding.load_state_dict(embedding_sd) encoder = EncoderRNN(Config.hidden_size, embedding, Config.encoder_n_layers, Config.dropout) decoder = LuongAttnDecoderRNN(Config.attn_model, embedding, Config.hidden_size, voc.num_words, Config.decoder_n_layers, Config.dropout) encoder.load_state_dict(encoder_sd) decoder.load_state_dict(decoder_sd) encoder = encoder.to(Config.device) decoder = decoder.to(Config.device) # Set dropout layers to eval mode encoder.eval() decoder.eval() # Initialize search module searcher = GreedySearchDecoder(encoder, decoder)
# 否则比如checkpoint是在GPU上得到的,但是我们现在又用CPU来训练或者测试,那么注释掉下面的代码 #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu')) encoder_sd = checkpoint['en'] decoder_sd = checkpoint['de'] encoder_optimizer_sd = checkpoint['en_opt'] decoder_optimizer_sd = checkpoint['de_opt'] embedding_sd = checkpoint['embedding'] voc.__dict__ = checkpoint['voc_dict'] print('Building encoder and decoder ...') # 初始化word embedding embedding = nn.Embedding(voc.num_words, hidden_size) if loadFilename: embedding.load_state_dict(embedding_sd) # 初始化encoder和decoder模型 encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout) decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout) if loadFilename: encoder.load_state_dict(encoder_sd) decoder.load_state_dict(decoder_sd) # 使用合适的设备 encoder = encoder.to(device) decoder = decoder.to(device) print('Models built and ready to go!') ###################################################################### # 设置进入训练模式,从而开启dropout encoder.train() decoder.train()
############################### # Creating the dataset object # ############################### # Create training data object bidirectional = config.getboolean("bidirectional") trainset, source_vocab, target_vocab = get_dataset( types="train", batch_size=int(config["batch_size"]), shuffle=True, num_workers=int(config["num_workers"]), pin_memory=False, drop_last=True) encoder1 = EncoderRNN(int(config["hidden_size_encoder"]), len(source_vocab) + 2, int(config["batch_size"]), num_layers=int(config["num_layer_encoder"]), bidirectional=bidirectional).to(device) bridge = Linear(bidirectional, int(config["hidden_size_encoder"]), int(config["hidden_size_decoder"])).to(device) decoder1 = DecoderRNN(int(config["hidden_size_decoder"]), len(target_vocab) + 2, int(config["batch_size"]), num_layers=int(config["num_layer_decoder"])).to(device) trainIters(trainset, encoder1, decoder1, bridge, num_epochs=int(config["num_epoch"]), batch_size=int(config["batch_size"]), print_every=10,
bleu_per_sentence[dutch] = [bleu, eng, output] for n in range(1, N + 1): total_clipped_counts[n] += ngrams_clipped_counts[n] total_counts[n] += ngrams_counts[n] bar.update(i) pp = pprint.PrettyPrinter(indent=4) pp.pprint(bleu_per_sentence) print("bleu on corpus:", computeBlue(total_clipped_counts, total_counts, bp, N)) if __name__ == "__main__": input_lang = Lang(nld_data) output_lang = Lang(eng_data) hidden_size = 256 encoder1 = EncoderRNN(input_lang.n_words, hidden_size) encoder1.load_state_dict( torch.load('models_project6/encoder.pt', map_location=lambda storage, loc: storage)) attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, 1, dropout_p=0.1) attn_decoder1.load_state_dict( torch.load('models_project6/decoder.pt', map_location=lambda storage, loc: storage)) readTrainData("data/dutch-sentences.txt") # evaluateAndShowAttention("zij vertrekken morgenochtend uit japan")
output_words = evaluate(encoder, decoder, pair[0]) output_sentence = ' '.join(output_words) print('<', output_sentence) score, b2, b3, b4 = BLEU_score(pair[1], output_sentence[:-6]) print(score) print(b2) print(b3) print(b4) total += score print('') print('Avg. score is:') print(total / 50) hidden_size = 2046 encoder1 = EncoderRNN(2046, hidden_size) attn_decoder1 = DecoderRNN(hidden_size, caption_list.n_words) if use_cuda: encoder1 = encoder1.cuda() attn_decoder1 = attn_decoder1.cuda() encoder1.load_state_dict(torch.load('encoder.pt')) attn_decoder1.load_state_dict(torch.load('decoder.pt')) ###################################################################### # evaluateRandomly(encoder1, attn_decoder1) print('Done')
# 建立词典 vocab = build_vocab(cleaned_news, cleaned_summaries, min_freq=3) # 生成 dataset 是DataTensor 格式 news_dataset = build_dataset(vocab, cleaned_news, config['max_len_news'], type='news') summaries_dataset = build_dataset(vocab, cleaned_summaries, config['max_len_summaries'], type='summaries') # 合并在一起 dataset = TensorDataset(news_dataset, summaries_dataset) # 加载预训练的word2vec模型(使用搜狗新闻训练得到的word2vec),维度是300 pre_embeddings = get_pretrained_embedding(config['pretrained_vector_path'], vocab, vector_dim=300).to(device) # 构建模型,选择隐状态和词向量维度相同,都是300 vocab_size = len(vocab) # encoder 使用的是单层双向gru encoder = EncoderRNN(vocab_size, 300, 300, n_layers=1, pre_embeddings=pre_embeddings) # decoder 使用双层单项gru decoder = DecoderRNN(vocab_size, 300, 300, n_layers=2, pre_embeddings=pre_embeddings) # 迁移到cuda上,training 要用 encoder.to(device) decoder.to(device) # 训练模型 training(encoder, decoder, dataset, vocab, config['lr'], config['batch_size'], config['epochs'])
def build_model(self):
    """Assemble the audio (A2V) and text (T2V) encoder/decoder stacks into self.model."""

    class MLP(nn.Module):
        """Simple feed-forward stack: ReLU on all layers except the last."""

        def __init__(self, dims):
            super(MLP, self).__init__()
            self.hidden = nn.ModuleList()
            for k in range(len(dims) - 1):
                self.hidden.append(nn.Linear(dims[k], dims[k + 1]))

        def forward(self, x):
            for layer in self.hidden[:-1]:
                x = F.relu(layer(x))
            output = self.hidden[-1](x.float())
            return output

    # A2V: audio-to-vector branch — separate phoneme and speaker encoders.
    aud_input_MLP = MLP([self.feat_dim, self.hidden_dim, self.hidden_dim])
    phn_encoder = EncoderRNN(self.hidden_dim, self.seq_len, self.hidden_dim,
                             input_dropout_p=self.dropout_rate,
                             dropout_p=self.dropout_rate,
                             n_layers=self.enc_num_layers,
                             bidirectional=True,
                             rnn_cell='gru',
                             variable_lengths=True)
    spk_encoder = EncoderRNN(self.hidden_dim, self.seq_len, self.hidden_dim,
                             input_dropout_p=self.dropout_rate,
                             dropout_p=self.dropout_rate,
                             n_layers=self.enc_num_layers,
                             bidirectional=True,
                             rnn_cell='gru',
                             variable_lengths=True)
    # Decoder consumes the concatenated bidirectional phoneme+speaker states,
    # hence the * 4 width (2 encoders x 2 directions — TODO confirm).
    aud_decoder = DecoderRNN(self.hidden_dim * 4, self.seq_len, self.hidden_dim * 4,
                             n_layers=self.dec_num_layers,
                             rnn_cell='gru',
                             bidirectional=True,
                             input_dropout_p=self.dropout_rate,
                             dropout_p=self.dropout_rate)
    aud_output_MLP = MLP([self.hidden_dim * 4, self.hidden_dim, self.feat_dim])

    # T2V: text-to-vector branch. Feature width depends on the unit type
    # (27 for characters, 60 otherwise — presumably phoneme units).
    if self.unit_type == 'char':
        txt_feat_dim = 27
    else:
        txt_feat_dim = 60
    txt_input_MLP = MLP([txt_feat_dim, self.hidden_dim])
    txt_encoder = EncoderRNN(self.hidden_dim, self.seq_len, self.hidden_dim,
                             n_layers=1,
                             bidirectional=True,
                             rnn_cell='gru',
                             variable_lengths=True)
    txt_decoder = DecoderRNN(self.hidden_dim * 2, self.seq_len, self.hidden_dim * 2,
                             n_layers=1,
                             rnn_cell='gru',
                             bidirectional=True)
    txt_output_MLP = MLP([self.hidden_dim * 2, txt_feat_dim])

    # A zero cross weight disables the cross (x) objective in the model.
    if self.weight_x == 0.:
        if_x = False
    else:
        if_x = True

    # The whole model.
    self.model = Model(aud_input_MLP, phn_encoder, spk_encoder, aud_decoder,
                       aud_output_MLP, self.dec_num_layers, txt_input_MLP,
                       txt_encoder, txt_decoder, txt_output_MLP, 1, if_x,
                       self.neg_num)
    self.model.to(device)
class RNNJudgeNet(nn.Module):
    """
    keys: (n_keys, word_vec_dim)
    candidates: (n_candidates, word_vec_dim)
    query = [keys; 0; candidates]: (n_keys + 1 + n_candidates, word_vec_dim),
        where 0 is used to separate keys and candidates
    result = GRU-Encoder-Decoder-with-Attention(query): (n_candidates, 2),
        which indicates the possibility of ith candidates to be good
    """

    def __init__(
            self,
            word_vec_dim,
            hidden_state_size,
            bidir=True,
            rnn_cell='LSTM',
    ):
        """
        :param word_vec_dim: dimensionality of the input word vectors.
        :param hidden_state_size: encoder/decoder hidden-state size.
        :param bidir: whether the encoder RNN is bidirectional.
        :param rnn_cell: RNN cell type for both encoder and decoder.
        """
        super().__init__()
        self.trainable = True
        self.word_vec_dim = word_vec_dim
        self.hidden_state_size = hidden_state_size
        self.encoder = EncoderRNN(self.word_vec_dim,
                                  self.hidden_state_size,
                                  bidir=bidir,
                                  rnn_cell=rnn_cell)
        # The decoder emits 2 logits per step: good / bad for one candidate.
        self.decoder = AttnDecoderRNN(self.word_vec_dim,
                                      self.hidden_state_size,
                                      2,
                                      rnn_cell=rnn_cell)
        self.encoder.apply(util.weight_init)
        self.decoder.apply(util.weight_init)

    def forward(self, Ks: torch.Tensor, Cs: torch.Tensor, *args):
        """
        :param Ks, keywords used to expand: (batch_size, n_keys, word_vector_dim)
        :param Cs, candidates searched by Ks: (batch_size, n_candidates, word_vector_dim)
        :return: probs as good / bad candiates: (batch_size, n_candidates, 2)
        """
        batch_size = Ks.shape[0]
        n_candidates = Cs.shape[1]
        # BUG FIX: allocate the separator on the inputs' device/dtype. A bare
        # torch.zeros(...) lives on CPU with float32, so torch.cat would fail
        # whenever Ks/Cs are on CUDA (or use a different float dtype).
        sep = torch.zeros(batch_size, 1, self.word_vec_dim,
                          device=Ks.device, dtype=Ks.dtype)
        query_string = torch.cat(
            [Ks, sep, Cs],
            dim=1)  # (batch_size, n_keys + 1 + n_candidates, word_vector_dim)
        query_string_transposed = query_string.transpose(
            0, 1)  # (n_keys + 1 + n_candidates, batch_size, word_vector_dim)
        lengths = [query_string_transposed.shape[0]
                   ]  # (n_keys + 1 + n_candidates); lengths stay on CPU
        encoder_outputs, encoder_hidden = self.encoder(
            query_string_transposed,
            torch.tensor(lengths).long().cpu())
        # encoder_outputs: (n_keys + 1 + n_candidates, batch_size, hidden_state_size)
        # encoder_hidden:  (n_layers=1, batch_size, hidden_state_size)
        decoder_hidden = encoder_hidden
        answers = []
        # Decode each candidate in turn, carrying the hidden state forward.
        for i in range(n_candidates):
            decoder_input = Cs[:, i].unsqueeze(
                0)  # TODO (new dim=1, a candidate=1, word_vector_dim)
            # (1, batch_size, hidden_state_size) — note this "batch" is the
            # per-step decoding batch, not the earlier sequence dimension.
            output, decoder_hidden, _ = self.decoder(decoder_input,
                                                     decoder_hidden,
                                                     encoder_outputs)
            # output: (1, batch_size, 2)
            # decoder_hidden: (n_layers=1, batch_size, hidden_state_size)
            answers.append(output)
        probs = torch.cat(answers, dim=0)  # (n_candidates, batch_size, 2)
        probs = probs.transpose(0, 1)  # (batch_size, n_candidates, 2)
        return probs
print_loss_total += loss plot_loss_total += loss if iter % print_every == 0: print_loss_avg = print_loss_total / print_every print_loss_total = 0 print('%s (Batch_no: %d %d%%) %.4f' % (timeSince( start, iter / 5), n_iters / 5, iter / 5 * 100, print_loss_avg)) loss_all += print_loss_avg plot_loss_avg = plot_loss_total / plot_every plot_loss_total = 0 hidden_size = 2046 encoder1 = EncoderRNN(2046, hidden_size) attn_decoder1 = DecoderRNN(hidden_size, caption_list.n_words) if use_cuda: encoder1 = encoder1.cuda() attn_decoder1 = attn_decoder1.cuda() # encoder1.load_state_dict(torch.load('encoder.pt')) # attn_decoder1.load_state_dict(torch.load('decoder.pt')) def evaluate(encoder, decoder, vid_ID, max_length=MAX_LENGTH): input_variable, empty = variableFromId(vid_ID) input_length = len(input_variable) encoder_hidden = encoder.initHidden()
def trainIters(learning_rate=0.001):
    """Train the code-to-comment seq2seq model, tracking train/validation loss.

    Builds the encoder/attention-decoder pair, iterates over the training
    loader for a fixed number of epochs, validates after each epoch, and
    saves the model whenever validation loss improves.

    :param learning_rate: Adam learning rate for both optimizers.
    """
    epochs = 1
    plot_train_losses = []
    plot_val_losses = []
    plot_loss_total = 0  # Reset every plot_every
    hidden_size = 256
    print('------- Hypers --------\n'
          '- epochs: %i\n'
          '- learning rate: %g\n'
          '- hidden size: %i\n'
          '----------------'
          '' % (epochs, learning_rate, hidden_size))
    # set model
    vocab_size_encoder = get_vocab_size(CodeEncoder())
    vocab_size_decoder = get_vocab_size(CommentEncoder())
    print(vocab_size_encoder)
    print(vocab_size_decoder)
    print('----------------')
    # COMMENT OUT WHEN FIRST TRAINING
    # encoder, decoder = load_model()
    encoder = EncoderRNN(vocab_size_encoder, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, vocab_size_decoder, dropout_p=0.1).to(device)
    # set training hypers
    criterion = nn.NLLLoss()
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # set data
    dataLoaders = createLoaders(extras=extras, debug=True)
    # <SOS> is already prepended to each comment, so no explicit start token
    # is handed to the decoder here.
    SOS_token = None
    # iteration
    counts = []
    best_val_loss = 100  # sentinel upper bound; any real loss should beat it
    for eps in range(1, epochs + 1):
        print('Epoch Number', eps)
        for count, (inputs, targets) in enumerate(dataLoaders['train'], 0):
            inputs = torch.LongTensor(inputs[0])
            targets = torch.LongTensor(targets[0])
            inputs, targets = inputs.to(device), targets.to(device)
            loss = train(inputs, targets, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion,
                         SOS_token=SOS_token)
            plot_loss_total += loss
            # if count != 0 and count % 10 == 0:
            print(count, loss)
        # NOTE(review): placement of the epoch-level bookkeeping below is
        # inferred from the collapsed source — confirm against the original.
        counts.append(eps)
        plot_loss_avg = plot_loss_total / len(dataLoaders['train'])
        plot_train_losses.append(plot_loss_avg)
        val_loss = validate_model(encoder, decoder, criterion,
                                  dataLoaders['valid'],
                                  SOS_token=SOS_token, device=device)
        # Keep only the best-so-far model on disk.
        if val_loss < best_val_loss:
            save_model(encoder, decoder)
            best_val_loss = val_loss
        plot_val_losses.append(val_loss)
        plot_loss_total = 0
    save_loss(plot_train_losses, plot_val_losses)
    showPlot(counts, plot_train_losses, plot_val_losses)