def run_training(*, config: argparse.Namespace) -> None:
    """Train an encoder / attention-decoder pair on the corpus named in *config*.

    Loads the pickled vocabulary, builds new models (or reloads checkpoints when
    ``config.continue_training`` is set), runs ``train_iters``, then saves both
    models (moved to CPU) to ``config.encoder`` / ``config.decoder``.
    """
    import pickle

    # NOTE(review): pickle.load can execute arbitrary code on load — only use
    # vocabulary files from a trusted source.
    # BUGFIX: the original `pickle.load(open(...))` never closed the file handle.
    with open(config.vocab, "rb") as vocab_file:
        vocab: Vocabulary = pickle.load(vocab_file)

    device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    training_corpus = Corpus(vocab=vocab,
                             filename=config.corpus,
                             max_src_length=config.max_src_length,
                             device=device)

    if config.continue_training:
        # Resume from previously saved whole-model checkpoints.
        encoder1 = torch.load(config.encoder, map_location=device)
        attn_decoder1 = torch.load(config.decoder, map_location=device)
    else:
        encoder1: EncoderRNN = EncoderRNN(input_size=len(training_corpus.characters),
                                          embedding_size=config.encoder_embedding_size,
                                          hidden_size=config.encoder_hidden_size,
                                          num_hidden_layers=config.encoder_hidden_layers).to(device=device)

        attn_decoder1 = AttnDecoderRNN(embedding_size=config.decoder_embedding_size,
                                       decoder_hidden_size=config.decoder_hidden_size,
                                       encoder_hidden_size=config.encoder_hidden_size,
                                       num_hidden_layers=config.decoder_hidden_layers,
                                       output_size=len(training_corpus.characters),
                                       dropout_p=config.decoder_dropout,
                                       max_src_length=training_corpus.word_tensor_length).to(device=device)

    train_iters(corpus=training_corpus,
                encoder=encoder1,
                decoder=attn_decoder1,
                device=device,
                n_iters=config.num_epochs,
                batch_size=config.batch_size,
                print_every=config.print_every,
                learning_rate=config.learning_rate,
                teacher_forcing_ratio=config.teacher_forcing_ratio)

    # Save CPU-mapped models so they can be reloaded on any machine.
    print(f"Saving encoder to {config.encoder}...")
    torch.save(encoder1.to(device=torch.device("cpu")), config.encoder)
    print(f"Saving decoder to {config.decoder}...")
    torch.save(attn_decoder1.to(device=torch.device("cpu")), config.decoder)
def train(args):
    """Build, train, and checkpoint a seq2seq question parser for the given phase."""
    input_lang, output_lang, pairs = prepareData(args)
    print(random.choice(pairs))

    # Everything needed to rebuild the model at inference time travels in one dict.
    model = {
        'hidden_size': 1000,
        'dropout': 0.1,
        'input_lang': input_lang,
        'output_lang': output_lang,
        'max_length': max(input_lang.max_length, output_lang.max_length) + 2,
    }
    print('Max length: {}'.format(model['max_length']))

    encoder = EncoderRNN(input_lang.n_words, model['hidden_size']).to(getDevice())
    encoder.train()
    decoder = AttnDecoderRNN(model['hidden_size'],
                             output_lang.n_words,
                             dropout_p=model['dropout'],
                             max_length=model['max_length']).to(getDevice())
    decoder.train()

    # Sample (with replacement) one training pair per iteration, up front.
    n_iters = 30000
    training_pairs = [
        tensorsFromPair(input_lang, output_lang, random.choice(pairs))
        for _ in range(n_iters)
    ]

    trainIters(training_pairs,
               encoder,
               decoder,
               n_iters,
               print_every=1000,
               optim=args.optim,
               learning_rate=args.learning_rate,
               max_length=model['max_length'])

    print('saving models...')
    model['encoder_state'] = encoder.state_dict()
    model['decoder_state'] = decoder.state_dict()
    torch.save(
        model,
        "data/{}_model_checkpoint.pth".format(args.phase.split('_')[-1]))
def inference(args):
    """Parse every non-descriptive single-choice validation question into a
    program with the trained seq2seq model and write the results to JSON.

    Reads the checkpoint produced by ``train`` (hard-coded path — ignores
    *args*; NOTE(review): confirm this is intended).
    """
    # BUGFIX: removed the dead `model = {}` that was immediately overwritten
    # by torch.load.
    model = torch.load("data/sc_question_model_checkpoint.pth")
    model['encoder'] = EncoderRNN(model['input_lang'].n_words,
                                  model['hidden_size']).to(getDevice())
    model['encoder'].load_state_dict(model['encoder_state'])
    model['encoder'].eval()
    model['decoder'] = AttnDecoderRNN(model['hidden_size'],
                                      model['output_lang'].n_words,
                                      dropout_p=model['dropout'],
                                      max_length=model['max_length']).to(getDevice())
    model['decoder'].load_state_dict(model['decoder_state'])
    model['decoder'].eval()

    with open('../executor/parse_results/sc_validation.json') as f:
        anns = json.load(f)

    out = {}
    for ann in tqdm(anns):
        v = {}
        v['scene_index'] = ann['scene_index']
        v['video_filename'] = ann['video_filename']
        v['questions'] = []
        for ann_q in ann['questions']:
            # Descriptive questions are handled elsewhere; skip them here.
            if ann_q['question_type'] == 'descriptive':
                continue
            q_program_pred, _ = evaluate(model['encoder'],
                                         model['decoder'],
                                         normalizeString(ann_q['question']),
                                         model['input_lang'],
                                         model['output_lang'],
                                         max_length=model['max_length'])
            # Drop the trailing end-of-sequence marker, if predicted.
            if q_program_pred[-1] == '<EOS>':
                q_program_pred = q_program_pred[:-1]
            q = {}
            q['question_program'] = q_program_pred
            q['question'] = ann_q['question']
            q['question_type'] = '{}_single_choice'.format(
                ann_q['question_type'])
            q['question_subtype'] = ann_q['program'][-1]
            q['program_gt'] = ann_q['program']
            q['answer'] = ann_q['answer']
            v['questions'].append(q)
        out[v['scene_index']] = v

    out_path = '../executor/parse_results/sc_val_reproduced.json'
    print('Writing output to {}'.format(out_path))
    with open(out_path, 'w') as fout:
        json.dump(out, fout, indent=4)
def __init__(
    self,
    word_vec_dim,
    hidden_state_size,
    bidir=True,
    rnn_cell='LSTM',
):
    """Build the attentional encoder-decoder pair and initialise their weights.

    :param word_vec_dim: dimensionality of the input word vectors
    :param hidden_state_size: RNN hidden-state size for encoder and decoder
    :param bidir: whether the encoder RNN is bidirectional
    :param rnn_cell: RNN cell type passed through to both networks
    """
    super().__init__()
    self.trainable = True
    self.word_vec_dim = word_vec_dim
    self.hidden_state_size = hidden_state_size
    self.encoder = EncoderRNN(word_vec_dim,
                              hidden_state_size,
                              bidir=bidir,
                              rnn_cell=rnn_cell)
    # Decoder output size 2: a good/bad judgement per step.
    self.decoder = AttnDecoderRNN(word_vec_dim,
                                  hidden_state_size,
                                  2,
                                  rnn_cell=rnn_cell)
    for module in (self.encoder, self.decoder):
        module.apply(util.weight_init)
def evaluate(vocab: Vocabulary,
             corpus_filename: str,
             encoder: EncoderRNN,
             decoder: AttnDecoderRNN,
             max_src_length: int,
             max_tgt_length: int):
    """Greedily decode every item in the corpus and print each predicted string."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    for model in (encoder, decoder):
        model.to(device)
        model.eval()

    with torch.no_grad():
        corpus = Corpus(filename=corpus_filename,
                        max_src_length=max_src_length,  # decoder.max_src_length,
                        vocab=vocab,
                        device=device)
        loader = torch.utils.data.DataLoader(dataset=corpus, batch_size=1)
        for batch in loader:
            # [batch, seq] -> [seq, batch] as the encoder expects.
            source: torch.Tensor = batch["data"].permute(1, 0)
            encoded = encoder.encode_sequence(source)
            decoded = decoder.decode_sequence(
                encoder_outputs=encoded,
                start_symbol=corpus.characters.start_of_sequence.integer,
                max_length=max_tgt_length)
            # Greedy choice at every timestep, collapsed to a flat id list.
            best_ids = decoded.topk(k=1)[1]
            id_list = best_ids.squeeze(dim=2).squeeze(dim=1).tolist()
            print("".join(corpus.characters[i].string for i in id_list))
# NOTE(review): this fragment begins mid-way through an enclosing corpus-BLEU
# evaluation function/loop whose start is not visible here; the indentation of
# the first statements is a best-effort reconstruction — confirm against the
# original file.
        # Record per-sentence BLEU alongside the reference and the model output.
        bleu_per_sentence[dutch] = [bleu, eng, output]
        # Accumulate clipped/total n-gram counts for every order 1..N.
        for n in range(1, N + 1):
            total_clipped_counts[n] += ngrams_clipped_counts[n]
            total_counts[n] += ngrams_counts[n]
        bar.update(i)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(bleu_per_sentence)
    # Corpus-level BLEU from the accumulated counts and brevity penalty `bp`.
    print("bleu on corpus:",
          computeBlue(total_clipped_counts, total_counts, bp, N))


if __name__ == "__main__":
    input_lang = Lang(nld_data)
    output_lang = Lang(eng_data)
    hidden_size = 256
    # Rebuild both networks and restore CPU-mapped weights.
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
    encoder1.load_state_dict(
        torch.load('models_project6/encoder.pt',
                   map_location=lambda storage, loc: storage))
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, 1, dropout_p=0.1)
    attn_decoder1.load_state_dict(
        torch.load('models_project6/decoder.pt',
                   map_location=lambda storage, loc: storage))
    readTrainData("data/dutch-sentences.txt")
    # evaluateAndShowAttention("zij vertrekken morgenochtend uit japan")
def train_iters(*,  # data: Data,
                corpus: Corpus,
                encoder: EncoderRNN,
                decoder: AttnDecoderRNN,
                device: torch.device,
                n_iters: int,
                batch_size: int,
                teacher_forcing_ratio: float,
                print_every: int = 1000,
                learning_rate: float = 0.01
                ) -> None:
    """Run `n_iters` passes of SGD training over the batched corpus.

    Each iteration sweeps every batch of `corpus`, calling `train` once per
    batch; running loss is reported every `print_every` iterations.
    """
    data = torch.utils.data.DataLoader(dataset=corpus, batch_size=batch_size)

    start: float = time.time()
    plot_losses: List[float] = []  # NOTE(review): appears unused in this block
    print_loss_total: float = 0  # Reset every print_every
    plot_loss_total: float = 0  # Reset every plot_every

    encoder_optimizer: Optimizer = SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer: Optimizer = SGD(decoder.parameters(), lr=learning_rate)

    criterion: nn.NLLLoss = nn.NLLLoss(reduction='mean')  # ignore_index=corpus.characters.pad_int)

    for iteration in range(1, n_iters + 1):  # type: int
        for batch in data:
            # DataLoader yields [batch, seq]; the models expect [seq, batch].
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)
            target_tensor: torch.Tensor = batch["labels"].permute(1, 0)

            # The final batch of an epoch may be smaller than batch_size.
            actual_batch_size: int = min(batch_size, input_tensor.shape[1])

            verify_shape(tensor=input_tensor, expected=[corpus.word_tensor_length, actual_batch_size])
            verify_shape(tensor=target_tensor, expected=[corpus.label_tensor_length, actual_batch_size])

            loss: float = train(input_tensor=input_tensor,
                                target_tensor=target_tensor,
                                encoder=encoder,
                                decoder=decoder,
                                encoder_optimizer=encoder_optimizer,
                                decoder_optimizer=decoder_optimizer,
                                criterion=criterion,
                                device=device,
                                max_src_length=corpus.word_tensor_length,
                                max_tgt_length=corpus.label_tensor_length,
                                batch_size=actual_batch_size,
                                start_of_sequence_symbol=corpus.characters.start_of_sequence.integer,
                                teacher_forcing_ratio=teacher_forcing_ratio)

            print_loss_total += loss
            plot_loss_total += loss

        if iteration % print_every == 0:
            # NOTE(review): loss accumulates once per batch but is divided by
            # print_every (a count of iterations) — the reported average is
            # scaled by the number of batches per iteration; confirm intended.
            print_loss_avg: float = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (time_since(since=start, percent=iteration / n_iters),
                                         iteration,
                                         iteration / n_iters * 100,
                                         print_loss_avg))
            sys.stdout.flush()
def train(*,
          input_tensor: torch.Tensor,  # shape: [src_seq_len, batch_size]
          target_tensor: torch.Tensor,  # shape: [tgt_seq_len, batch_size]
          encoder: EncoderRNN,
          decoder: AttnDecoderRNN,
          encoder_optimizer: Optimizer,
          decoder_optimizer: Optimizer,
          criterion: nn.Module,
          device: torch.device,
          max_src_length: int,  # NOTE(review): appears unused in this block
          max_tgt_length: int,
          batch_size: int,
          start_of_sequence_symbol: int,
          teacher_forcing_ratio: float) -> float:
    """Perform one optimisation step over a single batch and return the scalar loss."""
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    loss: torch.Tensor = torch.tensor(0, dtype=torch.float, device=device)  # shape: [] meaning this is a scalar

    encoder_outputs = encoder.encode_sequence(input_tensor)

    # NOTE(review): decoder_input is checked below but never fed to the decoder
    # (decode_sequence takes start_symbol instead) — possibly vestigial.
    decoder_input = target_tensor[0].unsqueeze(dim=0)
    decoder_hidden = decoder.init_hidden(batch_size=batch_size, device=device)

    verify_shape(tensor=decoder_input, expected=[1, batch_size])
    verify_shape(tensor=target_tensor, expected=[max_tgt_length, batch_size])
    verify_shape(tensor=decoder_hidden, expected=[decoder.gru.num_layers, batch_size, decoder.gru.hidden_size])

    # Teacher forcing is chosen per batch, not per step.
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    # use_teacher_forcing = False

    decoder_output = decoder.decode_sequence(encoder_outputs=encoder_outputs,
                                             start_symbol=start_of_sequence_symbol,
                                             max_length=max_tgt_length,
                                             target_tensor=target_tensor if use_teacher_forcing else None)

    # Our loss function requires predictions to be of the shape NxC, where N is
    # the number of predictions and C is the number of possible predicted categories
    predictions = decoder_output.reshape(-1, decoder.output_size)  # [seq_len, batch_size, output_size] -> [seq_len*batch_size, output_size]
    labels = target_tensor.reshape(-1)  # [seq_len, batch_size] -> [seq_len*batch_size]

    loss += criterion(predictions, labels)

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()
class RNNJudgeNet(nn.Module):
    """Judge which candidate expansions are "good" for a set of keywords.

    keys: (n_keys, word_vec_dim)
    candidates: (n_candidates, word_vec_dim)
    query = [keys; 0; candidates]: (n_keys + 1 + n_candidates, word_vec_dim),
        where 0 is a zero row used to separate keys and candidates
    result = GRU-Encoder-Decoder-with-Attention(query): (n_candidates, 2),
        which indicates the possibility of ith candidates to be good
    """

    def __init__(
        self,
        word_vec_dim,
        hidden_state_size,
        bidir=True,
        rnn_cell='LSTM',
    ):
        super().__init__()
        self.trainable = True
        self.word_vec_dim = word_vec_dim
        self.hidden_state_size = hidden_state_size
        self.encoder = EncoderRNN(self.word_vec_dim,
                                  self.hidden_state_size,
                                  bidir=bidir,
                                  rnn_cell=rnn_cell)
        # Decoder output size 2: per-candidate good/bad logits.
        self.decoder = AttnDecoderRNN(self.word_vec_dim,
                                      self.hidden_state_size,
                                      2,
                                      rnn_cell=rnn_cell)
        self.encoder.apply(util.weight_init)
        self.decoder.apply(util.weight_init)

    def forward(self, Ks: torch.Tensor, Cs: torch.Tensor, *args):
        """
        :param Ks: keywords used to expand: (batch_size, n_keys, word_vector_dim)
        :param Cs: candidates searched by Ks: (batch_size, n_candidates, word_vector_dim)
        :return: probs as good / bad candidates: (batch_size, n_candidates, 2)
        """
        batch_size = Ks.shape[0]
        n_candidates = Cs.shape[1]

        # BUGFIX: create the separator on the same device/dtype as the inputs.
        # A bare torch.zeros(...) lives on the CPU and makes the torch.cat
        # below crash whenever Ks/Cs are on a GPU.
        sep = torch.zeros(batch_size, 1, self.word_vec_dim,
                          device=Ks.device, dtype=Ks.dtype)
        query_string = torch.cat(
            [Ks, sep, Cs],
            dim=1)  # (batch_size, n_keys + 1 + n_candidates, word_vector_dim)
        query_string_transposed = query_string.transpose(
            0, 1)  # (n_keys + 1 + n_candidates, batch_size, word_vector_dim)
        # One shared length for the whole batch; presumably every sequence in
        # the batch has identical length — TODO confirm. Lengths are kept on
        # CPU, as pack_padded_sequence-style APIs require.
        lengths = [query_string_transposed.shape[0]
                   ]  # (n_keys + 1 + n_candidates)
        encoder_outputs, encoder_hidden = self.encoder(
            query_string_transposed,
            torch.tensor(lengths).long().cpu())
        # encoder_outputs: (n_keys + 1 + n_candidates, batch_size, hidden_state_size)
        # encoder_hidden:  (n_layers=1, batch_size, hidden_state_size)

        decoder_hidden = encoder_hidden
        answers = []
        for i in range(n_candidates):
            # logger.debug(f"decoder_hidden: {decoder_hidden[:, :, 0:10]}")
            # (1, batch_size, word_vector_dim): one candidate decoded per step —
            # here the leading dim is the single decode step, not the outer batch.
            decoder_input = Cs[:, i].unsqueeze(0)
            output, decoder_hidden, _ = self.decoder(decoder_input,
                                                     decoder_hidden,
                                                     encoder_outputs)
            # output: (1, batch_size, 2)
            # decoder_hidden: (n_layers=1, batch_size, hidden_state_size)
            answers.append(output)
        probs = torch.cat(answers, dim=0)  # (n_candidates, batch_size, 2)
        probs = probs.transpose(0, 1)  # (batch_size, n_candidates, 2)
        # Raw logits are returned; softmax is deliberately left to the caller.
        # probs = torch.softmax(probs, dim=-1)
        return probs
def trainIters(learning_rate=0.001):
    """Train the code-to-comment seq2seq model, validating every epoch and
    checkpointing whenever the validation loss improves.

    :param learning_rate: Adam learning rate for both encoder and decoder.
    """
    epochs = 1
    plot_train_losses = []
    plot_val_losses = []
    plot_loss_total = 0  # running sum of batch losses; reset each epoch

    hidden_size = 256
    print('------- Hypers --------\n'
          '- epochs: %i\n'
          '- learning rate: %g\n'
          '- hidden size: %i\n'
          '----------------'
          '' % (epochs, learning_rate, hidden_size))

    # Vocabulary sizes drive the embedding dimensions of both networks.
    vocab_size_encoder = get_vocab_size(CodeEncoder())
    vocab_size_decoder = get_vocab_size(CommentEncoder())
    print(vocab_size_encoder)
    print(vocab_size_decoder)
    print('----------------')

    # COMMENT OUT WHEN FIRST TRAINING
    # encoder, decoder = load_model()
    encoder = EncoderRNN(vocab_size_encoder, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, vocab_size_decoder, dropout_p=0.1).to(device)

    # set training hypers
    criterion = nn.NLLLoss()
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # set data
    dataLoaders = createLoaders(extras=extras, debug=True)

    # since we already prepend <SOS> to the comment, don't think need this in
    # decoder model anymore
    SOS_token = None

    counts = []
    # BUGFIX: was hard-coded to 100 — a first validation loss above 100 would
    # have prevented the model from ever being saved.
    best_val_loss = float('inf')
    for eps in range(1, epochs + 1):
        print('Epoch Number', eps)
        for count, (inputs, targets) in enumerate(dataLoaders['train'], 0):
            inputs = torch.LongTensor(inputs[0])
            targets = torch.LongTensor(targets[0])
            inputs, targets = inputs.to(device), targets.to(device)
            loss = train(inputs, targets, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion,
                         SOS_token=SOS_token)
            plot_loss_total += loss
            # if count != 0 and count % 10 == 0:
            print(count, loss)
        counts.append(eps)
        plot_loss_avg = plot_loss_total / len(dataLoaders['train'])
        plot_train_losses.append(plot_loss_avg)
        val_loss = validate_model(encoder, decoder, criterion,
                                  dataLoaders['valid'],
                                  SOS_token=SOS_token, device=device)
        # Checkpoint only on improvement.
        if val_loss < best_val_loss:
            save_model(encoder, decoder)
            best_val_loss = val_loss
        plot_val_losses.append(val_loss)
        plot_loss_total = 0
    save_loss(plot_train_losses, plot_val_losses)
    showPlot(counts, plot_train_losses, plot_val_losses)