def evaluate(vocab: Vocabulary,
             corpus_filename: str,
             encoder: EncoderRNN,
             decoder: AttnDecoderRNN,
             max_src_length: int,
             max_tgt_length: int):
    """Decode every entry of a corpus file and print the predicted strings.

    A ``Corpus`` is built from ``corpus_filename`` and iterated one entry at a
    time (``batch_size=1``). Each entry is encoded, decoded without a target
    sequence for ``max_tgt_length`` steps, and the highest-scoring symbol at
    every step is mapped back to its string via ``corpus.characters``. The
    concatenated result is printed to stdout, one line per entry.

    Side effects on the arguments: ``encoder`` and ``decoder`` are moved to
    the first CUDA device when one is available (CPU otherwise) and are
    switched to eval mode. Neither change is reverted before returning.

    Args:
        vocab: Vocabulary handed to the ``Corpus`` constructor.
        corpus_filename: Path of the corpus file to decode.
        encoder: Encoder whose ``encode_sequence`` is applied to each entry.
        decoder: Decoder whose ``decode_sequence`` produces per-step scores.
        max_src_length: Source length limit handed to ``Corpus``.
        max_tgt_length: Number of decoding steps requested from the decoder.

    Returns:
        None. Predictions are printed, not returned.
    """
    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
    device: torch.device = torch.device(device_name)

    models = (encoder, decoder)
    for model in models:
        model.to(device)
    for model in models:
        model.eval()

    # Inference only: no autograd graph is needed for anything below.
    with torch.no_grad():
        corpus = Corpus(filename=corpus_filename,
                        max_src_length=max_src_length,
                        vocab=vocab,
                        device=device)
        loader = torch.utils.data.DataLoader(dataset=corpus, batch_size=1)
        start_symbol = corpus.characters.start_of_sequence.integer

        for batch in loader:
            # Swap the two axes of the batch; presumably this goes from
            # [batch_size, seq_len] to [seq_len, batch_size] -- TODO confirm
            # against Corpus.__getitem__.
            source: torch.Tensor = batch["data"].permute(1, 0)

            memory = encoder.encode_sequence(source)
            scores = decoder.decode_sequence(encoder_outputs=memory,
                                             start_symbol=start_symbol,
                                             max_length=max_tgt_length)

            # topk returns (values, indices); only the index of the single
            # best symbol per step is needed.
            best_indices = scores.topk(k=1)[1]

            # Drop the size-1 top-k axis and then axis 1 (presumably the
            # batch axis, which is 1 here) to get a flat list of symbol ids.
            symbol_ids = best_indices.squeeze(dim=2).squeeze(dim=1).tolist()

            print("".join(corpus.characters[symbol_id].string
                          for symbol_id in symbol_ids))
def train(*,
          input_tensor: torch.Tensor,   # shape: [src_seq_len, batch_size]
          target_tensor: torch.Tensor,  # shape: [tgt_seq_len, batch_size]
          encoder: EncoderRNN,
          decoder: AttnDecoderRNN,
          encoder_optimizer: Optimizer,
          decoder_optimizer: Optimizer,
          criterion: nn.Module,
          device: torch.device,
          max_src_length: int,
          max_tgt_length: int,
          batch_size: int,
          start_of_sequence_symbol: int,
          teacher_forcing_ratio: float) -> float:
    """Run one optimization step on a single batch and return its loss.

    Both optimizers are zeroed, the batch is encoded and decoded, the loss
    between the flattened decoder output and the flattened targets is
    back-propagated, and both optimizers take one step.

    With probability ``teacher_forcing_ratio`` the full ``target_tensor`` is
    handed to ``decoder.decode_sequence``; otherwise ``None`` is passed in its
    place.

    Args:
        input_tensor: Source batch, shape ``[src_seq_len, batch_size]``.
        target_tensor: Target batch; verified to have shape
            ``[max_tgt_length, batch_size]``.
        encoder: Encoder providing ``encode_sequence``.
        decoder: Decoder providing ``decode_sequence``, ``init_hidden``,
            ``gru`` and ``output_size``.
        encoder_optimizer: Optimizer stepped for the encoder parameters.
        decoder_optimizer: Optimizer stepped for the decoder parameters.
        criterion: Loss module called as ``criterion(predictions, labels)``
            with predictions of shape ``[N, C]`` and labels of shape ``[N]``.
        device: Device on which the decoder's initial hidden state is built
            for the shape check.
        max_src_length: Accepted for interface compatibility; not used by
            this function.
        max_tgt_length: Expected target length and number of decoding steps.
        batch_size: Expected size of the batch dimension.
        start_of_sequence_symbol: Integer id fed to the decoder as the first
            input symbol.
        teacher_forcing_ratio: Probability in ``[0, 1]`` of using teacher
            forcing for this batch.

    Returns:
        The loss for this batch as a Python float.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Validate shapes before spending any compute on the forward pass, so a
    # malformed batch fails immediately.
    decoder_input = target_tensor[0].unsqueeze(dim=0)
    decoder_hidden = decoder.init_hidden(batch_size=batch_size, device=device)
    verify_shape(tensor=decoder_input, expected=[1, batch_size])
    verify_shape(tensor=target_tensor, expected=[max_tgt_length, batch_size])
    verify_shape(tensor=decoder_hidden,
                 expected=[decoder.gru.num_layers,
                           batch_size,
                           decoder.gru.hidden_size])

    encoder_outputs = encoder.encode_sequence(input_tensor)

    use_teacher_forcing = random.random() < teacher_forcing_ratio
    decoder_output = decoder.decode_sequence(
        encoder_outputs=encoder_outputs,
        start_symbol=start_of_sequence_symbol,
        max_length=max_tgt_length,
        target_tensor=target_tensor if use_teacher_forcing else None)

    # The loss function requires predictions of shape NxC, where N is the
    # number of predictions and C is the number of possible categories.
    # [seq_len, batch_size, output_size] -> [seq_len * batch_size, output_size]
    predictions = decoder_output.reshape(-1, decoder.output_size)
    # [seq_len, batch_size] -> [seq_len * batch_size]
    labels = target_tensor.reshape(-1)

    loss: torch.Tensor = criterion(predictions, labels)  # scalar

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()