Example 1
import torch
import torch.utils.data

# Vocabulary, Corpus, EncoderRNN, and AttnDecoderRNN are project-specific
# classes assumed to be importable from the surrounding package.


def evaluate(vocab: Vocabulary, corpus_filename: str, encoder: EncoderRNN,
             decoder: AttnDecoderRNN, max_src_length: int,
             max_tgt_length: int):

    device: torch.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    encoder.to(device)
    decoder.to(device)

    encoder.eval()
    decoder.eval()

    with torch.no_grad():

        corpus = Corpus(
            filename=corpus_filename,
            max_src_length=max_src_length,
            vocab=vocab,
            device=device)

        for batch in torch.utils.data.DataLoader(dataset=corpus, batch_size=1):

            # Permute from [batch_size, src_seq_len] to [src_seq_len, batch_size].
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)

            encoder_outputs = encoder.encode_sequence(input_tensor)

            decoder_output = decoder.decode_sequence(
                encoder_outputs=encoder_outputs,
                start_symbol=corpus.characters.start_of_sequence.integer,
                max_length=max_tgt_length)
            # Greedy decoding: keep the highest-scoring symbol at each time step.
            _, top_i = decoder_output.topk(k=1)

            # Drop the top-k and batch dimensions (batch_size is 1),
            # leaving a list of predicted symbol indices.
            predictions = top_i.squeeze(dim=2).squeeze(dim=1).tolist()

            predicted_string = "".join(
                [corpus.characters[i].string for i in predictions])

            print(predicted_string)
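
For context, here is a minimal sketch of how evaluate might be called, assuming the vocabulary and trained models were previously saved whole with torch.save; the filenames and length limits are hypothetical:

import torch

# Hypothetical filenames; how the vocabulary and models were saved
# depends on the surrounding project.
vocab = torch.load("vocab.pt")      # a saved Vocabulary
encoder = torch.load("encoder.pt")  # a trained EncoderRNN
decoder = torch.load("decoder.pt")  # a trained AttnDecoderRNN

evaluate(vocab=vocab,
         corpus_filename="test.txt",
         encoder=encoder,
         decoder=decoder,
         max_src_length=50,
         max_tgt_length=50)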
Example 2
import random

import torch
import torch.nn as nn
from torch.optim import Optimizer

# EncoderRNN, AttnDecoderRNN, and verify_shape are project-specific and
# assumed to be in scope (a sketch of verify_shape appears after this
# example).


def train(*,
          input_tensor: torch.Tensor,  # shape: [src_seq_len, batch_size]
          target_tensor: torch.Tensor,  # shape: [tgt_seq_len, batch_size]
          encoder: EncoderRNN,
          decoder: AttnDecoderRNN,
          encoder_optimizer: Optimizer,
          decoder_optimizer: Optimizer,
          criterion: nn.Module,
          device: torch.device,
          max_src_length: int,
          max_tgt_length: int,
          batch_size: int,
          start_of_sequence_symbol: int,
          teacher_forcing_ratio: float) -> float:
    # Clear any gradients accumulated from the previous batch.
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Accumulated loss: a scalar tensor (shape []).
    loss: torch.Tensor = torch.tensor(0, dtype=torch.float, device=device)

    encoder_outputs = encoder.encode_sequence(input_tensor)

    decoder_input = target_tensor[0].unsqueeze(dim=0)
    decoder_hidden = decoder.init_hidden(batch_size=batch_size, device=device)

    # Sanity-check tensor shapes before decoding.
    verify_shape(tensor=decoder_input, expected=[1, batch_size])
    verify_shape(tensor=target_tensor, expected=[max_tgt_length, batch_size])
    verify_shape(tensor=decoder_hidden,
                 expected=[decoder.gru.num_layers, batch_size,
                           decoder.gru.hidden_size])

    # With probability teacher_forcing_ratio, feed the gold target symbols
    # to the decoder at each step instead of its own previous predictions.
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    decoder_output = decoder.decode_sequence(encoder_outputs=encoder_outputs,
                                             start_symbol=start_of_sequence_symbol,
                                             max_length=max_tgt_length,
                                             target_tensor=target_tensor if use_teacher_forcing else None)

    # The loss function expects predictions of shape [N, C], where N is the
    # number of predictions and C is the number of possible categories.
    # Reshape the decoder output from [tgt_seq_len, batch_size, output_size]
    # to [tgt_seq_len * batch_size, output_size] ...
    predictions = decoder_output.reshape(-1, decoder.output_size)

    # ... and the labels from [tgt_seq_len, batch_size]
    # to [tgt_seq_len * batch_size].
    labels = target_tensor.reshape(-1)

    loss += criterion(predictions, labels)

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()
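
The verify_shape helper used above is project-specific and not shown here; a minimal sketch of what it presumably does, assuming it simply raises on a shape mismatch:

from typing import List

import torch


def verify_shape(*, tensor: torch.Tensor, expected: List[int]) -> None:
    # Raise if the tensor's actual shape differs from the expected one.
    if list(tensor.shape) != expected:
        raise ValueError(f"Expected shape {expected}, "
                         f"but got {list(tensor.shape)}")

A driver loop would then call train once per batch. The sketch below assumes a Corpus whose batches expose "data" and "labels" tensors of shape [batch_size, seq_len] padded to the maximum lengths, a decoder that emits log-probabilities (hence nn.NLLLoss), and hypothetical hyperparameter values:

criterion = nn.NLLLoss()
encoder_optimizer = torch.optim.SGD(encoder.parameters(), lr=0.01)
decoder_optimizer = torch.optim.SGD(decoder.parameters(), lr=0.01)

# drop_last keeps every batch at exactly batch_size, which the shape
# checks inside train require.
for batch in torch.utils.data.DataLoader(dataset=corpus, batch_size=10,
                                         drop_last=True):
    batch_loss = train(input_tensor=batch["data"].permute(1, 0),
                       target_tensor=batch["labels"].permute(1, 0),
                       encoder=encoder,
                       decoder=decoder,
                       encoder_optimizer=encoder_optimizer,
                       decoder_optimizer=decoder_optimizer,
                       criterion=criterion,
                       device=device,
                       max_src_length=50,
                       max_tgt_length=50,
                       batch_size=10,
                       start_of_sequence_symbol=corpus.characters.start_of_sequence.integer,
                       teacher_forcing_ratio=0.5)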