Example #1
    def test_batch(self, batch_x, batch_y):
        # torch.autograd.Variable is deprecated; plain tensors carry
        # autograd information since PyTorch 0.4
        batch_x = torch.from_numpy(np.asarray(batch_x)).to(self.device)
        batch_y = torch.from_numpy(np.asarray(batch_y)).to(self.device)

        # 1 and 2 are presumably the EOS and PAD token indices; the other
        # examples pass EOS.index and PAD.index for the same arguments
        sampled_output = transformer.sample_output(self.model, batch_x, 1, 2,
                                                   batch_y.size(1))

        return sampled_output.detach().cpu().numpy()
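Across these examples, transformer.sample_output(model, input_seq, eos_index, pad_index, max_len) takes a batch of input token IDs and returns a tensor of sampled output token IDs of length at most max_len. A minimal standalone sketch of the same call with named constants instead of the magic numbers above (EOS_INDEX, PAD_INDEX, and the wrapper name sample_batch are assumptions, mirroring the EOS.index/PAD.index constants used in the later examples):

import numpy as np
import torch

import transformer  # the module these examples exercise

EOS_INDEX = 1  # assumed end-of-sequence token id
PAD_INDEX = 2  # assumed padding token id

def sample_batch(model, batch_x, max_len, device="cuda"):
    # convert an array-like batch of token ids into a tensor on the device
    batch_x = torch.as_tensor(np.asarray(batch_x)).to(device)
    sampled = transformer.sample_output(model, batch_x, EOS_INDEX,
                                        PAD_INDEX, max_len)
    return sampled.detach().cpu().numpy()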
Example #2
def manual_eval(model):
    i_to_word, w_to_idx = fetch_vocab()
    i_seq, t_seq = prepare_data(w_to_idx)
    i_seq = i_seq.cuda()
    t_seq = t_seq.cuda()

    # sample outputs for the first 10 training sequences, bounded by the
    # target sequence length
    s_output = transformer.sample_output(model, i_seq[:10], EOS.index,
                                         PAD.index, t_seq.size(1))

    print()
    print("Sampled Outputs on Train:")
    print("----------------")
    for sample_idx in range(10):
        for token_idx in range(10):
            print(i_to_word[i_seq[sample_idx, token_idx].item()], end=" ")
        print(" => ", end=" ")
        for token_idx in range(s_output.size(1)):
            print(i_to_word[s_output[sample_idx, token_idx].item()], end=" ")
        print()

    print('TESTING')
    i_to_word, w_to_idx = fetch_vocab2()
    i_seq, t_seq = prepare_data2(w_to_idx)
    i_seq = i_seq.cuda()
    t_seq = t_seq.cuda()

    s_output = transformer.sample_output(model, i_seq, EOS.index, PAD.index,
                                         t_seq.size(1))

    print()
    print("Sampled Outputs on Test:")
    print("----------------")
    for sample_idx in range(i_seq.size(0)):
        for token_idx in range(i_seq.size(1)):
            print(i_to_word[i_seq[sample_idx, token_idx].item()], end=" ")
        print(" => ", end=" ")
        for token_idx in range(s_output.size(1)):
            print(i_to_word[s_output[sample_idx, token_idx].item()], end=" ")
        print()
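fetch_vocab and prepare_data are project helpers that are not shown in any of these examples. A minimal sketch of what prepare_data plausibly returns (a padded LongTensor batch of word indices), assuming pre-tokenized sentences; prepare_data_sketch and its signature are hypothetical:

import torch

def prepare_data_sketch(sentences, word_to_idx, pad_index):
    # map each tokenized sentence to word indices, padded to a common length
    max_len = max(len(s) for s in sentences)
    ids = [
        [word_to_idx[w] for w in s] + [pad_index] * (max_len - len(s))
        for s in sentences
    ]
    return torch.tensor(ids, dtype=torch.long)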
Example #3
def manual_eval(model):
    idx_to_word, word_to_idx = fetch_vocab(DATA_GERMAN, DATA_ENGLISH,
                                           DATA_GERMAN2)
    # note: the original unpacked both values into the same name (train_seq),
    # silently discarding the input sequences; use distinct names
    train_input_seq, train_target_seq = prepare_data(DATA_GERMAN, DATA_ENGLISH,
                                                     word_to_idx)

    s_output = transformer.sample_output(model, train_input_seq[:10],
                                         EOS.index, PAD.index,
                                         train_target_seq.size(1))

    print()
    print("Sampled Outputs on Train:")
    print("----------------")
    for sample_idx in range(10):
        for token_idx in range(10):
            print(idx_to_word[train_input_seq[sample_idx, token_idx].item()],
                  end=" ")
        print(" => ", end=" ")
        for token_idx in range(s_output.size(1)):
            print(idx_to_word[s_output[sample_idx, token_idx].item()], end=" ")
        print()

    test_input_seq, test_target_seq = prepare_data(DATA_GERMAN2, DATA_GERMAN2,
                                                   word_to_idx)

    s_output = transformer.sample_output(model, test_input_seq, EOS.index,
                                         PAD.index, test_target_seq.size(1))

    print()
    print("Sampled Outputs on Test:")
    print("----------------")
    for sample_idx in range(test_input_seq.size(0)):
        for token_idx in range(test_input_seq.size(1)):
            print(idx_to_word[test_input_seq[sample_idx, token_idx].item()],
                  end=" ")
        print(" => ", end=" ")
        for token_idx in range(s_output.size(1)):
            print(idx_to_word[s_output[sample_idx, token_idx].item()], end=" ")
        print()
Example #4
    def sample(self, batch_size, seq_len, x=torch.tensor([])):
        if self.test_mode:
            print('In Test mode')
            return None

        if self.data_loader.idx >= self.data_loader.data_num:
            self.data_loader.reset()
        # use the caller-supplied batch if one was given; otherwise draw
        # the next batch of input sequences from the data loader
        if x.dim() > 1:
            input_seq = x
        else:
            input_seq = self.data_loader.next()[0]
        input_seq = input_seq.cuda()
        sampled_output = transformer.sample_output(self.model, input_seq,
                                                   self.EOS_Index,
                                                   self.PAD_Index,
                                                   input_seq.shape[1])
        return sampled_output
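A hypothetical call site for sample (agent and vocab_size are placeholder names; note that batch_size and seq_len are accepted but unused in the excerpt shown):

# draw the next batch from the internal data loader
generated = agent.sample(batch_size=32, seq_len=20)

# or condition on an explicit 2-D batch of token ids instead
x = torch.randint(0, vocab_size, (32, 20))
generated = agent.sample(batch_size=32, seq_len=20, x=x)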
        """
Example #5
def main():
    # fetch vocabulary + prepare data
    idx_to_word, word_to_idx = fetch_vocab()
    input_seq, target_seq = prepare_data(word_to_idx)

    # create embeddings to use
    emb = nn.Embedding(len(idx_to_word), EMBEDDING_SIZE)
    emb.reset_parameters()

    # create transformer model
    model = transformer.Transformer(emb,
                                    PAD.index,
                                    emb.num_embeddings,
                                    max_seq_len=max(input_seq.size(1),
                                                    target_seq.size(1)))

    # create an optimizer for training the model + a cross-entropy loss
    optimizer = optim.Adam(
        (param for param in model.parameters() if param.requires_grad),
        lr=0.0001)
    loss = nn.CrossEntropyLoss()

    print("Initial Probabilities of Translations:")
    print("--------------------------------------")
    eval_model(model, input_seq, target_seq)
    print()

    # move model + data to the GPU (if possible)
    if GPU:
        model.cuda()
        input_seq = input_seq.cuda()
        target_seq = target_seq.cuda()

    # train the model
    for epoch in range(NUM_EPOCHS):
        print("training epoch {}...".format(epoch + 1), end=" ")

        predictions = model(input_seq, target_seq)
        optimizer.zero_grad()
        current_loss = loss(
            predictions.view(
                predictions.size(0) * predictions.size(1),
                predictions.size(2)), target_seq.view(-1))
        current_loss.backward()
        optimizer.step()

        print("OK (loss: {:.6f})".format(current_loss.item()))

    # put model in evaluation mode
    model.eval()

    print()
    print("Final Probabilities of Translations:")
    print("------------------------------------")
    eval_model(model, input_seq, target_seq)

    # randomly sample outputs from the input sequences based on the probabilities computed by the trained model
    sampled_output = transformer.sample_output(model, input_seq, EOS.index,
                                               PAD.index, target_seq.size(1))

    print()
    print("Sampled Outputs:")
    print("----------------")
    for sample_idx in range(input_seq.size(0)):
        for token_idx in range(input_seq.size(1)):
            print(idx_to_word[input_seq[sample_idx, token_idx].item()],
                  end=" ")
        print(" => ", end=" ")
        for token_idx in range(sampled_output.size(1)):
            print(idx_to_word[sampled_output[sample_idx, token_idx].item()],
                  end=" ")
        print()
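eval_model is called above but not defined in any of these examples. A hypothetical sketch of what it might compute, assuming model(input_seq, target_seq) returns per-token logits of shape (batch, seq_len, vocab_size); padding positions are not masked out in this simplified version:

import torch
import torch.nn.functional as F

def eval_model_sketch(model, input_seq, target_seq):
    with torch.no_grad():
        logits = model(input_seq, target_seq)
        log_probs = F.log_softmax(logits, dim=-1)
        # log-probability assigned to each target token
        token_log_probs = log_probs.gather(
            2, target_seq.unsqueeze(-1)).squeeze(-1)
        # probability of each complete target sequence
        seq_probs = token_log_probs.sum(dim=1).exp()
    for idx, prob in enumerate(seq_probs.tolist()):
        print("sequence {}: {:.6f}".format(idx, prob))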
Example #6
def main():
    # fetch vocabulary + prepare data
    idx_to_word, word_to_idx = fetch_vocab(DATA_GERMAN, DATA_ENGLISH,
                                           DATA_GERMAN2)
    input_seq, target_seq = prepare_data(DATA_GERMAN, DATA_ENGLISH,
                                         word_to_idx)

    # create embeddings to use
    emb = nn.Embedding(len(idx_to_word), EMBEDDING_SIZE)
    emb.reset_parameters()
    print("*" * 80, "generated emb")
    print(emb)
    # create transformer model
    model = transformer.Transformer(emb,
                                    PAD.index,
                                    emb.num_embeddings,
                                    max_seq_len=max(input_seq.size(1),
                                                    target_seq.size(1)))

    # create an optimizer for training the model + a cross-entropy loss
    optimizer = optim.Adam(
        (param for param in model.parameters() if param.requires_grad),
        lr=0.0001)
    loss = nn.CrossEntropyLoss()

    print("Initial Probabilities of Translations:")
    print("--------------------------------------")
    # eval_model(model, input_seq, target_seq)
    # print()

    # move model + data to the GPU (if possible)
    if GPU:
        model.cuda()
        input_seq = input_seq.cuda()
        target_seq = target_seq.cuda()

    # train the model
    for epoch in range(NUM_EPOCHS):

        # iterate over the training data in mini-batches of 10 sequences
        for i in range(0, len(input_seq), 10):
            b_input_seq = input_seq[i:i + 10]
            b_target_seq = target_seq[i:i + 10]
            # print("training epoch {}...".format(epoch + 1), end=" ")

            predictions = model(b_input_seq, b_target_seq)
            optimizer.zero_grad()
            current_loss = loss(
                predictions.view(
                    predictions.size(0) * predictions.size(1),
                    predictions.size(2)), b_target_seq.view(-1))
            current_loss.backward()
            optimizer.step()

        if epoch % 10 == 0:
            manual_eval(model)
            torch.save(model.state_dict(),
                       'experiments/2_obama/saved_model.pkl')
        print("Iteration {}: loss: {:.6f}".format(epoch, current_loss.item()))
        sys.stdout.flush()

    # put model in evaluation mode
    # model.eval()

    # print()
    # print("Final Probabilities of Translations:")
    # print("------------------------------------")
    # eval_model(model, input_seq, target_seq)

    # randomly sample outputs from the input sequences based on the probabilities computed by the trained model
    # idx_to_word, word_to_idx = fetch_vocab2()
    # idx_to_word, word_to_idx = fetch_vocab(DATA_GERMAN2, DATA_GERMAN2)
    # test_input_seq, test_target_seq = prepare_data2(word_to_idx)
    test_input_seq, test_target_seq = prepare_data(DATA_GERMAN2, DATA_GERMAN2,
                                                   word_to_idx)
    test_input_seq = test_input_seq.cuda()
    test_target_seq = test_target_seq.cuda()
    print(test_input_seq.shape, test_target_seq.shape)
    sampled_output = transformer.sample_output(model, test_input_seq,
                                               EOS.index, PAD.index,
                                               test_target_seq.size(1))
    print(sampled_output.shape)
    print()
    print("Sampled Outputs:")

    print("----------------")
    for sample_idx in range(test_input_seq.size(0)):

        for token_idx in range(test_input_seq.size(1)):
            print(idx_to_word[test_input_seq[sample_idx, token_idx].item()],
                  end=" ")
        print(" => ", end=" ")
        for token_idx in range(sampled_output.size(1)):
            # print("89898",sampled_output[sample_idx, token_idx].item())

            print(idx_to_word[sampled_output[sample_idx, token_idx].item()],
                  end=" ")
        print()
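Note that this example saves only the model's state_dict. Restoring it later requires rebuilding the model with the same configuration before loading the weights, roughly:

# rebuild the model exactly as it was configured for training
# (emb and the max_seq_len value must match the training run)
model = transformer.Transformer(emb,
                                PAD.index,
                                emb.num_embeddings,
                                max_seq_len=max(input_seq.size(1),
                                                target_seq.size(1)))
model.load_state_dict(torch.load('experiments/2_obama/saved_model.pkl'))
model.eval()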
Example #7
def test(input_sent):
    # fetch vocabulary + prepare data
    metadata = torch.load(
        "./checkpoints/attention_only/checkpoint_0/attention_only_metadata.data"
    )
    idx_to_word = metadata['idx_to_word']
    word_to_idx = metadata['word_to_idx']
    emb = metadata['emb']
    input_seq_size = metadata['input_seq_size']

    # input_sent_padded = pad_sentences(input_sent)
    # input_sent_ids = get_ids(input_sent_padded, idx_to_word, word_to_idx)
    # print("$$input_sent_ids len: ",len(input_sent_ids))

    # idx_to_word, word_to_idx = fetch_vocab(DATA_GERMAN, DATA_ENGLISH, DATA_GERMAN2)
    # input_seq, target_seq = prepare_data(input_sent, input_sent, word_to_idx)

    # create embeddings to use
    # emb = nn.Embedding(len(idx_to_word), EMBEDDING_SIZE)
    # emb.reset_parameters()

    # create transformer model
    model = transformer.Transformer(emb,
                                    PAD.index,
                                    emb.num_embeddings,
                                    max_seq_len=input_seq_size)

    # torch.load here returns a fully serialized model, which replaces the
    # freshly constructed one above
    model = torch.load(
        "./checkpoints/attention_only/checkpoint_0/attention_only_model.model")

    if GPU:
        model.cuda()
        # input_seq = torch.LongTensor(input_sent_ids)
        # target_seq = torch.LongTensor(input_sent_ids)
        # input_seq = input_seq.cuda()
        # target_seq = target_seq.cuda()

    # train the model
    # test_input_seq_padded = pad_sentences(input_sent)
    # test_input_seq_ids = get_ids(test_input_seq_padded, idx_to_word, word_to_idx)
    # test_input_seq = torch.LongTensor(test_input_seq_ids)
    # test_target_seq = copy.deepcopy(test_input_seq)

    test_input_seq, test_target_seq = prepare_data([input_sent], [input_sent],
                                                   word_to_idx)
    # print(test_target_seq.shape)
    # print(test_input_seq)
    test_input_seq = test_input_seq.cuda()
    test_target_seq = test_target_seq.cuda()

    test_input_seq = test_input_seq.view(1, -1)
    test_target_seq = test_target_seq.view(1, -1)
    # print(test_target_seq.shape)
    # print("$$test_target_seq: ",test_target_seq.shape)

    # print(test_input_seq.shape, test_target_seq.shape)
    sampled_output = transformer.sample_output(model, test_input_seq,
                                               EOS.index, PAD.index,
                                               test_target_seq.size(1))

    # print("$$$ sample_output", sampled_output)

    output_arr = []
    for sample_idx in range(test_input_seq.size(0)):

        for token_idx in range(sampled_output.size(1)):
            temp = idx_to_word[sampled_output[sample_idx, token_idx].item()]
            output_arr.append(temp)

    return output_arr
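A hypothetical call site for test, assuming input_sent is a sentence in whatever form prepare_data expects; the "<eos>" literal is an assumption about what idx_to_word maps EOS.index to:

tokens = test(sentence)
if "<eos>" in tokens:
    tokens = tokens[:tokens.index("<eos>")]  # drop everything after EOS
print(" ".join(tokens))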