from argparse import Namespace

import numpy as np
import torch

# NOTE: module paths for the dataset/trainer/test-data helpers are assumed to follow the
# nlpmodels package layout used elsewhere in these tests (nlpmodels.models.*,
# nlpmodels.utils.*, nlpmodels.utils.elt.*, tests.test_data.*); adjust if the repo differs.
from nlpmodels.models import gpt, text_cnn, transformer, word2vec
from nlpmodels.utils import train, utils
from nlpmodels.utils.elt import (gpt_dataset, skipgram_dataset,
                                 text_cnn_dataset, transformer_dataset)
from nlpmodels.utils.elt.transformer_batch import TransformerBatch
from nlpmodels.utils.elt.transformer_dataset import TransformerDataset
from nlpmodels.utils.vocabulary import NLPVocabulary
from tests.test_data import (cnn_regression_test_data,
                             transformer_regression_test_data,
                             word2vec_regression_test_data)

utils.set_seed_everywhere()


def test_text_cnn_trainer_regression_test():
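    """Train TextCNN for a few epochs and check the loss does not increase."""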

    utils.set_seed_everywhere()

    test_args = Namespace(
        # Model hyper-parameters
        max_sequence_length=50,
        dim_model=128,
        num_filters=128,
        window_sizes=[3, 5, 7],
        num_classes=2,
        dropout=0.5,
        # Training hyper-parameters
        num_epochs=4,
        learning_rate=1.e-6,
        batch_size=64)

    train_loader, vocab = text_cnn_dataset.TextCNNDataset.get_training_dataloader(
        test_args)
    model = text_cnn.TextCNN(vocab_size=len(vocab),
                             dim_model=test_args.dim_model,
                             num_filters=test_args.num_filters,
                             window_sizes=test_args.window_sizes,
                             num_classes=test_args.num_classes,
                             dropout=test_args.dropout)

    trainer = train.TextCNNTrainer(test_args, vocab.mask_index, model,
                                   train_loader, vocab, True)
    trainer.run()
    losses = trainer.loss_cache
    # the loss after the final epoch should be no higher than after the first (converging)
    assert losses[0].data >= losses[-1].data


def test_gpt_trainer_regression_test():
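    """Train GPT for a few epochs and check the loss decreases."""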

    utils.set_seed_everywhere()

    test_args = Namespace(
        # Model hyper-parameters
        num_layers_per_stack=2,
        dim_model=12,
        dim_ffn=48,
        num_heads=2,
        block_size=64,
        dropout=0.1,
        # Training hyper-parameters
        num_epochs=5,
        learning_rate=0.0,
        batch_size=64,
    )

    train_loader, vocab = gpt_dataset.GPTDataset.get_training_dataloader(
        test_args)
    model = gpt.GPT(vocab_size=len(vocab),
                    num_layers_per_stack=test_args.num_layers_per_stack,
                    dim_model=test_args.dim_model,
                    dim_ffn=test_args.dim_ffn,
                    num_heads=test_args.num_heads,
                    block_size=test_args.block_size,
                    dropout=test_args.dropout)
    trainer = train.GPTTrainer(test_args, vocab.mask_index, model,
                               train_loader, vocab, True)
    trainer.run()
    losses = trainer.loss_cache
    # the loss after the final epoch should be lower than after the first (converging)
    assert losses[0].data > losses[-1].data


def test_word2vec_trainer_regression_test():
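    """Train the skip-gram word2vec model and check the loss decreases."""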

    utils.set_seed_everywhere()

    test_args = Namespace(
        # skip gram data hyper-parameters
        context_window_size=2,
        subsample_t=10.e-200,
        # Model hyper-parameters
        embedding_size=300,
        negative_sample_size=15,
        # Training hyper-parameters
        num_epochs=2,
        learning_rate=1.e-3,
        batch_size=4096,
    )
    train_dataloader, vocab = skipgram_dataset.SkipGramDataset.get_training_dataloader(
        test_args.context_window_size, test_args.subsample_t,
        test_args.batch_size)
    word_frequencies = torch.from_numpy(vocab.get_word_frequencies())

    model = word2vec.SkipGramNSModel(len(vocab), test_args.embedding_size,
                                     test_args.negative_sample_size,
                                     word_frequencies)

    trainer = train.Word2VecTrainer(test_args, model, train_dataloader, True)
    trainer.run()
    losses = trainer.loss_cache
    # the loss after the final epoch should be lower than after the first (converging)
    assert losses[0].data > losses[-1].data


def test_transformer_regression_test():
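    """Compare a Transformer forward pass on a mock batch to stored reference output."""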
    utils.set_seed_everywhere()

    test_2_args = Namespace(
        num_layers_per_stack=2,
        dim_model=512,
        dim_ffn=2048,
        num_heads=8,
        max_sequence_length=20,
        dropout=0.1,
    )

    # mock dataset
    src_tokens = [["the", "cow", "jumped", "over", "the", "moon"],
                  ["the", "british", "are", "coming"]]
    tgt_tokens = [["la", "vache", "a", "sauté", "sur", "la", "lune"],
                  ["les", "britanniques", "arrivent"]]
    batch_size = len(src_tokens)
    dictionary_source = NLPVocabulary.build_vocabulary(src_tokens)
    dictionary_target = NLPVocabulary.build_vocabulary(tgt_tokens)
    max_seq_length = 20
    src_padded = TransformerDataset.padded_string_to_integer(
        src_tokens, max_seq_length, dictionary_source)
    tgt_padded = TransformerDataset.padded_string_to_integer(
        tgt_tokens, max_seq_length + 1, dictionary_target)
    data = TransformerBatch(torch.LongTensor(src_padded),
                            torch.LongTensor(tgt_padded))

    model = transformer.Transformer(len(dictionary_source),
                                    len(dictionary_target),
                                    test_2_args.num_layers_per_stack,
                                    test_2_args.dim_model, test_2_args.dim_ffn,
                                    test_2_args.num_heads,
                                    test_2_args.max_sequence_length,
                                    test_2_args.dropout)
    # push through model
    y_hat = model(data)

    # expected output
    expected_output = transformer_regression_test_data.TRANSFORMER_REGRESSION_TEST_DATA

    # assert y_hat is within eps
    eps = 1.e-4
    assert np.allclose(y_hat.data.numpy(),
                       expected_output.data.numpy(),
                       atol=eps)


def test_regression_test_cnn():
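    """Compare a TextCNN forward pass on random token ids to stored reference output."""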
    utils.set_seed_everywhere()

    test_2_args = Namespace(
        vocab_size=300,
        # Model hyper-parameters
        max_sequence_length=200,  # important parameter; makes a big difference on output
        dim_model=3,  # embedding size (original experiments ranged from 300 down to 50)
        num_filters=100,  # output filters from the convolution
        window_sizes=[3, 5],  # filter sizes; total filters = len(window_sizes) * num_filters
        num_classes=2,  # binary classification problem
        dropout=0.5,  # 0.5 from the original implementation, which is fairly high
        # Training hyper-parameters
        num_epochs=3,  # 30 in the original implementation
        learning_rate=1.e-4,  # choosing the LR is important; often paired with a scheduler
        batch_size=64  # from the original implementation
    )

    # mock dataset
    src_tokens = torch.randint(0,
                               test_2_args.vocab_size - 1,
                               size=(test_2_args.batch_size,
                                     test_2_args.max_sequence_length))

    model = text_cnn.TextCNN(test_2_args.vocab_size, test_2_args.dim_model,
                             test_2_args.num_filters, test_2_args.window_sizes,
                             test_2_args.num_classes, test_2_args.dropout)
    # push through model
    y_hat = model((None, src_tokens))

    # expected output
    expected_output = cnn_regression_test_data.CNN_REGRESSION_TEST_DATA

    # assert y_hat is within eps
    eps = 1.e-4
    assert np.allclose(y_hat.data.numpy(),
                       expected_output.data.numpy(),
                       atol=eps)


def test_transformer_trainer_regression_test():
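    """Train the Transformer for a few epochs and check the loss does not increase."""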

    utils.set_seed_everywhere()

    test_args = Namespace(
        # Model hyper-parameters
        num_layers_per_stack=2,
        dim_model=512,
        dim_ffn=2048,
        num_heads=8,
        max_sequence_length=20,
        dropout=0.1,
        # Label smoothing loss function hyper-parameters
        label_smoothing=0.1,
        # Training hyper-parameters
        num_epochs=10,
        learning_rate=0.0,
        batch_size=128,
    )

    train_dataloader, vocab_source, vocab_target = transformer_dataset.TransformerDataset.get_training_dataloader(
        test_args)
    vocab_source_size = len(vocab_source)
    vocab_target_size = len(vocab_target)
    model = transformer.Transformer(vocab_source_size, vocab_target_size,
                                    test_args.num_layers_per_stack,
                                    test_args.dim_model, test_args.dim_ffn,
                                    test_args.num_heads,
                                    test_args.max_sequence_length,
                                    test_args.dropout)
    trainer = train.TransformerTrainer(test_args, vocab_target_size,
                                       vocab_target.mask_index, model,
                                       train_dataloader, True)
    trainer.run()
    losses = trainer.loss_cache
    # the loss after the final epoch should be no higher than after the first (converging)
    assert losses[0].data >= losses[-1].data


def test_word2vec_regression_test():
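    """Compare the skip-gram negative-sampling loss to a stored reference value."""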

    utils.set_seed_everywhere()

    test_2_args = Namespace(
        # Model hyper-parameters
        embedding_size=300,
        negative_sample_size=20,  # k examples used in the negative sampling loss function
        # Training hyper-parameters
        batch_size=4096,
        # Vocabulary
        vocab_size=1000,
    )
    word_frequencies = torch.from_numpy(np.random.rand(test_2_args.vocab_size))
    mock_input_1 = torch.randint(0,
                                 test_2_args.vocab_size - 1,
                                 size=(test_2_args.batch_size, ))
    mock_input_2 = torch.randint(0,
                                 test_2_args.vocab_size - 1,
                                 size=(test_2_args.batch_size, ))
    data = (mock_input_1, mock_input_2)
    model = word2vec.SkipGramNSModel(test_2_args.vocab_size,
                                     test_2_args.embedding_size,
                                     test_2_args.negative_sample_size,
                                     word_frequencies)
    loss = model(data)

    # expected output
    expected_output = word2vec_regression_test_data.WORD2VEC_REGRESSION_TEST_DATA

    # assert y_hat is within eps
    eps = 1.e-4
    assert np.allclose(loss.data.numpy(),
                       expected_output.data.numpy(),
                       atol=eps)


def test_input_output_dims_transformer():
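    """Check the Transformer output dimensions for a small mock batch."""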
    test_1_args = Namespace(
        num_layers_per_stack=2,
        dim_model=512,
        dim_ffn=2048,
        num_heads=8,
        max_sequence_length=20,
        dropout=0.1,
    )

    # mock dataset
    src_tokens = [["the", "cow", "jumped", "over", "the", "moon"],
                  ["the", "british", "are", "coming"]]
    tgt_tokens = [["la", "vache", "a", "sauté", "sur", "la", "lune"],
                  ["les", "britanniques", "arrivent"]]
    batch_size = len(src_tokens)
    dictionary_source = NLPVocabulary.build_vocabulary(src_tokens)