Example #1
    def test_load_embedding_has_all_words(self, instances, embedding_type):
        single_instance = instances["single_instance"]
        MAX_NUM_WORDS = 100
        vocab = Vocab(
            instances=single_instance,
            max_num_tokens=MAX_NUM_WORDS,
            embedding_type=embedding_type,
        )
        vocab.build_vocab()
        embedding = vocab.load_embedding()
        assert embedding.size(0) == vocab.get_vocab_len()
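The assertion ties the first dimension of the loaded embedding matrix to the vocabulary length. As a minimal follow-on sketch (not part of the test suite), assuming load_embedding() returns a 2-D float tensor of shape (vocab_len, embedding_dim), such a matrix can be wrapped directly in a standard PyTorch embedding layer:

import torch
import torch.nn as nn

# stand-in for the matrix returned by vocab.load_embedding(); 100 tokens x 50 dims is arbitrary
embedding_matrix = torch.randn(100, 50)
layer = nn.Embedding.from_pretrained(embedding_matrix, freeze=True)
token_ids = torch.tensor([0, 3, 7])   # indices into the vocabulary
vectors = layer(token_ids)            # shape: (3, 50)
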
Example #2
def setup_lstm2seqdecoder(request):
    HIDDEN_DIM = 1024
    NUM_LAYERS = request.param[0]
    BIDIRECTIONAL = request.param[1]
    TEACHER_FORCING_RATIO = request.param[3]
    MAX_LENGTH = 5

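    # build a Line per sentence and a Line for each sentence's first word;
    # the flattened word lists feed the Vocab below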
    lines = []
    words = []
    # texts = ["First", "second", "Third"]
    texts = ["First sentence", "second sentence", "Third long sentence here"]
    for text in texts:
        line = Line(text=text)
        word = Line(text=text.split()[0])
        lines.append(line)
        words.append(word)
    flat_texts = [[word for sentence in texts for word in sentence.split()]]
    vocab = Vocab(flat_texts)
    vocab.build_vocab()

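    # dummy initial hidden and cell states for the decoder LSTM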
    num_direction = 2 if BIDIRECTIONAL else 1
    h0 = torch.ones(NUM_LAYERS, len(texts), num_direction * HIDDEN_DIM) * 0.1
    c0 = torch.ones(NUM_LAYERS, len(texts), num_direction * HIDDEN_DIM) * 0.2

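    # GloVe 6B 50-dim word embedder; dummy encoder outputs are only built
    # when an attention module is supplied via request.param[2]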
    embedder = WordEmbedder(embedding_type="glove_6B_50")
    encoder_outputs = (
        torch.ones(len(texts), 5, num_direction * HIDDEN_DIM) * 0.5
        if request.param[2]
        else None
    )
    decoder = Lstm2SeqDecoder(
        embedder=embedder,
        vocab=vocab,
        max_length=MAX_LENGTH,
        attn_module=request.param[2],
        dropout_value=0.0,
        hidden_dim=HIDDEN_DIM,
        bidirectional=BIDIRECTIONAL,
        rnn_bias=False,
        num_layers=NUM_LAYERS,
    )

    return (
        decoder,
        {
            "HIDDEN_DIM": HIDDEN_DIM,
            "NUM_LAYERS": NUM_LAYERS,
            "MAX_LENGTH": MAX_LENGTH,
            "TEACHER_FORCING_RATIO": TEACHER_FORCING_RATIO,
            "LINES": lines,
            "WORDS": words,
            "VOCAB_SIZE": vocab.get_vocab_len(),
            "BIDIRECTIONAL": BIDIRECTIONAL,
        },
        encoder_outputs,
        (h0, c0),
    )
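Because setup_lstm2seqdecoder reads NUM_LAYERS, BIDIRECTIONAL, the attention module and TEACHER_FORCING_RATIO out of request.param, it is meant to run as a parametrized pytest fixture; the decorator is not shown above. A minimal sketch of one way to wire it up, where the parameter tuples, the fixture name decoder_setup and the test name are illustrative assumptions (the attention case is omitted because it needs a concrete attention module):

import pytest

# assumed tuple layout: (num_layers, bidirectional, attn_module, teacher_forcing_ratio)
@pytest.fixture(params=[(1, False, None, 1.0), (2, True, None, 0.5)])
def decoder_setup(request):
    return setup_lstm2seqdecoder(request)


def test_decoder_setup_shapes(decoder_setup):
    decoder, options, encoder_outputs, (h0, c0) = decoder_setup
    assert options["VOCAB_SIZE"] > 0
    assert h0.shape == c0.shape
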
Example #3
    def test_vocab_length_min_freq_1_max_words_1(self, instances):
        single_instance = instances["single_instance"]
        MAX_NUM_WORDS = 1
        MIN_FREQ = 1

        vocab_builder = Vocab(instances=single_instance,
                              min_count=MIN_FREQ,
                              max_num_tokens=MAX_NUM_WORDS)
        vocab_builder.build_vocab()
        len_vocab = vocab_builder.get_vocab_len()
        assert len_vocab == 1 + len(vocab_builder.special_vocab)
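An illustrative variation on the same check (a sketch, assuming the same instances fixture; the cap of 3 is arbitrary): max_num_tokens bounds the corpus-derived part of the vocabulary, so the total length should never exceed the cap plus the special tokens.

    def test_vocab_length_respects_max_tokens_cap(self, instances):
        single_instance = instances["single_instance"]
        MAX_NUM_WORDS = 3
        MIN_FREQ = 1

        vocab_builder = Vocab(instances=single_instance,
                              min_count=MIN_FREQ,
                              max_num_tokens=MAX_NUM_WORDS)
        vocab_builder.build_vocab()
        assert vocab_builder.get_vocab_len() <= MAX_NUM_WORDS + len(vocab_builder.special_vocab)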