Exemple #1
0
    def test_compare_word2vec_glove(self):
        """Check that the word2vec and GloVe fixtures load to matching results.

        Both loaders are called with only ``path`` set. The returned
        token-to-index mappings must be equal and the two embedding matrices
        must have the same shape.
        """
        token2idx_word2vec, embedding_matrix_word2vec = load_word2vec(
            path="tests/data/word2vec.wv")
        token2idx_glove, embedding_matrix_glove = load_glove(
            path="tests/data/glove.txt")

        self.assertDictEqual(token2idx_word2vec, token2idx_glove)
        # assertEqual prints both shapes on failure; assertTrue(a == b) would
        # only report "False is not true".
        self.assertEqual(embedding_matrix_word2vec.shape,
                         embedding_matrix_glove.shape)
Exemple #2
0
    def test_load_word2vec_without_pad_unk(self):
        """Load the word2vec fixture with ``add_pad`` and ``add_unk`` disabled.

        Expects 8 entries in the mapping, one matrix row per entry, a last
        dimension of 100, and neither ``<PAD>`` nor ``<UNK>`` among the keys.
        """
        token2idx, embedding_matrix = load_word2vec(
            path="tests/data/word2vec.wv",
            add_pad=False,
            add_unk=False,
        )

        self.assertEqual(len(token2idx), 8)
        self.assertEqual(len(token2idx), embedding_matrix.shape[0])
        self.assertEqual(embedding_matrix.shape[-1], 100)
        # assertNotIn names the unexpected key and the container on failure.
        self.assertNotIn("<PAD>", token2idx)
        self.assertNotIn("<UNK>", token2idx)
Exemple #3
0
    def test_load_word2vec_without_unk(self):
        """Load the word2vec fixture with ``add_unk`` disabled.

        Expects 9 entries in the mapping, one matrix row per entry, a last
        dimension of 100, ``<PAD>`` mapped to index 0 with an (approximately)
        all-zero row, and no ``<UNK>`` key.
        """
        token2idx, embedding_matrix = load_word2vec(
            path="tests/data/word2vec.wv",
            add_unk=False,
        )

        self.assertEqual(len(token2idx), 9)
        self.assertEqual(len(token2idx), embedding_matrix.shape[0])
        self.assertEqual(embedding_matrix.shape[-1], 100)
        self.assertEqual(token2idx["<PAD>"], 0)
        # assertNotIn names the unexpected key and the container on failure.
        self.assertNotIn("<UNK>", token2idx)
        # Row 0 belongs to <PAD> (asserted above) and must be a zero vector.
        self.assertTrue(
            np.allclose(embedding_matrix[0],
                        np.zeros_like(embedding_matrix[0])), )
Exemple #4
0
    def test_load_word2vec(self):
        """Load the word2vec fixture with default options and check its layout.

        Expects 10 entries in the mapping, one matrix row per entry, a last
        dimension of 100, ``<PAD>`` at index 0 and ``<UNK>`` at index 1.
        Row 0 must be close to all zeros, and row 1 must be close to the
        column-wise mean of rows 2 onward.
        """
        vocab, vectors = load_word2vec(path="tests/data/word2vec.wv")
        vocab_size = len(vocab)

        self.assertEqual(vocab_size, 10)
        self.assertEqual(vocab_size, vectors.shape[0])
        self.assertEqual(vectors.shape[-1], 100)
        self.assertEqual(vocab["<PAD>"], 0)
        self.assertEqual(vocab["<UNK>"], 1)

        # Row 0 (the <PAD> index asserted above) is compared against zeros.
        pad_row = vectors[0]
        pad_is_zero = np.allclose(pad_row, np.zeros_like(pad_row))
        self.assertTrue(pad_is_zero)

        # Row 1 (the <UNK> index asserted above) is compared against the mean
        # of all remaining rows.
        mean_of_rest = vectors[2:].mean(axis=0)
        unk_is_mean = np.allclose(vectors[1], mean_of_rest)
        self.assertTrue(unk_is_mean)
Exemple #5
0
from pytorch_ner.nn_modules.dropout import SpatialDropout1d
from pytorch_ner.nn_modules.embedding import (
    Embedding,
    EmbeddingPreTrained,
    EmbeddingWithDropout,
    load_glove,
    load_word2vec,
)
from pytorch_ner.prepare_data import prepare_conll_data_format

# Shared module-level fixtures: the CoNLL sample is read once and reduced to
# its distinct tokens (order is unspecified because it goes through a set).
token_seq, _ = prepare_conll_data_format(
    path="tests/data/conll.txt",
    sep=" ",
    verbose=False,
)
tokens = list({token for sentence in token_seq for token in sentence})

# Only the embedding matrices are kept; the token-to-index mappings returned
# by the loaders are discarded.
_, word2vec_embeddings = load_word2vec(path="tests/data/word2vec.wv")  # type: ignore
_, glove_embeddings = load_glove(path="tests/data/glove.txt")  # type: ignore

# Each pre-trained matrix is wrapped twice: once with the default settings
# and once with freeze=False.
embedding_w2v_freeze = EmbeddingPreTrained(word2vec_embeddings)
embedding_w2v_fine_tune = EmbeddingPreTrained(
    word2vec_embeddings,
    freeze=False,
)

embedding_glove_freeze = EmbeddingPreTrained(glove_embeddings)
embedding_glove_fine_tune = EmbeddingPreTrained(
    glove_embeddings,
    freeze=False,
)

# A fresh (not pre-trained) embedding layer combined with spatial dropout.
random_embedding_with_spatial_dropout = EmbeddingWithDropout(
    embedding_layer=Embedding(num_embeddings=2000, embedding_dim=128),
    dropout=SpatialDropout1d(p=0.5),
)

emb = random_embedding_with_spatial_dropout(