Python WordEmbedding Examples

Programming Language: Python

Namespace/Package Name: dougu.embedding

Class/Type: WordEmbedding

Examples at hotexamples.com: 8

Python WordEmbedding - 8 examples found. These are the top-rated real-world Python examples of dougu.embedding.WordEmbedding extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

WordEmbedding(8)

compute_cosine_similarity(1)

compute_similar_words_from_vec(1)

compute_similar_words_from_word(1)

is_known(1)

to_index(1)

to_word(1)

Example #1

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

    def test_compute_similar_words_from_word(self) -> None:
        """Check the similar-word list returned for the word "てすと"."""
        sample_dir = Path(__file__).resolve().parent / "samples"
        embedding = WordEmbedding(str(sample_dir / "sample.word2vec.txt"))

        # The comparison is exact, including the similarity score.
        self.assertEqual(
            embedding.compute_similar_words_from_word("てすと"),
            [("サンプル", 0.7506074093675397)],
        )

Example #2

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

 def test_property_shape(self) -> None:
     """Check ``len()``, ``dim`` and ``shape`` for the sample word2vec file."""
     sample_dir = Path(__file__).resolve().parent / "samples"
     embedding = WordEmbedding(str(sample_dir / "sample.word2vec.txt"))

     # Expected sizes for the sample file: 2 entries, 10 values per entry.
     n_words, n_dims = 2, 10
     self.assertEqual(len(embedding), n_words)
     self.assertEqual(embedding.dim, n_dims)
     self.assertEqual(embedding.shape, (n_words, n_dims))

Example #3

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

    def test_error_unknown_words(self) -> None:
        """Check that a word list including "unk" raises ``ValueError``."""
        file_path = str(
            Path(__file__).resolve().parent / "samples" / "sample.word2vec.txt"
        )
        word_embeds = WordEmbedding(file_path)

        words = ["てすと", "unk"]
        # NOTE: ``msg`` is only the text reported when no ValueError is raised;
        # it does not check the message of the raised exception.
        with self.assertRaises(ValueError, msg="unknown word: 'unk'"):
            word_embeds(words)

Example #4

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

    def test_extract_word_embeddings(self) -> None:
        """Check the vectors returned for a list of words and for one word."""
        sample_dir = Path(__file__).resolve().parent / "samples"
        embedding = WordEmbedding(str(sample_dir / "sample.word2vec.txt"))

        # Expected vector for "てすと".
        tesuto_vec = [
            0.31882,
            0.89289,
            0.90071,
            0.45753,
            0.37083,
            0.64955,
            0.34075,
            0.70048,
            0.89085,
            0.13621,
        ]
        # Expected vector for "サンプル".
        sample_vec = [
            0.79375,
            0.44464,
            0.07644,
            0.35242,
            0.03996,
            0.68827,
            0.97103,
            0.77324,
            0.72781,
            0.69158,
        ]

        # A list of words is compared against a 2-D array, one row per word.
        np.testing.assert_almost_equal(
            embedding(["てすと", "サンプル"]),
            np.array([tesuto_vec, sample_vec]),
        )

        # A single word is compared against a 1-D array.
        np.testing.assert_almost_equal(embedding("てすと"), np.array(tesuto_vec))

Example #5

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

    def test_compute_cosine_similarity(self) -> None:
        """Check ``compute_cosine_similarity`` output for a fixed query vector."""
        sample_dir = Path(__file__).resolve().parent / "samples"
        embedding = WordEmbedding(str(sample_dir / "sample.word2vec.txt"))

        query_vec = np.array(
            [
                0.29902,
                0.90019,
                0.89964,
                0.50753,
                0.38001,
                0.59495,
                0.29175,
                0.69909,
                0.90185,
                0.09687,
            ]
        )
        similarities = embedding.compute_cosine_similarity(query_vec)

        # Compared approximately against the two expected similarity values.
        np.testing.assert_almost_equal(
            similarities,
            np.array([0.9987114080207757, 0.7286216119815097]),
        )

Example #6

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

    def test_compute_similar_words_from_vec(self) -> None:
        """Check the (word, score) list returned for a fixed query vector."""
        sample_dir = Path(__file__).resolve().parent / "samples"
        embedding = WordEmbedding(str(sample_dir / "sample.word2vec.txt"))

        query_vec = np.array(
            [
                0.29902,
                0.90019,
                0.89964,
                0.50753,
                0.38001,
                0.59495,
                0.29175,
                0.69909,
                0.90185,
                0.09687,
            ]
        )

        # The comparison is exact, including both similarity scores.
        self.assertEqual(
            embedding.compute_similar_words_from_vec(query_vec),
            [("てすと", 0.9987114080207757), ("サンプル", 0.7286216119815097)],
        )

Example #7

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

    def test_property_vocab(self) -> None:
        """Check ``vocab``, ``to_word``, ``to_index`` and ``is_known``."""
        sample_dir = Path(__file__).resolve().parent / "samples"
        embedding = WordEmbedding(str(sample_dir / "sample.word2vec.txt"))

        # The two words the assertions below expect in the vocabulary.
        first_word, second_word = "てすと", "サンプル"

        self.assertSetEqual(embedding.vocab, {first_word, second_word})
        self.assertEqual(embedding.to_word(1), second_word)
        self.assertEqual(embedding.to_index(first_word), 0)
        self.assertTrue(embedding.is_known(first_word))
        self.assertFalse(embedding.is_known("test"))

Example #8

0

Show file

File: test_embedding.py Project: Ryuto10/dougu

 def test_load_glove_format_file(self) -> None:
     """Check that ``WordEmbedding`` can be built from ``sample.glove.txt``."""
     glove_file = Path(__file__).resolve().parent / "samples" / "sample.glove.txt"
     embedding = WordEmbedding(str(glove_file))
     self.assertIsInstance(embedding, WordEmbedding)