Python Embedding.Doc2Vec Examples

Programming Language: Python

Namespace/Package Name: textwiser

Class/Type: Embedding

Method/Function: Doc2Vec

Examples at hotexamples.com: 7

Python Embedding.Doc2Vec - 7 examples found. These are the top-rated real-world Python examples of textwiser.Embedding.Doc2Vec, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

TfIdf(15)

Doc2Vec(7)

Word(7)

Compound(6)

USE(4)

Random(1)

Example #1

Show file

File: test_doc2vec.py Project: yashugupta786/textwiser

    def test_deterministic_transform(self):
        """Check that `deterministic=True` makes the Doc2Vec transform reproducible.

        By default, running inference with doc2vec is not deterministic in gensim.
        This test makes sure we can get a deterministic result when necessary,
        both when `pretrained` is left out and when it is passed as None.
        """
        # Keyword arguments shared by both configurations under test.
        shared_options = dict(deterministic=True,
                              seed=1234,
                              vector_size=2,
                              min_count=1,
                              workers=1,
                              sample=0,
                              negative=0,
                              hashfxn=det_hash)
        expected = torch.tensor(
            [[0.0471987687, 0.0309393797],
             [-0.0278387405, -0.2347375602],
             [0.1042766869, -0.0033877781]],
            dtype=torch.float32)
        # First pass omits `pretrained`; second pass sets it explicitly to None.
        for extra_options in ({}, {'pretrained': None}):
            tw = TextWiser(
                Embedding.Doc2Vec(**extra_options, **shared_options),
                dtype=torch.float32)
            self._test_fit_before_transform(tw, expected)

Example #2

Show file

File: test_doc2vec.py Project: yashugupta786/textwiser

    def test_pretrained_error(self):
        """Invalid `pretrained` values should raise a ValueError."""
        # First value is not a string; second is a string that is not a path.
        for bad_pretrained in (3, '|||||||'):
            with self.assertRaises(ValueError):
                TextWiser(Embedding.Doc2Vec(pretrained=bad_pretrained),
                          dtype=torch.float32)

        # Not a path on the embedding object
        with self.assertRaises(ValueError):
            bad_embedding = _Doc2VecEmbeddings(pretrained='|||||||')
            bad_embedding.fit([])

Example #3

Show file

File: test_doc2vec.py Project: yashugupta786/textwiser

    def test_tokenizer_validation(self):
        """Exercise tokenizer validation with one accepted and two rejected tokenizers."""

        def split_on_whitespace(doc):
            return doc.lower().split()

        def lowercase_only(doc):
            return doc.lower()

        def constant_int_list(doc):
            return [1]

        # shouldn't raise an error
        try:
            TextWiser(Embedding.Doc2Vec(tokenizer=split_on_whitespace))
        except TypeError:
            self.fail("This tokenizer should pass the validation.")

        # should raise the first error, then the second error
        for rejected_tokenizer in (lowercase_only, constant_int_list):
            with self.assertRaises(TypeError):
                TextWiser(Embedding.Doc2Vec(tokenizer=rejected_tokenizer))

Example #4

Show file

 def test_set_params(self):
     """Check `set_params` on container options, the implementation, and a schema."""
     # Set the arguments in container classes
     embedding_options = Embedding.TfIdf(min_df=5)
     nmf_options = Transformation.NMF(n_components=30)
     model = TextWiser(embedding_options, nmf_options, lazy_load=True)
     model.set_params(embedding__min_df=10,
                      transformations__0__n_components=10)
     self.assertEqual(model.embedding.min_df, 10)
     self.assertEqual(model.transformations[0].n_components, 10)

     # Set the arguments in implementation
     doc2vec_options = Embedding.Doc2Vec(vector_size=2,
                                         min_count=1,
                                         workers=1)
     model = TextWiser(doc2vec_options)
     model.fit(docs)
     model.set_params(_imp__0__seed=10)
     self.assertEqual(model._imp[0].seed, 10)

     # Set the arguments in a schema
     compound_schema = {
         'transform': ['tfidf', ['nmf', {'n_components': 30}]],
     }
     model = TextWiser(Embedding.Compound(schema=compound_schema))
     model.set_params(embedding__schema__transform__0__min_df=10,
                      embedding__schema__transform__1__n_components=10)
     transform_steps = model.embedding.schema['transform']
     self.assertEqual(transform_steps[0][1]['min_df'], 10)
     self.assertEqual(transform_steps[1][1]['n_components'], 10)

     # Replace a part of the schema in a list
     model.set_params(embedding__schema__transform__0='bow')
     first_step = model.embedding.schema['transform'][0]
     self.assertEqual(first_step, 'bow')

     # Replace a part of the schema
     model.set_params(embedding__schema__transform=['bow'])
     first_step = model.embedding.schema['transform'][0]
     self.assertEqual(first_step, 'bow')

Example #5

Show file

File: test_doc2vec.py Project: yashugupta786/textwiser

 def test_pretrained(self):
     """Check that a pickled Doc2Vec model can be reloaded via `pretrained`.

     A model is fitted first. Its pickled form is then loaded back twice,
     once from an open file object and once from a file path, and each
     reloaded model must reproduce the expected vectors.
     """
     tw = TextWiser(Embedding.Doc2Vec(deterministic=True,
                                      seed=1234,
                                      vector_size=2,
                                      min_count=1,
                                      workers=1,
                                      sample=0,
                                      negative=0,
                                      hashfxn=det_hash),
                    dtype=torch.float32)
     expected = torch.tensor(
         [[0.0471987687, 0.0309393797], [-0.0278387405, -0.2347375602],
          [0.1042766869, -0.0033877781]],
         dtype=torch.float32)
     self._test_fit_before_transform(tw, expected)
     # Test loading from bytes
     with NamedTemporaryFile() as file:
         pickle.dump(tw._imp[0].model, file)
         # Rewind so the model is read from the start of the file.
         file.seek(0)
         tw = TextWiser(Embedding.Doc2Vec(pretrained=file,
                                          deterministic=True,
                                          seed=1234),
                        dtype=torch.float32)
         predicted = tw.fit_transform(docs)
         self.assertTrue(
             torch.allclose(predicted, expected.to(device), atol=1e-6))
     # Test loading from file
     file_path = self._get_test_path('data', 'doc2vec.pkl')
     try:
         with open(file_path, 'wb') as fp:
             pickle.dump(tw._imp[0].model, fp)
         tw = TextWiser(Embedding.Doc2Vec(pretrained=file_path,
                                          deterministic=True,
                                          seed=1234),
                        dtype=torch.float32)
         predicted = tw.fit_transform(docs)
         self.assertTrue(
             torch.allclose(predicted, expected.to(device), atol=1e-6))
     finally:
         # Clean up even when an assertion or load above fails, so a failing
         # run does not leave the scratch pickle behind. The existence check
         # avoids masking the original error if the file was never created.
         if os.path.exists(file_path):
             os.remove(file_path)

Example #6

Show file

File: test_doc2vec.py Project: yashugupta786/textwiser

 def test_fit_transform(self):
     """Run the shared fit/transform check on a seeded Doc2Vec embedding."""
     doc2vec_options = dict(seed=1234,
                            vector_size=2,
                            min_count=1,
                            workers=1,
                            sample=0,
                            negative=0,
                            hashfxn=det_hash)
     model = TextWiser(Embedding.Doc2Vec(**doc2vec_options),
                       dtype=torch.float32)
     # Reference vectors the helper compares the model output against.
     reference_rows = [
         [0.0471987687, 0.0309393797],
         [-0.0278387405, -0.2347375602],
         [0.1042766869, -0.0033877781],
     ]
     expected = torch.tensor(reference_rows, dtype=torch.float32)
     self._test_fit_transform(model, expected)

Example #7

Show file

 def test_options_immutable(self):
     """Assigning to an option attribute should raise and leave the value unchanged."""
     options = Embedding.Doc2Vec(deterministic=False)
     # Attempt the attribute assignment; a ValueError is expected.
     with self.assertRaises(ValueError):
         setattr(options, 'deterministic', True)
     # The original value must still be in place after the failed assignment.
     self.assertFalse(options.deterministic)