def test_pretrained(self):
    """TfIdf embeddings should round-trip through a pickled vectorizer, via both a file handle and a path."""
    featurizer = TextWiser(Embedding.TfIdf(pretrained=None, min_df=2), dtype=torch.float32)
    expected = torch.tensor([[0.4813341796, 0.6198053956, 0.0000000000, 0.6198053956],
                             [0.4091228545, 0.5268201828, 0.5268201828, 0.5268201828],
                             [0.6133555174, 0.0000000000, 0.7898069024, 0.0000000000]],
                            dtype=torch.float32)
    self._test_fit_transform(featurizer, expected)
    # Round-trip the fitted vectorizer through an open temporary file handle (loading from bytes)
    with NamedTemporaryFile() as handle:
        pickle.dump(featurizer._imp[0].vectorizer, handle)
        handle.seek(0)  # rewind so the loader reads from the start
        featurizer = TextWiser(Embedding.TfIdf(pretrained=handle), dtype=torch.float32)
        predicted = featurizer.fit_transform(docs)
        self.assertTrue(torch.allclose(predicted, expected.to(device), atol=1e-6))
    # Round-trip the vectorizer through a pickle file on disk (loading from a path)
    model_path = self._get_test_path('data', 'tfidf.pkl')
    with open(model_path, 'wb') as out:
        pickle.dump(featurizer._imp[0].vectorizer, out)
    featurizer = TextWiser(Embedding.TfIdf(pretrained=model_path), dtype=torch.float32)
    predicted = featurizer.fit_transform(docs)
    self.assertTrue(torch.allclose(predicted, expected.to(device), atol=1e-6))
    os.remove(model_path)
def test_dtype(self):
    """Output dtype should follow the requested torch or numpy dtype, pooled or not."""
    # Pooled output with a torch dtype
    featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                           Transformation.Pool(pool_option=PoolOptions.max),
                           dtype=torch.float32)
    self.assertEqual(featurizer.fit_transform(docs).dtype, torch.float32)
    # Pooled output with a numpy dtype
    featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                           Transformation.Pool(pool_option=PoolOptions.max),
                           dtype=np.float32)
    self.assertEqual(featurizer.fit_transform(docs).dtype, np.float32)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Unpooled word embeddings with a torch dtype; check the first document's tensor
        featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                               dtype=torch.float32)
        self.assertEqual(featurizer.fit_transform(docs)[0].dtype, torch.float32)
        # Unpooled word embeddings with a numpy dtype
        featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                               dtype=np.float32)
        self.assertEqual(featurizer.fit_transform(docs)[0].dtype, np.float32)
def _test_index(self, pool_option):
    """Pooling with first/last should equal directly indexing the raw word vectors."""
    # PoolOptions.first maps to index 0; anything else (last) maps to -1
    index = 0 if pool_option == PoolOptions.first else -1
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Reference: raw per-word embeddings of the first document, indexed manually
        featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                               dtype=torch.float32)
        expected = featurizer.fit_transform(docs[0])[0][index].view(1, -1)
        # Same embedding with the pooling transformation applied
        featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                               Transformation.Pool(pool_option=pool_option),
                               dtype=torch.float32)
        pooled = featurizer.fit_transform(docs[0])
        self.assertTrue(torch.allclose(expected.to(device), pooled.to(device)))
def test_num_components(self):
    """SVD should honor n_components both below and above the natural count (3 here)."""
    # Restrict below the natural number of components
    n_components = 2
    featurizer = TextWiser(Embedding.TfIdf(min_df=2),
                           Transformation.SVD(n_components=n_components),
                           dtype=torch.float32)
    self.assertEqual(featurizer.fit_transform(docs).shape[1], n_components)
    self._reset_seed()
    # Expand above the natural number of components
    n_components = 200
    featurizer = TextWiser(Embedding.TfIdf(min_df=2),
                           Transformation.SVD(n_components=n_components),
                           dtype=torch.float32)
    self.assertEqual(featurizer.fit_transform(docs).shape[1], n_components)
def test_list_handling(self):
    """Without pooling, fit_transform should return one per-word embedding tensor per document."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                               Transformation.SVD(n_components=2),
                               dtype=torch.float32)
        predicted = featurizer.fit_transform(docs)
        expected = [
            torch.tensor([[-0.9719871283, 0.0947150663],
                          [-0.3805825114, -1.0427029133],
                          [-0.6929296255, 0.1793890595],
                          [0.0000000000, 0.0000000000]], dtype=torch.float32),
            torch.tensor([[-0.9719871283, 0.0947150663],
                          [-0.3805825114, -1.0427029133],
                          [-0.7170552015, 0.0105144158],
                          [-0.9385635853, 0.6596723199],
                          [0.0000000000, 0.0000000000]], dtype=torch.float32),
            torch.tensor([[-0.8687936068, -0.9333068132],
                          [-0.6859120131, 0.0732812732],
                          [-0.9385635853, 0.6596723199],
                          [0.0000000000, 0.0000000000]], dtype=torch.float32),
        ]
        # Compare each document's embedding against its expected tensor
        for got, want in zip(predicted, expected):
            self.assertTrue(torch.allclose(got, want.to(device), atol=1e-6))
def test_lazy_load(self):
    """With lazy_load=True, the underlying model (_imp) must stay None until fit/fit_transform."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Lazy model should be materialized by fit()
        featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                               lazy_load=True)
        self.assertIsNone(featurizer._imp)
        featurizer.fit(docs)
        self.assertIsNotNone(featurizer._imp)
        # Lazy model should also be materialized by fit_transform(), including the fine-tunable path
        featurizer = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained='en-turian'),
                               lazy_load=True, dtype=torch.float32, is_finetuneable=True)
        self.assertIsNone(featurizer._imp)
        featurizer.fit_transform(docs)
        self.assertIsNotNone(featurizer._imp)
def test_pretrained(self):
    """Doc2Vec embeddings should round-trip through a pickled model, via both a file handle and a path."""
    featurizer = TextWiser(Embedding.Doc2Vec(deterministic=True, seed=1234, vector_size=2,
                                             min_count=1, workers=1, sample=0, negative=0,
                                             hashfxn=det_hash),
                           dtype=torch.float32)
    expected = torch.tensor([[0.0471987687, 0.0309393797],
                             [-0.0278387405, -0.2347375602],
                             [0.1042766869, -0.0033877781]],
                            dtype=torch.float32)
    self._test_fit_before_transform(featurizer, expected)
    # Round-trip the trained model through an open temporary file handle (loading from bytes)
    with NamedTemporaryFile() as handle:
        pickle.dump(featurizer._imp[0].model, handle)
        handle.seek(0)  # rewind so the loader reads from the start
        featurizer = TextWiser(Embedding.Doc2Vec(pretrained=handle, deterministic=True, seed=1234),
                               dtype=torch.float32)
        predicted = featurizer.fit_transform(docs)
        self.assertTrue(torch.allclose(predicted, expected.to(device), atol=1e-6))
    # Round-trip the model through a pickle file on disk (loading from a path)
    model_path = self._get_test_path('data', 'doc2vec.pkl')
    with open(model_path, 'wb') as out:
        pickle.dump(featurizer._imp[0].model, out)
    featurizer = TextWiser(Embedding.Doc2Vec(pretrained=model_path, deterministic=True, seed=1234),
                           dtype=torch.float32)
    predicted = featurizer.fit_transform(docs)
    self.assertTrue(torch.allclose(predicted, expected.to(device), atol=1e-6))
    os.remove(model_path)