Example 1
    def test_fit_transform(self):
        tw = TextWiser(Embedding.TfIdf(min_df=2),
                       Transformation.LDA(n_components=2),
                       dtype=torch.float32)
        expected = torch.tensor(
            [[0.7724367976, 0.2275632024], [0.5895692706, 0.4104307294],
             [0.2381444573, 0.7618555427]],
            dtype=torch.float32)
        self._test_fit_transform(tw, expected)
        self._reset_seed()
        self._test_fit_before_transform(tw, expected)
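For context, a minimal standalone sketch of the TfIdf-then-LDA pipeline this test exercises. The imports are TextWiser's public API; the corpus, min_df=1, and the printed shape are illustrative assumptions, not taken from the test suite.

import torch
from textwiser import TextWiser, Embedding, Transformation

docs = ["a first short document",
        "a second document about topics",
        "a third document about topic models"]  # hypothetical corpus

tw = TextWiser(Embedding.TfIdf(min_df=1),
               Transformation.LDA(n_components=2),
               dtype=torch.float32)
vecs = tw.fit_transform(docs)  # torch.float32 tensor, one 2-dim topic vector per document
print(vecs.shape)              # torch.Size([3, 2]) for this 3-document corpus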
Example 2
    def test_finetune_validation(self):
        # Nothing is fine-tuneable if dtype is numpy
        with self.assertRaises(TypeError):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                TextWiser(Embedding.Word(word_option=WordOptions.word2vec,
                                         pretrained='en-turian'),
                          dtype=np.float32,
                          is_finetuneable=True)

        # Word2Vec is fine-tuneable
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                TextWiser(Embedding.Word(word_option=WordOptions.word2vec,
                                         pretrained='en-turian'),
                          dtype=torch.float32,
                          is_finetuneable=True,
                          lazy_load=True)
        except ValueError:
            self.fail("Word2vec is fine tuneable")

        # ELMo is not fine-tuneable, and should raise an error
        with self.assertRaises(ValueError):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                TextWiser(Embedding.Word(word_option=WordOptions.elmo),
                          dtype=torch.float32,
                          is_finetuneable=True,
                          lazy_load=True)

        # TfIdf is not fine-tuneable, and should raise an error
        with self.assertRaises(ValueError):
            TextWiser(Embedding.TfIdf(),
                      dtype=torch.float32,
                      is_finetuneable=True,
                      lazy_load=True)

        # TfIdf is not fine-tuneable, but SVD is
        try:
            TextWiser(Embedding.TfIdf(),
                      Transformation.SVD(),
                      dtype=torch.float32,
                      is_finetuneable=True,
                      lazy_load=True)
        except ValueError:
            self.fail("SVD is fine tuneable")

        # LDA cannot propagate gradients, so the whole thing is not fine-tuneable
        with self.assertRaises(ValueError):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                TextWiser(Embedding.Word(word_option=WordOptions.word2vec,
                                         pretrained='en'),
                          Transformation.LDA(),
                          dtype=torch.float32,
                          is_finetuneable=True,
                          lazy_load=True)

        schema = {
            'concat': [{
                'transform': [('word2vec', {
                    'pretrained': 'en-turian'
                }), ('pool', {
                    'pool_option': 'max'
                })]
            }, {
                'transform': ['tfidf', ('nmf', {
                    'n_components': 30
                })]
            }]
        }

        # Word2Vec is fine-tuneable, therefore the whole schema is fine-tuneable
        try:
            TextWiser(Embedding.Compound(schema=schema),
                      dtype=torch.float32,
                      is_finetuneable=True,
                      lazy_load=True)
        except ValueError:
            self.fail(
                "Any fine-tuneable weights is enough for the model to be fine-tuneable"
            )

        # TfIdf is not fine-tuneable, but SVD is
        schema = {'transform': ['tfidf', 'svd']}
        try:
            TextWiser(Embedding.Compound(schema=schema),
                      dtype=torch.float32,
                      is_finetuneable=True,
                      lazy_load=True)
        except ValueError:
            self.fail("SVD is fine tuneable")
Example 3
    def test_min_components(self):
        # LDA needs at least 2 components, so n_components=1 should be rejected
        with self.assertRaises(ValueError):
            TextWiser(Embedding.TfIdf(min_df=2),
                      Transformation.LDA(n_components=1),
                      dtype=torch.float32)