Esempio n. 1
0
 def test_filter_terms_by_ic_max_n_terms(self):
     """Check that ``max_n_terms=3`` caps the filtered output at three terms.

     With ``min_ic=0.0``, the filtered matrix is expected to have shape
     ``(8, 3)`` and the returned counterpart of ``self.id_to_term`` is
     expected to have length 3.
     """
     term_limit = 3
     filtered_matrix, filtered_id_to_term = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.id_to_term,
         min_ic=0.0,
         max_n_terms=term_limit,
     )
     self.assertEqual(filtered_matrix.shape, (8, term_limit))
     self.assertEqual(len(filtered_id_to_term), term_limit)
Esempio n. 2
0
 def test_filter_terms_by_ic_max_n_terms(self):
     """Check that ``max_n_terms=3`` caps the filtered output at three terms.

     The vectorizer's ``vocabulary`` is passed alongside the document-term
     matrix; with ``min_ic=0.0`` the test expects a matrix of shape
     ``(8, 3)`` and a returned vocabulary of length 3.
     """
     filter_options = {"min_ic": 0.0, "max_n_terms": 3}
     result = vsm.filter_terms_by_ic(
         self.doc_term_matrix, self.vectorizer.vocabulary, **filter_options
     )
     # The call is expected to yield exactly two values: matrix and vocabulary.
     filtered_matrix, filtered_vocab = result
     self.assertEqual(filtered_matrix.shape, (8, 3))
     self.assertEqual(len(filtered_vocab), 3)
Esempio n. 3
0
 def test_filter_terms_by_ic_identity(self):
     """Check that ``min_ic=0.0`` with no term cap leaves the inputs unchanged.

     The returned matrix must keep the shape of ``self.doc_term_matrix``
     and the returned mapping must compare equal to ``self.id_to_term``.
     """
     filtered_matrix, filtered_id_to_term = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.id_to_term,
         min_ic=0.0,
         max_n_terms=None,
     )
     actual_shape = filtered_matrix.shape
     expected_shape = self.doc_term_matrix.shape
     self.assertEqual(actual_shape, expected_shape)
     self.assertEqual(filtered_id_to_term, self.id_to_term)
Esempio n. 4
0
 def test_filter_terms_by_ic_identity(self):
     """Check that ``min_ic=0.0`` with no term cap leaves the inputs unchanged.

     The returned matrix must keep the shape of ``self.doc_term_matrix``
     and the returned vocabulary must compare equal to
     ``self.vectorizer.vocabulary``.
     """
     filter_options = {"min_ic": 0.0, "max_n_terms": None}
     filtered_matrix, filtered_vocab = vsm.filter_terms_by_ic(
         self.doc_term_matrix, self.vectorizer.vocabulary, **filter_options
     )
     self.assertEqual(filtered_matrix.shape, self.doc_term_matrix.shape)
     self.assertEqual(filtered_vocab, self.vectorizer.vocabulary)
Esempio n. 5
0
def test_filter_terms_by_ic_max_n_terms(vectorizer_and_dtm):
    """Check that ``max_n_terms=3`` caps the filtered output at three terms.

    The ``vectorizer_and_dtm`` fixture is unpacked into a vectorizer and a
    document-term matrix; with ``min_ic=0.0`` the test expects a filtered
    matrix of shape ``(8, 3)`` and a returned vocabulary of length 3.
    """
    fitted_vectorizer, source_matrix = vectorizer_and_dtm
    term_limit = 3
    filtered_matrix, filtered_vocab = vsm.filter_terms_by_ic(
        source_matrix,
        fitted_vectorizer.vocabulary_terms,
        min_ic=0.0,
        max_n_terms=term_limit,
    )
    assert filtered_matrix.shape == (8, term_limit)
    assert len(filtered_vocab) == term_limit
Esempio n. 6
0
def test_filter_terms_by_ic_identity(vectorizer_and_dtm):
    """Check that ``min_ic=0.0`` with no term cap leaves the inputs unchanged.

    The ``vectorizer_and_dtm`` fixture is unpacked into a vectorizer and a
    document-term matrix; the filtered matrix must keep the input shape and
    the returned vocabulary must equal ``vectorizer.vocabulary_terms``.
    """
    fitted_vectorizer, source_matrix = vectorizer_and_dtm
    filter_options = {"min_ic": 0.0, "max_n_terms": None}
    filtered_matrix, filtered_vocab = vsm.filter_terms_by_ic(
        source_matrix, fitted_vectorizer.vocabulary_terms, **filter_options
    )
    assert filtered_matrix.shape == source_matrix.shape
    assert filtered_vocab == fitted_vectorizer.vocabulary_terms
Esempio n. 7
0
 def test_filter_terms_by_ic_max_n_terms(self):
     """Check that ``max_n_terms=3`` caps the filtered output at three terms.

     Expects a filtered matrix of shape ``(8, 3)`` and a returned
     counterpart of ``self.id_to_term`` with three entries.
     """
     filter_options = {"min_ic": 0.0, "max_n_terms": 3}
     outcome = vsm.filter_terms_by_ic(
         self.doc_term_matrix, self.id_to_term, **filter_options
     )
     # The call is expected to yield exactly two values: matrix and mapping.
     reduced_matrix, reduced_id_to_term = outcome
     self.assertEqual(reduced_matrix.shape, (8, 3))
     self.assertEqual(len(reduced_id_to_term), 3)
Esempio n. 8
0
 def test_filter_terms_by_ic_identity(self):
     """Check that ``min_ic=0.0`` with no term cap leaves the inputs unchanged.

     The returned matrix must match the shape of ``self.doc_term_matrix``
     and the returned mapping must compare equal to ``self.id_to_term``.
     """
     outcome = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.id_to_term,
         min_ic=0.0,
         max_n_terms=None,
     )
     # The call is expected to yield exactly two values: matrix and mapping.
     unchanged_matrix, unchanged_id_to_term = outcome
     self.assertEqual(unchanged_matrix.shape, self.doc_term_matrix.shape)
     self.assertEqual(unchanged_id_to_term, self.id_to_term)