コード例 #1
0
 def test_filter_terms_by_ic_max_n_terms(self):
     """Check that ``max_n_terms=3`` yields an (8, 3) matrix and 3 terms."""
     filtered = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.id_to_term,
         min_ic=0.0,
         max_n_terms=3,
     )
     reduced_matrix, reduced_terms = filtered
     # The expected row count (8) is fixed by this test; only columns shrink.
     self.assertEqual(reduced_matrix.shape, (8, 3))
     self.assertEqual(len(reduced_terms), 3)
コード例 #2
0
 def test_filter_terms_by_ic_max_n_terms(self):
     """Check that ``max_n_terms=3`` yields an (8, 3) matrix and 3 vocab entries."""
     filtered = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.vectorizer.vocabulary,
         min_ic=0.0,
         max_n_terms=3,
     )
     reduced_matrix, reduced_vocab = filtered
     # The expected row count (8) is fixed by this test; only columns shrink.
     self.assertEqual(reduced_matrix.shape, (8, 3))
     self.assertEqual(len(reduced_vocab), 3)
コード例 #3
0
 def test_filter_terms_by_ic_identity(self):
     """Check that ``min_ic=0.0`` with no term cap returns the inputs unchanged.

     The result must have the same shape as ``self.doc_term_matrix`` and a
     term mapping equal to ``self.id_to_term``.
     """
     filtered = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.id_to_term,
         min_ic=0.0,
         max_n_terms=None,
     )
     same_matrix, same_terms = filtered
     self.assertEqual(same_matrix.shape, self.doc_term_matrix.shape)
     self.assertEqual(same_terms, self.id_to_term)
コード例 #4
0
 def test_filter_terms_by_ic_identity(self):
     """Check that ``min_ic=0.0`` with no term cap returns the inputs unchanged.

     The result must have the same shape as ``self.doc_term_matrix`` and a
     vocabulary equal to ``self.vectorizer.vocabulary``.
     """
     filtered = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.vectorizer.vocabulary,
         min_ic=0.0,
         max_n_terms=None,
     )
     same_matrix, same_vocab = filtered
     self.assertEqual(same_matrix.shape, self.doc_term_matrix.shape)
     self.assertEqual(same_vocab, self.vectorizer.vocabulary)
コード例 #5
0
def test_filter_terms_by_ic_max_n_terms(vectorizer_and_dtm):
    """Check that ``max_n_terms=3`` yields an (8, 3) matrix and 3 vocab entries."""
    fitted_vectorizer, full_matrix = vectorizer_and_dtm
    filtered = vsm.filter_terms_by_ic(
        full_matrix,
        fitted_vectorizer.vocabulary_terms,
        min_ic=0.0,
        max_n_terms=3,
    )
    reduced_matrix, reduced_vocab = filtered
    # The expected row count (8) is fixed by this test; only columns shrink.
    assert reduced_matrix.shape == (8, 3)
    assert len(reduced_vocab) == 3
コード例 #6
0
def test_filter_terms_by_ic_identity(vectorizer_and_dtm):
    """Check that ``min_ic=0.0`` with no term cap returns the inputs unchanged.

    The result must have the same shape as the input matrix and a vocabulary
    equal to the vectorizer's ``vocabulary_terms``.
    """
    fitted_vectorizer, full_matrix = vectorizer_and_dtm
    filtered = vsm.filter_terms_by_ic(
        full_matrix,
        fitted_vectorizer.vocabulary_terms,
        min_ic=0.0,
        max_n_terms=None,
    )
    same_matrix, same_vocab = filtered
    assert same_matrix.shape == full_matrix.shape
    assert same_vocab == fitted_vectorizer.vocabulary_terms
コード例 #7
0
ファイル: test_vsm.py プロジェクト: chartbeat-labs/textacy
 def test_filter_terms_by_ic_max_n_terms(self):
     """Check that ``max_n_terms=3`` yields an (8, 3) matrix and 3 terms."""
     filtered = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.id_to_term,
         min_ic=0.0,
         max_n_terms=3,
     )
     reduced_matrix, reduced_terms = filtered
     # The expected row count (8) is fixed by this test; only columns shrink.
     self.assertEqual(reduced_matrix.shape, (8, 3))
     self.assertEqual(len(reduced_terms), 3)
コード例 #8
0
ファイル: test_vsm.py プロジェクト: chartbeat-labs/textacy
 def test_filter_terms_by_ic_identity(self):
     """Check that ``min_ic=0.0`` with no term cap returns the inputs unchanged.

     The result must have the same shape as ``self.doc_term_matrix`` and a
     term mapping equal to ``self.id_to_term``.
     """
     filtered = vsm.filter_terms_by_ic(
         self.doc_term_matrix,
         self.id_to_term,
         min_ic=0.0,
         max_n_terms=None,
     )
     same_matrix, same_terms = filtered
     self.assertEqual(same_matrix.shape, self.doc_term_matrix.shape)
     self.assertEqual(same_terms, self.id_to_term)