예제 #1
0
 def test_predict_labels(self):
     """Train a MultinomialNB pipeline and check the labels it predicts.

     Uses ``self.labels``, ``self.texts`` and ``self.test_dir``, which are
     expected to be prepared by the test fixture (not visible here).
     """
     cf_cv = ClassifierCv(self.labels, self.texts)
     name = 'MultinomialNB'
     metric = 'f1'
     cf_cv.train_save_metrics([
         ('vect', CountVectorizer()),
         ('tfidf', TfidfTransformer()),
         ('clf', MultinomialNB(alpha=.05)),
     ], metric, name, self.test_dir, self.test_dir)
     labels = cf_cv.predict(['bad', 'good'])
     # Compare as plain lists: a wrong label or a wrong number of labels
     # then fails with a readable diff, instead of "False is not true" or
     # a TypeError from calling all() on a scalar comparison result.
     self.assertEqual(list(labels), ['neg', 'pos'])
예제 #2
0
 def test_predict_labels_probas(self):
     """Check the container returned by ``predict(..., proba=True)``.

     After training a MultinomialNB pipeline, predicting two texts must
     give a 2x2 ``pandas.DataFrame`` with ``'pos'`` and ``'neg'`` columns
     whose entries are ``numpy.float64`` values.
     """
     cf_cv = ClassifierCv(self.labels, self.texts)
     name = 'MultinomialNB'
     metric = 'f1'
     cf_cv.train_save_metrics([
         ('vect', CountVectorizer()),
         ('tfidf', TfidfTransformer()),
         ('clf', MultinomialNB(alpha=.05)),
     ], metric, name, self.test_dir, self.test_dir)
     probas = cf_cv.predict(['bad', 'good'], proba=True)
     # Check the type first so the attribute accesses below fail with a
     # clear message when something other than a DataFrame comes back.
     self.assertIsInstance(probas, pd.DataFrame)
     # assertEqual (not assertTrue(a == b)) reports the actual shape.
     self.assertEqual(probas.shape, (2, 2))
     self.assertEqual(len(probas['pos'].values), 2)
     self.assertEqual(len(probas['neg'].values), 2)
     self.assertIsInstance(probas['neg'].values[0], np.float64)
예제 #3
0
    def test_unpickle(self):
        """Check that a pickled and restored classifier predicts identically.

        Trains a MultinomialNB pipeline, pickles the ``ClassifierCv`` into
        ``self.test_dir``, restores it with ``ClassifierCv.unpickle`` and
        compares the probability predictions of both objects on the same
        texts.
        """
        cf_cv = ClassifierCv(self.labels, self.texts)
        name = 'MultinomialNB'
        metric = 'f1'
        cf_cv.train_save_metrics([('vect', CountVectorizer()),
                                  ('tfidf', TfidfTransformer()),
                                  ('clf', MultinomialNB(alpha=.05))], metric,
                                 name, self.test_dir, self.test_dir)
        savefile = os.path.join(self.test_dir, 'clf_cv.cv')
        cf_cv.pickle(savefile)
        new_cf_cv = ClassifierCv.unpickle(savefile)

        texts = ['dont know that', 'nice good and bad']
        predicted_labels_orig = cf_cv.predict(texts, proba=True)
        predicted_labels_new = new_cf_cv.predict(texts, proba=True)

        # Assumes predict(..., proba=True) returns a pandas DataFrame --
        # confirm against ClassifierCv.predict. Iterating a DataFrame
        # yields its column labels, so all(df_a == df_b) only tests the
        # truthiness of the column names and passes whatever the values
        # are. DataFrame.equals compares shape and every element.
        self.assertTrue(
            predicted_labels_orig.equals(predicted_labels_new),
            'predictions differ after pickle/unpickle round trip',
        )