Esempio n. 1
0
 def test_fit(self):
     """Fit a classifier and check the state it exposes afterwards.

     After ``fit`` the test expects ``classes_`` to equal the distinct
     training labels, ``x`` / ``y`` to match the training texts / labels
     element by element, and a file to exist at ``self.output + '.bin'``.
     """
     classifier = FasttextClassifier(output=self.output)
     classifier.fit(self.texts, self.labels)

     # Classes reported by the classifier vs. the distinct training labels.
     learned_classes = classifier.classes_
     expected_classes = list(self.labels.unique())
     self.assertEqual(learned_classes, expected_classes)

     # Training data kept on the instance must match what was passed in.
     texts_match = all(classifier.x == self.texts)
     self.assertTrue(texts_match)
     labels_match = all(classifier.y == self.labels)
     self.assertTrue(labels_match)

     # A '.bin' file is expected next to the configured output prefix.
     model_path = self.output + '.bin'
     self.assertTrue(os.path.isfile(model_path))
Esempio n. 2
0
 def test_load_pretrained(self):
     """Check that a second classifier can load a saved model and predict.

     One classifier is fitted with ``output=self.output``; a fresh
     instance then loads ``self.output + '.bin'`` through
     ``loadpretrained`` and must label 'very bad' as 'neg' and
     'very good' as 'pos'.
     """
     ft_clf = FasttextClassifier(output=self.output)
     ft_clf.fit(self.texts, self.labels)

     loaded_ft_clf = FasttextClassifier()
     loaded_ft_clf.loadpretrained(self.output + '.bin')
     labels = loaded_ft_clf.predict(['very bad', 'very good'])

     # Compare as plain lists: this works whether predict returns a list
     # or an array-like, and a failure shows the differing labels instead
     # of the opaque "False is not true" from assertTrue(all(...)).
     self.assertEqual(list(labels), ['neg', 'pos'])
Esempio n. 3
0
    def test_ft_weights(self):
        """Run a weighted three-model ensemble through ``ClassifierCv``.

        The ensemble combines two tf-idf pipelines (logistic regression and
        multinomial naive Bayes) with a ``FasttextClassifier``, weighted
        ``[1, 1, 0.5]``. After ``train_save_metrics`` the test expects five
        output files in ``self.test_dir`` and checks the types of the
        metric attributes exposed on the ``ClassifierCv`` instance.
        """
        clf1 = Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', LogisticRegression()),
        ])
        clf2 = Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', MultinomialNB()),
        ])
        clf3 = FasttextClassifier(epoch=2, output=self.ft_output)
        eclf = EnsembleClassifier(clfs=[clf1, clf2, clf3],
                                  weights=[1, 1, 0.5])

        cf_cv = ClassifierCv(self.labels, self.texts)
        name = 'ensemble_clf'
        metric = 'f1'
        cf_cv.train_save_metrics([('clf', eclf)],
                                 metric, name,
                                 self.test_dir,
                                 self.test_dir)

        # File names are kept exactly as originally expected; note that the
        # ROC/precision-recall names have no separator after ``name``.
        expected_files = (
            name + '_' + metric + '.png',
            name + 'ROC_AUC.png',
            name + 'prec_recall.png',
            name + '.xlsx',
            name + '_average.xlsx',
        )
        for expected_file in expected_files:
            path = os.path.join(self.test_dir, expected_file)
            # The message names the missing file so a failure is actionable.
            self.assertTrue(os.path.isfile(path),
                            msg='expected output file not found: ' + path)

        # assertIsInstance is the idiomatic type check and reports the
        # offending object on failure.
        self.assertIsInstance(cf_cv.roc_auc, dict)
        self.assertIsInstance(cf_cv.tpr, dict)
        self.assertIsInstance(cf_cv.fpr, dict)
        self.assertIsInstance(cf_cv.metrics_average_df, pd.DataFrame)
        self.assertIsInstance(cf_cv.metrics_df, pd.DataFrame)
        self.assertIsInstance(cf_cv.metrics_per_class, list)
        self.assertIsInstance(cf_cv.metrics_average, list)
Esempio n. 4
0
 def test_init(self):
     """Check the hyperparameters of a classifier built with no arguments."""
     classifier = FasttextClassifier()

     # (attribute name, expected value), checked in this order; the first
     # mismatch fails the test.
     expected_defaults = (
         ('lr', 0.1),
         ('lr_update_rate', 100),
         ('dim', 100),
         ('ws', 5),
         ('epoch', 100),
     )
     for attribute, expected in expected_defaults:
         self.assertEqual(getattr(classifier, attribute), expected)
Esempio n. 5
0
    def test_xft_classifiercv(self):
        """Train a fastText step via ``ClassifierCv`` and write an evaluation report.

        After ``calc_evaluation_report`` is called with
        ``savefile=<test_dir>/_eval_report``, the test expects two CSV files
        whose names are derived from that prefix and the run name.
        """
        name = 'ft'
        metric = 'f1'

        cf_cv = ClassifierCv(self.labels, self.texts)
        steps = [('clf', FasttextClassifier(output=self.output, epoch=1))]
        cf_cv.train_save_metrics(steps, metric, name,
                                 self.test_dir, self.test_dir)

        filename = os.path.join(self.test_dir, '_eval_report')
        eval_texts = self.df_test['text']
        eval_classes = self.df_test['class']
        cf_cv.calc_evaluation_report(eval_texts, eval_classes,
                                     savefile=filename)

        # Both report files share the "<savefile>_<name>" prefix.
        report_prefix = filename + "_" + name
        report_csv = report_prefix + ".csv"
        self.assertTrue(os.path.isfile(report_csv))
        average_csv = report_prefix + "_average.csv"
        self.assertTrue(os.path.isfile(average_csv))
Esempio n. 6
0
 def test_predict_proba_wrong_type(self):
     """``predict_proba`` must return ``None`` when given a bare string.

     The classifier is fitted first, then called with ``'text'`` rather
     than a list of texts.
     """
     ft_clf = FasttextClassifier(output=self.output)
     ft_clf.fit(self.texts, self.labels)
     probas = ft_clf.predict_proba('text')
     # Identity check: unlike ``assertEqual(probas, None)`` it never goes
     # through ``==``, so an unexpectedly returned array fails cleanly
     # instead of raising on an ambiguous truth value.
     self.assertIsNone(probas)
Esempio n. 7
0
 def test_predict_proba(self):
     """Check the ordering of predicted probabilities for two clear texts.

     For 'very bad' the second column must be at least as large as the
     first; for 'very good' the first must be at least as large as the
     second.
     """
     ft_clf = FasttextClassifier(output=self.output)
     ft_clf.fit(self.texts, self.labels)
     probas = ft_clf.predict_proba(['very bad', 'very good'])
     # NOTE(review): presumably column 0 is 'pos' and column 1 is 'neg' --
     # confirm against the classifier's class ordering.
     # The dedicated comparison assertions print both values on failure.
     self.assertLessEqual(probas[0][0], probas[0][1])
     self.assertGreaterEqual(probas[1][0], probas[1][1])
Esempio n. 8
0
 def test_predict_wrong_type(self):
     """``predict`` must return ``None`` when given a bare string.

     The classifier is fitted first, then called with ``"text"`` rather
     than a list of texts.
     """
     ft_clf = FasttextClassifier(output=self.output)
     ft_clf.fit(self.texts, self.labels)
     labels = ft_clf.predict("text")
     # Identity check: unlike ``assertEqual(labels, None)`` it never goes
     # through ``==``, so an unexpectedly returned array fails cleanly
     # instead of raising on an ambiguous truth value.
     self.assertIsNone(labels)
Esempio n. 9
0
 def test_predict(self):
     """A fitted classifier must label 'very bad' as 'neg' and 'very good' as 'pos'."""
     ft_clf = FasttextClassifier(output=self.output)
     ft_clf.fit(self.texts, self.labels)
     labels = ft_clf.predict(['very bad', 'very good'])
     # Compare as plain lists: this works whether predict returns a list
     # or an array-like, and a failure shows the differing labels instead
     # of the opaque "False is not true" from assertTrue(all(...)).
     self.assertEqual(list(labels), ['neg', 'pos'])