Ejemplo n.º 1
0
    def test_classifier(self):
        """Fit an 'nbsvm' classifier on four 20 Newsgroups topics and verify it.

        The classifier is trained on the 'train' subset, must reach a score
        of at least 0.93 from ``evaluate`` on the 'test' subset, and must
        label a document made of Christian vocabulary as
        'soc.religion.christian'.
        """
        categories = [
            'alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med'
        ]
        from sklearn.datasets import fetch_20newsgroups
        # Same categories and seed for both subsets so the run is repeatable.
        train_b = fetch_20newsgroups(subset='train',
                                     categories=categories,
                                     shuffle=True,
                                     random_state=42)
        test_b = fetch_20newsgroups(subset='test',
                                    categories=categories,
                                    shuffle=True,
                                    random_state=42)
        # One-element tuples keep the %-formatting correct whatever sequence
        # type the right-hand value happens to be.
        print('size of training set: %s' % (len(train_b.data),))
        print('size of validation set: %s' % (len(test_b.data),))
        print('classes: %s' % (train_b.target_names,))

        clf = snlp.Classifier()
        clf.fit(train_b.data, train_b.target, ctype='nbsvm')
        self.assertGreaterEqual(clf.evaluate(test_b.data, test_b.target), 0.93)

        # Targets are indices into target_names, so look the expected label up
        # by name instead of hard-coding its position (3 for this selection).
        expected_label = train_b.target_names.index('soc.religion.christian')
        test_doc = 'god christ jesus mother mary church sunday lord heaven amen'
        self.assertEqual(clf.predict(test_doc), expected_label)
Ejemplo n.º 2
0
    def test_classifier(self):
        """Fit an "nbsvm" classifier on four 20 Newsgroups topics and verify it.

        The classifier is trained on the "train" subset, must reach a score
        of at least 0.93 from ``evaluate`` on the "test" subset, and must
        label a document made of Christian vocabulary as
        "soc.religion.christian".
        """
        categories = [
            "alt.atheism",
            "soc.religion.christian",
            "comp.graphics",
            "sci.med",
        ]
        from sklearn.datasets import fetch_20newsgroups

        # Same categories and seed for both subsets so the run is repeatable.
        train_b = fetch_20newsgroups(subset="train",
                                     categories=categories,
                                     shuffle=True,
                                     random_state=42)
        test_b = fetch_20newsgroups(subset="test",
                                    categories=categories,
                                    shuffle=True,
                                    random_state=42)
        # One-element tuples keep the %-formatting correct whatever sequence
        # type the right-hand value happens to be.
        print("size of training set: %s" % (len(train_b.data),))
        print("size of validation set: %s" % (len(test_b.data),))
        print("classes: %s" % (train_b.target_names,))

        clf = snlp.Classifier()
        clf.fit(train_b.data, train_b.target, ctype="nbsvm")
        self.assertGreaterEqual(clf.evaluate(test_b.data, test_b.target), 0.93)

        # Targets are indices into target_names, so look the expected label up
        # by name instead of hard-coding its position (3 for this selection).
        expected_label = train_b.target_names.index("soc.religion.christian")
        test_doc = "god christ jesus mother mary church sunday lord heaven amen"
        self.assertEqual(clf.predict(test_doc), expected_label)
Ejemplo n.º 3
0
 def test_classifier_chinese(self):
     """Fit an 'nbsvm' classifier on Chinese hotel reviews and verify it.

     Texts and labels are loaded from a '|'-separated CSV ('content' and
     'pos' columns). The classifier must score at least 0.98 from
     ``evaluate`` on the data it was fitted on, and must predict 1 for a
     positive review and 0 for a negative one.
     """
     csv_path = './text_data/chinese_hotel_reviews.csv'
     texts, labels, label_names = snlp.Classifier.load_texts_from_csv(
         csv_path, text_column='content', label_column='pos', sep='|')
     print('label names: %s' % label_names)

     classifier = snlp.Classifier()
     classifier.fit(texts, labels, ctype='nbsvm')
     # Evaluated on the same data used for fitting.
     fit_score = classifier.evaluate(texts, labels)
     self.assertGreaterEqual(fit_score, 0.98)

     # (review, expected label) pairs, checked positive first, then negative.
     expectations = (
         ('我喜欢这家酒店。', 1),
         ('我讨厌和鄙视这家酒店。', 0),
     )
     for review, expected_label in expectations:
         self.assertEqual(classifier.predict(review), expected_label)
Ejemplo n.º 4
0
 def test_classifier_chinese(self):
     """Fit an "nbsvm" classifier on Chinese hotel reviews and verify it.

     Texts and labels are loaded from a "|"-separated CSV ("content" and
     "pos" columns). The classifier must score at least 0.98 from
     ``evaluate`` on the data it was fitted on, and must predict 1 for a
     positive review and 0 for a negative one.
     """
     csv_path = "./text_data/chinese_hotel_reviews.csv"
     texts, labels, label_names = snlp.Classifier.load_texts_from_csv(
         csv_path, text_column="content", label_column="pos", sep="|")
     print("label names: %s" % label_names)

     classifier = snlp.Classifier()
     classifier.fit(texts, labels, ctype="nbsvm")
     # Evaluated on the same data used for fitting.
     fit_score = classifier.evaluate(texts, labels)
     self.assertGreaterEqual(fit_score, 0.98)

     # (review, expected label) pairs, checked positive first, then negative.
     expectations = (
         ("我喜欢这家酒店。", 1),
         ("我讨厌和鄙视这家酒店。", 0),
     )
     for review, expected_label in expectations:
         self.assertEqual(classifier.predict(review), expected_label)