def test_classifier(self):
    """Fit an NBSVM classifier on four 20 Newsgroups categories and verify it.

    Checks two things:
      * the score returned by ``clf.evaluate`` on the held-out ``test``
        subset is at least 0.93 (presumably accuracy -- confirm against
        ``snlp.Classifier.evaluate``);
      * a short, obviously religious document is predicted as the
        ``soc.religion.christian`` class.
    """
    # Kept as a function-scope import, as in the original test.
    from sklearn.datasets import fetch_20newsgroups

    categories = [
        'alt.atheism',
        'soc.religion.christian',
        'comp.graphics',
        'sci.med',
    ]
    train_b = fetch_20newsgroups(
        subset='train', categories=categories, shuffle=True, random_state=42)
    test_b = fetch_20newsgroups(
        subset='test', categories=categories, shuffle=True, random_state=42)
    classes = train_b.target_names

    # Operands are wrapped in one-element tuples so that %-formatting cannot
    # misinterpret a sequence operand as the argument tuple.
    print('size of training set: %s' % (len(train_b['data']),))
    print('size of validation set: %s' % (len(test_b['data']),))
    print('classes: %s' % (classes,))

    x_train = train_b.data
    y_train = train_b.target
    x_test = test_b.data
    y_test = test_b.target

    clf = snlp.Classifier()
    clf.fit(x_train, y_train, ctype='nbsvm')
    self.assertGreaterEqual(clf.evaluate(x_test, y_test), 0.93)

    test_doc = 'god christ jesus mother mary church sunday lord heaven amen'
    # Look the expected label up by name instead of hard-coding 3; the integer
    # targets index into target_names, where this category sits at position 3.
    expected_label = classes.index('soc.religion.christian')
    self.assertEqual(clf.predict(test_doc), expected_label)
def test_classifier(self):
    """Train and check an NBSVM text classifier on a 20 Newsgroups subset.

    The classifier is fitted on the ``train`` subset restricted to four
    categories, then must (a) reach a ``clf.evaluate`` score of at least
    0.93 on the ``test`` subset (the metric is presumably accuracy --
    confirm in ``snlp.Classifier``) and (b) label a sample document full of
    Christian vocabulary as ``soc.religion.christian``.
    """
    # Function-scope import preserved from the original test.
    from sklearn.datasets import fetch_20newsgroups

    categories = [
        "alt.atheism",
        "soc.religion.christian",
        "comp.graphics",
        "sci.med",
    ]
    fetch_kwargs = dict(categories=categories, shuffle=True, random_state=42)
    train_b = fetch_20newsgroups(subset="train", **fetch_kwargs)
    test_b = fetch_20newsgroups(subset="test", **fetch_kwargs)
    classes = train_b.target_names

    # One-element tuples keep %-formatting safe whatever the operand type is.
    print("size of training set: %s" % (len(train_b["data"]),))
    print("size of validation set: %s" % (len(test_b["data"]),))
    print("classes: %s" % (classes,))

    x_train, y_train = train_b.data, train_b.target
    x_test, y_test = test_b.data, test_b.target

    clf = snlp.Classifier()
    clf.fit(x_train, y_train, ctype="nbsvm")
    self.assertGreaterEqual(clf.evaluate(x_test, y_test), 0.93)

    test_doc = "god christ jesus mother mary church sunday lord heaven amen"
    # Derive the expected class id from the label names rather than using the
    # bare literal 3, so the assertion documents which class is meant.
    christian_label = classes.index("soc.religion.christian")
    self.assertEqual(clf.predict(test_doc), christian_label)
def test_classifier_chinese(self):
    """Fit an NBSVM classifier on Chinese hotel reviews and verify it.

    Texts and labels are loaded from a ``|``-separated CSV (text column
    ``content``, label column ``pos``). The test expects a ``clf.evaluate``
    score of at least 0.98 and expects a positive sentence to be predicted
    as 1 and a negative sentence as 0.
    """
    fpath = './text_data/chinese_hotel_reviews.csv'
    (x_train, y_train, label_names) = snlp.Classifier.load_texts_from_csv(
        fpath, text_column='content', label_column='pos', sep='|')
    # Wrapped in a one-element tuple: with a bare operand, %-formatting would
    # raise TypeError if label_names were a tuple whose length is not 1.
    print('label names: %s' % (label_names,))

    clf = snlp.Classifier()
    clf.fit(x_train, y_train, ctype='nbsvm')
    # NOTE: evaluated on the same data the model was fitted on; no separate
    # held-out split is loaded in this test.
    self.assertGreaterEqual(clf.evaluate(x_train, y_train), 0.98)

    neg_text = '我讨厌和鄙视这家酒店。'
    pos_text = '我喜欢这家酒店。'
    self.assertEqual(clf.predict(pos_text), 1)
    self.assertEqual(clf.predict(neg_text), 0)
def test_classifier_chinese(self):
    """Train and check an NBSVM classifier on a Chinese hotel-review CSV.

    The CSV is read with ``snlp.Classifier.load_texts_from_csv`` using
    ``content`` as the text column, ``pos`` as the label column and ``|`` as
    the separator. After fitting, the ``clf.evaluate`` score must be at
    least 0.98, and two hand-written sentences must be classified as
    1 (positive) and 0 (negative) respectively.
    """
    fpath = "./text_data/chinese_hotel_reviews.csv"
    loaded = snlp.Classifier.load_texts_from_csv(
        fpath, text_column="content", label_column="pos", sep="|")
    (x_train, y_train, label_names) = loaded
    # The operand is an explicit one-element tuple so the print cannot fail
    # with TypeError should label_names itself turn out to be a tuple.
    print("label names: %s" % (label_names,))

    clf = snlp.Classifier()
    clf.fit(x_train, y_train, ctype="nbsvm")
    # NOTE: the score is computed on the training data itself; this test
    # loads no held-out split.
    self.assertGreaterEqual(clf.evaluate(x_train, y_train), 0.98)

    neg_text = "我讨厌和鄙视这家酒店。"
    pos_text = "我喜欢这家酒店。"
    self.assertEqual(clf.predict(pos_text), 1)
    self.assertEqual(clf.predict(neg_text), 0)