예제 #1
0
class TestDecisionTreeClassifier(unittest.TestCase):
    """Checks for the public interface of ``DecisionTreeClassifier``.

    Every test runs against a classifier that ``setUp`` builds from the
    module-level ``train_set`` fixture.
    """

    def setUp(self):
        """Create a fresh classifier from ``train_set`` for each test."""
        self.classifier = DecisionTreeClassifier(train_set)

    def test_classify(self):
        # The sample sentence must be labelled 'positive'.
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        # The classifier must expose as many training items as it was given.
        stored_count = len(self.classifier.train_set)
        assert_equal(stored_count, len(train_set))

    def test_accuracy(self):
        # accuracy() must hand back a float.
        score = self.classifier.accuracy(test_set)
        is_float = isinstance(score, float)
        assert_true(is_float)

    def test_update(self):
        # Feeding one extra labelled example grows train_set by exactly one.
        before = len(self.classifier.train_set)
        extra_examples = [("lorem ipsum", "positive")]
        self.classifier.update(extra_examples)
        after = len(self.classifier.train_set)
        assert_equal(before + 1, after)

    def test_custom_feature_extractor(self):
        # A classifier built with a user-supplied extractor must still
        # classify, and its first training feature pair keeps its label.
        custom_classifier = DecisionTreeClassifier(train_set, custom_extractor)
        custom_classifier.classify("Yay! I'm so happy it works.")
        first_label = custom_classifier.train_features[0][1]
        assert_equal(first_label, 'positive')

    def test_pseudocode(self):
        # The pseudocode dump must contain at least one "if".
        listing = self.classifier.pseudocode()
        assert_true("if" in listing)

    def test_pretty_format(self):
        # pprint() and pretty_format() must return the same text for the
        # same width.
        printed = self.classifier.pprint(width=60)
        formatted = self.classifier.pretty_format(width=60)
        assert_true(isinstance(printed, unicode))
        assert_equal(printed, formatted)

    def test_repr(self):
        # repr() reports how many instances the classifier was trained on.
        actual = repr(self.classifier)
        template = "<DecisionTreeClassifier trained on {0} instances>"
        expected = template.format(len(train_set))
        assert_equal(actual, expected)
예제 #2
0
class TestDecisionTreeClassifier(unittest.TestCase):
    """Unit tests covering ``DecisionTreeClassifier``.

    ``setUp`` trains a new classifier on the module-level ``train_set``
    before each test method runs.
    """

    def setUp(self):
        """Train the classifier under test on ``train_set``."""
        self.classifier = DecisionTreeClassifier(train_set)

    def test_classify(self):
        # Expect the 'positive' label for the sample sentence, and a
        # train_set on the classifier of the same size as the fixture.
        predicted = self.classifier.classify("I feel happy this morning")
        assert_equal(predicted, 'positive')
        kept = len(self.classifier.train_set)
        assert_equal(kept, len(train_set))

    def test_accuracy(self):
        # The accuracy on test_set must be reported as a float.
        accuracy = self.classifier.accuracy(test_set)
        assert_true(
            isinstance(accuracy, float)
        )

    def test_update(self):
        # update() with a single new example adds one entry to train_set.
        size_before = len(self.classifier.train_set)
        self.classifier.update(
            [("lorem ipsum", "positive")]
        )
        size_after = len(self.classifier.train_set)
        assert_equal(size_before + 1, size_after)

    def test_custom_feature_extractor(self):
        # Build with a custom extractor, run one classification, then
        # check the label stored with the first training feature set.
        classifier = DecisionTreeClassifier(train_set, custom_extractor)
        classifier.classify("Yay! I'm so happy it works.")
        label_of_first = classifier.train_features[0][1]
        assert_equal(label_of_first, 'positive')

    def test_pseudocode(self):
        # The generated pseudocode must mention "if" somewhere.
        pseudo = self.classifier.pseudocode()
        contains_if = "if" in pseudo
        assert_true(contains_if)

    def test_pretty_format(self):
        # Both pretty-printing entry points must agree on their output.
        via_pprint = self.classifier.pprint(width=60)
        via_pretty_format = self.classifier.pretty_format(width=60)
        assert_true(isinstance(via_pprint, unicode))
        assert_equal(via_pprint, via_pretty_format)

    def test_repr(self):
        # The repr embeds the number of training instances.
        shown = repr(self.classifier)
        wanted = "<DecisionTreeClassifier trained on {0} instances>".format(
            len(train_set)
        )
        assert_equal(shown, wanted)
예제 #3
0
# Compute and print the accuracy of the existing classifier `cl` on `test`.
print("Accuracy: {0}".format(cl.accuracy(test)))

# Show the most informative features. No count is passed, so the
# classifier's own default decides how many are listed.
cl.show_informative_features()

# Classify the contents of a text file with a decision tree.

# Small hand-labelled spam/ham corpus used to train the tree.
train = [('Buy cheap drugs', 'spam'), ('Cheap viagra', 'spam'),
         ('Win 1000 dollar', 'spam'), ('Greatings, how are you?', 'ham'),
         ('What an awesome picture', 'ham'), ('Send me your adress', 'ham'),
         ('viagra', 'spam')]

tr = DecisionTreeClassifier(train)

# Read the whole file inside a context manager so the handle is always
# closed, even if the read raises.
with open("test.txt") as file:
    t = file.read()
print(type(t))
blob = TextBlob(t, classifier=tr)
# NOTE(review): the value of this attribute access is discarded. It is kept
# in case the access is wanted for a side effect - confirm, and drop it if not.
blob.tags
print(blob)

# Print each sentence followed by the label the tree assigns to it.
for sentence in blob.sentences:
    print(sentence)
    print(sentence.classify())

# Label for the document as a whole, then dumps of the trained tree.
print(blob.classify())
print(tr.pseudocode())
print(tr.pretty_format())
print(tr.pprint())