Ejemplo n.º 1
0
def test_Textblog():
    """Train a Naive Bayes classifier on tokenized sentences and classify one.

    Every training sentence is split into word tokens with
    ``nltk.word_tokenize`` before it is handed to ``NaiveBayesClassifier``.
    The tokenized training set is printed, followed by the tokens of a
    sample sentence and the label the classifier assigns to them.
    """
    import nltk

    train = [('I love this sandwich.', 'pos'),
             ('This is an amazing place!', 'pos'),
             ('I feel very good about these beers.', 'pos'),
             ('This is my best work.', 'pos'), ("What an awesome view", 'pos'),
             ('I do not like this restaurant', 'neg'),
             ('I am tired of this stuff.', 'neg'),
             ("I can't deal with this", 'neg'),
             ('He is my sworn enemy!', 'neg'), ('My boss is horrible.', 'neg')]

    # Pair each tokenized sentence with its original, unchanged label.
    new_train = [(nltk.word_tokenize(sentence), label)
                 for sentence, label in train]

    # A parenthesised single-argument print produces the same output on
    # Python 2 and Python 3.
    print(new_train)
    cl = NaiveBayesClassifier(new_train)
    new_test = nltk.word_tokenize("I don't like their pizza.")
    # Formatting both values into one string keeps the "tokens label" output
    # of the old two-argument print statement on either interpreter.
    print("%s %s" % (new_test, cl.classify(new_test)))
Ejemplo n.º 2
0
def nb(data):
    """Train a Naive Bayes classifier on part of ``data`` and print its accuracy.

    The split point is three times a whole quarter of ``len(data)``: the
    items before it are used for training, the remaining items for testing.

    Args:
        data: Sliceable sequence of labelled examples that is passed on to
            ``NaiveBayesClassifier`` and ``accuracy``.
    """
    # Floor division keeps the split index an integer on Python 3 as well;
    # with ``/`` the slice bound becomes a float there and slicing raises
    # TypeError.
    split = (len(data) // 4) * 3
    train = data[:split]
    test = data[split:]

    print("Training ...")
    classifier = NaiveBayesClassifier(train)
    print("Testing ...")
    # Two spaces after the colon reproduce the output of the original
    # ``print "Accuracy: ", value`` statement.
    print("Accuracy:  %s" % classifier.accuracy(test))
    """
Ejemplo n.º 3
0
def classifier_NB(training_dict, contro_list):
    """Build one Naive Bayes classifier per topic.

    For every topic in ``training_dict`` the training samples start with the
    entries of ``contro_list`` and gain a ``(keywords, topic)`` pair for each
    article whose ``content_keywords()`` result differs from an empty list.
    A topic whose sample list ends up empty gets no classifier.

    Returns:
        dict mapping each topic to the ``NaiveBayesClassifier`` trained on
        that topic's samples.
    """
    classifiers_by_topic = {}
    for topic, articles in training_dict.items():
        # Start from a fresh copy so topics never share a sample list.
        samples = list(contro_list)
        keyword_sets = (article.content_keywords() for article in articles)
        samples.extend(
            (keywords, topic) for keywords in keyword_sets if keywords != [])
        if not samples:
            continue
        classifiers_by_topic[topic] = NaiveBayesClassifier(samples)
    return classifiers_by_topic
Ejemplo n.º 4
0
 def setUp(self):
     """Create the training set, the classifier and the test set fixtures.

     ``train_set`` and ``test_set`` are lists of ``(sentence, label)``
     pairs labelled 'positive' or 'negative'; ``classifier`` is a
     ``NaiveBayesClassifier`` built from ``train_set``.
     """
     upbeat_training = (
         'I love this car',
         'This view is amazing',
         'I feel great this morning',
         'I am so excited about the concert',
         'He is my best friend',
     )
     gloomy_training = (
         'I do not like this car',
         'This view is horrible',
         'I feel tired this morning',
         'I am not looking forward to the concert',
         'He is my enemy',
     )
     upbeat_testing = (
         'I feel happy this morning',
         'Larry is my friend.',
     )
     gloomy_testing = (
         'I do not like that man.',
         'My house is not great.',
         'Your song is annoying.',
     )

     # Positive examples come first, then the negative ones.
     self.train_set = (
         [(text, 'positive') for text in upbeat_training] +
         [(text, 'negative') for text in gloomy_training])
     self.classifier = NaiveBayesClassifier(self.train_set)
     self.test_set = (
         [(text, 'positive') for text in upbeat_testing] +
         [(text, 'negative') for text in gloomy_testing])
Ejemplo n.º 5
0
from text.blob import TextBlob
# NaiveBayesClassifier is used below, so it has to be imported here too.
from text.classifiers import NaiveBayesClassifier

# Labelled (sentence, sentiment) pairs the classifier is trained on.
train = [('I love this sandwich.', 'pos'),
         ('This is an amazing place!', 'pos'),
         ('I feel very good about these beers.', 'pos'),
         ('This is my best work.', 'pos'), ("What an awesome view", 'pos'),
         ('I do not like this restaurant', 'neg'),
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'), ('He is my sworn enemy!', 'neg'),
         ('My boss is horrible.', 'neg')]
# Additional labelled sentences; the statements below do not read them.
test = [('The beer was good.', 'pos'), ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'), ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]

cl = NaiveBayesClassifier(train)

# Classify some text
print(cl.classify("Their burgers are amazing."))  # "pos"
print(cl.classify("I don't like their pizza."))  # "neg"

# Classify a TextBlob
blob = TextBlob(
    "The beer was amazing. But the hangover was horrible. "
    "My boss was not pleased.",
    classifier=cl)
print(blob)
print(blob.classify())

# Print each sentence of the blob on its own line.
for sentence in blob.sentences:
    print(sentence)
Ejemplo n.º 6
0
 def test_init_with_json_file(self):
     # Build the classifier from JSON_FILE with the format named explicitly,
     # then check a prediction and the type of the stored training text.
     classifier = NaiveBayesClassifier(JSON_FILE, format="json")
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     first_example = classifier.train_set[0]
     assert_true(isinstance(first_example[0], unicode))
Ejemplo n.º 7
0
 def test_init_with_csv_file_without_format_specifier(self):
     # Build the classifier from CSV_FILE without passing ``format``, then
     # check a prediction and the type of the stored training text.
     classifier = NaiveBayesClassifier(CSV_FILE)
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     first_example = classifier.train_set[0]
     assert_true(isinstance(first_example[0], unicode))
Ejemplo n.º 8
0
 def test_custom_feature_extractor(self):
     # The extractor is passed as the second positional argument.
     classifier = NaiveBayesClassifier(self.train_set, custom_extractor)
     # The returned label is not checked; the call only has to succeed.
     classifier.classify("Yay! I'm so happy it works.")
     first_features = classifier.train_features[0]
     assert_equal(first_features[1], 'positive')
Ejemplo n.º 9
0
 def test_init_with_bad_format_specifier(self):
     # Constructing the classifier with this format value is expected to
     # raise ValueError.
     bad_format = 'unknown'
     with assert_raises(ValueError):
         NaiveBayesClassifier(CSV_FILE, format=bad_format)
Ejemplo n.º 10
0
import os
from text.classifiers import NaiveBayesClassifier

# Labelled (word, language) pairs used for training: five Spanish words
# followed by five English words.
train = [
    ("amor", "spanish"),
    ("perro", "spanish"),
    ("playa", "spanish"),
    ("sal", "spanish"),
    ("oceano", "spanish"),
    ("love", "english"),
    ("dog", "english"),
    ("beach", "english"),
    ("salt", "english"),
    ("ocean", "english"),
]
# Labelled (word, language) pairs used for measuring accuracy, laid out the
# same way as the training pairs.
test = [
    ("ropa", "spanish"),
    ("comprar", "spanish"),
    ("camisa", "spanish"),
    ("agua", "spanish"),
    ("telefono", "spanish"),
    ("clothes", "english"),
    ("buy", "english"),
    ("shirt", "english"),
    ("water", "english"),
    ("telephone", "english"),
]


def extractor(word):
    '''Extract the last letter of a word as the only feature.

    Args:
        word: The string to take the feature from.

    Returns:
        A dict with the single key ``"last_letter(<c>)"`` mapped to ``True``,
        where ``<c>`` is the final character of ``word``.  An empty dict is
        returned for an empty ``word``, which has no last letter.
    '''
    if not word:
        # word[-1] would raise IndexError on an empty string.
        return {}
    return {"last_letter({0})".format(word[-1]): True}


# Train on the labelled words with the custom last-letter feature extractor.
lang_detector = NaiveBayesClassifier(train, feature_extractor=extractor)
print(lang_detector.accuracy(test))
# show_informative_features writes its table to stdout itself and returns
# None, so wrapping it in print() would only add a stray "None" line.
lang_detector.show_informative_features(5)
Ejemplo n.º 11
0
 def test_train_from_lists_of_words(self):
     # A classifier trained on whitespace-split word lists should reach the
     # same accuracy as the fixture classifier built in setUp.
     tokenized_train = []
     for sentence, tag in train_set:
         tokenized_train.append((sentence.split(), tag))
     word_list_classifier = NaiveBayesClassifier(tokenized_train)
     from_word_lists = word_list_classifier.accuracy(test_set)
     from_fixture = self.classifier.accuracy(test_set)
     assert_equal(from_word_lists, from_fixture)
Ejemplo n.º 12
0
 def setUp(self):
     """Build the classifier fixture from ``train_set``."""
     trained = NaiveBayesClassifier(train_set)
     self.classifier = trained