def enginemongo(text):
    """Classify *text* with a Naive Bayes model trained from ``db.trainingset``.

    Each record returned by ``db.trainingset.find()`` (presumably a MongoDB
    collection -- confirm against where ``db`` is created) contributes one
    ``(question, answer)`` training pair. The classifier is built anew on
    every call, and diagnostic information is printed to stdout.

    Args:
        text: The sentence to classify.

    Returns:
        A dict with two keys: ``"answer_key"`` is the label with the highest
        probability (``""`` if no label has a positive probability) and
        ``"answer_prob"`` is that probability rounded to two decimals.
    """
    from textblob.classifiers import NaiveBayesClassifier

    tsarr = [(t["question"], t["answer"]) for t in db.trainingset.find()]
    print(tsarr)

    cl = NaiveBayesClassifier(tsarr)
    prob_dist = cl.prob_classify(text)
    print("TEST:", text, " ", prob_dist, " ", prob_dist.max())

    maxanswer = ""
    best_raw = 0.0
    for a in prob_dist.samples():
        raw = prob_dist.prob(a)
        # Compare the unrounded probabilities: comparing rounded values made
        # near-ties depend on iteration order and dropped every label whose
        # probability rounds down to 0.0.
        if raw > best_raw:
            best_raw = raw
            maxanswer = a
        print(a, ":", round(raw, 2))
    # Round only for reporting, after the winner has been chosen.
    maxprob = round(best_raw, 2) if maxanswer != "" or best_raw else 0

    # show_informative_features() prints its table itself and returns None,
    # so wrapping it in print() only added a stray "None" line.
    cl.show_informative_features()
    print("RISPOSTA:", maxanswer, " --- ", maxprob)

    aa = cl.extract_features(text)
    print(aa)
    print("---------------------------------------")

    return {"answer_key": maxanswer, "answer_prob": maxprob}
def engine(text):
    """Classify *text* with a Naive Bayes model trained from ``train.csv``.

    The training file is looked up in the current working directory. When it
    is missing it is downloaded once from ``url_train`` and kept as a local
    cache for later calls. Diagnostic information is printed to stdout.

    NOTE(review): ``url_train`` is just ``"https://"`` here -- it looks like a
    redacted placeholder; confirm the real URL before relying on the download.

    Args:
        text: The sentence to classify.

    Returns:
        The label returned by ``NaiveBayesClassifier.classify(text)``.

    Raises:
        requests.RequestException: If the training file has to be downloaded
            and the request fails or returns an HTTP error status.
    """
    from textblob.classifiers import NaiveBayesClassifier

    url_train = "https://"
    file_train = "train.csv"

    if not os.path.isfile(file_train):
        print("Train loaded from Request:", url_train)
        # The timeout keeps a stalled server from hanging the call forever.
        response = requests.get(url_train, stream=True, timeout=30)
        # Fail loudly: previously an HTTP error body was silently written to
        # the cache file and reused as "training data" on every later call.
        response.raise_for_status()
        # Download under a temporary name and rename at the end, so an
        # interrupted transfer never leaves a truncated file that a later
        # call would mistake for a valid cache.
        partial_path = file_train + ".part"
        with open(partial_path, "wb") as handle:
            for block in response.iter_content(1024):
                handle.write(block)
        os.replace(partial_path, file_train)
        print("Request DONE")
    else:
        print("Train loaded from cache:", file_train)

    with open(file_train, "r", encoding="utf8") as fp:
        cl = NaiveBayesClassifier(fp)

        prob_dist = cl.prob_classify(text)
        print("TEST:", text, " ", prob_dist, " ", prob_dist.max())
        for a in prob_dist.samples():
            print(a, ":", round(prob_dist.prob(a), 2))

        # show_informative_features() prints its table itself and returns
        # None, so wrapping it in print() only added a stray "None" line.
        cl.show_informative_features()

        aa = cl.extract_features(text)
        print(aa)
        print("---------------------------------------")

        return cl.classify(text)
class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier.

    Covers training from in-memory lists, from open CSV/JSON/TSV file
    objects and from a custom registered format, plus classification,
    accuracy, updating and feature introspection.
    """

    def setUp(self):
        # Classifier trained on the module-level fixture; its labels are
        # 'positive' / 'negative' (see test_labels).
        self.classifier = NaiveBayesClassifier(train_set)

    def test_default_extractor(self):
        text = "I feel happy this morning."
        assert_equal(self.classifier.extract_features(text),
                     basic_extractor(text, train_set))

    def test_classify(self):
        res = self.classifier.classify("I feel happy this morning")
        assert_equal(res, 'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        res = self.classifier.classify(
            ["I", "feel", "happy", "this", "morning"])
        assert_equal(res, "positive")

    def test_train_from_lists_of_words(self):
        # classifier can be trained on lists of words instead of strings
        train = [(doc.split(), label) for doc, label in train_set]
        classifier = NaiveBayesClassifier(train)
        assert_equal(classifier.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        res = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(res.max(), "positive")
        assert_true(res.prob("positive") > res.prob("negative"))

    def test_accuracy(self):
        acc = self.classifier.accuracy(test_set)
        assert_true(isinstance(acc, float))

    def test_update(self):
        # Snapshot probability and size before adding one labelled example.
        res1 = self.classifier.prob_classify("lorem ipsum")
        original_length = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        new_length = len(self.classifier.train_set)
        res2 = self.classifier.prob_classify("lorem ipsum")
        assert_true(res2.prob("positive") > res1.prob("positive"))
        assert_equal(original_length + 1, new_length)

    def test_labels(self):
        labels = self.classifier.labels()
        assert_true("positive" in labels)
        assert_true("negative" in labels)

    def test_show_informative_features(self):
        # Smoke test: passes as long as the call does not raise.
        self.classifier.show_informative_features()

    def test_informative_features(self):
        feats = self.classifier.informative_features(3)
        assert_true(isinstance(feats, list))
        assert_true(isinstance(feats[0], tuple))

    def test_custom_feature_extractor(self):
        cl = NaiveBayesClassifier(train_set, custom_extractor)
        cl.classify("Yay! I'm so happy it works.")
        assert_equal(cl.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        with open(CSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp, format="csv")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_csv_file_without_format_specifier(self):
        with open(CSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file(self):
        with open(JSON_FILE) as fp:
            cl = NaiveBayesClassifier(fp, format="json")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file_without_format_specifier(self):
        with open(JSON_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_custom_format(self):
        redis_train = [('I like turtles', 'pos'), ('I hate turtles', 'neg')]

        class MockRedisFormat(formats.BaseFormat):
            # Stand-in format: accepts any stream and always yields
            # redis_train, whatever client/port it was constructed with.
            def __init__(self, client, port):
                self.client = client
                self.port = port

            @classmethod
            def detect(cls, stream):
                return True

            def to_iterable(self):
                return redis_train

        formats.register('redis', MockRedisFormat)
        mock_redis = mock.Mock()
        # Extra keyword arguments (port) are expected to reach the format.
        cl = NaiveBayesClassifier(mock_redis, format='redis', port=1234)
        assert_equal(cl.train_set, redis_train)

    def test_data_with_no_available_format(self):
        mock_fp = mock.Mock()
        mock_fp.read.return_value = ''
        assert_raises(FormatError, lambda: NaiveBayesClassifier(mock_fp))

    def test_accuracy_on_a_csv_file(self):
        with open(CSV_FILE) as fp:
            a = self.classifier.accuracy(fp)
        assert_true(isinstance(a, float))

    def test_accuracy_on_json_file(self):
        # Must read JSON_FILE: this test previously opened CSV_FILE, so the
        # JSON path it is named after was never exercised.
        with open(JSON_FILE) as fp:
            a = self.classifier.accuracy(fp)
        assert_true(isinstance(a, float))

    def test_init_with_tsv_file(self):
        with open(TSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_bad_format_specifier(self):
        assert_raises(
            ValueError,
            lambda: NaiveBayesClassifier(CSV_FILE, format='unknown'))

    def test_repr(self):
        assert_equal(
            repr(self.classifier),
            "<NaiveBayesClassifier trained on {0} instances>".format(
                len(train_set)))
class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier trained from lists and from file paths."""

    def setUp(self):
        # Shared classifier built from the module-level train_set fixture.
        self.classifier = NaiveBayesClassifier(train_set)

    def test_default_extractor(self):
        sentence = "I feel happy this morning."
        expected = basic_extractor(sentence, train_set)
        assert_equal(self.classifier.extract_features(sentence), expected)

    def test_classify(self):
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        words = ["I", "feel", "happy", "this", "morning"]
        label = self.classifier.classify(words)
        assert_equal(label, "positive")

    def test_train_from_lists_of_words(self):
        # The classifier accepts pre-tokenised documents as well as strings.
        tokenised = []
        for doc, label in train_set:
            tokenised.append((doc.split(), label))
        from_tokens = NaiveBayesClassifier(tokenised)
        assert_equal(from_tokens.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        assert_true(dist.prob("positive") > dist.prob("negative"))

    def test_accuracy(self):
        score = self.classifier.accuracy(test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        # Measure first, then update, then measure again.
        before = self.classifier.prob_classify("lorem ipsum")
        size_before = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        size_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify("lorem ipsum")
        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(size_before + 1, size_after)

    def test_labels(self):
        known = self.classifier.labels()
        assert_true("positive" in known)
        assert_true("negative" in known)

    def test_show_informative_features(self):
        # Only checks that the call completes.
        self.classifier.show_informative_features()

    def test_informative_features(self):
        top = self.classifier.informative_features(3)
        assert_true(isinstance(top, list))
        assert_true(isinstance(top[0], tuple))

    def test_custom_feature_extractor(self):
        custom = NaiveBayesClassifier(train_set, custom_extractor)
        custom.classify("Yay! I'm so happy it works.")
        first_label = custom.train_features[0][1]
        assert_equal(first_label, 'positive')

    def test_init_with_csv_file(self):
        from_csv = NaiveBayesClassifier(CSV_FILE, format="csv")
        assert_equal(from_csv.classify("I feel happy this morning"), 'pos')
        first_sentence = from_csv.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_init_with_csv_file_without_format_specifier(self):
        from_csv = NaiveBayesClassifier(CSV_FILE)
        assert_equal(from_csv.classify("I feel happy this morning"), 'pos')
        first_sentence = from_csv.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_init_with_json_file(self):
        from_json = NaiveBayesClassifier(JSON_FILE, format="json")
        assert_equal(from_json.classify("I feel happy this morning"), 'pos')
        first_sentence = from_json.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_init_with_json_file_without_format_specifier(self):
        from_json = NaiveBayesClassifier(JSON_FILE)
        assert_equal(from_json.classify("I feel happy this morning"), 'pos')
        first_sentence = from_json.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_accuracy_on_a_csv_file(self):
        score = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(score, float))

    def test_accuracy_on_json_file(self):
        score = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(score, float))

    def test_init_with_tsv_file(self):
        from_tsv = NaiveBayesClassifier(TSV_FILE)
        assert_equal(from_tsv.classify("I feel happy this morning"), 'pos')
        first_sentence = from_tsv.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_init_with_bad_format_specifier(self):
        def build_with_unknown_format():
            return NaiveBayesClassifier(CSV_FILE, format='unknown')

        assert_raises(ValueError, build_with_unknown_format)

    def test_repr(self):
        expected = "<NaiveBayesClassifier trained on {0} instances>".format(
            len(train_set))
        assert_equal(repr(self.classifier), expected)
# Classify a few phrases; the recorded output was, in order: B, B, B, A.
for phrase in ('seu feio', 'seu horroroso', 'seu bonito', 'seu lindo'):
    print(c.classify(phrase))

print('labels:', c.labels())
# labels: ['A', 'B']

# Accuracy on two small hand-labelled test sets.
# Recorded output: "acuracia: 0.5" for the first, "acuracia: 1.0" for the second.
for test in (
    [('Voce e muito gato', 'A'), ('Voce e muito feio', 'B')],
    [('Voce e muito lindo', 'A'), ('Voce e muito feio', 'B')],
):
    print('acuracia:', c.accuracy(test))

print('features:', c.extract_features('Eu sou horroroso'))
# features: {'contains(Eu)': True, 'contains(sou)': True,
#            'contains(lindo)': False, 'contains(feio)': False}

c.show_informative_features()
# Most Informative Features
#     contains(sou) = True    B : A = 1.0 : 1.0
#     contains(Eu) = True     B : A = 1.0 : 1.0

#
# But the textblob package is about more than
# text classification. Let's take a look.
#
from textblob import TextBlob

text = TextBlob(
    "I went home. Because I'm happy. Clap along if you feel like a room without a roof."
)
class TestNaiveBayesClassifier(unittest.TestCase):
    """Unit tests for NaiveBayesClassifier (list and file-path training data)."""

    def setUp(self):
        self.classifier = NaiveBayesClassifier(train_set)

    def _check_pos_neg_classifier(self, cl):
        # Shared checks for classifiers trained from the data files: the
        # happy sentence is labelled 'pos' and the first stored training
        # sentence is a unicode string.
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def _check_accuracy_is_float(self, dataset):
        # Accuracy against any supported dataset must come back as a float.
        result = self.classifier.accuracy(dataset)
        assert_true(isinstance(result, float))

    def test_default_extractor(self):
        text = "I feel happy this morning."
        actual = self.classifier.extract_features(text)
        assert_equal(actual, basic_extractor(text, train_set))

    def test_classify(self):
        assert_equal(
            self.classifier.classify("I feel happy this morning"),
            'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        tokens = "I feel happy this morning".split()
        assert_equal(self.classifier.classify(tokens), "positive")

    def test_train_from_lists_of_words(self):
        # Training on word lists must score the same as training on strings.
        word_lists = [(sentence.split(), tag) for sentence, tag in train_set]
        other = NaiveBayesClassifier(word_lists)
        assert_equal(other.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        probs = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(probs.max(), "positive")
        assert_true(probs.prob("positive") > probs.prob("negative"))

    def test_accuracy(self):
        self._check_accuracy_is_float(test_set)

    def test_update(self):
        probe = "lorem ipsum"
        probs_before = self.classifier.prob_classify(probe)
        count_before = len(self.classifier.train_set)

        self.classifier.update([(probe, "positive")])

        count_after = len(self.classifier.train_set)
        probs_after = self.classifier.prob_classify(probe)
        assert_true(
            probs_after.prob("positive") > probs_before.prob("positive"))
        assert_equal(count_before + 1, count_after)

    def test_labels(self):
        labels = self.classifier.labels()
        for expected in ("positive", "negative"):
            assert_true(expected in labels)

    def test_show_informative_features(self):
        # Nothing to assert on; the call itself is the test.
        self.classifier.show_informative_features()

    def test_informative_features(self):
        informative = self.classifier.informative_features(3)
        assert_true(isinstance(informative, list))
        assert_true(isinstance(informative[0], tuple))

    def test_custom_feature_extractor(self):
        cl = NaiveBayesClassifier(train_set, custom_extractor)
        cl.classify("Yay! I'm so happy it works.")
        _, label = cl.train_features[0][0], cl.train_features[0][1]
        assert_equal(label, 'positive')

    def test_init_with_csv_file(self):
        self._check_pos_neg_classifier(
            NaiveBayesClassifier(CSV_FILE, format="csv"))

    def test_init_with_csv_file_without_format_specifier(self):
        self._check_pos_neg_classifier(NaiveBayesClassifier(CSV_FILE))

    def test_init_with_json_file(self):
        self._check_pos_neg_classifier(
            NaiveBayesClassifier(JSON_FILE, format="json"))

    def test_init_with_json_file_without_format_specifier(self):
        self._check_pos_neg_classifier(NaiveBayesClassifier(JSON_FILE))

    def test_accuracy_on_a_csv_file(self):
        self._check_accuracy_is_float(CSV_FILE)

    def test_accuracy_on_json_file(self):
        self._check_accuracy_is_float(JSON_FILE)

    def test_init_with_tsv_file(self):
        self._check_pos_neg_classifier(NaiveBayesClassifier(TSV_FILE))

    def test_init_with_bad_format_specifier(self):
        assert_raises(
            ValueError,
            lambda: NaiveBayesClassifier(CSV_FILE, format='unknown'),
        )

    def test_repr(self):
        template = "<NaiveBayesClassifier trained on {0} instances>"
        assert_equal(repr(self.classifier), template.format(len(train_set)))