def classifyTest(self, test, classifier, feature_extractor):
    """Predict a label for each document in *test*.

    Each element of *test* is a ``(doc, label)`` pair; the gold label is
    ignored here — only the document id is used to fetch the corpus words.

    Returns a list of ``(doc, predicted_label)`` tuples.
    """
    return [
        (doc, classifier.classify(feature_extractor(movie_reviews.words(fileids=[doc]))))
        for doc, _gold in test
    ]
def classifyTest(self, test, classifier, feature_extractor):
    """Classify every document in *test* and return (doc, predicted) pairs.

    NOTE(review): this is a byte-for-byte duplicate of the classifyTest
    defined earlier in the file — consider removing one copy.
    """
    predictions = []
    for file_id, _gold in test:
        # Gold label is unused; fetch the corpus words for this file id.
        words = movie_reviews.words(fileids=[file_id])
        predicted = classifier.classify(feature_extractor(words))
        predictions.append((file_id, predicted))
    return predictions
# --- Naive Bayes demo: classify, inspect probabilities, retrain, hand-build ---
print(nb_classifier.classify(posfeat))
print(accuracy(nb_classifier, test_feats))
# Per-label probability distribution for the first test feature set.
probs = nb_classifier.prob_classify(test_feats[0][0])
print(probs.samples())
print(probs.max())
print(probs.prob('pos'))
print(probs.prob('neg'))
print(nb_classifier.most_informative_features(n=5))
print("############################################################################")
# show_most_informative_features prints its own table and returns None,
# so this also prints "None".
print(nb_classifier.show_most_informative_features(n=5))
print("############################################################################")
# Retrain with Laplace (add-one) smoothing; rebinds the module-level
# nb_classifier that later code (e.g. hasNegativeSent) reads.
nb_classifier = NaiveBayesClassifier.train(train_feats, estimator=LaplaceProbDist)
print("Accuracy: " + str(accuracy(nb_classifier, test_feats)))
# Accuracy: 0.76
# Hand-build a classifier: uniform label prior and deterministic feature
# distributions — 'yes' always implies 'pos', 'no' always implies 'neg'.
label_probdist = DictionaryProbDist({'pos': 0.5, 'neg': 0.5})
true_probdist = DictionaryProbDist({True: 1})
feature_probdist = {('pos', 'yes'): true_probdist, ('neg', 'no'): true_probdist}
classifier = NaiveBayesClassifier(label_probdist, feature_probdist)
print(classifier.classify({'yes': True}))
print(classifier.classify({'no': True}))
def hasNegativeSent(text):
    """Classify raw text (not tokens) with the module-level Naive Bayes model.

    Returns whatever label ``nb_classifier`` predicts for the extracted
    word features.
    """
    features = word_features(text)
    return nb_classifier.classify(features)
def test(self):
    """Shuffle the held-out items (index 100 onward) and classify each one.

    NOTE: the slice copies the list, so ``self.matched`` itself is not
    reordered by the shuffle. The per-item prediction is currently discarded.
    """
    held_out = self.matched[100:]
    random.shuffle(held_out)
    for sample in held_out:
        classified = NaiveBayesClassifier.classify(self.model, sample)
def batchClassify(self, model):
    """Run every unmatched item through *model* via its feature representation.

    NOTE: the per-item prediction is currently discarded.
    """
    for record in self.unmatched:
        classified = NaiveBayesClassifier.classify(model, record.featurize())
def classify(self, model, item):
    """Convert *item* to a name object, featurize its name/description
    tokens, and return the model's predicted label."""
    name_obj = self.convertItem(item)
    features = name_obj.input_featurize(name_obj.getNameDes())
    return NaiveBayesClassifier.classify(model, features)
def nameobj_classify(self, model, _name):
    """Classify a name object using its combined name + brand tokens.

    Bug fix: the original read ``name.brand`` — ``name`` is not in scope
    (the parameter is ``_name``), which raises NameError at call time.
    """
    tokens = _name.name + _name.brand
    classified = NaiveBayesClassifier.classify(model, _name.input_featurize(tokens))
    return classified
labeled=False)  # closes the apply_features(...) call for the training set started above

# Build the test set: clean, tokenize, and mark negation scope on each instance.
_test_X = analyzer.apply_features([
    mark_negation(word_tokenize(unidecode(clean_text(instance))))
    for instance in test_X
], labeled=False)

# NOTE(review): Python 2 print statements below, while other parts of this
# file use print() — this chunk only runs under Python 2.
print "Vader Classifier:"
print vader.polarity_scores(train_X[0])
# Spot-check VADER polarity against gold labels (predicted, gold):
print vader_polarity(train_X[0]), train_y[0]  # 0 1
print vader_polarity(train_X[1]), train_y[1]  # 0 0
print vader_polarity(train_X[2]), train_y[2]  # 1 1
print vader_polarity(train_X[3]), train_y[3]  # 0 1
print vader_polarity(train_X[4]), train_y[4]  # 0 0

# Evaluate VADER over the full test split.
pred_y = [vader_polarity(text) for text in test_X]
print "Vader Accuracy:", accuracy_score(test_y, pred_y)  # 0.6892
print "Vader Precision:", precision_score(test_y, pred_y, average='binary')
print "Vader Recall:", recall_score(test_y, pred_y, average='binary')

# Train a Naive Bayes classifier through the sentiment analyzer and score it.
trainer = NaiveBayesClassifier.train
classifier = analyzer.train(trainer, zip(_train_X, train_y[:TRAINING_COUNT]))
score = analyzer.evaluate(zip(_test_X, test_y))
print score
print "NB Accuracy: ", score['Accuracy']  # 0.8064 for TRAINING_COUNT=5000

# NOTE(review): classify is called on the class with only the feature sets —
# no trained classifier instance is passed, so this looks broken; presumably
# it should be classifier.classify(...) per item. Verify before relying on it.
classifyed = NaiveBayesClassifier.classify(_test_X)
print classifyed