def print_precision_recall(classifier, test_dict):
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(test_dict):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['positive'], testsets['positive'])
    print 'pos recall:', recall(refsets['positive'], testsets['positive'])
    print 'pos F-measure:', f_measure(refsets['positive'], testsets['positive'])
    print 'neg precision:', precision(refsets['negative'], testsets['negative'])
    print 'neg recall:', recall(refsets['negative'], testsets['negative'])
    print 'neg F-measure:', f_measure(refsets['negative'], testsets['negative'])
def get_performance(clf_sel, train_features, test_features):
    ref_set = collections.defaultdict(set)
    test_set = collections.defaultdict(set)
    classification_error = False

    clf = SklearnClassifier(clf_sel)
    try:
        classifier = clf.train(train_features)
    except:
        classification_error = True
        # print(str(clf_sel.__class__), 'NA')

    # only pickle and score when training succeeded
    if not classification_error:
        if str(clf_sel.__class__) == "<class 'sklearn.naive_bayes.MultinomialNB'>":
            pickle_cls(classifier, 'MultinomialNB')

        clf_acc = nltk.classify.accuracy(classifier, test_features)

        for i, (features, label) in enumerate(test_features):
            ref_set[label].add(i)
            predicted = classifier.classify(features)
            test_set[predicted].add(i)

        pos_precision = precision(ref_set['pos'], test_set['pos'])
        pos_recall = recall(ref_set['pos'], test_set['pos'])
        neg_precision = precision(ref_set['neg'], test_set['neg'])
        neg_recall = recall(ref_set['neg'], test_set['neg'])

        print("{0},{1},{2},{3},{4},{5}".format(
            clf_sel.__class__, clf_acc, pos_precision, pos_recall, neg_precision, neg_recall))
def GetEvaluacion(self):
    '''
    Returns the precision and recall measures and the confusion matrix of the
    classifier, using NLTK's precision, recall and ConfusionMatrix functions
    together with the test set.

    Returns a tuple (positives, negatives, matrix) where positives and
    negatives are themselves tuples of (precision, recall).

    Precision: of the instances assigned to a class, the fraction that truly
    belong to it: TP / (TP + FP). The higher this is, the fewer false
    positives; it is the percentage of elements placed in the class that were
    classified correctly.

    Recall: of the instances that truly belong to a class, the fraction that
    was assigned to it: TP / (TP + FN). The higher this is, the fewer false
    negatives; of everything that really belongs to the class, it measures how
    much was found.

    Both precision and recall are per class, not global (there is one
    precision/recall pair for the positive-comment class and another for the
    negative-comment class).

    Worth reading:
    http://streamhacker.com/2010/05/17/text-classification-sentiment-analysis-precision-recall/
    '''
    clasificador = self.GetClasificador()
    corpus = self.DatosTesteo

    # The NLTK functions work on sets, so build reference and test sets for
    # the positive and negative classes from the test corpus.
    refSet = {CLASE_POSITIVO: set(), CLASE_NEGATIVO: set()}   # gold labels
    testSet = {CLASE_POSITIVO: set(), CLASE_NEGATIVO: set()}  # predicted labels

    # Parallel lists for the confusion matrix.
    refList = []
    testList = []

    for i, c in enumerate(corpus):
        refSet[c[1]].add(i)  # add to the positive or negative set according to its class
        clasificado = clasificador.classify(c[0])
        testSet[clasificado].add(i)
        refList.append(c[1])
        testList.append(clasificado)

    positivos = (
        precision(refSet[CLASE_POSITIVO], testSet[CLASE_POSITIVO]),
        recall(refSet[CLASE_POSITIVO], testSet[CLASE_POSITIVO])
    )
    negativos = (
        precision(refSet[CLASE_NEGATIVO], testSet[CLASE_NEGATIVO]),
        recall(refSet[CLASE_NEGATIVO], testSet[CLASE_NEGATIVO])
    )

    return (positivos, negativos, ConfusionMatrix(refList, testList))
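The TP / (TP + FP) and TP / (TP + FN) formulas in the docstring map directly onto NLTK's set-based metrics. A minimal illustration, assuming only nltk.metrics is available; the index sets below are made up for the example:

from nltk.metrics import precision, recall

reference = {0, 1, 2, 3}   # indices that truly belong to the class (TP + FN = 4)
predicted = {0, 1, 4}      # indices the classifier assigned to the class (TP + FP = 3)

# TP = |reference & predicted| = 2
print(precision(reference, predicted))  # 2 / 3 = 0.666...
print(recall(reference, predicted))     # 2 / 4 = 0.5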
def benchmarking(self, classifier, _test_set, all_f_measure=[], all_precision=[], all_recall=[]):
    from nltk import classify
    accuracy = classify.accuracy(classifier, _test_set)
    print("accuracy:", accuracy)

    from nltk.metrics import precision
    from nltk.metrics import recall
    from nltk.metrics import f_measure
    import collections

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(_test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    prec = precision(refsets['class'], testsets['class'])
    rec = recall(refsets['class'], testsets['class'])
    f1 = f_measure(refsets['class'], testsets['class'])
    print('precision:', prec)
    print('recall:', rec)
    print('F-measure:', f1)

    all_f_measure.append(f1)
    all_precision.append(prec)
    all_recall.append(rec)

    print('========Show top 10 most informative features========')
    classifier.show_most_informative_features(10)
def precision_and_recall(classifier, testfeats):
    # Computes per-label precision and recall for the classifier.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    # feats is the dictionary of word features; label is 'pos' or 'neg'.
    for i, (feats, label) in enumerate(testfeats):
        # a mapping of which entries are pos and neg
        # e.g. refsets['pos'] = {1, 2, 3, 4, 6, 7, 11, 78}
        refsets[label].add(i)
        # classify the entry as pos or neg given its feats
        observed = classifier.classify(feats)
        # a mapping of entries to their predicted classifications
        # e.g. testsets['pos'] = {1, 2, 3, 4, 5, 8, 11}
        testsets[observed].add(i)

    prec = {}
    rec = {}
    for label in classifier.labels():
        prec[label] = precision(refsets[label], testsets[label])
        rec[label] = recall(refsets[label], testsets[label])

    return prec, rec
def multi_metrics(multi_classifier, test_feats):
    mds = []
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feat, labels) in enumerate(test_feats):
        for label in labels:
            refsets[label].add(i)

        guessed = multi_classifier.classify(feat)

        for label in guessed:
            testsets[label].add(i)

        mds.append(metrics.masi_distance(set(labels), guessed))

    avg_md = sum(mds) / float(len(mds))
    precisions = {}
    recalls = {}

    for label in multi_classifier.labels():
        precisions[label] = metrics.precision(refsets[label], testsets[label])
        recalls[label] = metrics.recall(refsets[label], testsets[label])

    return precisions, recalls, avg_md
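A hypothetical usage sketch for multi_metrics; multi_clf stands in for a multi-label classifier whose classify() returns a set of labels, and test_feats for a list of (featureset, labels) pairs, neither of which is defined in this snippet:

precisions, recalls, avg_masi = multi_metrics(multi_clf, test_feats)

for label in sorted(precisions):
    print('%s precision: %s, recall: %s' % (label, precisions[label], recalls[label]))

# masi_distance is 0.0 when a guessed label set matches the reference set exactly
print('average masi distance:', avg_masi)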
def calculate(classifier, feature_set):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    print("Calculating refsets for precision and recall")
    for i, (feats, label) in enumerate(feature_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('country precision:', metrics.precision(refsets['country'], testsets['country']))
    print('country recall:', metrics.recall(refsets['country'], testsets['country']))
    print('religion precision:', metrics.precision(refsets['religion'], testsets['religion']))
    print('religion recall:', metrics.recall(refsets['religion'], testsets['religion']))
    print('astronomy precision:', metrics.precision(refsets['astronomy'], testsets['astronomy']))
    print('astronomy recall:', metrics.recall(refsets['astronomy'], testsets['astronomy']))
def print_results(classifier, featureset, results, name):
    print '''
    %s classifier results:
    Classifier accuracy: %s
    B Precision: %s
    B Recall: %s
    I Precision: %s
    I Recall: %s
    O Precision: %s
    O Recall: %s
    ''' % (name,
           accuracy(classifier, featureset),
           precision(results[0]['B-SNP'], results[1]['B-SNP']),
           recall(results[0]['B-SNP'], results[1]['B-SNP']),
           precision(results[0]['I-SNP'], results[1]['I-SNP']),
           recall(results[0]['I-SNP'], results[1]['I-SNP']),
           precision(results[0]['O'], results[1]['O']),
           recall(results[0]['O'], results[1]['O']))
def eval_stats(results):
    '''
    Compute recall, precision, and f-measure from the passed results.

    The expected format for results is a dictionary whose keys are article
    names and whose values are tuples (test category, reference category,
    scores), where:
        test      = category suggested by the classifier,
        reference = pre-classified gold category,
        scores    = None, or a dictionary mapping category names to the
                    matching score for this article.
    '''
    # Count correct matches and remember the misses
    correct = 0
    missed = defaultdict(tuple)
    for article_name, (suggested, real, scores) in results.iteritems():
        if suggested == real:
            correct += 1
        else:
            missed[article_name] = (suggested, real)

    success_ratio = correct / float(len(results))
    print "Ratio: %0.3f" % success_ratio

    # Print wrong matches
    for name, (suggested, real) in missed.iteritems():
        print "%s\t%s\t%s" % (name, suggested, real)

    # Create sets of reference / test classifications for evaluation
    cat_ref = defaultdict(set)
    cat_test = defaultdict(set)
    for name, (test_category, ref_category, scores) in results.iteritems():
        cat_ref[ref_category].add(name)    # gold-tagged categories
        cat_test[test_category].add(name)  # suggested categories

    # Precision, recall, f-measure and support (number of reference articles
    # in each category) for every category
    print "\nCategory\tPrecision\tRecall\tF-measure\tSupport"
    measures = defaultdict(tuple)
    for category in cat_ref.keys():
        cat_prec = metrics.precision(cat_ref[category], cat_test[category])
        cat_rec = metrics.recall(cat_ref[category], cat_test[category])
        cat_f = metrics.f_measure(cat_ref[category], cat_test[category])
        cat_support = len(cat_ref[category])
        measures[category] = (cat_prec, cat_rec, cat_f, cat_support)
        print "%s\t%0.3f\t%0.3f\t%0.3f\t%d" % \
            (category, cat_prec, cat_rec, cat_f, cat_support)

    # Precision, recall and f-measure for the entire corpus: a support-weighted
    # average of the per-category values,
    # SUM(measure * support) / SUM(support)
    avg_prec = weighted_average([(cat_measure[0], cat_measure[3]) for
                                 cat_measure in measures.values()])
    avg_rec = weighted_average([(cat_measure[1], cat_measure[3]) for
                                cat_measure in measures.values()])
    avg_f = weighted_average([(cat_measure[2], cat_measure[3]) for
                              cat_measure in measures.values()])
    total_support = sum([cat_support[3] for cat_support in measures.values()])
    print "%s\t%0.3f\t%0.3f\t%0.3f\t%d" % \
        ("Total", avg_prec, avg_rec, avg_f, total_support)
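weighted_average is called above but not shown. A minimal sketch of what it could look like, assuming each element is a (value, support) pair; skipping None values (which NLTK's metrics return for empty sets) is an added assumption:

def weighted_average(value_support_pairs):
    # hypothetical helper: support-weighted mean of per-category metric values
    pairs = [(v, s) for v, s in value_support_pairs if v is not None]
    total_support = sum(s for _, s in pairs)
    if total_support == 0:
        return 0.0
    return sum(v * s for v, s in pairs) / float(total_support)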
def evaluate_features(feature_extractor, N, only_acc=False):
    from nltk.corpus import movie_reviews
    from nltk.classify import NaiveBayesClassifier as naive
    from nltk.classify.util import accuracy
    from nltk.metrics import precision, recall, f_measure
    from sys import stdout

    negative = movie_reviews.fileids('neg')
    positive = movie_reviews.fileids('pos')
    negfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'neg') for f in negative]
    posfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'pos') for f in positive]

    negtrain, negtest = stratifiedSamples(negfeats, N)
    postrain, postest = stratifiedSamples(posfeats, N)

    trainfeats = negtrain + postrain
    testfeats = negtest + postest
    classifier = naive.train(trainfeats)
    if only_acc:
        return accuracy(classifier, testfeats)
    print 'accuracy: {}'.format(accuracy(classifier, testfeats))

    # Precision, Recall, F-measure
    from collections import defaultdict
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', recall(refsets['pos'], testsets['pos'])
    print 'pos F-measure:', f_measure(refsets['pos'], testsets['pos'])
    print 'neg precision:', precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', recall(refsets['neg'], testsets['neg'])
    print 'neg F-measure:', f_measure(refsets['neg'], testsets['neg'])
    stdout.flush()
    classifier.show_most_informative_features()
    return classifier
def evaluate_features(self, feature_select):
    # reading pre-labeled input and splitting into lines
    posSentences = open('rt-polarity-pos.txt', 'r')
    negSentences = open('rt-polarity-neg.txt', 'r')
    posSentences = re.split(r'\n', posSentences.read())
    negSentences = re.split(r'\n', negSentences.read())

    posFeatures = []
    negFeatures = []
    # breaks up the sentences into lists of individual words (as selected by
    # the input mechanism) and appends 'pos' or 'neg' after each list
    for i in posSentences:
        posWords = re.findall(r"[\w']+|[.,!?;]", i)
        posWords = [feature_select(posWords), 'pos']
        posFeatures.append(posWords)
    for i in negSentences:
        negWords = re.findall(r"[\w']+|[.,!?;]", i)
        negWords = [feature_select(negWords), 'neg']
        negFeatures.append(negWords)

    # selects 3/4 of the features to be used for training and 1/4 for testing
    posCutoff = int(math.floor(len(posFeatures)*3/4))
    negCutoff = int(math.floor(len(negFeatures)*3/4))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    # Training Phase:
    classifier = NaiveBayesClassifier.train(trainFeatures)

    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)

    # Testing Phase:
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    print 'Trained on %d instances, Tested on %d instances' % (len(trainFeatures), len(testFeatures))
    print 'Accuracy:', nltk.classify.util.accuracy(classifier, testFeatures)
    print 'Positive Precision:', precision(referenceSets['pos'], testSets['pos'])
    print 'Positive Recall:', recall(referenceSets['pos'], testSets['pos'])
    print 'Negative Precision:', precision(referenceSets['neg'], testSets['neg'])
    print 'Negative Recall:', recall(referenceSets['neg'], testSets['neg'])
def calcAllClassesRecall(classSet, refsets, testsets):
    rSum = 0.0
    denominator = 0
    for category in classSet:
        num = recall(refsets[category], testsets[category])
        if num is None:
            continue
        rSum += num
        denominator += 1
    return rSum / denominator
def main():
    global best_words
    tweets = get_tweets_from_db()
    tweet_list = tweets[1000:1599000]
    test_list = tweets[:1000] + tweets[1599000:]

    word_scores = create_word_scores()
    best_words = find_best_words(word_scores, 500000)
    f = open('bestwords.pickle', 'wb')
    pickle.dump(best_words, f)
    f.close()

    training_set = classify.apply_features(best_word_features, tweet_list)
    print "extracted features"

    # train the classifier with the training set
    classifier = NaiveBayesClassifier.train(training_set)
    print "trained classifier"

    # create the pickle file
    f = open('NBclassifier_new.pickle', 'wb')
    pickle.dump(classifier, f)
    f.close()
    print "created pickle"

    # test for precision and recall
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    test_set = classify.apply_features(best_word_features, test_list)

    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'neg precision:', metrics.precision(refsets['0'], testsets['0'])
    print 'neg recall:', metrics.recall(refsets['0'], testsets['0'])
    print 'pos precision:', metrics.precision(refsets['4'], testsets['4'])
    print 'pos recall:', metrics.recall(refsets['4'], testsets['4'])

    # test_set = classify.apply_features(extract_features, test_list)
    # print "extracted features"
    print classify.accuracy(classifier, test_set)
    print classifier.show_most_informative_features(30)
def precision_recall(classifier, testfeats):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    precisions = {}
    recalls = {}

    for label in classifier.labels():
        precisions[label] = metrics.precision(refsets[label], testsets[label])
        recalls[label] = metrics.recall(refsets[label], testsets[label])

    return precisions, recalls
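A short usage sketch for the helper above; nb_classifier and test_feats are placeholder names for a trained NLTK classifier and a list of (featureset, label) pairs, not objects defined in this snippet:

precisions, recalls = precision_recall(nb_classifier, test_feats)

for label in nb_classifier.labels():
    # nltk returns None for a label whose reference or test set is empty
    print('%s precision: %s, recall: %s' % (label, precisions[label], recalls[label]))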
def precision_recall(classifier, testfeats):
    # gives precision and recall of classifiers
    # precision = lack of false positives
    # recall = lack of false negatives
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    precisions = {}
    recalls = {}

    for label in classifier.labels():
        precisions[label] = precision(refsets[label], testsets[label])
        recalls[label] = recall(refsets[label], testsets[label])

    return precisions, recalls
def recall(self, reference):
    """
    Return the recall of an aligned sentence with respect to a
    "gold standard" reference ``AlignedSent``.

    :type reference: AlignedSent or Alignment
    :param reference: A "gold standard" reference aligned sentence.
    :rtype: float or None
    """
    # Get alignments in set-of-2-tuples form.
    # The "sure" recall is used so we don't penalize for missing an
    # alignment that was only marked as "possible".
    align = self.alignment
    if isinstance(reference, AlignedSent):
        sure = reference.alignment
    else:
        sure = Alignment(reference)

    # Call NLTK's existing recall function.
    return recall(sure, align)
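The docstring describes recall over the "sure" gold links of an alignment. The same computation can be illustrated with NLTK's set-based recall applied directly to Alignment objects (the link strings below are invented for the example):

from nltk.metrics import recall
from nltk.translate import Alignment

sure = Alignment.fromstring('0-0 1-1 2-2')   # gold "sure" links
align = Alignment.fromstring('0-0 2-2 2-3')  # candidate alignment

print(recall(sure, align))  # 2 of the 3 sure links were recovered -> 0.666...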
def train_classifiers(posFeatures, negFeatures):
    # selects 3/4 of the features to be used for training and 1/4 for testing
    posCutoff = int(math.floor(len(posFeatures)*3/4))
    negCutoff = int(math.floor(len(negFeatures)*3/4))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    # trains a Naive Bayes classifier
    print("----------------Naive Bayes Classifier-----------")
    classifier = NaiveBayesClassifier.train(trainFeatures)

    # initialises referenceSets and testSets
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)

    # puts the correctly labeled sentences in referenceSets and the predicted labels in testSets
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # prints metrics to show how well the feature selection did
    print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
    print('Original Naive Bayes Accuracy:', (nltk.classify.util.accuracy(classifier, testFeatures))*100)
    print('pos precision:', precision(referenceSets['pos'], testSets['pos']))
    print('pos recall:', recall(referenceSets['pos'], testSets['pos']))
    print('neg precision:', precision(referenceSets['neg'], testSets['neg']))
    print('neg recall:', recall(referenceSets['neg'], testSets['neg']))
    classifier.show_most_informative_features(10)

    # pickle each trained classifier for future use
    save_classifier = open("pickled_algos/originalnaivebayes.pickle", "wb")
    pickle.dump(classifier, save_classifier)
    save_classifier.close()

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(trainFeatures)
    print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testFeatures))*100)
    save_classifier = open("pickled_algos/MNB_classifier.pickle", "wb")
    pickle.dump(MNB_classifier, save_classifier)
    save_classifier.close()

    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(trainFeatures)
    print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testFeatures))*100)
    save_classifier = open("pickled_algos/BernoulliNB_classifier.pickle", "wb")
    pickle.dump(BernoulliNB_classifier, save_classifier)
    save_classifier.close()

    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(trainFeatures)
    print("LogisticRegression_classifier accuracy percent:", (nltk.classify.accuracy(LogisticRegression_classifier, testFeatures))*100)
    save_classifier = open("pickled_algos/LogisticRegression_classifier.pickle", "wb")
    pickle.dump(LogisticRegression_classifier, save_classifier)
    save_classifier.close()

    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(trainFeatures)
    print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testFeatures))*100)
    save_classifier = open("pickled_algos/LinearSVC_classifier.pickle", "wb")
    pickle.dump(LinearSVC_classifier, save_classifier)
    save_classifier.close()

    SGDC_classifier = SklearnClassifier(SGDClassifier())
    SGDC_classifier.train(trainFeatures)
    print("SGDClassifier accuracy percent:", nltk.classify.accuracy(SGDC_classifier, testFeatures)*100)
    save_classifier = open("pickled_algos/SGDC_classifier.pickle", "wb")
    pickle.dump(SGDC_classifier, save_classifier)
    save_classifier.close()

    Dec_Tree_Classifier = SklearnClassifier(DecisionTreeClassifier())
    Dec_Tree_Classifier.train(trainFeatures)
    print("DecisionTreeClassifier Accuracy:", (nltk.classify.accuracy(Dec_Tree_Classifier, testFeatures))*100)
    save_classifier = open("pickled_algos/decision_tree.pickle", "wb")
    pickle.dump(Dec_Tree_Classifier, save_classifier)
    save_classifier.close()

    # Grad_Boost_Classifier = SklearnClassifier(GradientBoostingClassifier())
    # Grad_Boost_Classifier.train(trainFeatures)
    # print("Gradient Boosting Classifier Accuracy:", (nltk.classify.accuracy(Grad_Boost_Classifier, testFeatures))*100)

    Random_Forest_Classifier = SklearnClassifier(RandomForestClassifier())
    Random_Forest_Classifier.train(trainFeatures)
    print("Random Forest Classifier Accuracy:", (nltk.classify.accuracy(Random_Forest_Classifier, testFeatures))*100)
    save_classifier = open("pickled_algos/random_forest.pickle", "wb")
    pickle.dump(Random_Forest_Classifier, save_classifier)
    save_classifier.close()

    Ada_Boost_Classifier = SklearnClassifier(AdaBoostClassifier())
    Ada_Boost_Classifier.train(trainFeatures)
    print("Ada Boost Classifier Accuracy:", (nltk.classify.accuracy(Ada_Boost_Classifier, testFeatures))*100)
    save_classifier = open("pickled_algos/Ada_Boost.pickle", "wb")
    pickle.dump(Ada_Boost_Classifier, save_classifier)
    save_classifier.close()

    voted_classifier = VoteClassifier(classifier,
                                      LinearSVC_classifier,
                                      MNB_classifier,
                                      BernoulliNB_classifier,
                                      LogisticRegression_classifier,
                                      Random_Forest_Classifier,
                                      Ada_Boost_Classifier)
    print("Voted classifier accuracy percent:", (nltk.classify.accuracy(voted_classifier, testFeatures))*100)
    # The voted classifier could not be pickled. Check this later!

    return trainFeatures, testFeatures
trainfeats = negfeats[:4000] + posfeats[:4000]
testfeats = negfeats[4000:] + posfeats[4000:]
print("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))

classifier = NaiveBayesClassifier.train(trainfeats)

refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

# 3-fold cross validation
feats = negfeats + posfeats
M = math.floor(len(feats) / 3)
result = []
for n in range(3):
    val_set = feats[n * M:][:M]
    train_set = feats[(n + 1) * M:] + feats[:n * M]
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    result.append("{:.4f}".format(round(nltk.classify.accuracy(classifier, val_set) * 100, 4)))
print("cross_validation:", result)

print("pos precision:", precision(refsets["pos"], testsets["pos"]))
print("pos recall:", recall(refsets["pos"], testsets["pos"]))
print("pos F-measure:", f_measure(refsets["pos"], testsets["pos"]))
print("neg precision:", precision(refsets["neg"], testsets["neg"]))
print("neg recall:", recall(refsets["neg"], testsets["neg"]))
print("neg F-measure:", f_measure(refsets["neg"], testsets["neg"]))

classifier.show_most_informative_features()
# Now create the data structures for model evaluation
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

#print len(refsets)
#print len(testsets)
#print refsets

precisions = {}
recalls = {}
for label in classifier.labels():
    precisions[label] = metrics.precision(refsets[label], testsets[label])
    recalls[label] = metrics.recall(refsets[label], testsets[label])

# Let us calculate precision & recall by hand and compare with nltk.
# Luckily the data structures are symmetric.
c_00 = len(refsets[labels[0]].intersection(testsets[labels[0]]))
c_01 = len(refsets[labels[0]].intersection(testsets[labels[1]]))
c_10 = len(refsets[labels[1]].intersection(testsets[labels[0]]))
c_11 = len(refsets[labels[1]].intersection(testsets[labels[1]]))

print '  |   H   |   S   |'
print '--|-------|-------|'
print 'H | %5d | %5d |' % (c_00, c_01)
print '--|-------|-------|'
print 'S | %5d | %5d |' % (c_10, c_11)
if not args.no_precision or not args.no_recall or not args.no_fmeasure:
    if args.multi and args.binary:
        refsets, testsets = scoring.multi_ref_test_sets(classifier, test_feats)
    else:
        refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

    for label in labels:
        ref = refsets[label]
        test = testsets[label]

        if not args.no_precision:
            print '%s precision: %f' % (label, precision(ref, test) or 0)

        if not args.no_recall:
            print '%s recall: %f' % (label, recall(ref, test) or 0)

        if not args.no_fmeasure:
            print '%s f-measure: %f' % (label, f_measure(ref, test) or 0)

if args.show_most_informative and args.algorithm != 'DecisionTree' and not (args.multi and args.binary):
    print '%d most informative features' % args.show_most_informative
    classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############

if not args.no_pickle:
    if args.filename:
        fname = os.path.expanduser(args.filename)
# script to validate coding
import cPickle as pickle
import sys
from nltk.metrics import accuracy, ConfusionMatrix, precision, recall, f_measure
from collections import defaultdict

import classifier

if __name__ == '__main__':
    validation_pickle = sys.argv[1]
    classifier_pickle = sys.argv[2]
    validation_set = pickle.load(open(validation_pickle, 'rb'))
    c = pickle.load(open(classifier_pickle, 'rb'))

    reference = defaultdict(set)
    observed = defaultdict(set)
    ref_list = []
    obs_list = []

    for i, (tweet, label) in enumerate(validation_set):
        reference[label].add(i)
        observation = c.classify(tweet)
        observed[observation].add(i)
        ref_list.append(label)
        obs_list.append(observation)

    # nltk.metrics.accuracy expects parallel reference/test sequences,
    # not the label -> index sets used for precision and recall
    print "accuracy: %s" % accuracy(ref_list, obs_list)
    print "pos precision: %s" % precision(reference['positive'], observed['positive'])
    print "pos recall: %s" % recall(reference['positive'], observed['positive'])
    print "pos f-measure: %s" % f_measure(reference['positive'], observed['positive'])
    print "neg precision: %s" % precision(reference['negative'], observed['negative'])
    print "neg recall: %s" % recall(reference['negative'], observed['negative'])
    print "neg f-measure: %s" % f_measure(reference['negative'], observed['negative'])
def avaliate_new_classifier(featureSet):
    print("Time to train the classifier!")
    print("\n")
    #random.shuffle(featureSet)

    # each class has about 197 examples
    positive_tweets = featureSet[:196]
    # shuffle so we do not always test on the same last items
    random.shuffle(positive_tweets)

    negative_tweets = featureSet[196:293]
    random.shuffle(negative_tweets)

    neutral_tweets = featureSet[293:]
    random.shuffle(neutral_tweets)

    # split each class into a reference (training) set and a test set
    pos_cutoff = len(positive_tweets)*3/4
    neg_cutoff = len(negative_tweets)*3/4
    neu_cutoff = len(neutral_tweets)*3/4

    # 75% of the tweets go to training (reference) and the rest to testing
    pos_references = positive_tweets[:pos_cutoff]
    pos_tests = positive_tweets[pos_cutoff:]
    neg_references = negative_tweets[:neg_cutoff]
    neg_tests = negative_tweets[neg_cutoff:]
    neu_references = neutral_tweets[:neu_cutoff]
    neu_tests = neutral_tweets[neu_cutoff:]

    # training and test sets used to compute accuracy
    training_set = pos_references + neg_references + neu_references
    testing_set = pos_tests + neg_tests + neu_tests

    start_time = time.time()
    global classifier
    print("Training has started!")
    classifier = nltk.NaiveBayesClassifier.train(training_set)

    print("Naive Bayes Algo accuracy:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
    classifier.show_most_informative_features(30)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testing_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', recall(refsets['pos'], testsets['pos'])
    print 'pos F-measure:', f_measure(refsets['pos'], testsets['pos'])
    print 'neg precision:', precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', recall(refsets['neg'], testsets['neg'])
    print 'neg F-measure:', f_measure(refsets['neg'], testsets['neg'])
    print 'neutral precision:', precision(refsets['neutral'], testsets['neutral'])
    print 'neutral recall:', recall(refsets['neutral'], testsets['neutral'])
    print 'neutral F-measure:', f_measure(refsets['neutral'], testsets['neutral'])

    print("--- Classifier executed in %s seconds ---" % (time.time() - start_time))
def recall(self):
    return recall(self._reference, self._test)
        texts = list(lif(categorized_corpus, label))
        stop = int(len(texts) * args.fraction)

        for t in texts[:stop]:
            feat = bag_of_words(norm_words(t))
            feats.append(feat)
            test_feats.append((feat, label))

    print "accuracy:", accuracy(classifier, test_feats)
    refsets, testsets = scoring.ref_test_sets(classifier, test_feats)

    for label in labels:
        ref = refsets[label]
        test = testsets[label]
        print "%s precision: %f" % (label, precision(ref, test) or 0)
        print "%s recall: %f" % (label, recall(ref, test) or 0)
        print "%s f-measure: %f" % (label, f_measure(ref, test) or 0)
else:
    if args.instances == "sents":
        texts = categorized_corpus.sents()
        total = len(texts)
    elif args.instances == "paras":
        texts = (itertools.chain(*para) for para in categorized_corpus.paras())
        total = len(categorized_corpus.paras())
    elif args.instances == "files":
        texts = (categorized_corpus.words(fileids=[fid]) for fid in categorized_corpus.fileids())
        total = len(categorized_corpus.fileids())

    stop = int(total * args.fraction)
    feats = (bag_of_words(norm_words(i)) for i in itertools.islice(texts, stop))
def avaliate_classifiers(featureSet):
    print("Time to train the classifier!")
    print("\n")
    #random.shuffle(featureSet)

    # Compute recall and precision.
    # You need to build 2 sets for each classification label:
    # a reference set of correct values, and a test set of observed values.

    # The first 6686 + 500 (from day 14) tweets are positive and the rest (6757 + 500 from day 14) negative
    positive_tweets = featureSet[:7185]
    # shuffle so we do not always test on the same last items
    random.shuffle(positive_tweets)

    # take 7185 positive and 7185 negative (the negative class actually has 7213)
    negative_tweets = featureSet[7185:14372]
    random.shuffle(negative_tweets)

    # split each class into a reference (training) set and a test set
    pos_cutoff = len(positive_tweets)*3/4
    neg_cutoff = len(negative_tweets)*3/4

    # 75% of the tweets go to training (reference) and the rest to testing
    pos_references = positive_tweets[:pos_cutoff]
    pos_tests = positive_tweets[pos_cutoff:]
    neg_references = negative_tweets[:neg_cutoff]
    neg_tests = negative_tweets[neg_cutoff:]

    # training and test sets used to compute accuracy
    training_set = pos_references + neg_references
    testing_set = pos_tests + neg_tests

    start_time = time.time()
    global classifier
    print("Training has started!")
    classifier = nltk.NaiveBayesClassifier.train(training_set)

    print("Naive Bayes Algo accuracy:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
    classifier.show_most_informative_features(30)

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    # build the reference (gold) and observed (predicted) index sets
    for i, (feats, label) in enumerate(testing_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print 'pos precision:', precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', recall(refsets['pos'], testsets['pos'])
    print 'pos F-measure:', f_measure(refsets['pos'], testsets['pos'])
    print 'neg precision:', precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', recall(refsets['neg'], testsets['neg'])
    print 'neg F-measure:', f_measure(refsets['neg'], testsets['neg'])

    print("--- Classifier executed in %s seconds ---" % (time.time() - start_time))
def cross_fold(instances, trainf, testf, folds=10, trace=1, metrics=True, informative=0):
    if folds < 2:
        raise ValueError('must have at least 2 folds')
    # ensure it isn't an exhaustible iterable
    instances = list(instances)
    # randomize so we get an even distribution, in case the labeled instances
    # are ordered by label
    random.shuffle(instances)
    l = len(instances)
    step = l / folds

    if trace:
        print('step %d over %d folds of %d instances' % (step, folds, l))

    accuracies = []
    precisions = collections.defaultdict(list)
    recalls = collections.defaultdict(list)
    f_measures = collections.defaultdict(list)

    for f in range(folds):
        if trace:
            print('\nfold %d' % (f+1))
            print('-----%s' % ('-'*len('%s' % (f+1))))

        start = f * step
        end = start + step
        train_instances = instances[:start] + instances[end:]
        test_instances = instances[start:end]

        if trace:
            print('training on %d:%d + %d:%d' % (0, start, end, l))

        obj = trainf(train_instances)

        if trace:
            print('testing on %d:%d' % (start, end))

        if metrics:
            refsets, testsets = ref_test_sets(obj, test_instances)

            for key in set(refsets.keys() + testsets.keys()):
                ref = refsets[key]
                test = testsets[key]
                p = precision(ref, test) or 0
                r = recall(ref, test) or 0
                f = f_measure(ref, test) or 0
                precisions[key].append(p)
                recalls[key].append(r)
                f_measures[key].append(f)

                if trace:
                    print('%s precision: %f' % (key, p))
                    print('%s recall: %f' % (key, r))
                    print('%s f-measure: %f' % (key, f))

        accuracy = testf(obj, test_instances)

        if trace:
            print('accuracy: %f' % accuracy)

        accuracies.append(accuracy)

        if trace and informative and hasattr(obj, 'show_most_informative_features'):
            obj.show_most_informative_features(informative)

    if trace:
        print('\nmean and variance across folds')
        print('------------------------------')
        print('accuracy mean: %f' % (sum(accuracies) / folds))
        print('accuracy variance: %f' % array(accuracies).var())

        for key, ps in iteritems(precisions):
            print('%s precision mean: %f' % (key, sum(ps) / folds))
            print('%s precision variance: %f' % (key, array(ps).var()))

        for key, rs in iteritems(recalls):
            print('%s recall mean: %f' % (key, sum(rs) / folds))
            print('%s recall variance: %f' % (key, array(rs).var()))

        for key, fs in iteritems(f_measures):
            print('%s f_measure mean: %f' % (key, sum(fs) / folds))
            print('%s f_measure variance: %f' % (key, array(fs).var()))

    return accuracies, precisions, recalls, f_measures
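A hypothetical call to cross_fold; labeled_feats is a placeholder for a list of (featureset, label) pairs, and the two lambdas adapt NLTK's NaiveBayes API to the trainf/testf callables the function expects:

from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy

accs, precs, recs, fms = cross_fold(
    labeled_feats,
    trainf=lambda train: NaiveBayesClassifier.train(train),
    testf=lambda clf, test: accuracy(clf, test),
    folds=10,
    trace=1,
)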
# print the classification results
print 'Dictionary : ', dictionary.get_name(), '\n'
print ConfusionMatrix(gold_standard, results).pp()
print 'Accuracy: ', accuracy(gold_standard, results)

for c in [0, 1, -1]:
    print 'Metrics for class ', c
    gold = set()
    test = set()
    for i, x in enumerate(gold_standard):
        if x == c:
            gold.add(i)
    for i, x in enumerate(results):
        if x == c:
            test.add(i)
    print 'Precision: ', precision(gold, test)
    print 'Recall   : ', recall(gold, test)
    print 'F_measure: ', f_measure(gold, test)
    print '\n\n'

#################### Sentences classification ##########################
# Not reported in the paper because LIWC doesn't have a neutral class
positive_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='positive')]
negative_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='negative')]
neutral_sents = [reli.words_sentence_pos(s) for s in reli.sents(polarity='neutral')]

print '#########################################################################'
print '###################### Sentences classification #########################'
def evaluate_features(feature_select, classifier_sel):
    posFeatures = []
    negFeatures = []

    # http://stackoverflow.com/questions/367155/splitting-a-string-into-words-and-punctuation
    # breaks up the sentences into lists of individual words (as selected by the
    # input mechanism) and appends 'pos' or 'neg' after each list
    with open(RT_POLARITY_POS_FILE, 'r') as posSentences:
        for i in posSentences:
            i = clean_text(i)
            posWords = [w for w in i.lower().split() if w not in stopWords]
            # posWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            posWords = [feature_select(posWords), 'pos']
            posFeatures.append(posWords)
    with open(RT_POLARITY_NEG_FILE, 'r') as negSentences:
        for i in negSentences:
            i = clean_text(i)
            negWords = [w for w in i.lower().split() if w not in stopWords]
            # negWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            negWords = [feature_select(negWords), 'neg']
            negFeatures.append(negWords)

    # selects part of the features to be used for training and the rest for testing
    if DATA_FLAG == 'kooshas_data':
        trainFeatures = posFeatures + negFeatures
        neg_test, pos_test, neut_test = get_features(testing_data, no_classes=2, feat_select=feature_select)
        testFeatures = pos_test + neg_test  # for three-class data, add neut_test as well
    else:
        posCutoff = int(math.floor(len(posFeatures) * 4 / 5))
        negCutoff = int(math.floor(len(negFeatures) * 4 / 5))
        trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
        testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]
        aa, bb, cc = get_features(testing_data, no_classes=2, feat_select=feature_select)
        testFeatures = aa + bb

    # initialises referenceSets and testSets
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)

    if classifier_sel == 'NB':
        # trains a Naive Bayes classifier
        classifier = NaiveBayesClassifier.train(trainFeatures)

        # puts the correctly labeled sentences in referenceSets and the predicted labels in testSets
        for i, (features, label) in enumerate(testFeatures):
            referenceSets[label].add(i)
            predicted = classifier.classify(features)
            testSets[predicted].add(i)

        # prints metrics to show how well the feature selection did
        print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
        print('accuracy:', nltk.classify.util.accuracy(classifier, testFeatures))
        print('pos precision:', precision(referenceSets['pos'], testSets['pos']))
        print('pos recall:', recall(referenceSets['pos'], testSets['pos']))
        print('neg precision:', precision(referenceSets['neg'], testSets['neg']))
        print('neg recall:', recall(referenceSets['neg'], testSets['neg']))
        classifier.show_most_informative_features(10)

    elif classifier_sel == 'MaxEnt':
        get_performance(LogisticRegression(), trainFeatures, testFeatures)

    elif classifier_sel == 'all_classifiers':
        get_performance(MultinomialNB(), trainFeatures, testFeatures)
        get_performance(BernoulliNB(), trainFeatures, testFeatures)
        get_performance(LogisticRegression(), trainFeatures, testFeatures)
        get_performance(SGDClassifier(), trainFeatures, testFeatures)
        get_performance(SVC(), trainFeatures, testFeatures)
        get_performance(LinearSVC(), trainFeatures, testFeatures)
        get_performance(NuSVC(kernel='rbf', nu=1), trainFeatures, testFeatures)

    elif classifier_sel == 'SVM':
        SVC_classifier = SklearnClassifier(SVC())
        classifier = SVC_classifier.train(trainFeatures)
        print("SVC_classifier accuracy:", (nltk.classify.accuracy(classifier, testFeatures)) * 100)

        for i, (features, label) in enumerate(testFeatures):
            referenceSets[label].add(i)
            predicted = classifier.classify(features)
            testSets[predicted].add(i)

        get_performance(classifier, referenceSets, testSets)
##pprint.pprint(results)
#pprint(results)
import sys
#sys.exit()

print '''
Classifier accuracy (Bayes): %s
B Precision (Bayes): %s
B Recall (Bayes): %s
I Precision (Bayes): %s
I Recall (Bayes): %s
O Precision (Bayes): %s
O Recall (Bayes): %s
''' % (accuracy(bayes_classifier, test_featureset),
       precision(results[0]['B-SNP'], results[1]['B-SNP']),
       recall(results[0]['B-SNP'], results[1]['B-SNP']),
       precision(results[0]['I-SNP'], results[1]['I-SNP']),
       recall(results[0]['I-SNP'], results[1]['I-SNP']),
       precision(results[0]['O'], results[1]['O']),
       recall(results[0]['O'], results[1]['O']))

#bayes_classifier.show_most_informative_features(10)
sys.exit()

maxent_classifier = nltk.classify.MaxentClassifier.train(training_featureset)
maxent_results = get_results(maxent_classifier)

print '''
Classifier accuracy (MaxEnt): %s
B Precision (MaxEnt): %s
def validate(self, validation_set):
    if self.classifier is None:
        raise Exception("self.classifier is None")

    reference = defaultdict(set)
    observed = defaultdict(set)
    observed['neutral'] = set()

    for i, (tweet, label) in enumerate(validation_set):
        reference[label].add(i)
        observation = self.classify(tweet)
        observed[observation].add(i)

    # accuracy computed from the per-instance predictions gathered above
    correct = sum(1 for i, (tweet, label) in enumerate(validation_set) if i in observed[label])
    acc = correct / float(len(validation_set))

    posp = precision(reference['positive'], observed['positive'])
    posr = recall(reference['positive'], observed['positive'])
    posf = f_measure(reference['positive'], observed['positive'])
    negp = precision(reference['negative'], observed['negative'])
    negr = recall(reference['negative'], observed['negative'])
    negf = f_measure(reference['negative'], observed['negative'])

    print "accuracy: %s" % acc
    print "pos precision: %s" % posp
    print "pos recall: %s" % posr
    print "pos f-measure: %s" % posf
    print "neg precision: %s" % negp
    print "neg recall: %s" % negr
    print "neg f-measure: %s" % negf

    return (acc, posp, posr, posf, negp, negr, negf)