def evaluate_model(NBClassifier):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    accuracy = classify.accuracy(NBClassifier, validation_features) * 100

    # Collect gold labels and predicted labels, indexed by example position.
    for i, (feats, label) in enumerate(validation_features):
        refsets[label].add(i)
        observed = NBClassifier.classify(feats)
        testsets[observed].add(i)

    # Per-class precision and recall, computed once after all predictions are in.
    negative_precision = precision(refsets['negative'], testsets['negative'])
    neutral_precision = precision(refsets['neutral'], testsets['neutral'])
    positive_precision = precision(refsets['positive'], testsets['positive'])
    positive_recall = recall(refsets['positive'], testsets['positive'])
    neutral_recall = recall(refsets['neutral'], testsets['neutral'])
    negative_recall = recall(refsets['negative'], testsets['negative'])

    try:
        avg_recall = (1 / 3) * (negative_recall + positive_recall + neutral_recall)
        avg_precision = (1 / 3) * (negative_precision + positive_precision + neutral_precision)
        print(accuracy, avg_recall, avg_precision)
    except TypeError:
        # A score is None when a class never occurs, so the averages are undefined; skip them.
        pass
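
evaluate_model relies on module-level names that are not shown here (validation_features and the NLTK imports). A minimal sketch of the assumed setup, using toy data, might look like this:

import collections

from nltk import classify
from nltk.classify import NaiveBayesClassifier
from nltk.metrics.scores import precision, recall

# Toy (feature_dict, label) pairs; a real pipeline would build these from a corpus.
train_features = [({'good': True}, 'positive'),
                  ({'bad': True}, 'negative'),
                  ({'fine': True}, 'neutral')]
validation_features = [({'good': True}, 'positive'),
                       ({'bad': True}, 'negative'),
                       ({'fine': True}, 'neutral')]

NBClassifier = NaiveBayesClassifier.train(train_features)
evaluate_model(NBClassifier)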
Example #2
def evaluate_features(feature_select):
    # reading pre-labeled input and splitting into lines
    with open(os.path.join(__location__, 'rt-polarity-neg.txt'), 'r',
              encoding='utf8') as negFile:
        negSentences = re.split(r'\n', negFile.read())
    with open(os.path.join(__location__, 'rt-polarity-pos.txt'), 'r',
              encoding='utf8') as posFile:
        posSentences = re.split(r'\n', posFile.read())
    stop = stopwords.words('english')

    posFeatures = []
    negFeatures = []
    # breaks up the sentences into lists of individual words
    # creates instance structures for classifier
    for i in posSentences:
        posWords = re.findall(r"[\w']+|[.,!?;]", i)
        # print(posWords)
        posWords = [j for j in posWords if j not in stop]
        # print(posWords)
        posWords = [feature_select(posWords), 'pos']
        posFeatures.append(posWords)
    for i in negSentences:
        negWords = re.findall(r"[\w']+|[.,!?;]", i)
        negWords = [j for j in negWords if j not in stop]
        negWords = [feature_select(negWords), 'neg']
        negFeatures.append(negWords)

    # Use 3/4 of the features as the training set and 1/4 as the test set
    posCutoff = int(math.floor(len(posFeatures) * 3 / 4))
    negCutoff = int(math.floor(len(negFeatures) * 3 / 4))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    # Trains a Naive Bayes classifier on the training features
    classifier = NaiveBayesClassifier.train(trainFeatures)

    # Sets up labels to look at output
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)
    for i, (features, label) in enumerate(
            testFeatures):  # enumerate adds number-count to each item
        referenceSets[label].add(
            i)  # recorded polarity for these test sentences
        predicted = classifier.classify(
            features)  # classifiers' proposed polarity for tests
        testSets[predicted].add(i)

    # Outputs
    print('train on %s instances, test on %s instances' %
          (len(trainFeatures), len(testFeatures)))
    print('accuracy:', nltk.classify.util.accuracy(classifier, testFeatures))
    print('pos precision:',
          scores.precision(referenceSets['pos'], testSets['pos']))
    print('pos recall:', scores.recall(referenceSets['pos'], testSets['pos']))
    print('neg precision:',
          scores.precision(referenceSets['neg'], testSets['neg']))
    print('neg recall:', scores.recall(referenceSets['neg'], testSets['neg']))
    classifier.show_most_informative_features(10)
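
The feature_select argument is any callable that maps a token list to an NLTK feature dictionary. One common choice, referred to as make_full_dict in a later example in this listing, simply marks every token as present; a minimal sketch:

def make_full_dict(words):
    # Treat every token as a binary "present" feature.
    return dict([(word, True) for word in words])

# evaluate_features(make_full_dict)
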
def evaluate_features(feature_select):
	## Label all Trump tweets with 'pos' and other tweets with 'neg'
	## Divide them into Train and Test subset 
	posFeatures_train =[]
	negFeatures_train =[]

	for i in Trump_train:
		posWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
		posWords = [feature_select(posWords), 'pos']
		posFeatures_train.append(posWords)

	for i in Adele_train + Clinton_train:
		negWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
		negWords = [feature_select(negWords), 'neg']
		negFeatures_train.append(negWords)

	posFeatures_test = []
	negFeatures_test = []

	for i in Trump_test:
		posWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
		posWords = [feature_select(posWords), 'pos']
		posFeatures_test.append(posWords)

	for i in Adele_test + Clinton_test:
		negWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
		negWords = [feature_select(negWords), 'neg']
		negFeatures_test.append(negWords)

	trainFeatures = posFeatures_train + negFeatures_train
	testFeatures = posFeatures_test + negFeatures_test

	## Trains a Naive Bayes Classifier
	## Read more here: https://en.wikipedia.org/wiki/Naive_Bayes_classifier
	classifier = NaiveBayesClassifier.train(trainFeatures)

	## Initiates referenceSets and testSets
	referenceSets = collections.defaultdict(set)
	testSets = collections.defaultdict(set)

	## Puts correctly labeled sentences in referenceSets and the predictively labeled version in testsets
	for i, (features, label) in enumerate(testFeatures):
		referenceSets[label].add(i)
		predicted = classifier.classify(features)
		testSets[predicted].add(i)

	## Prints metrics to show how well the feature selection did
	## Accuracy: percentage of items in test set that the classifier correctly labeled.
	## Precision: True_Positive / (True_Positive+False_Positive) 
	## Recall: True_Positive / (True_Positive+False_Negative) 
	print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
	print('accuracy:', nltk.classify.util.accuracy(classifier, testFeatures))
	print('pos precision:', precision(referenceSets['pos'], testSets['pos']))
	print('pos recall:', recall(referenceSets['pos'], testSets['pos']))
	print('neg precision:', precision(referenceSets['neg'], testSets['neg']))
	print('neg recall:', recall(referenceSets['neg'], testSets['neg']))
	classifier.show_most_informative_features(10)
def evaluate_features(feature_select):
    posFeatures = []
    negFeatures = []
    global cnt
    cnt += 1
    #http://stackoverflow.com/questions/367155/splitting-a-string-into-words-and-punctuation
    #breaks up the sentences into lists of individual words (as selected by the input mechanism) and appends 'pos' or 'neg' after each list
    with open(RT_POLARITY_POS_FILE, 'r') as posSentences:
        for i in posSentences:
            posWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            posWords = [
                feature_select(posWords), 'pos'
            ]  #calls make_full_dict and returns a dict with [word,'True']
            posFeatures.append(posWords)
    with open(RT_POLARITY_NEG_FILE, 'r') as negSentences:
        for i in negSentences:
            negWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            negWords = [feature_select(negWords), 'neg']
            negFeatures.append(negWords)

    #selects 3/4 of the features to be used for training and 1/4 to be used for testing
    posCutoff = int(math.floor(len(posFeatures) * 3 / 4))
    negCutoff = int(math.floor(len(negFeatures) * 3 / 4))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    #trains a Naive Bayes Classifier
    classifier = NaiveBayesClassifier.train(trainFeatures)

    #initiates referenceSets and testSets
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)

    #puts correctly labeled sentences in referenceSets and the predictively labeled version in testsets
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    #prints metrics to show how well the feature selection did
    print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
    print('accuracy:', nltk.classify.util.accuracy(classifier, testFeatures))
    print('pos precision:', precision(referenceSets['pos'], testSets['pos']))
    print('pos recall:', recall(referenceSets['pos'], testSets['pos']))
    print('pos f1-score:', f_measure(referenceSets['pos'], testSets['pos']))
    print('neg precision:', precision(referenceSets['neg'], testSets['neg']))
    print('neg recall:', recall(referenceSets['neg'], testSets['neg']))
    print('neg f1-score:', f_measure(referenceSets['neg'], testSets['neg']))
    classifier.show_most_informative_features(10)
    print('=================================================')
Example #5
def accuracy_measure(classifier, cross_valid_set):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(cross_valid_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('pos Precision:', precision(refsets[1], testsets[1]))
    print('pos Recall:', recall(refsets[1], testsets[1]))
    print('pos F-measure:', f_measure(refsets[1], testsets[1]))
    print('neg Precision:', precision(refsets[0], testsets[0]))
    print('neg Recall:', recall(refsets[0], testsets[0]))
    print('neg F-measure:', f_measure(refsets[0], testsets[0]))
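
accuracy_measure expects cross_valid_set to hold (feature_dict, label) pairs with integer labels (1 for positive, 0 for negative). A small usage sketch with toy data:

from nltk.classify import NaiveBayesClassifier

train_set = [({'contains(good)': True}, 1), ({'contains(bad)': True}, 0)]
cross_valid_set = [({'contains(good)': True}, 1), ({'contains(bad)': True}, 0)]

classifier = NaiveBayesClassifier.train(train_set)
accuracy_measure(classifier, cross_valid_set)
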
def evaluate_features(feature_select):
    posFeatures = []
    negFeatures = []
    global cnt
    cnt += 1
    # http://stackoverflow.com/questions/367155/splitting-a-string-into-words-and-punctuation
    # breaks up the sentences into lists of individual words (as selected by the input mechanism) and appends 'pos' or 'neg' after each list
    with open(RT_POLARITY_POS_FILE, "r") as posSentences:
        for i in posSentences:
            posWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            posWords = [feature_select(posWords), "pos"]  # calls make_full_dict and returns a dict with [word,'True']
            posFeatures.append(posWords)
    with open(RT_POLARITY_NEG_FILE, "r") as negSentences:
        for i in negSentences:
            negWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            negWords = [feature_select(negWords), "neg"]
            negFeatures.append(negWords)

    # selects 3/4 of the features to be used for training and 1/4 to be used for testing
    posCutoff = int(math.floor(len(posFeatures) * 3 / 4))
    negCutoff = int(math.floor(len(negFeatures) * 3 / 4))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    # trains a Naive Bayes Classifier
    classifier = NaiveBayesClassifier.train(trainFeatures)

    # initiates referenceSets and testSets
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)

    # puts correctly labeled sentences in referenceSets and the predictively labeled version in testsets
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # prints metrics to show how well the feature selection did
    print "train on %d instances, test on %d instances" % (len(trainFeatures), len(testFeatures))
    print "accuracy:", nltk.classify.util.accuracy(classifier, testFeatures)
    print "pos precision:", precision(referenceSets["pos"], testSets["pos"])
    print "pos recall:", recall(referenceSets["pos"], testSets["pos"])
    print "pos f1-score:", f_measure(referenceSets["pos"], testSets["pos"])
    print "neg precision:", precision(referenceSets["neg"], testSets["neg"])
    print "neg recall:", recall(referenceSets["neg"], testSets["neg"])
    print "neg f1-score:", f_measure(referenceSets["neg"], testSets["neg"])
    classifier.show_most_informative_features(10)
    print "================================================="
Example #7
def getMetrics(fileName, resultPath, expectedPath, enc):
    output = open(resultPath, 'r', encoding=enc)
    outputResult = output.read()
    outputSentences = set(sent_tokenize(outputResult))
    output.close()

    expected = open(expectedPath, 'r', encoding=enc)
    expectedResult = expected.readlines()

    # strip trailing newlines so sentences compare cleanly
    expectedSentences = []
    for line in expectedResult:
        expectedSentences.append(line.rstrip('\n'))

    expectedSentences = set(expectedSentences)
    expected.close()

    recallResult = recall(expectedSentences, outputSentences)
    precisionResult = precision(expectedSentences, outputSentences)
    f1Score = calculateF1score(
        precisionResult,
        recallResult) if recallResult != 0 and precisionResult != 0 else 0

    resultString = "File: " + fileName + " Recall: " + str(
        recallResult) + " Precision: " + str(
            precisionResult) + " F1 Score: " + str(f1Score)

    print(resultString)

    return {
        "recall": recallResult,
        "precision": precisionResult,
        "relevance": 1 if precisionResult != 0 else 0,
        "f1": f1Score
    }
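
calculateF1score is not shown in this snippet; assuming the usual harmonic mean of precision and recall, a compatible implementation would be:

def calculateF1score(precisionResult, recallResult):
    # Harmonic mean of precision and recall.
    return 2 * precisionResult * recallResult / (precisionResult + recallResult)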
Example #8
def evaluate(ref_tags, hyp_tags):
    if len(ref_tags) != len(hyp_tags):
        raise ValueError(
            'reference and hypothesis have different numbers of lines')

    n = len(ref_tags)
    counter = Counter(ref_tags)
    unique_tags = set(ref_tags)
    prec_dict, rec_dict, f_dict = defaultdict(float), defaultdict(
        float), defaultdict(float)
    for tag in sorted(unique_tags):
        ref_ids = {i for i, ref_tag in enumerate(ref_tags) if ref_tag == tag}
        hyp_ids = {i for i, hyp_tag in enumerate(hyp_tags) if hyp_tag == tag}
        prec_dict[tag] = precision(ref_ids, hyp_ids)
        rec_dict[tag] = recall(ref_ids, hyp_ids)
        f_dict[tag] = f_measure(ref_ids, hyp_ids)
        if prec_dict[tag] is None:
            warn(f'Undefined precision for {tag}; converting to 0.0')
            prec_dict[tag] = 0.
        if rec_dict[tag] is None:
            warn(f'Undefined recall for {tag}; converting to 0.0')
            rec_dict[tag] = 0.
        if f_dict[tag] is None:
            warn(f'Undefined F-score for {tag}; converting to 0.0')
            f_dict[tag] = 0.
        prec_dict[OVERALL_KEY] += counter[tag] * prec_dict[tag] / n
        rec_dict[OVERALL_KEY] += counter[tag] * rec_dict[tag] / n
        f_dict[OVERALL_KEY] += counter[tag] * f_dict[tag] / n

    return EvalResult(precision=prec_dict,
                      recall=rec_dict,
                      f1=f_dict,
                      conf_matrix=ConfusionMatrix(ref_tags,
                                                  hyp_tags,
                                                  sort_by_count=True))
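
OVERALL_KEY and EvalResult are assumed to be defined elsewhere in the same module; compatible (hypothetical) definitions could be:

from collections import namedtuple

OVERALL_KEY = 'OVERALL'  # hypothetical key for the frequency-weighted averages
EvalResult = namedtuple('EvalResult', ['precision', 'recall', 'f1', 'conf_matrix'])
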
def me_classifier(exclude_list):
    me_classifier = 0

    with open(train_data, 'r', encoding='utf-8', errors='ignore') as csvfile:
        reader = csv.reader(csvfile)
        feature_set = [(feature_set_generator(text, length, label,
                                              exclude_list), label)
                       for text, length, label in reader]
        #print(feature_set)
        me_classifier = MaxentClassifier.train(feature_set, "megam")

    accuracy = 0.0
    with open(test_data, 'r', encoding='utf-8',
              errors='ignore') as testcsvfile:
        test_reader = csv.reader(testcsvfile)
        test_feature_set = [(feature_set_generator(text, length, label,
                                                   exclude_list), label)
                            for text, length, label in test_reader]
        accuracy = classify.accuracy(me_classifier, test_feature_set)

    classified = collections.defaultdict(set)
    observed = collections.defaultdict(set)
    i = 1
    with open(test_data, 'r', encoding='utf-8',
              errors='ignore') as testcsvfile:
        test_reader = csv.reader(testcsvfile)
        for text, length, label in test_reader:
            observed[label].add(i)
            classified[me_classifier.classify(
                feature_set_generator(text, length, label,
                                      exclude_list))].add(i)
            i += 1

    return (accuracy,
            precision(observed['1'], classified['1']),
            recall(observed['1'], classified['1']),
            f_measure(observed['1'], classified['1']),
            precision(observed['0'], classified['0']),
            recall(observed['0'], classified['0']),
            f_measure(observed['0'], classified['0']))
Example #10
def multi_metrics(multi_classifier, test_feats):
    mds = []
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feat, labels) in enumerate(test_feats):
        for label in labels:
            refsets[label].add(i)

        guessed = multi_classifier.classify(feat)

        for label in guessed:
            testsets[label].add(i)

        mds.append(masi_distance(set(labels), guessed))

    avg_md = sum(mds) / float(len(mds))
    precisions = {}
    recalls = {}

    for label in multi_classifier.labels():
        precisions[label] = precision(refsets[label], testsets[label])
        recalls[label] = recall(refsets[label], testsets[label])

    return precisions, recalls, avg_md
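
multi_metrics expects a multi-label classifier whose classify() returns a set of labels, and uses masi_distance from nltk.metrics. A toy sketch of how it might be wired up (the classifier here is a made-up stand-in):

import collections

from nltk.metrics import masi_distance
from nltk.metrics.scores import precision, recall

class ToyMultiClassifier:
    # Hypothetical stand-in: predicts 'news' when 'election' is a feature, otherwise 'sport'.
    def labels(self):
        return ['news', 'sport']

    def classify(self, feat):
        return {'news'} if feat.get('election') else {'sport'}

test_feats = [({'election': True}, ['news']),
              ({'goal': True}, ['sport'])]
precisions, recalls, avg_md = multi_metrics(ToyMultiClassifier(), test_feats)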
Example #11
    def calculate_metrics(self):
        included_logs = 0
        metrics = {}
        cc = SmoothingFunction()
        for identifier in self._values:
            if self._values[identifier].get('target_text', None) is not None:
                included_logs += 1
                target_text = self._values[identifier]['target_text']
                output_text = self._values[identifier]['output_text']
                metrics['BLEU'] = metrics.get('BLEU', 0) + sentence_bleu(
                    [target_text], output_text, smoothing_function=cc.method4)
                metrics['accuracy'] = metrics.get('accuracy', 0) + accuracy(
                    target_text, output_text)
                target_text = set(target_text)
                output_text = set(output_text)
                metrics['precision'] = metrics.get('precision', 0) + precision(
                    target_text, output_text)
                metrics['recall'] = metrics.get('recall', 0) + recall(
                    target_text, output_text)
                metrics['f_measure'] = metrics.get('f_measure', 0) + f_measure(
                    target_text, output_text)

        if included_logs != 0:
            for metric in metrics:
                metrics[metric] /= included_logs

        return metrics, included_logs
Example #12
def precision_recall_2way_with_threshold(classifier, testFeatures, threshold):
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    probs = classifier.prob_classify_many([feats for (feats, label) in testFeatures])

    trues = 0
    for i, (feats, label) in enumerate(testFeatures):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        prob = probs[i]
        if prob.prob(observed) < threshold:
            observed = 'neu'
        testsets[observed].add(i)
        if observed == label:
            trues += 1

    precisions = {}
    recalls = {}

    for label in classifier.labels():
        precisions[label] = precision(refsets[label], testsets[label])
        recalls[label] = recall(refsets[label], testsets[label])

    accuracy = float(trues)/len(testFeatures)

    return precisions, recalls, accuracy
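
A usage sketch with toy data, assuming a trained NLTK NaiveBayesClassifier (which supports prob_classify_many): predictions whose top-class probability falls below the threshold are re-labelled 'neu' before scoring.

from nltk.classify import NaiveBayesClassifier

trainFeatures = [({'good': True}, 'pos'), ({'bad': True}, 'neg')]
testFeatures = [({'good': True}, 'pos'), ({'bad': True}, 'neg')]

classifier = NaiveBayesClassifier.train(trainFeatures)
precisions, recalls, accuracy = precision_recall_2way_with_threshold(
    classifier, testFeatures, threshold=0.6)
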
def calculate_topk_recall(reference, test, k):
    substitute_pairs = set()
    for food in reference:
        substitutes = reference[food][:k]
        substitute_pairs.update({tuple([food, substitute]) for substitute in substitutes})

    topk_rec = scores.recall(reference=substitute_pairs, test=test)
    return topk_rec
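
Here reference maps each item to a ranked list of gold substitutes and test is a set of predicted (food, substitute) pairs; scores.recall then gives the fraction of top-k gold pairs that were predicted. A small hypothetical example:

reference = {'butter': ['margarine', 'coconut oil'],
             'milk': ['soy milk', 'almond milk']}
predicted_pairs = {('butter', 'margarine'), ('milk', 'almond milk')}

# Only ('butter', 'margarine') appears in the top-1 gold pairs, so this returns 0.5.
top1_recall = calculate_topk_recall(reference, predicted_pairs, k=1)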
Example #14
def evaluate_features(feature_select):
    posFeatures = []
    negFeatures = []
    # breaks the sentences into lists of individual words (as selected by the input mechanism) and appends 'pos' or 'neg' to each list
    with open(RT_POLARITY_POS_FILE, 'r') as posSentences:
        for i in posSentences:
            posWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            posWords = [feature_select(posWords), 'pos']
            posFeatures.append(posWords)
    with open(RT_POLARITY_NEG_FILE, 'r') as negSentences:
        for i in negSentences:
            negWords = re.findall(r"[\w']+|[.,!?;]", i.rstrip())
            negWords = [feature_select(negWords), 'neg']
            negFeatures.append(negWords)

    # selects 3/4 of the features for training and 1/4 for testing
    posCutoff = int(math.floor(len(posFeatures) * 3 / 4))
    negCutoff = int(math.floor(len(negFeatures) * 3 / 4))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    # trains a Naive Bayes classifier
    classifier = NaiveBayesClassifier.train(trainFeatures)

    # initializes referenceSets and testSets
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)

    # puts the correctly labeled sentences in referenceSets and the predicted labels in testSets
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # prints metrics to show how well the feature selection did
    print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
    print('accuracy:', nltk.classify.util.accuracy(classifier, testFeatures))
    print('pos precision:', precision(referenceSets['pos'], testSets['pos']))
    print('pos recall:', recall(referenceSets['pos'], testSets['pos']))
    print('pos F1:', f_measure(referenceSets['pos'], testSets['pos']))
    print('neg precision:', precision(referenceSets['neg'], testSets['neg']))
    print('neg recall:', recall(referenceSets['neg'], testSets['neg']))
    print('neg F1:', f_measure(referenceSets['neg'], testSets['neg']))
    classifier.show_most_informative_features(10)
Example #15
def testing(sent_classifier):

	refsets = collections.defaultdict(set)
	testsets = collections.defaultdict(set)
	 
	for i, (feats, category) in enumerate(testing_set):
	    refsets[category].add(i)
	    observed = sent_classifier.classify(feats)
	    testsets[observed].add(i)

	print ('Classifier Accuracy: ', (nltk.classify.accuracy(sent_classifier, testing_set))*100, "%")
	print ('Classifier pos Precision:', scores.precision(refsets['pos'], testsets['pos'])*100, "%")
	print ('Classifier pos Recall:', scores.recall(refsets['pos'], testsets['pos'])*100, "%")
	print ('Classifier pos F-measure:', scores.f_measure(refsets['pos'], testsets['pos'])*100, "%")
	print ('Classifier neg Precision:', scores.precision(refsets['neg'], testsets['neg'])*100, "%")
	print ('Classifier neg Recall:', scores.recall(refsets['neg'], testsets['neg'])*100, "%")
	print ('Classifier neg F-measure:', scores.f_measure(refsets['neg'], testsets['neg'])*100, "%")
	print ('\n')
Example #16
def kset_stat(silvs, golds):
  s1 = set(map(to_root, golds))
  s2 = set(map(to_root, silvs))
  p = precision(s1, s2)
  r = recall(s1, s2)
  f = f_measure(s1, s2)
  if not (p and r and f):
    return {'p': 0, 'r': 0, 'f': 0}
  return {'p': p, 'r': r, 'f': f}
def test_trained_classifier(classifier, test_samples):
    """Prints precision/recall statistics of a NLTK classifier"""
    import collections

    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (sample, label) in enumerate(test_samples):
        refsets[label].add(i)
        observed = classifier.classify(sample)
        testsets[observed].add(i)

    print("pos precision:", scores.precision(refsets["pos"], testsets["pos"]))
    print("pos recall:", scores.recall(refsets["pos"], testsets["pos"]))
    print("pos F-measure:", scores.f_measure(refsets["pos"], testsets["pos"]))
    print("neg precision:", scores.precision(refsets["neg"], testsets["neg"]))
    print("neg recall:", scores.recall(refsets["neg"], testsets["neg"]))
    print("neg F-measure:", scores.f_measure(refsets["neg"], testsets["neg"]))
def classification_result(classifier, test_set):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    reflist = []
    testlist = []
    for i, (tweet, label) in enumerate(test_set):
        refsets[label].add(i)
        reflist.append(label)
        observed = classifier.classify(tweet)
        testsets[observed].add(i)
        testlist.append(observed)
    print(len(refsets['1']))
    print("Accuracy : ", nltk.classify.accuracy(classifier, test_set) * 100)
    print("Precision Pos: ", precision(refsets['1'], testsets['1']) * 100)
    print("Recall Pos: ", recall(refsets['1'], testsets['1']) * 100)
    print("F Measure Pos: ", f_measure(refsets['1'], testsets['1']) * 100)
    print("Precision Neg: ", precision(refsets['0'], testsets['0']) * 100)
    print("Recall Neg: ", recall(refsets['0'], testsets['0']) * 100)
    print("F Measure Neg: ", f_measure(refsets['0'], testsets['0']) * 100)
    print("Confusion Metrics : \n", ConfusionMatrix(reflist, testlist))
Example #19
def prec_rec(test_users, exact_neighbors, apx_neighbors):
    pr = []
    rc = []
    for uid in test_users:
        p = precision(set(exact_neighbors[uid]), set(apx_neighbors[uid]))
        r = recall(set(exact_neighbors[uid]), set(apx_neighbors[uid]))
        if p is not None:
            pr.append(p)
        if r is not None:
            rc.append(r)
    return (np.mean(pr), np.mean(rc))
Example #20
def avgOffEval(inpath1, inpath2):

    print('\n=============================')
    print(
        'NER evaluation (single entity class/mention-level, full/offsets, avg. of abstract-level)'
    )
    print('=============================')
    print('==> gold', inpath1)
    print('==> pred', inpath2)
    print('=============================')
    recs = []
    pres = []
    fscs = []
    for filename1 in glob.glob(inpath1 + "/*ann"):
        filen1 = filename1.split('/')[len(filename1.split('/')) - 1]
        for filename2 in glob.glob(inpath2 + "/*ann"):
            filen2 = filename2.split('/')[len(filename2.split('/')) - 1]
            if filen1 == filen2:
                preds = set([])
                refrs = set([])
                file1 = codecs.open(filename1, 'r', encoding='utf-8')
                file2 = codecs.open(filename2, 'r', encoding='utf-8')
                for line1 in file1.readlines():
                    if len(line1.split('\t')) > 1:
                        men1 = line1.split('\t')[2].strip()
                        off1 = '-'.join([
                            w.strip() for w in line1.split('\t')[1].split(' ')
                        ])
                        gold = men1 + '_' + off1
                        refrs.add(gold)
                for line2 in file2.readlines():
                    if len(line2.split('\t')) > 1:
                        men2 = line2.split('\t')[2].strip()
                        off2 = '-'.join([
                            w.strip() for w in line2.split('\t')[1].split(' ')
                        ])
                        pred = men2 + '_' + off2
                        preds.add(pred)
                if len(preds) > 0 and len(refrs) > 0:
                    rec = scores.recall(refrs, preds)
                    pre = scores.precision(refrs, preds)
                    fsc = scores.f_measure(refrs, preds)
                else:
                    rec = 0
                    pre = 0
                    fsc = 0
                recs.append(rec)
                pres.append(pre)
                fscs.append(fsc)
    print('average \t R={R} \t P={P} \t F1={F}'.format(R=str(np.mean(recs)),
                                                       P=str(np.mean(pres)),
                                                       F=str(np.mean(fscs))))
    print('=============================\n')
Example #21
 def get_results(self, classifier, test_set, target):
     refsets = collections.defaultdict(set)
     testsets = collections.defaultdict(set)
     for i, (feats, label) in enumerate(test_set):
         refsets[label].add(i)
         observed = classifier.classify(feats)
         testsets[observed].add(i)
     target_precision = precision(refsets[target], testsets[target])
     target_recall = recall(refsets[target], testsets[target])
     target_f_measure = f_measure(refsets[target], testsets[target])
     results = (target_precision, target_recall, target_f_measure)
     return (results)
Example #22
def printEval(realSet, testSet):

    precisionPos = precision(realSet['pos'], testSet['pos'])
    precisionNeg = precision(realSet['neg'], testSet['neg'])
    precisionNeutre = precision(realSet['neutre'], testSet['neutre'])

    recallPos = recall(realSet['pos'], testSet['pos'])
    recallNeg = recall(realSet['neg'], testSet['neg'])

    fmesurePos = f_measure(realSet['pos'], testSet['pos'])
    fmesureNeg = f_measure(realSet['neg'], testSet['neg'])

    print("Precision    Pos: %f - Neg: %f " % (float(precisionPos), float(precisionNeg)))
    print("Recall   Pos: %f - Neg: %f " % (float(recallPos), float(recallNeg)))
    print("F-Mesure Pos: %f - Neg: %f " % (float(fmesurePos), float(fmesureNeg)))
Example #23
    def print_precision_recall(self):
        refset = collections.defaultdict(set)
        testset = collections.defaultdict(set)

        for i, (ft, label) in enumerate(self.test_set):
            refset[label].add(i)
            predicted = self.classifier.classify(ft)
            testset[predicted].add(i)

        for tag in refset.keys():
            prc = precision(refset[tag], testset[tag])
            rec = recall(refset[tag], testset[tag])
            print('{}: precision={:4.2f} recall={:4.2f}'.format(tag, prc, rec))
def main(command, classifier_type):
    feature_functions = [unigram_freqs]

    corpus_file = open('ratings_corpus.json')
    corpus = json.load(corpus_file)
    corpus_file.close()

    feature_representation = [(extract_features(document, feature_functions), label)
                              for document, label in corpus]

    train_set, test_set = split_data(feature_representation)

    classifier = ''
    if command == 'new':
        if classifier_type == 'decision_tree':
            classifier = nltk.classify.DecisionTreeClassifier.train(train_set)
        elif classifier_type == 'maxent':
            classifier = nltk.classify.maxent.MaxentClassifier.train(train_set)
    elif command == 'load':
        if classifier_type == 'decision_tree':
            classifier_file = open('decisiontree_classifier.pickle', 'rb')
            classifier = pickle.load(classifier_file)
            classifier_file.close()
        elif classifier_type == 'maxent':
            classifier_file = open('maxent_classifier.pickle', 'rb')
            classifier = pickle.load(classifier_file)
            classifier_file.close()

    predictions = []
    golds = []

    for test_doc, rating in test_set:
        predictions.append(classifier.classify(test_doc))
        golds.append(rating)

    pred_sets = initialize_sets(ALL_RATINGS)
    gold_sets = initialize_sets(ALL_RATINGS)

    for doc_id, rating in enumerate(predictions):
        pred_sets[rating].add(doc_id)
    for doc_id, rating in enumerate(golds):
        gold_sets[rating].add(doc_id)

    for label in ALL_RATINGS:
        r = scores.recall(gold_sets[label], pred_sets[label])
        p = scores.precision(gold_sets[label], pred_sets[label])
        f = scores.f_measure(gold_sets[label], pred_sets[label])
        
        if not (r is None or p is None or f is None):
            f = float(f)
            print('<{}> P: {:.2}, R: {:.2}, F: {:.2}'.format(label, p, r, f))
Example #25
File: ml.py Project: neuzxy/ML2015
def evaluate_features(feature_select, classify_method):
    posFeatures = []
    negFeatures = []
    with open(RT_POLARITY_POS_FILE, 'r', encoding='utf8') as posSentences:
        posWords = []
        for i in posSentences:
            if "<review" in i:
                continue
            if "</review" in i:
                posWords = [feature_select(posWords), 'pos']
                posFeatures.append(posWords)
                posWords = []
                continue
            line = re.sub(r'[{}]+'.format(PUNCTUATION), "", i)
            posWords += jieba.cut(line, cut_all=False)
    with open(RT_POLARITY_NEG_FILE, 'r', encoding='utf8') as negSentences:
        negWords = []
        for i in negSentences:
            if "<review" in i:
                continue
            if "</review" in i:
                negWords = [feature_select(negWords), 'neg']
                negFeatures.append(negWords)
                negWords = []
                continue
            line = re.sub(r'[{}]+'.format(PUNCTUATION), "", i)
            negWords += jieba.cut(line, cut_all=False)

    # get trainFeatures and testFeatures
    trainFeatures = posFeatures + negFeatures
    testFeatures = getTestFeatures(feature_select)
    classifier = nltk.classify.SklearnClassifier(classify_method)
    classifier.train(trainFeatures)

    # initiates referenceSets and testSets
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)

    # puts correctly labeled sentences in referenceSets and the predictively labeled version in testSets
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # prints metrics to show how well the feature selection did
    print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
    print('accuracy:', nltk.classify.util.accuracy(classifier, testFeatures))
    pos_prec = scores.precision(referenceSets['pos'], testSets['pos'])
    pos_rec = scores.recall(referenceSets['pos'], testSets['pos'])
    neg_prec = scores.precision(referenceSets['neg'], testSets['neg'])
    neg_rec = scores.recall(referenceSets['neg'], testSets['neg'])
    print('pos precision:', pos_prec)
    print('pos recall:', pos_rec)
    print('neg precision:', neg_prec)
    print('neg recall:', neg_rec)
    print('F1 Pos:', 2 * pos_prec * pos_rec / (pos_prec + pos_rec))
    print('F1 neg:', 2 * neg_prec * neg_rec / (neg_prec + neg_rec))
Example #26
def assess_classifier(classifier, test_set):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)
    count = 0
    print('Precision = ' + str(precision(refsets['spam'], testsets['spam'])))
    print('Recall = ' + str(recall(refsets['spam'], testsets['spam'])))
    print('F measure = ' +
          str(f_measure(refsets['spam'], testsets['spam'], alpha=0.5)))
    print('FP rate = ' + str(
        abs((len(refsets['ham']) - len(testsets['ham'])) /
            (len(refsets['spam']) + len(refsets['ham'])))))
def main():
    brown_tagged_sents = brown.tagged_sents(categories='news')
    size = int(len(brown_tagged_sents) * 0.8)
    train_data = brown_tagged_sents[:size]
    test_data = brown_tagged_sents[size:]

    # store pickle file
    if not (os.path.isfile('UnigramTagger.pkl') and os.path.isfile('Tnt_Tagger.pkl')
            and os.path.isfile('PerceptronTagger.pkl')):
        unigram_tagger = unigram_tag(train_data)
        tnt_tagger = tnt_tag(train_data)
        perc_tagger = perceptron_tag(train_data)

        [store_pickle(each_) for each_ in [unigram_tagger, tnt_tagger, perc_tagger]]

    # load pickle file and get each model file with a tuple
    models_files_tuple = [(each_.split('.')[0], retrieve_pickle(each_)) for each_ in
                    ['UnigramTagger.pkl', 'PerceptronTagger.pkl', 'Tnt_Tagger.pkl']]

    # test the loaded models on test data
    print("TESTING LOADED MODELS")
    for tagg_name, tagg_mode in models_files_tuple:
        print("Loaded {tag_name} evaluation results: {evaluate_res}".format(tag_name=tagg_name,
                                                                            evaluate_res=tagg_mode.evaluate(test_data)))

    # Tabulate and calculate accuracies, choose best one based on F1 value
    reference_sentences_lists = [list(map(lambda pair_: pair_[1], each)) for each in test_data]
    test_sentences_lists = [list(map(lambda pair_: pair_[0], each)) for each in test_data]

    reference_lst = list()
    test_lst = list()
    [reference_lst.extend(each_lst) for each_lst in reference_sentences_lists[:1000]]
    [test_lst.extend(each_lst) for each_lst in test_sentences_lists[:1000]]

    for tagg_name, tagger_mod in models_files_tuple:

        if tagg_name == "Tnt_Tagger":
            reference_lst = reference_lst[:700]
            test_lst = test_lst[:700]
        result_tokens = tagger_mod.tag(test_lst)

        result_tokens__ = list(map(lambda pair: 'UNKNOWN' if pair[1] is None else pair[1], result_tokens))

        print("{} Evaluation Results".format(tagg_name))
        print("Precision: ", precision(set(reference_lst), set(result_tokens__)))
        print("Recall: ", recall(set(reference_lst), set(result_tokens__)))
        print("F measure: ", f_measure(set(reference_lst), set(result_tokens__)))
Example #28
def precision_recall(classifier, testfeats):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)

    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    precisions = {}
    recalls = {}

    for label in classifier.labels():
        precisions[label] = scores.precision(refsets[label], testsets[label])
        recalls[label] = scores.recall(refsets[label], testsets[label])

    return precisions, recalls
Example #29
def precision_recall(classifier, testFeatures):
    refsets = defaultdict(set)
    testsets = defaultdict(set)

    for i, (feats, label) in enumerate(testFeatures):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    precisions = {}
    recalls = {}

    for label in classifier.labels():
        precisions[label] = precision(refsets[label], testsets[label])
        recalls[label] = recall(refsets[label], testsets[label])

    return precisions, recalls
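
Typical usage trains a classifier, passes it in with the held-out features, and reads the per-label dictionaries back; a short sketch with toy data:

from nltk.classify import NaiveBayesClassifier

train_feats = [({'great': True}, 'pos'), ({'awful': True}, 'neg')]
test_feats = [({'great': True}, 'pos'), ({'awful': True}, 'neg')]

classifier = NaiveBayesClassifier.train(train_feats)
precisions, recalls = precision_recall(classifier, test_feats)
print('pos precision:', precisions['pos'])
print('neg recall:', recalls['neg'])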
Example #30
def compute_evaluation_scores(classifier: ClassifierBase,
                              data_set: List[Tuple[Dict, str]],
                              evaluated_class: LikeTypeEnum) \
        -> Dict[str, float]:
    """Evaluate classifier on dataset with common metrics.

    Namely calculates:
    precision, recall, accuracy, f-measure.

    And adds:
    tp, fp, fn, tn (true/false positives/negatives)."""
    clas_scores: dict = {}
    correctly_classified: int = 0

    # metrics
    refsets: DefaultDict[str, set] = defaultdict(set)
    testsets: DefaultDict[str, set] = defaultdict(set)
    for i, (fs, label) in enumerate(data_set):
        refsets[label].add(i)
        classified = classifier.classify(fs)
        testsets[classified].add(i)

        if label == classified:
            correctly_classified += 1

    # we don't know how many and what are the values of negative classes
    # therefore we compute union of all and subtract positive elements
    negative_test: set = reduce(lambda a, b: a.union(b), testsets.values()) \
                         - testsets[evaluated_class.value]
    negative_ref: set = reduce(lambda a, b: a.union(b), refsets.values()) \
                        - refsets[evaluated_class.value]
    positive_test: set = testsets[evaluated_class.value]
    positive_ref: set = refsets[evaluated_class.value]

    clas_scores['tp'] = len(positive_test & positive_ref) / len(data_set)
    clas_scores['fp'] = len(positive_test & negative_ref) / len(data_set)
    clas_scores['tn'] = len(negative_test & negative_ref) / len(data_set)
    clas_scores['fn'] = len(negative_test & positive_ref) / len(data_set)

    clas_scores['precision'] = scores.precision(positive_ref, positive_test)
    clas_scores['recall'] = scores.recall(positive_ref, positive_test)
    clas_scores['f_measure'] = scores.f_measure(positive_ref, positive_test)
    # accuracy is true positives and true negatives over all instances
    clas_scores['accuracy'] = correctly_classified / len(data_set)

    return clas_scores
Example #31
def macroOffEval(inpath1, inpath2):

    print('\n=============================')
    print(
        'NER evaluation (single entity class/mention-level, full/offsets, corpus-level)'
    )
    print('=============================')
    print('==> gold', inpath1)
    print('==> pred', inpath2)
    print('=============================')
    preds = set([])
    refrs = set([])
    for filename1 in glob.glob(inpath1 + "/*ann"):
        filen1 = filename1.split('/')[len(filename1.split('/')) - 1]
        for filename2 in glob.glob(inpath2 + "/*ann"):
            filen2 = filename2.split('/')[len(filename2.split('/')) - 1]
            if filen1 == filen2:
                file1 = codecs.open(filename1, 'r', encoding='utf-8')
                file2 = codecs.open(filename2, 'r', encoding='utf-8')
                for line1 in file1.readlines():
                    if len(line1.split('\t')) > 1:
                        men1 = line1.split('\t')[2].strip()
                        off1 = '-'.join([
                            w.strip() for w in line1.split('\t')[1].split(' ')
                        ])
                        gold = men1 + '_' + off1
                        refrs.add(gold)
                for line2 in file2.readlines():
                    if len(line2.split('\t')) > 1:
                        men2 = line2.split('\t')[2].strip()
                        off2 = '-'.join([
                            w.strip() for w in line2.split('\t')[1].split(' ')
                        ])
                        pred = men2 + '_' + off2
                        preds.add(pred)
    rec = scores.recall(refrs, preds)
    pre = scores.precision(refrs, preds)
    fsc = scores.f_measure(refrs, preds)
    print('macro \t R={R} \t P={P} \t F1={F}'.format(R=str(rec),
                                                     P=str(pre),
                                                     F=str(fsc)))
    print('=============================\n')
Example #32
def show_metrics(classifier, test_set):
    description = ""
    # Given a classifier and a set to test it, it will print metrics for the classifier
    description = description + "\n" + "Accuracy: " + str(
        nltk.classify.accuracy(classifier, test_set))

    # Creates two sets: one with references (correct results) and other with tests (classifier predictions)
    # This sets are divided in fact-checkable and non-fact-checkable sets that contain a unique id (integer)
    # for each sentence
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)  # 1, neg
        observed = classifier.classify(feats)  #neg
        testsets[observed].add(i)  #1, neg

    model_precision = int(
        precision(refsets['fact-checkable'], testsets['fact-checkable']) * 100)
    model_recall = int(
        recall(refsets['fact-checkable'], testsets['fact-checkable']) * 100)
    model_f_measure = int(
        f_measure(refsets['fact-checkable'], testsets['fact-checkable'], 0.3) *
        100)

    description += "\n" + "PRECISION: Of the sentences predicted fact-checkable, " + str(
        model_precision) + "% were actually fact-checkable"
    description += "\n" + "RECALL: Of the sentences that were fact-checkable, " + str(
        model_recall) + "% were predicted correctly"
    description += "\n" + "F-MEASURE (balance between precission and recall): " + str(
        model_f_measure) + "%"

    # Same for non fact-checkables
    #print('non-fact-checkable precision:', precision(refsets['non-fact-checkable'], testsets['non-fact-checkable']))
    #print('non-fact-checkable recall:', recall(refsets['non-fact-checkable'], testsets['non-fact-checkable']))
    #print('non-fact-checkable F-measure:', f_measure(refsets['non-fact-checkable'], testsets['non-fact-checkable']))

    print(description)

    # informative
    classifier.show_most_informative_features(25)

    return description
Example #33
def get_measures(reference, test):
    tp = tn = fp = fn = 0

    for ((_, r), (_, t)) in zip(reference, test):
        if r == t == "O":
            tn += 1
        elif r == t == "ORG":
            tp += 1
        elif r == "O" and t == "ORG":
            fp += 1
        elif r == "ORG" and t == "O":
            fn += 1
    matrix = [tp, tn, fp, fn]
    acc = accuracy(reference, test)
    reference_set = set(reference)
    test_set = set(test)
    pre = precision(reference_set, test_set)
    rec = recall(reference_set, test_set)
    f = f_measure(reference_set, test_set)
    return acc, pre, rec, f, matrix
Example #34
def get_performance_dataframe(tagger, test_tag_list):
    """Returns DataFrame with metrics for individual tag combinations. For NLTK taggers."""
    truth_sets = defaultdict(set)
    test_sets = defaultdict(set)
    
    for n, (w, label) in enumerate(test_tag_list):
        observed = tagger.tag([w])[0][1]
        truth_sets[label].add(n)
        test_sets[observed].add(n)

    performance_dict = dict()
    for key in test_sets.keys():
        performance_dict.setdefault(
            key,
            {
                'Precision': precision(truth_sets[key], test_sets[key]),
                'Recall': recall(truth_sets[key], test_sets[key]),
                'F1': f_measure(truth_sets[key], test_sets[key])
            }
        )
    df = pd.DataFrame(performance_dict).T
    return df
Example #35
def compute_pairwise(hashed_er_anns_df):
    """
        Returns pairwise comparision between users (uesr_a & user_b)
        that have completed similar documents
    """
    # Make user_pks unique
    userset = set(hashed_er_anns_df.user_id)

    inter_annotator_arr = []
    # For each unique pair of users, compute comparisons
    for user_a, user_b in itertools.combinations(userset, 2):
        # The list of document_pks that each user had completed
        user_a_set = set(hashed_er_anns_df[hashed_er_anns_df['user_id'] ==
                                           user_a].document_pk)
        user_b_set = set(hashed_er_anns_df[hashed_er_anns_df['user_id'] ==
                                           user_b].document_pk)

        # Only compare documents both users have completed
        pmid_set = user_a_set.intersection(user_b_set)

        # If user_a and user_b have completed shared PMIDs, compute comparisons
        if len(pmid_set) != 0:
            pmid_df = hashed_er_anns_df[hashed_er_anns_df['document_pk'].isin(
                pmid_set)]
            ref_set = set(pmid_df[pmid_df['user_id'] == user_a].hash)
            test_set = set(pmid_df[pmid_df['user_id'] == user_b].hash)

            # Compute the precision, recall and F-measure based on
            # the unique hashes
            inter_annotator_arr.append(
                (user_a, user_b, len(pmid_set),
                 nltk_scoring.precision(ref_set, test_set),
                 nltk_scoring.recall(ref_set, test_set),
                 nltk_scoring.f_measure(ref_set, test_set)))

    return pd.DataFrame(inter_annotator_arr,
                        columns=('user_a', 'user_b', 'docs_compared',
                                 'precision', 'recall', 'f-score'))
Example #36
def compute_pairwise(hashed_annotations_df):
    '''
        Returns pairwise comparisons between users (user_a & user_b)
        that have completed similar documents
    '''
    # Make user_pks unique
    userset = set(hashed_annotations_df.user)

    inter_annotator_arr = []
    # For each unique pair of users, compute comparisons
    for user_a, user_b in itertools.combinations(userset, 2):
        # The list of document_ids that each user had completed
        user_a_set = set(hashed_annotations_df[hashed_annotations_df['user'] == user_a].document_id)
        user_b_set = set(hashed_annotations_df[hashed_annotations_df['user'] == user_b].document_id)

        # Only compare documents both users have completed
        pmid_set = user_a_set.intersection(user_b_set)

        # If user_a and user_b have completed shared PMIDs, compute comparisons
        if len(pmid_set) != 0:
            pmid_df = hashed_annotations_df[hashed_annotations_df['document_id'].isin(pmid_set)]
            ref_set = set(pmid_df[pmid_df['user'] == user_a].hash)
            test_set = set(pmid_df[pmid_df['user'] == user_b].hash)

            # Compute the precision, recall and F-measure based on
            # the unique hashes
            inter_annotator_arr.append((
                user_a,
                user_b,
                len(pmid_set),
                nltk_scoring.precision(ref_set, test_set),
                nltk_scoring.recall(ref_set, test_set),
                nltk_scoring.f_measure(ref_set, test_set)
            ))

    return pd.DataFrame(inter_annotator_arr, columns=('user_a', 'user_b', 'docs_compared', 'precision', 'recall', 'f-score'))
Example #37
def scores(classifier, test, ids):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(test):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    accuracy = nltk.classify.accuracy(classifier, test)
    print("accuracy: " + str(accuracy))
    # filter() returns an iterator in Python 3, so materialize it before taking len()
    p = list(filter(partial(is_not, None),
                    [precision(refsets[sense], testsets[sense]) for sense in ids]))
    p = sum(p) / len(p)
    print("precision: " + str(p))
    r = list(filter(partial(is_not, None),
                    [recall(refsets[sense], testsets[sense]) for sense in ids]))
    r = sum(r) / len(r)
    print("recall: " + str(r))
    f_1 = list(filter(partial(is_not, None),
                      [f_measure(refsets[sense], testsets[sense]) for sense in ids]))
    f_1 = sum(f_1) / len(f_1)
    print("f-1 score: " + str(f_1))

    return ({"precision": p, "recall": r, "f_1": f_1, "accuracy": accuracy})
Example #38
    if flip:
        model.class_prior = [1-categorized_proportion, categorized_proportion]
    else:
        model.class_prior = [categorized_proportion, 1-categorized_proportion]

    classifier.train(train_set)

    # test classifier
    test_results = classifier.classify_many([feat for (feat, label) in test_set])
    pos_test_set = set(i for i, result in enumerate(test_results) if result == category)
    reference_values = [label for (feat, label) in test_set]
    pos_ref_set = set(i for i, (feat, label) in enumerate(test_set) if label == category)
    accuracy = scores.accuracy(reference_values, test_results)
    accuracies.append(accuracy)
    precision = scores.precision(pos_ref_set, pos_test_set)
    recall = scores.recall(pos_ref_set, pos_test_set)
    f1 = scores.f_measure(pos_ref_set, pos_test_set)
    f1_scores.append(f1)

    print "%s: accuracy %s, precision %s, recall %s, F1 %s" % (colored(category, "blue"), colored(accuracy, "yellow"), colored(precision, "yellow"), colored(recall, "yellow"), colored(f1, "yellow"))
    ## print(nltk.classify.accuracy(classifier, test_set))
    # classifier.show_most_informative_features(5)
    # print ""

    # save trained classifier and word features to file
    dump_file = open("classifiers/%s.pickle" % category, "wb")
    pickle.dump({
        "classifier": classifier,
        "word_features": word_features
    }, dump_file)
    dump_file.close()
Example #39
 def getRecall(self):
     return recall(self._refsets['POS'], self._testsets['POS'])
# #-----------------------------classifier-------------------------------------------



posTweets = int(math.floor(len(preprocess_pos_tweets)*3/4))
negTweets = int(math.floor(len(preprocess_neg_tweets)*3/4))

trainFeatures = preprocess_pos_tweets[:posTweets] + preprocess_neg_tweets[:negTweets]
testFeatures = preprocess_pos_tweets[posTweets:] + preprocess_neg_tweets[negTweets:]

classifier = NaiveBayesClassifier.train(trainFeatures)

referenceSets = {'positive': set(), 'negative':set()}
testSets = {'positive':set(), 'negative':set()}

for i, (features, label) in enumerate(testFeatures):
	referenceSets[label].add(i)
	predicted = classifier.classify(features)
	testSets[predicted].add(i)


print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
print('accuracy:', nltk.classify.util.accuracy(classifier, testFeatures))
print('pos precision:', precision(referenceSets['positive'], testSets['positive']))
print('pos recall:', recall(referenceSets['positive'], testSets['positive']))
print('neg precision:', precision(referenceSets['negative'], testSets['negative']))
print('neg recall:', recall(referenceSets['negative'], testSets['negative']))

classifier.show_most_informative_features(10)	
def test_iteration(i, train_set, test_dict, feature_sets_by_match,
                   classifier_type='decision_tree'):
    """Performs one iteration of the k-fold cross validation, returing a dict
    containing overall micro and macro score averages, in addition to scores for
    each label.

    Args:
        i: the iteration of the k-fold cross validation.
        train_set: a list containing feature, rating pairs
        test_dict: a dictionary containing feature and rating information for
            the test set.
        feature_sets_by_match: feature representations of documents organized
            by match.
        classifier_type: the type of classifier to use.
    Returns:
        A dict containing overall micro and macro score averages, in addition
        to scores for each label.
    """
    classifier = ''
    if classifier_type == 'decision_tree':
        #classifier = nltk.classify.DecisionTreeClassifier.train(train_set)
        classifier = nltk.classify.scikitlearn.SklearnClassifier(tree.DecisionTreeClassifier(random_state=8246)).train(train_set)
    elif classifier_type == 'maxent':
        #classifier = nltk.classify.maxent.MaxentClassifier.train(train_set)
        classifier = nltk.classify.scikitlearn.SklearnClassifier(linear_model.LogisticRegression()).train(train_set)
    elif classifier_type == 'svr':
        classifier = nltk.classify.scikitlearn.SklearnClassifier(svm.SVR()).train(train_set)
    
    pred_sets = initialize_sets(ALL_RATINGS)
    gold_sets = initialize_sets(ALL_RATINGS)
    pred_list = []
    gold_list = []

    # Classify predictions and add them to relevant dicts and lists.
    for match in test_dict:
        for doc_id in test_dict[match]:
            test_doc = test_dict[match][doc_id]['features']
            pred = classifier.classify(test_doc)
            gold = test_dict[match][doc_id]['gold']
            test_dict[match][doc_id]['pred'] = pred

            gold_list.append(str(gold))
            pred_list.append(str(pred))
            gold_sets[gold].add(doc_id)
            pred_sets[pred].add(doc_id)

    # Calculate pairwise ranking accuracy
    correct = 0
    total = 0
    for match in test_dict:
        for pl1, pl2 in combinations(test_dict[match].keys(), 2):
            p1 = test_dict[match][pl1]
            p2 = test_dict[match][pl2]
            if p1['gold'] > p2['gold'] and p1['pred'] > p2['pred']:
                correct += 1
            elif p1['gold'] < p2['gold'] and p1['pred'] < p2['pred']:
                correct += 1
            elif p1['gold'] == p2['gold'] and p1['pred'] == p2['pred']:
                correct += 1
            total += 1

    print('Pairwise ranking accuracy: ' + str(correct/total))
    
    fold_scores = {'micro': '',
                   'macro': '',
                   'by_label': {rating: {'p': 0, 'r': 0, 'f': 0}
                                for rating in ALL_RATINGS}
                   }
    prf_micro = precision_recall_fscore_support(gold_list, pred_list, average='micro')
    print(prf_micro)
    fold_scores['micro'] = prf_micro

    prf_macro = precision_recall_fscore_support(gold_list, pred_list, average='macro')
    print(prf_macro)
    fold_scores['macro'] = prf_macro

    for label in ALL_RATINGS:
        r = scores.recall(gold_sets[label], pred_sets[label])
        p = scores.precision(gold_sets[label], pred_sets[label])
        f = scores.f_measure(gold_sets[label], pred_sets[label])
        
        if r is None:
            r = 0.0
        if p is None:
            p = 0.0
        if f is None:
            f = 0.0
            
        fold_scores['by_label'][label]['p'] = p
        fold_scores['by_label'][label]['r'] = r
        fold_scores['by_label'][label]['f'] = f
        f = float(f)
        print('<{}> P: {:.3}, R: {:.3}, F: {:.3}'.format(label, p, r, f))

    return fold_scores
 def recall(self, label):
     return scores.recall(self._referenceSets[label],
                          self._testSets[label])