# Example 1
def score(change):
    """Bucket a fractional price change into a direction label.

    Changes below -0.02 are "down", changes of 0.02 or more are "up",
    and everything in between (the +/-2% dead zone) is "flat".
    """
    if change < -0.02:
        return "down"
    if change < 0.02:
        return "flat"
    return "up"

#Generate Features & Results
# Pair each document's bag-of-words FreqDist with a direction label derived
# from its price change. `training` and `testing` are assumed to be lists of
# dicts with "tokens" and "2-2dayPriceChange" keys — TODO confirm upstream.
training_data = [(FreqDist(d["tokens"]), score(d["2-2dayPriceChange"])) for d in training]
test_features = [FreqDist(d["tokens"]) for d in testing]
test_results = [score(d["2-2dayPriceChange"]) for d in testing]

#Train Model
model = nltk.NaiveBayesClassifier.train(training_data)

#Generate Predictions
preds = model.classify_many(test_features)

#Print Results
# Gold-label distribution and the majority-class baseline it implies.
amounts = [(direction, len([t for t in test_results if t == direction])) for direction in ["down", "flat", "up"]]
print(amounts)
print("Majority Baseline: %.2f" % (max([b for a, b in amounts]) / len(test_results)))

# accuracy() is symmetric, but nltk's convention is (reference, test).
print("Accuracy: %.2f" % (nltk.accuracy(test_results, preds)))

# ConfusionMatrix expects (reference, test): gold labels first, predictions
# second. The original call had them swapped, which transposes the matrix
# and mislabels rows/columns.
print(ConfusionMatrix(test_results, preds))

# show_most_informative_features() prints its report itself and returns None;
# wrapping it in print() used to emit a stray "None" line after the report.
model.show_most_informative_features(10)

# Evaluate dependency-based and chunk-based information extraction against a
# gold corpus. Each gold token is a (word, tag, chunk, label) 4-tuple.
sentences = [[w for w, t, c, l in gold_sent] for gold_sent in gold]
evaluation_sents = []
#tokens = tagger.tag_sents(sentences)
#chunk_trees = list(chunker.parse_sents(tokens))
#dep_trees = parser.parse_sents(sentences)
dep_tagged_sents = []
chunk_tagged_sents = []
for number, gold_sent in enumerate(gold):
    sentence = ' '.join(sentences[number])
    chunk_tree = chunk_trees[number]
    dep_tree = dep_trees[number]
    chunk_informations = list(chunk_extractor.extract(chunk_tree))
    dep_informations = list(dep_extractor.extract(dep_tree))
    # Keep only (word, label) pairs for scoring.
    evaluation_sent = [(w, l) for w, t, c, l in gold_sent]
    # info2iob already yields token tuples; the original wrapped it in a
    # pointless [tokens for tokens in ...] copy before unpacking.
    dep_tagged_sent = [(w, l) for w, t, c, l in info2iob(sentence, chunk_tree, dep_informations)]
    chunk_tagged_sent = [(w, l) for w, t, c, l in info2iob(sentence, chunk_tree, chunk_informations)]
    if len(evaluation_sent) == len(dep_tagged_sent):
        # NOTE(review): chunk_tagged_sent is appended without its own length
        # check — confirm it always aligns with the gold sentence, otherwise
        # the chunk accuracy below compares misaligned token streams.
        evaluation_sents.append(evaluation_sent)
        dep_tagged_sents.append(dep_tagged_sent)
        chunk_tagged_sents.append(chunk_tagged_sent)
    else:
        # Length mismatch: report the offending sentence and skip it.
        print(chunk_tagged_sent)
        print()
# Flatten per-sentence pair lists into one token stream for overall accuracy.
print('dependency accuracy: %f' % (accuracy(sum(evaluation_sents, []), sum(dep_tagged_sents, []))))
print('chunk accuracy: %f' % (accuracy(sum(evaluation_sents, []), sum(chunk_tagged_sents, []))))

information_tagger = IOBTagger(model='informations-all.model')
print(information_tagger.evaluate(gold))
    random.shuffle(test)
    cf = train_classifier(train,classifier=CLASSIFIER, feature_extract_fun=FEATURE_FUN)
    results = test_classifier(test, cf, feature_extract_fun=FEATURE_FUN)
    gold, predictions = zip(*results)
    cm = nltk.ConfusionMatrix(predictions, gold)

    print(cf.most_informative_features(10))
    print(results)
    print("The confusion matrix of the test results:")
    print(cm)
    for l in LANGS:
        p = precision(l, results)
        r = recall(l, results)
        print(l+": ", end="")
        if p == -1:
            print("Precision: N\A", end=" ")
        else:
            print("Precision: {:.3f}".format(p), end=" ")

        if r == -1:
            print("Recall: N\A")
        else:
            print("Recall: {:.3f}".format(r))

    print("Accuracy: {:.3f}". format(accuracy(gold,predictions)))

    f1 = macro_average_f1(LANGS, results)
    if f1 == -1:
        print("Macro-averaged F1: N\A")
    else:
        print("Macro-averaged F1: {:.3}".format(f1))
        if gold_to_test == [tag] and test_to_gold == [tag]:
            continue
        elif len(gold_to_test) > 2 or len(test_to_gold) > 2:
            continue
        else:
            print("'%s':" % tag, end = '')
            print(gold_to_test)
            print("'%s': " % tag, end = '')
            print(test_to_gold)
        print()
"""

# Tag the evaluation corpus and score it against the gold standard.
test_tags = generate_test_tags()
gold_tags = generate_gold_tags()
# nltk.accuracy takes (reference, test): gold labels first, predictions second.
print('      test tagger accuracy : %.4f' %
      nltk.accuracy(gold_tags, test_tags))

#make confusion matrix
cm = nltk.ConfusionMatrix(gold_tags, test_tags)
# cm.key() emits "<index>  <tag>" legend lines; this pulls out the tag column.
# NOTE(review): depends on the exact text format of ConfusionMatrix.key() —
# fragile across nltk versions; confirm against the installed release.
tag_list = [w.split()[1] for w in re.findall(r'[0-9]+.*\n', cm.key())]

#make simple tag dict.
marge = {
    '*-HL': '*',
    ',-HL': ',',
    '---HL': '--',
    '.-HL': '.',
    ':-HL': ':',
    ':-TL': ':',
    'ABN-TL': 'ABN',
    'AP-TL': 'AP',
# Example 5
#dep_trees = parser.parse_sents(sentences)
# Evaluate dependency-based and chunk-based information extraction against a
# gold corpus. Each gold token is a (word, tag, chunk, label) 4-tuple.
dep_tagged_sents = []
chunk_tagged_sents = []
for number, gold_sent in enumerate(gold):
    sentence = ' '.join(sentences[number])
    chunk_tree = chunk_trees[number]
    dep_tree = dep_trees[number]
    chunk_informations = list(chunk_extractor.extract(chunk_tree))
    dep_informations = list(dep_extractor.extract(dep_tree))
    # Keep only (word, label) pairs for scoring.
    evaluation_sent = [(w, l) for w, t, c, l in gold_sent]
    # info2iob already yields token tuples; the original wrapped it in a
    # pointless [tokens for tokens in ...] copy before unpacking.
    dep_tagged_sent = [(w, l) for w, t, c, l
                       in info2iob(sentence, chunk_tree, dep_informations)]
    chunk_tagged_sent = [(w, l) for w, t, c, l
                         in info2iob(sentence, chunk_tree, chunk_informations)]
    if len(evaluation_sent) == len(dep_tagged_sent):
        # NOTE(review): chunk_tagged_sent is appended without its own length
        # check — confirm it always aligns with the gold sentence.
        evaluation_sents.append(evaluation_sent)
        dep_tagged_sents.append(dep_tagged_sent)
        chunk_tagged_sents.append(chunk_tagged_sent)
    else:
        # Length mismatch: report the offending sentence and skip it.
        print(chunk_tagged_sent)
        print()
# Flatten per-sentence pair lists into one token stream for overall accuracy.
print('dependency accuracy: %f' %
      (accuracy(sum(evaluation_sents, []), sum(dep_tagged_sents, []))))
print('chunk accuracy: %f' %
      (accuracy(sum(evaluation_sents, []), sum(chunk_tagged_sents, []))))

information_tagger = IOBTagger(model='informations-all.model')
print(information_tagger.evaluate(gold))
import urllib2

## Import for nltk and classification
import random
import nltk
from nltk import FreqDist
import yaml 
import itertools

## Import classifier
from Classifier import myClassify 

## Open validation list
# `with` guarantees the file handle is closed (the original leaked it).
# SECURITY: yaml.load() without a Loader can construct arbitrary Python
# objects; safe_load only builds plain data, which is all a validation
# list of (input, label) pairs needs — TODO confirm the file contains no
# custom !!python tags.
with open('validationList.yaml') as f:
    validationList = yaml.safe_load(f)

## Construct gold set
gold = [b for (a, b) in validationList]

## Construct predicted set
predicted = [myClassify(a) for (a, b) in validationList]

## Confusion matrix
# Python-2 print statements converted to calls, matching the rest of the file.
cm = nltk.ConfusionMatrix(gold, predicted)
print(cm)

## Print accuracy
print(nltk.accuracy(gold, predicted))

## End of code