def score(change):
    """Bucket a fractional price change into a direction label.

    Returns "down" for change < -0.02, "up" for change >= 0.02,
    and "flat" for anything in between.
    """
    if change < -0.02:
        return "down"
    elif change < 0.02:
        return "flat"
    else:
        return "up"


# Generate features and gold labels.
training_data = [(FreqDist(d["tokens"]), score(d["2-2dayPriceChange"]))
                 for d in training]
test_features = [FreqDist(d["tokens"]) for d in testing]
test_results = [score(d["2-2dayPriceChange"]) for d in testing]

# Train model.
model = nltk.NaiveBayesClassifier.train(training_data)

# Generate predictions.
preds = model.classify_many(test_features)

# Print results.
amounts = [(direction, len([t for t in test_results if t == direction]))
           for direction in ["down", "flat", "up"]]
print(amounts)
print("Majority Baseline: %.2f"
      % (max(count for _, count in amounts) / len(test_results)))
# nltk.accuracy and ConfusionMatrix both take (reference, test): the gold
# labels come first.  The previous order transposed the confusion matrix.
print("Accuracy: %.2f" % (nltk.accuracy(test_results, preds)))
print(ConfusionMatrix(test_results, preds))
# show_most_informative_features() prints its table itself and returns None,
# so wrapping it in print() emitted a stray "None" line.
model.show_most_informative_features(10)
# Evaluate dependency- vs chunk-based information extraction against gold.
sentences = []
evaluation_sents = []
for gold_sent in gold:
    sentences.append([w for w, t, c, l in gold_sent])

# Run the tagging/parsing pipeline.  The loop below indexes into
# chunk_trees and dep_trees, so these stages must actually execute —
# they were commented out, leaving both names undefined (NameError).
tokens = tagger.tag_sents(sentences)
chunk_trees = list(chunker.parse_sents(tokens))
# list() so the parser output is indexable even if it is a generator.
dep_trees = list(parser.parse_sents(sentences))

dep_tagged_sents = []
chunk_tagged_sents = []
for number, gold_sent in enumerate(gold):
    sentence = ' '.join(sentences[number])
    chunk_tree = chunk_trees[number]
    dep_tree = dep_trees[number]
    chunk_informations = list(chunk_extractor.extract(chunk_tree))
    dep_informations = list(dep_extractor.extract(dep_tree))

    evaluation_sent = [(w, l) for w, t, c, l in gold_sent]
    # info2iob already yields (word, tag, chunk, label) tuples; iterate it
    # directly instead of copying into a throwaway list first.
    dep_tagged_sent = [(w, l) for w, t, c, l in
                       info2iob(sentence, chunk_tree, dep_informations)]
    chunk_tagged_sent = [(w, l) for w, t, c, l in
                         info2iob(sentence, chunk_tree, chunk_informations)]

    if len(evaluation_sent) == len(dep_tagged_sent):
        # Keep the three lists aligned: only score sentences where the
        # dependency tagging produced one label per gold token.
        evaluation_sents.append(evaluation_sent)
        dep_tagged_sents.append(dep_tagged_sent)
        chunk_tagged_sents.append(chunk_tagged_sent)
    else:
        print(chunk_tagged_sent)
        print()

print('dependency accuracy: %f' % (accuracy(sum(evaluation_sents, []),
                                            sum(dep_tagged_sents, []))))
print('chunk accuracy: %f' % (accuracy(sum(evaluation_sents, []),
                                       sum(chunk_tagged_sents, []))))

information_tagger = IOBTagger(model='informations-all.model')
print(information_tagger.evaluate(gold))
# Train a language classifier and report per-language precision/recall
# plus overall accuracy and macro-averaged F1.
random.shuffle(test)

cf = train_classifier(train, classifier=CLASSIFIER,
                      feature_extract_fun=FEATURE_FUN)
results = test_classifier(test, cf, feature_extract_fun=FEATURE_FUN)
gold, predictions = zip(*results)

# ConfusionMatrix expects (reference, test): gold labels first, otherwise
# the rows and columns of the printed matrix are swapped.
cm = nltk.ConfusionMatrix(gold, predictions)

print(cf.most_informative_features(10))
print(results)
print("The confusion matrix of the test results:")
print(cm)

for l in LANGS:
    p = precision(l, results)
    r = recall(l, results)
    print(l + ": ", end="")
    # precision()/recall() signal "undefined" (no instances to score)
    # with the sentinel -1.
    if p == -1:
        # was "N\A": a typo for "N/A", and \A is an invalid escape sequence.
        print("Precision: N/A", end=" ")
    else:
        print("Precision: {:.3f}".format(p), end=" ")
    if r == -1:
        print("Recall: N/A")
    else:
        print("Recall: {:.3f}".format(r))

print("Accuracy: {:.3f}".format(accuracy(gold, predictions)))

f1 = macro_average_f1(LANGS, results)
if f1 == -1:
    print("Macro-averaged F1: N/A")
else:
    # {:.3f} (fixed 3 decimals) for consistency with the other metrics;
    # the original {:.3} printed 3 significant digits instead.
    print("Macro-averaged F1: {:.3f}".format(f1))
        # NOTE(review): this leading if/elif fragment appears to sit inside a
        # triple-quoted (commented-out) region whose opening quotes are above
        # this view — the closing `"""` below ends it.  Left verbatim.
        if gold_to_test == [tag] and test_to_gold == [tag]:
            continue
        elif len(gold_to_test) > 2 or len(test_to_gold) > 2:
            continue
        else:
            print("'%s':" % tag, end = '')
            print(gold_to_test)
            print("'%s': " % tag, end = '')
            print(test_to_gold)
            print()
"""
# Score the tagger against the gold tags and build a confusion matrix.
test_tags = generate_test_tags()
gold_tags = generate_gold_tags()

print(' test tagger accuracy : %.4f' % nltk.accuracy(gold_tags, test_tags))

#make confusion matrix
cm = nltk.ConfusionMatrix(gold_tags, test_tags)
# Parse the tag names back out of ConfusionMatrix.key(), whose lines look
# like "<index> <tag>"; take the second whitespace field of each.
tag_list = [w.split()[1] for w in re.findall(r'[0-9]+.*\n', cm.key())]

#make simple tag dict.
# Collapses headline/title tag variants (-HL / -TL suffixes) onto their
# base Brown-corpus tags.  NOTE(review): dict literal is truncated here —
# the remaining entries continue beyond this view.
marge = {
    '*-HL': '*',
    ',-HL': ',',
    '---HL': '--',
    '.-HL': '.',
    ':-HL': ':',
    ':-TL': ':',
    'ABN-TL': 'ABN',
    'AP-TL': 'AP',
# dep_trees = parser.parse_sents(sentences)
# Compare dependency-based and chunk-based IOB tagging against the gold
# sentences, scoring only sentences whose token counts line up.
dep_tagged_sents = []
chunk_tagged_sents = []

for i, gold_sent in enumerate(gold):
    raw_sentence = ' '.join(sentences[i])
    c_tree = chunk_trees[i]
    d_tree = dep_trees[i]

    infos_from_chunks = list(chunk_extractor.extract(c_tree))
    infos_from_deps = list(dep_extractor.extract(d_tree))

    # Reduce each 4-tuple stream to (word, label) pairs.
    gold_pairs = [(word, label) for word, _, _, label in gold_sent]
    dep_pairs = [(word, label) for word, _, _, label in
                 [tok for tok in info2iob(raw_sentence, c_tree, infos_from_deps)]]
    chunk_pairs = [(word, label) for word, _, _, label in
                   [tok for tok in info2iob(raw_sentence, c_tree, infos_from_chunks)]]

    # Misaligned sentence: report it and skip, keeping all lists in step.
    if len(gold_pairs) != len(dep_pairs):
        print(chunk_pairs)
        print()
        continue

    evaluation_sents.append(gold_pairs)
    dep_tagged_sents.append(dep_pairs)
    chunk_tagged_sents.append(chunk_pairs)

# Flatten once and score both tagging strategies against the same gold.
flat_gold = sum(evaluation_sents, [])
print('dependency accuracy: %f' % (accuracy(flat_gold, sum(dep_tagged_sents, []))))
print('chunk accuracy: %f' % (accuracy(flat_gold, sum(chunk_tagged_sents, []))))

information_tagger = IOBTagger(model='informations-all.model')
print(information_tagger.evaluate(gold))
## Imports.  Converted from Python 2 to Python 3 for consistency with the
## rest of this file (every other section uses print() calls): urllib2 was
## Python 2 only; its Python 3 home is urllib.request.
import urllib.request

## Import for nltk and classification
import random
import nltk
from nltk import FreqDist
import yaml
import itertools

## Import classifier
from Classifier import myClassify

## Open validation list.  `with` guarantees the handle is closed (the old
## code leaked it), and safe_load avoids executing arbitrary YAML tags —
## bare yaml.load without a Loader is unsafe and deprecated in PyYAML.
with open('validationList.yaml') as f:
    validationList = yaml.safe_load(f)

## Construct gold set
gold = [b for (a, b) in validationList]

## Construct predicted set
predicted = [myClassify(a) for (a, b) in validationList]

## Confusion matrix — (reference, test) order, gold labels first.
cm = nltk.ConfusionMatrix(gold, predicted)
print(cm)

## Print accuracy
print(nltk.accuracy(gold, predicted))

## End of code