def test_get_most_common_tag():
    expected = 0.63
    weights = most_common.get_most_common_weights(TRAIN_FILE)
    confusion = tagger_base.evalTagger(tagger_base.makeClassifierTagger(weights), 'mcc')
    actual = scorer.accuracy(confusion)
    ok_(expected < actual,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))

def trainAvgStructPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: a generator of (words, tags) tuples
    :param featfunc: feature function, passed through to viterbiTagger
    :param tagset: set of all possible tags
    :returns: average weights, list of training accuracies, list of dev accuracies
    """
    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # one pass of the averaged structured perceptron over the training data
        weights, wsum, tr_acc_i, num_instances = oneItAvgStructPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        T += num_instances
        # averaging trick: the running average is weights - wsum / T
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        # evalTagger produces the dev-set confusion matrix
        confusion = evalTagger(
            lambda words, tags: viterbiTagger(words, featfunc, avg_weights, tags)[0],
            'sp.txt')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        print i, 'dev:', dv_acc[i], 'train:', tr_acc[i]
    return avg_weights, tr_acc, dv_acc

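# Both trainAvgStructPerceptron above and trainAvgPerceptron below recover the
# averaged weights as weights - wsum / T rather than storing a running sum of
# full weight vectors: each update is also added to wsum, scaled by the number
# of instances already processed. The self-contained check below illustrates
# that identity; the names and the exact indexing convention are illustrative
# and may differ slightly from what the oneIt* helpers actually do.
from collections import defaultdict

def naive_average(updates):
    """Average of the weight vector after each update, computed directly."""
    weights = defaultdict(float)
    totals = defaultdict(float)
    for feat, delta in updates:
        weights[feat] += delta
        for k in weights:
            totals[k] += weights[k]
    T = float(len(updates))
    return dict((k, totals[k] / T) for k in weights)

def trick_average(updates):
    """Same quantity via the weights - wsum / T shortcut."""
    weights = defaultdict(float)
    wsum = defaultdict(float)
    for t, (feat, delta) in enumerate(updates):
        weights[feat] += delta
        wsum[feat] += t * delta  # t = number of updates made before this one
    T = float(len(updates))
    return dict((k, weights[k] - wsum[k] / T) for k in weights)

updates = [('f1', 1.0), ('f2', -1.0), ('f1', 0.5), ('f3', 2.0)]
assert naive_average(updates) == trick_average(updates)
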
def trainAvgPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: generates (words, tags) pairs
    :param featfunc: feature function
    :param tagset: set of all possible tags
    :returns: average weights, list of training accuracies, list of dev accuracies
    """
    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # one pass of the averaged perceptron over the training data
        weights, wsum, tr_acc_i, num_instances = oneItAvgPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        T += num_instances
        # averaging trick: the running average is weights - wsum / T
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        # evalTagger produces the dev-set confusion matrix
        confusion = evalTagger(
            lambda words, alltags: classifierTagger(words, featfunc, avg_weights, tagset),
            'perc')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        print i, 'dev:', dv_acc[i], 'train:', tr_acc[i]
    return avg_weights, tr_acc, dv_acc

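# oneItAvgPerceptron (like its structured counterpart oneItAvgStructPerceptron)
# is not shown in this section; its signature and return values are fixed by the
# call site above. Below is a minimal sketch of what such a pass could look
# like, assuming a standard mistake-driven update per token, the classifierTagger
# decoder from tagger_base, and the 'X' dummy previous-tag placeholder used in
# the tests; the real helper may differ in details such as tie-breaking and the
# exact averaging index.
def oneItAvgPerceptron(inst_generator, featfunc, weights, wsum, tagset, Tinit):
    num_correct = 0     # correctly tagged tokens in this pass
    num_tokens = 0      # total tokens in this pass
    num_instances = 0   # sentences in this pass
    t = Tinit           # instances processed so far, used to scale wsum updates
    for words, gold_tags in inst_generator:
        num_instances += 1
        pred_tags = classifierTagger(words, featfunc, weights, tagset)
        for i in range(len(words)):
            num_tokens += 1
            if pred_tags[i] == gold_tags[i]:
                num_correct += 1
                continue
            # mistake-driven update: promote gold features, demote predicted ones
            for f in featfunc(words, gold_tags[i], 'X', i):
                weights[f] += 1.0
                wsum[f] += t
            for f in featfunc(words, pred_tags[i], 'X', i):
                weights[f] -= 1.0
                wsum[f] -= t
        t += 1
    tr_acc = num_correct / float(num_tokens)
    return weights, wsum, tr_acc, num_instances
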
def test_hmm_weights_accuracy():
    confusion = tagger_base.evalTagger(
        lambda words, alltags: viterbi.viterbiTagger(
            words, viterbi.hmm_feats, hmm_weights, alltags)[0],
        'hmm')
    actual = scorer.accuracy(confusion)
    expected = 0.74
    ok_(expected < actual,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))

def test_classifier_tagger():
    expected = 0.136844287788
    noun_weights = most_common.get_noun_weights()
    noun_tagger = tagger_base.makeClassifierTagger(noun_weights)
    confusion = tagger_base.evalTagger(noun_tagger, 'nouns')
    actual = scorer.accuracy(confusion)
    assert_almost_equals(expected, actual, places=3,
                         msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))

def test_str_perceptron_small():
    w, tr_acc, dv_acc = str_perceptron.trainAvgStructPerceptron(
        5, tr_all[:50], features.wordTransFeatures, alltags)
    confusion = tagger_base.evalTagger(
        lambda words, alltags: viterbi.viterbiTagger(
            words, features.wordTransFeatures, w, alltags)[0],
        'str_classifier_small')
    expected_acc = 0.506
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))

def test_basic_classifer():
    test_weights = defaultdict(float)
    test_tags = ['N', 'V', 'V', 'N']
    for i in range(len(sent)):
        for feat in features.wordFeatures(sent, test_tags[i], 'X', i):
            test_weights[feat] = 1
        for feat in features.wordFeatures(sent, 'X', 'X', i):
            test_weights[feat] = 1
    expected = test_tags
    actual = tagger_base.classifierTagger(sent, features.wordFeatures, test_weights, alltags)
    eq_(expected, actual,
        msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
    expected_acc = 0.139539705577
    confusion = tagger_base.evalTagger(
        lambda words, alltags: tagger_base.classifierTagger(
            words, features.wordFeatures, test_weights, alltags),
        'test')
    actual_acc = scorer.accuracy(confusion)
    assert_almost_equals(expected_acc, actual_acc, places=3)

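# tagger_base.classifierTagger is exercised above but not shown in this section.
# The test implies it performs an independent argmax over the tagset at each
# position; a rough sketch under that assumption follows (the real
# implementation may thread the previously predicted tag instead of the 'X'
# placeholder, or break ties differently).
def classifierTagger(words, featfunc, weights, tagset):
    tags = []
    for i in range(len(words)):
        best_tag, best_score = None, None
        for tag in tagset:
            # score a candidate tag by summing the weights of its active features
            score = sum(weights.get(f, 0.0) for f in featfunc(words, tag, 'X', i))
            if best_score is None or score > best_score:
                best_tag, best_score = tag, score
        tags.append(best_tag)
    return tags
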