def regularization_using_grid_search(alphas, counts, class_counts, allkeys,
                                     tr_outfile='nb.alpha.tr.txt', dv_outfile='nb.alpha.dv.txt'):
    tr_accs = []
    dv_accs = []
    # Choose your alphas here
    weights_nb_alphas = dict()
    for alpha in alphas:
        weights_nb_alphas[alpha] = learnNBWeights(counts, class_counts, allkeys, alpha)
        confusion = evalClassifier(weights_nb_alphas[alpha], tr_outfile, TRAINKEY)
        tr_accs.append(scorer.accuracy(confusion))
        confusion = evalClassifier(weights_nb_alphas[alpha], dv_outfile, DEVKEY)
        dv_accs.append(scorer.accuracy(confusion))
    return weights_nb_alphas, tr_accs, dv_accs
def regularization_using_grid_search(alphas, netas, N_its, inst_generator, outfile, devkey,
                                     learning_rate=1e-4, regularizer=1e-2,
                                     tr_outfile='logreg.alpha.tr.txt', dv_outfile='logreg.alpha.dv.txt'):
    tr_accs = []
    dv_accs = []
    # Choose your alphas here
    weights_log_reg_alphas = dict()
    for alpha in alphas:
        for neta in netas:
            # keep only the learned weights; trainLRbySGD also returns per-iteration accuracies
            weights_log_reg_alphas[(alpha, neta)], _, _ = trainLRbySGD(
                N_its, inst_generator, outfile, devkey, learning_rate=neta, regularizer=alpha)
            confusion = evalClassifier(weights_log_reg_alphas[(alpha, neta)], tr_outfile, TRAINKEY)
            tr_accs.append(scorer.accuracy(confusion))
            confusion = evalClassifier(weights_log_reg_alphas[(alpha, neta)], dv_outfile, DEVKEY)
            dv_accs.append(scorer.accuracy(confusion))
    return weights_log_reg_alphas, tr_accs, dv_accs
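# A hypothetical driver for the logistic-regression grid search above. The grids and the
# `train_insts` generator name are illustrative stand-ins, not values from this codebase.
def pick_best_lr_config_sketch(train_insts):
    from itertools import product
    alphas = [1e-3, 1e-2, 1e-1]   # regularizer strengths (illustrative)
    netas = [1e-5, 1e-4, 1e-3]    # learning rates (illustrative)
    _, _, dv_accs = regularization_using_grid_search(
        alphas, netas, 10, train_insts, 'logreg.dv.txt', DEVKEY)
    # dv_accs is appended in the same (alpha, neta) order that the nested loops visit
    configs = list(product(alphas, netas))
    best_config, best_dv_acc = max(zip(configs, dv_accs), key=lambda pair: pair[1])
    return best_config, best_dv_acc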
def test_wlc_dev_almost_there_accuracy():
    global weights_wlc
    global WLC_FILE
    mat = evalClassifier(weights_wlc, WLC_FILE, DEVKEY)
    actual = scorer.accuracy(mat)
    expected = 0.40
    ok_(expected <= actual, msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def test_custom_str_perceptron():
    # w,tr_acc,dv_acc = str_perceptron.trainAvgStructPerceptron(10,tr_all,features.yourHMMFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : viterbi.viterbiTagger(words,features.yourHMMFeatures,w,alltags)[0],'custom_str_classifier')
    confusion = scorer.getConfusion(DEV_FILE, 'str_avg_perceptron_custom.response')
    expected_acc = 0.810
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))
def trainAvgStructPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: A generator of (words,tags) tuples
    :param featfunc: A function from (words, tags) to a dict of features and weights
    :param tagset: set of all possible tags
    """
    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # your code here
        weights, wsum, tr_acc_i, num_instances = oneItAvgStructPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        # note that I call evalTagger to produce the dev set results
        T += num_instances
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        confusion = evalTagger(
            lambda words, tags: viterbiTagger(words, featfunc, avg_weights, tags)[0], 'sp.txt')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i  # 1. - tr_err/float(sum([len(s) for s,t in inst_generator]))
        print i, 'dev:', dv_acc[i], 'train:', tr_acc[i]
    return avg_weights, tr_acc, dv_acc
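# Note on the averaging in trainAvgStructPerceptron/trainAvgPerceptron: wsum is assumed to
# accumulate each update scaled by the timestep at which it was applied (the accumulation
# itself happens inside oneItAvgStructPerceptron/oneItAvgPerceptron, which are defined
# elsewhere), so that
#     avg_weights[f] = weights[f] - wsum[f] / T
# recovers the mean of the weight vector over all T updates without storing T copies of it.
# This is the usual "lazy" averaged-perceptron bookkeeping.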
def trainAvgPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: generate words,tags pairs
    :param featfunc: feature function
    :param tagset: set of all possible tags
    :returns: average weights, training accuracy, dev accuracy
    """
    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # your code here
        weights, wsum, tr_acc_i, num_instances = oneItAvgPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        T += num_instances
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        confusion = evalTagger(
            lambda words, alltags: classifierTagger(words, featfunc, avg_weights, tagset), 'perc')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        print i, 'dev:', dv_acc[i], 'train:', tr_acc[i]
    return avg_weights, tr_acc, dv_acc
def trainAvgStructPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: A generator of (words,tags) tuples
    :param featfunc: A function from (words, tags) to a dict of features and weights
    :param tagset: set of all possible tags
    """
    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # your code here
        weights, wsum, tr_acc_i, num_instances = oneItAvgStructPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        # note that I call evalTagger to produce the dev set results
        T += num_instances
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        confusion = evalTagger(
            lambda words, tags: viterbiTagger(words, featfunc, avg_weights, tags)[0], 'sp.txt')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i  # 1. - tr_err/float(sum([len(s) for s,t in inst_generator]))
        print i, 'dev:', dv_acc[i], 'train:', tr_acc[i]
    return avg_weights, tr_acc, dv_acc
def trainLRbySGD(N_its, inst_generator, outfile, devkey, learning_rate=1e-4, regularizer=1e-2):
    weights = defaultdict(float)
    dv_acc = [None] * N_its
    tr_acc = [None] * N_its

    # this block is all to take care of regularization
    ratereg = learning_rate * regularizer

    def regularize(base_feats, t):
        for base_feat in base_feats:
            for label in ALL_LABELS:
                weights[(label, base_feat)] *= (1 - ratereg) ** (t - last_update[base_feat])
            last_update[base_feat] = t

    for it in xrange(N_its):
        tr_err = 0
        last_update = defaultdict(int)  # reset, since we regularize at the end of every iteration
        for i, (inst, true_label) in enumerate(inst_generator):
            # apply "just-in-time" regularization to the weights for features in this instance
            regularize(inst, i)
            # compute likelihood gradient from this instance
            probs = computeLabelProbs(inst, weights, ALL_LABELS)
            if true_label != argmax(probs):
                tr_err += 1
            # your code for updating the weights goes here
        # regularize all features at the end of each iteration
        regularize([base_feature for label, base_feature in weights.keys()], i)
        dv_acc[it] = scorer.accuracy(evalClassifier(weights, outfile, devkey))
        tr_acc[it] = 1. - tr_err / float(i)
        print it, 'dev:', dv_acc[it], 'train:', tr_acc[it]
    return weights, tr_acc, dv_acc
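# A minimal sketch of the gradient step that the "your code for updating the weights goes
# here" placeholder above expects. It is not the assignment solution; it mirrors the update
# used in trainLRbyAdaGrad later in this file, minus the adaptive per-feature scaling, and
# assumes `inst` maps base features to counts and `probs` maps each label in ALL_LABELS to
# its predicted probability.
def lr_sgd_update_sketch(weights, inst, true_label, probs, learning_rate):
    for word, value in inst.items():
        # push the true label's weights toward the observed features...
        weights[(true_label, word)] += learning_rate * value
        # ...and push every label's weights away in proportion to its predicted probability
        for label in ALL_LABELS:
            weights[(label, word)] -= learning_rate * probs[label] * value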
def test_custom_feat_avg_perceptron():
    # w, tr_acc, dv_acc = avg_perceptron.trainAvgPerceptron(10,tr_all,features.yourFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : tagger_base.classifierTagger(words,features.yourFeatures,w,alltags),'classifier')
    confusion = scorer.getConfusion(constants.DEV_FILE, 'avg_perceptron_custom.response')
    expected_acc = 0.810
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))
def test_wlc_dev_exact_accuracy():
    global weights_wlc
    global WLC_FILE
    mat = evalClassifier(weights_wlc, WLC_FILE, DEVKEY)
    actual = scorer.accuracy(mat)
    expected = 0.4467
    assert_almost_equals(expected, actual, places=4,
                         msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def test_mcc_dev_accuracy():
    global weights_mcc
    global MCC_FILE
    mat = evalClassifier(weights_mcc, MCC_FILE, DEVKEY)
    actual = scorer.accuracy(mat)
    expected = 0.3756
    assert_almost_equals(expected, actual, places=4,
                         msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def test_wlc_dev_exact_accuracy():
    global weights_wlc
    global WLC_FILE
    mat = evalClassifier(weights_wlc, WLC_FILE, DEVKEY)
    actual = scorer.accuracy(mat)
    expected = 0.4467
    assert_almost_equals(expected, actual, places=4,
                         msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def test_get_most_common_tag():
    expected = 0.63
    weights = most_common.get_most_common_weights(TRAIN_FILE)
    confusion = tagger_base.evalTagger(tagger_base.makeClassifierTagger(weights), 'mcc')
    actual = scorer.accuracy(confusion)
    ok_(expected < actual, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
def test_str_perceptron():
    # w,tr_acc,dv_acc = str_perceptron.trainAvgStructPerceptron(10,tr_all,features.wordTransFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : viterbi.viterbiTagger(words,features.wordTransFeatures,w,alltags)[0],'str_classifier')
    confusion = scorer.getConfusion(DEV_FILE, 'str_avg_perceptron.response')
    expected_acc = 0.749
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))
def test_hmm_weights_accuracy():
    confusion = tagger_base.evalTagger(
        lambda words, alltags: viterbi.viterbiTagger(
            words, viterbi.hmm_feats, hmm_weights, alltags)[0], 'hmm')
    actual = scorer.accuracy(confusion)
    expected = 0.74
    ok_(expected < actual, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
def test_get_most_common_tag():
    expected = 0.63
    weights = most_common.get_most_common_weights(TRAIN_FILE)
    confusion = tagger_base.evalTagger(
        tagger_base.makeClassifierTagger(weights), 'mcc')
    actual = scorer.accuracy(confusion)
    ok_(expected < actual, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
def test_mcc_tagger_accuracy():
    global tagger_mc, all_tags
    expected = 0.811124
    confusion = tagger_base.eval_tagger(tagger_mc, 'most-common.preds', all_tags=all_tags)
    actual = scorer.accuracy(confusion)
    ok_(expected < actual, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
def test_avg_perceptron():
    # w, tr_acc, dv_acc = avg_perceptron.trainAvgPerceptron(10,tr_all,features.wordCharFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : tagger_base.classifierTagger(words,features.wordCharFeatures,w,alltags),'classifier')
    confusion = scorer.getConfusion(constants.DEV_FILE, 'avg_perceptron.response')
    expected_acc = 0.740
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))
def test_wlc_dev_exact_accuracy():
    global weights_wlc
    global WLC_FILE
    mat = evalClassifier(weights_wlc, WLC_FILE, DEVKEY)
    actual = scorer.accuracy(mat)
    expected_lower = 0.4440
    expected_higher = 0.4470
    ok = (actual >= expected_lower) and (actual <= expected_higher)
    ok_(ok, msg="NOT IN RANGE Expected:%f,%f, Actual:%f" % (expected_lower, expected_higher, actual))
def test_classifier_tagger():
    expected = 0.136844287788
    noun_weights = most_common.get_noun_weights()
    noun_tagger = tagger_base.makeClassifierTagger(noun_weights)
    confusion = tagger_base.evalTagger(noun_tagger, 'nouns')
    actual = scorer.accuracy(confusion)
    assert_almost_equals(expected, actual, places=3,
                         msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
def test_nb2_tagger_is_good():
    global theta_nb_fixed
    tagger = tagger_base.make_classifier_tagger(theta_nb_fixed)
    confusion = tagger_base.eval_tagger(tagger, 'nb2')
    dev_acc = scorer.accuracy(confusion)
    expected_acc = 0.84
    ok_(expected_acc < dev_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, dev_acc))
def test_model_crf_nr_dev_accuracies():
    confusion = scorer.get_confusion(DEV_FILE, '../bilstm-dev-en.preds')
    acc = scorer.accuracy(confusion)
    print("Acc: " + str(acc))
    # ok_(acc > .86, "Accuracy Obt: " + str(acc))
    confusion = scorer.get_confusion(DEV_FILE, '../bilstm_crf-dev-en.preds')
    acc = scorer.accuracy(confusion)
    print("Acc: " + str(acc))
    # ok_(acc > .86, "Accuracy Obt: " + str(acc))
    confusion = scorer.get_confusion(NR_DEV_FILE, '../bilstm_crf-dev-nr.preds')
    acc = scorer.accuracy(confusion)
    print("Acc: " + str(acc))
    # ok_(acc > .86, "Accuracy Obt: " + str(acc))
    confusion = scorer.get_confusion(NR_DEV_FILE, '../bilstm-dev-nr.preds')
    acc = scorer.accuracy(confusion)
    print("Acc: " + str(acc))
def eval_tagging_model(testfile, tagger_func, features, weights, all_tags, output_file=None):
    tagger = lambda words, all_tags: tagger_func(words, features, weights, all_tags)[0]
    confusion = eval_tagger(tagger, output_file, testfile=testfile, all_tags=all_tags)
    return scorer.accuracy(confusion)
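# A hypothetical call to eval_tagging_model above, wiring in the Viterbi tagger and HMM
# features used by the tests in this file; the weight and tag-set variables are stand-ins.
#   dev_acc = eval_tagging_model(DEV_FILE, viterbi.viterbiTagger, viterbi.hmm_feats,
#                                hmm_weights, alltags, output_file='hmm.preds')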
def test_nb2_tagger_is_good():
    global theta_nb_fixed
    tagger = tagger_base.make_classifier_tagger(theta_nb_fixed)
    confusion = tagger_base.eval_tagger(tagger, 'nb2')
    dev_acc = scorer.accuracy(confusion)
    expected_acc = 0.84
    ok_(expected_acc < dev_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, dev_acc))
def test_str_perceptron_small():
    w, tr_acc, dv_acc = str_perceptron.trainAvgStructPerceptron(
        5, tr_all[:50], features.wordTransFeatures, alltags)
    confusion = tagger_base.evalTagger(
        lambda words, alltags: viterbi.viterbiTagger(
            words, features.wordTransFeatures, w, alltags)[0],
        'str_classifier_small')
    expected_acc = 0.506
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))
def test_classifier_tagger():
    global all_tags
    expected = 0.1668919993637665
    noun_weights = most_common.get_noun_weights()
    noun_tagger = tagger_base.make_classifier_tagger(noun_weights)
    confusion = tagger_base.eval_tagger(noun_tagger, 'all_nouns.preds', all_tags=all_tags)
    actual = scorer.accuracy(confusion)
    assert_almost_equals(expected, actual, places=3,
                         msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
def test_classifier():
    global all_tags
    expected = 0.1527613022274944
    noun_weights = most_common.get_noun_weights()
    noun_tagger = tagger_base.make_classifier_tagger(noun_weights)
    confusion = tagger_base.eval_tagger(noun_tagger, 'all_nouns.preds', all_tags=all_tags)
    actual = scorer.accuracy(confusion)
    assert_almost_equal(expected, actual, places=3,
                        msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
def test_classifier_tagger():
    expected = 0.136844287788
    noun_weights = most_common.get_noun_weights()
    noun_tagger = tagger_base.makeClassifierTagger(noun_weights)
    confusion = tagger_base.evalTagger(noun_tagger, 'nouns')
    actual = scorer.accuracy(confusion)
    assert_almost_equals(expected, actual, places=3,
                         msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
def test_basic_classifer():
    test_weights = defaultdict(float)
    test_tags = ['N', 'V', 'V', 'N']
    for i in range(len(sent)):
        for feat in features.wordFeatures(sent, test_tags[i], 'X', i):
            test_weights[feat] = 1
        for feat in features.wordFeatures(sent, 'X', 'X', i):
            test_weights[feat] = 1
    expected = test_tags
    actual = tagger_base.classifierTagger(sent, features.wordFeatures, test_weights, alltags)
    eq_(expected, actual, msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
    expected_acc = 0.139539705577
    confusion = tagger_base.evalTagger(
        lambda words, alltags: tagger_base.classifierTagger(
            words, features.wordFeatures, test_weights, alltags), 'test')
    actual_acc = scorer.accuracy(confusion)
    assert_almost_equals(expected_acc, actual_acc, places=3)
def trainLRbyAdaGrad(N_its, inst_generator, outfile, devkey, learning_rate=1e-4, regularizer=1e-2):
    weights = defaultdict(float)
    dv_acc = [None] * N_its
    tr_acc = [None] * N_its
    running_value = defaultdict(float)
    num_inst = len(inst_generator)

    # this block is all to take care of regularization
    ratereg = learning_rate * regularizer

    def regularize(base_feats, t):
        for base_feat in base_feats:
            for label in ALL_LABELS:
                weights[(label, base_feat)] *= (1 - ratereg) ** (t - last_update[base_feat])
            last_update[base_feat] = t

    for it in xrange(N_its):
        tr_err = 0
        last_update = defaultdict(int)  # reset, since we regularize at the end of every iteration
        for i, (inst, true_label) in enumerate(inst_generator):
            # apply "just-in-time" regularization to the weights for features in this instance
            regularize(inst, i)
            # compute likelihood gradient from this instance
            probs = computeLabelProbs(inst, weights, ALL_LABELS)
            label_pred = argmax(probs)
            if true_label != label_pred:
                tr_err += 1
            for word, value in inst.items():
                weights[(true_label, word)] += num_inst * learning_rate * value / running_value.get((true_label, word), 1)
                for label in ALL_LABELS:
                    weights[(label, word)] -= num_inst * probs[label] * learning_rate * value / running_value.get((label, word), 1)
                running_value[(true_label, word)] = value ** 2
        # regularize all features at the end of each iteration
        regularize([base_feature for label, base_feature in weights.keys()], i)
        dv_acc[it] = scorer.accuracy(evalClassifier(weights, outfile, devkey))
        tr_acc[it] = 1. - tr_err / float(i)
        print it, 'dev:', dv_acc[it], 'train:', tr_acc[it]
    return weights, tr_acc, dv_acc
def test_basic_classifer():
    test_weights = defaultdict(float)
    test_tags = ['N', 'V', 'V', 'N']
    for i in range(len(sent)):
        for feat in features.wordFeatures(sent, test_tags[i], 'X', i):
            test_weights[feat] = 1
        for feat in features.wordFeatures(sent, 'X', 'X', i):
            test_weights[feat] = 1
    expected = test_tags
    actual = tagger_base.classifierTagger(sent, features.wordFeatures, test_weights, alltags)
    eq_(expected, actual, msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
    expected_acc = 0.139539705577
    confusion = tagger_base.evalTagger(
        lambda words, alltags: tagger_base.classifierTagger(
            words, features.wordFeatures, test_weights, alltags), 'test')
    actual_acc = scorer.accuracy(confusion)
    assert_almost_equals(expected_acc, actual_acc, places=3)
def test_bakeoff_acc_d3_4_ja_beat_the_prof():
    acc = scorer.accuracy(
        scorer.get_confusion(JA_TEST_FILE, 'sp-best-te.ja.preds'))
    assert_greater(acc, .879926)
def test_bakeoff_acc_d3_4_en_half_credit():
    acc = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'sp-best.preds'))
    assert_greater(acc, .885)
def test_bakeoff_acc_d3_4_en_beat_the_prof():
    acc = scorer.accuracy(scorer.get_confusion(TEST_FILE, 'sp-best-te.preds'))
    assert_greater(acc, .88735)  # same as with the classification-based tagger!
def test_bakeoff_acc_d3_4_ja_full_credit():
    acc = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'sp-best.ja.preds'))
    assert_greater(acc, .91)
def test_nr_hmm_dev_accuracy():
    confusion = scorer.get_confusion(NR_DEV_FILE, 'hmm-dev-nr.preds')
    acc = scorer.accuracy(confusion)
    ok_(acc > .861)
def test_bakeoff_acc_d3_4_en_half_credit():
    acc = scorer.accuracy(scorer.get_confusion(DEV_FILE, 'sp-best.preds'))
    assert_greater(acc, .885)
def test_bakeoff_acc_d2_6_ja_beat_the_prof():
    acc = scorer.accuracy(
        scorer.get_confusion(JA_TEST_FILE, 'avp-words-best-te.ja.preds'))
    assert_greater(acc, .87882)
def test_bakeoff_acc_d3_4_ja_beat_the_prof():
    acc = scorer.accuracy(scorer.get_confusion(JA_TEST_FILE, 'sp-best-te.ja.preds'))
    assert_greater(acc, .879926)
def test_bakeoff_acc_d3_4_en_beat_the_prof():
    acc = scorer.accuracy(scorer.get_confusion(TEST_FILE, 'sp-best-te.preds'))
    assert_greater(acc, .88735)  # same as with the classification-based tagger!
def test_bilstm_test_accuracy():
    confusion = scorer.get_confusion(DEV_FILE, 'bilstm-te-en.preds')
    acc = scorer.accuracy(confusion)
    ok_(acc > .83)  # change the no's
def test_neighbor_acc_d2_5_ja():
    confusion = scorer.get_confusion(JA_DEV_FILE, 'avp-words-neighbor.ja.preds')
    acc = scorer.accuracy(confusion)
    assert_greater(acc, .792)  # should be .802
def test_hmm_dev_accuracy():
    confusion = scorer.get_confusion(DEV_FILE, 'hmm-dev-en.preds')
    acc = scorer.accuracy(confusion)
    ok_(acc > .840)
def test_nr_hmm_test_accuracy():
    confusion = scorer.get_confusion(NR_TEST_FILE, 'hmm-te-nr.preds')
    acc = scorer.accuracy(confusion)
    ok_(acc > .853)
def test_hmm_feat_acc_d3_3_ja():
    confusion = scorer.get_confusion(JA_DEV_FILE, 'sp-hmm.ja.preds')
    acc = scorer.accuracy(confusion)
    assert_greater(acc, .797)  # should be .807
def test_hmm_feat_acc_d3_3_en():
    confusion = scorer.get_confusion(DEV_FILE, 'sp-hmm.preds')
    acc = scorer.accuracy(confusion)
    assert_greater(acc, .862)  # should be .872
def test_bakeoff_acc_d3_4_ja_full_credit():
    acc = scorer.accuracy(scorer.get_confusion(JA_DEV_FILE, 'sp-best.ja.preds'))
    assert_greater(acc, .91)
def test_hmm_feat_acc_d3_3_ja():
    confusion = scorer.get_confusion(JA_DEV_FILE, 'sp-hmm.ja.preds')
    acc = scorer.accuracy(confusion)
    assert_greater(acc, .797)  # should be .807
def test_bakeoff_acc_d2_6_en_half_credit():
    acc = scorer.accuracy(
        scorer.get_confusion(DEV_FILE, 'avp-words-best.preds'))
    assert_greater(acc, .87)
def test_hmm_test_accuracy():
    confusion = scorer.get_confusion(TEST_FILE, 'hmm-te-en.preds')
    acc = scorer.accuracy(confusion)
    ok_(acc > .840)
def test_bakeoff_acc_d2_6_ja_full_credit():
    acc = scorer.accuracy(
        scorer.get_confusion(JA_DEV_FILE, 'avp-words-best.ja.preds'))
    assert_greater(acc, .90)
def test_hmm_weights_accuracy():
    confusion = tagger_base.evalTagger(
        lambda words, alltags: viterbi.viterbiTagger(
            words, viterbi.hmm_feats, hmm_weights, alltags)[0], 'hmm')
    actual = scorer.accuracy(confusion)
    expected = 0.74
    ok_(expected < actual, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))