Example #1
def test_get_most_common_tag():
    expected = 0.63
    weights = most_common.get_most_common_weights(TRAIN_FILE)
    confusion = tagger_base.evalTagger(tagger_base.makeClassifierTagger(weights),'mcc')
    actual = scorer.accuracy(confusion)
    
    ok_(expected < actual, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
Example #2
def trainAvgStructPerceptron(N_its,inst_generator,featfunc,tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: A generator of (words,tags) tuples
    :param tagger: A function from (words, weights) to tags
    :param features: A function from (words, tags) to a dict of features and weights
    """

    tr_acc = [None]*N_its
    dv_acc = [None]*N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # run one training epoch, accumulating weights, wsum, and the instance count
        weights, wsum, tr_acc_i, num_instances = oneItAvgStructPerceptron(inst_generator, featfunc, weights, wsum, tagset, T)
        # note that I call evalTagger to produce the dev set results
        T += num_instances
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        confusion = evalTagger(lambda words,tags : viterbiTagger(words,featfunc,avg_weights,tags)[0],'sp.txt')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        print i,'dev:',dv_acc[i],'train:',tr_acc[i]
    return avg_weights, tr_acc, dv_acc
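The helper oneItAvgStructPerceptron is called above but never shown (the snippet hides it behind "your code here"). Below is a minimal sketch of one epoch, assuming the per-position interface featfunc(words, curr_tag, prev_tag, i) used elsewhere in these examples, a '--START--' initial tag, and the convention that an update delta applied at 0-based step t also adds t*delta to wsum:

from collections import defaultdict

def seqFeatures(words, tags, featfunc):
    # Sum per-position features over the whole sequence.
    feats = defaultdict(float)
    prev = '--START--'  # assumed start symbol, not confirmed by the snippets
    for i, tag in enumerate(tags):
        for feat in featfunc(words, tag, prev, i):
            feats[feat] += 1.0
        prev = tag
    return feats

def oneItAvgStructPerceptron(inst_generator, featfunc, weights, wsum, tagset, Tinit):
    # Sketch of one epoch of the averaged structured perceptron; the real
    # assignment code is elided in the snippets above.
    t = Tinit
    correct, total = 0, 0
    for words, tags in inst_generator:
        pred = viterbiTagger(words, featfunc, weights, tagset)[0]
        correct += sum(1 for p, g in zip(pred, tags) if p == g)
        total += len(tags)
        if list(pred) != list(tags):
            gold_f = seqFeatures(words, tags, featfunc)
            pred_f = seqFeatures(words, pred, featfunc)
            for feat in set(gold_f) | set(pred_f):
                delta = gold_f[feat] - pred_f[feat]
                weights[feat] += delta
                wsum[feat] += t * delta  # t-weighted sum enables O(1) averaging
        t += 1
    return weights, wsum, correct / float(total), t - Tinit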
Example #3
def trainAvgPerceptron(N_its,inst_generator,featfunc,tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: generate words,tags pairs
    :param featfunc: feature function
    :param tagset: set of all possible tags
    :returns: average weights, training accuracy, dev accuracy
    """
    tr_acc = [None]*N_its
    dv_acc = [None]*N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # run one training epoch, accumulating weights, wsum, and the instance count
        weights, wsum, tr_acc_i, num_instances = oneItAvgPerceptron(inst_generator, featfunc, weights, wsum, tagset, T)
        T += num_instances
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T) 
        confusion = evalTagger(lambda words, alltags: classifierTagger(words,featfunc,avg_weights,tagset),'perc')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        print i,'dev:',dv_acc[i],'train:',tr_acc[i]
    return avg_weights, tr_acc, dv_acc
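Both trainers recover the averaged weights in constant time per feature via weights[w] - wsum[w] / float(T). That identity holds when each update delta applied at 0-based step t also adds t * delta to wsum; a quick numeric check on a single toy feature:

# Sanity check of the averaging trick: the running pair (w, wsum)
# reproduces the mean of the weight value after each of T updates.
deltas = [2.0, -1.0, 0.5]           # hypothetical per-step updates
w, wsum, history = 0.0, 0.0, []
for t, delta in enumerate(deltas):  # t is the 0-based update step
    w += delta
    wsum += t * delta
    history.append(w)               # weight value after each step
T = len(deltas)
assert abs((w - wsum / T) - sum(history) / T) < 1e-12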
Example #4
def trainAvgStructPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    :param N_its: number of iterations
    :param inst_generator: A generator of (words,tags) tuples
    :param tagger: A function from (words, weights) to tags
    :param features: A function from (words, tags) to a dict of features and weights
    """

    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in xrange(N_its):
        # run one training epoch, accumulating weights, wsum, and the instance count
        weights, wsum, tr_acc_i, num_instances = oneItAvgStructPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        # note that I call evalTagger to produce the dev set results
        T += num_instances
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        confusion = evalTagger(
            lambda words, tags: viterbiTagger(words, featfunc, avg_weights,
                                              tags)[0], 'sp.txt')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        print i, 'dev:', dv_acc[i], 'train:', tr_acc[i]
    return avg_weights, tr_acc, dv_acc
Example #5
def test_hmm_weights_accuracy():
    confusion = tagger_base.evalTagger(
        lambda words, alltags: viterbi.viterbiTagger(
            words, viterbi.hmm_feats, hmm_weights, alltags)[0], 'hmm')
    actual = scorer.accuracy(confusion)
    expected = 0.74
    ok_(expected < actual,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
Example #6
def test_get_most_common_tag():
    expected = 0.63
    weights = most_common.get_most_common_weights(TRAIN_FILE)
    confusion = tagger_base.evalTagger(
        tagger_base.makeClassifierTagger(weights), 'mcc')
    actual = scorer.accuracy(confusion)

    ok_(expected < actual,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
Example #7
def test_classifier_tagger():
    expected = 0.136844287788
    noun_weights = most_common.get_noun_weights()
    noun_tagger = tagger_base.makeClassifierTagger(noun_weights)
    
    confusion = tagger_base.evalTagger(noun_tagger,'nouns')
    actual = scorer.accuracy(confusion)

    assert_almost_equals(expected, actual, places=3, msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
Example #8
def test_str_perceptron_small():
    w, tr_acc, dv_acc = str_perceptron.trainAvgStructPerceptron(
        5, tr_all[:50], features.wordTransFeatures, alltags)
    confusion = tagger_base.evalTagger(
        lambda words, alltags: viterbi.viterbiTagger(
            words, features.wordTransFeatures, w, alltags)[0],
        'str_classifier_small')
    expected_acc = 0.506
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))
Example #9
def test_classifier_tagger():
    expected = 0.136844287788
    noun_weights = most_common.get_noun_weights()
    noun_tagger = tagger_base.makeClassifierTagger(noun_weights)

    confusion = tagger_base.evalTagger(noun_tagger, 'nouns')
    actual = scorer.accuracy(confusion)

    assert_almost_equals(expected,
                         actual,
                         places=3,
                         msg="UNEQUAL Expected:%s, Actual:%s" %
                         (expected, actual))
Example #10
def test_basic_classifer():
    test_weights = defaultdict(float)
    test_tags = ['N','V','V','N']
    for i in range(len(sent)):
        for feat in features.wordFeatures(sent,test_tags[i],'X',i):
            test_weights[feat] = 1
        for feat in features.wordFeatures(sent,'X','X',i):
            test_weights[feat] = 1
    expected = test_tags
    actual = tagger_base.classifierTagger(sent,features.wordFeatures,test_weights,alltags)
    eq_(expected, actual, msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))

    expected_acc = 0.139539705577
    confusion = tagger_base.evalTagger(lambda words, alltags: tagger_base.classifierTagger(words, features.wordFeatures, test_weights, alltags), 'test')
    actual_acc = scorer.accuracy(confusion)
    assert_almost_equals(expected_acc, actual_acc, places=3)
Example #11
def test_basic_classifer():
    test_weights = defaultdict(float)
    test_tags = ['N', 'V', 'V', 'N']
    for i in range(len(sent)):
        for feat in features.wordFeatures(sent, test_tags[i], 'X', i):
            test_weights[feat] = 1
        for feat in features.wordFeatures(sent, 'X', 'X', i):
            test_weights[feat] = 1
    expected = test_tags
    actual = tagger_base.classifierTagger(sent, features.wordFeatures,
                                          test_weights, alltags)
    eq_(expected,
        actual,
        msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))

    expected_acc = 0.139539705577
    confusion = tagger_base.evalTagger(
        lambda words, alltags: tagger_base.classifierTagger(
            words, features.wordFeatures, test_weights, alltags), 'test')
    actual_acc = scorer.accuracy(confusion)
    assert_almost_equals(expected_acc, actual_acc, places=3)
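tagger_base.classifierTagger itself does not appear in these snippets. A minimal sketch consistent with the test above, which scores each token independently and passes the dummy previous tag 'X' (an assumption taken from how the test builds its weights):

def classifierTagger(words, featfunc, weights, tagset):
    # Greedy per-token tagging: no transition structure, just the argmax
    # tag under the current weights at each position.
    tags = []
    for i in range(len(words)):
        best_tag, best_score = None, float('-inf')
        for tag in tagset:
            score = sum(weights[feat] for feat in featfunc(words, tag, 'X', i))
            if score > best_score:
                best_tag, best_score = tag, score
        tags.append(best_tag)
    return tags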
Example #12
def test_hmm_weights_accuracy():
    confusion = tagger_base.evalTagger(lambda words, alltags: viterbi.viterbiTagger(words, viterbi.hmm_feats, hmm_weights, alltags)[0], 'hmm')
    actual = scorer.accuracy(confusion)
    expected = 0.74
    ok_(expected < actual, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual))
Example #13
def test_str_perceptron_small():
    w, tr_acc, dv_acc = str_perceptron.trainAvgStructPerceptron(5, tr_all[:50], features.wordTransFeatures, alltags)
    confusion = tagger_base.evalTagger(lambda words, alltags: viterbi.viterbiTagger(words, features.wordTransFeatures, w, alltags)[0], 'str_classifier_small')
    expected_acc = 0.506
    actual_acc = scorer.accuracy(confusion)
    ok_(expected_acc < actual_acc, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc, actual_acc))
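Finally, viterbi.viterbiTagger is the decoder every structured test relies on; the snippets only show that it returns a (tags, score) pair. A compact sketch under the same assumed featfunc(words, curr_tag, prev_tag, i) interface and '--START--' initial tag (both assumptions, not confirmed by these snippets):

def viterbiTagger(words, featfunc, weights, tagset):
    # Dynamic program over tag sequences: trellis[i][tag] is the best score
    # of any tag prefix ending in `tag` at position i.
    trellis = [dict() for _ in words]
    backptr = [dict() for _ in words]
    for i in range(len(words)):
        prevs = trellis[i - 1] if i > 0 else {'--START--': 0.0}
        for tag in tagset:
            best_prev, best_score = None, float('-inf')
            for prev, prev_score in prevs.items():
                score = prev_score + sum(weights[f] for f in featfunc(words, tag, prev, i))
                if score > best_score:
                    best_prev, best_score = prev, score
            trellis[i][tag] = best_score
            backptr[i][tag] = best_prev
    # Trace back from the best final tag to recover the argmax sequence.
    last = max(trellis[-1], key=trellis[-1].get)
    tags = [last]
    for i in range(len(words) - 1, 0, -1):
        tags.append(backptr[i][tags[-1]])
    tags.reverse()
    return tags, trellis[-1][last]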