# Shared imports for these snippets; the course modules (features,
# tagger_base, scorer) and the fixtures sent and alltags are assumed to be
# defined elsewhere in the test setup.
from collections import defaultdict
from nose.tools import eq_, assert_almost_equals

def test_basic_classifer():
    test_weights = defaultdict(float)
    test_tags = ['N', 'V', 'V', 'N']
    for i in range(len(sent)):
        # rig the weights: features of the gold tag (and of the dummy
        # tag 'X') get weight 1, so the gold tag outscores the rest
        for feat in features.wordFeatures(sent, test_tags[i], 'X', i):
            test_weights[feat] = 1
        for feat in features.wordFeatures(sent, 'X', 'X', i):
            test_weights[feat] = 1
    expected = test_tags
    actual = tagger_base.classifierTagger(sent, features.wordFeatures,
                                          test_weights, alltags)
    eq_(expected, actual,
        msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))

    expected_acc = 0.139539705577
    confusion = tagger_base.evalTagger(
        lambda words, alltags: tagger_base.classifierTagger(
            words, features.wordFeatures, test_weights, alltags), 'test')
    actual_acc = scorer.accuracy(confusion)
    assert_almost_equals(expected_acc, actual_acc, places=3)
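
For reference, a minimal sketch of the greedy per-token tagger these tests
assume: at each position, every candidate tag is scored by the dot product of
its feature vector (a dict of feature -> value, as used throughout this page)
against the weights, and the argmax is kept. The name classifierTagger_sketch
and the start symbol are placeholders; the real implementation lives in
tagger_base.

def classifierTagger_sketch(words, featfunc, weights, tagset):
    # greedy left-to-right decoding: no search over tag sequences, each
    # position is an independent argmax given the previous committed tag
    tags = []
    prev_tag = '--START--'  # placeholder start symbol (assumption)
    for m in range(len(words)):
        scores = {tag: sum(weights.get(f, 0.0) * v
                           for f, v in featfunc(words, tag, prev_tag, m).items())
                  for tag in tagset}
        best = max(scores, key=scores.get)
        tags.append(best)
        prev_tag = best
    return tags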
Example #2

def oneItAvgPerceptron(inst_generator, featfunc, weights, wsum, tagset, Tinit=0):
    """
    One training pass of the averaged perceptron.

    :param inst_generator: iterable of (words, tags) instances
    :param featfunc: feature function on (words, tag_m, tag_m_1, m)
    :param weights: defaultdict of current weights
    :param wsum: running sum of t-weighted updates, for averaging
    :param tagset: set of permissible tags
    :param Tinit: initial value of t, the counter over instances
    :returns: weights, wsum, training accuracy, number of instances seen
    """
    tr_err = 0.0
    num_tokens = 0
    t = Tinit
    for i, (words, y_true) in enumerate(inst_generator):
        pred = classifierTagger(words, featfunc, weights, tagset)
        num_tokens += len(words)
        for m in range(len(words)):
            if pred[m] != y_true[m]:
                tr_err += 1
                # each feature vector is conditioned on its own tag history:
                # the predicted history for the subtracted features, the
                # gold history for the added ones
                prev_tag_pred = pred[m - 1] if m > 0 else START_TAG
                prev_tag_true = y_true[m - 1] if m > 0 else START_TAG
                wrong_sample = featfunc(words, pred[m], prev_tag_pred, m)
                true_sample = featfunc(words, y_true[m], prev_tag_true, m)
                for key in wrong_sample:
                    weights[key] -= wrong_sample[key]
                    wsum[key] -= t * wrong_sample[key]
                for key in true_sample:
                    weights[key] += true_sample[key]
                    wsum[key] += t * true_sample[key]
        t += 1

    # tokens are counted during the pass itself: re-iterating inst_generator
    # here would silently yield nothing if it is a one-shot generator
    return weights, wsum, 1. - tr_err / float(num_tokens), i + 1
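
The wsum bookkeeping above is the standard lazy-averaging trick: the mean of
the weight vectors after each instance equals the final weights minus the
t-weighted update sum divided by the instance count, so the average never has
to be materialized during training. A self-contained check on toy scalar
updates (the numbers are made up):

# mean of per-step weights == final weight - (sum of t * update) / T,
# with the step counter t starting at 0
deltas = [1.0, -2.0, 0.5]            # toy per-step updates
w, wsum_scalar, history = 0.0, 0.0, []
for t, d in enumerate(deltas):
    w += d
    wsum_scalar += t * d
    history.append(w)
T = len(deltas)
assert abs(sum(history) / T - (w - wsum_scalar / T)) < 1e-12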
Example #3

def oneItAvgPerceptron(inst_generator, featfunc, weights, wsum, tagset, Tinit=0):
    """
    One training pass of the averaged perceptron; equivalent to Example #2,
    but tracking the instance counter as Tinit + i instead of explicitly.

    :param inst_generator: iterable of (words, tags) instances
    :param featfunc: feature function on (words, tag_m, tag_m_1, m)
    :param weights: defaultdict of current weights
    :param wsum: running sum of t-weighted updates, for averaging
    :param tagset: set of permissible tags
    :param Tinit: initial value of t, the counter over instances
    :returns: weights, wsum, training accuracy, number of instances seen
    """
    tr_err = 0.0
    num_tokens = 0
    for i, (words, y_true) in enumerate(inst_generator):
        y_pred = classifierTagger(words, featfunc, weights, tagset)
        num_tokens += len(words)
        for m in range(len(words)):
            if y_pred[m] != y_true[m]:
                tr_err += 1
                prev_tag_pred = y_pred[m - 1] if m > 0 else START_TAG
                prev_tag_true = y_true[m - 1] if m > 0 else START_TAG

                for feat, value in featfunc(words, y_true[m], prev_tag_true, m).items():
                    wsum[feat] += (Tinit + i) * value
                    weights[feat] += value

                for feat, value in featfunc(words, y_pred[m], prev_tag_pred, m).items():
                    wsum[feat] -= (Tinit + i) * value
                    weights[feat] -= value

    # tokens are counted during the pass, for the same generator-safety
    # reason as in Example #2
    return weights, wsum, 1. - tr_err / float(num_tokens), i + 1
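
A hypothetical smoke test for a single pass; toy_feats and the two-sentence
corpus are invented for illustration, and classifierTagger and START_TAG are
assumed to be in scope (e.g. imported from the course's tagger module, as in
the tests above):

def toy_feats(words, tag, prev_tag, m):
    # bare-bones emission and transition features (illustrative only)
    return {(words[m], tag): 1.0, (prev_tag, tag): 1.0}

toy_corpus = [(['they', 'can', 'fish'], ['N', 'V', 'V']),
              (['the', 'can'], ['D', 'N'])]
w, ws = defaultdict(float), defaultdict(float)
w, ws, acc, n_seen = oneItAvgPerceptron(toy_corpus, toy_feats, w, ws,
                                        {'D', 'N', 'V'})
print(n_seen, acc)  # 2 instances; first-pass accuracy depends on tie-breaking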
Example #4

def trainAvgPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    :param N_its: number of training epochs
    :param inst_generator: re-iterable sequence (e.g. a list) of (words, tags) pairs
    :param featfunc: feature function
    :param tagset: set of all possible tags
    :returns: averaged weights, training accuracy, dev accuracy
    """
    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)
    for i in range(N_its):
        weights, wsum, tr_acc_i, num_instances = oneItAvgPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        T += num_instances
        # lazy averaging: mean of the per-instance weight vectors so far
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        confusion = evalTagger(
            lambda words, alltags: classifierTagger(words, featfunc, avg_weights, tagset),
            'perc')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        print(i, 'dev:', dv_acc[i], 'train:', tr_acc[i])
    return avg_weights, tr_acc, dv_acc
def test_basic_classifer():
    test_weights = defaultdict(float)
    test_tags = ['N', 'V', 'V', 'N']
    for i in range(len(sent)):
        for feat in features.wordFeatures(sent, test_tags[i], 'X', i):
            test_weights[feat] = 1
        for feat in features.wordFeatures(sent, 'X', 'X', i):
            test_weights[feat] = 1
    expected = test_tags
    actual = tagger_base.classifierTagger(sent, features.wordFeatures,
                                          test_weights, alltags)
    eq_(expected,
        actual,
        msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))

    expected_acc = 0.139539705577
    confusion = tagger_base.evalTagger(
        lambda words, alltags: tagger_base.classifierTagger(
            words, features.wordFeatures, test_weights, alltags), 'test')
    actual_acc = scorer.accuracy(confusion)
    assert_almost_equals(expected_acc, actual_acc, places=3)
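
Tying the pieces together, a hypothetical driver for trainAvgPerceptron above.
train_insts is assumed to be loaded elsewhere as a list of (words, tags)
pairs; it must be a real list, not a one-shot generator, so that every epoch
can re-iterate it:

# train_insts: list of (words, tags) pairs (assumption: loaded elsewhere)
theta_avg, tr_accs, dv_accs = trainAvgPerceptron(
    10, train_insts, features.wordFeatures, alltags)
best = max(range(len(dv_accs)), key=lambda i: dv_accs[i])
print('best dev accuracy %.4f at epoch %d' % (dv_accs[best], best))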