Example #1
0
def test_hmm_weights_tag():
    # Decode a six-token sentence with the HMM features and weights, then
    # compare the tag sequence (element 0 of viterbiTagger's result) with the
    # expected tags.
    tokens = [':))', 'we', 'can', 'can', 'fish', ':-)']
    expected = ['E', 'O', 'V', 'V', 'N', 'E']
    decoded = viterbi.viterbiTagger(tokens, viterbi.hmm_feats, hmm_weights,
                                    alltags)
    actual = decoded[0]
    failure_msg = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=failure_msg)
def trainAvgStructPerceptron(N_its,inst_generator,featfunc,tagset):
    """
    Train an averaged structured perceptron for N_its passes over the data.

    After every pass the averaged weights are recomputed from the running
    weights and weight sums, evalTagger is called with a Viterbi tagger built
    on those averaged weights to produce the dev set results, and the dev and
    training accuracies of the pass are printed.

    :param N_its: number of iterations
    :param inst_generator: An iterable of (words,tags) tuples; a one-shot
        iterator is copied into a list so that every pass sees all instances
    :param featfunc: feature function, handed to oneItAvgStructPerceptron and
        to viterbiTagger
    :param tagset: the tags handed to oneItAvgStructPerceptron
    :returns avg_weights: a defaultdict of averaged weights
    :returns tr_acc: list with the training accuracy of each iteration
    :returns dv_acc: list with the dev accuracy of each iteration
    """
    # iter() of an iterator/generator returns the object itself. Such an
    # object is exhausted after the first pass, so materialise it once.
    if iter(inst_generator) is inst_generator:
        inst_generator = list(inst_generator)

    tr_acc = [None]*N_its
    dv_acc = [None]*N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)

    def avg_tagger(words, tags):
        # avg_weights is mutated in place below, so this always decodes with
        # the most recent averaged weights.
        return viterbiTagger(words, featfunc, avg_weights, tags)[0]

    for i in range(N_its):
        weights, wsum, tr_acc_i, num_instances = oneItAvgStructPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        T += num_instances
        # averaged weight = current weight - (time-weighted update sum) / T
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        # note that evalTagger is called to produce the dev set results
        confusion = evalTagger(avg_tagger, 'sp.txt')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        # Single formatted string: same output as the Python 2 print
        # statement, and also valid on Python 3.
        print('%s dev: %s train: %s' % (i, dv_acc[i], tr_acc[i]))
    return avg_weights, tr_acc, dv_acc
Example #3
0
def trainAvgStructPerceptron(N_its, inst_generator, featfunc, tagset):
    """
    Run N_its passes of averaged structured-perceptron training.

    Each pass delegates to oneItAvgStructPerceptron, then rebuilds the
    averaged weights, calls evalTagger with a Viterbi tagger that uses them to
    produce the dev set results, and prints the dev and training accuracies.

    :param N_its: number of iterations
    :param inst_generator: An iterable of (words,tags) tuples; if it is a
        one-shot iterator it is copied into a list first, so that it can be
        traversed once per iteration
    :param featfunc: feature function, passed on to oneItAvgStructPerceptron
        and viterbiTagger
    :param tagset: the tags passed on to oneItAvgStructPerceptron
    :returns avg_weights: a defaultdict of averaged weights
    :returns tr_acc: per-iteration training accuracies (list of length N_its)
    :returns dv_acc: per-iteration dev accuracies (list of length N_its)
    """
    # An iterator is its own iter(); it could only be consumed once, which
    # would leave nothing to train on from the second pass onwards.
    if iter(inst_generator) is inst_generator:
        inst_generator = list(inst_generator)

    tr_acc = [None] * N_its
    dv_acc = [None] * N_its
    T = 0
    weights = defaultdict(float)
    wsum = defaultdict(float)
    avg_weights = defaultdict(float)

    def avg_tagger(words, tags):
        # Reads avg_weights at call time; the dict is updated in place below.
        return viterbiTagger(words, featfunc, avg_weights, tags)[0]

    for i in range(N_its):
        weights, wsum, tr_acc_i, num_instances = oneItAvgStructPerceptron(
            inst_generator, featfunc, weights, wsum, tagset, T)
        T += num_instances
        # averaged weight = current weight - (time-weighted update sum) / T
        for w in wsum:
            avg_weights[w] = weights[w] - wsum[w] / float(T)
        # note that evalTagger is called to produce the dev set results
        confusion = evalTagger(avg_tagger, 'sp.txt')
        dv_acc[i] = scorer.accuracy(confusion)
        tr_acc[i] = tr_acc_i
        # One pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print('%s dev: %s train: %s' % (i, dv_acc[i], tr_acc[i]))
    return avg_weights, tr_acc, dv_acc
Example #4
0
def test_hmm_weights_accuracy():
    # Evaluate the Viterbi tagger with HMM features/weights through
    # evalTagger and require the resulting accuracy to be strictly above 0.74.
    def hmm_tagger(words, tagset):
        decoded = viterbi.viterbiTagger(words, viterbi.hmm_feats, hmm_weights,
                                        tagset)
        return decoded[0]

    confusion = tagger_base.evalTagger(hmm_tagger, 'hmm')
    expected = 0.74
    actual = scorer.accuracy(confusion)
    failure_msg = "NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual)
    ok_(expected < actual, msg=failure_msg)
Example #5
0
def test_tag_complex_score():
    # Check the score (element 1 of viterbiTagger's result) for a nine-word
    # sentence tagged over {'N', 'V'} with defined_weights.
    words = 'they can can can can can can can fish'.split()
    result = viterbi.viterbiTagger(words, viterbi.hmm_feats, defined_weights,
                                   ['N', 'V'])
    expected = -37
    actual = result[1]
    failure_msg = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=failure_msg)
Example #6
0
def test_tag_simple_score():
    # Check the score (element 1 of viterbiTagger's result) for a four-word
    # sentence tagged over {'N', 'V'} with defined_weights.
    words = ['they', 'can', 'can', 'fish']
    candidate_tags = ['N', 'V']
    result = viterbi.viterbiTagger(words, viterbi.hmm_feats, defined_weights,
                                   candidate_tags)
    expected = -18
    actual = result[1]
    failure_msg = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=failure_msg)
def test_str_perceptron_small():
    # Train the averaged structured perceptron for 5 iterations on the first
    # 50 training instances, evaluate the learned weights through evalTagger,
    # and require an accuracy strictly above 0.506.
    n_iterations = 5
    train_subset = tr_all[:50]
    w, _tr_acc, _dv_acc = str_perceptron.trainAvgStructPerceptron(
        n_iterations, train_subset, features.wordTransFeatures, alltags)

    def trained_tagger(words, tagset):
        decoded = viterbi.viterbiTagger(words, features.wordTransFeatures, w,
                                        tagset)
        return decoded[0]

    confusion = tagger_base.evalTagger(trained_tagger, 'str_classifier_small')
    expected_acc = 0.506
    actual_acc = scorer.accuracy(confusion)
    failure_msg = "NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc,
                                                           actual_acc)
    ok_(expected_acc < actual_acc, msg=failure_msg)
def test_viterbi_trans():
    # Give weight 1 to every wordFeatures feature produced, at each position
    # of `sent`, for the intended tag and for the tag 'X' (previous tag 'X' in
    # both cases). Then decode with wordTransFeatures and check both the tag
    # sequence and the score returned by viterbiTagger.
    gold_tags = ['N', 'V', 'V', 'N']
    unit_weights = defaultdict(float)
    for pos in range(len(sent)):
        for tag in (gold_tags[pos], 'X'):
            for feat in features.wordFeatures(sent, tag, 'X', pos):
                unit_weights[feat] = 1

    expected_output = gold_tags
    expected_score = 8.0
    actual_output, actual_score = viterbi.viterbiTagger(
        sent, features.wordTransFeatures, unit_weights, alltags)

    output_msg = "UNEQUAL viterbi trans output Expected:%s, Actual:%s" % (
        expected_output, actual_output)
    eq_(expected_output, actual_output, msg=output_msg)

    score_msg = "UNEQUAL viterbi trans score Expected:%s, Actual:%s" % (
        expected_score, actual_score)
    eq_(expected_score, actual_score, msg=score_msg)
def oneItAvgStructPerceptron(inst_generator,
                             featfunc,
                             weights,
                             wsum,
                             tagset,
                             Tinit=0):
    """
    Run one pass of averaged structured-perceptron training over the data.

    Every sentence is decoded with viterbiTagger under the current weights.
    When the predicted tag sequence differs from the true one, the features of
    the true sequence are added to the weights and those of the predicted
    sequence are subtracted; wsum receives the same updates scaled by the
    current value of the $t$ counter.

    :param inst_generator: An iterable of (words,tags) tuples
    :param featfunc: feature function, handed to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights (updated in place)
    :param wsum: A defaultdict of weight sums (updated in place)
    :param tagset: the candidate tags, handed to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the training accuracy (fraction of tokens tagged correctly)
    :returns num_instances: the number of instances (sentences) seen
    :raises ZeroDivisionError: if inst_generator yields no tokens
    """
    tr_err = 0.
    tr_tot = 0.
    # Explicit counter: the last enumerate() index would be one short of the
    # number of instances, which the caller adds to its running $t$ counter.
    num_instances = 0
    for words, y_true in inst_generator:
        t = Tinit + num_instances
        y_pred, _ = viterbiTagger(words, featfunc, weights, tagset)

        if y_pred != y_true:
            for feat, value in seqFeatures(words, y_true, featfunc).items():
                wsum[feat] += t * value
                weights[feat] += value
            for feat, value in seqFeatures(words, y_pred, featfunc).items():
                wsum[feat] -= t * value
                weights[feat] -= value
            # token-level errors in this sentence
            tr_err += sum(1 for gold, guess in zip(y_true, y_pred)
                          if gold != guess)
        tr_tot += len(words)
        num_instances += 1
    return weights, wsum, 1 - tr_err / tr_tot, num_instances
def test_viterbi_trans():
    # For each position of `sent`, set to 1 the weight of every wordFeatures
    # feature of the target tag and of the tag 'X' (previous tag 'X'). The
    # tagger run with wordTransFeatures is then expected to return the target
    # tags together with a score of 8.0.
    target_tags = ['N', 'V', 'V', 'N']
    weights_under_test = defaultdict(float)
    for idx, _ in enumerate(sent):
        tagged = features.wordFeatures(sent, target_tags[idx], 'X', idx)
        for feat in tagged:
            weights_under_test[feat] = 1
        placeholder = features.wordFeatures(sent, 'X', 'X', idx)
        for feat in placeholder:
            weights_under_test[feat] = 1

    expected_output = target_tags
    expected_score = 8.0
    decoded = viterbi.viterbiTagger(sent, features.wordTransFeatures,
                                    weights_under_test, alltags)
    actual_output, actual_score = decoded

    eq_(expected_output,
        actual_output,
        msg="UNEQUAL viterbi trans output Expected:%s, Actual:%s" %
        (expected_output, actual_output))
    eq_(expected_score,
        actual_score,
        msg="UNEQUAL viterbi trans score Expected:%s, Actual:%s" %
        (expected_score, actual_score))
Example #11
0
def oneItAvgStructPerceptron(inst_generator,
                             featfunc,
                             weights,
                             wsum,
                             tagset,
                             Tinit=0):
    """
    Run one pass of averaged structured-perceptron training over the data.

    Each sentence is decoded with viterbiTagger under the current weights.
    On a mistake the features of the predicted sequence are subtracted from
    the weights and those of the true sequence are added; wsum receives the
    same updates scaled by the current value of the $t$ counter.

    :param inst_generator: An iterable of (words,tags) tuples
    :param featfunc: feature function, handed to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights (updated in place)
    :param wsum: A defaultdict of weight sums (updated in place)
    :param tagset: the candidate tags, handed to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the training accuracy (fraction of tokens tagged correctly)
    :returns num_instances: the number of instances (sentences) seen
    :raises ZeroDivisionError: if inst_generator yields no tokens
    """
    tr_err = 0.
    tr_tot = 0.

    # Counted explicitly: the last enumerate() index is one less than the
    # number of instances that the caller adds to its running $t$ counter.
    num_instances = 0
    for words, y_true in inst_generator:
        t = Tinit + num_instances
        pred = viterbiTagger(words, featfunc, weights, tagset)[0]
        # A correct prediction yields identical feature vectors whose updates
        # cancel, so features are only computed when there is a mistake.
        if pred != y_true:
            for key, value in seqFeatures(words, pred, featfunc).items():
                weights[key] -= value
                wsum[key] -= t * value
            for key, value in seqFeatures(words, y_true, featfunc).items():
                weights[key] += value
                wsum[key] += t * value
            # token-level errors in this sentence
            tr_err += sum(1 for guess, gold in zip(pred, y_true)
                          if guess != gold)
        tr_tot += len(words)
        num_instances += 1
    return weights, wsum, 1 - tr_err / tr_tot, num_instances
Example #12
0
def oneItAvgStructPerceptron(inst_generator,
                             featfunc,
                             weights,
                             wsum,
                             tagset,
                             Tinit=0):
    """
    Make a single training pass of the averaged structured perceptron.

    For every (words, tags) instance the sentence is decoded with
    viterbiTagger using the current weights. If the prediction is wrong, the
    predicted sequence's features are subtracted from the weights and the true
    sequence's features are added; the same updates, multiplied by the current
    $t$ counter, are accumulated in wsum.

    :param inst_generator: An iterable of (words,tags) tuples
    :param featfunc: feature function, handed to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights (updated in place)
    :param wsum: A defaultdict of weight sums (updated in place)
    :param tagset: the candidate tags, handed to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the training accuracy (fraction of tokens tagged correctly)
    :returns num_instances: the number of instances (sentences) seen
    :raises ZeroDivisionError: if inst_generator yields no tokens
    """
    tr_err = 0.
    tr_tot = 0.

    t = Tinit
    for words, y_true in inst_generator:
        pred = viterbiTagger(words, featfunc, weights, tagset)[0]
        # Updates for a correct prediction would cancel out exactly, so the
        # feature vectors are only built when the prediction is wrong.
        if pred != y_true:
            pred_feat = seqFeatures(words, pred, featfunc)
            true_feat = seqFeatures(words, y_true, featfunc)
            for key, value in pred_feat.items():
                weights[key] -= value
                wsum[key] -= t * value
            for key, value in true_feat.items():
                weights[key] += value
                wsum[key] += t * value
            # token-level errors in this sentence
            tr_err += sum(1 for guess, gold in zip(pred, y_true)
                          if guess != gold)
        tr_tot += len(words)
        t += 1
    # t advanced once per instance, so the difference is the instance count
    # (the last enumerate() index would have been one short of it).
    return weights, wsum, 1 - tr_err / tr_tot, t - Tinit
Example #13
0
def test_hmm_weights_accuracy():
    # The HMM-weighted Viterbi tagger, evaluated through evalTagger, must
    # reach an accuracy strictly greater than 0.74.
    def tag_with_hmm(words, tagset):
        return viterbi.viterbiTagger(
            words, viterbi.hmm_feats, hmm_weights, tagset)[0]

    confusion = tagger_base.evalTagger(tag_with_hmm, 'hmm')
    actual = scorer.accuracy(confusion)
    expected = 0.74
    message = "NOT_IN_RANGE Expected:%f, Actual:%f" % (expected, actual)
    ok_(expected < actual, msg=message)
Example #14
0
def test_hmm_weights_tag():
    # The tags (element 0 of viterbiTagger's result) decoded for this
    # sentence with the HMM features and weights must match exactly.
    sentence = [':))', 'we', 'can', 'can', 'fish', ':-)']
    expected = ['E', 'O', 'V', 'V', 'N', 'E']
    actual = viterbi.viterbiTagger(
        sentence, viterbi.hmm_feats, hmm_weights, alltags)[0]
    message = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=message)
Example #15
0
def test_tag_complex_score():
    # The score (element 1 of viterbiTagger's result) for the long
    # "can ... can" sentence over tags N/V with defined_weights must be -37.
    sentence = 'they can can can can can can can fish'.split()
    tag_choices = ['N', 'V']
    expected = -37
    actual = viterbi.viterbiTagger(
        sentence, viterbi.hmm_feats, defined_weights, tag_choices)[1]
    message = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=message)
Example #16
0
def test_tag_simple_score():
    # The score (element 1 of viterbiTagger's result) for the four-word
    # sentence over tags N/V with defined_weights must be -18.
    sentence = ['they', 'can', 'can', 'fish']
    expected = -18
    actual = viterbi.viterbiTagger(
        sentence, viterbi.hmm_feats, defined_weights, ['N', 'V'])[1]
    message = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=message)
def test_str_perceptron_small():
    # Five training iterations on the first 50 instances of tr_all must give
    # weights whose accuracy, measured through evalTagger, is above 0.506.
    trained = str_perceptron.trainAvgStructPerceptron(
        5, tr_all[:50], features.wordTransFeatures, alltags)
    w, _train_curve, _dev_curve = trained

    def tag_with_trained_weights(words, tagset):
        return viterbi.viterbiTagger(
            words, features.wordTransFeatures, w, tagset)[0]

    confusion = tagger_base.evalTagger(tag_with_trained_weights,
                                       'str_classifier_small')
    expected_acc = 0.506
    actual_acc = scorer.accuracy(confusion)
    message = "NOT_IN_RANGE Expected:%f, Actual:%f" % (expected_acc,
                                                       actual_acc)
    ok_(expected_acc < actual_acc, msg=message)