def test_seq_features():
    """Compare features.seqFeatures (with wordFeatures) against hand-written counts.

    Uses the module-level sentence ``sent`` tagged as N V V N.
    """
    tags = ['N', 'V', 'V', 'N']
    expected = {
        (EMIT, 'N', 'fish'): 1.0,
        (EMIT, 'V', 'can'): 2.0,
        (OFFSET, 'V'): 2.0,
        (EMIT, 'N', 'they'): 1.0,
        (OFFSET, 'N'): 2.0,
        (OFFSET, END_TAG): 1.0,
    }
    actual = features.seqFeatures(sent, tags, features.wordFeatures)
    failure_msg = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=failure_msg)
# Example #2
# 0
def oneItAvgStructPerceptron(inst_generator,
                             featfunc,
                             weights,
                             wsum,
                             tagset,
                             Tinit=0):
    """
    Run one pass of averaged structured perceptron training.

    Each sentence is tagged with viterbiTagger under the current weights.
    When the predicted tags differ from the gold tags, the gold sequence's
    features are added to, and the predicted sequence's features subtracted
    from, both ``weights`` and ``wsum`` (the latter scaled by the current
    counter ``Tinit + i``). Both mappings are modified in place.

    :param inst_generator: An iterable of (words,tags) tuples
    :param featfunc: feature function handed to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights
    :param wsum: A defaultdict of weight sums
    :param tagset: the tag set handed to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the token-level training accuracy (0.0 if no tokens were seen)
    :returns i: the zero-based index of the last instance processed
        (0 if the generator yielded nothing).
        NOTE(review): this is the last enumerate index, not a count --
        confirm that is what callers expect.
    """
    tr_err = 0.
    tr_tot = 0.
    i = 0  # keeps the return value defined when inst_generator is empty
    for i, (words, y_true) in enumerate(inst_generator):
        y_pred, _score = viterbiTagger(words, featfunc, weights, tagset)

        if y_pred != y_true:
            t = Tinit + i
            # Promote the features of the gold tag sequence ...
            for feat, value in seqFeatures(words, y_true, featfunc).items():
                wsum[feat] += t * value
                weights[feat] += value
            # ... and demote the features of the wrongly predicted one.
            for feat, value in seqFeatures(words, y_pred, featfunc).items():
                wsum[feat] -= t * value
                weights[feat] -= value
            tr_err += sum(gold != guess for gold, guess in zip(y_true, y_pred))
        tr_tot += len(words)

    # Guard the division: with no tokens there is no meaningful accuracy.
    tr_acc = 1 - tr_err / tr_tot if tr_tot else 0.
    return weights, wsum, tr_acc, i
# Example #3
# 0
def oneItAvgStructPerceptron(inst_generator,
                             featfunc,
                             weights,
                             wsum,
                             tagset,
                             Tinit=0):
    """
    Run one pass of averaged structured perceptron training.

    For every sentence, the tags predicted by viterbiTagger under the
    current weights are compared with the gold tags. The predicted
    sequence's features are subtracted from, and the gold sequence's
    features added to, both ``weights`` and ``wsum`` (the latter scaled by
    the running counter ``t``). The update is applied for every instance,
    whether or not the prediction was correct. Both mappings are modified
    in place.

    :param inst_generator: An iterable of (words,tags) tuples
    :param featfunc: feature function handed to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights
    :param wsum: A defaultdict of weight sums
    :param tagset: the tag set handed to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the token-level training accuracy (0.0 if no tokens were seen)
    :returns i: the zero-based index of the last instance processed
        (0 if the generator yielded nothing).
        NOTE(review): this is the last enumerate index, not a count --
        confirm that is what callers expect.
    """
    tr_err = 0.
    tr_tot = 0.

    i = 0  # keeps the return value defined when inst_generator is empty
    t = Tinit
    for i, (words, y_true) in enumerate(inst_generator):
        pred = viterbiTagger(words, featfunc, weights, tagset)[0]
        pred_feat = seqFeatures(words, pred, featfunc)
        true_feat = seqFeatures(words, y_true, featfunc)

        # Demote the features of the predicted sequence ...
        for key, value in pred_feat.items():
            weights[key] -= value
            wsum[key] -= t * value
        # ... and promote the features of the gold sequence.
        for key, value in true_feat.items():
            weights[key] += value
            wsum[key] += t * value

        # Count mismatches over exactly len(words) positions.
        n_tokens = len(words)
        for m in range(n_tokens):
            if pred[m] != y_true[m]:
                tr_err += 1
        tr_tot += n_tokens
        t += 1

    # Guard the division: with no tokens there is no meaningful accuracy.
    tr_acc = 1 - tr_err / tr_tot if tr_tot else 0.
    return weights, wsum, tr_acc, i
# Example #4
# 0
def oneItAvgStructPerceptron(inst_generator,
                             featfunc,
                             weights,
                             wsum,
                             tagset,
                             Tinit=0):
    """
    Perform a single training pass of the averaged structured perceptron.

    Every instance is tagged with viterbiTagger using the current weights.
    The features of the predicted tag sequence are then subtracted from,
    and the features of the gold tag sequence added to, ``weights`` and
    ``wsum``; the ``wsum`` contribution is multiplied by the counter ``t``,
    which starts at ``Tinit`` and grows by one per instance. The update runs
    for every instance regardless of whether the prediction matched. Both
    mappings are modified in place.

    :param inst_generator: An iterable of (words,tags) tuples
    :param featfunc: feature function handed to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights
    :param wsum: A defaultdict of weight sums
    :param tagset: the tag set handed to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the token-level training accuracy (0.0 if no tokens were seen)
    :returns i: the zero-based index of the last instance processed
        (0 if the generator yielded nothing).
        NOTE(review): this is the last enumerate index, not a count --
        confirm that is what callers expect.
    """
    tr_err = 0.
    tr_tot = 0.

    i = 0  # keeps the return value defined when inst_generator is empty
    t = Tinit
    for i, (words, y_true) in enumerate(inst_generator):
        pred = viterbiTagger(words, featfunc, weights, tagset)[0]
        pred_feat = seqFeatures(words, pred, featfunc)
        true_feat = seqFeatures(words, y_true, featfunc)

        # Move weight mass away from what was predicted ...
        for key, value in pred_feat.items():
            weights[key] -= value
            wsum[key] -= t * value
        # ... and towards what should have been predicted.
        for key, value in true_feat.items():
            weights[key] += value
            wsum[key] += t * value

        # Token-level error count over exactly len(words) positions.
        n_tokens = len(words)
        tr_err += sum(1 for m in range(n_tokens) if pred[m] != y_true[m])
        tr_tot += n_tokens
        t += 1

    # Guard the division: with no tokens there is no meaningful accuracy.
    tr_acc = 1 - tr_err / tr_tot if tr_tot else 0.
    return weights, wsum, tr_acc, i
def test_seq_trans_features():
    """Compare features.seqFeatures (with wordTransFeatures) against hand-written counts.

    Uses the module-level sentence ``sent`` tagged as N V V N.
    """
    tags = ['N', 'V', 'V', 'N']
    expected = {
        (TRANS, 'N', '--START--'): 1.0,
        (TRANS, '--END--', 'N'): 1.0,
        (EMIT, 'N', 'fish'): 1.0,
        (EMIT, 'V', 'can'): 2.0,
        (OFFSET, 'V'): 2.0,
        (EMIT, 'N', 'they'): 1.0,
        (TRANS, 'V', 'V'): 1.0,
        (TRANS, 'N', 'V'): 1.0,
        (OFFSET, 'N'): 2.0,
        (TRANS, 'V', 'N'): 1.0,
        (OFFSET, END_TAG): 1.0,
    }
    actual = features.seqFeatures(sent, tags, features.wordTransFeatures)
    failure_msg = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=failure_msg)