Example #1
def perc_train(train_data, tagset, numepochs):
    """
    current_global_vector: a dict of features for the predicted labels
    gold_global_vector: a dict of features for the gold-standard labels
    """
    feat_vec = defaultdict(int)
    avg_vec = defaultdict(int)
    default_tag = tagset[0]
    for t in range(numepochs):
        error_num = 0
        for (labeled_list, feat_list) in train_data:
            std_labels = get_labels(labeled_list)
            output = perc.perc_test(feat_vec, labeled_list, feat_list, tagset,
                                    default_tag)
            if std_labels != output:
                error_num += 1
            gold_global_vector = get_global_vector(std_labels, feat_list)
            current_global_vector = get_global_vector(output, feat_list)
            add_vector(feat_vec, gold_global_vector, 1)
            add_vector(feat_vec, current_global_vector, -1)

        print >> sys.stderr, "Epoch", t + 1, "done. # of incorrect sentences: ", error_num
        # In principle we should average over all numepochs * len(train_data)
        # intermediate feature vectors, but that many long-vector additions
        # would be painfully slow, so we average one snapshot per epoch.
        add_vector(avg_vec, feat_vec, 1)
        perc.perc_write_to_file(
            {key: float(avg_vec[key]) / (t + 1)
             for key in avg_vec}, opts.modelfile + str(t))

    return {key: float(avg_vec[key]) / numepochs for key in avg_vec}
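The per-epoch averaging above is an approximation. For contrast, here is a minimal sketch of the exact averaged perceptron, which sums the weight vector after every sentence; it assumes the same get_labels, get_global_vector, add_vector and perc helpers used above:

from collections import defaultdict

def perc_train_exact_avg(train_data, tagset, numepochs):
    # Sketch only: averages all numepochs * len(train_data) intermediate
    # weight vectors, at the cost of one long-vector addition per sentence.
    feat_vec = defaultdict(int)
    sum_vec = defaultdict(float)
    default_tag = tagset[0]
    n_snapshots = 0
    for t in range(numepochs):
        for (labeled_list, feat_list) in train_data:
            std_labels = get_labels(labeled_list)
            output = perc.perc_test(feat_vec, labeled_list, feat_list,
                                    tagset, default_tag)
            if output != std_labels:
                add_vector(feat_vec, get_global_vector(std_labels, feat_list), 1)
                add_vector(feat_vec, get_global_vector(output, feat_list), -1)
            add_vector(sum_vec, feat_vec, 1)  # the expensive step
            n_snapshots += 1
    return {key: sum_vec[key] / n_snapshots for key in sum_vec}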
Example #2
def perc_train(train_data, tagset, numepochs, pos_dict):
    feat_vec = defaultdict(int)
    # insert your code here
    if len(tagset) <= 0:
        raise ValueError("Empty tagset")

    numepochs = 20  # hard-coded override of the numepochs argument
    default_tag = tagset[0]
    for t in range(numepochs):
        print 'Iteration#', t, 'is processing now.'
        for (labeled_list, feat_list) in train_data:
            labels = copy.deepcopy(labeled_list)
            # add in the start and end buffers for the context
            # for every sentence in the training set, iterate numepochs times
            output = perc.perc_test(feat_vec, labeled_list, feat_list, tagset, default_tag, pos_dict)
            # compare current output and true result
            # correct_flag = True
            feat_index = 0
            # check word by word if the predicted tag is equal to the true tag
            for i, v in enumerate(output):
                (feat_index, feats) = perc.feats_for_word(feat_index, feat_list)
                # retrieve the feature for a word
                if len(feats) == 0:
                    print >>sys.stderr, " ".join(labels), " ".join(feat_list), "\n"
                    raise ValueError("features do not align with input sentence")
                
                fields = labels[i].split()
                label = fields[2]
                if i > 0: 
                    label_pre = labels[i-1].split()[2]
                    if output[i-1] != label_pre or output[i] != label:  # '!=', not 'is not': compare values, not identity
                        for feat in feats:
                            if feat[0] == 'B': # for bigram feature
                                feat_out = feat + ":" + output[i-1]  # feat_out is the "B:<previous output>"
                                feat_lab = feat + ":" + label_pre  # feat_lab is the "B:<previous label>"
                                feat_vec[feat_out, output[i]] -= 1
                                feat_vec[feat_out, label] += 1
                                feat_vec[feat_lab, output[i]] -= 1
                                feat_vec[feat_lab, label] += 1
                            else: # for U00 to U22 feature
                                feat_vec[feat, output[i]] -= 1
                                feat_vec[feat, label] += 1
                else:  # for i==0 case, all the first word in each sentence
                    label_pre = 'B_-1'  # previous label will be denoted by B_-1
                    for feat in feats:
                        if feat[0] == 'B':  # bigram feature case
                            feat = feat + ":" + label_pre
                        feat_vec[feat, output[i]] -= 1
                        feat_vec[feat, label] += 1

        perc.perc_write_to_file(feat_vec, 'model_' + str(t))

    # please limit the number of iterations of training to n iterations
    return feat_vec
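Throughout these examples the weight vector is a defaultdict keyed by (feature, tag) tuples, e.g. {('U14:VBG','B-VP'): w1, ...}. A toy, self-contained illustration of the single-word update performed above, using made-up feature and tag values:

from collections import defaultdict

feat_vec = defaultdict(int)
feats = ['U14:VBG', 'B']             # hypothetical features for one word
prev, predicted, gold = 'B-NP', 'I-NP', 'B-VP'
for feat in feats:
    if feat[0] == 'B':               # bigram features get the previous tag appended
        feat = feat + ':' + prev
    feat_vec[feat, predicted] -= 1   # penalize the wrongly predicted tag
    feat_vec[feat, gold] += 1        # reward the gold tag
# feat_vec now holds {('U14:VBG', 'I-NP'): -1, ('U14:VBG', 'B-VP'): 1,
#                     ('B:B-NP', 'I-NP'): -1, ('B:B-NP', 'B-VP'): 1}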
Example #3
def perc_train(train_data, tagset, n):
    feat_vec = defaultdict(int)
    feat_avg_vec = defaultdict(int)
    # insert your code here
    # please limit the number of iterations of training to n iterations
    default_tag = tagset[0]  # initially tag every word with the first tag in the tagset
    num_sentence = len(train_data)
    num_words = 0
    count = 0
    for iteration in range(n):
        sent_index = 0
        for sentence in train_data:  #sentence = (labeled_list, feat_list) for each sentence
            sent_index += 1
            print "\rIteration: %d/%d. Sentence: %d/%d" % (iteration + 1, n, sent_index, num_sentence),

            (labeled_list, feat_list) = sentence
            num_words += len(labeled_list)

            #compute tags based on current weights
            estimated_tags = perc.perc_test(feat_vec, labeled_list, feat_list,
                                            tagset, default_tag)
            #the target 'right' tag list
            standard_tags = [item.split()[2] for item in labeled_list]

            if estimated_tags != standard_tags:
                st_prev = es_prev = 'B_-1'
                index = 0
                #reference: http://gul.gu.se/public/pp/public_courses/course38351/published/1360057354030/resourceId/19456476/content/9adb1f1e-52e4-48b4-8001-ada93be18089/9adb1f1e-52e4-48b4-8001-ada93be18089.html
                #weight each update by the fraction of remaining rounds so that
                #feat_avg_vec accumulates the averaged weights in a single pass
                step = (n * num_sentence - count) * 1.0 / (n * num_sentence)
                for (st_tag, es_tag) in zip(standard_tags, estimated_tags):
                    (index, feats) = perc.feats_for_word(index, feat_list)

                    for feat in feats:
                        #deal with feat B: according to the given output example.
                        if feat == 'B':
                            if st_prev != es_prev or st_tag != es_tag:
                                feat_vec[('B:' + es_prev, es_tag)] -= 1
                                feat_vec[('B:' + st_prev, st_tag)] += 1
                                feat_avg_vec[('B:' + es_prev, es_tag)] -= step
                                feat_avg_vec[('B:' + st_prev, st_tag)] += step

                        else:
                            if st_tag != es_tag:
                                feat_vec[(feat, es_tag)] -= 1
                                feat_vec[(feat, st_tag)] += 1
                                feat_avg_vec[(feat, es_tag)] -= step
                                feat_avg_vec[(feat, st_tag)] += step

                    #advance the previous tags for every word, not only when a
                    #bigram update fired; otherwise they go stale
                    es_prev = es_tag
                    st_prev = st_tag
            count += 1
        perc.perc_write_to_file(
            feat_avg_vec, 'models/n' + str(iteration) + 'avg_params.model')

    return feat_avg_vec
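The step factor above implements the one-pass averaging trick: an update applied at round count survives in the n * num_sentence - count remaining intermediate weight vectors, so scaling it by that fraction makes feat_avg_vec the running average directly. A tiny standalone check of the equivalence, with made-up scalar updates:

N = 5
updates = [2, 0, -1, 3, 1]              # one scalar update per round
w, naive_sum, weighted = 0, 0.0, 0.0
for c, u in enumerate(updates):
    w += u
    naive_sum += w                      # sum of all intermediate weights
    weighted += u * (N - c) / float(N)  # step-weighted update, as above
print naive_sum / N, weighted           # both print 2.8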
Example #4
def perc_train(train_data, tagset, numepochs):
    """
    current_global_vector: a dict of features for the predicted labels
    gold_global_vector: a dict of features for the gold-standard labels
    """
    feat_vec = defaultdict(int)
    default_tag = tagset[0]
    for t in range(numepochs):
        for (labeled_list, feat_list) in train_data:
            std_labels = get_labels(labeled_list)
            output = perc.perc_test(feat_vec, labeled_list, feat_list, tagset,
                                    default_tag)
            gold_global_vector = get_global_vector(std_labels, feat_list)
            current_global_vector = get_global_vector(output, feat_list)
            add_vector(feat_vec, gold_global_vector, 1)
            add_vector(feat_vec, current_global_vector, -1)

        perc.perc_write_to_file(feat_vec, opts.modelfile + str(t))

    return feat_vec
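Examples #1 and #4 call get_labels, get_global_vector and add_vector without showing them. The following is a plausible reconstruction, not the original code: the 'B_-1' start symbol and the bigram expansion are guesses based on the other examples on this page.

from collections import defaultdict

def get_labels(labeled_list):
    # the third whitespace-separated field of each line is the chunk tag
    return [line.split()[2] for line in labeled_list]

def get_global_vector(labels, feat_list):
    # sum phi(x, y) over the sentence: count every (feature, tag) pair,
    # expanding the bigram feature 'B' with the previous tag
    vec = defaultdict(int)
    feat_index = 0
    for i, tag in enumerate(labels):
        (feat_index, feats) = perc.feats_for_word(feat_index, feat_list)
        prev = labels[i - 1] if i > 0 else 'B_-1'
        for feat in feats:
            if feat == 'B':
                feat = 'B:' + prev
            vec[feat, tag] += 1
    return vec

def add_vector(target, source, scale):
    # in-place target += scale * source
    for key in source:
        target[key] += scale * source[key]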
Example #5
    optparser.add_option(
        "-f",
        "--featfile",
        dest="featfile",
        default=os.path.join("data", "train.feats.gz"),
        help=
        "precomputed features for the input data, i.e. the values of \phi(x,_) without y"
    )
    optparser.add_option(
        "-e",
        "--numepochs",
        dest="numepochs",
        default=int(10),
        help=
        "number of epochs of training; in each epoch we iterate over over all the training examples"
    )
    optparser.add_option("-m",
                         "--modelfile",
                         dest="modelfile",
                         default=os.path.join("data", "default.model"),
                         help="weights for all features stored on disk")
    (opts, _) = optparser.parse_args()

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = {}
    tagset = []
    train_data = []

    tagset = perc.read_tagset(opts.tagsetfile)
    print >> sys.stderr, "reading data ..."
    train_data = perc.read_labeled_data(opts.trainfile, opts.featfile)
    print >> sys.stderr, "done."
    feat_vec = perc_train(train_data, tagset, int(opts.numepochs))
    perc.perc_write_to_file(feat_vec, opts.modelfile)
Example #6
import optparse
import os
import sys

import perc
from collections import defaultdict

def perc_train(train_data, tagset, numepochs):
    feat_vec = defaultdict(int)
    # insert your code here
    # please limit the number of iterations of training to n iterations
    return feat_vec
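One minimal way to complete this skeleton is the plain (unaveraged) structured perceptron of Example #4; a sketch, assuming the hypothetical helpers reconstructed after Example #4:

def perc_train(train_data, tagset, numepochs):
    feat_vec = defaultdict(int)
    default_tag = tagset[0]
    for t in range(numepochs):
        for (labeled_list, feat_list) in train_data:
            gold = get_labels(labeled_list)
            output = perc.perc_test(feat_vec, labeled_list, feat_list,
                                    tagset, default_tag)
            if output != gold:
                # standard update: add the gold features, subtract the predicted ones
                add_vector(feat_vec, get_global_vector(gold, feat_list), 1)
                add_vector(feat_vec, get_global_vector(output, feat_list), -1)
    return feat_vec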

if __name__ == '__main__':
    optparser = optparse.OptionParser()
    optparser.add_option("-t", "--tagsetfile", dest="tagsetfile", default=os.path.join("data", "tagset.txt"), help="tagset that contains all the labels produced in the output, i.e. the y in \phi(x,y)")
    optparser.add_option("-i", "--trainfile", dest="trainfile", default=os.path.join("data", "train.txt.gz"), help="input data, i.e. the x in \phi(x,y)")
    optparser.add_option("-f", "--featfile", dest="featfile", default=os.path.join("data", "train.feats.gz"), help="precomputed features for the input data, i.e. the values of \phi(x,_) without y")
    optparser.add_option("-e", "--numepochs", dest="numepochs", default=int(10), help="number of epochs of training; in each epoch we iterate over over all the training examples")
    optparser.add_option("-m", "--modelfile", dest="modelfile", default=os.path.join("data", "default.model"), help="weights for all features stored on disk")
    (opts, _) = optparser.parse_args()

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = {}
    tagset = []
    train_data = []

    tagset = perc.read_tagset(opts.tagsetfile)
    print >>sys.stderr, "reading data ..."
    train_data = perc.read_labeled_data(opts.trainfile, opts.featfile)
    print >>sys.stderr, "done."
    feat_vec = perc_train(train_data, tagset, int(opts.numepochs))
    perc.perc_write_to_file(feat_vec, opts.modelfile)

Example #7
def perc_train(train_data, tagset, numepochs):
    feat_vec = defaultdict(float)
    avg_feat_vec = defaultdict(float)
    tau_feat_vec = dict()  # (feature, tag) -> (j, t) of that key's last update, for lazy averaging

    # insert your code here
    if len(tagset) <= 0:
        raise ValueError("Empty tagset")

    default_tag = tagset[0]
    m = len(train_data) # length of training data
    for t in range(numepochs):
        print 'Iteration#', t, 'is processing now.'
        for j, (labeled_list, feat_list) in enumerate(train_data):

            labels = copy.deepcopy(labeled_list)
            # print 'sentence[',j,']'
            # add in the start and end buffers for the context
            # for every sentence in the training set, iterate numepochs times
            output = perc.perc_test(feat_vec, labeled_list, feat_list, tagset, default_tag)
            # compare current output and true result

            if j != m - 1 or t != numepochs - 1:  # every sentence except the last of the final epoch
                feat_index = 0
                # check word by word if the predicted tag is equal to the true tag
                for i, v in enumerate(output):
                    (feat_index, feats) = perc.feats_for_word(feat_index, feat_list)
                    # retrieve the feature for a word
                    if len(feats) == 0:
                        print >>sys.stderr, " ".join(labels), " ".join(feat_list), "\n"
                        raise ValueError("features do not align with input sentence")
                    
                    label = labels[i].split()[2]
                    if i > 0: 
                        label_pre = labels[i-1].split()[2]
                        for feat in feats:

                            if feat[0] == 'B': # for bigram feature
                                feat_out = feat + ":" + output[i-1]  # feat_out is the "B:<previous output>"
                                feat_lab = feat + ":" + label_pre  # feat_lab is the "B:<previous label>"

                                if output[i] != label or feat_out != feat_lab:

                                    # lazily credit the averaged weights before these keys change
                                    lazy_update_vect(feat_out, output[i], tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)
                                    lazy_update_vect(feat_lab, label, tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)


                                    # update the original feature vector; if feat_out == feat_lab
                                    # the two updates collapse onto the same keys
                                    update_bigram_vect(feat_vec, avg_feat_vec, feat_out, feat_lab, output[i], label)

                                    # if feat_out == feat_lab then update twice for the same tau
                                    tau_feat_vec[feat_out, output[i]] = (j, t)
                                    tau_feat_vec[feat_lab, label] = (j, t)



                            elif output[i] != label:
                                lazy_update_vect(feat, output[i], tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)
                                lazy_update_vect(feat, label, tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)
                                
                                # for U00 to U22 feature                                
                                update_unigram_vect(feat_vec, avg_feat_vec, feat, output[i],label)

                                # update vector
                                tau_feat_vec[feat, output[i]] = (j, t)
                                tau_feat_vec[feat, label] = (j, t)


                    else:  # for i==0 case, all the first word in each sentence
                        label_pre = '_B-1'  # the label before the first word is the start symbol '_B-1'
                        for feat in feats:
                            if feat[0] == 'B' and output[i] != label:
                                # bigram feature case
                                feat = feat + ":" + label_pre

                                lazy_update_vect(feat, output[i], tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)  
                                lazy_update_vect(feat, label, tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)  

                                update_bigram_vect(feat_vec, avg_feat_vec, feat, feat, output[i], label)

                                tau_feat_vec[feat, label] = (j, t)
                                tau_feat_vec[feat, output[i]] = (j, t)


                            elif output[i] != label:
                                lazy_update_vect(feat, output[i], tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)
                                lazy_update_vect(feat, label, tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)
                                
                                # for U00 to U22 feature
                                update_unigram_vect(feat_vec, avg_feat_vec, feat, output[i],label)

                                tau_feat_vec[feat, output[i]] = (j, t)
                                tau_feat_vec[feat, label] = (j, t)


            else:
                final_lazy_update_vect(tau_feat_vec, feat_vec, avg_feat_vec, t, j, m)

                # special case for the last sentence 
                feat_index = 0
                # check word by word if the predicted tag is equal to the true tag
                for i, v in enumerate(output):
                    (feat_index, feats) = perc.feats_for_word(feat_index, feat_list)
                    # retrieve the feature for a word
                    if len(feats) == 0:
                        print >>sys.stderr, " ".join(labels), " ".join(feat_list), "\n"
                        raise ValueError("features do not align with input sentence")
                    
                    label = labels[i].split()[2]
                    if i > 0: 
                        label_pre = labels[i-1].split()[2]
                        for feat in feats:

                            if feat[0] == 'B': # for bigram feature
                                feat_out = feat + ":" + output[i-1]  # feat_out is the "B:<previous output>"
                                feat_lab = feat + ":" + label_pre  # feat_lab is the "B:<previous label>"
                                if output[i] != label:
                                    # update original feature vector
                                    update_bigram_vect(feat_vec, avg_feat_vec, feat_out, feat_lab, output[i], label)

                            elif output[i] != label:                                
                                update_unigram_vect(feat_vec, avg_feat_vec, feat, output[i],label)

                    else:  
                        # for i==0 case, all the first word in each sentence
                        label_pre = '_B-1'  # the label before the first word is the start symbol '_B-1'
                        for feat in feats:
                            if feat[0] == 'B' and output[i] != label:  
                                # bigram feature case
                                feat = feat + ":" + label_pre
                                update_bigram_vect(feat_vec, avg_feat_vec, feat, feat, output[i], label)

                            elif output[i] != label:
                                # for U00 to U22 feature
                                update_unigram_vect(feat_vec, avg_feat_vec, feat, output[i],label)

        # end of iteration

    # averaging perceptron
    for key in avg_feat_vec.keys():
        avg_feat_vec[key] = avg_feat_vec[key]/float(numepochs*m)
    # please limit the number of iterations of training to n iterations
    perc.perc_write_to_file(feat_vec, 'model_feat_vec')
    return avg_feat_vec
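The lazy-update helpers called above are not part of the scraped snippet. Below is a hedged reconstruction of their likely contracts, matching the call sites: tau_feat_vec maps each (feature, tag) key to the (j, t) time of its last change, so the averaged vector can be credited in one step for the whole interval during which the weight stayed constant.

def lazy_update_vect(feat, tag, tau_feat_vec, feat_vec, avg_feat_vec, t, j, m):
    # credit avg_feat_vec for the span since this key's last update; the
    # weight was constant over (t - t0) * m + (j - j0) sentences
    if (feat, tag) in tau_feat_vec:
        (j0, t0) = tau_feat_vec[feat, tag]
        gap = (t - t0) * m + (j - j0)
        avg_feat_vec[feat, tag] += gap * feat_vec[feat, tag]

def update_unigram_vect(feat_vec, avg_feat_vec, feat, out_tag, gold_tag):
    feat_vec[feat, out_tag] -= 1
    feat_vec[feat, gold_tag] += 1
    avg_feat_vec[feat, out_tag] -= 1
    avg_feat_vec[feat, gold_tag] += 1

def update_bigram_vect(feat_vec, avg_feat_vec, feat_out, feat_lab, out_tag, gold_tag):
    # when feat_out == feat_lab the two updates land on the same keys
    feat_vec[feat_out, out_tag] -= 1
    feat_vec[feat_lab, gold_tag] += 1
    avg_feat_vec[feat_out, out_tag] -= 1
    avg_feat_vec[feat_lab, gold_tag] += 1

def final_lazy_update_vect(tau_feat_vec, feat_vec, avg_feat_vec, t, j, m):
    # flush all pending credit at the last sentence of the last epoch
    for (feat, tag), (j0, t0) in tau_feat_vec.items():
        gap = (t - t0) * m + (j - j0)
        avg_feat_vec[feat, tag] += gap * feat_vec[feat, tag]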
Example #8
def perc_train(train_data, tagset, numepochs, word_set):
    feat_vec = defaultdict(int)
    # insert your code here
    if len(tagset) <= 0:
        raise ValueError("Empty tagset")

    # numepochs = int(50)
    default_tag = tagset[0]
    for t in range(numepochs):
        print 'Iteration#', t, 'is processing now.'
        cnt = 0
        for (labeled_list, feat_list) in train_data:
            cnt = cnt + 1
            if cnt % 1000 == 0:
                print 'current status:', str(round(100.0 * cnt / len(train_data), 2)), '%'
            labels = copy.deepcopy(labeled_list)
            # add in the start and end buffers for the context
            # for every sentence in the training set, iterate numepochs times
            output = perc.perc_test(feat_vec, labeled_list, feat_list, tagset, default_tag, word_set)

            feat_index = 0
            # check word by word if the predicted tag is equal to the true tag
            for i, v in enumerate(output):
                (feat_index, feats) = perc.feats_for_word(feat_index, feat_list)
                # retrieve the feature for a word
                if len(feats) == 0:
                    print >>sys.stderr, " ".join(labels), " ".join(feat_list), "\n"
                    raise ValueError("features do not align with input sentence")
                
                fields = labels[i].split()
                label = fields[2]

                if i > 0: 
                    label_pre = labels[i-1].split()[2]
                    for feat in feats:
                        if feat[0] == 'B': # for bigram feature
                            feat_out = feat + ":" + output[i-1]  # feat_out is the "B:<previous output>"
                            feat_lab = feat + ":" + label_pre  # feat_lab is the "B:<previous label>"

                            if output[i-1] != label_pre and output[i] != label:
                                feat_vec[feat_out, output[i]]   -= 1
                                feat_vec[feat_lab, output[i]]   -= 1
                                feat_vec[feat_out, label]       += 1
                                feat_vec[feat_lab, label]       += 1

                            elif output[i-1] == label_pre and output[i] != label:
                                feat_vec[feat_lab, output[i]]   -= 2
                                feat_vec[feat_lab, label]       += 2

                            # the remaining cases (output[i] == label) need no update

                        else:
                            # U00 to U22 features: when output[i] == label the
                            # two updates hit the same key and cancel, so a
                            # correct tag gets no net penalty or reward
                            feat_vec[feat, output[i]] -= 1
                            feat_vec[feat, label]     += 1
                else:  # for i==0 case, all the first word in each sentence
                    label_pre = '_B-1'  # previous label will be denoted by _B-1
                    for feat in feats:
                        if feat[0] == 'B':  # bigram feature case
                            feat = feat + ":" + label_pre
                        feat_vec[feat, output[i]] -= 1
                        feat_vec[feat, label] += 1


        if t % 3 == 0:
            perc.perc_write_to_file(feat_vec, 'model_' + str(t))

        perc.perc_write_to_file(feat_vec, 'model')
        os.system('python perc.py -m model | python score-chunks.py')

    # please limit the number of iterations of training to n iterations
    return feat_vec
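A note on the four-way branch over output[i-1] and label_pre above: when output[i-1] == label_pre, feat_out == feat_lab, so the -2/+2 branch is just the general four-update form with the two keys coinciding, and the whole analysis collapses to a single test. A behavior-equivalent helper (bigram_update is a hypothetical name):

def bigram_update(feat_vec, feat_out, feat_lab, predicted, gold):
    # equivalent to the four cases above: when feat_out == feat_lab the
    # two pairs of updates merge into the -2/+2 form
    if predicted != gold:
        feat_vec[feat_out, predicted] -= 1
        feat_vec[feat_lab, predicted] -= 1
        feat_vec[feat_out, gold] += 1
        feat_vec[feat_lab, gold] += 1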
if __name__ == '__main__':
    optparser = optparse.OptionParser()
    optparser.add_option("-t", "--tagsetfile", dest="tagsetfile", default=os.path.join("data", "tagset.txt"), help="tagset that contains all the labels produced in the output, i.e. the y in \phi(x,y)")
    optparser.add_option("-i", "--trainfile", dest="trainfile", default=os.path.join("data", "train.txt.gz"), help="input data, i.e. the x in \phi(x,y)")
    optparser.add_option("-f", "--featfile", dest="featfile", default=os.path.join("data", "train.feats.gz"), help="precomputed features for the input data, i.e. the values of \phi(x,_) without y")
    # optparser.add_option("-i", "--trainfile", dest="trainfile", default=os.path.join("data", "train.dev"), help="input data, i.e. the x in \phi(x,y)")
    # optparser.add_option("-f", "--featfile", dest="featfile", default=os.path.join("data", "train.feats.dev"), help="precomputed features for the input data, i.e. the values of \phi(x,_) without y")
    optparser.add_option("-e", "--numepochs", dest="numepochs", default=int(10), help="number of epochs of training; in each epoch we iterate over over all the training examples")
    optparser.add_option("-m", "--modelfile", dest="modelfile", default=os.path.join("data", "default.model"), help="weights for all features stored on disk")
    optparser.add_option("-w", "--wordsetfile", dest="wordsetfile", default=os.path.join("data", "word_set"), help="the word set write to disk")
    (opts, _) = optparser.parse_args()

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = {}
    # format: {('U14:VBG','B-VP'):w1, ...}
    tagset = []
    train_data = []

    tagset = perc.read_tagset(opts.tagsetfile)
    print >>sys.stderr, "reading data ..."
    data = perc.read_labeled_data(opts.trainfile, opts.featfile)
    word_set = data[0]
    perc.perc_write_to_file(word_set, opts.wordsetfile)
    train_data = data[1]
    print >>sys.stderr, "done."
    feat_vec = perc_train(train_data, tagset, int(opts.numepochs), word_set)
    perc.perc_write_to_file(feat_vec, opts.modelfile)
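This driver expects perc.read_labeled_data to return a (word_set, train_data) pair instead of just the training data, which implies a locally modified perc.py. A hedged sketch of what that change might look like; read_labeled_data_orig stands in for the unmodified reader and is a hypothetical name:

def read_labeled_data(trainfile, featfile):
    # modified reader: also collect every word form seen in training,
    # e.g. so perc_test can treat unseen words specially
    train_data = read_labeled_data_orig(trainfile, featfile)
    word_set = set()
    for (labeled_list, feat_list) in train_data:
        for line in labeled_list:
            word_set.add(line.split()[0])  # first field is the word
    return (word_set, train_data)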

Example #10
def perc_train(train_data, tagset, numepochs):
    feat_vec = defaultdict(int)
    # insert your code here
    if len(tagset) <= 0:
        raise ValueError("Empty tagset")

    numepochs = 1  # hard-coded override of the numepochs argument
    default_tag = tagset[0]
    for t in range(numepochs):
        print 'Iteration#', t, 'is processing now.'
        cnt = 0
        for (labeled_list, feat_list) in train_data:
            cnt = cnt + 1
            print 'Sentence[%d] is being processed...' % cnt
            labels = copy.deepcopy(labeled_list)
            # add in the start and end buffers for the context
            # for every sentence in the training set, iterate numepochs times
            output = perc.perc_test(feat_vec, labeled_list, feat_list, tagset, default_tag)

            feat_index = 0
            # check word by word if the predicted tag is equal to the true tag
            for i, v in enumerate(output):
                (feat_index, feats) = perc.feats_for_word(feat_index, feat_list)
                # retrieve the feature for a word
                if len(feats) == 0:
                    print >>sys.stderr, " ".join(labels), " ".join(feat_list), "\n"
                    raise ValueError("features do not align with input sentence")
                
                label = labels[i].split()[2]
                if i > 1: 
                    label_i_1 = labels[i-1].split()[2]
                    label_i_2 = labels[i-2].split()[2]
                    if output[i] != label:
                        for feat in feats:
                            if feat[0] == 'T' and output[i-2] != label_i_2 and output[i-1] != label_i_1: 
                                # trigram case 
                                feat_out = feat + ":" + output[i-2] + "," + output[i-1]  
                                # feat_out is the "B:<previous output>"
                                feat_lab = feat + ":" + label_i_2 + "," + label_i_1
                                # feat_lab is the "B:<previous label>"
                                # reward best condition
                                feat_vec[feat_lab, label] += 1

                                # penalize condition
                                feat_vec[feat_out, output[i]] -= 1

                            elif feat[0] == 'B' and output[i-1] != label_i_1:
                                # bigram case
                                feat_out = feat + ":" + output[i-1]  
                                feat_lab = feat + ":" + label_i_1
                                feat_vec[feat_lab, label] += 1
                                feat_vec[feat_out, output[i]] -= 1

                            else: 
                            # for U00 to U22 feature
                                feat_vec[feat, output[i]] -= 1
                                feat_vec[feat, label] += 1
                elif i == 1:
                    # for the i==1 case: only one real previous label exists
                    label_i_2 = '_B-1'  # the label two positions back is the start symbol '_B-1'
                    label_i_1 = labels[i-1].split()[2]
                    if  output[i] != label:
                        for feat in feats:
                            if feat[0] == 'T' and output[i-1] != label_i_1:
                            # trigram case 
                                feat_out = feat + ":" + label_i_2 + "," + output[i-1]  
                                feat_lab = feat + ":" + label_i_2 + "," + label_i_1
                                # reward best condition
                                feat_vec[feat_lab, label] += 1

                                # penalize condition
                                feat_vec[feat_out, output[i]] -= 1
                            
                            elif feat[0] == 'B':
                                feat_out = feat + ":" + output[i-1]  
                                feat_lab = feat + ":" + label_i_1
                                feat_vec[feat_lab, label] += 1
                                feat_vec[feat_out, output[i]] -= 1

                            else: 
                            # for U00 to U22 feature
                                feat_vec[feat, output[i]] -= 1
                                feat_vec[feat, label] += 1
                elif i == 0:
                    label_i_2 = '_B-2'
                    label_i_1 = '_B-1'
                    if output[i] != label:
                        for feat in feats:
                            if feat[0] == 'T':
                            # trigram case 
                                feat = feat + ":" + label_i_2 + "," + label_i_1
                            
                            elif feat[0] == 'B':
                            #bigram case
                                feat = feat + ":" + label_i_1

                            feat_vec[feat, output[i]] -= 1
                            feat_vec[feat, label] += 1

        filename = 'mid_model_iter' + str(t)
        perc.perc_write_to_file(feat_vec, filename)


    # prune zero weights; in Python 2, items() returns a list, so deleting
    # from feat_vec while looping over that copy is safe
    for (k1, k2), v in feat_vec.items():
        if v == 0:
            del feat_vec[k1, k2]


    # please limit the number of iterations of training to n iterations
    return feat_vec
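The pruning loop above relies on Python 2's items() returning a list copy; under Python 3 it is a view, and deleting during iteration raises a RuntimeError. An equivalent form that works in both:

# collect the zero-weight keys first, then delete them
for key in [k for (k, v) in feat_vec.items() if v == 0]:
    del feat_vec[key]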