def unavg_perc_train(trainfile, devfile, dictionary, model):
    """Train an unaveraged bigram structured perceptron in place.

    Runs 5 passes over `trainfile`, mutating `model` whenever the decoded
    tag sequence disagrees with the gold one, and prints train/dev error
    after each epoch plus the total elapsed time.
    """
    print("-------------------------------------------------------")
    print("UNAVERAGED PERCEPTRON - BIGRAM")
    num_updates = 0
    n_epochs = 5
    lr = 1
    t0 = time.time()
    for ep in range(n_epochs):
        for sent, gold in readfile(trainfile):
            pred = decode(sent, dictionary, model)
            if gold == pred:
                continue  # correct prediction: no update needed
            num_updates += 1
            gold_feats = phi(sent, gold)
            pred_feats = phi(sent, pred)
            # reward gold-only features, penalize prediction-only features
            for feat in gold_feats:
                if feat not in pred_feats:
                    model[feat] += lr * 1
            for feat in pred_feats:
                if feat not in gold_feats:
                    model[feat] -= lr * 1
        train_err = test(trainfile, dictionary, model)
        dev_err = test(devfile, dictionary, model)
        print("epoch:", ep + 1, "updates:", num_updates,
              "train_err: {0:.2%}".format(train_err),
              "dev_err: {0:.2%}".format(dev_err))
    print("elapsed time:", time.time() - t0)
    print("-------------------------------------------------------")
def train(trainfile, devfile, dictionary, epochs=1, isAVG=1):
    """Train a trigram-context structured perceptron, optionally averaged.

    Returns the raw weight model, or the averaged model ``w - wa/c`` when
    `isAVG` is truthy.  Prints per-epoch update counts, feature counts and
    error rates.  (Syntax modernized to Python 3 — print(), range(), no
    subscripted zip — for consistency with the other trainers in this file.)
    """
    w_model = defaultdict(float)
    wa_model = defaultdict(float)  # c-weighted running sum, for averaging
    features = set()
    train_set = list(tagger.readfile(trainfile))
    c = 1.
    for epoch in range(epochs):
        update = 0.
        error = 0.
        total_words = 0.
        for wordseq, tagseq in train_set:
            total_words += len(wordseq)
            zseq = tagger.decode(wordseq, dictionary, w_model)
            if isAVG:
                c += 1.
            if zseq != tagseq:
                delta = defaultdict(float)
                # pad two boundary symbols per side (trigram context window)
                wordseq = [startsym] * 2 + wordseq + [stopsym] * 2
                tagseq = [startsym] * 2 + tagseq + [stopsym] * 2
                zseq = [startsym] * 2 + zseq + [stopsym] * 2
                update += 1
                triples = list(zip(wordseq, tagseq, zseq))
                for i, (w, t1, t2) in enumerate(triples[2:], 2):
                    if t1 != t2:
                        delta[t1, wordseq[i - 1], wordseq[i + 1]] += 1
                        delta[t2, wordseq[i - 1], wordseq[i + 1]] -= 1
                        # BUG FIX: set.add() takes exactly one element; the
                        # original passed three positional arguments, which
                        # raises TypeError on the first tag mismatch.
                        features.add((t1, wordseq[i - 1], wordseq[i + 1]))
                        features.add((t2, wordseq[i - 1], wordseq[i + 1]))
                        error += 1
                    if t1 != t2 or tagseq[i - 2] != zseq[i - 2]:
                        delta[(tagseq[i - 2], tagseq[i - 1]), t1] += 1
                        delta[(zseq[i - 2], zseq[i - 1]), t2] -= 1
                        features.add(((tagseq[i - 2], tagseq[i - 1]), t1))
                        features.add(((zseq[i - 2], zseq[i - 1]), t2))
                w_model = dictadd(w_model, delta)
                if isAVG:
                    wa_model = dictadd(wa_model, delta, c)
        # train error is counted on-line from decoding mistakes above
        train_err = error / total_words
        dev_err = tagger.test(devfile, dictionary, w_model)
        feature_size = len(features)
        if not isAVG:
            print("epoch %d, update %d, features %d, train_err %.2f%%, dev_err %.2f%%"
                  % (epoch + 1, update, feature_size, 100 * train_err, 100 * dev_err))
        else:
            # evaluate the averaged model: w - wa/c
            dev_AVG_err = tagger.test(devfile, dictionary,
                                      dictadd(w_model, wa_model, -1 / c))
            print("epoch %d, update %d, features %d, train_err %.2f%%, "
                  "dev_err %.2f%%, dev_avg %.2f%%"
                  % (epoch + 1, update, feature_size, 100 * train_err,
                     100 * dev_err, 100 * dev_AVG_err))
    if not isAVG:
        return w_model
    return dictadd(w_model, wa_model, -1 / c)
def predict_lable(testfile, dictionary, weight, MultiGrams):
    """Decode every line of `testfile` and write "word/tag " rows via csv.

    Output goes to 'dev.lower.unk.best'; switch the output open() call to
    'test.lower.unk.best' to produce test-set predictions instead.
    """
    with open(testfile, 'r') as csvinput:
        with open('dev.lower.unk.best', 'w') as csvoutput:
            writer = csv.writer(csvoutput)
            # iterate the file lazily instead of readlines() (no full
            # in-memory read of the input)
            for line in csvinput:
                words = line.split()
                predict_tag = tagger.decode(words, dictionary, weight, MultiGrams)
                # join at C speed instead of quadratic += concatenation
                wt = ''.join(word + '/' + tag + ' '
                             for word, tag in zip(words, predict_tag))
                writer.writerow(wt.split(','))
    # redundant explicit close() calls removed: the with-blocks already
    # close both files on exit
def avg_perc_train(trainfile, devfile, dictionary, model):
    """Train an averaged bigram structured perceptron.

    The input `model` is left untouched; training runs on deep copies
    (`model_0` raw weights, `model_a` c-weighted update sum).  Prints
    per-epoch train/dev error of the averaged weights and elapsed time.
    """
    print("-------------------------------------------------------")
    print("AVERAGED PERCEPTRON - BIGRAM")
    updates = 0
    c = 1                      # example counter for the averaging trick
    epoch = 5
    lr = 1
    model_0 = copy.deepcopy(model)   # raw weights
    model_a = copy.deepcopy(model)   # running sum of c-weighted updates
    start = time.time()
    for i in range(epoch):
        for x_i, y_i in readfile(trainfile):
            z_i = decode(x_i, dictionary, model_0)
            if y_i != z_i:
                updates += 1
                phi_xy = phi(x_i, y_i)
                phi_xz = phi(x_i, z_i)
                for key in phi_xy:
                    if key not in phi_xz:
                        model_0[key] += lr * 1
                        model_a[key] += c * lr * 1
                for key in phi_xz:
                    if key not in phi_xy:
                        model_0[key] -= lr * 1
                        model_a[key] -= c * lr * 1
            c += 1
        # Averaged weights: w - wa/c.
        # BUG FIX: iterate model_0 — which accrues new feature keys during
        # training (the model is presumably a defaultdict, since += on
        # unseen keys works; verify against the caller) — instead of the
        # untouched input `model`, so no learned feature is dropped.
        weights = {key: model_0[key] - model_a[key] / c for key in model_0}
        train_err = test(trainfile, dictionary, weights)
        dev_err = test(devfile, dictionary, weights)
        # BUG FIX: report the mistake-driven update count (matching the
        # unaveraged trainer's output), not the example counter c.
        print("epoch:", i + 1, "updates:", updates,
              "train_err: {0:.2%}".format(train_err),
              "dev_err: {0:.2%}".format(dev_err))
    end = time.time()
    elap_time = end - start
    print("elapsed time:", elap_time)
    print("-------------------------------------------------------")
def train(trainfile, devfile, dictionary, Average=False, MultiGrams=False, epochs=10):
    """Train a structured perceptron tagger, optionally averaged.

    Returns a tuple: (per-epoch train error rates, per-epoch dev error
    rates, epoch numbers, best dev error, best weight model).
    """
    weight = defaultdict(float)
    avg_weight = defaultdict(float)  # running sum of c-weighted updates
    # BUG FIX: materialize the training data once; if readfile returns a
    # generator it would be exhausted after epoch 1 and every later epoch
    # would silently train on nothing.
    trainset = list(tagger.readfile(trainfile))
    c = 0
    best_dev_err = 1
    # Robustness: defined even if no epoch ever improves on best_dev_err.
    best_epoch = 0
    best_weight = dict(weight)
    error_rates = []
    error_rates_train = []
    location_epoch = []
    if not Average:
        print('Unaverage Structure Perceptron, MultiGrams = %s' % (MultiGrams))
    else:
        print('Average Structure Perceptron, MultiGrams = %s' % (MultiGrams))
    for epoch in range(1, epochs + 1):
        errors = 0
        for wordseq, gold_tagseq in trainset:
            c += 1
            cur_tagseq = tagger.decode(wordseq, dictionary, weight, MultiGrams)
            if cur_tagseq != gold_tagseq:
                errors += 1
                phi_total = defaultdict(float)
                # pad one boundary symbol per side (bigram context window)
                wordseq = [startsym] + wordseq + [stopsym]
                gold_tagseq = [startsym] + gold_tagseq + [stopsym]
                cur_tagseq = [startsym] + cur_tagseq + [stopsym]
                for i, (word, tag_gold, tag_cur) in enumerate(
                        zip(wordseq[1:], gold_tagseq[1:], cur_tagseq[1:]), 1):
                    if tag_gold != tag_cur:
                        phi_total[('tw', tag_gold, word)] += 1  # tag(y) -> word
                        phi_total[('tw', tag_cur, word)] -= 1   # tag(z) -> word
                        if MultiGrams:
                            phi_total[('tt_1w', tag_gold, gold_tagseq[i - 1], word)] += 1
                            phi_total[('tt_1w', tag_cur, cur_tagseq[i - 1], word)] -= 1
                    if tag_gold != tag_cur or gold_tagseq[i - 1] != cur_tagseq[i - 1]:
                        phi_total[(tag_gold, gold_tagseq[i - 1])] += 1  # phi(x, y)
                        phi_total[(tag_cur, cur_tagseq[i - 1])] -= 1    # phi(x, z)
                        if MultiGrams:
                            phi_total[(tag_gold, word, wordseq[i - 1])] += 1
                            phi_total[(tag_cur, word, wordseq[i - 1])] -= 1
                            phi_total[('tt_1w_1', tag_gold, gold_tagseq[i - 1], wordseq[i - 1])] += 1
                            phi_total[('tt_1w_1', tag_cur, cur_tagseq[i - 1], wordseq[i - 1])] -= 1
                            phi_total[(tag_gold, gold_tagseq[i - 1], word, wordseq[i - 1])] += 1
                            phi_total[(tag_cur, cur_tagseq[i - 1], word, wordseq[i - 1])] -= 1
                            if i >= 2:
                                phi_total[(tag_gold, gold_tagseq[i - 2], gold_tagseq[i - 1])] += 1
                                phi_total[(tag_cur, cur_tagseq[i - 2], cur_tagseq[i - 1])] -= 1
                            if i >= 3:
                                phi_total[(tag_gold, gold_tagseq[i - 3], gold_tagseq[i - 2], gold_tagseq[i - 1])] += 1
                                phi_total[(tag_cur, cur_tagseq[i - 3], cur_tagseq[i - 2], cur_tagseq[i - 1])] -= 1
                for e in phi_total:
                    weight[e] += phi_total[e]
                    if Average:
                        avg_weight[e] += c * phi_total[e]
        if not Average:
            train_err = tagger.test(trainfile, dictionary, weight, MultiGrams)
            dev_err = tagger.test(devfile, dictionary, weight, MultiGrams)
            if best_dev_err > dev_err:
                best_dev_err = dev_err
                best_epoch = epoch
                # BUG FIX: snapshot the weights; the original stored a live
                # reference that later epochs kept mutating.
                best_weight = dict(weight)
            error_rates.append(dev_err)
            error_rates_train.append(train_err)
            location_epoch.append(epoch)
            print(
                "epoch %d, updates %d, feature = %d, train_err = %.2f%%, dev_err = %.2f%%"
                % (epoch, errors, num_feature(weight), train_err * 100, dev_err * 100))
        else:
            # BUG FIX: build the averaged model (w - wa/c) in a fresh dict.
            # The original overwrote avg_weight in place each epoch, so
            # subsequent epochs accumulated c-weighted updates on top of
            # already-averaged values, corrupting the running sum.
            averaged = defaultdict(float)
            for e in weight:
                averaged[e] = weight[e] - avg_weight[e] / c
            train_avg_err = tagger.test(trainfile, dictionary, averaged, MultiGrams)
            dev_avg_err = tagger.test(devfile, dictionary, averaged, MultiGrams)
            if best_dev_err > dev_avg_err:
                best_dev_err = dev_avg_err
                best_epoch = epoch
                # BUG FIX: the model that achieved dev_avg_err is the
                # averaged one, so snapshot it (not the raw weights).
                best_weight = dict(averaged)
            error_rates.append(dev_avg_err)
            error_rates_train.append(train_avg_err)
            location_epoch.append(epoch)
            print(
                "epoch %d, updates %d, feature = %d, train_err = %.2f%%, dev_avg_err = %.2f%%"
                % (epoch, errors, num_feature(weight), train_avg_err * 100, dev_avg_err * 100))
    if not Average:
        print("The best dev_err = %.2f%% at %d epoch" % (best_dev_err * 100, best_epoch))
    else:
        print("The best dev_avg_err = %.2f%% at %d epoch" % (best_dev_err * 100, best_epoch))
    return error_rates_train, error_rates, location_epoch, best_dev_err, best_weight