def q2_5():
    """Print word and character accuracy over the whole test set.

    For every gold word returned by ``load_gold('test')`` the matching
    ``../data/test_img<i>.txt`` file (1-based) is loaded, run through the
    crf pipeline (node potentials -> clique potentials -> messages ->
    beliefs -> marginals -> ``crf.classify``) and compared with the gold
    labels mapped through ``char_map``.  The first five predicted/gold
    pairs are printed, followed by the two accuracies.

    Raises:
        ZeroDivisionError: if the gold set is empty or no characters were
            classified.
    """
    gold_words = load_gold('test')
    test_size = len(gold_words)

    # The weights do not depend on the test image, so load them once
    # instead of once per iteration.
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()

    words_correct = 0
    chars_correct = 0
    test_chars_size = 0
    for i, gold_labels in enumerate(gold_words, 1):
        data = load_data("../data/test_img%d.txt" % i)
        node_potentials = crf.compute_node_potentials(data, f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(char_map[c] for c in gold_labels)

        if word == gold:
            words_correct += 1
        # zip pairs characters position by position and cannot index past
        # the end of the shorter string; unmatched positions are simply
        # not counted as correct.
        chars_correct += sum(1 for w, g in zip(word, gold) if w == g)
        test_chars_size += len(word)

        if i < 6:  # only print first 5 test words
            print("%s %s" % (word, gold))

    word_acc = float(words_correct) / test_size
    char_acc = float(chars_correct) / test_chars_size
    print("Word accuracy: %s" % word_acc)
    print("Character accuracy: %s" % char_acc)
def q2_5():
    """Report word- and character-level accuracy on the test set.

    Iterates over the gold words from ``load_gold('test')``; test image
    ``i`` (1-based) is read from ``../data/test_img<i>.txt``, classified via
    the crf sum-product pipeline ending in ``crf.classify``, and compared
    against the gold labels translated through ``char_map``.  Prints the
    first five predicted/gold pairs and then both accuracies.

    Raises:
        ZeroDivisionError: if there are no gold words or no classified
            characters.
    """
    gold_words = load_gold('test')
    test_size = len(gold_words)

    # Loop-invariant: the same weights are used for every image, so they
    # are loaded a single time up front.
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()

    words_correct = 0
    chars_correct = 0
    test_chars_size = 0
    for i, gold_labels in enumerate(gold_words, 1):
        data_fname = "../data/test_img%d.txt" % i
        data = load_data(data_fname)
        node_potentials = crf.compute_node_potentials(data, f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(char_map[c] for c in gold_labels)

        words_correct += 1 if word == gold else 0
        # Pairwise comparison via zip avoids an IndexError when the
        # prediction and the gold word differ in length.
        chars_correct += sum(1 for w, g in zip(word, gold) if w == g)
        test_chars_size += len(word)

        if i < 6:  # only print first 5 test words
            print("%s %s" % (word, gold))

    word_acc = float(words_correct) / test_size
    char_acc = float(chars_correct) / test_chars_size
    print("Word accuracy: %s" % word_acc)
    print("Character accuracy: %s" % char_acc)
def q2_1():
    """Print a sub-block of the clique potentials for ../data/test_img1.txt.

    The potentials are restricted to indices 0, 1 and 8 first along the
    last axis and then along axis 1.
    """
    features = load_data("../data/test_img1.txt")
    feature_w = load_feature_weights()
    transition_w = load_transition_weights()
    cliques = crf.compute_clique_potentials(
        crf.compute_node_potentials(features, feature_w), transition_w)

    keep = (0, 1, 8)  # only want e, t, r
    along_last_axis = cliques[..., keep]
    print(along_last_axis[:, keep])
def q1_4():
    """Print the result of ``crf.infer_labels`` for the first three test images."""
    image_paths = (
        "../data/test_img1.txt",
        "../data/test_img2.txt",
        "../data/test_img3.txt",
    )
    transition_w = load_transition_weights()
    feature_w = load_feature_weights()

    for path in image_paths:
        node_pots = crf.compute_node_potentials(load_data(path), feature_w)
        print(crf.infer_labels(node_pots, transition_w))
def q2_1():
    """Show the clique potentials of ../data/test_img1.txt for three labels.

    Only indices 0, 1 and 8 are kept, selected on the last axis and then on
    axis 1 of the array returned by ``crf.compute_clique_potentials``.
    """
    image = load_data("../data/test_img1.txt")
    w_feat = load_feature_weights()
    w_trans = load_transition_weights()
    node_pots = crf.compute_node_potentials(image, w_feat)
    clique_pots = crf.compute_clique_potentials(node_pots, w_trans)

    wanted = (0, 1, 8)  # only want e, t, r
    restricted = clique_pots[..., wanted]
    restricted = restricted[:, wanted]
    print(restricted)
def q2_2():
    """Print the forward and backward messages for ../data/test_img1.txt."""
    features = load_data("../data/test_img1.txt")
    feature_w = load_feature_weights()
    transition_w = load_transition_weights()
    cliques = crf.compute_clique_potentials(
        crf.compute_node_potentials(features, feature_w), transition_w)
    fwd, bwd = crf.compute_messages(cliques)

    # Label and value are joined into one string: the Python 2 print
    # statement emits no separator after an item ending in a newline.
    print("forward:\n" + str(fwd))
    print("backward:\n" + str(bwd))
def q2_3():
    """Print part of the clique beliefs for ../data/test_img1.txt.

    Only the first two entries along axes 1 and 2 of the beliefs are shown.
    """
    features = load_data("../data/test_img1.txt")
    feature_w = load_feature_weights()
    transition_w = load_transition_weights()
    cliques = crf.compute_clique_potentials(
        crf.compute_node_potentials(features, feature_w), transition_w)
    fwd, bwd = crf.compute_messages(cliques)
    clique_beliefs = crf.compute_beliefs(cliques, fwd, bwd)

    first_two = clique_beliefs[:, :2, :2]  # only want e, t
    # Joined into one string: the Python 2 print statement emits no
    # separator after an item ending in a newline.
    print("beliefs:\n" + str(first_two))
def q2_3():
    """Display the leading 2x2 slice of every belief for ../data/test_img1.txt.

    Runs node potentials -> clique potentials -> messages -> beliefs through
    the ``crf`` module and prints ``beliefs[:, :2, :2]``.
    """
    image = load_data("../data/test_img1.txt")
    w_feat = load_feature_weights()
    w_trans = load_transition_weights()
    node_pots = crf.compute_node_potentials(image, w_feat)
    clique_pots = crf.compute_clique_potentials(node_pots, w_trans)
    messages = crf.compute_messages(clique_pots)
    fwd, bwd = messages
    belief_table = crf.compute_beliefs(clique_pots, fwd, bwd)

    # only want e, t
    # (label and value are concatenated so the output matches the Python 2
    # print statement, which adds no space after a trailing newline)
    print("beliefs:\n" + str(belief_table[:, :2, :2]))
def q1_2():
    """Print ``crf.compute_neg_energy`` of the gold labels for test images 1-3."""
    image_paths = (
        "../data/test_img1.txt",
        "../data/test_img2.txt",
        "../data/test_img3.txt",
    )
    transition_w = load_transition_weights()
    feature_w = load_feature_weights()
    gold_labels = load_gold('test')

    for idx, path in enumerate(image_paths):
        node_pots = crf.compute_node_potentials(load_data(path), feature_w)
        # gold_labels[idx] is the gold entry for the idx-th image above.
        print(crf.compute_neg_energy(node_pots, transition_w, gold_labels[idx]))
def q1_2():
    """For each of the first three test images, print the negative energy
    that ``crf.compute_neg_energy`` assigns to its gold labelling.
    """
    paths = [
        "../data/test_img1.txt",
        "../data/test_img2.txt",
        "../data/test_img3.txt",
    ]
    w_trans = load_transition_weights()
    w_feat = load_feature_weights()
    gold = load_gold('test')

    for position, path in enumerate(paths):
        image = load_data(path)
        node_pots = crf.compute_node_potentials(image, w_feat)
        neg_energy = crf.compute_neg_energy(node_pots, w_trans, gold[position])
        print(neg_energy)
def q1_3():
    """Print ``crf.log_Z`` computed by brute force for test images 1-3.

    For each image every labelling drawn from the characters '0'-'9' is
    enumerated and scored with ``crf.compute_neg_energy``; the full list of
    scores is handed to ``crf.log_Z``.
    """
    image_paths = (
        "../data/test_img1.txt",
        "../data/test_img2.txt",
        "../data/test_img3.txt",
    )
    transition_w = load_transition_weights()
    feature_w = load_feature_weights()

    for path in image_paths:
        node_pots = crf.compute_node_potentials(load_data(path), feature_w)
        seq_len = np.shape(node_pots)[0]

        # The Cartesian product yields 10 ** seq_len candidate labellings,
        # each a tuple of single-character strings.
        neg_energies = []
        for labelling in itertools.product('0123456789', repeat=seq_len):
            neg_energies.append(
                crf.compute_neg_energy(node_pots, transition_w, labelling))
        print(crf.log_Z(neg_energies))
def q1_3():
    """Exhaustively enumerate labellings of test images 1-3 and print log Z.

    Each labelling is a tuple over the characters '0'-'9' whose length is
    the first dimension of the node potentials; all of their negative
    energies are collected and passed to ``crf.log_Z``.
    """
    paths = [
        "../data/test_img1.txt",
        "../data/test_img2.txt",
        "../data/test_img3.txt",
    ]
    w_trans = load_transition_weights()
    w_feat = load_feature_weights()

    for path in paths:
        image = load_data(path)
        node_pots = crf.compute_node_potentials(image, w_feat)
        # One label character per row of node_pots; 10 choices per row.
        n_positions = np.shape(node_pots)[0]
        candidates = itertools.product('0123456789', repeat=n_positions)
        scores = []
        for candidate in candidates:
            scores.append(crf.compute_neg_energy(node_pots, w_trans, candidate))
        print(crf.log_Z(scores))
def q2_4():
    """Print the marginals returned by ``crf.compute_marginals`` for
    ../data/test_img1.txt.

    The first returned value is printed in full; the second is restricted to
    indices 0, 1 and 8 on the last axis and then on axis 1.
    """
    features = load_data("../data/test_img1.txt")
    feature_w = load_feature_weights()
    transition_w = load_transition_weights()
    cliques = crf.compute_clique_potentials(
        crf.compute_node_potentials(features, feature_w), transition_w)
    fwd, bwd = crf.compute_messages(cliques)
    clique_beliefs = crf.compute_beliefs(cliques, fwd, bwd)
    pos_probs, pair_probs = crf.compute_marginals(cliques, clique_beliefs)

    # Label and value are joined into one string: the Python 2 print
    # statement emits no separator after an item ending in a newline.
    print("marginals:\n" + str(pos_probs))

    # uncomment to print LaTeX tabular version
    #print_marginals_table(pos_probs)

    keep = (0, 1, 8)  # only want e, t, r
    along_last_axis = pair_probs[..., keep]
    print("pairwise:\n" + str(along_last_axis[:, keep]))
def q2_4():
    """Compute and print marginals for ../data/test_img1.txt.

    Prints both values returned by ``crf.compute_marginals``: the first as
    is, the second cut down to indices 0, 1 and 8 (last axis, then axis 1).
    """
    image = load_data("../data/test_img1.txt")
    w_feat = load_feature_weights()
    w_trans = load_transition_weights()
    node_pots = crf.compute_node_potentials(image, w_feat)
    clique_pots = crf.compute_clique_potentials(node_pots, w_trans)
    fwd, bwd = crf.compute_messages(clique_pots)
    belief_table = crf.compute_beliefs(clique_pots, fwd, bwd)
    pos_probs, trans_probs = crf.compute_marginals(clique_pots, belief_table)

    # Concatenation keeps the output identical to the Python 2 print
    # statement, which adds no space after a label ending in "\n".
    print("marginals:\n" + str(pos_probs))

    # uncomment to print LaTeX tabular version
    #print_marginals_table(pos_probs)

    wanted = (0, 1, 8)  # only want e, t, r
    pairwise = trans_probs[..., wanted]
    pairwise = pairwise[:, wanted]
    print("pairwise:\n" + str(pairwise))
def test_model(f_weights, t_weights):
    """Return the character-level error rate of the given weights.

    Classifies ``test_data[i]`` for ``i`` in ``range(test_size)`` through the
    crf pipeline and compares each predicted word with ``gold_test_words[i]``
    mapped through ``char_map``.  ``test_size``, ``test_data``,
    ``gold_test_words`` and ``char_map`` are read from module scope.

    Args:
        f_weights: feature weights passed to ``crf.compute_node_potentials``.
        t_weights: transition weights passed to
            ``crf.compute_clique_potentials``.

    Returns:
        ``1 - (correct characters / classified characters)`` as a float.

    Raises:
        ZeroDivisionError: if no characters were classified.
    """
    chars_correct = 0
    test_chars_size = 0
    for i in range(test_size):
        node_potentials = crf.compute_node_potentials(test_data[i], f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(char_map[c] for c in gold_test_words[i])

        # Word-level accuracy is not part of the result, so it is no longer
        # tracked.  zip compares position by position without indexing past
        # the end of the shorter string.
        chars_correct += sum(1 for w, g in zip(word, gold) if w == g)
        test_chars_size += len(word)

    return 1 - float(chars_correct) / test_chars_size
def test_model(f_weights, t_weights):
    """Compute the character error rate on the test set for these weights.

    Uses the module-level ``test_size``, ``test_data``, ``gold_test_words``
    and ``char_map``.  Each test example is classified with the crf pipeline
    (node potentials -> clique potentials -> messages -> beliefs ->
    marginals -> ``crf.classify``) and compared with its gold word.

    Args:
        f_weights: feature weights for ``crf.compute_node_potentials``.
        t_weights: transition weights for ``crf.compute_clique_potentials``.

    Returns:
        One minus the fraction of classified characters that match the gold
        characters.

    Raises:
        ZeroDivisionError: if no characters were classified.
    """
    chars_correct = 0
    test_chars_size = 0
    for i in range(test_size):
        node_potentials = crf.compute_node_potentials(test_data[i], f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(char_map[c] for c in gold_test_words[i])

        # The unused word-accuracy accumulator was dropped; only the
        # character counts feed the return value.  Pairing with zip cannot
        # raise IndexError when the two strings differ in length.
        chars_correct += sum(1 for w, g in zip(word, gold) if w == g)
        test_chars_size += len(word)

    char_acc = float(chars_correct) / test_chars_size
    return 1 - char_acc
def get_sum_prod_marginals(num_examples, f_weights, t_weights):
    """Compute marginals for the first ``num_examples`` training examples.

    Each example in ``training_data[:num_examples]`` (module-level data) is
    run through node potentials -> clique potentials -> messages -> beliefs
    -> ``crf.compute_marginals``.

    Args:
        num_examples: how many leading training examples to process.  If
            fewer are available, only the available ones are processed.
        f_weights: feature weights for ``crf.compute_node_potentials``.
        t_weights: transition weights for ``crf.compute_clique_potentials``.

    Returns:
        ``np.array`` built from the list of ``crf.compute_marginals``
        results, one entry per processed example.
    """
    marginals = []
    # A single pass over the slice keeps every stage tied to the same
    # example; indexing by range(num_examples) could run past the end of the
    # slice when fewer examples exist than requested.
    for example in training_data[:num_examples]:
        node_potentials = crf.compute_node_potentials(example, f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        marginals.append(crf.compute_marginals(clique_potentials, beliefs))
    return np.array(marginals)
def q1_1():
    """Print the node potentials computed for ../data/test_img1.txt."""
    features = load_data("../data/test_img1.txt")
    feature_w = load_feature_weights()
    node_pots = crf.compute_node_potentials(features, feature_w)
    print(node_pots)
'''
CS688 HW02: Chain CRF

Question 3: Maximum Log Likelihood Learning Derivation

@author: Emma Strubell
'''

# np.sum / np.log are used below; import numpy explicitly rather than
# relying on the name arriving through the star import.
import numpy as np

from data_loader import *
import chain_crf as crf

# compute log likelihood over first 50 training examples
gold_words = load_gold('train')
train_size = 50

f_weights = load_feature_weights()
t_weights = load_transition_weights()

total = 0.0
for i in range(train_size):
    # Training image files are numbered from 1.
    data_fname = "../data/train_img%d.txt" % (i + 1)
    example = load_data(data_fname)
    node_potentials = crf.compute_node_potentials(example, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials, t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
    pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)

    # Sum the log of pos_probs[position, gold label] over the gold word.
    # The comprehension uses its own names: under Python 2 comprehension
    # variables leak, so reusing `i` would overwrite the loop index.
    total += np.sum(np.log(
        [pos_probs[pos, label] for pos, label in enumerate(gold_words[i])]))

# Average over the examples processed.
total /= train_size
print("Log likelihood: %s" % total)
def get_sum_prod_marginals(num_examples, f_weights, t_weights):
    """Return the marginals of the leading training examples.

    Processes ``training_data[:num_examples]`` (``training_data`` is read
    from module scope) one example at a time through the crf pipeline:
    node potentials, clique potentials, forward/backward messages, beliefs,
    and finally ``crf.compute_marginals``.

    Args:
        num_examples: number of leading training examples to use; when the
            training set is smaller, all available examples are used.
        f_weights: feature weights for ``crf.compute_node_potentials``.
        t_weights: transition weights for ``crf.compute_clique_potentials``.

    Returns:
        ``np.array`` of the per-example ``crf.compute_marginals`` results.
    """
    results = []
    # Iterate the slice directly instead of indexing four parallel lists by
    # range(num_examples): that indexing raised IndexError whenever the
    # slice was shorter than num_examples, and kept every intermediate for
    # all examples alive at once.
    for example in training_data[:num_examples]:
        node_potentials = crf.compute_node_potentials(example, f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        results.append(crf.compute_marginals(clique_potentials, beliefs))
    return np.array(results)