def execute(dataset, hmm_file, tag_file, k):
    """Tag every sentence in `dataset` and emit up to `k` best tag sequences.

    For each sentence, stdout receives the sentence itself, then one line per
    tag sequence that was found (best first), then a blank separator line.
    Progress and per-rank convergence messages go to stderr, followed by a
    summary of how often each rank converged relative to the number of
    sentences processed.

    Args:
        dataset: forwarded to `data_reader.read_tagging_data`.
        hmm_file: forwarded to `hmm_utils.get_param_tagset`.
        tag_file: forwarded to `hmm_utils.get_param_tagset`.
        k: maximum number of tag sequences to look for per sentence; must be
            at least 1.

    Raises:
        ValueError: if `k` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got " + str(k))

    hmm, tagset = hmm_utils.get_param_tagset(hmm_file, tag_file)
    # The second value returned with the sentences is not needed here.
    ts, _ = data_reader.read_tagging_data(dataset)
    test_sentences = replace_test(ts, hmm, tagset)

    # Plain write() calls instead of `print` so the same source runs under
    # both Python 2 and Python 3.
    out = sys.stdout.write
    log = sys.stderr.write

    # conv_rates[r] counts the sentences for which the (r + 1)-th best
    # sequence was obtained; floats keep the final percentages fractional.
    conv_rates = [0.0] * k

    for i, sentence in enumerate(test_sentences, 1):
        log('\n' + str(i) + '\n')
        log(' '.join(sentence) + "\n")
        out(' '.join(sentence) + "\n")

        # TODO remove redundancy
        best_tags, _, second_best, _ = dd_tagger_fst.run(sentence, tagset, hmm)
        conv_rates[0] += 1
        k_best = [best_tags]

        # With k == 1 only the single best sequence is wanted, so the
        # k-best search is skipped entirely.
        if k > 1:
            # A returned iteration count of -1 signals non-convergence.
            _, num_iter = dd_k_best.run(sentence, tagset, hmm, k_best)
            if num_iter == -1:
                log("2nd best does not converge :( \n")
            else:
                conv_rates[1] += 1
                log("2 best converges in " + str(num_iter) + " iterations \n")
                # NOTE(review): convergence is judged by dd_k_best.run, but
                # the sequence kept is the one from dd_tagger_fst.run --
                # confirm the two always agree (see the TODO above).
                k_best.append(second_best)

                for rank in range(3, k + 1):
                    next_best, num_iter = dd_k_best.run(
                        sentence, tagset, hmm, k_best)
                    if num_iter == -1:
                        log(str(rank) + "th best does not converge\n")
                        break
                    conv_rates[rank - 1] += 1
                    k_best.append(next_best)
                    log(str(rank) + " best converges in " + str(num_iter)
                        + " iterations \n")

        for tags in k_best:
            out(' '.join(tags) + "\n")
        out("\n")

    # conv_rates[0] equals the number of sentences processed; guard against
    # an empty data set instead of dividing by zero.
    total = conv_rates[0]
    for rank, count in enumerate(conv_rates, 1):
        if total:
            rate = str(count * 100 / total) + "%"
        else:
            rate = "n/a"
        # Ranks are reported 1-based to match the per-sentence messages.
        log("convergence rate of " + str(rank) + " best = " + rate + " \n")
def execute(dataset, hmm_file, tag_file):
    """Tag every sentence in `dataset` and emit its four best tag sequences.

    For each sentence the sentence itself is written to stdout. When both
    the 2nd and the 3rd best sequences converge, four tag sequences follow,
    one per line. A blank separator line is written whenever the 2nd best
    converged. Progress messages and a final convergence summary go to
    stderr.

    NOTE(review): an `execute` taking an extra `k` argument is defined
    earlier in this file; if both live at module level, this definition
    shadows it -- confirm that is intended.

    Args:
        dataset: forwarded to `data_reader.read_tagging_data`.
        hmm_file: forwarded to `hmm_utils.get_param_tagset`.
        tag_file: forwarded to `hmm_utils.get_param_tagset`.
    """
    hmm, tagset = hmm_utils.get_param_tagset(hmm_file, tag_file)
    # The second value returned with the sentences is not needed here.
    ts, _ = data_reader.read_tagging_data(dataset)
    test_sentences = replace_test(ts, hmm, tagset)

    # Plain write() calls instead of `print` so the same source runs under
    # both Python 2 and Python 3.
    out = sys.stdout.write
    log = sys.stderr.write

    total = 0      # sentences processed
    sec_conv = 0   # sentences whose 2nd best converged
    conv = 0       # sentences whose 3rd best converged

    for total, sentence in enumerate(test_sentences, 1):
        log('\n' + str(total) + '\n')
        log(' '.join(sentence) + "\n")
        out(' '.join(sentence) + "\n")

        best_tags, num_iter, tags1, tags2 = dd_tagger_fst.run(
            sentence, tagset, hmm)
        if tags2 == best_tags:
            log("YOU ARE WRONG!\n")

        # A returned iteration count of -1 signals non-convergence.
        if num_iter == -1:
            log("2nd best does not converge :(\n")
            continue

        sec_conv += 1
        log("2nd best converges in " + str(num_iter) + "\n")
        # NOTE(review): tags1 seeds the k-best search while tags2 is what
        # gets printed below -- confirm which of the two is intended.
        k_best = [best_tags, tags1]

        third_best, num_iter2 = dd_k_best.run(sentence, tagset, hmm, k_best)
        if num_iter2 == -1:
            log("3rd best does not converge :(\n")
        else:
            log("3rd best converges in " + str(num_iter2) + "\n")
            conv += 1
            k_best.append(third_best)
            fourth_best, num_iter3 = dd_k_best.run(
                sentence, tagset, hmm, k_best)
            # NOTE(review): num_iter3 is not checked, so this message and
            # the fourth line below are emitted even when it is -1.
            log("4th best converges in " + str(num_iter3) + "\n")
            out(' '.join(best_tags) + "\n")
            out(' '.join(tags2) + "\n")
            out(' '.join(third_best) + "\n")
            out(' '.join(fourth_best) + "\n")
        out("\n")

    # Integer percentages (floor division, as `/` on ints in Python 2);
    # "n/a" replaces the ZeroDivisionError that an empty data set or a run
    # without any converged 2nd best would otherwise raise.
    if total:
        second_rate = str(sec_conv * 100 // total)
    else:
        second_rate = "n/a"
    if sec_conv:
        third_rate = str(conv * 100 // sec_conv)
    else:
        third_rate = "n/a"
    log("% convergence of 2nd best =" + second_rate + "\n")
    log("% convergence of 3rd best =" + third_rate + "\n")