def align_file(fin, fout, em_model, hmm_model):
    """Align every pair read from ``fin`` and write the result to ``fout``.

    The work is delegated entirely to ``Aligner``: the pairs are read with
    ``Aligner.readAMR``, aligned with ``Aligner.alignPairs`` using the two
    supplied models, and emitted with ``Aligner.printAlignments``.

    Args:
        fin: Input handed to ``Aligner.readAMR`` (presumably an open AMR
            file -- confirm against ``Aligner``).
        fout: Output destination handed to ``Aligner.printAlignments``.
        em_model: EM model forwarded to ``Aligner.alignPairs``.
        hmm_model: HMM model forwarded to ``Aligner.alignPairs``.
    """
    amr_pairs = Aligner.readAMR(fin)

    print("Aligning")
    alignments = Aligner.alignPairs(amr_pairs, em_model, hmm_model)

    print("Writing alignments to file")
    Aligner.printAlignments(alignments, amr_pairs, fout)

    print("done")
def train_models(fnames, emiter, hmmiter, model_name):
    """Train the EM model and then the HMM model from a list of AMR files.

    Steps, in order:
      1. Read all pairs from every file in ``fnames`` via ``Aligner.readAMR``.
      2. Tokenize the first element of each pair and build an ``AMRGraph``
         from the second.
      3. Run ``EM.train`` with ``model_name + ".em"``.
      4. Compute an initial alignment per sentence with ``Aligner.initalign``.
      5. Run ``HMM.train`` with ``model_name + ".hmm"``, seeded with the EM
         result and the initial alignments.

    Args:
        fnames: Paths of the AMR files to read. Must be non-empty.
        emiter: Iteration argument forwarded to ``EM.train``.
        hmmiter: Iteration argument forwarded to ``HMM.train``.
        model_name: Prefix for the names passed to the trainers
            (presumably the files the models are saved to -- confirm
            against ``EM.train`` / ``HMM.train``).

    Raises:
        SystemExit: With the message ``"No file provided"`` when ``fnames``
            is empty or ``None``.
    """
    if not fnames:
        sys.exit("No file provided")

    print("Reading AMR files")
    pairs = []
    for fname in fnames:
        # The context manager closes the handle even if readAMR raises;
        # the read happens inside the block so the file is still open.
        with open(fname, "r") as f:
            pairs.extend(Aligner.readAMR(f))

    sentences = [Aligner.tokenize(pair[0]) for pair in pairs]
    graphs = [AMRGraph(pair[1], False) for pair in pairs]
    emprobs = EM.train(sentences, graphs, model_name + ".em", emiter)

    print("Initializing rule-based alignments")
    n = len(sentences)
    initalgs = []
    for done, (graph, sentence) in enumerate(zip(graphs, sentences), start=1):
        initalgs.append(Aligner.initalign(graph.ref, sentence))
        # Progress report every 1000 sentences.
        if done % 1000 == 0:
            print(str(done) + "/" + str(n))

    HMM.train(sentences, graphs, emprobs, model_name + ".hmm", hmmiter, initalgs)
    print("Done")