def add_JAMR_align(instances, aligned_amr_file):
    """Attach the AMR and a gold span graph to every instance, in place.

    The AMR strings and their comment dictionaries are loaded from
    ``aligned_amr_file`` with ``readAMR``.  Entry ``k`` of that file is
    paired with ``instances[k]``; no length check is performed, so the file
    must provide at least ``len(instances)`` entries.

    Args:
        instances: sequence of instance objects; each one must expose
            ``tokens``, ``addAMR`` and ``addGoldGraph``.
        aligned_amr_file: value passed to ``readAMR``; every comment dict it
            yields is indexed with the ``'alignments'`` key.

    Returns:
        None.  The instances are updated through their own methods.
    """
    comments, amr_strings = readAMR(aligned_amr_file)

    for idx, inst in enumerate(instances):
        amr = AMR.parse_string(amr_strings[idx])
        jamr_alignment = Aligner.readJAMRAlignment(
            amr, comments[idx]['alignments'])

        # Build the reference graph over the instance's own tokens, then let
        # the graph run its pre-merge step against the instance.
        gold_graph = SpanGraph.init_ref_graph(amr, jamr_alignment, inst.tokens)
        gold_graph.pre_merge_netag(inst)

        inst.addAMR(amr)
        inst.addGoldGraph(gold_graph)
def add_JAMR_align(instances, aligned_amr_file):
    """Attach the AMR and a gold span graph to every instance, in place.

    The AMR strings and their comment dictionaries are loaded from
    ``aligned_amr_file`` with ``readAMR``.  Entry ``k`` of that file is
    paired with ``instances[k]``; no length check is performed, so the file
    must provide at least ``len(instances)`` entries.

    Args:
        instances: sequence of instance objects; each one must expose
            ``tokens``, ``addAMR`` and ``addGoldGraph``.
        aligned_amr_file: value passed to ``readAMR``; every comment dict it
            yields is indexed with the ``'alignments'`` key.

    Returns:
        None.  The instances are updated through their own methods.
    """
    comments, amr_strings = readAMR(aligned_amr_file)

    for idx, inst in enumerate(instances):
        amr = AMR.parse_string(amr_strings[idx])
        jamr_alignment = Aligner.readJAMRAlignment(
            amr, comments[idx]['alignments'])

        # Build the reference graph over the instance's own tokens, then let
        # the graph run its pre-merge step against the instance.
        gold_graph = SpanGraph.init_ref_graph(amr, jamr_alignment, inst.tokens)
        gold_graph.pre_merge_netag(inst)

        inst.addAMR(amr)
        inst.addGoldGraph(gold_graph)
def _init_instances(sent_file, amr_strings, comments):
    """Parse ``sent_file`` and decorate each instance with AMR and gold graph.

    A fresh ``StanfordCoreNLP`` object parses ``sent_file`` into instances.
    Instance ``k`` is then paired with ``amr_strings[k]`` and ``comments[k]``.

    Args:
        sent_file: value passed to ``StanfordCoreNLP().parse``.
        amr_strings: AMR strings, one per parsed instance.
        comments: per-sentence dicts; the ``'alignments'`` and ``'snt'``
            entries are read.

    Returns:
        The instances produced by the parser, each with its AMR and gold
        graph attached.

    Raises:
        AssertionError: if the number of parsed instances differs from
            ``len(amr_strings)``.
    """
    print >> log, "Preprocess 1:pos, ner and dependency using stanford parser..."
    parser = StanfordCoreNLP()
    instances = parser.parse(sent_file)

    print >> log, "Preprocess 2:adding amr and generating gold graph"
    assert len(instances) == len(amr_strings)

    for idx, inst in enumerate(instances):
        amr = AMR.parse_string(amr_strings[idx])
        inst.addAMR(amr)

        comment = comments[idx]
        jamr_alignment = Aligner.readJAMRAlignment(amr, comment['alignments'])
        # The reference graph is built from the comment's 'snt' value here,
        # not from the instance's tokens.
        gold_graph = SpanGraph.init_ref_graph(
            amr, jamr_alignment, comment['snt'])
        gold_graph.pre_merge_netag(inst)
        inst.addGoldGraph(gold_graph)

    return instances
def _init_instances(sent_file, amr_strings, comments):
    """Parse ``sent_file`` and decorate each instance with AMR and gold graph.

    A fresh ``StanfordCoreNLP`` object parses ``sent_file`` into instances.
    Instance ``k`` is then paired with ``amr_strings[k]`` and ``comments[k]``.

    Args:
        sent_file: value passed to ``StanfordCoreNLP().parse``.
        amr_strings: AMR strings, one per parsed instance.
        comments: per-sentence dicts; the ``'alignments'`` and ``'snt'``
            entries are read.

    Returns:
        The instances produced by the parser, each with its AMR and gold
        graph attached.

    Raises:
        AssertionError: if the number of parsed instances differs from
            ``len(amr_strings)``.
    """
    print >> log, "Preprocess 1:pos, ner and dependency using stanford parser..."
    parser = StanfordCoreNLP()
    instances = parser.parse(sent_file)

    print >> log, "Preprocess 2:adding amr and generating gold graph"
    assert len(instances) == len(amr_strings)

    for idx, inst in enumerate(instances):
        amr = AMR.parse_string(amr_strings[idx])
        inst.addAMR(amr)

        comment = comments[idx]
        jamr_alignment = Aligner.readJAMRAlignment(amr, comment['alignments'])
        # The reference graph is built from the comment's 'snt' value here,
        # not from the instance's tokens.
        gold_graph = SpanGraph.init_ref_graph(
            amr, jamr_alignment, comment['snt'])
        gold_graph.pre_merge_netag(inst)
        inst.addGoldGraph(gold_graph)

    return instances
def preprocess(amr_file, START_SNLP=True):
    """Build parsed instances with AMRs and gold span graphs for ``amr_file``.

    Steps, in order:

    1. Read comments and AMR strings from ``amr_file + '.aligned'`` when that
       file exists, otherwise from ``amr_file`` itself.
    2. Write the sentences (the ``'snt'`` comment entries) to
       ``amr_file + '.sent'`` unless that file already exists.
    3. Parse the sentence file with ``StanfordCoreNLP``.
    4. Write ``amr_file + '.tok'`` via ``_write_tok_amr`` unless it already
       exists.
    5. Reset ``SpanGraph.graphID`` to 0 and attach an AMR plus a gold graph
       to every instance.

    Args:
        amr_file: path of the AMR file; also the stem for the derived
            ``.aligned``, ``.sent`` and ``.tok`` file names.
        START_SNLP: when true, ``setup()`` is called on the
            ``StanfordCoreNLP`` object before parsing.

    Returns:
        The parsed instances, each with its AMR and gold graph attached.
    """
    aligned_amr_file = amr_file + '.aligned'
    amr_source = (aligned_amr_file if os.path.exists(aligned_amr_file)
                  else amr_file)
    comments, amr_strings = readAMR(amr_source)

    sentences = [comment['snt'] for comment in comments]

    # The sentence file is only written once; an existing file is reused.
    tmp_sentence_file = amr_file + '.sent'
    if not os.path.exists(tmp_sentence_file):
        _write_sentences(tmp_sentence_file, sentences)

    print >> log, "pos, ner and dependency..."
    parser = StanfordCoreNLP()
    if START_SNLP:
        parser.setup()
    instances = parser.parse(tmp_sentence_file)

    # Same reuse policy for the tokenized AMR file.
    tok_amr_filename = amr_file + '.tok'
    if not os.path.exists(tok_amr_filename):
        _write_tok_amr(tok_amr_filename, amr_file, instances)

    SpanGraph.graphID = 0
    for idx, inst in enumerate(instances):
        amr = AMR.parse_string(amr_strings[idx])
        jamr_alignment = Aligner.readJAMRAlignment(
            amr, comments[idx]['alignments'])
        gold_graph = SpanGraph.init_ref_graph(amr, jamr_alignment, inst.tokens)
        # NOTE: gold_graph.pre_merge_netag(inst) is left disabled here; the
        # call was commented out in the original code.
        inst.addAMR(amr)
        inst.addGoldGraph(gold_graph)

    return instances
def preprocess(amr_file, START_SNLP=True):
    """Build parsed instances with AMRs and gold span graphs for ``amr_file``.

    Steps, in order:

    1. Read comments and AMR strings from ``amr_file + '.aligned'`` when that
       file exists, otherwise from ``amr_file`` itself.
    2. Write the sentences (the ``'snt'`` comment entries) to
       ``amr_file + '.sent'`` unless that file already exists.
    3. Parse the sentence file with ``StanfordCoreNLP``.
    4. Write ``amr_file + '.tok'`` via ``_write_tok_amr`` unless it already
       exists.
    5. Reset ``SpanGraph.graphID`` to 0 and attach an AMR plus a gold graph
       to every instance.

    Args:
        amr_file: path of the AMR file; also the stem for the derived
            ``.aligned``, ``.sent`` and ``.tok`` file names.
        START_SNLP: when true, ``setup()`` is called on the
            ``StanfordCoreNLP`` object before parsing.

    Returns:
        The parsed instances, each with its AMR and gold graph attached.
    """
    aligned_amr_file = amr_file + '.aligned'
    amr_source = (aligned_amr_file if os.path.exists(aligned_amr_file)
                  else amr_file)
    comments, amr_strings = readAMR(amr_source)

    sentences = [comment['snt'] for comment in comments]

    # The sentence file is only written once; an existing file is reused.
    tmp_sentence_file = amr_file + '.sent'
    if not os.path.exists(tmp_sentence_file):
        _write_sentences(tmp_sentence_file, sentences)

    print >> log, "pos, ner and dependency..."
    parser = StanfordCoreNLP()
    if START_SNLP:
        parser.setup()
    instances = parser.parse(tmp_sentence_file)

    # Same reuse policy for the tokenized AMR file.
    tok_amr_filename = amr_file + '.tok'
    if not os.path.exists(tok_amr_filename):
        _write_tok_amr(tok_amr_filename, amr_file, instances)

    SpanGraph.graphID = 0
    for idx, inst in enumerate(instances):
        amr = AMR.parse_string(amr_strings[idx])
        jamr_alignment = Aligner.readJAMRAlignment(
            amr, comments[idx]['alignments'])
        gold_graph = SpanGraph.init_ref_graph(amr, jamr_alignment, inst.tokens)
        # NOTE: gold_graph.pre_merge_netag(inst) is left disabled here; the
        # call was commented out in the original code.
        inst.addAMR(amr)
        inst.addGoldGraph(gold_graph)

    return instances