def parse_batch_preprocessed_data(self, preprocessed_input_examples, set_wiki, normalize_mod):
    """Parse a batch of preprocessed examples into AMR graphs.

    Runs the full pipeline: concept identification, relation identification,
    and graph construction, then renders both an MRP graph object and a
    JAMR-style text block per example.

    Args:
        preprocessed_input_examples: batch of preprocessed feature dicts
            accepted by self.feature_to_torch (exact schema defined by the
            feature extractor — not visible here).
        set_wiki: forwarded to the decoder's graph construction (presumably
            controls :wiki attribute prediction — confirm in decoder).
        normalize_mod: forwarded as normalizeMod to relProbAndConToGraph.

    Returns:
        (batch_mrp_graphs, batch_out): MRP graph objects and the
        corresponding JAMR-annotated text outputs, both restored to the
        original example order via `order`.
    """
    # feature_to_torch sorts the batch internally; `order` maps batch
    # position i back to the caller's original position order[i].
    order,idsBatch,srcBatch, src_charBatch, sourceBatch,srcBertBatch,srcBertIndexBatch,data_iterator = self.feature_to_torch(preprocessed_input_examples)
    # Stage 1: concept (node) prediction. rel=False selects the concept head.
    probBatch, src_enc = self.model(srcBatch, src_charBatch, rel=False, bertBatch=srcBertBatch, bertIndexBatch = srcBertIndexBatch)
    amr_pred_seq,concept_batches,aligns_raw,dependent_mark_batch = self.decoder.probAndSourceToConcepts(sourceBatch,srcBatch, src_charBatch, probBatch,getsense = True )
    # Flatten each predicted unit into a (category, lemma, aux, sense, unit) tuple.
    amr_pred_seq = [ [(uni.cat,uni.le,uni.aux,uni.sense,uni) for uni in seq ] for seq in amr_pred_seq ]
    # Stage 2: relation (edge) prediction over the predicted concepts.
    rel_batch,aligns = rel_to_batch(concept_batches,aligns_raw,data_iterator,self.dicts)
    rel_prob,roots = self.model((rel_batch,srcBatch, src_charBatch, src_enc,aligns), rel=True, bertBatch=srcBertBatch)
    graphs,rel_triples = self.decoder.relProbAndConToGraph(concept_batches, sourceBatch, rel_prob,roots,(dependent_mark_batch,aligns_raw),True,set_wiki,normalizeMod=normalize_mod)
    batch_out = [0]*len(graphs)
    batch_mrp_graphs = [0]*len(graphs)
    for i,data in enumerate(zip(idsBatch, sourceBatch,amr_pred_seq,concept_batches,rel_triples,graphs)):
        example_id, source,amr_pred,concept, rel_triple,graph= data
        # NOTE(review): normalizeMod is hard-coded True here although this
        # method takes a normalize_mod parameter (used above) — confirm
        # whether the MRP rendering should honor normalize_mod as well.
        mrp_graph, predicated_graph = graph_to_mrpGraph(example_id, graph, normalizeMod = True, flavor=2, framework="amr", sentence=" ".join(source[0]))
        # Assemble the JAMR-style text block: metadata header lines,
        # node/edge alignments, then the penman-serialized graph.
        out = []
        out.append( "# ::id "+ example_id +"\n")
        out.append( "# ::tok "+" ".join(source[0])+"\n")
        out.append( "# ::lemma "+" ".join(source[1])+"\n")
        out.append( "# ::pos "+" ".join(source[2])+"\n")
        out.append( "# ::ner "+" ".join(source[3])+"\n")
        out.append( self.decoder.nodes_jamr(graph))
        out.append( self.decoder.edges_jamr(graph))
        out.append( predicated_graph)
        # Restore the caller's original ordering.
        batch_out[order[i]] = "".join(out)+"\n"
        batch_mrp_graphs[order[i]] = mrp_graph
    return batch_mrp_graphs, batch_out
def parse_batch_preprocessed_data(self, preprocessed_input_examples, set_wiki, normalize_mod):
    """Parse a batch of preprocessed examples; MRP graph outputs only, no text outputs.

    NOTE(review): this redefines parse_batch_preprocessed_data and shadows the
    earlier definition in this module — confirm which variant is intended to win.

    Args:
        preprocessed_input_examples: batch of preprocessed feature dicts
            accepted by self.feature_to_torch.
        set_wiki: forwarded to the decoder's graph construction.
        normalize_mod: forwarded as normalizeMod to relProbAndConToGraph.

    Returns:
        (batch_mrp_graphs, batch_out): MRP graph objects in the caller's
        original order, and a placeholder list (text rendering is skipped
        in this variant).
    """
    # Fixed: parameter was misspelled `preprocessed_input_examplesi` while the
    # body referenced `preprocessed_input_examples` (NameError at call time).
    # Fixed: `srcBertIndexBatch` was used below but never unpacked here —
    # unpack the full 8-tuple exactly as the sibling method above does.
    order, idsBatch, srcBatch, src_charBatch, sourceBatch, srcBertBatch, srcBertIndexBatch, data_iterator = \
        self.feature_to_torch(preprocessed_input_examples)
    # Stage 1: concept (node) prediction.
    probBatch, src_enc = self.model(srcBatch, src_charBatch, rel=False,
                                    bertBatch=srcBertBatch, bertIndexBatch=srcBertIndexBatch)
    concepts_pred_seq, concept_batches, aligns_raw, dependent_mark_batch = self.decoder.probAndSourceToConcepts(
        sourceBatch, srcBatch, src_charBatch, probBatch, getsense=True)
    concepts_pred_seq = [[(uni.cat, uni.le, uni.aux, uni.sense, uni) for uni in seq]
                         for seq in concepts_pred_seq]
    # Stage 2: relation (edge) prediction.
    rel_batch, aligns = rel_to_batch(concept_batches, aligns_raw, data_iterator, self.dicts)
    rel_prob, roots = self.model(
        (rel_batch, srcBatch, src_charBatch, src_enc, aligns),
        rel=True, bertBatch=srcBertBatch, bertIndexBatch=srcBertIndexBatch)
    # Fixed: missing attribute dot (`self.decoderrelProbAndConToGraph` raised
    # AttributeError), missing `sourceBatch` argument, and `aligns` passed
    # where the raw alignments `aligns_raw` are expected — now consistent
    # with the sibling text-output method above.
    graphs, rel_triples = self.decoder.relProbAndConToGraph(
        concept_batches, sourceBatch, rel_prob, roots,
        (dependent_mark_batch, aligns_raw), True, set_wiki, normalizeMod=normalize_mod)
    batch_out = [0] * len(graphs)
    batch_mrp_graphs = [0] * len(graphs)
    for i, data in enumerate(
            zip(idsBatch, sourceBatch, concepts_pred_seq, concept_batches, rel_triples, graphs)):
        example_id, source, amr_pred, concept, rel_triple, graph = data
        # Fixed: `opt` was undefined in this method (NameError), and the
        # rendering settings (flavor=0, framework="ucca") disagreed with the
        # AMR settings used by the sibling method; use the module-level
        # graph_to_mrpGraph with flavor=2, framework="amr" as above.
        mrp_graph, _ = graph_to_mrpGraph(example_id, graph, normalizeMod=True,
                                         flavor=2, framework="amr",
                                         sentence=" ".join(source[0]))
        batch_mrp_graphs[order[i]] = mrp_graph
    return batch_mrp_graphs, batch_out
def parse_batch(self, src_text_batch_or_data_batch):
    """Parse a batch of raw sentences (or already-preprocessed data) into AMR text.

    Args:
        src_text_batch_or_data_batch: raw source texts when
            self.parse_from_processed is False, otherwise data already run
            through the feature extractor.

    Returns:
        List of JAMR-annotated AMR text blocks, one per input example, in
        the caller's original order.
    """
    if not self.parse_from_processed:
        # Raw text path: run the feature extractor per sentence.
        all_data = [ self.feature_extractor.preprocess(src_text) for src_text in src_text_batch_or_data_batch ]
    else:
        all_data = src_text_batch_or_data_batch
    # feature_to_torch sorts the batch; `order` maps position i back to the
    # caller's original position order[i].
    order, srcBatch, sourceBatch, data_iterator = self.feature_to_torch( all_data)
    # Stage 1: concept (node) prediction.
    probBatch = self.model(srcBatch)
    amr_pred_seq, concept_batches, aligns_raw, dependent_mark_batch = self.decoder.probAndSourceToAmr(
        sourceBatch, srcBatch, probBatch, getsense=True)
    # Flatten each predicted unit into a (category, lemma, aux, sense, unit) tuple.
    amr_pred_seq = [[(uni.cat, uni.le, uni.aux, uni.sense, uni) for uni in seq]
                    for seq in amr_pred_seq]
    # Stage 2: relation (edge) prediction over the predicted concepts.
    rel_batch, aligns = rel_to_batch(concept_batches, aligns_raw, data_iterator, self.dicts)
    rel_prob, roots = self.model((rel_batch, srcBatch, aligns), rel=True)
    # Positional True, True — presumably getsense/getwiki flags; confirm
    # against relProbAndConToGraph's signature.
    graphs, rel_triples = self.decoder.relProbAndConToGraph(
        concept_batches, rel_prob, roots, (dependent_mark_batch, aligns_raw), True, True)
    batch_out = [0] * len(graphs)
    for i, data in enumerate(
            zip(sourceBatch, amr_pred_seq, concept_batches, rel_triples, graphs)):
        source, amr_pred, concept, rel_triple, graph = data
        predicated_graph = graph_to_amr(graph)
        # Assemble the JAMR-style text block: metadata headers, node/edge
        # alignments, then the serialized AMR graph.
        out = []
        out.append("# ::tok " + " ".join(source[0]) + "\n")
        out.append("# ::lemma " + " ".join(source[1]) + "\n")
        out.append("# ::pos " + " ".join(source[2]) + "\n")
        out.append("# ::ner " + " ".join(source[3]) + "\n")
        out.append(self.decoder.nodes_jamr(graph))
        out.append(self.decoder.edges_jamr(graph))
        out.append(predicated_graph)
        # Restore the caller's original ordering.
        batch_out[order[i]] = "".join(out) + "\n"
    return batch_out
def generate_graph(model,AmrDecoder, data_set,dicts,file):
    """Run evaluation over a dataset: predict AMR graphs, accumulate concept and
    relation scores, print a Smatch-style summary, and write the predicted
    graphs to a text file.

    Args:
        model: the trained parser model (called for both concept and relation stages).
        AmrDecoder: decoder object turning model probabilities into graphs.
        data_set: indexable batched dataset; each item unpacks to
            (order, srcBatch, _, _, _, _, _, gold_roots, sourceBatch).
        dicts: vocabulary dictionaries.
        file: base output path; ".pickle" suffix is replaced by ".txt" and
            the global opt.output is appended.

    Returns:
        (concept_scores, rel_scores, output): the score accumulators and the
        list of predicted AMR text blocks.

    NOTE(review): relies on module-level globals `opt`, `concept_score_initial`,
    `rel_scores_initial`, `rel_to_batch`, `graph_to_amr`, `P_R_F1` — confirm
    they are defined/imported elsewhere in this file.
    """
    concept_scores = concept_score_initial(dicts)
    rel_scores = rel_scores_initial()
    # Evaluation mode: disable dropout etc. for deterministic predictions.
    model.eval()
    AmrDecoder.eval()
    output = []
    # NOTE(review): gold_file is never appended to or read — possibly dead code.
    gold_file = []
    for batchIdx in range(len(data_set)):
        order,srcBatch,_,_,_,_,_,gold_roots,sourceBatch =data_set[batchIdx]
        # Stage 1: concept (node) prediction.
        probBatch = model(srcBatch )
        amr_pred_seq,concept_batches,aligns_raw,dependent_mark_batch = AmrDecoder.probAndSourceToAmr(sourceBatch,srcBatch,probBatch,getsense = opt.get_sense )
        amr_pred_seq = [ [(uni.cat,uni.le,uni.aux,uni.sense,uni) for uni in seq ] for seq in amr_pred_seq ]
        # Stage 2: relation (edge) prediction.
        rel_batch,aligns = rel_to_batch(concept_batches,aligns_raw,data_set,dicts)
        rel_prob,roots = model((rel_batch,srcBatch,aligns),rel=True)
        graphs,rel_triples = AmrDecoder.relProbAndConToGraph(concept_batches,rel_prob,roots,(dependent_mark_batch,aligns_raw),opt.get_sense,opt.get_wiki)
        batch_out = [0]*len(graphs)
        # Accumulate relation scores against gold triples (sourceBatch column 5);
        # T_P_TP_Batch presumably updates score_h's internal counts as a side
        # effect — the returned t,p,tp are discarded here.
        for score_h in rel_scores:
            if score_h.second_filter:
                t,p,tp = score_h.T_P_TP_Batch(rel_triples,list(zip(*sourceBatch))[5],second_filter_material = (concept_batches,list(zip(*sourceBatch))[4]))
            else:
                t,p,tp = score_h.T_P_TP_Batch(rel_triples,list(zip(*sourceBatch))[5])
        # Accumulate concept scores against gold concepts (sourceBatch column 4).
        for score_h in concept_scores:
            t,p,tp = score_h.T_P_TP_Batch(concept_batches,list(zip(*sourceBatch))[4])
        for i,data in enumerate(zip( sourceBatch,amr_pred_seq,concept_batches,rel_triples,graphs)):
            source,amr_pred,concept, rel_triple,graph= data
            predicated_graph = graph_to_amr(graph)
            # Assemble the JAMR-style text block: metadata headers, raw and
            # transformed predictions, alignments, then the serialized graph.
            out = []
            out.append( "# ::tok "+" ".join(source[0])+"\n")
            out.append( "# ::lem "+" ".join(source[1])+"\n")
            out.append( "# ::pos "+" ".join(source[2])+"\n")
            out.append( "# ::ner "+" ".join(source[3])+"\n")
            out.append( "# ::predicated "+" ".join([str(re_cat[-1]) for re_cat in amr_pred])+"\n")
            out.append( "# ::transformed final predication "+" ".join([str(c) for c in concept])+"\n")
            out.append( AmrDecoder.nodes_jamr(graph))
            out.append( AmrDecoder.edges_jamr(graph))
            out.append( predicated_graph)
            # Restore the caller's original ordering within the batch.
            batch_out[order[i]] = "".join(out)+"\n"
        output += batch_out
    # Combine the element-wise (T, P, TP) counts of concept and relation
    # scorers at index 1 into an overall Smatch-style tally.
    t_p_tp = list(map(lambda a,b:a+b, concept_scores[1].t_p_tp,rel_scores[1].t_p_tp))
    total_out = "Smatch"+"\nT,P,TP: "+ " ".join([str(i) for i in t_p_tp])+"\nPrecesion,Recall,F1: "+ " ".join([str(i)for i in P_R_F1(*t_p_tp)])
    print(total_out)
    for score_h in rel_scores:
        print("")
        print(score_h)
    # Derive the output text path from the pickle path plus opt.output suffix.
    file = file.replace(".pickle",".txt")
    with open(file+ opt.output, 'w+') as the_file:
        for data in output:
            the_file.write(data+'\n')
    print(file+ opt.output+" written.")
    return concept_scores,rel_scores,output