Exemple #1
0
    def parse_batch_preprocessed_data(self, preprocessed_input_examples, set_wiki, normalize_mod):
        """Parse one batch of preprocessed examples into AMR-framework MRP graphs.

        The batch goes through concept identification, relation
        identification and graph construction. Every decoded graph is then
        converted to an MRP graph and also rendered as a text block made of
        ``# ::`` header lines, JAMR node/edge lines and the predicted graph.

        Args:
            preprocessed_input_examples: passed unchanged to
                ``self.feature_to_torch``.
            set_wiki: forwarded to ``self.decoder.relProbAndConToGraph``.
            normalize_mod: forwarded to ``relProbAndConToGraph`` as
                ``normalizeMod``; the MRP conversion below always receives
                ``normalizeMod=True`` regardless of this value.

        Returns:
            ``(batch_mrp_graphs, batch_out)``: two lists with one slot per
            decoded graph, each filled at position ``order[i]``.
        """
        (order, idsBatch, srcBatch, src_charBatch, sourceBatch,
         srcBertBatch, srcBertIndexBatch, data_iterator) = self.feature_to_torch(
             preprocessed_input_examples)

        # Stage 1: concept identification (rel=False).
        probBatch, src_enc = self.model(
            srcBatch,
            src_charBatch,
            rel=False,
            bertBatch=srcBertBatch,
            bertIndexBatch=srcBertIndexBatch,
        )
        decoded = self.decoder.probAndSourceToConcepts(
            sourceBatch, srcBatch, src_charBatch, probBatch, getsense=True)
        amr_pred_seq, concept_batches, aligns_raw, dependent_mark_batch = decoded
        amr_pred_seq = [
            [(unit.cat, unit.le, unit.aux, unit.sense, unit) for unit in sequence]
            for sequence in amr_pred_seq
        ]

        # Stage 2: relation identification (rel=True) and graph construction.
        rel_batch, aligns = rel_to_batch(
            concept_batches, aligns_raw, data_iterator, self.dicts)
        rel_inputs = (rel_batch, srcBatch, src_charBatch, src_enc, aligns)
        rel_prob, roots = self.model(rel_inputs, rel=True, bertBatch=srcBertBatch)
        graphs, rel_triples = self.decoder.relProbAndConToGraph(
            concept_batches,
            sourceBatch,
            rel_prob,
            roots,
            (dependent_mark_batch, aligns_raw),
            True,
            set_wiki,
            normalizeMod=normalize_mod,
        )

        graph_count = len(graphs)
        batch_out = [0] * graph_count
        batch_mrp_graphs = [0] * graph_count

        per_example = zip(idsBatch, sourceBatch, amr_pred_seq,
                          concept_batches, rel_triples, graphs)
        for position, (example_id, source, _pred, _concept, _triple, graph) in enumerate(per_example):
            token_line = " ".join(source[0])
            mrp_graph, predicated_graph = graph_to_mrpGraph(
                example_id,
                graph,
                normalizeMod=True,
                flavor=2,
                framework="amr",
                sentence=token_line,
            )

            pieces = [
                "# ::id " + example_id + "\n",
                "# ::tok " + token_line + "\n",
                "# ::lemma " + " ".join(source[1]) + "\n",
                "# ::pos " + " ".join(source[2]) + "\n",
                "# ::ner " + " ".join(source[3]) + "\n",
                self.decoder.nodes_jamr(graph),
                self.decoder.edges_jamr(graph),
                predicated_graph,
            ]

            # order[i] maps the i-th processed example to its output slot.
            slot = order[position]
            batch_out[slot] = "".join(pieces) + "\n"
            batch_mrp_graphs[slot] = mrp_graph
        return batch_mrp_graphs, batch_out
Exemple #2
0
    def parse_batch_preprocessed_data(self, preprocessed_input_examplesi,
                                      set_wiki, normalize_mod):
        """Parse one batch of preprocessed examples into UCCA MRP graphs.

        Only MRP graphs are produced; no text output is built, so the second
        return value keeps its placeholder zeros.

        Args:
            preprocessed_input_examplesi: preprocessed examples, passed
                unchanged to ``self.feature_to_torch``. The trailing "i"
                looks like a typo; the name is kept so the signature stays
                unchanged for callers.
            set_wiki: forwarded to ``self.decoder.relProbAndConToGraph``.
            normalize_mod: forwarded as ``normalizeMod`` to both the graph
                decoder and the MRP conversion.

        Returns:
            ``(batch_mrp_graphs, batch_out)``: ``batch_mrp_graphs`` holds one
            MRP graph per decoded graph at position ``order[i]``;
            ``batch_out`` is a same-length list of ``0`` placeholders.
        """
        # NOTE(review): srcBertIndexBatch is required by both model calls
        # below, so it is unpacked here; this assumes feature_to_torch returns
        # it between srcBertBatch and data_iterator — confirm the tuple layout.
        (order, idsBatch, srcBatch, src_charBatch, sourceBatch, srcBertBatch,
         srcBertIndexBatch, data_iterator) = self.feature_to_torch(
             preprocessed_input_examplesi)

        probBatch, src_enc = self.model(srcBatch,
                                        src_charBatch,
                                        rel=False,
                                        bertBatch=srcBertBatch,
                                        bertIndexBatch=srcBertIndexBatch)

        concepts_pred_seq, concept_batches, aligns_raw, dependent_mark_batch = self.decoder.probAndSourceToConcepts(
            sourceBatch, srcBatch, src_charBatch, probBatch, getsense=True)

        concepts_pred_seq = [[(uni.cat, uni.le, uni.aux, uni.sense, uni)
                              for uni in seq] for seq in concepts_pred_seq]

        rel_batch, aligns = rel_to_batch(concept_batches, aligns_raw,
                                         data_iterator, self.dicts)
        rel_prob, roots = self.model(
            (rel_batch, srcBatch, src_charBatch, src_enc, aligns),
            rel=True,
            bertBatch=srcBertBatch,
            bertIndexBatch=srcBertIndexBatch)
        # The decoder is given the raw alignments (aligns_raw); the batched
        # `aligns` from rel_to_batch is only an input to the relation model.
        graphs, rel_triples = self.decoder.relProbAndConToGraph(
            concept_batches,
            rel_prob,
            roots, (dependent_mark_batch, aligns_raw),
            True,
            set_wiki,
            normalizeMod=normalize_mod)
        batch_out = [0] * len(graphs)

        batch_mrp_graphs = [0] * len(graphs)
        for i, data in enumerate(
                zip(idsBatch, sourceBatch, concepts_pred_seq, concept_batches,
                    rel_triples, graphs)):
            example_id, source, _pred, _concept, _rel_triple, graph = data
            # Use the caller-supplied normalize_mod rather than a module-level
            # `opt`, so both decoding steps honour the same setting.
            mrp_graph, _ = self.decoder.graph_to_mrpGraph(
                example_id,
                graph,
                normalizeMod=normalize_mod,
                flavor=0,
                framework="ucca",
                sentence=" ".join(source[0]))
            # order[i] maps the i-th processed example to its output slot.
            batch_mrp_graphs[order[i]] = mrp_graph
        return batch_mrp_graphs, batch_out
    def parse_batch(self, src_text_batch_or_data_batch):
        """Parse a batch and return one text block per decoded graph.

        When ``self.parse_from_processed`` is false, every input item is first
        run through ``self.feature_extractor.preprocess``; otherwise the input
        is used as-is.

        Args:
            src_text_batch_or_data_batch: raw source texts, or already
                preprocessed data, depending on ``self.parse_from_processed``.

        Returns:
            A list with one slot per decoded graph; slot ``order[i]`` holds
            the ``# ::tok`` / ``# ::lemma`` / ``# ::pos`` / ``# ::ner`` header
            lines followed by the JAMR node and edge lines and the graph
            produced by ``graph_to_amr``.
        """
        if self.parse_from_processed:
            all_data = src_text_batch_or_data_batch
        else:
            all_data = [
                self.feature_extractor.preprocess(text)
                for text in src_text_batch_or_data_batch
            ]

        order, srcBatch, sourceBatch, data_iterator = self.feature_to_torch(all_data)

        # Stage 1: concept identification.
        probBatch = self.model(srcBatch)
        decoded = self.decoder.probAndSourceToAmr(
            sourceBatch, srcBatch, probBatch, getsense=True)
        amr_pred_seq, concept_batches, aligns_raw, dependent_mark_batch = decoded
        amr_pred_seq = [
            [(unit.cat, unit.le, unit.aux, unit.sense, unit) for unit in sequence]
            for sequence in amr_pred_seq
        ]

        # Stage 2: relation identification and graph construction.
        rel_batch, aligns = rel_to_batch(
            concept_batches, aligns_raw, data_iterator, self.dicts)
        rel_prob, roots = self.model((rel_batch, srcBatch, aligns), rel=True)
        graphs, rel_triples = self.decoder.relProbAndConToGraph(
            concept_batches,
            rel_prob,
            roots,
            (dependent_mark_batch, aligns_raw),
            True,
            True,
        )

        batch_out = [0] * len(graphs)
        per_example = zip(sourceBatch, amr_pred_seq, concept_batches,
                          rel_triples, graphs)
        for position, (source, _pred, _concept, _triple, graph) in enumerate(per_example):
            predicated_graph = graph_to_amr(graph)
            pieces = [
                "# ::tok " + " ".join(source[0]) + "\n",
                "# ::lemma " + " ".join(source[1]) + "\n",
                "# ::pos " + " ".join(source[2]) + "\n",
                "# ::ner " + " ".join(source[3]) + "\n",
                self.decoder.nodes_jamr(graph),
                self.decoder.edges_jamr(graph),
                predicated_graph,
            ]
            # order[i] maps the i-th processed example to its output slot.
            batch_out[order[position]] = "".join(pieces) + "\n"
        return batch_out
Exemple #4
0
def generate_graph(model,AmrDecoder, data_set,dicts,file):
    """Decode every batch of ``data_set``, score it, and write the graphs out.

    For each batch the model predicts concepts and relations, the decoder
    builds graphs, and the concept/relation scorers are updated through their
    ``T_P_TP_Batch`` methods. A summary is printed, and the rendered graphs
    are written to ``file`` (with ".pickle" replaced by ".txt") followed by
    ``opt.output``.

    Reads the module-level ``opt`` for ``get_sense``, ``get_wiki`` and
    ``output``.

    Args:
        model: callable model; switched to eval mode before decoding.
        AmrDecoder: decoder object; switched to eval mode before decoding.
        data_set: indexable collection of batches supporting ``len()``.
        dicts: passed to ``concept_score_initial`` and ``rel_to_batch``.
        file: base path used to derive the output file name.

    Returns:
        ``(concept_scores, rel_scores, output)`` where ``output`` is the list
        of rendered text blocks for all batches.
    """
    concept_scores = concept_score_initial(dicts)
    rel_scores = rel_scores_initial()

    model.eval()
    AmrDecoder.eval()

    output = []
    for batch_index in range(len(data_set)):
        order, srcBatch, _, _, _, _, _, _gold_roots, sourceBatch = data_set[batch_index]

        # Stage 1: concept identification.
        probBatch = model(srcBatch)
        decoded = AmrDecoder.probAndSourceToAmr(
            sourceBatch, srcBatch, probBatch, getsense=opt.get_sense)
        amr_pred_seq, concept_batches, aligns_raw, dependent_mark_batch = decoded
        amr_pred_seq = [
            [(unit.cat, unit.le, unit.aux, unit.sense, unit) for unit in sequence]
            for sequence in amr_pred_seq
        ]

        # Stage 2: relation identification and graph construction.
        rel_batch, aligns = rel_to_batch(concept_batches, aligns_raw, data_set, dicts)
        rel_prob, roots = model((rel_batch, srcBatch, aligns), rel=True)
        graphs, rel_triples = AmrDecoder.relProbAndConToGraph(
            concept_batches,
            rel_prob,
            roots,
            (dependent_mark_batch, aligns_raw),
            opt.get_sense,
            opt.get_wiki,
        )

        # Transpose the per-example records once; column 4 is compared against
        # the predicted concepts and column 5 against the predicted relations.
        columns = list(zip(*sourceBatch))
        for scorer in rel_scores:
            if score_needs_filter(scorer) if False else scorer.second_filter:
                _, _, _ = scorer.T_P_TP_Batch(
                    rel_triples,
                    columns[5],
                    second_filter_material=(concept_batches, columns[4]),
                )
            else:
                _, _, _ = scorer.T_P_TP_Batch(rel_triples, columns[5])
        for scorer in concept_scores:
            _, _, _ = scorer.T_P_TP_Batch(concept_batches, columns[4])

        batch_out = [0] * len(graphs)
        per_example = zip(sourceBatch, amr_pred_seq, concept_batches,
                          rel_triples, graphs)
        for position, (source, amr_pred, concept, _triple, graph) in enumerate(per_example):
            predicated_graph = graph_to_amr(graph)
            pieces = [
                "# ::tok " + " ".join(source[0]) + "\n",
                "# ::lem " + " ".join(source[1]) + "\n",
                "# ::pos " + " ".join(source[2]) + "\n",
                "# ::ner " + " ".join(source[3]) + "\n",
                "# ::predicated " + " ".join([str(re_cat[-1]) for re_cat in amr_pred]) + "\n",
                "# ::transformed final predication " + " ".join([str(c) for c in concept]) + "\n",
                AmrDecoder.nodes_jamr(graph),
                AmrDecoder.edges_jamr(graph),
                predicated_graph,
            ]
            # order[i] maps the i-th processed example to its output slot.
            batch_out[order[position]] = "".join(pieces) + "\n"
        output += batch_out

    # Combined counts: element-wise sum of the second concept scorer and the
    # second relation scorer.
    t_p_tp = [a + b for a, b in zip(concept_scores[1].t_p_tp, rel_scores[1].t_p_tp)]
    counts_text = " ".join([str(value) for value in t_p_tp])
    metrics_text = " ".join([str(value) for value in P_R_F1(*t_p_tp)])
    total_out = "Smatch" + "\nT,P,TP: " + counts_text + "\nPrecesion,Recall,F1: " + metrics_text
    print(total_out)
    for scorer in rel_scores:
        print("")
        print(scorer)

    output_path = file.replace(".pickle", ".txt") + opt.output
    with open(output_path, 'w+') as the_file:
        for block in output:
            the_file.write(block + '\n')
    print(output_path + " written.")
    return concept_scores,rel_scores,output