Example #1
0
    def traverse_all(self, nodes_stack=None, rdfs=None):
        """Enumerate every depth-first linearization of the graph as a tree of LinearNode options.

        Top-level call (nodes_stack is None): returns a single OR node with
        one branch per possible starting entity.  Recursive calls return a
        list of LinearNode options continuing the traversal.

        nodes_stack -- path of entities entered but not yet closed with "]".
        rdfs -- (subject, edge, object) triples not yet consumed.
        """
        if nodes_stack is None:
            rdfs = self.as_rdf()
            return LinearNode(NodeType.OR,
                              [LinearNode(concat_entity(n), self.traverse_all([n], rdfs)) for n in self.nodes])

        # Remaining triples touching the current (top-of-stack) node:
        # ">" leaves it as subject, "<" enters it as object.
        f_edges = [(i, n2, ">", e) for i, (n1, e, n2) in enumerate(rdfs) if n1 == nodes_stack[-1]]
        b_edges = [(i, n1, "<", e) for i, (n1, e, n2) in enumerate(rdfs) if n2 == nodes_stack[-1]]
        edges = f_edges + b_edges

        options = []
        for i, n, d, e in edges:
            # Consume triple i on a copy of rdfs and descend into neighbor n.
            new_rdfs = list(rdfs)
            new_rdfs.pop(i)
            text = " ".join([d, readable_edge(e), "[", concat_entity(n)])
            options.append(LinearNode(text, self.traverse_all(nodes_stack + [n], new_rdfs)))

        if len(nodes_stack) > 1:
            # Alternative: close the current bracket and back up one level.
            options.append(LinearNode("]", self.traverse_all(nodes_stack[:-1], rdfs)))

        if len(options) > 0:
            return options

        # Dead end at the root: if every triple was consumed this is a valid
        # complete plan, otherwise the whole branch is filtered out.
        if len(rdfs) == 0:
            return [LinearNode(NodeType.FINAL)]
        return [LinearNode(NodeType.FILTER_OUT)]
Example #2
0
 def delex_single(self, text: str, ents: List[str], d: Datum):
     """Delexicalize via the parent class, then swap the pronouns
     " it " / " they " back to the concatenated entity name from the
     datum's metadata."""
     result = super().delex_single(text, ents, d)
     meta = self.get_meta(d)
     if result:
         entity_token = " " + concat_entity(meta["name"]) + " "
         for pronoun in (" it ", " they "):
             result = result.replace(pronoun, entity_token)
     return result
Example #3
0
def match_plan(d: Datum):
    """Return every linearization plan of d.graph whose entity order is
    compatible with the entity order of the delexicalized text d.delex."""
    graph = d.graph
    sentences = tokenize_sentences(d.delex)

    # Per-sentence constraints: entities that must / must not appear.
    components = []
    for sentence in sentences:
        included = {node for node in graph.nodes
                    if concat_entity(node) in sentence}
        excluded = graph.nodes - included

        # An excluded node directly connected to an included one is allowed.
        for a, b in graph.undirected_edges.keys():
            if a in included and b in excluded:
                excluded.remove(b)

        components.append({
            "must_include": included,
            "must_exclude": excluded
        })

    node_tokens = tuple(concat_entity(node) for node in graph.nodes)

    candidates = graph.constraint_graphs_plan(components).linearizations()
    matching = []

    for plan in candidates:
        plan_sents = plan.split(".")

        ref_order = entities_order(SENTENCE_BREAK.join(sentences), node_tokens)
        cand_order = entities_order(SENTENCE_BREAK.join(plan_sents), node_tokens)

        if comp_order([ent for ent, _ in ref_order],
                      [ent for ent, _ in cand_order]):
            matching.append(plan)

    return matching
Example #4
0
        def rename(d: Datum, text: str):
            """Replace the "name" placeholder: full entity name on first
            occurrence, pronouns ("THEY"/"IT") afterwards; also fill in the
            "near" placeholder when present in the metadata."""
            meta = self.get_meta(d)
            placeholder = concat_entity("name")
            # First occurrence gets the full name; later ones become pronouns.
            text = text.replace(placeholder, concat_entity(meta["name"]), 1)
            text = text.replace(placeholder + " are", "THEY are")
            text = text.replace(placeholder + " have", "THEY have")
            text = text.replace(placeholder, "IT")

            if meta["near"]:
                text = text.replace(concat_entity("near"),
                                    concat_entity(meta["near"]))
            return text
Example #5
0
    def extract_sentences(self, lex):
        """Yield (text, template) pairs from a lexicalization entry,
        substituting each reference tag in the template with its
        concatenated entity.  Entries without references are skipped."""
        entries = lex if isinstance(lex, list) else [lex]

        for entry in entries:
            text, template = entry["text"], entry["template"]
            if not entry["references"]:
                continue

            refs = entry["references"]["reference"]
            if not isinstance(refs, list):
                refs = [refs]

            # Dedup by tag (last entity wins), then substitute each tag once.
            tag_to_entity = {ref["@tag"]: ref["@entity"] for ref in refs}
            for tag, entity in tag_to_entity.items():
                template = template.replace(tag, concat_entity(entity))

            yield text, template
Example #6
0
    def read_tsv(self, file_path):
        """Parse a TSV dataset file and yield one Datum per row.

        Each row holds: title, " ; "-separated entities, space-separated
        entity-type tokens (wrapped in <>), " ; "-separated "s r o" relation
        index triples, and the text containing typed entity placeholders.
        """
        # Fix: the original passed open(...) straight to csv.reader and
        # never closed the handle; `with` guarantees it is released.
        with open(file_path, encoding="utf-8") as f:
            rows = csv.reader(f, delimiter='\t')
            for title, entities, entity_types, relations, text in rows:
                entities = entities.split(" ; ")
                entity_types = [
                    t.strip(">").strip("<") for t in entity_types.split(" ")
                ]

                # Each relation triple references entities/types by index.
                triples = []
                for triple in relations.split(" ; "):
                    s, r, o = map(int, triple.split(" "))
                    relation = " ".join(
                        [entity_types[s], relation_names[r], entity_types[o]])
                    triples.append((entities[s], relation, entities[o]))

                # Attach every entity to a synthetic "paper" root node.
                triples += [("paper", "paper includes " + t, e)
                            for e, t in zip(entities, entity_types)]

                # Replace typed placeholders like "<Type_3>" with the entity.
                for i, (ent, ent_type) in enumerate(zip(entities, entity_types)):
                    text = text.replace("<" + ent_type + "_" + str(i) + ">",
                                        concat_entity(ent))

                # Restore parentheses from PTB-style tokens.
                text = text.replace("-lrb-", "(").replace("-rrb-", ")")

                yield Datum(text=text, delex=text, rdfs=triples, title=title)
Example #7
0
    def plans(type):
        # NOTE: `type` shadows the builtin, but it is presumably bound by the
        # route's URL variable name, so it cannot be renamed safely.
        """Endpoint: plan the posted triplets and return every linearization
        with its planner score, best first, plus the entity concat map."""
        triplets = request.get_json(force=True)
        graph = Graph(triplets)
        planner = pipeline_res["train-planner"]

        if type == "full":
            plan_tree = graph.exhaustive_plan()
        else:
            plan_tree = graph.plan_all()

        all_plans = []
        for lin in plan_tree.linearizations():
            spaced = lin.replace("[", " [ ").replace("]", " ] ")
            all_plans.append(spaced.replace("  ", " "))

        scored = [{"l": lin, "s": planner.score(lin)} for lin in all_plans]
        scored.sort(key=lambda p: p["s"], reverse=True)

        return jsonify({
            "concat": {n: concat_entity(n) for n in graph.nodes},
            "linearizations": scored
        })
Example #8
0
    def for_translation(self):
        """Group delexicalized reference texts by their delexicalized plan.

        Returns a dict mapping each plan string (with name/near replaced by
        placeholders) to the list of delexicalized texts realizing it.
        """
        plan_sentences = defaultdict(list)
        for d in self.data:
            meta = self.get_meta(d)
            name_placeholder = concat_entity("name")
            plan = d.plan.replace(concat_entity(meta["name"]), name_placeholder)

            delex = d.delex
            if delex:
                ents = [meta["name"]] + [o for s, r, o in d.rdfs
                                         if r not in DONT_DELEX]
                delex = self.delex_single(d.text, ents, d)
                delex = delex.replace(concat_entity(meta["name"]),
                                      name_placeholder)
                delex = " ".join(tokenize(delex))

            if meta["near"]:
                near_val = concat_entity(meta["near"])
                near_placeholder = concat_entity("near")
                plan = plan.replace(near_val, near_placeholder)
                # Fix: d.delex may be falsy (e.g. None); the original called
                # delex.replace unconditionally and crashed on None here.
                if delex:
                    delex = delex.replace(near_val, near_placeholder)

            plan_sentences[plan].append(delex)

        return plan_sentences
Example #9
0
 def convert_graph(self, g: Graph):
     """Build a new Graph whose subjects/objects are concatenated entities
     and whose relations are converted via self.convert_relation."""
     converted = []
     for subj, rel, obj in g.as_rdf():
         converted.append((concat_entity(subj),
                           self.convert_relation(rel),
                           concat_entity(obj)))
     return Graph(converted)
Example #10
0
 def get_val(self):
     """Return this object's value wrapped by concat_entity."""
     raw_value = self.value
     return concat_entity(raw_value)