def traverse_all(self, nodes_stack=None, rdfs=None):
    # Entry call: start an OR node over every entity as a possible plan root.
    if nodes_stack is None:
        rdfs = self.as_rdf()
        return LinearNode(NodeType.OR, [
            LinearNode(concat_entity(n), self.traverse_all([n], rdfs))
            for n in self.nodes
        ])

    # Edges leaving (>) or entering (<) the node on top of the stack.
    f_edges = [(i, n2, ">", e) for i, (n1, e, n2) in enumerate(rdfs)
               if n1 == nodes_stack[-1]]
    b_edges = [(i, n1, "<", e) for i, (n1, e, n2) in enumerate(rdfs)
               if n2 == nodes_stack[-1]]
    edges = f_edges + b_edges

    options = []
    for i, n, d, e in edges:
        # Consume the chosen triple and descend into the neighbouring node.
        new_rdfs = list(rdfs)
        new_rdfs.pop(i)
        text = " ".join([d, readable_edge(e), "[", concat_entity(n)])
        options.append(LinearNode(text, self.traverse_all(nodes_stack + [n], new_rdfs)))

    # Close the current bracket and back up the stack.
    if len(nodes_stack) > 1:
        options.append(LinearNode("]", self.traverse_all(nodes_stack[:-1], rdfs)))

    if len(options) > 0:
        return options
    # No moves left: a valid plan if every triple was consumed, a dead end otherwise.
    if len(rdfs) == 0:
        return [LinearNode(NodeType.FINAL)]
    return [LinearNode(NodeType.FILTER_OUT)]
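# A minimal, self-contained analog of traverse_all (hypothetical names, not
# part of this repo): it enumerates every depth-first linearization of a small
# triple set, consuming one triple per step and popping with "]" exactly as
# the LinearNode tree above does. A sketch for intuition, not the repo's API.
def all_linearizations(stack, triples):
    out = []
    for i, (s, e, o) in enumerate(triples):
        if s == stack[-1] or o == stack[-1]:
            nxt = o if s == stack[-1] else s
            rest = triples[:i] + triples[i + 1:]
            prefix = ("> " if s == stack[-1] else "< ") + e + " [ " + nxt + " "
            out += [prefix + tail
                    for tail in all_linearizations(stack + [nxt], rest)]
    if len(stack) > 1:  # close the bracket and back up
        out += ["] " + tail for tail in all_linearizations(stack[:-1], triples)]
    if not out:  # FINAL if everything was consumed, FILTER_OUT otherwise
        return [""] if not triples else []
    return out

# Example: both visiting orders of John's two facts are produced.
for p in all_linearizations(["John"], [("John", "birthPlace", "UK"),
                                       ("John", "occupation", "writer")]):
    print("John " + p)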
def delex_single(self, text: str, ents: List[str], d: Datum):
    delex = super().delex_single(text, ents, d)
    meta = self.get_meta(d)
    if delex:
        # Also fold pronoun mentions back into the entity token.
        delex = delex \
            .replace(" it ", " " + concat_entity(meta["name"]) + " ") \
            .replace(" they ", " " + concat_entity(meta["name"]) + " ")
    return delex
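# Illustration of the extra pronoun pass (values invented; writing
# concat_entity(meta["name"]) as NAME for readability):
#   "NAME is a coffee shop . it serves tea ."
#       -> "NAME is a coffee shop . NAME serves tea ."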
def match_plan(d: Datum):
    g = d.graph
    s = d.delex
    s_s = tokenize_sentences(s)

    # Per-sentence constraints: entities mentioned in the sentence must appear,
    # unmentioned ones must not - unless they are a direct neighbour of a
    # mentioned entity.
    components = []
    for sent in s_s:
        must_include = {n for n in g.nodes if concat_entity(n) in sent}
        must_exclude = g.nodes - must_include
        for n1, n2 in g.undirected_edges.keys():
            if n1 in must_include and n2 in must_exclude:
                must_exclude.remove(n2)
        components.append({
            "must_include": must_include,
            "must_exclude": must_exclude
        })

    nodes = tuple(map(concat_entity, g.nodes))
    possible_plans = g.constraint_graphs_plan(components).linearizations()

    # Keep only plans whose entities appear in the same order as in the text.
    s_order = entities_order(SENTENCE_BREAK.join(s_s), nodes)
    final_plans = []
    for p in possible_plans:
        p_s = p.split(".")
        p_order = entities_order(SENTENCE_BREAK.join(p_s), nodes)
        if comp_order([e for e, i in s_order], [e for e, i in p_order]):
            final_plans.append(p)
    return final_plans
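# A small, self-contained illustration of the order check match_plan ends
# with (order_of is a hypothetical stand-in for entities_order/comp_order):
# a candidate plan survives only if its entities appear in the same relative
# order as in the reference text.
def order_of(text, entities):
    # first-occurrence order of each entity mention
    hits = [(text.find(e), e) for e in entities if e in text]
    return [e for _, e in sorted(hits)]

ref = "E_John was born in E_UK . E_John is a E_writer"
plan = "E_John > birthPlace [ E_UK ] . E_John > occupation [ E_writer ]"
ents = ("E_John", "E_UK", "E_writer")
assert order_of(ref, ents) == order_of(plan, ents)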
def rename(d: Datum, text: str):
    # `self` is assumed to be captured from the enclosing scope.
    meta = self.get_meta(d)
    # First occurrence keeps the full name; later mentions become "THEY"
    # (before "are"/"have") or "IT".
    text = text \
        .replace(concat_entity("name"), concat_entity(meta["name"]), 1) \
        .replace(concat_entity("name") + " are", "THEY are") \
        .replace(concat_entity("name") + " have", "THEY have") \
        .replace(concat_entity("name"), "IT")
    if meta["near"]:
        text = text.replace(concat_entity("near"), concat_entity(meta["near"]))
    return text
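# Illustration (values invented; NAME stands for the concat_entity("name")
# placeholder): with meta = {"name": "Aromi", "near": None},
#   "NAME is a cafe . NAME have free wifi . NAME is cheap"
# becomes
#   "ENT-Aromi is a cafe . THEY have free wifi . IT is cheap"
# where ENT-Aromi stands for concat_entity("Aromi"); only the first mention
# keeps the full name.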
def extract_sentences(self, lex):
    sentences = lex
    if not isinstance(sentences, list):
        sentences = [sentences]

    for s in sentences:
        text = s["text"]
        template = s["template"]

        # Skip lexicalisations with no reference annotations.
        if not s["references"]:
            continue
        references = s["references"]["reference"]
        if not isinstance(references, list):
            references = [references]
        references = {r["@tag"]: r["@entity"] for r in references}

        # Replace each reference tag in the template with its entity token.
        for tag, ent in references.items():
            template = template.replace(tag, concat_entity(ent))

        yield text, template
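# Usage sketch with a WebNLG-style lexicalisation entry as parsed from the
# XML (field values invented for illustration):
#   lex = {
#       "text": "John Doe was born in the UK.",
#       "template": "AGENT-1 was born in PATIENT-1 .",
#       "references": {"reference": [
#           {"@tag": "AGENT-1", "@entity": "John_Doe"},
#           {"@tag": "PATIENT-1", "@entity": "United_Kingdom"},
#       ]},
#   }
# extract_sentences(lex) then yields the raw text together with the template
# whose tags were rewritten to concat_entity("John_Doe") /
# concat_entity("United_Kingdom") tokens.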
def read_tsv(self, file_path):
    with open(file_path, encoding="utf-8") as f:
        rows = csv.reader(f, delimiter="\t")
        for title, entities, entity_types, relations, text in rows:
            entities = entities.split(" ; ")
            entity_types = [t.strip(">").strip("<")
                            for t in entity_types.split(" ")]
            # "s r o" index triples -> (subject entity, typed relation, object entity).
            relations = [
                (entities[s],
                 " ".join([entity_types[s], relation_names[r], entity_types[o]]),
                 entities[o])
                for s, r, o in map(lambda t: list(map(int, t.split(" "))),
                                   relations.split(" ; "))
            ]
            # Attach every entity to the paper itself.
            relations += [("paper", "paper includes " + t, e)
                          for e, t in zip(entities, entity_types)]

            # Re-insert entity surface forms and restore brackets.
            for i, (ent, ent_type) in enumerate(zip(entities, entity_types)):
                text = text.replace("<" + ent_type + "_" + str(i) + ">",
                                    concat_entity(ent))
            text = text.replace("-lrb-", "(").replace("-rrb-", ")")

            yield Datum(text=text, delex=text, rdfs=relations, title=title)
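# Expected row layout (tab-separated; values invented for illustration):
#   title:        "Deep residual learning ..."
#   entities:     "CNN ; ImageNet"
#   entity_types: "<model> <dataset>"
#   relations:    "0 0 1"  (indices into entities / relation_names / entities)
#   text:         "... <model_0> was trained on <dataset_1> ..."
# Each row also gains a synthetic ("paper", "paper includes <type>", entity)
# triple per entity, tying every entity to the paper node.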
def plans(type):
    triplets = request.get_json(force=True)
    graph = Graph(triplets)
    planner = pipeline_res["train-planner"]

    # Pad brackets with spaces, then collapse the resulting double spaces.
    all_plans = [
        l.replace("[", " [ ").replace("]", " ] ").replace("  ", " ")
        for l in (graph.exhaustive_plan() if type == "full"
                  else graph.plan_all()).linearizations()
    ]

    return jsonify({
        "concat": {n: concat_entity(n) for n in graph.nodes},
        "linearizations": list(sorted(
            [{"l": l, "s": planner.score(l)} for l in all_plans],
            key=lambda p: p["s"], reverse=True))
    })
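# Usage sketch (assuming this is registered as a Flask route with the plan
# type as a URL parameter; the endpoint path below is hypothetical):
#   curl -X POST http://localhost:5000/plans/full \
#        -H "Content-Type: application/json" \
#        -d '[["John", "birthPlace", "UK"], ["John", "occupation", "writer"]]'
# The response maps every node to its concat_entity token and lists all
# linearizations sorted by planner score, best first.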
def for_translation(self):
    plan_sentences = defaultdict(list)
    for d in self.data:
        meta = self.get_meta(d)
        # Swap the concrete name for the generic "name" placeholder.
        plan = d.plan.replace(concat_entity(meta["name"]), concat_entity("name"))
        delex = d.delex
        if delex:
            ents = [meta["name"]] + [o for s, r, o in d.rdfs
                                     if r not in DONT_DELEX]
            delex = self.delex_single(d.text, ents, d)
            delex = delex.replace(concat_entity(meta["name"]),
                                  concat_entity("name"))
            delex = " ".join(tokenize(delex))
            if meta["near"]:
                near_val = concat_entity(meta["near"])
                near_placeholder = concat_entity("near")
                plan = plan.replace(near_val, near_placeholder)
                delex = delex.replace(near_val, near_placeholder)
            plan_sentences[plan].append(delex)
    return plan_sentences
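# Resulting shape (values invented; NAME/NEAR stand for the generic
# placeholders): every delexicalized reference is grouped under its plan, so
# the translation step can learn plan -> text pairs, e.g.
#   plan_sentences["NAME > eatType [ restaurant ] > near [ NEAR ]"]
#       == ["NAME is a restaurant near NEAR .", ...]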
def convert_graph(self, g: Graph):
    rdf = [(concat_entity(s), self.convert_relation(r), concat_entity(o))
           for s, r, o in g.as_rdf()]
    return Graph(rdf)
def get_val(self):
    return concat_entity(self.value)