def sentence_object_hook(obj):
    """JSON ``object_hook`` that rebuilds project objects from plain dicts.

    A decoded dict is promoted to a ``Sentence``, ``SemanticGraph`` or edge
    object when it carries every attribute key of the corresponding template
    instance; otherwise the dict is returned unchanged.

    :param obj: dictionary produced by the JSON decoder
    :return: a ``Sentence``, ``SemanticGraph``, edge copy, or the dict itself
    """
    def _matches(template_attrs):
        # True when obj has at least every attribute of the template object.
        return all(key in obj for key in template_attrs)

    if _matches(Sentence().__dict__):
        decoded_sentence = Sentence()
        decoded_sentence.__dict__.update(obj)
        # Graph/score pairs were serialized as plain lists; restore the tuples.
        decoded_sentence.graphs = [WithScore(*pair) for pair in decoded_sentence.graphs]
        return decoded_sentence

    if _matches(SemanticGraph().__dict__):
        decoded_graph = SemanticGraph()
        decoded_graph.__dict__.update(obj)
        # Edges were serialized as a plain list; wrap them back into an EdgeList.
        decoded_graph.edges = EdgeList()
        decoded_graph.edges._list = obj['edges']
        return decoded_graph

    if _matches(DUMMY_EDGE.__dict__):
        decoded_edge = copy(DUMMY_EDGE)
        decoded_edge.__dict__.update(obj)
        return decoded_edge

    return obj
def get_graph_denotations(g: SemanticGraph):
    """
    Convert the given graph to a WikiData query and retrieve the denotations of the graph.
    The results contain the list of the possible graph denotations.

    :param g: graph as a SemanticGraph
    :return: graph denotations as a list of dictionaries
    >>> get_graph_denotations(SemanticGraph([Edge(leftentityid='Q35637', relationid='P1346', rightentityid=QUESTION_VAR, qualifierentityid='2009')]))
    ['Q76']
    >>> get_graph_denotations(SemanticGraph([Edge(leftentityid='Q37320', relationid='P131', rightentityid='?m0Q37320'), Edge(leftentityid='?m0Q37320', relationid='P421', rightentityid=QUESTION_VAR)]))
    ['Q941023', 'Q28146035']
    """
    # Name of the question variable binding in the SPARQL results (leading '?' stripped).
    qvar_name = QUESTION_VAR[1:]
    # Special case: postal-code questions (P281 = "postal code") need literal
    # answers, handled and post-processed separately below.
    if "zip" in g.tokens and any(e.relationid == "P281" for e in g.edges):
        denotations = endpoint_access.query_wikidata(
            graph_to_query(g, limit=100))
        # Drop bindings that look like unresolved variables ('x' placeholder).
        denotations = [
            r for r in denotations if any('x' not in r[b] for b in r)
        ]
        # Post process zip codes: split ranges like "1000-1099" and expand a
        # short right-hand part with the prefix of the first code
        # (e.g. "1000-99" -> "1000", "1099").
        post_processed = []
        for r in denotations:
            codes = re.split("[-–]", r[qvar_name])
            for p in codes:
                if p:
                    if len(p) < len(codes[0]):
                        p = codes[0][:(len(codes[0])) - len(p)] + p
                    post_processed.append(p)
        return post_processed
    edges = [e for e in g.edges if e.rightentityid != "Q5"
             ]  # filter out edges with human as argument since they often fail
    denotations = endpoint_access.query_wikidata(
        graph_to_query(SemanticGraph(edges=edges), limit=100))
    # For transitive queries keep only the answers reachable in the fewest steps.
    if denotations and all('step' in d for d in denotations):
        min_transitive_steps = min([d['step'] for d in denotations])
        denotations = [
            d for d in denotations if d['step'] == min_transitive_steps
        ]
    # Deduplicate to the set of distinct question-variable bindings.
    denotations = list({d[qvar_name] for d in denotations if qvar_name in d})
    if not sentence.get_question_type(" ".join(g.tokens)) == 'temporal':
        denotations = filter_auxiliary_entities_by_id(
            denotations
        )  # Filter out WikiData auxiliary variables, e.g. Q24523h-87gf8y48
    else:
        # Temporal questions: answers are date entities — return their labels instead of ids.
        denotations = [
            l for _, labels in queries.get_labels_for_entities(
                denotations).items() for l in labels
        ]
    return denotations
def test_encode():
    """Encoding a graph and a sentence with SentenceEncoder yields the expected JSON fragments."""
    graph = SemanticGraph(
        [Edge(leftentityid="?qvar", rightentityid="Q76")],
        free_entities=[
            {'linkings': [("Q37876", "Natalie Portman")], 'tokens': ["Portman"], 'type': 'PERSON'},
            {'linkings': [('2012', '2012')], 'type': 'YEAR', 'tokens': ['2012']},
        ],
    )
    encoded = json.dumps(graph, cls=SentenceEncoder, sort_keys=True)
    assert '"leftentityid": "?qvar", "qualifierentityid": null, "qualifierrelationid": null, "relationid": null, "rightentityid": "Q76"' in encoded
    assert '"free_entities": [{"linkings": [["Q37876", "Natalie Portman"]], "tokens": ["Portman"], ' in encoded

    sent = Sentence(entities=[{"type": "NN", "linkings": [("Q5", "human")], 'token_ids': [0]}])
    encoded = json.dumps(sent, cls=SentenceEncoder, sort_keys=True)
    assert '"entities": [{"linkings": [["Q5", "human"]], "token_ids": [0], "type": "NN"}],' in encoded
    assert ', [0.0, 0.0, 0.0]]]' in encoded
def test_decode():
    """A dumps/loads round trip with sentence_object_hook restores the project objects."""
    graph = SemanticGraph(
        [Edge(leftentityid="?qvar", rightentityid="Q76")],
        free_entities=[
            {'linkings': [("Q37876", "Natalie Portman")], 'tokens': ["Portman"], 'type': 'PERSON'},
            {'linkings': [('2012', '2012')], 'type': 'YEAR', 'tokens': ['2012']},
        ],
    )
    round_tripped_graph = json.loads(
        json.dumps(graph, cls=SentenceEncoder, sort_keys=True),
        object_hook=sentence_object_hook,
    )
    assert len(round_tripped_graph.edges) > 0
    assert isinstance(round_tripped_graph.edges, EdgeList)
    assert round_tripped_graph.edges[0].relationid is None

    sent = Sentence(entities=[{"type": "NN", "linkings": [("Q5", "human")], 'token_ids': [0]}])
    round_tripped_sentence = json.loads(
        json.dumps(sent, cls=SentenceEncoder, sort_keys=True),
        object_hook=sentence_object_hook,
    )
    assert len(round_tripped_sentence.graphs) == 1
    assert round_tripped_sentence.graphs[0].scores[2] == 0.0
def get_graph_groundings(g: SemanticGraph, pass_exception=False, use_wikidata=True):
    """
    Convert the given graph to a WikiData query and retrieve the results. The results contain possible bindings
    for all free variables in the graph. If there are no free variables a single empty grounding is returned.

    :param g: graph as a dictionary
    :param pass_exception: if True, propagate a failed query as None instead of an empty list
    :return: graph groundings encoded as a list of dictionaries
    >>> get_graph_groundings(SemanticGraph([Edge(leftentityid=QUESTION_VAR, rightentityid='Q571', qualifierentityid='MAX')]))
    [{'r0v': 'P31v'}, {'r0v': 'P800v'}]
    >>> get_graph_groundings(SemanticGraph([Edge(leftentityid='Q35637', relationid='P1346', rightentityid=QUESTION_VAR, qualifierentityid='2009'), Edge(leftentityid=QUESTION_VAR, relationid='iclass')]))
    [{'r1v': 'P31c', 'topic': 'human'}, {'r1v': 'P106c', 'topic': 'politician'}]
    """
    ungrouded_edges = g.get_ungrounded_edges()
    if ungrouded_edges:
        # Shortcuts for a lone answer-class ('iclass') edge: the class can be
        # inferred from the question without querying WikiData.
        if len(ungrouded_edges) == 1 and ungrouded_edges[0].relationid == "iclass":
            if "zip" in g.tokens and any(e.relationid == "P281" for e in g.edges):
                # Postal-code question -> answer class Q37447 (postal code).
                return [{'r1v': 'P31c', 'topic': 'Q37447'}]
            elif any([scheme.property2label[e.relationid]["type"] == "time"
                      for e in g.edges if e.leftentityid != QUESTION_VAR]):
                # A time-typed relation elsewhere in the graph -> answer class Q577 (year).
                return [{'r1v': 'P31c', 'topic': "Q577"}]
        if use_wikidata:
            groundings = endpoint_access.query_wikidata(graph_to_query(g, limit=500))
        else:
            groundings = get_all_groundings(g)
        if groundings is None:  # If there was an exception
            return None if pass_exception else []
        elif len(groundings) > 0:
            # Drop rare relation bindings for every ungrounded edge.
            for e in ungrouded_edges:
                groundings = filter_relations(groundings, b=f"r{e.edgeid:d}v",
                                              freq_threshold=FREQ_THRESHOLD)
            # Non-temporal questions should not be answered via time-typed relations.
            # NOTE: r[...][:-1] strips the direction suffix (e.g. 'v'/'c') from the
            # bound relation id before the property2label lookup — presumably; verify.
            if sentence.get_question_type(" ".join(g.tokens)) != 'temporal':
                groundings = [r for r in groundings
                              if all([scheme.property2label[r[f"r{e.edgeid:d}v"][:-1]]["type"] != "time"
                                      for e in ungrouded_edges if e.leftentityid != QUESTION_VAR])]
            # Rank groundings by the summed corpus frequency of the bound relations.
            groundings = sorted(groundings,
                                key=lambda r: sum([scheme.property2label[r[f"r{e.edgeid:d}v"][:-1]]['freq']
                                                   for e in ungrouded_edges if f"r{e.edgeid:d}v" in r]),
                                reverse=True)
        return groundings
    else:
        # No free variables: the graph is fully grounded — return one empty
        # grounding if it verifies (or verification is skipped), else none.
        if verify_grounding(g) or not use_wikidata:
            return [{}]
        else:
            return []
def __init__(self, input_text=None, tagged=None, entities=None):
    """
    A sentence object.

    :param input_text: raw input text as a string
    :param tagged: a list of dict objects, one per token, with the output of the POS and NER taggers,
                   see utils for more info
    :param entities: a list of tuples, where each tuple is an entity link (first position is the KB id
                     and the second position is the label)
    """
    self.input_text = input_text if input_text else ""
    self.tagged = tagged if tagged else []
    # Token surface forms, in order, extracted from the tagger output.
    self.tokens = [t['originalText'] for t in self.tagged]
    # Keep only the three fields the pipeline uses from each provided entity.
    self.entities = [{k: e[k] for k in {'type', 'linkings', 'token_ids'}}
                     for e in entities] if entities else []
    # Add a YEAR entity for every cardinal-number token the NER tagged as a date.
    self.entities += [{
        'type': 'YEAR',
        'linkings': [(t['originalText'], t['originalText'])],
        'token_ids': [t['index'] - 1]  # tagger indices are 1-based; token_ids are 0-based
    } for t in self.tagged if t['pos'] == 'CD' and t['ner'] == 'DATE']
    # "who"-style questions get an implicit link to Q5 (human) on the first token.
    if get_question_type(self.input_text) == "person":
        self.entities.append({
            'type': 'NN',
            'linkings': [("Q5", 'human')],
            'token_ids': [0]
        })
    # "where"-style questions get an implicit link to Q618123 (geographical object).
    if get_question_type(self.input_text) == "location":
        self.entities.append({
            'type': 'NN',
            'linkings': [("Q618123", 'geographical object')],
            'token_ids': [0]
        })
    # Start with a single ungrounded candidate graph carrying a zero score triple.
    self.graphs = [
        WithScore(
            SemanticGraph(free_entities=self.entities, tokens=self.tokens),
            (0.0, 0.0, 0.0))
    ]
import pytest from questionanswering import grounding from questionanswering.grounding import stages from questionanswering.construction.graph import SemanticGraph, Edge from questionanswering.grounding import graph_queries test_graphs_with_groundings = [ SemanticGraph(edges=[ Edge(leftentityid=grounding.graph_queries.QUESTION_VAR, rightentityid="Q571", qualifierentityid="MAX") ]), SemanticGraph(edges=[ Edge(rightentityid=grounding.graph_queries.QUESTION_VAR, leftentityid="Q127367"), Edge(leftentityid=grounding.graph_queries.QUESTION_VAR, rightentityid="MAX"), ]), SemanticGraph(edges=[ Edge(rightentityid=grounding.graph_queries.QUESTION_VAR, qualifierentityid="Q37876") ]), SemanticGraph(edges=[ Edge(rightentityid=grounding.graph_queries.QUESTION_VAR, leftentityid="Q329816") ], tokens=['when', 'were']), SemanticGraph(edges=[ Edge(rightentityid=grounding.graph_queries.QUESTION_VAR, leftentityid="Q458")
import pytest from questionanswering.construction import sentence from questionanswering.construction.graph import SemanticGraph, Edge from questionanswering.grounding import staged_generation, graph_queries from entitylinking import core from test_sparql_queries import test_graphs_grounded test_graphs_with_groundings = [ SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='P674', rightentityid='Q3899725'), Edge(leftentityid=graph_queries.QUESTION_VAR, rightentityid='Q571')]), SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, rightentityid='Q3899725'), Edge(leftentityid=graph_queries.QUESTION_VAR, rightentityid='Q571')]), SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q6256')]), SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q6256'), Edge(leftentityid='Q866345', rightentityid=graph_queries.QUESTION_VAR)]), SemanticGraph(edges=[Edge(qualifierentityid=graph_queries.QUESTION_VAR, rightentityid='Q5620660')]), SemanticGraph(edges=[Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q5'), Edge(qualifierentityid=graph_queries.QUESTION_VAR, rightentityid='Q5620660')]), SemanticGraph([Edge(rightentityid=graph_queries.QUESTION_VAR, relationid='P161', qualifierentityid='Q5620660'), Edge(leftentityid='Q1079', rightentityid=graph_queries.QUESTION_VAR)]), ] test_graphs_without_groundings = [ SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='P1376', rightentityid='Q183'), Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q37226')]), ] test_sentences_perfect_fscore = [