def sentence_object_hook(obj):
    """JSON ``object_hook`` that rebuilds project objects from plain dicts.

    Dicts produced by ``SentenceEncoder`` are promoted back to ``Sentence``,
    ``SemanticGraph`` or edge instances when they contain every attribute of
    a freshly constructed instance of that class; anything else is returned
    unchanged.

    :param obj: dict decoded from JSON
    :return: a reconstructed object, or ``obj`` itself if no class matches
    """
    # Sentence: restore attributes, then rewrap each graph entry as WithScore.
    if all(key in obj for key in Sentence().__dict__):
        sentence_obj = Sentence()
        sentence_obj.__dict__.update(obj)
        sentence_obj.graphs = [WithScore(*pair) for pair in sentence_obj.graphs]
        return sentence_obj
    # SemanticGraph: restore attributes, then rebuild edges as an EdgeList.
    if all(key in obj for key in SemanticGraph().__dict__):
        graph_obj = SemanticGraph()
        graph_obj.__dict__.update(obj)
        edge_list = EdgeList()
        edge_list._list = obj['edges']
        graph_obj.edges = edge_list
        return graph_obj
    # Edge: clone the dummy edge and overwrite its attributes.
    if all(key in obj for key in DUMMY_EDGE.__dict__):
        edge_obj = copy(DUMMY_EDGE)
        edge_obj.__dict__.update(obj)
        return edge_obj
    return obj
# --- Code example #2 ---
def get_graph_denotations(g: SemanticGraph):
    """
    Convert the given graph to a WikiData query and retrieve the denotations of the graph. The results contain the
     list of the possible graph denotations

    :param g: graph as a SemanticGraph
    :return: graph denotations as a list of entity ids (or labels for temporal questions)
    >>> get_graph_denotations(SemanticGraph([Edge(leftentityid='Q35637', relationid='P1346', rightentityid=QUESTION_VAR, qualifierentityid='2009')]))
    ['Q76']
    >>> get_graph_denotations(SemanticGraph([Edge(leftentityid='Q37320', relationid='P131', rightentityid='?m0Q37320'), Edge(leftentityid='?m0Q37320', relationid='P421', rightentityid=QUESTION_VAR)]))
    ['Q941023', 'Q28146035']
    """
    qvar_name = QUESTION_VAR[1:]  # variable name without the leading '?'
    # Special path for zip-code questions (P281 is the postal-code property).
    if "zip" in g.tokens and any(e.relationid == "P281" for e in g.edges):
        denotations = endpoint_access.query_wikidata(
            graph_to_query(g, limit=100))
        if denotations is None:  # query_wikidata returns None on an exception
            return []
        denotations = [
            r for r in denotations if any('x' not in r[b] for b in r)
        ]  # Post process zip codes: drop bindings containing 'x' placeholders
        post_processed = []
        for r in denotations:
            # Split code ranges such as "12345-12349" (hyphen or en dash);
            # a shorter right-hand part inherits the missing prefix digits
            # from the first code in the range.
            codes = re.split("[-–]", r[qvar_name])
            for p in codes:
                if p:
                    if len(p) < len(codes[0]):
                        p = codes[0][:(len(codes[0])) - len(p)] + p
                    post_processed.append(p)
        return post_processed
    edges = [e for e in g.edges if e.rightentityid != "Q5"
             ]  # filter out edges with human as argument since they often fail
    denotations = endpoint_access.query_wikidata(
        graph_to_query(SemanticGraph(edges=edges), limit=100))
    if denotations is None:  # query_wikidata returns None on an exception
        return []
    if denotations and all('step' in d for d in denotations):
        # Transitive queries report a 'step' distance; keep only the closest.
        min_transitive_steps = min([d['step'] for d in denotations])
        denotations = [
            d for d in denotations if d['step'] == min_transitive_steps
        ]
    denotations = list({d[qvar_name] for d in denotations if qvar_name in d})
    if not sentence.get_question_type(" ".join(g.tokens)) == 'temporal':
        denotations = filter_auxiliary_entities_by_id(
            denotations
        )  # Filter out WikiData auxiliary variables, e.g. Q24523h-87gf8y48
    else:
        # Temporal questions: resolve ids to their labels (date strings).
        denotations = [
            l for _, labels in queries.get_labels_for_entities(
                denotations).items() for l in labels
        ]
    return denotations
# --- Code example #3 ---
def test_encode():
    """SentenceEncoder serializes graphs and sentences into stable JSON."""
    graph = SemanticGraph(
        [Edge(leftentityid="?qvar", rightentityid="Q76")],
        free_entities=[
            {'linkings': [("Q37876", "Natalie Portman")],
             'tokens': ["Portman"],
             'type': 'PERSON'},
            {'linkings': [('2012', '2012')],
             'type': 'YEAR',
             'tokens': ['2012']},
        ],
    )
    encoded = json.dumps(graph, cls=SentenceEncoder, sort_keys=True)
    # Edge fields are emitted sorted, with unset fields as null.
    assert '"leftentityid": "?qvar", "qualifierentityid": null, "qualifierrelationid": null, "relationid": null, "rightentityid": "Q76"' in encoded
    # Entity linking tuples come out as JSON arrays.
    assert '"free_entities": [{"linkings": [["Q37876", "Natalie Portman"]], "tokens": ["Portman"], ' in encoded

    sent = Sentence(entities=[{"type": "NN", "linkings": [("Q5", "human")], 'token_ids': [0]}])
    encoded = json.dumps(sent, cls=SentenceEncoder, sort_keys=True)
    assert '"entities": [{"linkings": [["Q5", "human"]], "token_ids": [0], "type": "NN"}],' in encoded
    # The initial graph carries the default zero score triple.
    assert ', [0.0, 0.0, 0.0]]]' in encoded
# --- Code example #4 ---
def test_decode():
    """Round-trip: objects encoded with SentenceEncoder are rebuilt by sentence_object_hook."""
    graph = SemanticGraph(
        [Edge(leftentityid="?qvar", rightentityid="Q76")],
        free_entities=[
            {'linkings': [("Q37876", "Natalie Portman")],
             'tokens': ["Portman"],
             'type': 'PERSON'},
            {'linkings': [('2012', '2012')],
             'type': 'YEAR',
             'tokens': ['2012']},
        ],
    )
    round_tripped = json.loads(
        json.dumps(graph, cls=SentenceEncoder, sort_keys=True),
        object_hook=sentence_object_hook)
    # Edges survive the round trip and come back as a typed EdgeList.
    assert len(round_tripped.edges) > 0
    assert isinstance(round_tripped.edges, EdgeList)
    assert round_tripped.edges[0].relationid is None

    sent = Sentence(entities=[{"type": "NN", "linkings": [("Q5", "human")], 'token_ids': [0]}])
    decoded_sentence = json.loads(
        json.dumps(sent, cls=SentenceEncoder, sort_keys=True),
        object_hook=sentence_object_hook)
    # The sentence keeps its single initial graph with zero scores.
    assert len(decoded_sentence.graphs) == 1
    assert decoded_sentence.graphs[0].scores[2] == 0.0
# --- Code example #5 ---
def get_graph_groundings(g: SemanticGraph, pass_exception=False, use_wikidata=True):
    """
    Convert the given graph to a WikiData query and retrieve the results. The results contain possible bindings
    for all free variables in the graph. If there are no free variables a single empty grounding is returned.

    :param g: graph as a dictionary
    :param pass_exception: if True, propagate a failed query as None instead of an empty list
    :param use_wikidata: if True query the live WikiData endpoint, otherwise enumerate groundings locally
    :return: graph groundings encoded as a list of dictionaries
    >>> get_graph_groundings(SemanticGraph([Edge(leftentityid=QUESTION_VAR, rightentityid='Q571', qualifierentityid='MAX')]))
    [{'r0v': 'P31v'}, {'r0v': 'P800v'}]
    >>> get_graph_groundings(SemanticGraph([Edge(leftentityid='Q35637', relationid='P1346', rightentityid=QUESTION_VAR, qualifierentityid='2009'), Edge(leftentityid=QUESTION_VAR, relationid='iclass')]))
    [{'r1v': 'P31c', 'topic': 'human'}, {'r1v': 'P106c', 'topic': 'politician'}]
    """
    # Fixed typo: local was previously misspelled 'ungrouded_edges'.
    ungrounded_edges = g.get_ungrounded_edges()
    if ungrounded_edges:
        # Shortcut groundings when the only free edge is an 'iclass' edge.
        if len(ungrounded_edges) == 1 and ungrounded_edges[0].relationid == "iclass":
            if "zip" in g.tokens and any(e.relationid == "P281" for e in g.edges):
                # Zip-code questions: answer class is postal code (Q37447).
                return [{'r1v': 'P31c', 'topic': 'Q37447'}]
            elif any([scheme.property2label[e.relationid]["type"] == "time"
                      for e in g.edges if e.leftentityid != QUESTION_VAR]):
                # A time-typed relation implies the answer class is a date (Q577).
                return [{'r1v': 'P31c', 'topic': "Q577"}]
        if use_wikidata:
            groundings = endpoint_access.query_wikidata(graph_to_query(g, limit=500))
        else:
            groundings = get_all_groundings(g)
        if groundings is None:  # If there was an exception
            return None if pass_exception else []
        elif len(groundings) > 0:
            # Drop rarely-used relations for every still-ungrounded edge.
            for e in ungrounded_edges:
                groundings = filter_relations(groundings, b=f"r{e.edgeid:d}v", freq_threshold=FREQ_THRESHOLD)
            if sentence.get_question_type(" ".join(g.tokens)) != 'temporal':
                # Non-temporal questions should not bind time-typed relations
                # on edges whose subject is not the question variable.
                groundings = [r for r in groundings if all([scheme.property2label[r[f"r{e.edgeid:d}v"][:-1]]["type"] != "time"
                                                           for e in ungrounded_edges if e.leftentityid != QUESTION_VAR])]
        # Most frequent relation combinations first.
        groundings = sorted(groundings,
                            key=lambda r: sum([scheme.property2label[r[f"r{e.edgeid:d}v"][:-1]]['freq']
                                               for e in ungrounded_edges if f"r{e.edgeid:d}v" in r]), reverse=True)
        return groundings
    else:
        # Fully grounded graph: a single empty grounding if it verifies.
        if verify_grounding(g) or not use_wikidata:
            return [{}]
        else:
            return []
# --- Code example #6 (file: sentence.py, project: lvying1991/KBQA-System) ---
 def __init__(self, input_text=None, tagged=None, entities=None):
     """
     A sentence object.

     :param input_text: raw input text as a string
     :param tagged: a list of dict objects, one per token, with the output of the POS and NER taggers, see utils
                   for more info
     :param entities: a list of tuples, where each tuple is an entity link (first position is the KB id and
                      the second position is the label)
     """
     self.input_text = input_text or ""
     self.tagged = tagged or []
     self.tokens = [token['originalText'] for token in self.tagged]
     # Keep only the three relevant keys of each provided entity dict.
     if entities:
         self.entities = [{key: entity[key]
                           for key in {'type', 'linkings', 'token_ids'}}
                          for entity in entities]
     else:
         self.entities = []
     # Add a YEAR entity for every cardinal token the NER tagged as a date.
     self.entities += [{
         'type': 'YEAR',
         'linkings': [(token['originalText'], token['originalText'])],
         'token_ids': [token['index'] - 1]
     } for token in self.tagged if token['pos'] == 'CD' and token['ner'] == 'DATE']
     # Seed a class-restricting entity according to the question type.
     question_type = get_question_type(self.input_text)
     if question_type == "person":
         self.entities.append({
             'type': 'NN',
             'linkings': [("Q5", 'human')],
             'token_ids': [0]
         })
     if question_type == "location":
         self.entities.append({
             'type': 'NN',
             'linkings': [("Q618123", 'geographical object')],
             'token_ids': [0]
         })
     # Start with a single empty semantic graph carrying a zero score triple.
     initial_graph = SemanticGraph(free_entities=self.entities,
                                   tokens=self.tokens)
     self.graphs = [WithScore(initial_graph, (0.0, 0.0, 0.0))]
# --- Code example #7 ---
import pytest

from questionanswering import grounding
from questionanswering.grounding import stages
from questionanswering.construction.graph import SemanticGraph, Edge
from questionanswering.grounding import graph_queries

test_graphs_with_groundings = [
    SemanticGraph(edges=[
        Edge(leftentityid=grounding.graph_queries.QUESTION_VAR,
             rightentityid="Q571",
             qualifierentityid="MAX")
    ]),
    SemanticGraph(edges=[
        Edge(rightentityid=grounding.graph_queries.QUESTION_VAR,
             leftentityid="Q127367"),
        Edge(leftentityid=grounding.graph_queries.QUESTION_VAR,
             rightentityid="MAX"),
    ]),
    SemanticGraph(edges=[
        Edge(rightentityid=grounding.graph_queries.QUESTION_VAR,
             qualifierentityid="Q37876")
    ]),
    SemanticGraph(edges=[
        Edge(rightentityid=grounding.graph_queries.QUESTION_VAR,
             leftentityid="Q329816")
    ],
                  tokens=['when', 'were']),
    SemanticGraph(edges=[
        Edge(rightentityid=grounding.graph_queries.QUESTION_VAR,
             leftentityid="Q458")
# --- Code example #8 ---
import pytest

from questionanswering.construction import sentence
from questionanswering.construction.graph import SemanticGraph, Edge
from questionanswering.grounding import staged_generation, graph_queries

from entitylinking import core
from test_sparql_queries import test_graphs_grounded

# Pytest fixture data: semantic graphs used by tests that query groundings.
# Each entry is a SemanticGraph whose edges reference the shared question
# variable; relation ids are WikiData properties (presumably these graphs
# are expected to produce at least one grounding — confirm against the tests
# that consume this list).
test_graphs_with_groundings = [
    SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='P674', rightentityid='Q3899725'),
                   Edge(leftentityid=graph_queries.QUESTION_VAR, rightentityid='Q571')]),
    SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, rightentityid='Q3899725'),
                   Edge(leftentityid=graph_queries.QUESTION_VAR, rightentityid='Q571')]),
    SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q6256')]),
    SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q6256'),
                   Edge(leftentityid='Q866345', rightentityid=graph_queries.QUESTION_VAR)]),
    SemanticGraph(edges=[Edge(qualifierentityid=graph_queries.QUESTION_VAR, rightentityid='Q5620660')]),
    SemanticGraph(edges=[Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q5'),
                         Edge(qualifierentityid=graph_queries.QUESTION_VAR, rightentityid='Q5620660')]),
    SemanticGraph([Edge(rightentityid=graph_queries.QUESTION_VAR, relationid='P161', qualifierentityid='Q5620660'),
                   Edge(leftentityid='Q1079', rightentityid=graph_queries.QUESTION_VAR)]),
]


# Pytest fixture data: graphs presumably expected to yield no groundings
# (verify against the consuming tests).
test_graphs_without_groundings = [
    SemanticGraph([Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='P1376', rightentityid='Q183'),
                   Edge(leftentityid=graph_queries.QUESTION_VAR, relationid='class', rightentityid='Q37226')]),
]

test_sentences_perfect_fscore = [