Ejemplo n.º 1
0
def filter_second_hops(grounded_graphs):
    """
    Drop second-hop graphs whose relation is already present as a first-hop relation.

    A graph is identified by the ``kbID`` and the ``type`` of its last edge, so the relation direction is respected.
    Graphs whose last edge carries neither ``hopUp`` nor ``hopDown`` are always kept.

    :param grounded_graphs: list of grounded graphs
    :return: filtered list of grounded graphs
    >>> filter_second_hops([ {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]}, {'edgeSet': [{'kbID': 'P17v', 'hopUp':'P31v', 'type': 'direct'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}]
    >>> filter_second_hops([ {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]}, {'edgeSet': [{'type':'time', 'argmax':'time'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}, {'edgeSet': [{'type': 'time', 'argmax': 'time'}]}]
    >>> filter_second_hops([ {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]}, {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}, {'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}]
    """

    def relation_key(edge):
        # Relation id plus edge type, so that opposite directions get different keys.
        return "{}-{}".format(edge.get('kbID'), edge.get('type'))

    def has_hop(edge):
        return 'hopUp' in edge or 'hopDown' in edge

    # Look up the last edge of every graph once and reuse it in both passes.
    last_edges = [graph.get_graph_last_edge(g) for g in grounded_graphs]

    # First pass: collect the relations that occur without any hop.
    first_order_relations = set()
    for edge in last_edges:
        if edge.get('type') in {'direct', 'reverse', 'v-structure'} and not has_hop(edge):
            first_order_relations.add(relation_key(edge))

    # Second pass: a graph with a hop survives only if its relation was not seen without a hop.
    kept_graphs = [
        g for g, edge in zip(grounded_graphs, last_edges)
        if not has_hop(edge) or relation_key(edge) not in first_order_relations
    ]
    logger.debug("Filter out unnecessary hops: {}".format(len(kept_graphs)))
    return kept_graphs
Ejemplo n.º 2
0
def filter_qualifier_relations(grounded_graphs):
    """
    Filter out relations so that we always prefer the direct relation over qualifiers.

    Only the last edge of every graph is inspected. A ``direct``/``reverse`` edge whose ``kbID`` ends in ``q`` or ``r``
    is dropped when the same ``kbID`` without its last character also occurs on a ``direct``/``reverse`` edge whose
    ``kbID`` does not end in ``q`` or ``r``, or on any ``v-structure`` edge. Relation direction is not respected.
    A missing, ``None`` or empty ``kbID`` is treated as an empty relation id and never counts as a qualifier.

    :param grounded_graphs: list of grounded graphs
    :return: filtered list of grounded graphs
    >>> filter_qualifier_relations([ {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]}, {'edgeSet': [{'kbID': 'P17q', 'type': 'direct'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}]
    >>> filter_qualifier_relations([ {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]}, {'edgeSet': [{'kbID': 'P17q', 'type': 'reverse'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}]
    >>> filter_qualifier_relations([ {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]}, {'edgeSet': [{'kbID': 'P17q', 'hopUp':'P31v', 'type': 'direct'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}]
    >>> filter_qualifier_relations([ {'edgeSet': [{'kbID': 'P175q','type': 'v-structure'}]}, {'edgeSet': [{'type':'direct', 'kbID':'P175q'}]} ])
    [{'edgeSet': [{'kbID': 'P175q', 'type': 'v-structure'}]}]
    >>> filter_qualifier_relations([ {'edgeSet': [{'kbID': 'P17v','type': 'direct'}]}, {'edgeSet': [{'type':'time', 'argmax':'time'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct'}]}, {'edgeSet': [{'type': 'time', 'argmax': 'time'}]}]
    >>> filter_qualifier_relations([ {'edgeSet': [{'kbID': '', 'type': 'direct'}]} ])
    [{'edgeSet': [{'kbID': '', 'type': 'direct'}]}]
    """

    def is_qualifier(kb_id):
        # endswith() is safe on the empty string, unlike indexing with [-1].
        return kb_id.endswith(('q', 'r'))

    # Fetch the last edge once per graph and keep only the two fields the filter needs.
    # `or ""` folds a missing, None or empty kbID into the same "no relation id" case.
    edge_infos = []
    for g in grounded_graphs:
        edge = graph.get_graph_last_edge(g)
        edge_infos.append((edge.get('type'), edge.get('kbID') or ""))

    # Relation ids (kbID without its last character) that are available as a non-qualifier relation.
    direct_relations = {
        kb_id[:-1]
        for edge_type, kb_id in edge_infos
        if (edge_type in {'direct', 'reverse'} and not is_qualifier(kb_id))
        or edge_type == 'v-structure'
    }

    # Drop a graph only when all three hold: direct/reverse edge, qualifier kbID, relation already covered.
    filtered_graphs = [
        g for g, (edge_type, kb_id) in zip(grounded_graphs, edge_infos)
        if edge_type not in {'direct', 'reverse'}
        or not is_qualifier(kb_id)
        or kb_id[:-1] not in direct_relations
    ]
    logger.debug("Filter out unnecessary qualifiers: {}".format(
        len(filtered_graphs)))
    return filtered_graphs
Ejemplo n.º 3
0
def filter_repeated_relations(grounded_graphs):
    """
    Keep only the first graph for every distinct relation.

    Two graphs count as the same relation when the ``kbID`` and the ``type`` of their last edges coincide; other edge
    fields (e.g. ``rightkbID``) are ignored. The relative order of the remaining graphs is preserved.

    :param grounded_graphs: list of grounded graphs
    :return: filtered list of grounded graphs
    >>> filter_repeated_relations([ {'edgeSet': [{'kbID': 'P17v','type': 'direct', 'rightkbID':'Q567'}]}, {'edgeSet': [{'kbID': 'P17v','type': 'direct', 'rightkbID':'Q1234'}]} ])
    [{'edgeSet': [{'kbID': 'P17v', 'type': 'direct', 'rightkbID': 'Q567'}]}]
    """
    seen_relations = set()
    unique_graphs = []
    for g in grounded_graphs:
        last_edge = graph.get_graph_last_edge(g)
        relation_key = "{}-{}".format(last_edge.get('kbID'), last_edge.get('type'))
        if relation_key in seen_relations:
            # A graph with this relation was already kept.
            continue
        seen_relations.add(relation_key)
        unique_graphs.append(g)
    logger.debug("Filter out repeated relations: {}".format(len(unique_graphs)))
    return unique_graphs
def last_relation_numeric(g):
    """
    Suggest a graph in which the last relation carries a numeric restriction.

    The tokens of the first entity of type ``CD`` are stored under ``num`` on the edge returned by
    ``graph.get_graph_last_edge`` (called with ``iclass``, ``v-structure`` and ``class`` as ``filter_out_types``).
    The modification is applied to the result of ``graph.copy_graph(g)``, whose entity list no longer contains the
    entities with those tokens. No suggestion is made if the graph has no edges, if ``graph.graph_has_temporal(g)``
    holds, if there is no ``CD`` entity, or if no edge is returned for modification.

    :param g: a graph with a non-empty edgeSet
    :return: a list of suggested graphs
    >>> last_relation_numeric({'edgeSet': [{'right':[2], 'type':'direct'}, {'right':[8], 'type':'direct'}], 'entities': [{'linkings':[("Q37876", "Natalie Portman")], 'tokens':["Portman"], 'type':'PERSON'}, {'linkings': [(None, ['2012'])], 'type': 'CD', 'tokens': ['2012']}]}) == \
    [{'edgeSet': [{'right':[2], 'type':'direct'}, {'right':[8], 'type':'direct', 'num': ['2012']}], 'entities': [{'linkings':[("Q37876", "Natalie Portman")], 'tokens':["Portman"], 'type':'PERSON'}]}]
    True
    >>> last_relation_numeric({'edgeSet': [{'right':[2]}, {'right':[8], 'argmin':'time'}], 'entities': [{'linkings': [(None, ['2012'])], 'type': 'CD', 'tokens': ['2012']}]})
    []
    >>> last_relation_numeric({'edgeSet': [{'right':[2]}, {'right':[8], 'num':'2009'}], 'entities': [{'linkings': [(None, ['2012'])], 'type': 'CD', 'tokens': ['2012']}]})
    []
    >>> last_relation_numeric({'edgeSet': [{'right':[2]}], 'entities': []})
    []
    >>> last_relation_numeric({'edgeSet': [{'right':[2]}], 'entities': [{'linkings':[("Q37876", "Natalie Portman")], 'tokens':["Portman"], 'type':'PERSON'}]})
    []
    """
    has_edges = len(g.get('edgeSet', [])) > 0
    if not has_edges or graph.graph_has_temporal(g):
        return []
    if len(g.get('entities', [])) == 0:
        return []
    # Only entities with more than one field are considered when looking for a numeric entity.
    if not any(len(e) > 1 and e.get("type") == 'CD' for e in g['entities']):
        return []

    all_entities = copy.copy(g.get('entities', []))
    # The guard above guarantees that at least one CD entity exists, so next() always finds one.
    numeric_tokens = next(
        e.get("tokens") for e in all_entities if e.get("type") == 'CD')
    # The numeric entity is consumed by the restriction and leaves the entity list.
    remaining_entities = [
        e for e in all_entities if e.get("tokens") != numeric_tokens
    ]

    suggestion = graph.copy_graph(g)
    suggestion['entities'] = remaining_entities
    target_edge = graph.get_graph_last_edge(
        suggestion, filter_out_types={'iclass', 'v-structure', 'class'})
    if len(target_edge) == 0:
        return []
    target_edge['num'] = numeric_tokens
    return [suggestion]
def last_relation_hop(g):
    """
    Suggest graphs in which the last relation is extended with a hop.

    For every hop type in ``HOP_TYPES`` the graph is copied and the hop key is set to ``None`` on the edge returned by
    ``graph.get_graph_last_edge`` (called with ``iclass``, ``class``, ``v-structure`` and ``time`` as
    ``filter_out_types``), provided that this edge is non-empty and has no ``kbID`` yet. Nothing is suggested when the
    graph has no edges, when the final entry of ``edgeSet`` already carries ``hopUp`` or ``hopDown``, or when
    ``graph.graph_has_temporal(g)`` holds.

    :param g: a graph with an non-empty edgeSet
    :return: a list of suggested graphs
    >>> last_relation_hop({'edgeSet': [], 'entities': [[4, 5, 6]]})
    []
    >>> last_relation_hop({'edgeSet': [{'right':[4,5,6]}], 'entities': []}) ==\
     [{'edgeSet': [{'right':[4,5,6], 'hopUp': None}], 'entities': []}, {'edgeSet': [{'right':[4,5,6], 'hopDown': None}], 'entities': []}]
    True
    >>> last_relation_hop({'edgeSet': [{'right':[4,5,6], 'hopUp': None}], 'entities': []})
    []
    >>> last_relation_hop({'edgeSet': [{'right':[4,5,6], 'kbID': "P31v", "type": "direct"}], 'entities': []})
    []
    >>> last_relation_hop({'edgeSet': [{'right':["Bahama"], "rightkbID":"Q6754", 'type':'direct'}], 'entities': []}) ==\
     [{'edgeSet': [{'right':["Bahama"], "rightkbID":"Q6754", 'hopUp': None, 'type':'direct'}], 'entities': []}, {'edgeSet': [{'right':["Bahama"], "rightkbID":"Q6754", 'hopDown': None, 'type':'direct'}], 'entities': []}]
    True
    >>> last_relation_hop({'edgeSet': [{'right':[4,5,6], 'argmax':'time'}], 'entities': []})
    []
    >>> last_relation_hop({'edgeSet': [{'right':[4,5,6], 'num':['2012']}], 'entities': []})
    []
    """
    if len(g.get('edgeSet', [])) == 0:
        return []
    final_edge = g['edgeSet'][-1]
    if 'hopUp' in final_edge or 'hopDown' in final_edge:
        # The final edge already has a hop; do not stack another one.
        return []
    if graph.graph_has_temporal(g):
        return []

    suggestions = []
    for hop_type in HOP_TYPES:
        # Every suggestion works on its own copy so that the hop types do not end up on the same edge.
        candidate = graph.copy_graph(g)
        target_edge = graph.get_graph_last_edge(
            candidate, filter_out_types={'iclass', 'class', 'v-structure', 'time'})
        if len(target_edge) == 0 or 'kbID' in target_edge:
            continue
        target_edge[hop_type] = None
        suggestions.append(candidate)
    return suggestions