def _3_0_1_to_graphs(candidate_pathes, s1, t1):
    '''
    Build candidate graph queries for the shape e-{p1}->a*-{p2}->literal.

    Every candidate is a tab-separated string of four columns
    (p1, answer entity, p2, literal), for example
    "location.location.contains\tm.05vd5\tlocation.location.area\t47.0".
    Candidates with a different number of columns are ignored.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: value stored as the id of the topic-entity node
    :param t1: value stored as the node_type of the topic-entity node
    :return: list with one dict per distinct (p1, p2) pair, in first-seen
        order; keys are "querytype", "nodes", "edges", "path", "denotation"
    '''
    # The three placeholder nodes are created once and shared by every
    # returned query dict.
    topic = GroundedNode(nid=1,
                         node_type=t1,
                         id=s1,
                         type_class='',
                         friendly_name="",
                         question_node=0)
    answer = GroundedNode(nid=2,
                          node_type="class",
                          id='?a',
                          type_class='',
                          friendly_name="",
                          question_node=1)
    literal = GroundedNode(nid=3,
                           node_type="literal",
                           id='?literal',
                           type_class='',
                           friendly_name="",
                           question_node=0)

    # (p1, p2) -> set of (answer entity, literal) pairs seen on that path.
    grouped = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split("\t")
        if len(fields) == 4:
            first_rel, answer_value, second_rel, literal_value = fields
            grouped[(first_rel, second_rel)].add((answer_value, literal_value))

    return [
        {
            "querytype": '3_0_1',
            "nodes": [topic, answer, literal],
            "edges": [
                GroundedEdge(start=topic.nid, end=answer.nid, relation=p1),
                GroundedEdge(start=answer.nid, end=literal.nid, relation=p2),
            ],
            "path": '\t'.join([p1, p2]),
            "denotation": list(pairs),
        }
        for (p1, p2), pairs in grouped.items()
    ]
def _3_1_to_graphs(candidate_pathes, s1, t1):
    '''
    Build candidate graph queries for the shape e-{p1}->m-{p2}->a.

    Every candidate is a tab-separated string of four columns
    (p1, intermediate node, p2, answer entity), for example
    "user.tsegaran.random.taxonomy_subject.entry\tm.04_8c54\tuser.tsegaran.random.taxonomy_entry.taxonomy\tm.04n6k".
    Candidates with a different number of columns are ignored.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: value stored as the id of the topic-entity node
    :param t1: value stored as the node_type of the topic-entity node
    :return: list with one dict per distinct (p1, p2) pair, in first-seen
        order; keys are "querytype", "nodes", "edges", "path", "denotation"
    '''
    # The three placeholder nodes are created once and shared by every
    # returned query dict.
    topic = GroundedNode(nid=1,
                         node_type=t1,
                         id=s1,
                         type_class='',
                         friendly_name="",
                         question_node=0)
    middle = GroundedNode(nid=2,
                          node_type="class",
                          id='?m',
                          type_class='',
                          friendly_name="",
                          question_node=0)
    answer = GroundedNode(nid=3,
                          node_type="class",
                          id='?a',
                          type_class='',
                          friendly_name="",
                          question_node=1)

    # (p1, p2) -> set of answer entities (fourth column) seen on that path.
    grouped = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split("\t")
        if len(fields) == 4:
            first_rel, _, second_rel, answer_value = fields
            grouped[(first_rel, second_rel)].add(answer_value)

    return [
        {
            "querytype": '3_1',
            "nodes": [topic, middle, answer],
            "edges": [
                GroundedEdge(start=topic.nid, end=middle.nid, relation=p1),
                GroundedEdge(start=middle.nid, end=answer.nid, relation=p2),
            ],
            "path": '\t'.join([p1, p2]),
            "denotation": list(answers),
        }
        for (p1, p2), answers in grouped.items()
    ]
예제 #3
0
def add_path(_grounded_nodes,
             grounded_graph,
             path,
             son_id=None,
             father_id=None):
    '''
    Insert one tab-separated path into ``grounded_graph``.

    The first path element becomes the id of the node ``son_id`` and the last
    element the id of the node ``father_id``; node_type and question_node are
    copied from the nodes with those nids found in ``_grounded_nodes``.
    With at most three elements a single edge (relation = second element) is
    added. With more elements the third one becomes an intermediate "class"
    node and two edges are added (relations = second and fourth element).

    :param _grounded_nodes: nodes searched by nid for the two endpoints
    :param grounded_graph: graph receiving the new nodes and edges
    :param path: tab-separated path string
    :param son_id: nid of the start node
    :param father_id: nid of the end node
    :return: None
    '''
    son_template = search_one_node_in_nodes_by_nid(_grounded_nodes, son_id)
    father_template = search_one_node_in_nodes_by_nid(_grounded_nodes,
                                                      father_id)
    parts = path.split('\t')

    # Build both endpoints before touching the graph.
    start_node = GroundedNode(nid=son_id,
                              id=parts[0],
                              score=1.0,
                              node_type=son_template.node_type,
                              question_node=son_template.question_node)
    end_node = GroundedNode(nid=father_id,
                            id=parts[-1],
                            score=1.0,
                            node_type=father_template.node_type,
                            question_node=father_template.question_node)
    grounded_graph.add_node(start_node)
    grounded_graph.add_node(end_node)

    if len(parts) <= 3:
        # Direct hop: one edge between the two endpoints.
        grounded_graph.add_edge(
            GroundedEdge(start=son_id,
                         end=father_id,
                         relation=parts[1],
                         friendly_name=parts[1],
                         score=1.0))
        return

    # Two hops through an intermediate node (the original comment calls it
    # a mediator / cvt); its nid is derived from the two endpoint nids.
    middle_id = son_id * 10 + father_id
    grounded_graph.add_node(
        GroundedNode(nid=middle_id,
                     id=parts[2],
                     score=1,
                     node_type='class',
                     question_node=0))
    first_hop = GroundedEdge(start=son_id,
                             end=middle_id,
                             relation=parts[1],
                             friendly_name=parts[1],
                             score=1.0)
    second_hop = GroundedEdge(start=middle_id,
                              end=father_id,
                              relation=parts[3],
                              friendly_name=parts[3],
                              score=1.0)
    grounded_graph.add_edge(first_hop)
    grounded_graph.add_edge(second_hop)
예제 #4
0
def read_graph_question_json(filename):
    '''
    Load a GraphQuestions JSON file into GraphQuestion objects.

    The file must contain a JSON list; each element supplies the qid, the
    question fields and a "graph_query" with "nodes" and "edges".

    :param filename: path of the JSON file
    :return: list of GraphQuestion objects, in file order
    '''
    with open(filename, 'rb') as handle:
        records = json.load(handle)

    # Fields copied unchanged from the JSON record onto the object.
    copied_fields = ("question", "answer", "function", "commonness",
                     "num_node", "num_edge", "graph_query")

    questions = []
    for record in records:
        entry = GraphQuestion()
        entry.qid = record["qid"]
        # The three ids below are decimal digit groups of the qid.
        entry.graph_entity_level_paraphrase_id = entry.qid % 100
        entry.graph_sentence_level_paraphrase_id = (entry.qid // 100) % 10000
        entry.graph_query_id = entry.qid // 1000000
        for field in copied_fields:
            setattr(entry, field, record[field])

        for node_json in record["graph_query"]["nodes"]:
            entry.nodes.append(
                GroundedNode(nid=node_json["nid"],
                             node_type=node_json["node_type"],
                             type_class=node_json["class"],
                             friendly_name=node_json["friendly_name"],
                             question_node=node_json["question_node"],
                             function=node_json["function"],
                             id=node_json["id"],
                             score=1.0))
        for edge_json in record["graph_query"]["edges"]:
            entry.edges.append(
                GroundedEdge(start=edge_json["start"],
                             end=edge_json["end"],
                             relation=edge_json["relation"],
                             friendly_name=edge_json["friendly_name"],
                             score=1.0))

        entry.sparql_query = record["sparql_query"]
        questions.append(entry)
    return questions
예제 #5
0
def _ungrounded_graph_to_grounded_graph(ungrounded_graph, grounding_result_list):
    grouned_graph_list = []
    if grounding_result_list is None:
        return grouned_graph_list
    grounded_nodes = []
    for ungrounded_node in ungrounded_graph.nodes:
        grounded_nodes.append(GroundedNode(nid=ungrounded_node.nid,
                                           node_type=ungrounded_node.node_type,
                                           type_class=ungrounded_node.type_class,
                                           friendly_name=ungrounded_node.friendly_name,
                                           question_node=ungrounded_node.question_node,
                                           function=ungrounded_node.function,
                                           score=0))
    for grounded_node in grounded_nodes:
        for ungrounded_node, nodes_grounding in grounding_result_list:
            if grounded_node.nid == ungrounded_node.nid:
                """nodes_grounding: {'en.xtracycle':1.6, 'freebase.type_profile':1.0}"""
                for mid, pro in nodes_grounding.items():
                    grounded_node.id = mid
                    grounded_node.score = pro
                    break
    grounded_graph = GrounedGraph(grounded_query_id=ungrounded_graph.ungrounded_query_id,
                                  type='', nodes=grounded_nodes, edges=[], key_path='',
                                  sparql_query='', score=0, denotation='')
    grouned_graph_list.append(grounded_graph)
    return grouned_graph_list
예제 #6
0
def _ungrounded_to_grounded(ungrounded_graph):
    '''
    Convert an ungrounded graph into a basic grounded graph.

    Nodes are copied with score 0 and no id; edges keep their start, end,
    friendly_name and score (no relation is set).

    :param ungrounded_graph: graph providing ``nodes``, ``edges`` and
        ``ungrounded_query_id``
    :return: GrounedGraph with empty type, key_path, sparql_query and
        denotation, and score 0
    '''
    copied_nodes = [
        GroundedNode(nid=source.nid,
                     node_type=source.node_type,
                     type_class=source.type_class,
                     friendly_name=source.friendly_name,
                     question_node=source.question_node,
                     function=source.function,
                     score=0)
        for source in ungrounded_graph.nodes
    ]
    copied_edges = [
        GroundedEdge(start=source.start,
                     end=source.end,
                     friendly_name=source.friendly_name,
                     score=source.score)
        for source in ungrounded_graph.edges
    ]
    return GrounedGraph(grounded_query_id=ungrounded_graph.ungrounded_query_id,
                        type='',
                        nodes=copied_nodes,
                        edges=copied_edges,
                        key_path='',
                        sparql_query='',
                        score=0,
                        denotation='')
예제 #7
0
def read_gold_graph_query(gold_grounded_graph_json):
    """
    Build the gold grounded graph from its JSON (dict) form.

    :param gold_grounded_graph_json: dict with "nodes" and "edges" lists,
        or None
    :return: None for None input, otherwise a GrounedGraph with
        grounded_query_id -1 and type 'gold'
    """
    if gold_grounded_graph_json is None:
        return None

    nodes = []
    for node_json in gold_grounded_graph_json['nodes']:
        # The class may be stored under 'type_class' or under 'class';
        # it stays None when neither key is present.
        if 'type_class' in node_json:
            node_class = node_json['type_class']
        else:
            node_class = node_json.get('class')
        nodes.append(
            GroundedNode(nid=node_json["nid"],
                         node_type=node_json["node_type"],
                         id=node_json["id"],
                         type_class=node_class,
                         friendly_name=node_json["friendly_name"],
                         question_node=node_json["question_node"],
                         function=node_json["function"]))

    edges = [
        GroundedEdge(start=edge_json["start"],
                     end=edge_json["end"],
                     relation=edge_json["relation"],
                     friendly_name=edge_json["friendly_name"])
        for edge_json in gold_grounded_graph_json["edges"]
    ]
    return GrounedGraph(-1, 'gold', nodes, edges)
예제 #8
0
def read_grounded_graph(grounded_graph_json):
    '''
    Build a grounded graph from its JSON (dict) form.

    "nodes" and "edges" are optional and default to empty lists;
    "total_score" and "f1_score" are optional and default to 0.0.
    The remaining keys are required.

    :param grounded_graph_json: dict describing one grounded graph
    :return: GrounedGraph populated from the dict
    :raises KeyError: when a required key is missing
    '''
    query_id = grounded_graph_json['grounded_query_id']

    nodes = [
        GroundedNode(nid=node_json["nid"],
                     node_type=node_json["node_type"],
                     id=node_json["id"],
                     type_class=node_json['type_class'],
                     friendly_name=node_json["friendly_name"],
                     question_node=node_json["question_node"],
                     function=node_json["function"],
                     score=node_json['score'],
                     ordinal=node_json['ordinal'])
        for node_json in grounded_graph_json.get("nodes", ())
    ]
    edges = [
        GroundedEdge(start=edge_json["start"],
                     end=edge_json["end"],
                     relation=edge_json["relation"],
                     friendly_name=edge_json["friendly_name"],
                     score=edge_json["score"])
        for edge_json in grounded_graph_json.get("edges", ())
    ]

    graph_type = grounded_graph_json['type']
    return GrounedGraph(query_id,
                        graph_type,
                        nodes,
                        edges,
                        key_path=grounded_graph_json['key_path'],
                        sparql_query=grounded_graph_json["sparql_query"],
                        score=grounded_graph_json["score"],
                        denotation=grounded_graph_json["denotation"],
                        total_score=grounded_graph_json.get('total_score', 0.0),
                        f1_score=grounded_graph_json.get('f1_score', 0.0))
def _3_0_to_graphs(candidate_pathes, s1, t1):
    '''
    Build candidate graph queries for the one-hop shape entity-{p}->o.

    Every candidate is a tab-separated string of two columns: the first is
    the relation p and the second is the object o, for example
    "organization.organization.headquarters\tm.08cshk7".
    Candidates with a different number of columns are ignored.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: value stored as the id of the topic-entity node
    :param t1: value stored as the node_type of the topic-entity node
    :return: list with one dict per distinct relation, in first-seen order;
        keys are "querytype", "nodes", "edges", "path", "denotation"
    '''
    # Both placeholder nodes are created once and shared by every returned
    # query dict.
    topic = GroundedNode(nid=1,
                         node_type=t1,
                         id=s1,
                         type_class='',
                         friendly_name="",
                         question_node=0)
    answer = GroundedNode(nid=2,
                          node_type="class",
                          id='?a',
                          type_class='',
                          friendly_name="",
                          question_node=1)

    # relation -> set of objects reached through it.
    grouped = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split("\t")
        if len(fields) == 2:
            rel, target = fields
            grouped[rel].add(target)

    return [
        {
            "querytype": '3_0',
            "nodes": [topic, answer],
            "edges": [
                GroundedEdge(start=topic.nid, end=answer.nid, relation=rel),
            ],
            "path": rel,
            "denotation": list(targets),
        }
        for rel, targets in grouped.items()
    ]
예제 #10
0
def read_gold_graph_query(gold_grounded_graph_json):
    '''
    Build the gold grounded graph from its JSON (dict) form.

    Only nodes and edges are read; the resulting graph gets
    grounded_query_id -1 and type 'gold'.

    :param gold_grounded_graph_json: dict with "nodes" and "edges" lists,
        or None
    :return: None for None input, otherwise a GrounedGraph
    '''
    if gold_grounded_graph_json is None:
        return None

    def _class_of(node_json):
        # 'type_class' wins over 'class'; None when neither key exists.
        if 'type_class' in node_json:
            return node_json['type_class']
        return node_json.get('class')

    nodes = []
    for node_json in gold_grounded_graph_json['nodes']:
        node_class = _class_of(node_json)
        nodes.append(
            GroundedNode(nid=node_json["nid"],
                         node_type=node_json["node_type"],
                         id=node_json["id"],
                         type_class=node_class,
                         friendly_name=node_json["friendly_name"],
                         question_node=node_json["question_node"],
                         function=node_json["function"]))

    edges = []
    for edge_json in gold_grounded_graph_json["edges"]:
        edges.append(
            GroundedEdge(start=edge_json["start"],
                         end=edge_json["end"],
                         relation=edge_json["relation"],
                         friendly_name=edge_json["friendly_name"]))

    graph_type = 'gold'
    return GrounedGraph(-1, graph_type, nodes, edges)
def _3_1_3_to_graphs(candidate_pathes, s1, t1):
    '''
    Build candidate graph queries for the shape
    e-{p1}->m1-{p2}->a-{p3}->m2-{p4}->literal.

    Every candidate is a tab-separated string of eight columns
    (p1, m1, p2, answer entity, p3, m2, p4, literal), for example
    "location.location.contains\tm.047tj\tgeography.island.body_of_water\tm.05rgl\tlocation.location.geolocation\tm.05l1d9y\tlocation.geocode.latitude\t0.0".
    Candidates with a different number of columns are ignored.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: value stored as the id of the topic-entity node
    :param t1: value stored as the node_type of the topic-entity node
    :return: list with one dict per distinct (p1, p2, p3, p4) tuple, in
        first-seen order; keys are "querytype", "nodes", "edges", "path",
        "denotation"
    '''
    # The five placeholder nodes are created once and shared by every
    # returned query dict.
    topic = GroundedNode(nid=1,
                         node_type=t1,
                         id=s1,
                         type_class='',
                         friendly_name="",
                         question_node=0)
    first_middle = GroundedNode(nid=2,
                                node_type="class",
                                id='?m',
                                type_class='',
                                friendly_name="",
                                question_node=0)
    answer = GroundedNode(nid=3,
                          node_type="class",
                          id='?a',
                          type_class='',
                          friendly_name="",
                          question_node=1)
    second_middle = GroundedNode(nid=4,
                                 node_type="class",
                                 id='?c',
                                 type_class='',
                                 friendly_name="",
                                 question_node=0)
    literal = GroundedNode(nid=5,
                           node_type="literal",
                           id='?literal',
                           type_class='',
                           friendly_name="",
                           question_node=0)

    # (p1, p2, p3, p4) -> set of (answer entity, literal) pairs.
    grouped = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split('\t')
        if len(fields) == 8:
            # Even positions are relations; 3 and 7 are answer and literal.
            relations = (fields[0], fields[2], fields[4], fields[6])
            grouped[relations].add((fields[3], fields[7]))

    return [
        {
            "querytype": '3_1_3',
            "nodes": [topic, first_middle, answer, second_middle, literal],
            "edges": [
                GroundedEdge(start=topic.nid,
                             end=first_middle.nid,
                             relation=p1),
                GroundedEdge(start=first_middle.nid,
                             end=answer.nid,
                             relation=p2),
                GroundedEdge(start=answer.nid,
                             end=second_middle.nid,
                             relation=p3),
                GroundedEdge(start=second_middle.nid,
                             end=literal.nid,
                             relation=p4),
            ],
            "path": '\t'.join([p1, p2, p3, p4]),
            "denotation": list(pairs),
        }
        for (p1, p2, p3, p4), pairs in grouped.items()
    ]
def _3_0_2_to_graphs(candidate_pathes, s1, t1):
    '''
    Build candidate graph queries for the shape
    e-{p1}->*a-{p2}->*m-{p3}->literal.

    Every candidate is a tab-separated string of six columns
    (p1, answer entity, p2, m, p3, literal), for example
    "location.location.contains\tm.06s9y\tlocation.statistical_region.gdp_real\tm.0hnzhpd\tmeasurement_unit.adjusted_money_value.adjusted_value\t192244189.0".
    Candidates with a different number of columns are ignored.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: value stored as the id of the topic-entity node
    :param t1: value stored as the node_type of the topic-entity node
    :return: list with one dict per distinct (p1, p2, p3) tuple, in
        first-seen order; keys are "querytype", "nodes", "edges", "path",
        "denotation"
    '''
    # The four placeholder nodes are created once and shared by every
    # returned query dict.
    topic = GroundedNode(nid=1,
                         node_type=t1,
                         id=s1,
                         type_class='',
                         friendly_name="",
                         question_node=0)
    answer = GroundedNode(nid=2,
                          node_type="class",
                          id='?a',
                          type_class='',
                          friendly_name="",
                          question_node=1)
    middle = GroundedNode(nid=3,
                          node_type="class",
                          id='?m',
                          type_class='',
                          friendly_name="",
                          question_node=0)
    literal = GroundedNode(nid=4,
                           node_type="literal",
                           id='?literal',
                           type_class='',
                           friendly_name="",
                           question_node=0)

    # (p1, p2, p3) -> set of (answer entity, literal) pairs.
    grouped = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split('\t')
        if len(fields) == 6:
            # Even positions are relations; 1 and 5 are answer and literal.
            relations = (fields[0], fields[2], fields[4])
            grouped[relations].add((fields[1], fields[5]))

    return [
        {
            "querytype": '3_0_2',
            "nodes": [topic, answer, middle, literal],
            "edges": [
                GroundedEdge(start=topic.nid, end=answer.nid, relation=p1),
                GroundedEdge(start=answer.nid, end=middle.nid, relation=p2),
                GroundedEdge(start=middle.nid, end=literal.nid, relation=p3),
            ],
            "path": '\t'.join([p1, p2, p3]),
            "denotation": list(pairs),
        }
        for (p1, p2, p3), pairs in grouped.items()
    ]
예제 #13
0
def extract_grounded_graph_from_jena_freebase(file_path):
    '''
    Read a Jena BGP dump for Freebase and build one grounded graph per qid.

    Line handling (columns are tab-separated):
      * '#QID...'        - third column is the qid; starts a new record
      * '#2.2_triple...' - columns after the first form one triple
      * '#2.2_node...'   - second column is the node id; a third or fourth
                           column, when present, holds the friendly name
      * '-------------------...' - closes the record and builds its graph

    Example path: './2019.04_15_complexwebq_test_bgp.txt'

    :param file_path: path handed to ``read_list_yuanshi``
    :return: dict mapping qid to GrounedGraph; records without nodes are
        not stored
    '''
    graphs_by_qid = dict()
    pending_triples = []
    pending_node_lines = []
    qid = None
    for line in read_list_yuanshi(file_path):
        if line.startswith('#QID'):
            qid = line.split('\t')[2]
            pending_triples = []
            pending_node_lines = []
        elif line.startswith('#2.2_triple'):
            pending_triples.append(line.split('\t')[1:])
        elif line.startswith('#2.2_node'):
            pending_node_lines.append(line)

        if not line.startswith('-------------------'):
            continue

        # Separator reached: materialise the nodes collected so far.
        grounded_nodes = []
        for node_line in pending_node_lines:
            cols = node_line.split('\t')
            node_id = cols[1]
            node = GroundedNode(id=node_id, nid=node_id)
            if node.id == '?x':
                node.question_node = 1

            if node_id.startswith('?'):
                node.node_type = 'class'
            elif node_id.startswith(('m.', 'g.', 'en.')):
                node.node_type = 'entity'
            else:
                node.node_type = 'literal'

            # NOTE(review): eval() executes whatever text the dump contains;
            # only safe for trusted files. Consider ast.literal_eval after
            # confirming every value is a plain literal.
            if len(cols) == 3:
                # e.g. #2.2_node:	m.03_dwn	{'Lou Seal'}
                node.friendly_name = eval(cols[2])
            elif len(cols) == 4:
                # e.g. #2.2_node:	?x	False	{'m.0117q3yz': {'common.topic', ...}}
                node.friendly_name = eval(cols[3])
            grounded_nodes.append(node)

        grounded_edges = []
        for triple in pending_triples:
            start_node = _get_node_by_id(grounded_nodes, triple[0])
            end_node = _get_node_by_id(grounded_nodes, triple[2])
            relation = triple[1]
            if relation == 'common.topic.notable_types':
                # The object of a notable-type triple is always a class;
                # the edge itself is kept only when that object is a
                # variable (nid starts with '?').
                end_node.node_type = 'class'
                if not end_node.nid.startswith('?'):
                    continue
            grounded_edges.append(
                GroundedEdge(start=start_node.nid,
                             end=end_node.nid,
                             relation=relation,
                             friendly_name=relation))

        if grounded_nodes:
            graphs_by_qid[qid] = GrounedGraph(nodes=grounded_nodes,
                                              edges=grounded_edges)
    return graphs_by_qid
예제 #14
0
def extract_grounded_graph_from_jena_dbpedia(file_path):
    '''
    Read a Jena BGP dump for DBpedia and build one grounded graph per qid.

    Line handling (columns are tab-separated):
      * '#Qid...'        - third column is the qid; starts a new record
      * '#2.2_triple...' - columns after the first form one triple
      * '#2.2_node...'   - second column is the node id
      * '-------------------...' - closes the record and builds its graph
    Other lines (including '#Question...') contribute nothing to the result.

    Example path: './2019.07.17_qald_test_bgp.txt'

    :param file_path: path handed to ``read_list_yuanshi``
    :return: dict mapping qid to GrounedGraph
    '''
    entity_prefix = 'http://dbpedia.org/resource/'
    rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

    graphs_by_qid = dict()
    pending_triples = []
    pending_node_lines = []
    qid = None
    for line in read_list_yuanshi(file_path):
        if line.startswith('#Qid'):
            qid = line.split('\t')[2]
            pending_triples = []
            pending_node_lines = []
        elif line.startswith('#2.2_triple'):
            pending_triples.append(line.split('\t')[1:])
        elif line.startswith('#2.2_node'):
            pending_node_lines.append(line)

        if not line.startswith('-------------------'):
            continue

        # Separator reached: materialise the nodes collected so far.
        grounded_nodes = []
        for node_line in pending_node_lines:
            node_id = node_line.split('\t')[1]
            node = GroundedNode(id=node_id, nid=node_id)
            if node.id == '?uri':
                # '?uri' marks the question node.
                node.question_node = 1
            # DBpedia resources are entities; everything else (e.g. ?x)
            # is treated as a class.
            if node_id.startswith(entity_prefix):
                node.node_type = 'entity'
            else:
                node.node_type = 'class'
            grounded_nodes.append(node)

        grounded_edges = []
        for triple in pending_triples:
            start_node = _get_node_by_id(grounded_nodes, triple[0])
            end_node = _get_node_by_id(grounded_nodes, triple[2])
            relation = triple[1]
            # rdf:type triples produce no edge (rule added 2020.04.24).
            if relation != rdf_type:
                grounded_edges.append(
                    GroundedEdge(start=start_node.nid,
                                 end=end_node.nid,
                                 relation=relation,
                                 friendly_name=relation))

        graphs_by_qid[qid] = GrounedGraph(nodes=grounded_nodes,
                                          edges=grounded_edges)
    return graphs_by_qid