Ejemplo n.º 1
0
def _collect_candidate_entities(word_list):
    """Return candidate entity names found in every contiguous token span.

    Each span of consecutive tokens is concatenated into a mention. The
    mention and any aliases listed for it in ``mention2entity_dic`` are looked
    up through ``ccksNeo``; alternate names stored on matching entities and
    the looked-up names themselves are collected. Duplicates are possible.
    """
    # Property keys holding foreign-language names are not useful mentions.
    foreign_name_keys = ('英文名', '英文名称', '外文名', '外文名称')
    candidates = []
    # Walk start positions directly. word_list.index(word) always returns the
    # first occurrence, so a repeated token would re-scan the earlier span and
    # the spans starting at the later position would never be produced.
    for start in range(len(word_list)):
        mention = ""
        for token in word_list[start:]:
            mention = mention + token
            if len(mention) <= 1:
                continue
            names_to_try = [mention]
            if mention in mention2entity_dic:  # the mention has known aliases
                names_to_try.extend(mention2entity_dic[mention])
            for name in names_to_try:
                same_name_entities = ccksNeo.get_entity_list_by_name(name)
                for prop in ccksNeo.get_entity_info_by_name(name):
                    key, value = prop[0], prop[1]
                    # Keep values of "...名" / "...称" (name-like) properties.
                    # endswith() also tolerates an empty key, which the
                    # previous key[-1] lookup did not.
                    if (key.endswith(('名', '称'))
                            and len(value) > 1
                            and key not in foreign_name_keys):
                        candidates.append(value)
                if len(same_name_entities) >= 1:
                    candidates.append(name)
    return candidates


def _has_matching_relation(entity, remainder_tokens, min_score=0.2):
    """Tell whether any relation of ``entity`` resembles the remaining question.

    Every entity named ``entity`` is fetched, and each of its related entries
    is scored against ``remainder_tokens`` with
    ``serviceWord2vec.get_similarity``. Returns True as soon as one score
    exceeds ``min_score``.
    """
    for record in ccksNeo.get_entity_list_by_name(entity):
        for relation in ccksNeo.get_related_entities_by_id(record['id']):
            relation_tokens = list(jieba.cut(relation['name']))
            score = serviceWord2vec.get_similarity(remainder_tokens, relation_tokens)
            if score > min_score:
                return True
    return False


def entityRecognize(word_list, question):
    """Recognize entity mentions in a segmented question.

    Relies on ``mention2entity_dic``, ``ccksNeo``, ``serviceWord2vec`` and
    ``jieba`` being available in the enclosing module.

    Args:
        word_list: Tokens of the question, in order.
        question: The original question string.

    Returns:
        A list of candidate entity names. A short mention that is contained
        in a longer candidate is dropped unless one of its relation names is
        similar to the question text left after removing the mention's
        characters. The final list is also printed.
    """
    entity_list = _collect_candidate_entities(word_list)

    # If a short mention is contained in a longer one, check the short
    # mention's one-hop relation names. Iterate over a snapshot: removing from
    # the list that is being iterated makes the loop skip elements.
    checked = set()
    for entity1 in list(entity_list):
        if entity1 in checked:
            continue
        checked.add(entity1)

        contained_in_longer = any(
            entity1 != entity2 and entity1 in entity2 for entity2 in entity_list
        )
        if not contained_in_longer:
            continue

        # The question with every character of the mention stripped out.
        remainder = question
        for char in entity1:
            remainder = remainder.replace(char, "")
        # Segment once; the result does not change per relation.
        remainder_tokens = list(jieba.cut(remainder))

        if not _has_matching_relation(entity1, remainder_tokens):
            # Drop every occurrence; list.remove() would leave duplicates.
            entity_list = [kept for kept in entity_list if kept != entity1]

    print("entity_list", entity_list)

    return entity_list
Ejemplo n.º 2
0
    # Excerpt from inside a larger function: `line`, `sentence` and `p` are
    # defined outside the visible code.
    # Parses the answer part of `line`, extracts one field per answer item,
    # appends each distinct value to `sentence` and writes the result to `p`.
    yitiaodaan = []

    # Position of the '|||||' separator; not used in the visible lines.
    index2 = line.find('|||||')
    try:
        # Text following the first '|||||' separator.
        yitiao = line.split("|||||")[1]
    except:
        # Separator missing: only a marker is printed. `yitiao` keeps whatever
        # value it had before this point.
        # NOTE(review): if it was never assigned, the next try block fails too
        # and prints 3 — confirm this fall-through is intended.
        print(1)

    try:
        # Individual answer items are separated by "||".
        yitiaodaan = yitiao.split("||")
    except:
        print(3)

    an21 = []
    if yitiaodaan:
        for i in yitiaodaan:
            if i != ' ':
                try:
                    # Keep the third "|"-separated field of the item.
                    an21.append(i.split("|")[2])
                except:
                    # Items with fewer than three fields are skipped.
                    print(1)
    # De-duplicate; going through set() does not preserve the original order.
    comp = list(set(an21))
    for mm in comp:
        info = ccksNeo.get_entity_info_by_name(mm)
        # Values with entity info are appended as <value>; all others are
        # appended inside curly double quotes. Fields are tab-separated.
        if info != []:
            sentence = sentence + '\t' + '<' + mm + '>'
        else:
            sentence = sentence + '\t' + '“' + mm + '”'
    print(comp)
    # `p` is presumably a writable file object opened by the caller — confirm.
    p.writelines(sentence + '\n')