Beispiel #1
0
def main():
    start_time = time()

    wiki_info_file = 'e:/el/tmpres/demo/wiki-all/wiki-info.pkl'
    links_file = 'e:/el/tmpres/demo/wiki-all/links.txt'
    description_file = 'e:/el/tmpres/demo/wiki-all/text.txt'
    wiki_candidates_file = 'e:/el/tmpres/wiki/dict/name_candidates.pkl'
    word_idf_file = 'e:/el/tmpres/demo/word_idf.txt'

    tfidf = TfIdf(word_idf_file)

    wiki_info = WikiInfo(wiki_info_file, links_file, description_file)
    wiki_link = WikiLink(wiki_candidates_file, wiki_info, tfidf)

    input_file = 'input/00000001.txt'
    fin = open(input_file, 'rb')
    doc_text = fin.read()
    doc_text = doc_text.decode('utf-8')
    fin.close()

    pos = input_file.rfind('/')
    file_name = input_file[pos + 1:]
    ner_result_file = os.path.join('output', file_name + '.ner')
    merged_mention_list = mentiondetection.clean_ner_result(ner_result_file)

    merged_mention_list.sort(key=lambda x: x.span[0])
    wiki_link.link_all(doc_text, merged_mention_list)
    for mention in merged_mention_list:
        if (not mention.mesh_id) and mention.chebi_id < 0 < mention.wid:
            cur_name = doc_text[mention.span[0]:mention.span[1] + 1].lower()
            print cur_name, mention.wid, wiki_info.get_info(mention.wid)[0]

    print time() - start_time
Beispiel #2
0
def init_model():
    """Build a MedLink instance from hard-coded ``e:/data/el/tmpres`` paths.

    Loads the terms from ``chebi.obo`` with ``ChebiTerm.load_obo_file`` and
    prints how many were returned, then constructs the MeshMatch, MeshRecord
    list, MeshTree, WikiInfo, TfIdf and WikiLink helpers and the extra
    descriptions loaded by ``ioutils.load_wiki_extra_descriptions``.

    Returns:
        The MedLink object constructed from all of the above.
    """
    res_dir = 'e:/data/el/tmpres/demo/del-data/'
    extra_parents_file = res_dir + 'extra_parents.txt'
    mesh_record_file = res_dir + 'records_info_with_wiki.txt'
    mesh_dict_file = res_dir + 'med_dict_ascii_with_ids_edited.txt'
    exclude_words_file = res_dir + 'exclude_words.txt'
    tree_number_file = res_dir + 'id_tn.txt'
    obo_file = res_dir + 'chebi.obo'

    word_idf_file = 'e:/data/el/tmpres/demo/word_idf.txt'
    wiki_candidates_file = 'e:/data/el/tmpres/wiki/dict/name_candidates.pkl'

    wiki_info_file = 'e:/data/el/tmpres/demo/wiki-all/wiki-info.pkl'
    links_file = 'e:/data/el/tmpres/demo/wiki-all/links.txt'
    description_file = 'e:/data/el/tmpres/demo/wiki-all/text.txt'

    mesh_extra_description_file = 'e:/data/el/tmpres/demo/extra_description_for_mesh.txt'

    chebi_terms = ChebiTerm.load_obo_file(obo_file)
    print('%d chebi terms' % len(chebi_terms))

    mesh_match = MeshMatch(mesh_dict_file, exclude_words_file)
    mesh_records = MeshRecord.load_mesh_records(mesh_record_file)
    mesh_tree = MeshTree(tree_number_file, mesh_records)

    wiki_info = WikiInfo(wiki_info_file, links_file, description_file)
    tfidf = TfIdf(word_idf_file)
    wiki_link = WikiLink(wiki_candidates_file, wiki_info, tfidf)
    extra_wiki_desc = ioutils.load_wiki_extra_descriptions(
        mesh_extra_description_file)

    return MedLink(extra_parents_file, mesh_match, mesh_records, mesh_tree,
                   chebi_terms, wiki_info, extra_wiki_desc, wiki_link)
Beispiel #3
0
def init_model(res_dir='e:/data/el/tmpres/'):
    """Build a MedLink instance from the resource files under ``res_dir``.

    Loads the terms from ``chebi.obo`` with ``ChebiTerm.load_obo_file``, then
    constructs the MeshMatch, MeshRecord list, MeshTree, WikiInfo, TfIdf and
    WikiLink helpers and the extra descriptions loaded by
    ``ioutils.load_wiki_extra_descriptions``.

    Args:
        res_dir: Root directory that contains the ``demo`` and ``wiki``
            resource sub-directories. Defaults to the location that used to
            be hard-coded, so existing callers are unaffected.

    Returns:
        The MedLink object constructed from all of the above.
    """
    del_res_dir = os.path.join(res_dir, 'demo/del-data/')

    extra_parents_file = os.path.join(del_res_dir, 'extra_parents.txt')
    mesh_record_file = os.path.join(del_res_dir, 'records_info_with_wiki.txt')
    mesh_dict_file = os.path.join(del_res_dir,
                                  'med_dict_ascii_with_ids_edited.txt')
    exclude_words_file = os.path.join(del_res_dir, 'exclude_words.txt')
    tree_number_file = os.path.join(del_res_dir, 'id_tn.txt')
    obo_file = os.path.join(del_res_dir, 'chebi.obo')

    word_idf_file = os.path.join(res_dir, 'demo/word_idf.txt')
    wiki_candidates_file = os.path.join(res_dir,
                                        'wiki/dict/name_candidates.pkl')
    wiki_info_file = os.path.join(res_dir, 'demo/wiki-all/wiki-info.pkl')
    links_file = os.path.join(res_dir, 'demo/wiki-all/links.txt')
    description_file = os.path.join(res_dir, 'demo/wiki-all/text.txt')
    mesh_extra_description_file = os.path.join(
        res_dir, 'demo/extra_description_for_mesh.txt')

    chebi_terms = ChebiTerm.load_obo_file(obo_file)

    mesh_match = MeshMatch(mesh_dict_file, exclude_words_file)
    mesh_records = MeshRecord.load_mesh_records(mesh_record_file)
    mesh_tree = MeshTree(tree_number_file, mesh_records)

    wiki_info = WikiInfo(wiki_info_file, links_file, description_file)
    tfidf = TfIdf(word_idf_file)
    wiki_link = WikiLink(wiki_candidates_file, wiki_info, tfidf)
    extra_wiki_desc = ioutils.load_wiki_extra_descriptions(
        mesh_extra_description_file)

    return MedLink(extra_parents_file, mesh_match, mesh_records,
                   mesh_tree, chebi_terms, wiki_info, extra_wiki_desc,
                   wiki_link)
Beispiel #4
0
def __init_mellink():
    """Return a MedLink built from the Wikipedia resources alone.

    Only the ``wiki_info`` and ``wiki_link`` keyword arguments are passed to
    MedLink. All resource paths are hard-coded.
    """
    info = WikiInfo(
        'e:/data/el/tmpres/demo/wiki-all/wiki-info.pkl',
        'e:/data/el/tmpres/demo/wiki-all/links.txt',
        'e:/data/el/tmpres/demo/wiki-all/text.txt')
    idf_model = TfIdf('e:/data/el/tmpres/demo/word_idf.txt')
    linker = WikiLink(
        'e:/data/el/tmpres/wiki/dict/name_candidates.pkl', info, idf_model)
    return MedLink(wiki_info=info, wiki_link=linker)
Beispiel #5
0
def test():
    """Smoke-test ``WikiLink.link_with_context`` on a short sample sentence.

    Prints the result of linking the name 'cruz' with the value returned by
    ``TfIdf.get_tfidf_from_text`` for the sample text as context, then prints
    the difference between ``time()`` at exit and at entry.
    """
    began = time()

    sample = 'last opportunities Texas senator Cruz'

    idf_model = TfIdf('e:/el/tmpres/demo/merge/word_idf.txt')
    info = WikiInfo('e:/el/tmpres/demo/wiki-all/wiki-info.pkl',
                    'e:/el/tmpres/demo/wiki-all/links.txt',
                    'e:/el/tmpres/demo/wiki-all/text.txt')
    linker = WikiLink('e:/el/tmpres/wiki/dict/name_candidates.pkl',
                      info, idf_model)

    context = idf_model.get_tfidf_from_text(sample)
    result = linker.link_with_context('cruz', context)
    print(result)

    elapsed = time() - began
    print(elapsed)