Example #1
from DiShIn import ssm  # DiShIn semantic similarity utilities


def get_path_to_root(entity_id):
    """

    :param entity_id:
    :return:
    """

    if entity_id.startswith('CHEBI'):
        ssm.semantic_base('bin/DiShIn/chebi.db')
    if entity_id.startswith('HP'):
        ssm.semantic_base('bin/DiShIn/hp.db')
    if entity_id.startswith('GO'):
        ssm.semantic_base('bin/DiShIn/go.db')
    if entity_id.startswith('DOID'):
        ssm.semantic_base('bin/DiShIn/doid.db')

    e1 = ssm.get_id(entity_id.replace(':', '_'))

    a = ssm.common_ancestors(e1, e1)
    a = [ssm.get_name(x) for x in a]

    return a
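
# Usage sketch (not part of the original example): the identifier below is a
# placeholder HP concept ID, and the call assumes the DiShIn .db files exist
# at the paths used above.
if __name__ == "__main__":
    print(get_path_to_root("HP:0000118"))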
Example #2
                if len(documents_entity_list[d][e]) > 0:
                    average_correct_match_score.append(
                        documents_entity_list[d][e][0]["score"])
                    #print(documents_entity_list[d][e][0]["score"])
        if average_correct_match_score:  # avoid ZeroDivisionError when no correct matches were scored
            print(
                "average_correct_match_score",
                sum(average_correct_match_score) /
                len(average_correct_match_score))
    print("perfect match is solution", perfect_matches_correct)
    print("solution label is not a perfect match", perfect_matches_incorrect)
    # print("entities with incorrect perfect matches", entities_with_incorrect_perfect_matches)
    # print("average number of candidates", sum(ncandidates) / len(ncandidates))
    # print("max number of candidates", max(ncandidates))
    return documents_entity_list


print("load semantic base")
ssm.semantic_base("DiShIn/hp.db")
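# first argument: max distance between linked concepts
# second argument: min similarity between entity text and candidate match
# third argument: corpus directory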
max_dist = int(sys.argv[1])
min_match_score = float(sys.argv[2])
corpus_dir = sys.argv[3]
#documents_entity_list = get_hpo_documents(corpus=("HPOtest/documents/", "HPOtest/annotations/"), min_match_score=min_match_score)
print("get hpo documents")
documents_entity_list = get_hpo_documents(corpus=corpus_dir,
                                          min_match_score=min_match_score)
print("generate candidates")
for d in documents_entity_list:
    candidates_filename = "candidates/{}/{}".format(corpus_dir, d)
    write_candidates(documents_entity_list[d], candidates_filename, max_dist,
                     "hpo")
Example #3
    #print("max number of candidates", max(ncandidates))
    return documents_entity_list
    # print(""perfect matches:", perfect_matches,
    #      "partial matches:", partial_matches,
    #      "label not found", label_not_found)



def write_candidates_file(entities, d, max_dist, corpus="ChebiPatents"):
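    """Write the candidate list for one document and push the value returned
    by write_candidates onto the shared multiprocessing queue 'output'."""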
    candidates_filename = "candidates/{}/{}".format(corpus, d)
    written = write_candidates(entities, candidates_filename, max_dist, "chebi")
    output.put(written)

# first argument: max distance between linked concepts
# second argument: min similarity between entity text and candidate match
ssm.semantic_base("DiShIn/chebi.db", to_memory=False)
max_dist = int(sys.argv[1])
min_match_score = float(sys.argv[2])
#corpus_dir = "ChebiPatents"
#corpus_dir  = "ChebiTest"
corpus_dir = sys.argv[3]
start_time = time.time()
documents_entity_list = get_chebi_patents(corpus_dir, min_match_score=min_match_score, mapto="chebi")
print("parsing and get entities time:", time.time() - start_time)
#documents_entity_list = get_chebi_patents(corpus_dir, mapto="dbpedia")
entities_writen = 0
output = mp.Queue()
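# 'output' collects the values that write_candidates_file puts on the queue
# when it is run by the (commented-out) multiprocessing workers below.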
"""
processes = [mp.Process(target=write_candidates_file, args=(documents_entity_list[d], d, max_dist)) for d in documents_entity_list]
print(processes)
# Run processes
Example #4
from DiShIn import ssm  # DiShIn semantic similarity utilities


def get_common_ancestors(id1, id2):
    """

    :param id1:
    :param id2:
    :return:
    """

    if id1.startswith('CHEBI'):
        ssm.semantic_base('bin/DiShIn/chebi.db')
    if id1.startswith('HP'):
        ssm.semantic_base('bin/DiShIn/hp.db')
    if id1.startswith('GO'):
        ssm.semantic_base('bin/DiShIn/go.db')
    if id1.startswith('DOID'):
        ssm.semantic_base('bin/DiShIn/doid.db')

    e1 = ssm.get_id(id1.replace(':', '_'))

    if id2.startswith('CHEBI'):
        ssm.semantic_base('bin/DiShIn/chebi.db')
    if id2.startswith('HP'):
        ssm.semantic_base('bin/DiShIn/hp.db')
    if id2.startswith('GO'):
        ssm.semantic_base('bin/DiShIn/go.db')
    if id2.startswith('DOID'):
        ssm.semantic_base('bin/DiShIn/doid.db')

    e2 = ssm.get_id(id2.replace(':', '_'))

    a = ssm.common_ancestors(e1, e2)
    # if a:
    #     print(id1, id2)
    #     print(e1, e2)
    #     print()
    #     print(a)
    #     b = [ssm.get_name(x) for x in a]
    #     print(b)
    #     print('\n\n\n')
    a = [ssm.get_name(x) for x in a]

    return a
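
# Usage sketch (not part of the original example): the two identifiers below
# are placeholder HP concept IDs; using IDs from the same ontology keeps both
# lookups on the same semantic base.
if __name__ == "__main__":
    print(get_common_ancestors("HP:0000001", "HP:0000118"))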
Example #5
import logging
from itertools import combinations
import sys
import os
import pickle
import atexit

import obonet
import networkx
from fuzzywuzzy import process
from fuzzywuzzy import fuzz

from DiShIn import ssm
ssm.semantic_base("src/DiShIn/chebi.db")

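# note: 'global' declarations at module level are no-ops; the names below are
# module-level globals regardless, so this block only documents them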
global chebi_cache
global paths_cache
global chemical_entity
global role
global subatomic_particle
global application
global multiple_match_count
global no_match_count

chebi_cache_file = "temp/chebi_cache.pickle"

# cache mapping entity string -> ChEBI ID
if os.path.isfile(chebi_cache_file):
    logging.info("loading chebi...")
    with open(chebi_cache_file, "rb") as f:
        chebi_cache = pickle.load(f)
    loadedchebi = True
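
# A minimal sketch of a plausible continuation (the original snippet is
# truncated here, so none of this is shown in the source): assume the cache
# starts empty when no pickle file exists and is written back on exit, which
# is what the pickle and atexit imports above suggest. The helper name
# save_chebi_cache is made up for this sketch.
else:
    chebi_cache = {}
    loadedchebi = False


def save_chebi_cache():
    # persist the string -> ChEBI ID cache for the next run
    with open(chebi_cache_file, "wb") as f:
        pickle.dump(chebi_cache, f)


atexit.register(save_chebi_cache)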