Esempio n. 1
0
def test_search():
    """Check the top phenodigm search hit for a fixed phenotype profile.

    Loads the annotation map and the graph from the gzip-compressed
    ``annotations`` and ``closures`` files, runs ``search`` with the
    'phenodigm' method on a fixed list of HP term ids, and asserts the id,
    score and rank of the first returned result.
    """
    hpo_root = "HP:0000118"

    # The graph builder takes the annotation map, so annotations load first.
    with gzip.open(annotations, 'rt') as annotation_handle:
        annot_map = flat_to_annotations(annotation_handle)
    with gzip.open(closures, 'rt') as closure_handle:
        ic_graph = build_ic_graph_from_closures(
            closure_handle, hpo_root, annot_map)

    query_terms = (
        "HP:0000403,HP:0000518,HP:0000565,HP:0000767,"
        "HP:0000872,HP:0001257,HP:0001263,HP:0001290,"
        "HP:0001629,HP:0002019,HP:0002072"
    ).split(',')

    outcome = search(query_terms, annot_map, ic_graph, 'phenodigm')
    best_match = outcome.results[0]

    assert best_match.id == 'ORPHA:94125'
    assert best_match.score > 50
    assert best_match.rank == 1
Esempio n. 2
0
    ICSemSim,
    SemanticDist,
    build_graph_from_closure_file,
    build_graph_from_rdflib,
    build_ic_graph_from_closures,
    build_ic_graph_from_iri,
    flat_to_annotations,
)

# Mock HPO fixture files, located under resources/mock-hpo next to this module.
ontology = Path(__file__).parent.joinpath(
    'resources', 'mock-hpo', 'ontology.ttl')
closures = Path(__file__).parent.joinpath(
    'resources', 'mock-hpo', 'closures.tsv')
annotations = Path(__file__).parent.joinpath(
    'resources', 'mock-hpo', 'annotations.tsv')

# The annotation map is built once, when this module is imported.
with open(annotations, mode='r') as annot_file:
    annotation_map = flat_to_annotations(annot_file)

# NOTE(review): presumably the tolerance used when comparing the expected
# values below against computed similarities — confirm against the test body.
epsilon = 0.001

# Each entry pairs the source text of an expression with its expected value.
# The expressions reference ``self`` and ``annotation_map``, so they are
# presumably evaluated from inside a test method — confirm.
graph_sim_tests = [
    (
        "self.graph_semsim.jaccard_sim(annotation_map['1'], annotation_map['2'])",
        0.3,
    ),
    (
        "self.graph_semsim.cosine_sim(annotation_map['1'], annotation_map['2'])",
        0.474,
    ),
]

ic_sim_tests = [
    ("self.semantic_sim.resnik_sim(annotation_map['1'], annotation_map['2'])",
     0.693),
    (
        "self.semantic_sim.resnik_sim(annotation_map['1'], annotation_map['2'], matrix_metric=MatrixMetric.MAX)",
Esempio n. 3
0
                    required=False,
                    default=int(multiprocessing.cpu_count() / 2))

# Parse the command-line options declared on ``parser`` earlier in the script.
args = parser.parse_args()

# NOTE(review): this handle is opened without a context manager and is not
# closed anywhere in the visible part of the script — confirm it is closed
# further down.
output = open(args.output, 'w')
# Input files are resolved relative to the directory one level above the
# directory that contains this script.
annotation_file = Path(
    __file__).parents[1] / 'data' / 'synthetic' / 'gold-standard.tsv.gz'
closures = Path(__file__).parents[1] / 'data' / 'hp-closures.tsv'

# Root term id passed to the graph builder below.
root = "HP:0000118"

# NOTE(review): the message says "closures", but the step that follows reads
# the annotation file; the closures file is read after "Building graph".
logger.info("Loading closures")

# The annotation file is gzip-compressed and is read in text mode.
with gzip.open(annotation_file, 'rt') as annot_file:
    annotations = flat_to_annotations(annot_file)

logger.info("Building graph")
# Unlike the annotations, the closures file is opened as plain text.
with open(closures, 'r') as closure_file:
    graph = build_ic_graph_from_closures(closure_file, root, annotations)

# Similarity object constructed from the graph built above.
ic_sim = ICSemSim(graph)

# Dictionaries used for constructing synthetic patient objects
simulated_profiles: Dict[str, Set[str]] = defaultdict(set)
synth_to_disease: Dict[str, str] = {}
synthetic_profiles: List[SyntheticProfile] = []

# Confusion matrix per rank
confusion_by_rank: Dict[int, List[int]] = {}
Esempio n. 4
0
from pumpkin_py import get_methods, ICSemSim,GraphSemSim, \
    flat_to_annotations, build_ic_graph_from_closures

# Report which similarity methods the package exposes.
print("available sim methods")
print(get_methods())

# Gzip-compressed inputs kept in the resources directory beside this script.
closures = Path(__file__).parent.joinpath(
    'resources', 'upheno-closures.tsv.gz')
annotations = Path(__file__).parent.joinpath(
    'resources', 'all-annotations.tsv.gz')
g2p = Path(__file__).parent.joinpath(
    'resources', 'Mm_gene_phenotype.txt.gz')

# Root term id passed to the graph builder.
root = "UPHENO:0001001"

print("Loading closures")

# Annotations are loaded first because the graph builder takes the map.
with gzip.open(annotations, mode='rt') as annot_file:
    annot_map = flat_to_annotations(annot_file)

with gzip.open(closures, mode='rt') as closure_file:
    graph = build_ic_graph_from_closures(closure_file, root, annot_map)

# The g2p file is parsed with the same loader as the annotations file.
with gzip.open(g2p, mode='rt') as annot_file:
    mouse_genes = flat_to_annotations(annot_file)

# Two similarity objects built over the same graph.
ic_sim = ICSemSim(graph)
graph_sim = GraphSemSim(graph)


# Fixed query profile: a comma-separated string split into a list of term ids.
profile_a = (
    "HP:0000403,HP:0000518,HP:0000565,HP:0000767,"
    "HP:0000872,HP:0001257,HP:0001263,HP:0001290,"
    "HP:0001629,HP:0002019,HP:0002072"
).split(',')