def test_search():
    """
    Run a 'phenodigm' search for a fixed HPO profile and check the top hit.

    Annotations and closures are read from gzip-compressed files, an IC
    graph is built from them, and the first search result is expected to
    be ORPHA:94125 with a score above 50 and a rank of 1.
    """
    hpo_root = "HP:0000118"

    with gzip.open(annotations, 'rt') as annotation_handle:
        annot_map = flat_to_annotations(annotation_handle)

    with gzip.open(closures, 'rt') as closure_handle:
        graph = build_ic_graph_from_closures(
            closure_handle, hpo_root, annot_map
        )

    # Query profile spelled out term by term
    query_profile = [
        "HP:0000403",
        "HP:0000518",
        "HP:0000565",
        "HP:0000767",
        "HP:0000872",
        "HP:0001257",
        "HP:0001263",
        "HP:0001290",
        "HP:0001629",
        "HP:0002019",
        "HP:0002072",
    ]

    search_results = search(query_profile, annot_map, graph, 'phenodigm')

    # Only the first entry of the result list is checked
    top_hit = search_results.results[0]
    assert top_hit.id == 'ORPHA:94125'
    assert top_hit.score > 50
    assert top_hit.rank == 1
ICSemSim, SemanticDist, build_graph_from_closure_file, build_graph_from_rdflib, build_ic_graph_from_closures, build_ic_graph_from_iri, flat_to_annotations, ) ontology = Path(__file__).parent / 'resources' / 'mock-hpo' / 'ontology.ttl' closures = Path(__file__).parent / 'resources' / 'mock-hpo' / 'closures.tsv' annotations = Path( __file__).parent / 'resources' / 'mock-hpo' / 'annotations.tsv' with open(annotations, 'r') as annot_file: annotation_map = flat_to_annotations(annot_file) epsilon = 1e-3 graph_sim_tests = [ ("self.graph_semsim.jaccard_sim(annotation_map['1'], annotation_map['2'])", 0.3), ("self.graph_semsim.cosine_sim(annotation_map['1'], annotation_map['2'])", 0.474), ] ic_sim_tests = [ ("self.semantic_sim.resnik_sim(annotation_map['1'], annotation_map['2'])", 0.693), ( "self.semantic_sim.resnik_sim(annotation_map['1'], annotation_map['2'], matrix_metric=MatrixMetric.MAX)",
required=False, default=int(multiprocessing.cpu_count() / 2)) args = parser.parse_args() output = open(args.output, 'w') annotation_file = Path( __file__).parents[1] / 'data' / 'synthetic' / 'gold-standard.tsv.gz' closures = Path(__file__).parents[1] / 'data' / 'hp-closures.tsv' root = "HP:0000118" logger.info("Loading closures") with gzip.open(annotation_file, 'rt') as annot_file: annotations = flat_to_annotations(annot_file) logger.info("Building graph") with open(closures, 'r') as closure_file: graph = build_ic_graph_from_closures(closure_file, root, annotations) ic_sim = ICSemSim(graph) # Dictionaries used for constructing synthetic patient objects simulated_profiles: Dict[str, Set[str]] = defaultdict(set) synth_to_disease: Dict[str, str] = {} synthetic_profiles: List[SyntheticProfile] = [] # Confusion matrix per rank confusion_by_rank: Dict[int, List[int]] = {}
from pumpkin_py import (
    get_methods,
    ICSemSim,
    GraphSemSim,
    flat_to_annotations,
    build_ic_graph_from_closures,
)

# Print the value returned by get_methods() under a short heading
print("available sim methods")
print(get_methods())

# Input files, all located in the 'resources' directory next to this file
closures = Path(__file__).parent.joinpath('resources', 'upheno-closures.tsv.gz')
annotations = Path(__file__).parent.joinpath('resources', 'all-annotations.tsv.gz')
g2p = Path(__file__).parent.joinpath('resources', 'Mm_gene_phenotype.txt.gz')

# Root term handed to the IC graph builder
root = "UPHENO:0001001"

print("Loading closures")

# The annotation map is read first because the graph builder takes it as input
with gzip.open(annotations, 'rt') as annot_file:
    annot_map = flat_to_annotations(annot_file)

with gzip.open(closures, 'rt') as closure_file:
    graph = build_ic_graph_from_closures(closure_file, root, annot_map)

# The gene-to-phenotype file goes through the same flat-file parser
with gzip.open(g2p, 'rt') as annot_file:
    mouse_genes = flat_to_annotations(annot_file)

# Both similarity objects are constructed from the same graph
ic_sim = ICSemSim(graph)
graph_sim = GraphSemSim(graph)

# Adjacent literals are joined into one string before .split(',') is applied
profile_a = (
    "HP:0000403,HP:0000518,HP:0000565,HP:0000767,"
    "HP:0000872,HP:0001257,HP:0001263,HP:0001290,"
    "HP:0001629,HP:0002019,HP:0002072"
).split(',')