def test_parse_diseases_lines():
    """Check source and gene symbols of diseases parsed from in-memory lines."""
    ## GIVEN an iterable of disease lines
    # NOTE(review): the header advertises <tab> separated fields; confirm the
    # whitespace inside these literals matches what the parser splits on.
    disease_lines = [
        "#Format: diseaseId<tab>gene-id<tab>gene-symbol",
        "OMIM:300818 5277 PIGA",
        "OMIM:300868 5277 PIGA",
        "ORPHANET:447 5277 PIGA",
        "OMIM:101400 2263 FGFR2",
        "OMIM:101400 7291 TWIST1",
        "OMIM:139500",
    ]

    ## WHEN parsing the diseases
    diseases = parse_hpo_diseases(disease_lines)

    ## THEN assert that the diseases are parsed correctly
    # (disease id, expected source, expected hgnc symbols)
    expectations = (
        ("OMIM:300818", "OMIM", {"PIGA"}),
        ("ORPHANET:447", "ORPHANET", {"PIGA"}),
        ("OMIM:101400", "OMIM", {"FGFR2", "TWIST1"}),
        # A line holding only the disease id yields an empty symbol set
        ("OMIM:139500", "OMIM", set()),
    )
    for disease_id, expected_source, expected_symbols in expectations:
        assert diseases[disease_id]["source"] == expected_source
        assert diseases[disease_id]["hgnc_symbols"] == expected_symbols
def test_parse_diseases_lines():
    """Parse a handful of disease lines and verify source and gene symbols."""
    ## GIVEN an iterable of disease lines
    # NOTE(review): the header advertises <tab> separated fields; confirm the
    # whitespace inside these literals matches what the parser splits on.
    header = "#Format: diseaseId<tab>gene-id<tab>gene-symbol"
    records = [
        "OMIM:300818 5277 PIGA",
        "OMIM:300868 5277 PIGA",
        "ORPHANET:447 5277 PIGA",
        "OMIM:101400 2263 FGFR2",
        "OMIM:101400 7291 TWIST1",
        "OMIM:139500",
    ]
    disease_lines = [header] + records

    ## WHEN parsing the diseases
    diseases = parse_hpo_diseases(disease_lines)

    ## THEN assert that the diseases are parsed correctly
    # Maps disease id -> (expected source, expected hgnc symbols)
    expected = {
        "OMIM:300818": ("OMIM", {"PIGA"}),
        "ORPHANET:447": ("ORPHANET", {"PIGA"}),
        "OMIM:101400": ("OMIM", {"FGFR2", "TWIST1"}),
        "OMIM:139500": ("OMIM", set()),
    }
    for disease_id, (source, symbols) in expected.items():
        assert diseases[disease_id]['source'] == source
        assert diseases[disease_id]['hgnc_symbols'] == symbols
def load_disease_terms(adapter, genemap_lines, genes, hpo_disease_lines):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Every disease term found in the genemap lines is built and handed to
    ``adapter.load_disease_term``. When the parsed hpo diseases hold a
    non-empty ``hpo_terms`` entry for the same OMIM id, it is attached to
    the disease info (in place) before the term is built.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database
        hpo_disease_lines(iterable(str))

    Returns:
        None
    """
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Bound before the loop so the summary log below works even when no
    # disease terms were found (the loop variable would otherwise be unset).
    nr_diseases = 0

    logger.info("Loading the hpo disease...")
    # Count from 1 so nr_diseases ends up as the number of loaded terms
    # rather than the index of the last one.
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    logger.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    logger.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def test_parse_diseases(hpo_disease_handle):
    """Each parsed term must match the source and number encoded in its id."""
    ## GIVEN an iterable of disease lines
    ## WHEN parsing the diseases
    parsed = parse_hpo_diseases(hpo_disease_handle)

    ## THEN assert that the diseases are parsed correctly
    for disease_id in parsed:
        # Ids look like "<source>:<number>"
        id_parts = disease_id.split(":")
        expected_source, expected_nr = id_parts[0], int(id_parts[1])

        term = parsed[disease_id]
        assert term["source"] == expected_source
        assert term["disease_nr"] == expected_nr
def test_parse_diseases(hpo_disease_handle):
    """Verify that parsed disease terms are consistent with their ids."""
    ## GIVEN an iterable of disease lines
    ## WHEN parsing the diseases
    diseases = parse_hpo_diseases(hpo_disease_handle)

    ## THEN assert that the diseases are parsed correctly
    for disease_id in diseases:
        # The id is "<source>:<number>"; both pieces are stored on the term
        pieces = disease_id.split(':')
        source = pieces[0]
        disease_nr = int(pieces[1])

        disease_term = diseases[disease_id]
        assert disease_term['source'] == source
        assert disease_term['disease_nr'] == disease_nr
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Every disease term found in the genemap lines is built and handed to
    ``adapter.load_disease_term``. When the parsed hpo diseases hold a
    non-empty ``hpo_terms`` entry for the same OMIM id, it is attached to
    the disease info (in place) before the term is built.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. When
            falsy, ``adapter.genes_by_alias()`` is used instead.
        hpo_disease_lines(iterable(str)): When falsy, the lines are obtained
            from ``fetch_hpo_phenotype_to_terms()``.

    Returns:
        None
    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_phenotype_to_terms()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at 0 so the summary log reports a real count (not None) when
    # there were no disease terms to load.
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so nr_diseases ends up as the number of loaded terms
    # rather than the index of the last one.
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def test_pheno_terms(request, hpo_disease_file):
    """Open the hpo disease file and return the diseases parsed from it"""
    # Emits an empty line on stdout before the file is opened
    print('')
    # `request` is accepted but not used here
    return parse_hpo_diseases(get_file_handle(hpo_disease_file))