def test_build_disease_inappropriate_value(key, test_disease):
    """Building a disease term fails with KeyError when ``key`` holds a bad value.

    ``key`` and ``test_disease`` are supplied from outside this function
    (presumably a pytest parametrization and a fixture -- confirm against the
    decorators/conftest, which are not visible here).
    """
    ## GIVEN a gene alias mapping with a single gene
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}

    ## WHEN the value stored under key is replaced with a non-numeric string
    test_disease[key] = "not_an_int"

    ## THEN calling build_disease_term(test_disease, alias_genes) raises
    ## KeyError (according to the original note, a ValueError is thrown,
    ## caught and converted into a KeyError)
    with pytest.raises(KeyError):
        build_disease_term(test_disease, alias_genes)
def test_build_disease_missing_key(key, test_disease):
    """Building a disease term fails with KeyError when ``key`` is absent.

    ``key`` and ``test_disease`` are supplied from outside this function
    (presumably a pytest parametrization and a fixture -- confirm against the
    decorators/conftest, which are not visible here).
    """
    ## GIVEN a dictionary with disease information and a gene alias mapping
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}

    ## WHEN deleting the key from the disease information
    test_disease.pop(key)

    ## THEN calling build_disease_term() will raise KeyError
    with pytest.raises(KeyError):
        build_disease_term(test_disease, alias_genes)
def test_build_disease_term(adapter):
    """Build a disease term from inline disease info and check its fields."""
    ## GIVEN some disease info and a gene alias mapping with one gene
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}
    disease_info = {
        "mim_number": 615349,
        "description": "EHLERS-DANLOS SYNDROME, PROGEROID TYPE, 2",
        "hgnc_symbols": {"B3GALT6"},
        "inheritance": {"AR"},
    }

    ## WHEN building the disease term
    disease_obj = build_disease_term(disease_info, alias_genes)

    ## THEN assert the term is on the correct format
    expected_id = "OMIM:615349"
    assert disease_obj["_id"] == expected_id
    assert disease_obj["disease_id"] == expected_id
    assert disease_obj["inheritance"] == ["AR"]
    assert disease_obj["genes"] == [17978]
    assert disease_obj["source"] == "OMIM"
    assert isinstance(disease_obj, DiseaseTerm)
def load_disease_terms(adapter, genemap_lines, genes, hpo_disease_lines):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Each disease returned by ``get_mim_phenotypes`` is enriched with the hpo
    terms found for its "OMIM:<number>" id (when there are any), converted
    with ``build_disease_term`` and stored through
    ``adapter.load_disease_term``.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database
        hpo_disease_lines(iterable(str))
    """
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary below is valid even when nothing is loaded
    nr_diseases = 0

    logger.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of loaded diseases
    # rather than the index of the last one
    for nr_diseases, (disease_number, disease_info) in enumerate(disease_terms.items(), 1):
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            # Only attach hpo terms when there is at least one
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    logger.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    logger.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def test_build_disease_term(adapter, test_disease):
    """Build a disease term from the ``test_disease`` input and check its fields."""
    ## GIVEN some disease info and a gene alias mapping with one gene
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}

    ## WHEN building the disease term
    disease_obj = build_disease_term(test_disease, alias_genes)

    ## THEN assert the term is on the correct format
    expected_id = "OMIM:615349"
    assert disease_obj["_id"] == expected_id
    assert disease_obj["disease_id"] == expected_id
    assert disease_obj["inheritance"] == ["AR"]
    assert disease_obj["genes"] == [17978]
    assert disease_obj["source"] == "OMIM"
    assert isinstance(disease_obj, DiseaseTerm)
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt

    Each disease returned by ``get_mim_phenotypes`` is enriched with the hpo
    terms found for its "OMIM:<number>" id (when there are any), converted
    with ``build_disease_term`` and stored through
    ``adapter.load_disease_term``.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. When empty
            or None it is fetched with ``adapter.genes_by_alias()``
        hpo_disease_lines(iterable(str)): When empty or None the lines are
            fetched with ``fetch_hpo_to_genes_to_disease()``
    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_to_genes_to_disease()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary reports 0 (not None) when nothing is loaded
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of loaded diseases
    # rather than the index of the last one
    for nr_diseases, (disease_number, disease_info) in enumerate(disease_terms.items(), 1):
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]["hpo_terms"]
            # Only attach hpo terms when there is at least one
            if hpo_terms:
                disease_info["hpo_terms"] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Each disease returned by ``get_mim_phenotypes`` is enriched with the hpo
    terms found for its "OMIM:<number>" id (when there are any), converted
    with ``build_disease_term`` and stored through
    ``adapter.load_disease_term``.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. When empty
            or None it is fetched with ``adapter.genes_by_alias()``
        hpo_disease_lines(iterable(str)): When empty or None the lines are
            fetched with ``fetch_hpo_phenotype_to_terms()``
    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_phenotype_to_terms()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary reports 0 (not None) when nothing is loaded
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of loaded diseases
    # rather than the index of the last one
    for nr_diseases, (disease_number, disease_info) in enumerate(disease_terms.items(), 1):
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            # Only attach hpo terms when there is at least one
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def test_build_disease_term(adapter):
    """Build a disease term from inline disease info and check its fields."""
    ## GIVEN some disease info and a gene alias mapping with one gene
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}
    disease_info = {
        "mim_number": 615349,
        "description": "EHLERS-DANLOS SYNDROME, PROGEROID TYPE, 2",
        "hgnc_symbols": {"B3GALT6"},
        "inheritance": {"AR"},
    }

    ## WHEN building the disease term
    disease_obj = build_disease_term(disease_info, alias_genes)

    ## THEN assert the term is on the correct format
    expected_id = "OMIM:615349"
    assert disease_obj["_id"] == expected_id
    assert disease_obj["disease_id"] == expected_id
    assert disease_obj["inheritance"] == ["AR"]
    assert disease_obj["genes"] == [17978]
    assert disease_obj["source"] == "OMIM"
    assert isinstance(disease_obj, DiseaseTerm)