def test_build_disease_inappropriate_value(key, test_disease):
    """Building a disease term must fail with KeyError when ``key`` holds a non-integer string."""
    ## GIVEN a gene alias map and a dictionary with disease information
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}

    ## WHEN the value stored under key is replaced by a non-numeric string
    test_disease[key] = "not_an_int"

    ## THEN calling build_disease_term(test_disease, alias_genes) fails:
    ## a ValueError is thrown, caught and converted into a KeyError
    with pytest.raises(KeyError):
        build_disease_term(test_disease, alias_genes)
def test_build_disease_missing_key(key, test_disease):
    """Building a disease term must fail with KeyError when ``key`` is absent."""
    ## GIVEN a gene alias map and a dictionary with disease information
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}

    ## WHEN deleting key from the disease information
    del test_disease[key]

    ## THEN calling build_disease_term() will raise KeyError
    with pytest.raises(KeyError):
        build_disease_term(test_disease, alias_genes)
Exemple #3
0
def test_build_disease_term(adapter):
    """Build a disease term from hand-written disease info and check its fields."""
    ## GIVEN a gene alias map and some disease info
    alias_genes = {'B3GALT6': {'true': 17978, 'ids': [17978]}}
    disease_info = {
        'mim_number': 615349,
        'description': "EHLERS-DANLOS SYNDROME, PROGEROID TYPE, 2",
        'hgnc_symbols': {'B3GALT6'},
        'inheritance': {'AR'},
    }
    expected_id = "OMIM:615349"

    ## WHEN building the disease term
    disease_obj = build_disease_term(disease_info, alias_genes)

    ## THEN assert the term is on the correct format
    assert disease_obj['_id'] == disease_obj['disease_id'] == expected_id
    assert disease_obj['inheritance'] == ['AR']
    assert disease_obj['genes'] == [17978]
    assert disease_obj['source'] == 'OMIM'

    ## AND that the result is a DiseaseTerm
    assert isinstance(disease_obj, DiseaseTerm)
def test_build_disease_term(adapter):
    """Verify the term produced by build_disease_term for one hand-written disease."""
    ## GIVEN the alias entry for one gene
    gene_entry = {'true': 17978, 'ids': [17978]}
    alias_genes = {'B3GALT6': gene_entry}

    ## AND some disease info referring to that gene
    disease_info = {
        'mim_number': 615349,
        'description': "EHLERS-DANLOS SYNDROME, PROGEROID TYPE, 2",
        'hgnc_symbols': {'B3GALT6'},
        'inheritance': {'AR'},
    }

    ## WHEN building the disease term
    disease_obj = build_disease_term(disease_info, alias_genes)

    ## THEN assert the term is on the correct format
    assert disease_obj['_id'] == disease_obj['disease_id'] == "OMIM:615349"
    assert disease_obj['inheritance'] == ['AR']
    assert disease_obj['genes'] == [17978]
    assert disease_obj['source'] == 'OMIM'
    assert isinstance(disease_obj, DiseaseTerm)
Exemple #5
0
def load_disease_terms(adapter, genemap_lines, genes, hpo_disease_lines):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Every parsed disease gets the id "OMIM:<number>"; when the hpo disease
    information holds hpo terms for that id they are added to the disease
    info before the term is built and handed to the adapter.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database
        hpo_disease_lines(iterable(str))

    """
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Initialise the counter so the summary below also works when no
    # disease terms were parsed (the loop body never runs in that case)
    nr_diseases = 0

    logger.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of loaded
    # diseases rather than the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            # Only annotate the disease when there are terms to add
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    logger.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    logger.info("Time to load diseases: {0}".format(datetime.now() -
                                                    start_time))
def test_build_disease_term(adapter, test_disease):
    """Build a disease term from the test_disease fixture and check its fields."""
    ## GIVEN some disease info and a gene alias map
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}
    expected_id = "OMIM:615349"

    ## WHEN building the disease term
    disease_obj = build_disease_term(test_disease, alias_genes)

    ## THEN assert the term is on the correct format
    assert disease_obj["_id"] == disease_obj["disease_id"] == expected_id
    assert disease_obj["inheritance"] == ["AR"]
    assert disease_obj["genes"] == [17978]
    assert disease_obj["source"] == "OMIM"

    ## AND that the result is a DiseaseTerm
    assert isinstance(disease_obj, DiseaseTerm)
Exemple #7
0
def load_disease_terms(adapter,
                       genemap_lines,
                       genes=None,
                       hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt

    Every parsed disease gets the id "OMIM:<number>"; when the hpo disease
    information holds hpo terms for that id they are added to the disease
    info before the term is built and handed to the adapter.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. When
            empty or None the map is taken from adapter.genes_by_alias()
        hpo_disease_lines(iterable(str)): When empty or None the lines are
            taken from fetch_hpo_to_genes_to_disease()

    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_to_genes_to_disease()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary reports 0 (not None) when nothing is loaded
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of loaded
    # diseases rather than the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]["hpo_terms"]
            # Only annotate the disease when there are terms to add
            if hpo_terms:
                disease_info["hpo_terms"] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
Exemple #8
0
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Every parsed disease gets the id "OMIM:<number>"; when the hpo disease
    information holds hpo terms for that id they are added to the disease
    info before the term is built and handed to the adapter.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. When
            empty or None the map is taken from adapter.genes_by_alias()
        hpo_disease_lines(iterable(str)): When empty or None the lines are
            taken from fetch_hpo_phenotype_to_terms()

    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_phenotype_to_terms()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary reports 0 (not None) when nothing is loaded
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of loaded
    # diseases rather than the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            # Only annotate the disease when there are terms to add
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
Exemple #9
0
def test_build_disease_term(adapter):
    """Build a disease term from hand-written disease info and check its fields."""
    ## GIVEN a gene alias map and some disease info
    alias_genes = {"B3GALT6": {"true": 17978, "ids": [17978]}}
    disease_info = {
        "mim_number": 615349,
        "description": "EHLERS-DANLOS SYNDROME, PROGEROID TYPE, 2",
        "hgnc_symbols": {"B3GALT6"},
        "inheritance": {"AR"},
    }
    expected_id = "OMIM:615349"

    ## WHEN building the disease term
    disease_obj = build_disease_term(disease_info, alias_genes)

    ## THEN assert the term is on the correct format
    assert disease_obj["_id"] == disease_obj["disease_id"] == expected_id
    assert disease_obj["inheritance"] == ["AR"]
    assert disease_obj["genes"] == [17978]
    assert disease_obj["source"] == "OMIM"

    ## AND that the result is a DiseaseTerm
    assert isinstance(disease_obj, DiseaseTerm)