def test_parse_diseases_lines():
    """Check source and gene symbols parsed from hand-written disease lines."""
    ## GIVEN a header line followed by tab separated disease lines, where one
    ## disease is listed with two genes and one disease has no gene columns
    header = "#Format: diseaseId<tab>gene-id<tab>gene-symbol"
    entries = [
        "OMIM:300818\t5277\tPIGA",
        "OMIM:300868\t5277\tPIGA",
        "ORPHANET:447\t5277\tPIGA",
        "OMIM:101400\t2263\tFGFR2",
        "OMIM:101400\t7291\tTWIST1",
        "OMIM:139500",
    ]

    ## WHEN parsing the diseases
    parsed = parse_hpo_diseases([header] + entries)

    ## THEN every disease carries its source prefix and its set of gene symbols
    single_gene_omim = parsed["OMIM:300818"]
    assert single_gene_omim["source"] == "OMIM"
    assert single_gene_omim["hgnc_symbols"] == {"PIGA"}

    orphanet_term = parsed["ORPHANET:447"]
    assert orphanet_term["source"] == "ORPHANET"
    assert orphanet_term["hgnc_symbols"] == {"PIGA"}

    # The same disease id on two lines should collect both symbols
    two_gene_omim = parsed["OMIM:101400"]
    assert two_gene_omim["source"] == "OMIM"
    assert two_gene_omim["hgnc_symbols"] == {"FGFR2", "TWIST1"}

    # A line holding only the disease id should give an empty symbol set
    geneless_omim = parsed["OMIM:139500"]
    assert geneless_omim["source"] == "OMIM"
    assert geneless_omim["hgnc_symbols"] == set()
def test_parse_diseases_lines():
    """Parse a small set of disease lines and verify each expected term."""
    ## GIVEN a iterable of disease lines
    disease_lines = [
        "#Format: diseaseId<tab>gene-id<tab>gene-symbol",
        "OMIM:300818\t5277\tPIGA",
        "OMIM:300868\t5277\tPIGA",
        "ORPHANET:447\t5277\tPIGA",
        "OMIM:101400\t2263\tFGFR2",
        "OMIM:101400\t7291\tTWIST1",
        "OMIM:139500",
    ]
    # (disease id, expected source, expected gene symbols)
    expectations = (
        ("OMIM:300818", 'OMIM', {'PIGA'}),
        ("ORPHANET:447", "ORPHANET", {'PIGA'}),
        ("OMIM:101400", 'OMIM', {'FGFR2', 'TWIST1'}),
        ("OMIM:139500", 'OMIM', set()),
    )

    ## WHEN parsing the diseases
    diseases = parse_hpo_diseases(disease_lines)

    ## THEN assert that the diseases are parsed correct
    for disease_id, expected_source, expected_symbols in expectations:
        term = diseases[disease_id]
        assert term['source'] == expected_source
        assert term['hgnc_symbols'] == expected_symbols
Example #3
0
def load_disease_terms(adapter, genemap_lines, genes, hpo_disease_lines):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Each disease found by ``get_mim_phenotypes`` gets the hpo terms of the
    matching "OMIM:<number>" entry from ``parse_hpo_diseases`` (when that entry
    has any), is built with ``build_disease_term`` and is then passed to
    ``adapter.load_disease_term``.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database
        hpo_disease_lines(iterable(str))

    """
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Stays 0 when there is nothing to load, so the summary log below never
    # refers to an unbound name
    nr_diseases = 0

    logger.info("Loading the hpo disease...")
    # Count from 1 so the final value is the number of diseases loaded, not
    # the index of the last one
    for nr_diseases, (disease_number, disease_info) in enumerate(
        disease_terms.items(), 1
    ):
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            # Only annotate the disease when there actually are hpo terms
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    logger.info("Loading done. Nr of diseases loaded %s", nr_diseases)
    logger.info("Time to load diseases: %s", datetime.now() - start_time)
def test_parse_diseases(hpo_disease_handle):
    """Verify each parsed disease term agrees with the id it is stored under."""
    ## GIVEN a iterable of disease lines
    ## WHEN parsing the diseases
    parsed = parse_hpo_diseases(hpo_disease_handle)
    ## THEN the source and number of every term match the pieces of its id
    for disease_id, term in parsed.items():
        # Ids look like "<source>:<number>"; split once and reuse both pieces
        id_parts = disease_id.split(":")
        expected_source = id_parts[0]
        expected_nr = int(id_parts[1])

        assert term["source"] == expected_source
        assert term["disease_nr"] == expected_nr
def test_parse_diseases(hpo_disease_handle):
    """Check that every parsed disease stores the source and number of its key."""
    ## GIVEN a iterable of disease lines
    ## WHEN parsing the diseases
    diseases = parse_hpo_diseases(hpo_disease_handle)
    ## THEN assert that the diseases are parsed correct
    for key in diseases:
        # The key is "<source>:<number>"; the number is compared as an int
        fields = key.split(':')
        expected_source, expected_number = fields[0], int(fields[1])

        entry = diseases[key]
        assert entry['source'] == expected_source
        assert entry['disease_nr'] == expected_number
Example #6
0
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Each disease found by ``get_mim_phenotypes`` gets the hpo terms of the
    matching "OMIM:<number>" entry from ``parse_hpo_diseases`` (when that entry
    has any), is built with ``build_disease_term`` and is then passed to
    ``adapter.load_disease_term``.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. When empty
            or not given it is taken from ``adapter.genes_by_alias()``.
        hpo_disease_lines(iterable(str)): When empty or not given the lines
            are taken from ``fetch_hpo_phenotype_to_terms()``.

    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_phenotype_to_terms()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Stays 0 when there is nothing to load, so the summary reports a number
    # instead of "None"
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so the final value is the number of diseases loaded, not
    # the index of the last one
    for nr_diseases, (disease_number, disease_info) in enumerate(
        disease_terms.items(), 1
    ):
        disease_id = "OMIM:{0}".format(disease_number)

        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            # Only annotate the disease when there actually are hpo terms
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded %s", nr_diseases)
    LOG.info("Time to load diseases: %s", datetime.now() - start_time)
Example #7
0
def test_pheno_terms(request, hpo_disease_file):
    """Return the diseases parsed from a hpo disease file.

    ``hpo_disease_file`` is passed through ``get_file_handle`` and the result
    is given to ``parse_hpo_diseases``. ``request`` is accepted but not used.
    """
    # NOTE(review): this returns a value and makes no assertions, so it looks
    # like a pytest fixture whose decorator is not visible here - confirm.
    print('')
    return parse_hpo_diseases(get_file_handle(hpo_disease_file))