Ejemplo n.º 1
0
def build_database(connection):
    """Build the hpo database

    Opens the gzipped disease-to-gene, phenotype and gene files, parses
    them and hands the results to the populate functions together with
    ``connection``. Every gzip handle opened here is closed before the
    function returns, also when parsing or populating raises.

        Args:
            connection (sqlite3.connect): A database connection object
    """
    opened_handles = []

    def open_utf8(path):
        # Keep a reference to the raw gzip handle so the finally clause
        # below can close it; the codecs reader only wraps it.
        handle = gzip.open(path)
        opened_handles.append(handle)
        return getreader('utf-8')(handle, errors='replace')

    try:
        logger.info("Fetching disease to gene file")
        disease_to_genes = open_utf8(disease_to_genes_path)

        logger.info("Fetching phenotypes file")
        phenotype_to_genes = open_utf8(phenotypes_path)

        logger.info("Fetching genes file")
        genes = open_utf8(genes_path)

        logger.info("Parsing phenotypes to gene file")
        phenotypes = parse_phenotypes(phenotype_to_genes)
        logger.info("Parsing disease to gene file")
        diseases = parse_diseases(disease_to_genes)
        logger.info("Parsing genes  file")
        genes = parse_genes(genes)

        # The populate calls stay inside the try block: the parsers may
        # consume their input lazily, so the files must remain open until
        # the tables have been filled.
        logger.info("Populating hpo table")
        populate_hpo(connection=connection, hpo_terms=phenotypes)
        logger.debug("Hpo table populated")

        logger.info("Populating disease table")
        populate_disease(connection=connection, disease_terms=diseases)
        logger.debug("Disease table populated")

        logger.info("Populating gene table")
        populate_genes(connection=connection, genes=genes)
        logger.debug("Gene table populated")
    finally:
        for handle in opened_handles:
            handle.close()
Ejemplo n.º 2
0
def genes(request):
    """Return the result of parsing a small set of tab separated gene lines.

    The input is one header line followed by four gene records; it is
    passed to ``parse_genes`` and whatever that returns is returned here.
    ``request`` is accepted but not used.
    """
    header = (
        "#chrom\tstart\tstop\tensembl_id\tdescription\thgnc_symbol\thi_score\t"
        "constraint_score"
    )
    # Two records have both score columns empty, one has both filled and
    # one has only the constraint score.
    records = [
        (
            "2\t203044020\t203044694\tENSG00000227890\tproteasome (prosome,"
            " macropain) subunit, alpha type, 2 pseudogene 3\tPSMA2P3\t\t"
        ),
        (
            "11\t87040449\t87041094\tENSG00000254582\tproteasome (prosome,"
            " macropain) subunit, alpha type, 2 pseudogene 1\tPSMA2P1\t\t"
        ),
        (
            "5\t149737202\t149779871\tENSG00000070814\tTreacher Collins-"
            "Franceschetti syndrome 1\tTCOF1\t0.202\t4.98430839027322"
        ),
        (
            "17\t28442539\t28513493\tENSG00000126653\tnuclear speckle splicing"
            " regulatory protein 1\tNSRP1\t\t2.9732792450519"
        ),
    ]
    return parse_genes([header] + records)
Ejemplo n.º 3
0
def build_database(connection):
    """Build the hpo database

    Opens the gzipped disease-to-gene, phenotype and gene files, parses
    them and hands the results to the populate functions together with
    ``connection``. Every gzip handle opened here is closed before the
    function returns, also when parsing or populating raises.

        Args:
            connection (sqlite3.connect): A database connection object
    """
    opened_handles = []

    def open_utf8(path):
        # Keep a reference to the raw gzip handle so the finally clause
        # below can close it; the codecs reader only wraps it.
        handle = gzip.open(path)
        opened_handles.append(handle)
        return getreader('utf-8')(handle, errors='replace')

    try:
        logger.info("Fetching disease to gene file")
        disease_to_genes = open_utf8(disease_to_genes_path)

        logger.info("Fetching phenotypes file")
        phenotype_to_genes = open_utf8(phenotypes_path)

        logger.info("Fetching genes file")
        genes = open_utf8(genes_path)

        logger.info("Parsing phenotypes to gene file")
        phenotypes = parse_phenotypes(phenotype_to_genes)
        logger.info("Parsing disease to gene file")
        diseases = parse_diseases(disease_to_genes)
        logger.info("Parsing genes  file")
        genes = parse_genes(genes)

        # The populate calls stay inside the try block: the parsers may
        # consume their input lazily, so the files must remain open until
        # the tables have been filled.
        logger.info("Populating hpo table")
        populate_hpo(
            connection=connection,
            hpo_terms=phenotypes,
        )
        logger.debug("Hpo table populated")

        logger.info("Populating disease table")
        populate_disease(
            connection=connection,
            disease_terms=diseases,
        )
        logger.debug("Disease table populated")

        logger.info("Populating gene table")
        populate_genes(
            connection=connection,
            genes=genes,
        )
        logger.debug("Gene table populated")
    finally:
        for handle in opened_handles:
            handle.close()