def test_link_genes(
    genes37_handle,
    hgnc_handle,
    exac_handle,
    mim2gene_handle,
    genemap_handle,
    hpo_genes_handle,
):
    """Link genes from every resource and check the core fields of each gene."""
    ## GIVEN lines from all the gene resources
    resources = {
        "ensembl_lines": genes37_handle,
        "hgnc_lines": hgnc_handle,
        "exac_lines": exac_handle,
        "mim2gene_lines": mim2gene_handle,
        "genemap_lines": genemap_handle,
        "hpo_lines": hpo_genes_handle,
    }

    ## WHEN linking the information from the different sources
    linked = link_genes(**resources)

    ## THEN every linked gene has identifiers, coordinates and lists its own
    ## symbol among the previous symbols
    required_keys = ("hgnc_symbol", "hgnc_id", "chromosome", "start", "end")
    for gene_info in linked.values():
        for key in required_keys:
            assert gene_info[key]
        assert gene_info["hgnc_symbol"] in gene_info["previous_symbols"]
def genes(context, build, api_key):
    """Drop the stored genes and load them again for one or both builds.

    The OMIM files are fetched first; the command aborts when no api key is
    available or when the download fails. When ``build`` is falsy both
    build '37' and build '38' are loaded.

    Args:
        context: object exposing ``obj`` (mapping with 'adapter' and,
            optionally, 'omim_api_key') and ``abort()``
        build(str): genome build to load, or a falsy value for both builds
        api_key(str): OMIM api key, overrides the one stored in ``context.obj``
    """
    adapter = context.obj['adapter']

    # An api key is required to download the OMIM files
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(omim_key, mim2genes=True, morbidmap=True, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        context.abort()

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")

    hpo_genes = fetch_hpo_genes()

    builds = [build] if build else ['37', '38']

    for genome_build in builds:
        LOG.info("Loading hgnc file from {0}".format(hgnc_path))
        hgnc_handle = get_file_handle(hgnc_path)

        # Only the two known builds have a transcripts file
        if genome_build == '37':
            ensembl_handle = get_file_handle(transcripts37_path)
        elif genome_build == '38':
            ensembl_handle = get_file_handle(transcripts38_path)
        else:
            ensembl_handle = None

        LOG.info("Loading exac gene file from {0}".format(exac_path))
        exac_handle = get_file_handle(exac_path)

        linked_genes = link_genes(
            ensembl_lines=ensembl_handle,
            hgnc_lines=hgnc_handle,
            exac_lines=exac_handle,
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
        )

        load_hgnc_genes(adapter=adapter, genes=linked_genes, build=genome_build)
def genes(request, genes37_handle, hgnc_handle, exac_handle,
          mim2gene_handle, genemap_handle, hpo_genes_handle):
    """Return the result of linking the genes from the given resource handles."""
    print('')
    resources = dict(
        ensembl_lines=genes37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )
    return link_genes(**resources)
def genes(ctx, update, build):
    """Load linked gene information into the database for one build.

    When genes are already present for ``build`` the command aborts, unless
    ``update`` is set, in which case the stored genes are dropped first.

    Args:
        ctx: object exposing ``obj`` (mapping with 'adapter') and ``abort()``
        update(bool): drop existing genes and load them again
        build(str): '37' selects the build 37 transcripts file, any other
            value selects the build 38 file
    """
    adapter = ctx.obj['adapter']

    # Check whether genes are already loaded for this build
    already_loaded = adapter.nr_genes(build=build) > 0
    if already_loaded and not update:
        logger.info("Genes are already loaded")
        logger.info("If you wish to update genes use '--update'")
        ctx.abort()
    elif already_loaded:
        logger.warning("Dropping all gene information")
        adapter.drop_genes()
        logger.info("Genes dropped")

    logger.info("Loading hgnc file from {0}".format(hgnc_path))
    hgnc_handle = get_file_handle(hgnc_path)

    if build != '37':
        ensembl_handle = get_file_handle(transcripts38_path)
    else:
        logger.info("Loading ensembl transcript file from {0}".format(
            transcripts37_path))
        ensembl_handle = get_file_handle(transcripts37_path)

    logger.info("Loading exac gene file from {0}".format(exac_path))
    exac_handle = get_file_handle(exac_path)

    logger.info("Loading mim information from files {0}, {1}".format(
        mim2gene_path, genemap2_path))
    mim2gene_handle = get_file_handle(mim2gene_path)
    genemap_handle = get_file_handle(genemap2_path)

    hpo_handle = get_file_handle(hpogenes_path)

    linked_genes = link_genes(
        ensembl_lines=ensembl_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_handle,
    )

    load_hgnc_genes(adapter=adapter, genes=linked_genes, build=build)
def genes(request, transcripts_file, hgnc_file, exac_file,
          mim2gene_file, genemap_file, hpo_genes_file):
    """Open every resource file and return the result of linking the genes."""
    print('')
    # Dict literals are evaluated in order, so the files are opened in the
    # same sequence as the keyword arguments are listed
    resources = {
        'ensembl_lines': get_file_handle(transcripts_file),
        'hgnc_lines': get_file_handle(hgnc_file),
        'exac_lines': get_file_handle(exac_file),
        'mim2gene_lines': get_file_handle(mim2gene_file),
        'genemap_lines': get_file_handle(genemap_file),
        'hpo_lines': get_file_handle(hpo_genes_file),
    }
    return link_genes(**resources)
def test_link_genes_no_omim(genes37_handle, hgnc_handle, exac_handle, hpo_genes_handle):
    """Link genes without any OMIM resources and check the core gene fields."""
    ## GIVEN gene information without OMIM
    resources = {
        "ensembl_lines": genes37_handle,
        "hgnc_lines": hgnc_handle,
        "exac_lines": exac_handle,
        "hpo_lines": hpo_genes_handle,
    }

    ## WHEN linking the information from the different sources
    linked = link_genes(**resources)

    ## THEN assert that it works even without omim
    required_keys = ("hgnc_symbol", "hgnc_id", "chromosome", "start", "end")
    for gene_info in linked.values():
        for key in required_keys:
            assert gene_info[key]
        assert gene_info["hgnc_symbol"] in gene_info["previous_symbols"]
def test_link_genes(genes37_handle, hgnc_handle, exac_handle, mim2gene_handle,
                    genemap_handle, hpo_genes_handle):
    """Link genes from every resource and check the core fields of each gene."""
    linked = link_genes(
        ensembl_lines=genes37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )

    # Identifiers and coordinates must be present and truthy for every gene
    mandatory = ('hgnc_symbol', 'hgnc_id', 'chromosome', 'start', 'end')
    for gene_info in linked.values():
        for field in mandatory:
            assert gene_info[field]
        # The current symbol is expected among the previous symbols as well
        assert gene_info['hgnc_symbol'] in gene_info['previous_symbols']
def load_hgnc_genes(
    adapter,
    genes=None,
    ensembl_lines=None,
    hgnc_lines=None,
    exac_lines=None,
    mim2gene_lines=None,
    genemap_lines=None,
    hpo_lines=None,
    build="37",
    omim_api_key="",
):
    """Build gene objects and bulk load them into the database.

    When ``genes`` is empty or missing, the gene dictionary is produced by
    ``link_genes`` from the resource lines. Resources that were not passed in
    are fetched. OMIM files are only fetched when an api key is given;
    otherwise a warning is logged and linking proceeds with whatever OMIM
    lines were passed.

    Genes without a 'chromosome' value are skipped and counted.

    Args:
        adapter: database adapter exposing ``load_hgnc_bulk``
        genes(dict): already linked genes; its values are the gene
            information dictionaries
        ensembl_lines(iterable(str)): ensembl gene information
        hgnc_lines(iterable(str)): gene information lines for hgnc
        exac_lines(iterable(str)): exac constraint lines
        mim2gene_lines(iterable(str)): omim mim2gene lines
        genemap_lines(iterable(str)): omim genemap2 lines
        hpo_lines(iterable(str)): hpo to gene lines
        build(str): genome build, defaults to '37'
        omim_api_key(str): key used to fetch the omim files when they are missing

    Returns:
        list: the gene objects that were handed to ``adapter.load_hgnc_bulk``
    """
    if not genes:
        # Fetch every resource the caller did not provide
        if ensembl_lines is None:
            ensembl_lines = fetch_ensembl_genes(build=build)
        if not hgnc_lines:
            hgnc_lines = fetch_hgnc()
        if not exac_lines:
            exac_lines = fetch_exac_constraint()

        if not mim2gene_lines or not genemap_lines:
            if omim_api_key:
                mim_files = fetch_mim_files(omim_api_key, mim2genes=True, genemap2=True)
                mim2gene_lines = mim_files["mim2genes"]
                genemap_lines = mim_files["genemap2"]
            else:
                LOG.warning("No omim api key provided!")

        if not hpo_lines:
            hpo_lines = fetch_hpo_files(hpogenes=True)["hpogenes"]

        # Merge all resources into one dictionary of genes
        genes = link_genes(
            ensembl_lines=ensembl_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            hpo_lines=hpo_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
        )

    built = []
    without_coordinates = 0
    with progressbar(genes.values(), label="Building genes", length=len(genes)) as bar:
        for gene_info in bar:
            if gene_info.get("chromosome"):
                built.append(build_hgnc_gene(gene_info, build=build))
                continue
            LOG.debug(
                "skipping gene: %s. No coordinates found",
                gene_info.get("hgnc_symbol", "?"),
            )
            without_coordinates += 1

    LOG.info("Loading genes build %s", build)
    adapter.load_hgnc_bulk(built)

    LOG.info("Loading done. %s genes loaded", len(built))
    LOG.info("Nr of genes without coordinates in build %s: %s", build, without_coordinates)

    return built
def database(context, institute_name, user_name, user_mail):
    """Setup a scout database.

    Every existing non-system collection is dropped, then an institute, an
    admin user, the genes for build 37 and 38 and the hpo terms are loaded.
    Finally an index on the hgnc collection is created.

    Args:
        context: object whose ``obj`` mapping holds the adapter, the default
            institute/user values and the resource handles read below
        institute_name(str): institute id; falls back to ``context.obj['institute_name']``
        user_name(str): falls back to ``context.obj['user_name']``
        user_mail(str): falls back to ``context.obj['user_mail']``
    """
    log.info("Running scout setup database")

    institute_name = institute_name or context.obj['institute_name']
    user_name = user_name or context.obj['user_name']
    user_mail = user_mail or context.obj['user_mail']

    adapter = context.obj['adapter']

    log.info("Setting up database %s", context.obj['mongodb'])
    log.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        # MongoDB's internal 'system.*' collections are not application
        # data; trying to drop them can fail, so leave them alone
        if collection_name.startswith('system.'):
            continue
        log.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
    log.info("Database deleted")

    # Build a institute with id institute_name
    institute_obj = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    )

    # Add the institute to database
    adapter.add_institute(institute_obj)

    # Build a user obj
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_name]
    )
    adapter.add_user(user_obj)

    # Load the genes and transcripts
    hgnc_handle = context.obj['hgnc']
    transcripts37_handle = context.obj['transcripts37']
    transcripts38_handle = context.obj['transcripts38']
    exac_handle = context.obj['exac']
    hpo_genes_handle = context.obj['hpogenes']
    mim2gene_handle = context.obj['mim2gene']
    genemap_handle = context.obj['genemap2']

    genes37 = link_genes(
        ensembl_lines=transcripts37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )
    load_hgnc_genes(adapter, genes37, build='37')

    # Build 38 reads its own set of resource entries from context.obj
    genes38 = link_genes(
        ensembl_lines=transcripts38_handle,
        hgnc_lines=context.obj['hgnc38'],
        exac_lines=context.obj['exac38'],
        mim2gene_lines=context.obj['mim2gene38'],
        genemap_lines=context.obj['genemap2_38'],
        hpo_lines=context.obj['hpogenes_38'],
    )
    load_hgnc_genes(adapter, genes38, build='38')

    hpo_terms_handle = context.obj['hpo_terms']
    disease_handle = context.obj['disease_terms']
    hpo_disease_handle = context.obj['hpodiseases']

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        disease_lines=disease_handle,
        hpo_disease_lines=hpo_disease_handle
    )

    log.info("Creating indexes")
    adapter.hgnc_collection.create_index([('build', pymongo.ASCENDING),
                                          ('chromosome', pymongo.ASCENDING)])
    log.info("hgnc gene index created")

    log.info("Scout instance setup successful")
def demo(context):
    """Setup a scout demo instance.

    This instance will be populated with a case, a gene panel and some
    variants. Every existing non-system collection is dropped first.

    Args:
        context: object whose ``obj`` mapping holds the adapter, the
            institute/user values and the resource handles read below
    """
    log.info("Running scout setup demo")
    institute_name = context.obj['institute_name']
    user_name = context.obj['user_name']
    user_mail = context.obj['user_mail']

    adapter = context.obj['adapter']

    log.info("Setting up database %s", context.obj['mongodb'])
    log.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        # MongoDB's internal 'system.*' collections are not application
        # data; trying to drop them can fail, so leave them alone
        if collection_name.startswith('system.'):
            continue
        log.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
    log.info("Database deleted")

    # Build a institute with id institute_name
    institute_obj = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    )

    # Add the institute to database
    adapter.add_institute(institute_obj)

    # Build a user obj
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_name]
    )
    adapter.add_user(user_obj)

    # Load the genes and transcripts (the demo only loads build 37)
    hgnc_handle = context.obj['hgnc']
    transcripts37_handle = context.obj['transcripts37']
    exac_handle = context.obj['exac']
    hpo_genes_handle = context.obj['hpogenes']
    mim2gene_handle = context.obj['mim2gene']
    genemap_handle = context.obj['genemap2']

    genes37 = link_genes(
        ensembl_lines=transcripts37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )
    load_hgnc_genes(adapter, genes37, build='37')

    hpo_terms_handle = context.obj['hpo_terms']
    disease_handle = context.obj['disease_terms']
    hpo_disease_handle = context.obj['hpodiseases']

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        disease_lines=disease_handle,
        hpo_disease_lines=hpo_disease_handle
    )

    panel_info = {
        'date': datetime.datetime.now(),
        'file': panel_path,
        'type': 'clinical',
        'institute': 'cust000',
        'version': '1.0',
        'panel_name': 'panel1',
        'full_name': 'Test panel'
    }

    parsed_panel = parse_gene_panel(panel_info)
    # The built panel is not used here; the call is kept because it may
    # validate the parsed panel against the database -- TODO confirm
    build_panel(parsed_panel, adapter)

    load_panel(
        adapter=adapter,
        panel_info=panel_info
    )

    case_handle = get_file_handle(load_path)
    # safe_load only builds plain YAML types and, unlike a bare yaml.load(),
    # does not need an explicit Loader on recent PyYAML releases
    case_data = yaml.safe_load(case_handle)

    case_data['vcf_snv'] = clinical_snv_path
    case_data['vcf_sv'] = clinical_sv_path
    case_data['vcf_snv_research'] = research_snv_path
    case_data['vcf_sv_research'] = research_sv_path
    case_data['madeline'] = madeline_path

    load_scout(adapter, case_data)

    log.info("Creating indexes")
    adapter.hgnc_collection.create_index([('build', pymongo.ASCENDING),
                                          ('chromosome', pymongo.ASCENDING)])
    log.info("hgnc gene index created")

    log.info("Scout demo instance setup successful")
def database(context, institute_name, user_name, user_mail, api_key):
    """Setup a scout database.

    The OMIM files are fetched before anything is touched; the command
    aborts when no api key is available or the download fails. After that
    all non-system collections are dropped and an institute, an admin user,
    the genes for build 37 and 38, the disease terms and the indexes are
    loaded.

    Args:
        context: object exposing ``obj`` (mapping with 'adapter', 'mongodb',
            default institute/user values and, optionally, 'omim_api_key')
            and ``abort()``
        institute_name(str): falls back to ``context.obj['institute_name']``
        user_name(str): falls back to ``context.obj['user_name']``
        user_mail(str): falls back to ``context.obj['user_mail']``
        api_key(str): OMIM api key, overrides the one stored in ``context.obj``
    """
    LOG.info("Running scout setup database")

    # An api key is required to download the OMIM files
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(omim_key, mim2genes=True, morbidmap=True, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        context.abort()

    institute_name = institute_name or context.obj['institute_name']
    user_name = user_name or context.obj['user_name']
    user_mail = user_mail or context.obj['user_mail']

    adapter = context.obj['adapter']

    LOG.info("Deleting previous database")
    for name in adapter.db.collection_names():
        if name.startswith('system'):
            continue
        LOG.info("Deleting collection %s", name)
        adapter.db.drop_collection(name)
    LOG.info("Database deleted")

    LOG.info("Setting up database %s", context.obj['mongodb'])

    # Create the institute and store it
    adapter.add_institute(
        build_institute(
            internal_id=institute_name,
            display_name=institute_name,
            sanger_recipients=[user_mail],
        )
    )

    # Create the admin user
    adapter.add_user({
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_name],
    })

    # Fetch the genes to hpo information
    hpo_genes = fetch_hpo_genes()

    # Link and load the genes, one build at a time
    transcripts_per_build = (('37', transcripts37_path), ('38', transcripts38_path))
    for genome_build, transcripts_path in transcripts_per_build:
        linked_genes = link_genes(
            ensembl_lines=get_file_handle(transcripts_path),
            hgnc_lines=get_file_handle(hgnc_path),
            exac_lines=get_file_handle(exac_path),
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
        )
        load_hgnc_genes(adapter, linked_genes, build=genome_build)

    load_hpo(
        adapter=adapter,
        disease_lines=mim_files['genemap2'],
    )

    LOG.info("Creating indexes")
    adapter.load_indexes()

    LOG.info("Scout instance setup successful")
def demo(context):
    """Setup a scout demo instance.

    This instance will be populated with a case, a gene panel and some
    variants. Every existing non-system collection is dropped first. Only
    build 37 genes are loaded, from the reduced resource files.

    Args:
        context: object whose ``obj`` mapping holds 'adapter', 'mongodb',
            'institute_name', 'user_name' and 'user_mail'
    """
    LOG.info("Running scout setup demo")
    institute_name = context.obj['institute_name']
    user_name = context.obj['user_name']
    user_mail = context.obj['user_mail']

    adapter = context.obj['adapter']

    LOG.info("Setting up database %s", context.obj['mongodb'])
    LOG.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        # MongoDB's internal 'system.*' collections are not application
        # data; trying to drop them can fail, so leave them alone
        if collection_name.startswith('system.'):
            continue
        LOG.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
    LOG.info("Database deleted")

    # Build a institute with id institute_name
    institute_obj = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    )

    # Add the institute to database
    adapter.add_institute(institute_obj)

    # Build a user obj
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_name]
    )
    adapter.add_user(user_obj)

    # Load the genes and transcripts. Only the handles that are consumed
    # below are opened; build 38 is not loaded by the demo.
    LOG.info("Loading hgnc genes from %s", hgnc_reduced_path)
    hgnc_handle = get_file_handle(hgnc_reduced_path)

    LOG.info("Loading exac genes from %s", exac_reduced_path)
    exac_handle = get_file_handle(exac_reduced_path)

    LOG.info("Loading mim2gene info from %s", mim2gene_reduced_path)
    mim2gene_handle = get_file_handle(mim2gene_reduced_path)

    LOG.info("Loading genemap info from %s", genemap2_reduced_path)
    genemap_handle = get_file_handle(genemap2_reduced_path)

    LOG.info("Loading hpo gene info from %s", hpogenes_reduced_path)
    hpo_genes_handle = get_file_handle(hpogenes_reduced_path)
    hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)

    LOG.info("Loading hpo disease info from %s", hpo_phenotype_to_terms_reduced_path)
    hpo_disease_handle = get_file_handle(hpo_phenotype_to_terms_reduced_path)

    LOG.info("Loading hpo terms from %s", hpoterms_reduced_path)
    hpo_terms_handle = get_file_handle(hpoterms_reduced_path)

    # The genemap2 file is opened a second time because it is consumed both
    # by the gene linking and by the disease loading
    LOG.info("Loading omim disease info from %s", genemap2_reduced_path)
    disease_handle = get_file_handle(genemap2_reduced_path)

    LOG.info("Loading transcripts build 37 info from %s", transcripts37_reduced_path)
    transcripts37_handle = get_file_handle(transcripts37_reduced_path)

    genes37 = link_genes(
        ensembl_lines=transcripts37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )
    load_hgnc_genes(adapter, genes37, build='37')

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
        disease_lines=disease_handle,
        hpo_disease_lines=hpo_disease_handle
    )

    adapter.load_panel(
        path=panel_path,
        institute='cust000',
        panel_id='panel1',
        date=datetime.datetime.now(),
        panel_type='clinical',
        version=1.0,
        display_name='Test panel'
    )

    case_handle = get_file_handle(load_path)
    # safe_load only builds plain YAML types and, unlike a bare yaml.load(),
    # does not need an explicit Loader on recent PyYAML releases
    case_data = yaml.safe_load(case_handle)

    adapter.load_case(case_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()

    LOG.info("Scout demo instance setup successful")