def test_add_hg38_gene_links():
    """Check that links are attached to a gene when genome build 38 is used."""
    # GIVEN a gene holding nothing but an hgnc id, and genome build 38
    genome_build = 38
    minimal_gene = {"hgnc_id": 257}

    # WHEN the gene is decorated with links
    add_gene_links(minimal_gene, genome_build)

    # THEN the hgnc link is present on the gene
    assert "hgnc_link" in minimal_gene
def test_ucsc_link():
    """Check that a UCSC link is attached to a gene that carries a ucsc id."""
    # GIVEN a gene with an hgnc id and a ucsc id, and genome build 37
    genome_build = 37
    minimal_gene = {"hgnc_id": 257, "ucsc_id": "uc001jwi.4"}

    # WHEN the gene is decorated with links
    add_gene_links(minimal_gene, genome_build)

    # THEN a ucsc link is stored on the gene
    assert minimal_gene.get("ucsc_link") is not None
def gene(store, hgnc_id):
    """Parse information about a gene.

    The store is queried once for each genome build ('37' and '38'). Every
    record that is found is annotated in place with a ``position`` string and
    with gene, transcript and phenotype links, and is stored under
    ``res['builds'][build]``.

    Args:
        store: adapter exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: identifier of the gene to look up

    Returns:
        dict: gene summary. ``builds`` maps each build to its record (or
            None), ``record`` is the first record that was found and the
            remaining top level fields are taken from the last record that
            was found. ``ensembl_id`` is not filled in by this function.

    Raises:
        ValueError: if the gene is not found in any genome build
    """
    res = {
        'builds': {'37': None, '38': None},
        'symbol': None,
        'description': None,
        'ensembl_id': None,
        'record': None,
    }

    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['description'] = record['description']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        # The build keys are strings, the gene link helper is given an int
        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance', False)
        res['inheritance_models'] = record.get('inheritance_models', [])

        for transcript in record['transcripts']:
            transcript['position'] = "{this[chrom]}:{this[start]}-{this[end]}".format(
                this=transcript)
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # Keep the first record that was found as the main record
        if not res['record']:
            res['record'] = record

    # If none of the genes were found. The check has to look at the builds:
    # res itself always holds the non-empty 'builds' dict, so any(res.values())
    # could never be False.
    if not any(res['builds'].values()):
        raise ValueError("No gene found for hgnc id {}".format(hgnc_id))

    return res
def gene(store, hgnc_id):
    """Parse information about a gene.

    Looks the gene up in genome builds "37" and "38". Each record returned by
    the store gets a ``position`` string plus gene, transcript and phenotype
    links added in place and is saved under ``res["builds"][build]``.

    Args:
        store: adapter exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: identifier of the gene to look up

    Returns:
        dict: gene summary. ``builds`` maps each build to its record (or
            None), ``record`` is the first record that was found and the
            other top level fields come from the last record that was found.
            ``ensembl_id`` is not filled in by this function.

    Raises:
        ValueError: if the store has no record of the gene in any build
    """
    res = {
        "builds": {"37": None, "38": None},
        "symbol": None,
        "description": None,
        "ensembl_id": None,
        "record": None,
    }

    for build in res["builds"]:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record["position"] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res["aliases"] = record["aliases"]
        res["hgnc_id"] = record["hgnc_id"]
        res["description"] = record["description"]
        res["builds"][build] = record
        res["symbol"] = record["hgnc_symbol"]
        res["entrez_id"] = record.get("entrez_id")
        res["pli_score"] = record.get("pli_score")

        # The build keys are strings, the gene link helper is given an int
        add_gene_links(record, int(build))

        res["omim_id"] = record.get("omim_id")
        res["incomplete_penetrance"] = record.get("incomplete_penetrance", False)
        res["inheritance_models"] = record.get("inheritance_models", [])

        for transcript in record["transcripts"]:
            transcript["position"] = "{this[chrom]}:{this[start]}-{this[end]}".format(
                this=transcript)
            add_tx_links(transcript, build)

        for phenotype in record.get("phenotypes", []):
            phenotype["omim_link"] = omim(phenotype.get("mim_number"))

        # Only the first record found is exposed as the main record
        if not res["record"]:
            res["record"] = record

    # If none of the genes were found. Only the per-build records are tested:
    # any(res.values()) is always True since the "builds" dict is never empty.
    if not any(res["builds"].values()):
        raise ValueError("No gene found for hgnc id {}".format(hgnc_id))

    return res
def parse_gene(gene_obj, build=None):
    """Parse variant genes.

    When the gene carries a truthy 'common' entry, links are added to the
    gene, every transcript is parsed and the transcripts that have a
    'refseq_id' are stored under 'primary_transcripts'. Genes without such an
    entry are left untouched.

    Args:
        gene_obj(dict): a variant gene, modified in place
        build(int): genome build, 37 is used when nothing is given
    """
    build = build or 37

    # Read 'common' with .get so a gene lacking the key is skipped instead of
    # raising KeyError.
    # NOTE(review): add_gene_info only sets 'common' when the store knows the
    # gene, so the key is presumably missing for unknown genes - confirm.
    if not gene_obj.get('common'):
        return

    add_gene_links(gene_obj, build)

    refseq_transcripts = []
    for tx_obj in gene_obj['transcripts']:
        parse_transcript(gene_obj, tx_obj, build)

        # select refseq transcripts as "primary"
        if tx_obj.get('refseq_id'):
            refseq_transcripts.append(tx_obj)

    gene_obj['primary_transcripts'] = refseq_transcripts
def parse_gene(gene_obj, build=None):
    """Parse variant genes.

    When the gene carries a truthy 'common' entry, links are added to the
    gene, 'primary_transcripts' is set to the transcripts that have a
    'refseq_id' (or to all transcripts when there are none) and every
    transcript is parsed. Genes without such an entry are left untouched.

    Args:
        gene_obj(dict): a variant gene, modified in place
        build(int): genome build, 37 is used when nothing is given
    """
    build = build or 37

    # Read 'common' with .get so a gene lacking the key is skipped instead of
    # raising KeyError.
    # NOTE(review): add_gene_info only sets 'common' when the store knows the
    # gene, so the key is presumably missing for unknown genes - confirm.
    if not gene_obj.get('common'):
        return

    add_gene_links(gene_obj, build)

    transcripts = gene_obj['transcripts']
    refseq_transcripts = [tx_obj for tx_obj in transcripts if tx_obj.get('refseq_id')]

    # select refseq transcripts as "primary" or use all Ensembl transcripts
    gene_obj['primary_transcripts'] = refseq_transcripts or transcripts

    for tx_obj in transcripts:
        parse_transcript(gene_obj, tx_obj, build)
def gene(store, hgnc_id):
    """Parse information about a gene.

    Fetches the gene from the store for genome builds '37' and '38'. A record
    that is found gets a ``position`` string and gene, transcript and
    phenotype links added in place and is kept under ``res['builds'][build]``.

    Args:
        store: adapter exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: identifier of the gene to look up

    Returns:
        dict: gene summary. ``builds`` maps each build to its record (or
            None), ``record`` is the first record that was found and the
            other top level fields come from the last record that was found.
            ``ensembl_id`` is not filled in by this function.

    Raises:
        ValueError: if no record exists for the gene in any genome build
    """
    res = {
        'builds': {'37': None, '38': None},
        'symbol': None,
        'description': None,
        'ensembl_id': None,
        'record': None,
    }

    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['description'] = record['description']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        # The build keys are strings, the gene link helper is given an int
        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance', False)
        res['inheritance_models'] = record.get('inheritance_models', [])

        for transcript in record['transcripts']:
            transcript['position'] = "{this[chrom]}:{this[start]}-{this[end]}".format(
                this=transcript)
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # The first record that is found becomes the main record
        if not res['record']:
            res['record'] = record

    # If none of the genes were found. Test the per-build records rather than
    # res itself, whose 'builds' dict is never empty and thus always truthy.
    if not any(res['builds'].values()):
        raise ValueError("No gene found for hgnc id {}".format(hgnc_id))

    return res
def add_gene_info(store, variant_obj, gene_panels=None, genome_build=None):
    """Add hgnc gene and gene panel information to the genes of a variant.

    The genes and transcripts of a variant are annotated by VEP. The database
    holds updated and extended information about genes and transcripts, and
    the gene panels can hold manually curated information on top of that.
    This function walks over the genes of the variant and complements each of
    them with the database and panel information before the variant is sent
    to the template. Genes that the store does not know are skipped.

    Args:
        store(scout.adapter.MongoAdapter)
        variant_obj(dict): A variant from the database
        gene_panels(list(dict)): List of panels from database
        genome_build(str): "37" is used when nothing is given

    Returns:
        variant_obj(dict): the same variant, updated in place
    """
    panels = gene_panels or []
    build = genome_build or "37"

    # Group the gene entries of all panels by hgnc id
    panel_genes = {}
    for panel in panels:
        for panel_gene in panel["genes"]:
            panel_genes.setdefault(panel_gene["hgnc_id"], []).append(panel_gene)

    # Variant level fields that are filled while looping over the genes
    variant_obj["has_refseq"] = False
    variant_obj["disease_associated_transcripts"] = []
    all_models = set()

    for variant_gene in variant_obj.get("genes", []):
        hgnc_id = variant_gene["hgnc_id"]

        hgnc_gene = store.hgnc_gene(hgnc_id, build=build)
        if not hgnc_gene:
            # Nothing to add for a gene the store has no record of
            continue
        hgnc_symbol = hgnc_gene["hgnc_symbol"]

        # Flag genes that are annotated to have incomplete penetrance
        if hgnc_gene.get("incomplete_penetrance"):
            variant_gene["omim_penetrance"] = True

        # Gene and transcript information that is specific for the panels
        panel_info = add_panel_specific_gene_info(panel_genes.get(hgnc_id, []))
        variant_gene.update(panel_info)

        update_transcripts_information(variant_gene, hgnc_gene, variant_obj)

        variant_gene["common"] = hgnc_gene

        add_gene_links(variant_gene, build)

        # Disease associated transcripts from the panels, as "symbol:refseq id"
        variant_obj["disease_associated_transcripts"].extend(
            "{}:{}".format(hgnc_symbol, refseq_id)
            for refseq_id in panel_info.get("disease_associated_transcripts", [])
        )

        # Disease terms that are associated with the gene
        variant_gene["disease_terms"] = store.disease_terms(hgnc_id)

        all_models |= set(variant_gene["manual_inheritance"])

        # Inheritance models collected from the disease terms
        omim_models = set()
        for term in variant_gene.get("disease_terms", []):
            omim_models.update(term.get("inheritance", []))
        variant_gene["omim_inheritance"] = list(omim_models)

        all_models |= omim_models

    variant_obj["all_models"] = all_models
    return variant_obj