def parse_transcript(gene_obj, tx_obj, build=None):
    """Decorate a VEP transcript with links and, if possible, a change string.

    ``tx_obj`` is modified in place. Links are always added through
    ``add_tx_links``; ``change_str`` is only set when the transcript carries
    a truthy ``refseq_id``.

    Args:
        gene_obj(dict): variant gene; ``common`` is read for the hgnc symbol
            and ``hgnc_id`` is used as the name when ``common`` is falsy
        tx_obj(dict): the transcript to update
        build: genome build handed to ``add_tx_links``, 37 when falsy
    """
    add_tx_links(tx_obj, build or 37)

    # Only RefSeq transcripts get a change string
    if not tx_obj.get('refseq_id'):
        return

    common_info = gene_obj['common']
    if common_info:
        gene_name = common_info['hgnc_symbol']
    else:
        # No common gene information, fall back to the hgnc id
        gene_name = gene_obj['hgnc_id']

    tx_obj['change_str'] = transcript_str(tx_obj, gene_name)
def gene(store, hgnc_id):
    """Parse information about a gene.

    The gene is fetched from ``store`` once per genome build ('37' and '38').
    Every record found is updated in place: a ``position`` string is added to
    the record and to each of its transcripts, links are added through
    ``add_gene_links`` / ``add_tx_links`` and every phenotype gets an
    ``omim_link``.

    Args:
        store: object exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: gene identifier, passed through to ``store.hgnc_gene``

    Returns:
        dict: the records per build under ``builds``, the first record found
            under ``record`` and summary fields copied from the records. If
            the gene exists in both builds the summary fields hold the values
            of the build visited last. ``ensembl_id`` is never filled in here.

    Raises:
        ValueError: if the gene was not found in any build
    """
    res = {
        'builds': {'37': None, '38': None},
        'symbol': None,
        'description': None,
        'ensembl_id': None,
        'record': None,
    }

    # Only values of existing keys are replaced below, so iterating over the
    # dictionary while updating it is safe
    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['description'] = record['description']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance', False)
        res['inheritance_models'] = record.get('inheritance_models', [])

        for transcript in record['transcripts']:
            transcript['position'] = "{this[chrom]}:{this[start]}-{this[end]}".format(
                this=transcript)
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # Keep the first record that was found as the main record
        if not res['record']:
            res['record'] = record

    # If the gene was not found in any build. The check has to look inside
    # 'builds': the dictionary itself is never empty and therefore always
    # truthy, so testing the values of ``res`` could never fail.
    if not any(res['builds'].values()):
        raise ValueError(
            "Gene {} was not found in any genome build".format(hgnc_id))

    return res
def gene(store, hgnc_id):
    """Parse information about a gene.

    The gene is fetched from ``store`` once per genome build ("37" and "38").
    Every record found is updated in place: a ``position`` string is added to
    the record and to each of its transcripts, links are added through
    ``add_gene_links`` / ``add_tx_links`` and every phenotype gets an
    ``omim_link``.

    Args:
        store: object exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: gene identifier, passed through to ``store.hgnc_gene``

    Returns:
        dict: the records per build under ``builds``, the first record found
            under ``record`` and summary fields copied from the records. If
            the gene exists in both builds the summary fields hold the values
            of the build visited last. ``ensembl_id`` is never filled in here.

    Raises:
        ValueError: if the gene was not found in any build
    """
    res = {
        "builds": {"37": None, "38": None},
        "symbol": None,
        "description": None,
        "ensembl_id": None,
        "record": None,
    }

    # Only values of existing keys are replaced below, so iterating over the
    # dictionary while updating it is safe
    for build in res["builds"]:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record["position"] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res["aliases"] = record["aliases"]
        res["hgnc_id"] = record["hgnc_id"]
        res["builds"][build] = record
        res["symbol"] = record["hgnc_symbol"]
        res["description"] = record["description"]
        res["entrez_id"] = record.get("entrez_id")
        res["pli_score"] = record.get("pli_score")

        add_gene_links(record, int(build))

        res["omim_id"] = record.get("omim_id")
        res["incomplete_penetrance"] = record.get("incomplete_penetrance", False)
        res["inheritance_models"] = record.get("inheritance_models", [])

        for transcript in record["transcripts"]:
            transcript["position"] = "{this[chrom]}:{this[start]}-{this[end]}".format(
                this=transcript)
            add_tx_links(transcript, build)

        for phenotype in record.get("phenotypes", []):
            phenotype["omim_link"] = omim(phenotype.get("mim_number"))

        # Keep the first record that was found as the main record
        if not res["record"]:
            res["record"] = record

    # If the gene was not found in any build. The check has to look inside
    # "builds": the dictionary itself is never empty and therefore always
    # truthy, so testing the values of ``res`` could never fail.
    if not any(res["builds"].values()):
        raise ValueError(
            "Gene {} was not found in any genome build".format(hgnc_id))

    return res
def gene(store, hgnc_id):
    """Parse information about a gene.

    The gene is fetched from ``store`` once per genome build ('37' and '38').
    Every record found is updated in place: a ``position`` string is added to
    the record and to each of its transcripts, links are added through
    ``add_gene_links`` / ``add_tx_links`` and every phenotype gets an
    ``omim_link``.

    Args:
        store: object exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: gene identifier, passed through to ``store.hgnc_gene``

    Returns:
        dict: the records per build under ``builds``, the first record found
            under ``record`` and summary fields copied from the records. If
            the gene exists in both builds the summary fields hold the values
            of the build visited last. ``ensembl_id`` is never filled in here.

    Raises:
        ValueError: if the gene was not found in any build
    """
    res = {
        'builds': {'37': None, '38': None},
        'symbol': None,
        'description': None,
        'ensembl_id': None,
        'record': None,
    }

    # Only values of existing keys are replaced below, so iterating over the
    # dictionary while updating it is safe
    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['description'] = record['description']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance', False)
        res['inheritance_models'] = record.get('inheritance_models', [])

        for transcript in record['transcripts']:
            transcript['position'] = "{this[chrom]}:{this[start]}-{this[end]}".format(
                this=transcript)
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # Keep the first record that was found as the main record
        if not res['record']:
            res['record'] = record

    # If the gene was not found in any build. The check has to look inside
    # 'builds': the dictionary itself is never empty and therefore always
    # truthy, so testing the values of ``res`` could never fail.
    if not any(res['builds'].values()):
        raise ValueError(
            "Gene {} was not found in any genome build".format(hgnc_id))

    return res
def update_transcripts_information(variant_gene, hgnc_gene, variant_obj, genome_build=None):
    """Merge hgnc transcript information into the transcripts of a variant gene

    The hgnc information is updated continuously, so this is meant to run
    every time a variant is fetched.

    This function will:
        - Store a dictionary ensembl transcript id -> transcript on the hgnc
          gene under "transcripts_dict"
        - Mark primary transcripts, add links and add refseq information to
          the variant transcripts that exist in the hgnc gene
        - Store the variant transcripts that have a refseq id on the variant
          gene under "primary_transcripts"

    All updates are done in place, nothing is returned.

    Args:
        variant_gene(dict): the gene information from the variant
        hgnc_gene(dict): the hgnc gene information
        variant_obj(scout.models.Variant): gets "has_refseq" set to True
            when any transcript has a refseq id
        genome_build(str): build passed on to add_tx_links, "37" when falsy
    """
    build = genome_build or "37"
    disease_refseq_ids = variant_gene.get("disease_associated_no_version", set())

    # Index the hgnc transcripts on their ensembl transcript id
    tx_by_ensembl_id = {
        hgnc_tx["ensembl_transcript_id"]: hgnc_tx
        for hgnc_tx in hgnc_gene.get("transcripts", [])
    }
    # Make the index available on the gene object
    hgnc_gene["transcripts_dict"] = tx_by_ensembl_id
    gene_symbol = hgnc_gene["hgnc_symbol"]

    with_refseq = []
    for variant_tx in variant_gene.get("transcripts", []):
        hgnc_tx = tx_by_ensembl_id.get(variant_tx["transcript_id"])
        # Transcripts that are no longer known to the hgnc gene are left alone
        if not hgnc_tx:
            continue

        if hgnc_tx.get("is_primary"):
            variant_tx["is_primary"] = True

        add_tx_links(variant_tx, build)

        # The remaining information only applies to transcripts with a
        # refseq identifier
        refseq_id = hgnc_tx.get("refseq_id")
        if refseq_id:
            variant_tx["refseq_id"] = refseq_id
            variant_obj["has_refseq"] = True
            with_refseq.append(variant_tx)

            if refseq_id in disease_refseq_ids:
                variant_tx["is_disease_associated"] = True

            # One ensembl transcript can map to several refseq identifiers,
            # keep all of them
            variant_tx["refseq_identifiers"] = hgnc_tx.get("refseq_identifiers", [])
            variant_tx["change_str"] = transcript_str(variant_tx, gene_symbol)

    variant_gene["primary_transcripts"] = with_refseq