def gene_variants(store, pymongo_cursor, variant_count, institute_id, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: object exposing ``case(case_id=...)``; it is also passed on to
            ``user_institutes``, ``update_HGNC_symbols`` and ``gene``.
        pymongo_cursor: variant cursor supporting ``skip(n).limit(m)``.
        variant_count(int): total number of variants matched by the query.
        institute_id: not used by this function; kept so existing callers
            keep working.
        page(int): 1-based page number; values below 1 are treated as page 1.
        per_page(int): number of variants per page.

    Returns:
        dict: ``{"variants": list, "more_variants": bool}``. Every returned
        variant has ``case_display_name`` set. Variants with a gene list are
        updated with whatever ``predictions`` returns and, when they contain
        exactly one gene, get an ``hgvs`` entry.
    """
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = pymongo_cursor.skip(skip_count).limit(per_page)
    my_institutes = {inst["_id"] for inst in user_institutes(store, current_user)}

    variants = []
    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # Hide variants from cases the user's institutes neither own nor
        # collaborate on.
        case_institutes = {variant_case_obj.get("owner")}
        case_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(case_institutes):
            continue

        genome_build = get_genome_build(variant_case_obj)
        variant_genes = variant_obj.get("genes")
        # The return value was never used by this function; only the call
        # itself is kept.
        update_HGNC_symbols(store, variant_genes, genome_build)

        # Populate variant HGVS and predictions
        if variant_genes is not None:
            # Collected per variant. Previously ``gene_symbols`` was rebound
            # to a one-element list for every gene, which made the
            # single-gene check below always true and left the name unbound
            # (or stale from the previous variant) for an empty gene list.
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj["hgnc_id"])["symbol"])

                # gather HGVS info from gene transcripts
                hgvs_nucleotide, hgvs_protein = get_hgvs(gene_obj)
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            if len(gene_symbols) == 1:
                variant_obj["hgvs"] = hgvs_str(gene_symbols, hgvs_p, hgvs_c)

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
def variant_export_genes_info(store, gene_list):
    """Build the gene-related export fields for one variant.

    Args:
        store: passed through to ``gene`` to look up each gene's symbol.
        gene_list(list): gene objects (dicts) contained in the variant.

    Returns:
        gene_info(list): three ";"-joined strings, in this order: HGNC ids,
        gene symbols, and the coding sequence name of each gene's canonical
        transcript ("-" for a gene without a canonical transcript).
    """
    gene_ids = []
    gene_names = []
    hgvs_c = []

    for gene_obj in gene_list:
        hgnc_id = gene_obj["hgnc_id"]
        gene_name = gene(store, hgnc_id)["symbol"]
        gene_ids.append(hgnc_id)
        gene_names.append(gene_name)

        hgvs_nucleotide = "-"
        # A gene without a "transcripts" entry is treated as having no
        # transcripts instead of raising on iteration over None.
        for transcript_obj in gene_obj.get("transcripts") or []:
            if transcript_obj.get("is_canonical") is True:
                hgvs_nucleotide = str(transcript_obj.get("coding_sequence_name"))
        hgvs_c.append(hgvs_nucleotide)

    return [
        ";".join(str(value) for value in column)
        for column in (gene_ids, gene_names, hgvs_c)
    ]
def variant_export_lines(store, case_obj, variants_query):
    """Get variants info to be exported to file, one line per variant.

    Args:
        store(scout.adapter.MongoAdapter)
        case_obj(scout.models.Case)
        variants_query: an iterable of variant objects, each one a dictionary

    Returns:
        export_variants: a list of strings. Each string holds the fields of
        one variant to be exported to file, separated by commas.
    """
    export_variants = []

    for variant in variants_query:
        position = variant['position']
        change = variant['reference'] + '>' + variant['alternative']
        variant_line = [
            variant['rank_score'],
            variant['chromosome'],
            position,
            change,
            '_'.join([str(position), change]),
        ]

        # gather gene info; a missing "genes" entry is handled like an
        # empty gene list
        gene_list = variant.get('genes') or []
        if gene_list:
            gene_ids = []
            gene_names = []
            hgvs_c = []
            for gene_obj in gene_list:
                hgnc_id = gene_obj['hgnc_id']
                gene_name = gene(store, hgnc_id)['symbol']
                gene_ids.append(hgnc_id)
                gene_names.append(gene_name)

                hgvs_nucleotide = '-'
                # gather HGVS info from the canonical transcript, if any
                for transcript_obj in gene_obj.get('transcripts') or []:
                    if transcript_obj.get('is_canonical') is True:
                        hgvs_nucleotide = str(transcript_obj.get('coding_sequence_name'))
                hgvs_c.append(hgvs_nucleotide)

            variant_line.append(';'.join(str(x) for x in gene_ids))
            variant_line.append(';'.join(str(x) for x in gene_names))
            variant_line.append(';'.join(str(x) for x in hgvs_c))
        else:
            # The previous ``while i < 4`` loop read ``i`` before assigning
            # it and raised for every variant without genes.
            # NOTE(review): this branch emits four placeholders while the
            # gene branch emits three fields; the count is kept as written,
            # confirm against the export header.
            variant_line.extend(['-'] * 4)

        variant_gts = variant['samples']  # list of coverage and gt calls for case samples
        for individual in case_obj['individuals']:
            for variant_gt in variant_gts:
                if individual['individual_id'] == variant_gt['sample_id']:
                    # gather coverage info
                    variant_line.append(variant_gt['allele_depths'][0])  # AD reference
                    variant_line.append(variant_gt['allele_depths'][1])  # AD alternate
                    # gather genotype quality info
                    variant_line.append(variant_gt['genotype_quality'])

        export_variants.append(",".join(str(field) for field in variant_line))

    return export_variants
def variant_export_lines(store, case_obj, variants_query):
    """Get variants info to be exported to file, one line per variant.

    Args:
        store(scout.adapter.MongoAdapter)
        case_obj(scout.models.Case)
        variants_query: an iterable of variant objects, each one a dictionary

    Returns:
        export_variants: a list of strings. Each string holds the fields of
        one variant to be exported to file, separated by commas.
    """
    export_variants = []

    for variant in variants_query:
        position = variant["position"]
        change = variant["reference"] + ">" + variant["alternative"]
        variant_line = [
            variant["rank_score"],
            variant["chromosome"],
            position,
            change,
            "_".join([str(position), change]),
        ]

        # gather gene info: this is a list of gene objects, or None when the
        # variant has no "genes" entry
        gene_list = variant.get("genes")
        if gene_list:
            # Same three fields (ids, symbols, canonical HGVS) that were
            # previously assembled inline here.
            variant_line.extend(variant_export_genes_info(store, gene_list))
        else:
            # NOTE(review): four placeholders are emitted here while the
            # gene branch emits three fields; the count is kept as written,
            # confirm against the export header.
            variant_line.extend(["-"] * 4)

        # list of coverage and gt calls for case samples
        variant_gts = variant["samples"]
        for individual in case_obj["individuals"]:
            for variant_gt in variant_gts:
                if individual["individual_id"] != variant_gt["sample_id"]:
                    continue
                allele_depths = variant_gt["allele_depths"]
                variant_line.append(allele_depths[0])  # AD reference
                variant_line.append(allele_depths[1])  # AD alternate
                # gather genotype quality info
                variant_line.append(variant_gt["genotype_quality"])

        export_variants.append(",".join(str(field) for field in variant_line))

    return export_variants
def gene_variants(store, variants_query, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: object exposing ``case(case_id=...)`` and
            ``hgnc_gene(hgnc_id, build=...)``; it is also passed on to
            ``user_institutes`` and ``gene``.
        variants_query: variant cursor supporting ``count()`` and
            ``skip(n).limit(m)``.
        page(int): 1-based page number; values below 1 are treated as page 1.
        per_page(int): number of variants per page.

    Returns:
        dict: ``{'variants': list, 'more_variants': bool}``. Variants whose
        institute is not among the current user's institutes, or whose case
        cannot be found, are left out.
    """
    # NOTE(review): if variants_query is a PyMongo cursor, Cursor.count() is
    # deprecated in recent PyMongo releases; the replacement needs the
    # collection and filter, which are not available here.
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    # A set, since membership is tested once per variant below.
    my_institutes = {inst['_id'] for inst in user_institutes(store, current_user)}

    variants = []
    for variant_obj in variant_res:
        # hide other institutes for now
        if variant_obj['institute'] not in my_institutes:
            LOG.warning("Institute {} not allowed.".format(
                variant_obj['institute']))
            continue

        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj['case_id'])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj['case_display_name'] = variant_case_obj.get('display_name')

        genome_build = variant_case_obj.get('genome_build', '37')
        if genome_build not in ['37', '38']:
            genome_build = '37'

        variant_genes = variant_obj.get('genes')
        if variant_genes is not None:
            # Update the HGNC symbols if they are not set
            for gene_obj in variant_genes:
                # If there is no hgnc id there is nothing we can do
                if not gene_obj['hgnc_id']:
                    continue
                if (gene_obj.get('hgnc_symbol') is None
                        or gene_obj.get('description') is None):
                    hgnc_gene = store.hgnc_gene(gene_obj['hgnc_id'],
                                                build=genome_build)
                    if not hgnc_gene:
                        continue
                    gene_obj['hgnc_symbol'] = hgnc_gene['hgnc_symbol']
                    gene_obj['description'] = hgnc_gene['description']

            # Populate variant HGVS and predictions
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj['hgnc_id'])['symbol'])

                # Both defaults are reset per gene. Previously only the
                # nucleotide one was, so a gene without a canonical
                # transcript reused the protein HGVS of an earlier gene or
                # raised a NameError.
                hgvs_nucleotide = '-'
                hgvs_protein = '-'
                # gather HGVS info from gene transcripts; a missing
                # "transcripts" entry counts as no transcripts
                for transcript_obj in gene_obj.get('transcripts') or []:
                    if transcript_obj.get('is_canonical') is True:
                        hgvs_nucleotide = str(
                            transcript_obj.get('coding_sequence_name'))
                        hgvs_protein = str(
                            transcript_obj.get('protein_sequence_name'))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # Only single-gene variants get an HGVS string: protein first,
            # then coding sequence, else a dash. "None" is str(None) from a
            # transcript lacking that field.
            if len(gene_symbols) == 1:
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj['hgvs'] = hgvs

            # populate variant predictions for display
            variant_obj.update(get_predictions(variant_genes))

        variants.append(variant_obj)

    return {
        'variants': variants,
        'more_variants': more_variants,
    }
def gene_variants(store, variants_query, institute_id, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: object exposing ``case(case_id=...)`` and
            ``hgnc_gene(hgnc_id, build=...)``; it is also passed on to
            ``user_institutes`` and ``gene``.
        variants_query: variant cursor supporting ``count()`` and
            ``skip(n).limit(m)``.
        institute_id: not used by this function; kept so existing callers
            keep working.
        page(int): 1-based page number; values below 1 are treated as page 1.
        per_page(int): number of variants per page.

    Returns:
        dict: ``{"variants": list, "more_variants": bool}``. Variants whose
        case cannot be found, or whose case owner and collaborators share no
        institute with the current user, are left out.
    """
    # We need to call variants_collection.count_documents here
    # NOTE(review): that requires the collection and filter, neither of which
    # is available from the cursor passed in.
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)
    my_institutes = {inst["_id"] for inst in user_institutes(store, current_user)}

    variants = []
    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # hide other institutes for now
        case_institutes = {variant_case_obj.get("owner")}
        case_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(case_institutes):
            # If the user does not have access to the information we skip it
            continue

        genome_build = variant_case_obj.get("genome_build", "37")
        if genome_build not in ["37", "38"]:
            genome_build = "37"

        variant_genes = variant_obj.get("genes")
        if variant_genes is None:
            # Nothing to annotate; the variant is still returned.
            variants.append(variant_obj)
            continue

        # Update the HGNC symbols if they are not set
        for gene_obj in variant_genes:
            # If there is no hgnc id there is nothing we can do
            if not gene_obj["hgnc_id"]:
                continue
            needs_update = (gene_obj.get("hgnc_symbol") is None
                            or gene_obj.get("description") is None)
            if not needs_update:
                continue
            hgnc_gene = store.hgnc_gene(gene_obj["hgnc_id"], build=genome_build)
            if not hgnc_gene:
                continue
            gene_obj["hgnc_symbol"] = hgnc_gene["hgnc_symbol"]
            gene_obj["description"] = hgnc_gene["description"]

        # Populate variant HGVS and predictions
        gene_symbols = []
        hgvs_c = []
        hgvs_p = []
        for gene_obj in variant_genes:
            gene_symbols.append(gene(store, gene_obj["hgnc_id"])["symbol"])

            # Reset both per gene. The protein value used to be assigned only
            # inside the canonical-transcript branch, so a gene without one
            # either raised a NameError or inherited an earlier gene's value.
            hgvs_nucleotide = "-"
            hgvs_protein = "-"
            # gather HGVS info from gene transcripts; a missing "transcripts"
            # entry counts as no transcripts
            for transcript_obj in gene_obj.get("transcripts") or []:
                if transcript_obj.get("is_canonical") is True:
                    hgvs_nucleotide = str(transcript_obj.get("coding_sequence_name"))
                    hgvs_protein = str(transcript_obj.get("protein_sequence_name"))
            hgvs_c.append(hgvs_nucleotide)
            hgvs_p.append(hgvs_protein)

        # Only single-gene variants get an HGVS string: protein first, then
        # coding sequence, else a dash. "None" is str(None) from a transcript
        # lacking that field.
        if len(gene_symbols) == 1:
            if hgvs_p[0] != "None":
                hgvs = hgvs_p[0]
            elif hgvs_c[0] != "None":
                hgvs = hgvs_c[0]
            else:
                hgvs = "-"
            variant_obj["hgvs"] = hgvs

        # populate variant predictions for display
        variant_obj.update(predictions(variant_genes))
        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
def gene_variants(store, variants_query, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: object exposing ``case(case_id=...)`` and
            ``hgnc_gene(hgnc_id, build=...)``; it is also passed on to
            ``user_institutes`` and ``gene``.
        variants_query: variant cursor supporting ``count()`` and
            ``skip(n).limit(m)``.
        page(int): 1-based page number; values below 1 are treated as page 1.
        per_page(int): number of variants per page.

    Returns:
        dict: ``{'variants': list, 'more_variants': bool}``. Variants whose
        institute is not among the current user's institutes, or whose case
        cannot be found, are left out.
    """
    # NOTE(review): if variants_query is a PyMongo cursor, Cursor.count() is
    # deprecated in recent PyMongo releases; the replacement needs the
    # collection and filter, which are not available here.
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    # Built once as a set; it is only used for membership tests.
    my_institutes = {inst['_id'] for inst in user_institutes(store, current_user)}

    variants = []
    for variant_obj in variant_res:
        # hide other institutes for now
        variant_institute = variant_obj['institute']
        if variant_institute not in my_institutes:
            LOG.warning("Institute {} not allowed.".format(variant_institute))
            continue

        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj['case_id'])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj['case_display_name'] = variant_case_obj.get('display_name')

        genome_build = variant_case_obj.get('genome_build', '37')
        if genome_build not in ['37', '38']:
            genome_build = '37'

        variant_genes = variant_obj.get('genes')
        if variant_genes is not None:
            # Update the HGNC symbols if they are not set
            for gene_obj in variant_genes:
                # If there is no hgnc id there is nothing we can do
                if not gene_obj['hgnc_id']:
                    continue
                # Else we collect the gene object and check the id
                if gene_obj.get('hgnc_symbol') is None or gene_obj.get('description') is None:
                    hgnc_gene = store.hgnc_gene(gene_obj['hgnc_id'], build=genome_build)
                    if not hgnc_gene:
                        continue
                    gene_obj['hgnc_symbol'] = hgnc_gene['hgnc_symbol']
                    gene_obj['description'] = hgnc_gene['description']

            # Populate variant HGVS and predictions
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                hgnc_id = gene_obj['hgnc_id']
                gene_symbols.append(gene(store, hgnc_id)['symbol'])

                # Defaults for a gene without a canonical transcript. The
                # protein default was missing before, which raised a
                # NameError or leaked the value from a previous gene.
                hgvs_nucleotide = '-'
                hgvs_protein = '-'
                # A missing "transcripts" entry counts as no transcripts.
                transcripts_list = gene_obj.get('transcripts') or []
                for transcript_obj in transcripts_list:
                    if transcript_obj.get('is_canonical') is True:
                        hgvs_nucleotide = str(transcript_obj.get('coding_sequence_name'))
                        hgvs_protein = str(transcript_obj.get('protein_sequence_name'))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # Only single-gene variants get an HGVS string: protein first,
            # then coding sequence, else a dash. "None" is str(None) from a
            # transcript lacking that field.
            if len(gene_symbols) == 1:
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj['hgvs'] = hgvs

            # populate variant predictions for display
            variant_obj.update(get_predictions(variant_genes))

        variants.append(variant_obj)

    return {
        'variants': variants,
        'more_variants': more_variants,
    }