def test_gene_predictions_two_genes():
    """SIFT predictions for several genes are labelled with the gene symbol."""
    ## GIVEN a list with two genes that both carry a hgnc symbol
    damaging_gene = dict(
        hgnc_symbol="AAA",
        sift_prediction="deleterious",
        polyphen_prediction="probably_damaging",
        region_annotation="exonic",
        functional_annotation="missense_variant",
        spliceai_score=0.17,
        spliceai_position=-4,
        spliceai_prediction=["ds 0.17 dp -4"],
    )
    tolerated_gene = dict(
        hgnc_symbol="BBB",
        sift_prediction="tolerated",
        polyphen_prediction="unknown",
        region_annotation="exonic",
        functional_annotation="synonymous_variant",
        spliceai_score=0.9,
        spliceai_position=5,
        spliceai_prediction=["ds 0.9 dp 5"],
    )

    ## WHEN parsing the gene predictions
    res = predictions([damaging_gene, tolerated_gene])

    ## THEN assert each SIFT prediction is reported as "<symbol>:<prediction>"
    expected_sift = {"AAA:deleterious", "BBB:tolerated"}
    assert set(res["sift_predictions"]) == expected_sift
def test_gene_predictions_one_gene():
    """A single gene without a symbol yields the bare prediction values."""
    ## GIVEN a list with one gene that has no hgnc symbol
    genes = [
        {
            "sift_prediction": "deleterious",
            "polyphen_prediction": "probably_damaging",
            "region_annotation": "exonic",
            "functional_annotation": "missense_variant",
            "spliceai_score": 0.17,
            "spliceai_position": -4,
            "spliceai_prediction": ["ds 0.17 dp -4"],
        }
    ]

    ## WHEN parsing the gene predictions
    res = predictions(genes)

    ## THEN assert every list holds exactly the value found on that gene
    expected = dict(
        sift_predictions=["deleterious"],
        polyphen_predictions=["probably_damaging"],
        region_annotations=["exonic"],
        functional_annotations=["missense_variant"],
        spliceai_scores=[0.17],
        spliceai_positions=[-4],
        spliceai_predictions=[["ds 0.17 dp -4"]],
    )
    assert res == expected
def gene_variants(store, pymongo_cursor, variant_count, institute_id, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Variants whose case cannot be found, or whose case is neither owned by
    nor shared with one of the current user's institutes, are left out.

    Args:
        store: adapter exposing ``case(case_id=...)``; also handed to the
            module helpers used below
        pymongo_cursor: cursor-like object supporting ``skip(n).limit(n)``
        variant_count(int): total number of variants matched by the query
        institute_id: accepted for interface compatibility, not used here
        page(int): 1-based page number, values below 1 behave like 1
        per_page(int): maximum number of variants on a page

    Returns:
        dict: ``{"variants": list of decorated variants, "more_variants": bool}``
    """
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = pymongo_cursor.skip(skip_count).limit(per_page)

    my_institutes = {inst["_id"] for inst in user_institutes(store, current_user)}

    variants = []
    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # hide other institutes for now
        case_institutes = {variant_case_obj.get("owner")}
        case_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(case_institutes):
            # If the user does not have access to the information we skip it
            continue

        genome_build = get_genome_build(variant_case_obj)
        variant_genes = variant_obj.get("genes")
        # Called for its effect on the gene dicts; the return value is not needed
        update_HGNC_symbols(store, variant_genes, genome_build)

        # Populate variant HGVS and predictions
        if variant_genes is not None:
            # Start from a clean slate for every variant: an empty gene list
            # must neither raise a NameError nor reuse the symbols collected
            # for the previous variant.
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                hgnc_id = gene_obj["hgnc_id"]
                # NOTE(review): the list is replaced (not extended) for each
                # gene, so it only ever holds the last symbol and the check
                # below passes for any non-empty gene list - confirm intended.
                gene_symbols = [gene(store, hgnc_id)["symbol"]]

                # gather HGVS info from gene transcripts
                hgvs_nucleotide, hgvs_protein = get_hgvs(gene_obj)
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            if len(gene_symbols) == 1:
                variant_obj["hgvs"] = hgvs_str(gene_symbols, hgvs_p, hgvs_c)

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
def test_gene_predictions_no_info():
    """An empty gene list yields empty prediction lists."""
    ## GIVEN an empty list of genes
    no_genes = list()

    ## WHEN parsing the gene predictions
    res = predictions(no_genes)

    ## THEN assert every prediction list in the result is empty
    expected_keys = (
        "sift_predictions",
        "polyphen_predictions",
        "region_annotations",
        "functional_annotations",
    )
    assert res == {key: [] for key in expected_keys}
def test_gene_predictions_one_gene_no_sift():
    """A gene lacking a SIFT prediction is reported with a "-" placeholder."""
    ## GIVEN a list with one gene that has no SIFT prediction
    gene_without_sift = dict(
        hgnc_symbol="AAA",
        polyphen_prediction="probably_damaging",
        region_annotation="exonic",
        functional_annotation="missense_variant",
    )

    ## WHEN parsing the gene predictions
    res = predictions([gene_without_sift])

    ## THEN assert SIFT holds the placeholder and the other values are kept
    expected = dict(
        sift_predictions=["-"],
        polyphen_predictions=["probably_damaging"],
        region_annotations=["exonic"],
        functional_annotations=["missense_variant"],
    )
    assert res == expected
def gene_variants(store, pymongo_cursor, variant_count, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: adapter exposing ``case(case_id=...)`` and
            ``hgnc_gene_caption(hgnc_id)``; also handed to the module helpers
        pymongo_cursor: cursor-like object supporting ``skip(n).limit(n)``
        variant_count(int): total number of variants matched by the query
        page(int): 1-based page number, values below 1 behave like 1
        per_page(int): maximum number of variants on a page

    Returns:
        dict: ``{"variants": list of decorated variants, "more_variants": bool}``
    """
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = pymongo_cursor.skip(skip_count).limit(per_page)

    variants = []
    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered: skip it rather
            # than failing the whole page on ``None.get``
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        genome_build = get_genome_build(variant_case_obj)
        variant_genes = variant_obj.get("genes")
        update_HGNC_symbols(store, variant_genes, genome_build)

        # Populate variant HGVS and predictions
        if variant_genes is not None:
            # Start from a clean slate for every variant: an empty gene list
            # must neither raise a NameError nor reuse the symbols collected
            # for the previous variant.
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_caption = store.hgnc_gene_caption(gene_obj["hgnc_id"])
                # NOTE(review): the list is replaced (not extended) for each
                # gene, so it only ever holds the last symbol and the check
                # below passes for any non-empty gene list - confirm intended.
                gene_symbols = [gene_caption["hgnc_symbol"]]

                # gather HGVS info from gene transcripts
                hgvs_nucleotide, hgvs_protein = get_hgvs(gene_obj)
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            if len(gene_symbols) == 1:
                variant_obj["hgvs"] = hgvs_str(gene_symbols, hgvs_p, hgvs_c)

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
def test_gene_predictions_one_gene_no_sift():
    """Missing SIFT and SpliceAI position values are reported as "-"."""
    ## GIVEN a list with one gene lacking a SIFT prediction and a SpliceAI position
    incomplete_gene = dict(
        hgnc_symbol="AAA",
        polyphen_prediction="probably_damaging",
        region_annotation="exonic",
        functional_annotation="missense_variant",
        spliceai_score=0.17,
        spliceai_prediction=["ds 0.17"],
    )

    ## WHEN parsing the gene predictions
    res = predictions([incomplete_gene])

    ## THEN assert the missing values hold the placeholder and the rest is kept
    expected = dict(
        sift_predictions=["-"],
        polyphen_predictions=["probably_damaging"],
        region_annotations=["exonic"],
        functional_annotations=["missense_variant"],
        spliceai_scores=[0.17],
        spliceai_positions=["-"],
        spliceai_predictions=[["ds 0.17"]],
    )
    assert res == expected
def parse_variant(
    store,
    institute_obj,
    case_obj,
    variant_obj,
    update=False,
    genome_build="37",
    get_compounds=True,
):
    """Parse information about variants.

    - Adds information about compounds
    - Updates the information about compounds if necessary and 'update=True'
    - Fills in missing hgnc symbols, comments, predictions, a readable ACMG
      classification, a display value for the length and ``end_chrom``

    Args:
        store(scout.adapter.MongoAdapter)
        institute_obj(scout.models.Institute)
        case_obj(scout.models.Case)
        variant_obj(scout.models.Variant)
        update(bool): If variant should be updated in database
        genome_build(str)
        get_compounds(bool): If compound information should be refreshed and sorted

    Returns:
        variant_obj(scout.models.Variant): the decorated variant; when the
            variant was saved this is whatever ``store.update_variant`` returned
    """
    has_changed = False
    # A stored ``None`` is treated like "no compounds" so the loop further
    # down never iterates over None
    compounds = variant_obj.get("compounds") or []
    if compounds and get_compounds:
        # Check if we need to add compound information
        # If it is the first time the case is viewed we fill in some compound information
        if "not_loaded" not in compounds[0]:
            variant_obj["compounds"] = store.update_variant_compounds(variant_obj)
            has_changed = True

        # sort compounds on combined rank score, highest first
        variant_obj["compounds"] = sorted(
            variant_obj["compounds"], key=lambda compound: -compound["combined_score"]
        )

    # Update the hgnc symbols if they are missing
    variant_genes = variant_obj.get("genes")
    if variant_genes is not None:
        for gene_obj in variant_genes:
            # If there is no hgnc id there is nothing we can do
            if not gene_obj["hgnc_id"]:
                continue
            # Else we collect the gene object and fill in the symbol
            if gene_obj.get("hgnc_symbol") is None:
                hgnc_gene = store.hgnc_gene(gene_obj["hgnc_id"], build=genome_build)
                if not hgnc_gene:
                    continue
                has_changed = True
                gene_obj["hgnc_symbol"] = hgnc_gene["hgnc_symbol"]

    # We update the variant if some information was missing from loading
    # or if symbols in reference genes have changed
    if update and has_changed:
        variant_obj = store.update_variant(variant_obj)

    variant_obj["comments"] = store.events(
        institute_obj,
        case=case_obj,
        variant_id=variant_obj["variant_id"],
        comments=True,
    )

    if variant_genes:
        variant_obj.update(predictions(variant_genes))
        if variant_obj.get("category") == "cancer":
            variant_obj.update(get_variant_info(variant_genes))

    # NOTE(review): this walks the list captured before any refresh above; if
    # store.update_variant_compounds returns new dicts those are not decorated
    # here - confirm this is intended.
    for compound_obj in compounds:
        compound_obj.update(predictions(compound_obj.get("genes", [])))

    classification = variant_obj.get("acmg_classification")
    if isinstance(classification, int):
        acmg_code = ACMG_MAP[classification]
        variant_obj["acmg_classification"] = ACMG_COMPLETE_MAP[acmg_code]

    # convert length for SV variants: the two sentinel values get a display
    # string, any other value is kept as is
    variant_length = variant_obj.get("length")
    variant_obj["length"] = {100000000000: "inf", -1: "n.d."}.get(
        variant_length, variant_length
    )
    if "end_chrom" not in variant_obj:
        variant_obj["end_chrom"] = variant_obj["chromosome"]

    return variant_obj
def gene_variants(store, variants_query, institute_id, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Variants whose case cannot be found, or whose case is neither owned by
    nor shared with one of the current user's institutes, are left out.

    Args:
        store: adapter exposing ``case(case_id=...)`` and
            ``hgnc_gene(hgnc_id, build=...)``
        variants_query: cursor-like object supporting ``count()`` and
            ``skip(n).limit(n)``
        institute_id: accepted for interface compatibility, not used here
        page(int): 1-based page number, values below 1 behave like 1
        per_page(int): maximum number of variants on a page

    Returns:
        dict: ``{"variants": list of decorated variants, "more_variants": bool}``
    """
    # We need to call variants_collection.count_documents here
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    my_institutes = {inst["_id"] for inst in user_institutes(store, current_user)}

    variants = []
    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # hide other institutes for now
        case_institutes = {variant_case_obj.get("owner")}
        case_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(case_institutes):
            # If the user does not have access to the information we skip it
            continue

        # Anything but a known build falls back to "37"
        genome_build = variant_case_obj.get("genome_build", "37")
        if genome_build not in ["37", "38"]:
            genome_build = "37"

        variant_genes = variant_obj.get("genes")
        if variant_genes is not None:
            # Update the HGNC symbols if they are not set
            for gene_obj in variant_genes:
                # If there is no hgnc id there is nothing we can do
                if not gene_obj["hgnc_id"]:
                    continue
                # Else we collect the gene object and fill in what is missing
                if gene_obj.get("hgnc_symbol") is None or gene_obj.get("description") is None:
                    hgnc_gene = store.hgnc_gene(gene_obj["hgnc_id"], build=genome_build)
                    if not hgnc_gene:
                        continue
                    gene_obj["hgnc_symbol"] = hgnc_gene["hgnc_symbol"]
                    gene_obj["description"] = hgnc_gene["description"]

            # Populate variant HGVS and predictions
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj["hgnc_id"])["symbol"])

                # Both placeholders are reset for every gene so a gene without
                # a canonical transcript can neither raise a NameError nor
                # inherit the protein change of the previous gene.
                hgvs_nucleotide = "-"
                hgvs_protein = "-"
                # gather HGVS info from the canonical gene transcript(s);
                # a missing transcript list is treated as empty
                for transcript_obj in gene_obj.get("transcripts") or []:
                    if transcript_obj.get("is_canonical") is True:
                        hgvs_nucleotide = str(transcript_obj.get("coding_sequence_name"))
                        hgvs_protein = str(transcript_obj.get("protein_sequence_name"))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # Only single-gene variants get an HGVS string: protein change
            # first, then coding change, else a placeholder. "None" is the
            # str() of a missing transcript field.
            if len(gene_symbols) == 1:
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj["hgvs"] = hgvs

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}