def defFavorFlt(metadata_record):
    """Build the filter unit set for a dataset that uses the FAVOR schema.

    Creates a FilterPrepareSetH over ``metadata_record`` (with identifier
    checking switched off) and registers the filtering units below,
    organised by view group.

    Args:
        metadata_record: mapping with dataset metadata; its "data_schema"
            entry must be equal to "FAVOR".

    Returns:
        The populated FilterPrepareSetH instance.

    Raises:
        AssertionError: if "data_schema" is missing or is not "FAVOR".
    """
    data_schema = metadata_record.get("data_schema")
    # str() keeps the message buildable when the key is absent (None) or
    # not a string; plain concatenation raised TypeError in that case and
    # masked the real problem.
    assert data_schema == "FAVOR", (
        "FAVOR data schema expected: " + str(data_schema))

    filters = FilterPrepareSetH(metadata_record, anfisaVariables,
        check_identifiers=False)

    with filters.viewGroup("Coordinates"):
        # NOTE(review): "chr23" is listed next to chrX/chrY - confirm that
        # this value is really expected in the data.
        filters.statusUnit("Chromosome", "/_filters/chromosome",
            variants=[
                "chr1", "chr2", "chr3", "chr4", "chr5", "chr6",
                "chr7", "chr8", "chr9", "chr10", "chr11", "chr12",
                "chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
                "chr19", "chr20", "chr21", "chr22", "chr23",
                "chrX", "chrY", "undefined"
            ],
            default_value="undefined")
        filters.intValueUnit("Position", "/_filters/position",
            default_value=sys.maxsize)

    with filters.viewGroup("Genes"):
        genes_unit = filters.multiStatusUnit(
            "Symbol",
            # "/_filters/genes[]",
            "/_view/general/genes[]",
            compact_mode=True)
        filters.panelsUnit("Panels", genes_unit, "Symbol",
            view_path="/_view/general/gene_panels")
        filters.intValueUnit("Num_Genes", "/_view/general/genes",
            conversion="len", default_value=0)

    with filters.viewGroup("gnomAD"):
        filters.floatValueUnit("gnomAD_Total_AF",
            "/_filters/gnomad_total_af",
            diap=(0., 1.), default_value=0.)

    with filters.viewGroup("GENCODE"):
        filters.multiStatusUnit("GENCODE_Category",
            "/_filters/gencode_category[]", default_value="None")
        filters.multiStatusUnit("GENCODE_Exonic_Category",
            "/_filters/gencode_exonic_category", compact_mode=True)

    with filters.viewGroup("TOPMed"):
        filters.multiStatusUnit("TOPMed_QC_Status",
            "/_filters/top_med_qc_status[]", default_value="None")
        filters.floatValueUnit("TOPMed_Bravo_AF",
            "/_filters/top_med_bravo_af", default_value=0.)

    with filters.viewGroup("Allele Frequencies"):
        filters.floatValueUnit("ExAC03", "/_filters/exac03",
            render_mode="linear,<", default_value=0.)

    with filters.viewGroup("Variant Category"):
        # Units that differ only in name and path; registered in this order.
        for unit_name, unit_path in (
                ("Disruptive_Missense", "/_filters/disruptive_missense"),
                ("CAGE_Promoter", "/_filters/cage_promoter"),
                ("CAGE_Enhancer", "/_filters/cage_enhancer"),
                ("Gene_Hancer", "/_filters/gene_hancer"),
                ("Super_Enhancer", "/_filters/super_enhancer")):
            filters.multiStatusUnit(unit_name, unit_path,
                default_value="N/A")

    with filters.viewGroup("Nucleotide Diversity"):
        filters.floatValueUnit("bStatistics", "/_filters/bstatistics",
            default_value=0.)

    with filters.viewGroup("Mutation Rate"):
        filters.floatValueUnit("Freq1000bp", "/_filters/freq1000bp",
            default_value=0.)
        filters.floatValueUnit("Rare1000bp", "/_filters/rare1000bp",
            default_value=0.)

    with filters.viewGroup("Predictions"):
        filters.multiStatusUnit("Clinvar", "/_filters/clinvar[]",
            default_value="N/A")

    with filters.viewGroup("Protein Function"):
        for unit_name, unit_path in (
                ("Polyphen_2_HVAR", "/_filters/polyphen2_hvar"),
                ("Polyphen_2_HDIV", "/_filters/polyphen2_hdiv"),
                ("PolyPhenCat", "/_filters/polyphen_cat"),
                ("SIFTcat", "/_filters/sift_cat")):
            filters.multiStatusUnit(unit_name, unit_path,
                default_value="N/A")

    with filters.viewGroup("Integrative Score"):
        filters.floatValueUnit("GC", "/_filters/gc", default_value=0.)
        filters.floatValueUnit("CpG", "/_filters/cpg", default_value=0.)

    return filters
def defineFilterSchema(metadata_record):
    """Build the filter unit set for a dataset described by metadata_record.

    A record whose "data_schema" is "FAVOR" is delegated to
    FavorSchema.defineFilterSchema(). Otherwise the schema must be absent
    or "CASE"; a FilterPrepareSetH is created and populated with the units
    below, organised by view group. Cohort-specific units are registered
    only when the record has a non-empty "cohorts" entry.

    Args:
        metadata_record: mapping with dataset metadata; the keys read here
            are "data_schema" and "cohorts" (each cohort item is indexed
            by "name").

    Returns:
        The populated FilterPrepareSetH instance (or whatever
        FavorSchema.defineFilterSchema() returns for FAVOR data).

    Raises:
        AssertionError: if "data_schema" is present and is neither
            "FAVOR" nor "CASE".
    """
    data_schema = metadata_record.get("data_schema")
    if data_schema == "FAVOR":
        return FavorSchema.defineFilterSchema(metadata_record)
    # str() keeps the message buildable for non-string values; plain
    # concatenation raised TypeError instead of the intended AssertionError.
    assert data_schema is None or data_schema == "CASE", (
        "Bad data schema: " + str(data_schema))

    filters = FilterPrepareSetH(metadata_record)
    cohorts = metadata_record.get("cohorts")

    with filters.viewGroup("Inheritance"):
        if cohorts:
            filters.multiStatusUnit("Variant_in",
                "/_filters/cohort_has_variant[]")
        filters.multiStatusUnit("Callers",
            "/_view/bioinformatics/called_by[]",
            title="Called by")
        filters.statusUnit("Proband_Zygosity",
            "/_view/bioinformatics/zygosity",
            title="Proband Zygosity")
        filters.intValueUnit("Num_Samples",
            "/_filters/has_variant",
            title="Number of Samples",
            conversion=["len"], default_value=0,
            tooltip="Number of samples for which this variant "
            "has been called")
        filters.multiStatusUnit("Has_Variant",
            "/_filters/has_variant[]")

    if cohorts:
        # The aggregate "ALL" pseudo-cohort goes first, then named cohorts.
        all_cohorts = ["ALL"] + [ch["name"] for ch in cohorts]
        with filters.viewGroup("Cohorts"):
            for ch_name in all_cohorts:
                filters.floatValueUnit(ch_name + "_AF",
                    "/_view/cohorts/" + ch_name + "/AF",
                    default_value=0)
                filters.floatValueUnit(ch_name + "_AF2",
                    "/_view/cohorts/" + ch_name + "/AF2",
                    default_value=0, title="AF_Hom")

    with filters.viewGroup("Variant"):
        filters.statusUnit("Variant_Class",
            "/__data/variant_class",
            tooltip=("Variant class as returned by VEP. "
                "The class of a variant is based on Sequence "
                "Ontology and is called according to its component "
                "alleles and its mapping to the reference genome. "
                "https://useast.ensembl.org/info/genome/variation/"
                "prediction/classification.html#classes"))
        filters.statusUnit("Most_Severe_Consequence",
            "/__data/most_severe_consequence",
            variants=sConsequenceVariants,
            default_value="undefined")
        filters.multiStatusUnit("Canonical_Annotation",
            "/_view/general/canonical_annotation[]",
            default_value="undefined")
        filters.statusUnit("Multiallelic",
            "/_filters/multiallelic",
            title="Multi-allelic?")
        filters.statusUnit("Altered_VCF",
            "/_filters/altered_vcf",
            title="Has VCF been normalized?")
        # filters.intValueUnit("Number_ALTs",
        #     "/_filters/alts",
        #     title = "Number of Alternative alleles",
        #     conversion = ["len"], default_value = 0)

        #filters.intValueUnit("zyg_len", "/__data/zygosity",
        #    conversion = ["len"], default_value = 0)

    with filters.viewGroup("Genes"):
        genes_unit = filters.multiStatusUnit("Symbol",
            "/_view/general/genes[]",
            compact_mode=True)
        filters.panelsUnit("Panels", genes_unit, "Symbol",
            view_path="/_view/general/gene_panels")
        filters.multiStatusUnit("EQTL_Gene",
            "/_filters/eqtl_gene[]",
            title="EQTL Gene",
            default_value="None")
        #filters.multiStatusUnit("Transcripts",
        #    "/__data/transcript_consequences[]", compact_mode = True,
        #    conversion = [["property", transcript_id"]])
        filters.intValueUnit("Num_Genes",
            "/_view/general/genes",
            title="Number of overlapping genes",
            conversion=["len"], default_value=0)
        filters.intValueUnit("Num_Transcripts",
            "/__data/transcript_consequences",
            title="Number of transcripts at the position",
            conversion=["len"], default_value=0)

    with filters.viewGroup("Transcripts"):
        filters.transcriptMultisetUnit("Transcript_consequence",
            "transcript_annotations",
            variants=sConsequenceVariants,
            default_value="undefined")
        filters.transcriptStatusUnit("Transcript_canonical",
            "is_canonical",
            bool_check_value="True", default_value="False")
        filters.transcriptStatusUnit("Transcript_GENCODE_Basic",
            "gencode_basic",
            bool_check_value="True", default_value="False")
        filters.transcriptStatusUnit("Transcript_biotype",
            "biotype",
            default_value="undefined")
        filters.transcriptStatusUnit("Transcript_worst",
            "is_worst",
            bool_check_value="True", default_value="False")
        filters.transcriptStatusUnit("Transcript_id",
            "id",
            default_value="undefined")
        # The misspelled unit name is an identifier; it is kept as is.
        tr_genes_unit = filters.transcriptStatusUnit("Transctript_Gene",
            "gene",
            default_value="undefined")
        filters.transcriptPanelsUnit("Transcript_Gene_Panels",
            tr_genes_unit, "Symbol",
            view_name="tr_gene_panels")
        filters.transcriptStatusUnit("Transcript_source",
            "transcript_source",
            default_value="undefined")
        filters.transcriptStatusUnit("Transcript_codon_pos",
            "codonpos",
            default_value="undefined")
        filters.transcriptStatusUnit("Transcript_region",
            "region",
            title="Gene Region",
            default_value="undefined")
        filters.transcriptStatusUnit("Transcript_CDS",
            "cds",
            title="CDS?",
            default_value="-")
        filters.transcriptStatusUnit("Transcript_masked",
            "masked_region",
            title="Masked",
            default_value="No")
        filters.transcriptIntValueUnit("Transcript_dist_from_exon",
            "dist_from_exon",
            title="Distance from Exon Boundary",
            default_value=-1)
        # filters.transcriptStatusUnit("Transcript_strand", "strand",
        #     default_value = "undefined")

    with filters.viewGroup("Transcript_Predictions"):
        filters.transcriptStatusUnit(
            "Transcript_PolypPhen_HDIV", "polyphen2_hdiv_prediction")
        filters.transcriptStatusUnit(
            "Transcript_PolyPhen_HVAR", "polyphen2_hvar_prediction")
        filters.transcriptStatusUnit(
            "Transcript_SIFT", "sift_prediction")
        filters.transcriptStatusUnit(
            "Transcript_SIFT_4G", "sift_4g_prediction")
        filters.transcriptStatusUnit(
            "Transcript_FATHMM", "fathmm_prediction")

    # with filters.viewGroup("Transcripts"):
    #     filters.transcriptMultisetUnit("Transcript_consequence",
    #         "consequence_terms", variants = sConsequenceVariants,
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_canonical", "canonical",
    #         bool_check_value = "1", default_value = "False")
    #     filters.transcriptStatusUnit("Transcript_biotype", "biotype",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_worst", "consequence_terms",
    #         bool_check_value = "${Most_Severe_Consequence}",
    #         default_value = "False")
    #     filters.transcriptStatusUnit("Transcript_id", "transcript_id",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transctript_gene_id", "gene_id",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_source", "source",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_strand", "strand",
    #         default_value = "undefined")

    with filters.viewGroup("Coordinates"):
        # NOTE(review): "chr23" is listed next to chrX/chrY - confirm that
        # this value is really expected in the data.
        filters.statusUnit("Chromosome", "/_filters/chromosome",
            variants=[
                "chr1", "chr2", "chr3", "chr4", "chr5", "chr6",
                "chr7", "chr8", "chr9", "chr10", "chr11", "chr12",
                "chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
                "chr19", "chr20", "chr21", "chr22", "chr23",
                "chrX", "chrY", "undefined"],
            default_value="undefined")
        filters.intValueUnit("Start_Pos", "/__data/start",
            title="Start Position",
            render_mode="neighborhood",
            default_value=sys.maxsize)
        filters.intValueUnit("End_Pos", "/__data/end",
            title="End Position",
            default_value=0,
            render_mode="neighborhood")
        # NOTE(review): this unit shares its title with
        # Dist_from_Exon_Canonical; left unchanged because the intended
        # wording cannot be determined from this function.
        filters.intValueUnit("Dist_from_Exon",
            "/_filters/dist_from_exon",
            title="Distance From Intron/Exon Boundary (Canonical)",
            default_value=0,
            render_mode="log,<")
        filters.intValueUnit("Dist_from_Exon_Canonical",
            "/_filters/dist_from_exon_canonical",
            title="Distance From Intron/Exon Boundary (Canonical)",
            default_value=0,
            render_mode="log,<",
            conversion=["min"])
        # Title used to say "(Canonical)": a copy-paste slip for the
        # "worst" variant of the unit.
        filters.intValueUnit("Dist_from_Exon_Worst",
            "/_filters/dist_from_exon_worst",
            title="Distance From Intron/Exon Boundary (Worst)",
            default_value=0,
            render_mode="log,<",
            conversion=["min"])
        filters.multiStatusUnit("Region_Canonical",
            "/__data/region_canonical[]",
            title="Region (Canonical)",
            default_value="Other")
        # Title used to say "Region (Canonical)": same copy-paste slip.
        filters.multiStatusUnit("Region_Worst",
            "/__data/region_worst[]",
            title="Region (Worst)",
            default_value="Other")
        filters.statusUnit("Region",
            "/__data/region_canonical",
            title="Region (Legacy)",
            default_value="Other")
        filters.statusUnit("hg19", "/_view/general/hg19",
            title="HG19",
            conversion=[["filter", "is_none"]],
            value_map={"None": "Unmapped"},
            default_value="Mapped")

    with filters.viewGroup("gnomAD"):
        filters.floatValueUnit("gnomAD_AF",
            "/_filters/gnomad_af_fam",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Allele Frequency (family)",
            tooltip="gnomAD Overall Allele Frequency",
            render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Exomes",
            "/_filters/gnomad_db_exomes_af",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Exome Allele Frequency (family)",
            render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Genomes",
            "/_filters/gnomad_db_genomes_af",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Genome Allele Frequency (family)",
            render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Proband",
            "/_filters/gnomad_af_pb",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Allele Frequency (proband)",
            tooltip="gnomAD Overall Allele Frequency "
            "for the allele present in proband",
            render_mode="log,<")
        filters.floatValueUnit("gnomAD_PopMax_AF",
            "/_filters/gnomad_popmax_af",
            tooltip="Maximum allele frequency across outbred populations",
            diap=(0., 1.), default_value=0.,
            title="PopMax Allele Frequency",
            render_mode="log,<")
        filters.statusUnit("gnomAD_PopMax",
            "/_filters/gnomad_popmax",
            default_value="None",
            title="PopMax Ancestry",
            tooltip="Outbred population that has the maximum "
            "allele frequency")
        filters.intValueUnit("gnomAD_PopMax_AN",
            "/_filters/gnomad_popmax_an",
            default_value=0,
            title="Number of alleles in outbred PopMax Ancestry",
            render_mode="log,>")
        filters.floatValueUnit("gnomAD_PopMax_AF_Inbred",
            "/_filters/gnomad_raw_popmax_af",
            tooltip="Maximum allele frequency across all populations "
            "(including inbred)",
            diap=(0., 1.), default_value=0.,
            title="PopMax Allele Frequency (including inbred)",
            render_mode="log,<")
        filters.statusUnit("gnomAD_PopMax_Inbred",
            "/_filters/gnomad_raw_popmax",
            default_value="None",
            title="PopMax Ancestry (including inbred)",
            tooltip="Population, including inbred, that has the maximum "
            "allele frequency")
        filters.intValueUnit("gnomAD_PopMax_AN_Inbred",
            "/_filters/gnomad_raw_popmax_an",
            default_value=0,
            render_mode="log,>",
            title="Number of alleles in (inbred) PopMax Ancestry")
        filters.intValueUnit("gnomAD_Hom",
            "/_filters/gnomad_hom",
            default_value=0,
            render_mode="log,>",
            title="gnomAD: Number of homozygous")
        filters.intValueUnit("gnomAD_Hem",
            "/_filters/gnomad_hem",
            default_value=0,
            render_mode="log,>",
            title="gnomAD: Number of hemizygous")

    # Sorted once and reused by the "Databases" and "Predictions" groups.
    trusted_submitters = sorted(filters.getStdItem(
        "item-dict", "Clinvar_Trusted_Submitters").getData().values())

    with filters.viewGroup("Databases"):
        presence_in_db = [
            ("ClinVar", "/_view/databases/clinVar"),
            ("GnomAD", "/_filters/gnomad_af_fam"),
            ("HGMD", "/__data/hgmd_pmids[]"),
            ("OMIM", "/_view/databases/omim")]
        presence_in_db.extend(
            (submitter, "/_view/databases/clinvar_trusted/%s" % submitter)
            for submitter in trusted_submitters)

        filters.presenceUnit("Presence_in_Databases",
            presence_in_db,
            title="Presence in Databases")
        filters.multiStatusUnit("ClinVar_Submitters",
            "/_view/databases/clinVar_submitters[]",
            title="ClinVar Submitters",
            compact_mode=True)
        filters.intValueUnit("Number_submitters",
            "/_view/databases/clinVar_submitters",
            title="Number of ClinVar Submitters",
            conversion=["len"], default_value=0)
        filters.multiStatusUnit("PMIDs",
            "/_view/databases/references[]",
            title="PMIDs",
            compact_mode=True)
        filters.intValueUnit("Number_pmid",
            "/_view/databases/references",
            title="Number of PMIDs",
            conversion=["len"], default_value=0)
        # filters.multiStatusUnit("beacons",
        #     "/__data/beacon_names",
        #     title = "Observed at")

    with filters.viewGroup("Call_Quality"):
        # Proband_GQ and Min_GQ show exactly the same explanation.
        gq_tooltip = (
            "GQ tells you how confident we are that "
            "the genotype we assigned to a particular sample is correct. "
            "It is simply the second lowest PL, because it is the "
            "difference between the second lowest PL and the lowest PL "
            "(always 0).")
        filters.floatValueUnit("Proband_GQ",
            "/_filters/proband_gq",
            title="Genotype Quality (GQ) for Proband",
            render_mode="linear,>",
            default_value=-1,
            tooltip=gq_tooltip)
        filters.floatValueUnit("Min_GQ",
            "/_filters/min_gq",
            title="Minimum GQ for the family",
            render_mode="linear,>",
            default_value=-1,
            tooltip=gq_tooltip)
        filters.intValueUnit("Max_GQ",
            "/_view/quality_samples",
            title="The highest GQ",
            tooltip="Max(GQ) for those samples that have the variant",
            render_mode="linear,=",
            default_value=0,
            conversion=[
                ["filter", "has_variant"],
                ["property", "genotype_quality"],
                "max"])
        filters.intValueUnit("Num_NO_CALL",
            "/_view/quality_samples",
            title="Number of NO_CALL samples",
            tooltip="Number of samples with NO_CALL in the current site",
            render_mode="linear,=",
            default_value=0,
            conversion=[
                ["skip", 1],
                ["property", "genotype_quality"],
                "negative",
                "len"])
        filters.intValueUnit("QUAL",
            "/_filters/qual",
            title="Variant Call Quality",
            default_value=-1)
        filters.floatValueUnit("QD",
            "/_filters/qd",
            title="Quality by Depth",
            render_mode="linear,>",
            default_value=-1.,
            tooltip="The QUAL score normalized by allele depth (AD) "
            "for a variant. This annotation puts the variant confidence "
            "QUAL score into perspective by normalizing for the amount "
            "of coverage available. Because each read contributes a little "
            "to the QUAL score, variants in regions with deep coverage "
            "can have artificially inflated QUAL scores, giving the "
            "impression that the call is supported by more evidence "
            "than it really is. To compensate for this, we normalize "
            "the variant confidence by depth, which gives us a more "
            "objective picture of how well supported the call is.")
        filters.floatValueUnit("FS",
            "/_filters/fs",
            title="Fisher Strand Bias",
            render_mode="linear,<",
            default_value=0.,
            tooltip="Phred-scaled probability that there is strand bias "
            "at the site. Strand Bias tells us whether the alternate "
            "allele was seen more or less often on the forward or "
            "reverse strand than the reference allele. When there "
            "little to no strand bias at the site, the FS value "
            "will be close to 0.")
        filters.multiStatusUnit("FT",
            "/_filters/filters[]",
            title="FILTER",
            tooltip="This field contains the name(s) of any filter(s) "
            "that the variant fails to pass, or the value PASS if the "
            "variant passed all filters. If the FILTER value is ., "
            "then no filtering has been applied to the records.")

    with filters.viewGroup("Predictions"):
        # research_only = True
        filters.statusUnit("HGMD_Benign",
            "/_filters/hgmd_benign",
            title="Categorized Benign in HGMD",
            default_value="Not in HGMD",
            value_map={"True": "Benign", "False": "VUS or Pathogenic"})
        filters.multiStatusUnit("HGMD_Tags",
            "/_view/databases/hgmd_tags[]",
            default_value="None")

        # research_only = True
        filters.statusUnit("Clinvar_Benign",
            "/_filters/clinvar_benign",
            default_value="Not in ClinVar",
            title="Categorized Benign in ClinVar by all submitters",
            value_map={"True": "Benign", "False": "VUS or Pathogenic"})
        filters.multiStatusUnit("ClinVar_Significance",
            "/__data/clinvar_significance[]",
            title="Clinical Significance in ClinVar")

        # The submitter dictionary is fetched inside the callback, i.e.
        # each time the pre-transform is invoked, not at registration.
        filters.regPreTransform(lambda rec_no, rec_data:
            clinvarPreTransform(rec_data, filters.getStdItem(
                "item-dict", "Clinvar_Trusted_Submitters").getData()))

        filters.multiStatusUnit("Clinvar_Trusted_Significance",
            "/_view/databases/clinvar_trusted",
            title="ClinVar significance by trusted submitters only",
            tooltip="Clinical Significance by ClinVar trusted "
            "submitters only",
            conversion=["values", ["split", ','], "clear", "uniq"])
        filters.multiStatusUnit("Clinvar_Trusted_Simplified",
            "/_view/databases/clinvar_trusted_simplified",
            tooltip="Simplified Clinical Significance by trusted "
            "submitters only",
            conversion=["values", ["split", ','], "clear", "uniq"])
        filters.statusUnit("Clinvar_stars",
            "/_filters/clinvar_stars",
            default_value="No data",
            title="ClinVar Stars")
        filters.intValueUnit("Number_of_clinvar_submitters",
            "/_filters/num_clinvar_submitters",
            render_mode="log,>",
            default_value=0,
            title="ClinVar: Number of Submitters")
        filters.statusUnit("Clinvar_review_status",
            "/_filters/clinvar_review_status",
            default_value="No data",
            title="ClinVar Review Status")
        filters.statusUnit("Clinvar_criteria_provided",
            "/_filters/clinvar_criteria_provided",
            default_value="Not Provided",
            title="ClinVar Criteria")
        filters.statusUnit("Clinvar_conflicts",
            "/_filters/clinvar_conflicts",
            default_value="Criteria not Provided",
            title="ClinVar Conflicts")
        filters.multiStatusUnit("Clinvar_acmg_guidelines",
            "/_filters/clinvar_acmg_guidelines[]",
            default_value="None")

        # One significance unit per trusted submitter.
        for submitter in trusted_submitters:
            filters.statusUnit("%s_Significance" % submitter,
                "/_view/databases/clinvar_trusted",
                title="Clinical Significance by %s" % submitter,
                conversion=[["property", submitter]],
                default_value="None")

        #filters.statusUnit("Clinvar_Trusted_Benign",
        #    "/_filters/clinvar_trusted_benign",
        #    default_value = "No data",
        #    title = "Categorized Benign by Clinvar Trusted Submitters",
        #    value_map = {"True": "Benign by Trusted submitters",
        #        "False": "Unknown"})

        filters.statusUnit("splice_altering",
            "/_filters/splice_altering",
            title="Splice AI splice altering",
            default_value="No altering")
        filters.floatValueUnit("splice_ai_dsmax",
            "/_filters/splice_ai_dsmax",
            title="Splice AI splice altering score",
            render_mode="linear,>",
            default_value=0)

        # filters.multiStatusUnit("Polyphen", "/_view/predictions/polyphen[]",
        #     default_value = "N/A")
        # This is an obsolete filter replaced by Polyphen 2
        filters.multiStatusUnit("Polyphen_2_HVAR",
            "/_view/predictions/polyphen2_hvar[]",
            title="Polyphen",
            conversion=[["split_re", r"[\s\,]"], "clear", "uniq"],
            default_value="N/A",
            tooltip="HumVar (HVAR) is PolyPhen-2 classifier "
            "trained on known human variation (disease mutations vs."
            " common neutral variants)")
        filters.multiStatusUnit("Polyphen_2_HDIV",
            "/_view/predictions/polyphen2_hdiv[]",
            title="Polyphen HDIV (High sensitivity)",
            conversion=[["split_re", r"[\s\,]"], "clear", "uniq"],
            default_value="N/A",
            tooltip="HumDiv (HDIV) classifier is trained on a smaller "
            "number of select extreme effect disease mutations vs. "
            "divergence with close homologs (e.g. primates), which is "
            "supposed to consist of mostly neutral mutations.")
        filters.multiStatusUnit("SIFT",
            "/_view/predictions/sift[]",
            default_value="N/A",
            tooltip="Sort intolerated from tolerated (An amino acid at a "
            "position is tolerated | The most frequentest amino acid "
            "being tolerated). D: Deleterious T: tolerated")
        filters.multiStatusUnit("FATHMM",
            "/_view/predictions/fathmm[]",
            default_value="N/A",
            tooltip="Functional analysis through hidden markov model HMM."
            "D: Deleterious; T: Tolerated")
        filters.multiStatusUnit("PrimateAI",
            "/_view/predictions/primate_ai_pred[]",
            default_value="N/A",
            tooltip="Prediction of PrimateAI score based on the authors "
            "recommendation, “T(olerated)” or “D(amaging)”. "
            "The score cutoff between “D” and “T” is 0.803.")
        filters.floatValueUnit("GERP_score",
            "/_view/bioinformatics/gerp_rs",
            render_mode="linear,>",
            default_value=0,
            title="GERP Score")

    with filters.viewGroup("Pharmacogenomics"):
        filters.multiStatusUnit("Diseases",
            "/_filters/pharmacogenomics_diseases[]",
            default_value="N/A")
        filters.multiStatusUnit("Chemicals",
            "/_filters/pharmacogenomics_chemicals[]",
            default_value="N/A")

    with filters.viewGroup("Expression"):
        filters.multiStatusUnit("Mostly_Expressed_in",
            "/_filters/top_tissues[]",
            default_value="N/A")

    # required = {"debug"}
    with filters.viewGroup("Debug_Info"):
        filters.intValueUnit("Severity", "/_filters/severity",
            default_value=-1)

    return filters
s_value = "other" clinvar_trusted_simplified[trusted_map[submitter]] = s_value rec_data["_view"]["databases"]["clinvar_trusted"] = clinvar_trusted rec_data["_view"]["databases"]["clinvar_trusted_simplified"] = ( clinvar_trusted_simplified) #=============================================== def sample_has_variant(sample): genotype = sample.get("genotype") return genotype and not ("HOM_REF" in genotype or "NO_CALL" in genotype) def is_none(value): return value == "None" FilterPrepareSetH.regNamedFunction("has_variant", sample_has_variant) FilterPrepareSetH.regNamedFunction("is_none", is_none) #=============================================== def defineFilterSchema(metadata_record): data_schema = metadata_record.get("data_schema") if data_schema == "FAVOR": return FavorSchema.defineFilterSchema(metadata_record) assert data_schema is None or data_schema == "CASE", ( "Bad data schema: " + data_schema) filters = FilterPrepareSetH(metadata_record) cohorts = metadata_record.get("cohorts") with filters.viewGroup("Inheritance"): if cohorts: filters.multiStatusUnit("Variant_in",
def defineFilterSchema(metadata_record): data_schema = metadata_record.get("data_schema") if data_schema == "FAVOR": return FavorSchema.defineFilterSchema(metadata_record) assert data_schema is None or data_schema == "CASE", ( "Bad data schema: " + data_schema) filters = FilterPrepareSetH(metadata_record, anfisaVariables) cohorts = metadata_record.get("cohorts") with filters.viewGroup("Inheritance"): if cohorts: filters.multiStatusUnit("Variant_in", "/_filters/cohort_has_variant[]") filters.multiStatusUnit("Callers", "/_view/bioinformatics/called_by[]") filters.statusUnit("Proband_Zygosity", "/_view/bioinformatics/zygosity") filters.intValueUnit("Num_Samples", "/_filters/has_variant", conversion = ["len"], default_value = 0) filters.multiStatusUnit("Has_Variant", "/_filters/has_variant[]") if cohorts: with filters.viewGroup("Cohorts"): filters.floatValueUnit("ALL_AF", "/_view/cohorts/ALL", default_value = 0) filters.floatValueUnit("ALL_AF2", "/_view/cohorts/ALL2", default_value = 0) for ch_info in cohorts: ch_name = ch_info["name"] filters.floatValueUnit(f"Cohort_{ch_name}_AF", f"/_view/cohorts/{ch_name}/AF", default_value = 0) filters.floatValueUnit(f"Cohort_{ch_name}_AF2", f"/_view/cohorts/{ch_name}/AF2", default_value = 0) with filters.viewGroup("Variant"): filters.statusUnit("Variant_Class", "/__data/variant_class") filters.statusUnit("Most_Severe_Consequence", "/__data/most_severe_consequence", variants = sConsequenceVariants, default_value = "undefined") filters.multiStatusUnit("Canonical_Annotation", "/_view/general/canonical_annotation[]", default_value = "undefined") filters.statusUnit("Multiallelic", "/_filters/multiallelic") filters.statusUnit("Altered_VCF", "/_filters/altered_vcf") # filters.intValueUnit("Number_ALTs", # "/_filters/alts", # conversion = ["len"], default_value = 0) #filters.intValueUnit("zyg_len", "/__data/zygosity", # conversion = ["len"], default_value = 0) with filters.viewGroup("Genes"): genes_unit = filters.multiStatusUnit("Symbol", 
"/_view/general/genes[]", compact_mode = True) filters.panelsUnit("Panels", genes_unit, "Symbol", view_path = "/_view/general/gene_panels") filters.multiStatusUnit("EQTL_Gene", "/_filters/eqtl_gene[]", default_value = "None") filters.intValueUnit("Num_Genes", "/_view/general/genes", conversion = ["len"], default_value = 0) filters.intValueUnit("Num_Transcripts", "/__data/transcript_consequences", conversion = ["len"], default_value = 0) with filters.viewGroup("Transcripts"): filters.transcriptMultisetUnit("Transcript_consequence", "transcript_annotations", variants = sConsequenceVariants, default_value = "undefined") filters.transcriptStatusUnit("Transcript_canonical", "is_canonical", bool_check_value = "True", default_value = "False") filters.transcriptStatusUnit("Transcript_GENCODE_Basic", "gencode_basic", bool_check_value = "True", default_value = "False") filters.transcriptStatusUnit("Transcript_biotype", "biotype", default_value = "undefined") filters.transcriptStatusUnit("Transcript_worst", "is_worst", bool_check_value = "True", default_value = "False") filters.transcriptStatusUnit("Transcript_id", "id", default_value = "undefined", transcript_id_mode = True) tr_genes_unit = filters.transcriptStatusUnit("Transctript_Gene", "gene", default_value = "undefined") filters.transcriptPanelsUnit("Transcript_Gene_Panels", tr_genes_unit, "Symbol", view_name = "tr_gene_panels") filters.transcriptStatusUnit("Transcript_source", "transcript_source", default_value = "undefined") filters.transcriptStatusUnit("Transcript_codon_pos", "codonpos", default_value = "undefined") filters.transcriptStatusUnit("Transcript_region", "region", default_value = "undefined") filters.transcriptStatusUnit("Transcript_CDS", "cds", default_value = "-") filters.transcriptStatusUnit("Transcript_masked", "masked_region", default_value = "No") filters.transcriptIntValueUnit("Transcript_dist_from_exon", "dist_from_exon", default_value = -1) # filters.transcriptStatusUnit("Transcript_strand", 
"strand", # default_value = "undefined") with filters.viewGroup("Transcript_Predictions"): filters.transcriptStatusUnit( "Transcript_PolypPhen_HDIV", "polyphen2_hdiv_prediction") filters.transcriptStatusUnit( "Transcript_PolyPhen_HVAR", "polyphen2_hvar_prediction") filters.transcriptStatusUnit( "Transcript_SIFT", "sift_prediction") filters.transcriptStatusUnit( "Transcript_SIFT_4G", "sift_4g_prediction") filters.transcriptStatusUnit( "Transcript_FATHMM", "fathmm_prediction") with filters.viewGroup("Coordinates"): filters.statusUnit("Chromosome", "/_filters/chromosome", variants = ["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chr23", "chrX", "chrY", "undefined"], default_value = "undefined") filters.intValueUnit("Start_Pos", "/__data/start", default_value = sys.maxsize) filters.intValueUnit("End_Pos", "/__data/end", default_value = 0) filters.intValueUnit("Dist_from_Exon", "/_filters/dist_from_exon", default_value = 0) filters.intValueUnit("Dist_from_Exon_Canonical", "/_filters/dist_from_exon_canonical", default_value = 0, conversion = ["min"]) filters.intValueUnit("Dist_from_Exon_Worst", "/_filters/dist_from_exon_worst", default_value = 0, conversion = ["min"]) filters.multiStatusUnit("Region_Canonical", "/__data/region_canonical[]", default_value = "Other") filters.multiStatusUnit("Region_Worst", "/__data/region_worst[]", default_value = "Other") filters.transcriptStatusUnit("Region", "region", default_value = "undefined") filters.statusUnit("In_hg19", "/_view/general/hg19", conversion = [["filter", "is_none"]], value_map= {"None": "Unmapped"}, default_value = "Mapped") with filters.viewGroup("gnomAD"): filters.floatValueUnit("gnomAD_AF", "/_filters/gnomad_af_fam", diap = (0., 1.), default_value = 0.) filters.floatValueUnit("gnomAD_AF_Exomes", "/_filters/gnomad_db_exomes_af", diap = (0., 1.), default_value = 0.) 
filters.floatValueUnit("gnomAD_AF_Genomes", "/_filters/gnomad_db_genomes_af", diap = (0., 1.), default_value = 0.) filters.floatValueUnit("gnomAD_AF_Proband", "/_filters/gnomad_af_pb", diap = (0., 1.), default_value = 0.) filters.floatValueUnit("gnomAD_PopMax_AF", "/_filters/gnomad_popmax_af", diap = (0., 1.), default_value = 0.) filters.statusUnit("gnomAD_PopMax", "/_filters/gnomad_popmax", default_value = "None") filters.intValueUnit("gnomAD_PopMax_AN", "/_filters/gnomad_popmax_an", default_value = 0) filters.floatValueUnit("gnomAD_PopMax_AF_Inbred", "/_filters/gnomad_raw_popmax_af", diap = (0., 1.), default_value = 0.) filters.statusUnit("gnomAD_PopMax_Inbred", "/_filters/gnomad_raw_popmax", default_value = "None") filters.intValueUnit("gnomAD_PopMax_AN_Inbred", "/_filters/gnomad_raw_popmax_an", default_value = 0) filters.intValueUnit("gnomAD_Hom", "/_filters/gnomad_hom", default_value = 0) filters.intValueUnit("gnomAD_Hem", "/_filters/gnomad_hem", default_value = 0) with filters.viewGroup("Databases"): presence_in_db = [ ("ClinVar", "/_view/databases/clinVar"), ("GnomAD", "/_filters/gnomad_af_fam"), ("HGMD", "/__data/hgmd_pmids[]"), ("OMIM", "/_view/databases/omim")] for submitter in sorted(filters.getStdItem( "item-dict", "Clinvar_Trusted_Submitters").getData().values()): presence_in_db.append((submitter, "/_view/databases/clinvar_trusted/%s" % submitter)) filters.presenceUnit("Presence_in_Databases", presence_in_db) filters.multiStatusUnit("ClinVar_Submitters", "/_view/databases/clinVar_submitters[]", compact_mode = True) filters.intValueUnit("Number_submitters", "/_view/databases/clinVar_submitters", conversion = ["len"], default_value = 0) filters.multiStatusUnit("PMIDs", "/_view/databases/references[]", compact_mode = True) filters.intValueUnit("Number_pmid", "/_view/databases/references", conversion = ["len"], default_value = 0) # filters.multiStatusUnit("beacons", # "/__data/beacon_names") with filters.viewGroup("Call_Quality"): 
filters.floatValueUnit("Proband_GQ", "/_filters/proband_gq", default_value = -1) filters.floatValueUnit("Min_GQ", "/_filters/min_gq", default_value = -1) filters.intValueUnit("Max_GQ", "/_view/quality_samples", default_value = 0, conversion = [ ["filter", "has_variant"], ["property", "genotype_quality"], "max"]) filters.intValueUnit("Num_NO_CALL", "/_view/quality_samples", default_value = 0, conversion = [ ["skip", 1], ["property", "genotype_quality"], "negative", "len"]) filters.intValueUnit("QUAL", "/_filters/qual", default_value = -1) filters.floatValueUnit("QD", "/_filters/qd", default_value = -1.) filters.floatValueUnit("FS", "/_filters/fs", default_value = 0.) filters.multiStatusUnit("FT", "/_filters/filters[]") with filters.viewGroup("Predictions"): # research_only = True filters.statusUnit("HGMD_Benign", "/_filters/hgmd_benign", default_value = "Not in HGMD", value_map = {"True": "Benign", "False": "VUS or Pathogenic"}) filters.multiStatusUnit("HGMD_Tags", "/_view/databases/hgmd_tags[]", default_value = "None") # research_only = True filters.statusUnit("Clinvar_Benign", "/_filters/clinvar_benign", default_value = "Not in ClinVar", value_map = {"True": "Benign", "False": "VUS or Pathogenic"}) filters.multiStatusUnit("ClinVar_Significance", "/__data/clinvar_significance[]") filters.regPreTransform(lambda rec_no, rec_data: clinvarPreTransform(rec_data, filters.getStdItem( "item-dict", "Clinvar_Trusted_Submitters").getData())) filters.multiStatusUnit("Clinvar_Trusted_Significance", "/_view/databases/clinvar_trusted", conversion = ["values", ["split", ','], "clear", "uniq"]) filters.multiStatusUnit("Clinvar_Trusted_Simplified", "/_view/databases/clinvar_trusted_simplified", conversion = ["values", ["split", ','], "clear", "uniq"]) filters.statusUnit("Clinvar_stars", "/_filters/clinvar_stars", default_value = "No data") filters.intValueUnit("Number_of_clinvar_submitters", "/_filters/num_clinvar_submitters", default_value = 0) 
filters.statusUnit("Clinvar_review_status", "/_filters/clinvar_review_status", default_value = "No data") filters.statusUnit("Clinvar_criteria_provided", "/_filters/clinvar_criteria_provided", default_value = "Not Provided") filters.statusUnit("Clinvar_conflicts", "/_filters/clinvar_conflicts", default_value = "Criteria not Provided") filters.multiStatusUnit("Clinvar_acmg_guidelines", "/_filters/clinvar_acmg_guidelines[]", default_value = "None") for submitter in sorted(filters.getStdItem( "item-dict", "Clinvar_Trusted_Submitters").getData().values()): filters.statusUnit(f"ClinVar_Significance_{submitter}", "/_view/databases/clinvar_trusted", conversion = [["property", submitter]], default_value = "None") #filters.statusUnit("Clinvar_Trusted_Benign", # "/_filters/clinvar_trusted_benign", # default_value = "No data", # value_map = {"True": "Benign by Trusted submitters", # "False": "Unknown"}) filters.statusUnit("splice_altering", "/_filters/splice_altering", default_value = "No altering") filters.floatValueUnit("splice_ai_dsmax", "/_filters/splice_ai_dsmax", default_value = 0) filters.multiStatusUnit("Polyphen_2_HVAR", "/_view/predictions/polyphen2_hvar[]", conversion = [["split_re", r"[\s\,]"], "clear", "uniq"], default_value = "N/A") filters.multiStatusUnit("Polyphen_2_HDIV", "/_view/predictions/polyphen2_hdiv[]", conversion = [["split_re", r"[\s\,]"], "clear", "uniq"], default_value = "N/A") filters.multiStatusUnit("SIFT", "/_view/predictions/sift[]", default_value = "N/A") filters.multiStatusUnit("FATHMM", "/_view/predictions/fathmm[]", default_value = "N/A") filters.multiStatusUnit("PrimateAI", "/_view/predictions/primate_ai_pred[]", default_value = "N/A") filters.floatValueUnit("GERP_score", "/_view/bioinformatics/gerp_rs", default_value = 0) with filters.viewGroup("Pharmacogenomics"): filters.multiStatusUnit("Diseases", "/_filters/pharmacogenomics_diseases[]", default_value = "N/A") filters.multiStatusUnit("Chemicals", 
"/_filters/pharmacogenomics_chemicals[]", default_value = "N/A") with filters.viewGroup("Expression"): filters.multiStatusUnit("Mostly_Expressed_in", "/_filters/top_tissues[]", default_value = "N/A") # required = {"debug"} with filters.viewGroup("Debug_Info"): filters.intValueUnit("Severity", "/_filters/severity", default_value = -1) assert filters.getTranscriptIdUnitName() is not None, ( "Transcript ID unit is not set") return filters
def defineFilterSchema():
    """Build and return the filter-unit schema.

    Creates a ``FilterPrepareSetH`` and registers the filtering units on
    it, one ``viewGroup`` at a time (Inheritance, Variant, Genes,
    Transcripts, Coordinates, gnomAD, Databases, Call_Quality,
    Predictions, Debug_Info).  Units are registered in the order they
    appear below; the order is kept as-is because it is presumably the
    order in which units are presented to the user.

    Returns:
        The populated ``FilterPrepareSetH`` instance.
    """
    filters = FilterPrepareSetH()

    with filters.viewGroup("Inheritance"):
        filters.statusUnit("Proband_Zygosity",
            "/view/bioinformatics/zygosity",
            title="Proband Zygosity")
        filters.zygositySpecialUnit(
            "Inheritance_Mode", "/data/zygosity",
            config={"x_cond":
                ConditionMaker.condEnum("Chromosome", ["chrX"])},
            title="Inheritance Mode")
        filters.multiStatusUnit("Callers",
            "/view/bioinformatics/called_by[]",
            title="Called by")
        filters.multiStatusUnit("Has_Variant",
            "/_filters/has_variant[]")

    with filters.viewGroup("Variant"):
        filters.statusUnit(
            "Variant_Class", "/data/variant_class",
            tooltip=("Variant class as returned by VEP. "
                "The class of a variant is based on Sequence "
                "Ontology and is called according to its component "
                "alleles and its mapping to the reference genome. "
                "https://useast.ensembl.org/info/genome/variation/"
                "prediction/classification.html#classes"))
        filters.statusUnit("Most_Severe_Consequence",
            "/data/most_severe_consequence",
            variants=sConsequenceVariants,
            default_value="undefined")
        filters.statusUnit("Canonical_Annotation",
            "/view/general/canonical_annotation",
            default_value="undefined")
        filters.intValueUnit("Number_ALTs", "/_filters/alts",
            title="Number of Alternative alleles",
            conversion=_conv_len, default_value=0)
        # Disabled unit, kept for reference:
        # filters.intValueUnit("zyg_len", "/data/zygosity",
        #     conversion = _conv_len, default_value = 0)

    with filters.viewGroup("Genes"):
        # The panels unit below is built on top of this unit object.
        genes_unit = filters.multiStatusUnit("Symbol",
            "/view/general/genes[]",
            compact_mode=True)
        filters.panelStatusUnit("Panels", genes_unit,
            view_path="/view/general/gene_panels")
        # Disabled unit, kept for reference:
        # filters.multiStatusUnit("Transcripts",
        #     "/data/transcript_consequences[]", compact_mode = True,
        #     conversion = lambda arr:
        #         [el["transcript_id"] for el in arr] if arr else [])
        filters.intValueUnit("Num_Genes", "/view/general/genes",
            title="Number of overlapping genes",
            conversion=_conv_len, default_value=0)
        filters.intValueUnit("Num_Transcripts",
            "/data/transcript_consequences",
            title="Number of transcripts at the position",
            conversion=_conv_len, default_value=0)

    with filters.viewGroup("Transcripts"):
        # NOTE(review): the "transctipt..." method names and the
        # "Transctipt_"/"Transctript_" unit names are misspelled but are
        # runtime identifiers; they are reproduced verbatim on purpose.
        filters.transctiptMultisetUnit("Transctipt_consequence",
            "consequence_terms",
            variants=sConsequenceVariants)
        filters.transctiptStatusUnit("Transcript_canonical", "canonical",
            bool_check_value="1", default_value="False")
        filters.transctiptStatusUnit("Transcript_biotype", "biotype",
            default_value="undefined")
        filters.transctiptStatusUnit(
            "Transcript_worst", "consequence_terms",
            bool_check_value="${Most_Severe_Consequence}",
            default_value="False")
        filters.transctiptStatusUnit("Transcript_id", "transcript_id",
            default_value="undefined")
        filters.transctiptStatusUnit("Transctript_gene_id", "gene_id",
            default_value="undefined")
        filters.transctiptStatusUnit("Transcript_source", "source",
            default_value="undefined")
        filters.transctiptStatusUnit("Transcript_strand", "strand",
            default_value="undefined")

    with filters.viewGroup("Coordinates"):
        filters.statusUnit("Chromosome", "/_filters/chromosome",
            variants=[
                "chr1", "chr2", "chr3", "chr4", "chr5",
                "chr6", "chr7", "chr8", "chr9", "chr10",
                "chr11", "chr12", "chr13", "chr14", "chr15",
                "chr16", "chr17", "chr18", "chr19", "chr20",
                "chr21", "chr22", "chr23", "chrX", "chrY",
                "undefined"
            ],
            default_value="undefined")
        filters.intValueUnit("Start_Pos", "/data/start",
            title="Start Position",
            render_mode="neighborhood",
            default_value=sys.maxsize)
        filters.intValueUnit("End_Pos", "/data/end",
            title="End Position",
            default_value=0,
            render_mode="neighborhood")
        filters.intValueUnit(
            "Dist_from_Exon", "/_filters/dist_from_exon",
            title="Distance From Intron/Exon Boundary (Canonical)",
            default_value=0, render_mode="log,<")
        filters.statusUnit("Region", "/data/region_canonical",
            title="Region (Canonical)",
            default_value="Other")

    with filters.viewGroup("gnomAD"):
        filters.floatValueUnit("gnomAD_AF",
            "/_filters/gnomad_af_fam",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Allele Frequency (family)",
            tooltip="gnomAD Overall Allele Frequency",
            render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Exomes",
            "/_filters/gnomad_db_exomes_af",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Exome Allele Frequency (family)",
            render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Genomes",
            "/_filters/gnomad_db_genomes_af",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Genome Allele Frequency (family)",
            render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Proband",
            "/_filters/gnomad_af_pb",
            diap=(0., 1.), default_value=0.,
            title="gnomAD Allele Frequency (proband)",
            tooltip="gnomAD Overall Allele Frequency "
            "for the allele present in proband",
            render_mode="log,<")
        filters.floatValueUnit(
            "gnomAD_PopMax_AF", "/_filters/gnomad_popmax_af",
            tooltip="Maximum allele frequency across all populations",
            diap=(0., 1.), default_value=0.,
            title="gnomAD PopMax Allele Frequency",
            render_mode="log,<")
        filters.statusUnit(
            "gnomAD_PopMax", "/_filters/gnomad_popmax",
            default_value="None",
            title="gnomAD PopMax Ancestry",
            tooltip="Population that has the maximum allele frequency")
        filters.intValueUnit(
            "gnomAD_PopMax_AN", "/_filters/gnomad_popmax_an",
            default_value=0,
            title="gnomAD: Number of alleles in PopMax Ancestry",
            render_mode="log,>")
        filters.intValueUnit("gnomAD_Hom", "/_filters/gnomad_hom",
            default_value=0,
            title="gnomAD: Number of homozygous",
            render_mode="log,>")
        filters.intValueUnit("gnomAD_Hem", "/_filters/gnomad_hem",
            default_value=0,
            title="gnomAD: Number of hemizygous",
            render_mode="log,>")

    with filters.viewGroup("Databases"):
        filters.presenceUnit(
            "Presence_in_Databases",
            [("ClinVar", "/view/databases/clinVar"),
             ("LMM", "/view/databases/lmm_significance"),
             ("GeneDx", "/view/databases/gene_dx_significance"),
             ("GnomAD", "/_filters/gnomad_af_fam"),
             ("HGMD", "/view/databases/hgmd_pmids[]"),
             ("OMIM", "/view/databases/omim")],
            title="Presence in Databases")
        filters.multiStatusUnit("ClinVar_Submitters",
            "/view/databases/clinVar_submitters[]",
            title="ClinVar Submitters",
            compact_mode=True)
        filters.intValueUnit("Number_submitters",
            "/view/databases/clinVar_submitters",
            title="Number of ClinVar Submitters",
            conversion=_conv_len, default_value=0)
        filters.intValueUnit("Number_pmid",
            "/view/databases/hgmd_pmids",
            title="Number of PMIDs in HGMD",
            conversion=_conv_len, default_value=0)
        # Disabled unit, kept for reference:
        # filters.multiStatusUnit("beacons",
        #     "/data/beacon_names",
        #     title = "Observed at")

    with filters.viewGroup("Call_Quality"):
        filters.floatValueUnit("Proband_GQ", "/_filters/proband_gq",
            title="Genotype Quality (GQ) for Proband",
            render_mode="linear,>",
            default_value=1000)
        # NOTE(review): the title below ends with an unbalanced ")";
        # it is a user-visible string and is left unchanged here.
        filters.floatValueUnit("Min_GQ", "/_filters/min_gq",
            title="Minimum GQ for the family)",
            render_mode="linear,>",
            default_value=1000)
        filters.floatValueUnit("QD", "/_filters/qd",
            title="Quality by Depth",
            render_mode="linear,>",
            default_value=100000.)
        # The title is passed by keyword, like every other unit in this
        # function, instead of relying on the positional parameter order.
        filters.floatValueUnit("FS", "/_filters/fs",
            title="Fisher Strand Bias",
            render_mode="linear,<",
            default_value=0.)
        filters.multiStatusUnit("FT", "/_filters/filters[]",
            title="FILTER")

    with filters.viewGroup("Predictions"):
        filters.statusUnit(
            "HGMD_Benign", "/_filters/hgmd_benign",
            title="Categorized Benign in HGMD",
            default_value="Not in HGMD",
            research_only=True,
            render_mode="replace(True/Benign, False/Not Benign)")
        filters.multiStatusUnit("HGMD_Tags",
            "/view/databases/hgmd_tags[]",
            default_value="None")
        filters.statusUnit(
            "Clinvar_Benign", "/_filters/clinvar_benign",
            default_value="Not in ClinVar",
            title="Categorized Benign in ClinVar by all submitters",
            research_only=True)
        filters.multiStatusUnit("ClinVar_Significance",
            "/data/clinvar_significance[]",
            title="Clinical Significance in ClinVar")
        filters.statusUnit("Clinvar_stars", "/_filters/clinvar_stars",
            default_value="No data",
            title="ClinVar Stars")
        filters.intValueUnit("Number_of_clinvar_submitters",
            "/_filters/num_clinvar_submitters",
            render_mode="log,>", default_value=0,
            title="ClinVar: Number of Submitters")
        filters.statusUnit("Clinvar_review_status",
            "/_filters/clinvar_review_status",
            default_value="No data",
            title="ClinVar Review Status")
        filters.statusUnit("Clinvar_criteria_provided",
            "/_filters/clinvar_criteria_provided",
            default_value="Not Provided",
            title="ClinVar Criteria")
        filters.statusUnit("Clinvar_conflicts",
            "/_filters/clinvar_conflicts",
            default_value="Criteria not Provided",
            title="ClinVar Conflicts")
        filters.multiStatusUnit("Clinvar_acmg_guidelines",
            "/_filters/clinvar_acmg_guidelines[]",
            default_value="None")
        filters.statusUnit(
            "Clinvar_Trusted_Benign", "/_filters/clinvar_trusted_benign",
            default_value="No data",
            title="Categorized Benign by Clinvar Trusted Submitters")
        filters.multiStatusUnit("LMM_Significance",
            "/data/lmm",
            title="Clinical Significance by LMM")
        filters.multiStatusUnit("GeneDx_Significance",
            "/data/gene_dx",
            title="Clinical Significance by GeneDx")
        filters.statusUnit("splice_altering",
            "/_filters/splice_altering",
            default_value="No altering",
            title="Splice AI splice altering")
        filters.floatValueUnit("splice_ai_dsmax",
            "/_filters/splice_ai_dsmax",
            render_mode="linear,>", default_value=0,
            title="Splice AI splice altering score")
        filters.multiStatusUnit("Polyphen",
            "/view/predictions/polyphen[]",
            default_value="N/A")
        filters.multiStatusUnit("SIFT",
            "/view/predictions/sift[]",
            default_value="N/A")
        # The separator patterns are raw strings: "\s" and "\," are not
        # valid escapes in a plain literal and trigger a warning there.
        # The resulting pattern text is unchanged.
        filters.multiStatusUnit("Polyphen_2_HVAR",
            "/view/predictions/polyphen2_hvar[]",
            separators=r"[\s\,]",
            default_value="N/A")
        filters.multiStatusUnit("Polyphen_2_HDIV",
            "/view/predictions/polyphen2_hdiv[]",
            separators=r"[\s\,]",
            default_value="N/A")
        filters.floatValueUnit("GERP_score",
            "/view/bioinformatics/gerp_rs",
            render_mode="linear,>", default_value=0,
            title="GERP Score")

    with filters.viewGroup("Debug_Info"):
        filters.intValueUnit("Severity", "/_filters/severity",
            research_only=True, default_value=-1)

    return filters
def execIt(self):
    """Create the derived ("ws") dataset described by this task.

    Steps, in order: validate the requested name, collect the record
    numbers selected by the evaluation (filter or decision tree) while
    building a receipt, prepare the target directory, copy the selected
    records, then write ``dsinfo.json``, ``doc/info.html`` and the
    ``active`` marker and load the new dataset into the data vault.
    Progress is reported through ``self.setStatus``.

    Returns:
        ``{"ws": <name>}`` on success; ``None`` when the name is
        rejected, the filtered record count is out of range, or the
        target directory already exists (and was not removed in force
        mode).
    """
    if not self.correctWSName(self.mWSName):
        self.setStatus("Incorrect derived dataset name")
        return None

    target_name = self.mWSName
    base_ds = self.mDS
    evaluation = self.mEval

    self.setStatus("Preparing to create derived dataset")
    logging.info("Prepare dataset derivation: %s" % target_name)

    # The receipt records how this dataset was derived from its base.
    ws_receipt = {
        "kind": evaluation.getSolKind(),
        "base": base_ds.getName(),
        "root": base_ds.getRootDSName()
    }
    if evaluation.getSolKind() != "filter":
        # Decision-tree evaluation.
        if evaluation.getDTreeName():
            ws_receipt["dtree-name"] = evaluation.getDTreeName()
        selected_rec_nos, tree_points = evaluation.collectRecSeq()
        ws_receipt["p-presentation"] = tree_points
        ws_receipt["dtree-code"] = evaluation.getCode()
    else:
        # Filter evaluation.
        if evaluation.getFilterName():
            ws_receipt["filter-name"] = evaluation.getFilterName()
        flt_condition = evaluation.getCondition()
        selected_count = base_ds.getEvalSpace().evalTotalCounts(
            flt_condition)[0]
        # Accept only 1 <= count < configured maximum.
        if not (1 <= selected_count
                < AnfisaConfig.configOption("max.ws.size")):
            self.setStatus("Size is incorrect: %d" % selected_count)
            return None
        selected_rec_nos = base_ds.getEvalSpace().evalRecSeq(
            flt_condition, selected_count)
        ws_receipt["f-presentation"] = evaluation.getPresentation()
        ws_receipt["conditions"] = evaluation.getCondDataSeq()
    ws_receipt["eval-update-info"] = evaluation.getUpdateInfo()
    selected_rec_nos = sorted(selected_rec_nos)

    target_dir = "/".join((base_ds.getDataVault().getDir(), target_name))
    if os.path.exists(target_dir) and self.mForceMode:
        # Force mode: unload a loaded dataset of that name, then drop
        # its directory before re-creating it.
        if base_ds.getDataVault().getDS(target_name):
            base_ds.getDataVault().unloadDS(target_name, "ws")
        shutil.rmtree(target_dir)
    if os.path.exists(target_dir):
        self.setStatus("Dataset already exists")
        return None

    # Work on copies so the base dataset's schemas/meta stay untouched.
    view_schema_copy = deepcopy(base_ds.getViewSchema())
    flt_schema_copy = deepcopy(base_ds.getFltSchema())
    meta_copy = deepcopy(base_ds.getDataInfo().get("meta"))

    prep_set = FilterPrepareSetH(meta_copy, anfisaVariables)
    prep_set.setupFromInfo(flt_schema_copy)
    transform_prep = TransformPreparator_WS(flt_schema_copy, base_ds, False)

    os.mkdir(target_dir)
    logging.info("Fill dataset %s datafiles..." % target_name)
    rec_iter = base_ds.getRecStorage().iterRecords(selected_rec_nos)
    with DataDiskStorageWriter(False, target_dir,
            prep_set, transform_prep) as writer:
        for _, rec_data in rec_iter:
            writer.saveRecord(rec_data)
            if writer.getTotal() % self.mReportLines == 0:
                self.setStatus("Extracting records: %d/%d" %
                    (writer.getTotal(), len(selected_rec_nos)))

    self.setStatus("Finishing...")
    logging.info("Finalizing derivation %s" % target_name)

    item_total = transform_prep.finishUp()
    loaded_at = datetime.now().isoformat()
    ds_agent = base_ds.getApp().getMongoConnector().getDSAgent(
        target_name, "ws")
    ds_agent.updateCreationDate(loaded_at)

    if "versions" in meta_copy:
        meta_copy["versions"]["Anfisa load"] = (
            base_ds.getApp().getVersionCode())

    # The newest receipt goes first, followed by the base's receipts.
    base_receipts = base_ds.getDataInfo().get("receipts")
    all_receipts = ([ws_receipt] if base_receipts is None
        else [ws_receipt] + base_receipts[:])

    ds_info = {
        "name": target_name,
        "kind": "ws",
        "view_schema": view_schema_copy,
        "flt_schema": flt_schema_copy,
        "total": len(selected_rec_nos),
        "total_items": item_total,
        "mongo": target_name,
        "base": base_ds.getName(),
        "root": base_ds.getRootDSName(),
        "modes": ["secondary"],
        "meta": meta_copy,
        "doc": [],
        "zygosity_var": base_ds.getDataInfo()["zygosity_var"],
        "receipts": all_receipts,
        "date_loaded": loaded_at}

    with open(target_dir + "/dsinfo.json", "w",
            encoding="utf-8") as info_out:
        info_out.write(
            json.dumps(ds_info, sort_keys=True, indent=4) + "\n")

    os.mkdir(target_dir + "/doc")
    with open(target_dir + "/doc/info.html", "w",
            encoding="utf-8") as html_out:
        reportDS(html_out, ds_info, ds_agent, base_ds.getDataInfo())

    # Marker file containing a single newline.
    with open(target_dir + "/active", "w", encoding="utf-8") as mark_out:
        mark_out.write("\n")

    base_ds.getDataVault().loadDS(target_name, "ws")

    self.setStatus("Done")
    return {"ws": target_name}