コード例 #1
0
ファイル: f_flt.py プロジェクト: ForomePlatform/anfisa
def defFavorFlt(metadata_record):
    """Build the filter set for a dataset that uses the FAVOR data schema.

    Args:
        metadata_record: mapping describing the dataset; its "data_schema"
            entry must be equal to "FAVOR".

    Returns:
        The FilterPrepareSetH instance with all FAVOR units registered,
        grouped into view groups in the order they are declared below.

    Raises:
        AssertionError: if the "data_schema" entry is not "FAVOR"
            (including when it is missing).
    """
    # str() is required here: when the schema entry is missing, .get()
    # returns None and plain concatenation would raise TypeError while
    # building the message, hiding the intended AssertionError.
    assert metadata_record.get("data_schema") == "FAVOR", (
        "FAVOR data schema expected: "
        + str(metadata_record.get("data_schema")))

    filters = FilterPrepareSetH(metadata_record,
                                anfisaVariables,
                                check_identifiers=False)

    with filters.viewGroup("Coordinates"):
        filters.statusUnit("Chromosome",
                           "/_filters/chromosome",
                           variants=[
                               "chr1", "chr2", "chr3", "chr4", "chr5", "chr6",
                               "chr7", "chr8", "chr9", "chr10", "chr11",
                               "chr12", "chr13", "chr14", "chr15", "chr16",
                               "chr17", "chr18", "chr19", "chr20", "chr21",
                               "chr22", "chr23", "chrX", "chrY", "undefined"
                           ],
                           default_value="undefined")

        # Records without a position get the largest representable int.
        filters.intValueUnit("Position",
                             "/_filters/position",
                             default_value=sys.maxsize)

    with filters.viewGroup("Genes"):
        # The gene-symbol unit is kept so the panels unit can refer to it.
        genes_unit = filters.multiStatusUnit(
            "Symbol",
            #  "/_filters/genes[]",
            "/_view/general/genes[]",
            compact_mode=True)
        filters.panelsUnit("Panels",
                           genes_unit,
                           "Symbol",
                           view_path="/_view/general/gene_panels")
        filters.intValueUnit("Num_Genes",
                             "/_view/general/genes",
                             conversion="len",
                             default_value=0)

    with filters.viewGroup("gnomAD"):
        filters.floatValueUnit("gnomAD_Total_AF",
                               "/_filters/gnomad_total_af",
                               diap=(0., 1.),
                               default_value=0.)

    with filters.viewGroup("GENCODE"):
        filters.multiStatusUnit("GENCODE_Category",
                                "/_filters/gencode_category[]",
                                default_value="None")
        filters.multiStatusUnit("GENCODE_Exonic_Category",
                                "/_filters/gencode_exonic_category",
                                compact_mode=True)

    with filters.viewGroup("TOPMed"):
        filters.multiStatusUnit("TOPMed_QC_Status",
                                "/_filters/top_med_qc_status[]",
                                default_value="None")
        filters.floatValueUnit("TOPMed_Bravo_AF",
                               "/_filters/top_med_bravo_af",
                               default_value=0.)

    with filters.viewGroup("Allele Frequencies"):
        filters.floatValueUnit("ExAC03",
                               "/_filters/exac03",
                               render_mode="linear,<",
                               default_value=0.)

    with filters.viewGroup("Variant Category"):
        filters.multiStatusUnit("Disruptive_Missense",
                                "/_filters/disruptive_missense",
                                default_value="N/A")
        filters.multiStatusUnit("CAGE_Promoter",
                                "/_filters/cage_promoter",
                                default_value="N/A")
        filters.multiStatusUnit("CAGE_Enhancer",
                                "/_filters/cage_enhancer",
                                default_value="N/A")
        filters.multiStatusUnit("Gene_Hancer",
                                "/_filters/gene_hancer",
                                default_value="N/A")
        filters.multiStatusUnit("Super_Enhancer",
                                "/_filters/super_enhancer",
                                default_value="N/A")

    with filters.viewGroup("Nucleotide Diversity"):
        filters.floatValueUnit("bStatistics",
                               "/_filters/bstatistics",
                               default_value=0.)

    with filters.viewGroup("Mutation Rate"):
        filters.floatValueUnit("Freq1000bp",
                               "/_filters/freq1000bp",
                               default_value=0.)
        filters.floatValueUnit("Rare1000bp",
                               "/_filters/rare1000bp",
                               default_value=0.)

    with filters.viewGroup("Predictions"):
        filters.multiStatusUnit("Clinvar",
                                "/_filters/clinvar[]",
                                default_value="N/A")

    with filters.viewGroup("Protein Function"):
        filters.multiStatusUnit("Polyphen_2_HVAR",
                                "/_filters/polyphen2_hvar",
                                default_value="N/A")
        filters.multiStatusUnit("Polyphen_2_HDIV",
                                "/_filters/polyphen2_hdiv",
                                default_value="N/A")

        filters.multiStatusUnit("PolyPhenCat",
                                "/_filters/polyphen_cat",
                                default_value="N/A")

        filters.multiStatusUnit("SIFTcat",
                                "/_filters/sift_cat",
                                default_value="N/A")

    with filters.viewGroup("Integrative Score"):
        filters.floatValueUnit("GC", "/_filters/gc", default_value=0.)
        filters.floatValueUnit("CpG", "/_filters/cpg", default_value=0.)

    return filters
コード例 #2
0
def defineFilterSchema(metadata_record):
    """Build the filter set (all filtering units) for a dataset.

    For the "FAVOR" data schema the work is delegated to
    FavorSchema.defineFilterSchema(). Otherwise the schema must be absent
    or "CASE", and the units are registered here, one view group after
    another, in the order in which they are declared.

    Args:
        metadata_record: mapping describing the dataset. The entries read
            here are "data_schema" and "cohorts" (a sequence of mappings
            that each carry a "name" entry, or a falsy value).

    Returns:
        The populated FilterPrepareSetH instance (or whatever
        FavorSchema.defineFilterSchema() returns for "FAVOR").

    Raises:
        AssertionError: if the data schema is neither None, "CASE"
            nor "FAVOR".
    """
    data_schema = metadata_record.get("data_schema")
    if data_schema == "FAVOR":
        return FavorSchema.defineFilterSchema(metadata_record)
    # str() keeps the message construction from raising TypeError when the
    # offending schema value is not a string.
    assert data_schema is None or data_schema == "CASE", (
        "Bad data schema: " + str(data_schema))

    filters = FilterPrepareSetH(metadata_record)

    cohorts = metadata_record.get("cohorts")
    with filters.viewGroup("Inheritance"):
        # The per-cohort presence unit is registered only for datasets
        # that declare cohorts.
        if cohorts:
            filters.multiStatusUnit("Variant_in",
                "/_filters/cohort_has_variant[]")
        filters.multiStatusUnit("Callers", "/_view/bioinformatics/called_by[]",
            title = "Called by")
        filters.statusUnit("Proband_Zygosity",
            "/_view/bioinformatics/zygosity",
            title = "Proband Zygosity")
        filters.intValueUnit("Num_Samples", "/_filters/has_variant",
            title = "Number of Samples",
            conversion = ["len"], default_value = 0,
            tooltip =
            "Number of samples for which this variant has been called")
        filters.multiStatusUnit("Has_Variant", "/_filters/has_variant[]")

    if cohorts:
        # "ALL" is handled like one more cohort, placed first.
        all_cohorts = ["ALL"] + [ch["name"] for ch in cohorts]
        with filters.viewGroup("Cohorts"):
            for ch_name in all_cohorts:
                filters.floatValueUnit(ch_name + "_AF",
                    "/_view/cohorts/" + ch_name + "/AF",
                    default_value = 0)
                filters.floatValueUnit(ch_name + "_AF2",
                    "/_view/cohorts/" + ch_name + "/AF2",
                    default_value = 0, title = "AF_Hom")

    with filters.viewGroup("Variant"):
        filters.statusUnit("Variant_Class", "/__data/variant_class",
            tooltip = ("Variant class as returned by VEP. "
                "The class of a variant is based on Sequence "
                "Ontology and is called according to its component "
                "alleles and its mapping to the reference genome. "
                "https://useast.ensembl.org/info/genome/variation/"
                "prediction/classification.html#classes"))
        filters.statusUnit("Most_Severe_Consequence",
               "/__data/most_severe_consequence",
               variants = sConsequenceVariants,
               default_value = "undefined")
        filters.multiStatusUnit("Canonical_Annotation",
            "/_view/general/canonical_annotation[]",
            default_value = "undefined")
        filters.statusUnit("Multiallelic", "/_filters/multiallelic",
            title = "Multi-allelic?")
        filters.statusUnit("Altered_VCF", "/_filters/altered_vcf",
            title = "Has VCF been normalized?")
        # filters.intValueUnit("Number_ALTs",
        #     "/_filters/alts",
        #     title = "Number of Alternative alleles",
        #     conversion = ["len"], default_value = 0)

        #filters.intValueUnit("zyg_len", "/__data/zygosity",
        #   conversion = ["len"], default_value = 0)

    with filters.viewGroup("Genes"):
        # The gene-symbol unit is kept so the panels unit can refer to it.
        genes_unit = filters.multiStatusUnit("Symbol",
            "/_view/general/genes[]",
            compact_mode = True)
        filters.panelsUnit("Panels", genes_unit, "Symbol",
            view_path = "/_view/general/gene_panels")
        filters.multiStatusUnit("EQTL_Gene", "/_filters/eqtl_gene[]",
            title = "EQTL Gene", default_value = "None")
        #filters.multiStatusUnit("Transcripts",
        #    "/__data/transcript_consequences[]", compact_mode = True,
        #    conversion = [["property", transcript_id"]])
        filters.intValueUnit("Num_Genes", "/_view/general/genes",
            title = "Number of overlapping genes",
            conversion = ["len"], default_value = 0)
        filters.intValueUnit("Num_Transcripts",
            "/__data/transcript_consequences",
            title = "Number of transcripts at the position",
            conversion = ["len"], default_value = 0)

    with filters.viewGroup("Transcripts"):
        filters.transcriptMultisetUnit("Transcript_consequence",
            "transcript_annotations", variants = sConsequenceVariants,
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_canonical", "is_canonical",
            bool_check_value = "True", default_value = "False")
        filters.transcriptStatusUnit("Transcript_GENCODE_Basic",
            "gencode_basic", bool_check_value = "True",
            default_value = "False")
        filters.transcriptStatusUnit("Transcript_biotype", "biotype",
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_worst", "is_worst",
            bool_check_value = "True", default_value = "False")
        filters.transcriptStatusUnit("Transcript_id", "id",
            default_value = "undefined")
        # NOTE(review): the unit name "Transctript_Gene" is misspelled, but
        # it is a runtime identifier, so it is deliberately left untouched.
        tr_genes_unit = filters.transcriptStatusUnit("Transctript_Gene",
            "gene", default_value = "undefined")
        filters.transcriptPanelsUnit("Transcript_Gene_Panels",
            tr_genes_unit, "Symbol", view_name = "tr_gene_panels")
        filters.transcriptStatusUnit("Transcript_source", "transcript_source",
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_codon_pos", "codonpos",
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_region", "region",
            title= "Gene Region", default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_CDS", "cds",
            title= "CDS?", default_value = "-")
        filters.transcriptStatusUnit("Transcript_masked", "masked_region",
            title= "Masked", default_value = "No")
        filters.transcriptIntValueUnit("Transcript_dist_from_exon",
            "dist_from_exon",
            title = "Distance from Exon Boundary", default_value = -1)
        # filters.transcriptStatusUnit("Transcript_strand", "strand",
        #     default_value = "undefined")

    with filters.viewGroup("Transcript_Predictions"):
        filters.transcriptStatusUnit(
            "Transcript_PolypPhen_HDIV", "polyphen2_hdiv_prediction")
        filters.transcriptStatusUnit(
            "Transcript_PolyPhen_HVAR", "polyphen2_hvar_prediction")
        filters.transcriptStatusUnit(
            "Transcript_SIFT", "sift_prediction")
        filters.transcriptStatusUnit(
            "Transcript_SIFT_4G", "sift_4g_prediction")
        filters.transcriptStatusUnit(
            "Transcript_FATHMM", "fathmm_prediction")

    # with filters.viewGroup("Transcripts"):
    #     filters.transcriptMultisetUnit("Transcript_consequence",
    #         "consequence_terms", variants = sConsequenceVariants,
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_canonical", "canonical",
    #         bool_check_value = "1", default_value = "False")
    #     filters.transcriptStatusUnit("Transcript_biotype", "biotype",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_worst", "consequence_terms",
    #         bool_check_value = "${Most_Severe_Consequence}",
    #         default_value = "False")
    #     filters.transcriptStatusUnit("Transcript_id", "transcript_id",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transctript_gene_id", "gene_id",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_source", "source",
    #         default_value = "undefined")
    #     filters.transcriptStatusUnit("Transcript_strand", "strand",
    #         default_value = "undefined")

    with filters.viewGroup("Coordinates"):
        filters.statusUnit("Chromosome", "/_filters/chromosome",
            variants = ["chr1", "chr2", "chr3", "chr4", "chr5",
            "chr6", "chr7", "chr8", "chr9", "chr10",
            "chr11", "chr12", "chr13", "chr14", "chr15",
            "chr16", "chr17", "chr18", "chr19", "chr20",
            "chr21", "chr22", "chr23", "chrX", "chrY", "undefined"],
            default_value = "undefined")

        # Missing start defaults to the largest int, missing end to 0.
        filters.intValueUnit("Start_Pos", "/__data/start",
            title = "Start Position",
            render_mode = "neighborhood", default_value = sys.maxsize)
        filters.intValueUnit("End_Pos", "/__data/end",
            title = "End Position", default_value = 0,
            render_mode = "neighborhood")
        # NOTE(review): this unit shares its title with
        # Dist_from_Exon_Canonical below - confirm whether that is intended.
        filters.intValueUnit("Dist_from_Exon", "/_filters/dist_from_exon",
            title = "Distance From Intron/Exon Boundary (Canonical)",
            default_value = 0, render_mode = "log,<")
        filters.intValueUnit("Dist_from_Exon_Canonical",
            "/_filters/dist_from_exon_canonical",
            title = "Distance From Intron/Exon Boundary (Canonical)",
            default_value = 0, render_mode = "log,<", conversion = ["min"])
        # Title names the "worst" variant of the distance; it used to be a
        # verbatim copy of the canonical title.
        filters.intValueUnit("Dist_from_Exon_Worst",
            "/_filters/dist_from_exon_worst",
            title = "Distance From Intron/Exon Boundary (Worst)",
            default_value = 0, render_mode = "log,<", conversion = ["min"])
        filters.multiStatusUnit("Region_Canonical",
            "/__data/region_canonical[]",
            title = "Region (Canonical)", default_value = "Other")
        # Same correction as above: this unit reads region_worst.
        filters.multiStatusUnit("Region_Worst", "/__data/region_worst[]",
            title = "Region (Worst)", default_value = "Other")
        filters.statusUnit("Region", "/__data/region_canonical",
            title = "Region (Legacy)", default_value = "Other", )
        filters.statusUnit("hg19", "/_view/general/hg19", title = "HG19",
            conversion = [["filter", "is_none"]],
            value_map= {"None": "Unmapped"}, default_value = "Mapped")

    with filters.viewGroup("gnomAD"):
        filters.floatValueUnit("gnomAD_AF",
            "/_filters/gnomad_af_fam",
            diap = (0., 1.), default_value = 0.,
            title = "gnomAD Allele Frequency (family)",
            tooltip = "gnomAD Overall Allele Frequency",
            render_mode = "log,<")
        filters.floatValueUnit("gnomAD_AF_Exomes",
            "/_filters/gnomad_db_exomes_af",
            diap = (0., 1.), default_value = 0.,
            title = "gnomAD Exome Allele Frequency (family)",
            render_mode = "log,<")
        filters.floatValueUnit("gnomAD_AF_Genomes",
            "/_filters/gnomad_db_genomes_af",
            diap = (0., 1.), default_value = 0.,
            title = "gnomAD Genome Allele Frequency (family)",
            render_mode = "log,<")
        filters.floatValueUnit("gnomAD_AF_Proband",
            "/_filters/gnomad_af_pb",
            diap = (0., 1.), default_value = 0.,
            title = "gnomAD Allele Frequency (proband)",
            tooltip = "gnomAD Overall Allele Frequency "
            "for the allele present in proband",
            render_mode = "log,<")
        filters.floatValueUnit("gnomAD_PopMax_AF",
            "/_filters/gnomad_popmax_af",
            tooltip = "Maximum allele frequency across outbred populations",
            diap = (0., 1.), default_value = 0.,
            title = "PopMax Allele Frequency",
            render_mode = "log,<")
        filters.statusUnit("gnomAD_PopMax",
            "/_filters/gnomad_popmax", default_value = "None",
            title = "PopMax Ancestry",
            tooltip =
                "Outbred population that has the maximum allele frequency")
        filters.intValueUnit("gnomAD_PopMax_AN",
            "/_filters/gnomad_popmax_an",
            default_value = 0,
            title = "Number of alleles in outbred PopMax Ancestry",
            render_mode = "log,>")
        filters.floatValueUnit("gnomAD_PopMax_AF_Inbred",
            "/_filters/gnomad_raw_popmax_af",
            tooltip = "Maximum allele frequency across all populations "
                      + "(including inbred)",
            diap = (0., 1.), default_value = 0.,
            title = "PopMax Allele Frequency (including inbred)",
            render_mode = "log,<")
        filters.statusUnit("gnomAD_PopMax_Inbred",
            "/_filters/gnomad_raw_popmax", default_value = "None",
            title = "PopMax Ancestry (including inbred)",
            tooltip = "Population, including inbred, that has the maximum "
                      + "allele frequency")
        filters.intValueUnit("gnomAD_PopMax_AN_Inbred",
            "/_filters/gnomad_raw_popmax_an",
            default_value = 0, render_mode = "log,>",
            title = "Number of alleles in (inbred) PopMax Ancestry")
        filters.intValueUnit("gnomAD_Hom",
            "/_filters/gnomad_hom",
            default_value = 0, render_mode = "log,>",
            title = "gnomAD: Number of homozygous")
        filters.intValueUnit("gnomAD_Hem",
            "/_filters/gnomad_hem",
            default_value = 0, render_mode = "log,>",
            title = "gnomAD: Number of hemizygous")

    with filters.viewGroup("Databases"):
        # Fixed (label, path) pairs first, then one pair per trusted
        # ClinVar submitter, in sorted order.
        presence_in_db = [
            ("ClinVar", "/_view/databases/clinVar"),
            ("GnomAD", "/_filters/gnomad_af_fam"),
            ("HGMD", "/__data/hgmd_pmids[]"),
            ("OMIM", "/_view/databases/omim")]
        for submitter in sorted(filters.getStdItem(
                "item-dict", "Clinvar_Trusted_Submitters").getData().values()):
            presence_in_db.append((submitter,
                "/_view/databases/clinvar_trusted/%s" % submitter))
        filters.presenceUnit("Presence_in_Databases", presence_in_db,
            title = "Presence in Databases")

        filters.multiStatusUnit("ClinVar_Submitters",
            "/_view/databases/clinVar_submitters[]",
            title = "ClinVar Submitters", compact_mode = True)
        filters.intValueUnit("Number_submitters",
            "/_view/databases/clinVar_submitters",
            title = "Number of ClinVar Submitters",
            conversion = ["len"], default_value = 0)

        filters.multiStatusUnit("PMIDs",
            "/_view/databases/references[]",
            title = "PMIDs", compact_mode = True)
        filters.intValueUnit("Number_pmid",
            "/_view/databases/references",
            title = "Number of PMIDs",
            conversion = ["len"], default_value = 0)

        # filters.multiStatusUnit("beacons",
        #     "/__data/beacon_names",
        #     title = "Observed at")

    with filters.viewGroup("Call_Quality"):
        filters.floatValueUnit("Proband_GQ", "/_filters/proband_gq",
            title = "Genotype Quality (GQ) for Proband",
            render_mode = "linear,>", default_value = -1,
            tooltip = "GQ tells you how confident we are that "
            "the genotype we assigned to a particular sample is correct. "
            "It is simply the second lowest PL, because it is the "
            "difference between the second lowest PL and the lowest PL "
            "(always 0).")
        filters.floatValueUnit("Min_GQ", "/_filters/min_gq",
            title = "Minimum GQ for the family", render_mode = "linear,>",
            default_value = -1,
            tooltip = "GQ tells you how confident we are that "
            "the genotype we assigned to a particular sample is correct. "
            "It is simply the second lowest PL, because it is the "
            "difference between the second lowest PL and the lowest PL "
            "(always 0).")
        filters.intValueUnit("Max_GQ", "/_view/quality_samples",
            title = "The highest GQ",
            tooltip= "Max(GQ) for those samples that have the variant",
            render_mode = "linear,=", default_value = 0,
            conversion = [
                ["filter", "has_variant"],
                ["property", "genotype_quality"],
                "max"])

        filters.intValueUnit("Num_NO_CALL", "/_view/quality_samples",
            title = "Number of NO_CALL samples",
            tooltip= "Number of samples with NO_CALL in the current site",
            render_mode = "linear,=", default_value = 0,
            conversion = [
                ["skip", 1],
                ["property", "genotype_quality"],
                "negative", "len"])
        filters.intValueUnit("QUAL", "/_filters/qual",
            title = "Variant Call Quality",
            default_value = -1)
        filters.floatValueUnit("QD", "/_filters/qd",
            title = "Quality by Depth",
            render_mode = "linear,>", default_value = -1.,
            tooltip = "The QUAL score normalized by allele depth (AD) "
            "for a variant. This annotation puts the variant confidence "
            "QUAL score into perspective by normalizing for the amount "
            "of coverage available. Because each read contributes a little "
            "to the QUAL score, variants in regions with deep coverage "
            "can have artificially inflated QUAL scores, giving the "
            "impression that the call is supported by more evidence "
            "than it really is. To compensate for this, we normalize "
            "the variant confidence by depth, which gives us a more "
            "objective picture of how well supported the call is.")
        filters.floatValueUnit("FS", "/_filters/fs",
            title = "Fisher Strand Bias",
            render_mode = "linear,<", default_value = 0.,
            tooltip = "Phred-scaled probability that there is strand bias "
            "at the site. Strand Bias tells us whether the alternate "
            "allele was seen more or less often on the forward or "
            "reverse strand than the reference allele. When there "
            "little to no strand bias at the site, the FS value "
            "will be close to 0.")
        filters.multiStatusUnit("FT", "/_filters/filters[]", title = "FILTER",
            tooltip = "This field contains the name(s) of any filter(s) "
            "that the variant fails to pass, or the value PASS if the "
            "variant passed all filters. If the FILTER value is ., "
            "then no filtering has been applied to the records.")

    with filters.viewGroup("Predictions"):
        # research_only = True
        filters.statusUnit("HGMD_Benign", "/_filters/hgmd_benign",
            title = "Categorized Benign in HGMD",
            default_value = "Not in HGMD",
            value_map = {"True": "Benign", "False": "VUS or Pathogenic"})
        filters.multiStatusUnit("HGMD_Tags", "/_view/databases/hgmd_tags[]",
            default_value = "None")

        # research_only = True
        filters.statusUnit("Clinvar_Benign", "/_filters/clinvar_benign",
            default_value = "Not in ClinVar",
            title = "Categorized Benign in ClinVar by all submitters",
            value_map = {"True": "Benign", "False": "VUS or Pathogenic"})
        filters.multiStatusUnit("ClinVar_Significance",
            "/__data/clinvar_significance[]",
            title = "Clinical Significance in ClinVar")
        # Per-record pre-transform: clinvarPreTransform receives the record
        # data together with the trusted-submitters dictionary, which is
        # looked up anew on every invocation of the lambda.
        filters.regPreTransform(lambda rec_no, rec_data:
            clinvarPreTransform(rec_data, filters.getStdItem(
                "item-dict", "Clinvar_Trusted_Submitters").getData()))

        filters.multiStatusUnit("Clinvar_Trusted_Significance",
            "/_view/databases/clinvar_trusted",
            title = "ClinVar significance by trusted submitters only",
            tooltip =
                "Clinical Significance by ClinVar trusted submitters only",
            conversion = ["values", ["split", ','], "clear", "uniq"])
        filters.multiStatusUnit("Clinvar_Trusted_Simplified",
            "/_view/databases/clinvar_trusted_simplified",
            tooltip =
                "Simplified Clinical Significance by trusted submitters only",
            conversion = ["values", ["split", ','], "clear", "uniq"])

        filters.statusUnit("Clinvar_stars", "/_filters/clinvar_stars",
            default_value = "No data", title = "ClinVar Stars")
        filters.intValueUnit("Number_of_clinvar_submitters",
            "/_filters/num_clinvar_submitters", render_mode = "log,>",
            default_value = 0, title = "ClinVar: Number of Submitters")
        filters.statusUnit("Clinvar_review_status",
            "/_filters/clinvar_review_status",
            default_value = "No data", title = "ClinVar Review Status")
        filters.statusUnit("Clinvar_criteria_provided",
            "/_filters/clinvar_criteria_provided",
            default_value = "Not Provided", title = "ClinVar Criteria")
        filters.statusUnit("Clinvar_conflicts",
            "/_filters/clinvar_conflicts",
            default_value = "Criteria not Provided",
            title = "ClinVar Conflicts")
        filters.multiStatusUnit("Clinvar_acmg_guidelines",
            "/_filters/clinvar_acmg_guidelines[]",
            default_value = "None")

        # One significance unit per trusted ClinVar submitter, in sorted
        # order of the submitter labels.
        for submitter in sorted(filters.getStdItem(
                "item-dict", "Clinvar_Trusted_Submitters").getData().values()):
            filters.statusUnit("%s_Significance" % submitter,
                "/_view/databases/clinvar_trusted",
                title = "Clinical Significance by %s" % submitter,
                conversion = [["property", submitter]],
                default_value = "None")

        #filters.statusUnit("Clinvar_Trusted_Benign",
        #    "/_filters/clinvar_trusted_benign",
        #    default_value = "No data",
        #    title = "Categorized Benign by Clinvar Trusted Submitters",
        #    value_map = {"True": "Benign by Trusted submitters",
        #        "False": "Unknown"})

        filters.statusUnit("splice_altering", "/_filters/splice_altering",
            title = "Splice AI splice altering",
            default_value = "No altering")
        filters.floatValueUnit("splice_ai_dsmax", "/_filters/splice_ai_dsmax",
            title = "Splice AI splice altering score",
            render_mode = "linear,>", default_value = 0)

        # filters.multiStatusUnit("Polyphen", "/_view/predictions/polyphen[]",
        # default_value = "N/A")
        # This is an obsolete filter replaced by Polyphen 2
        filters.multiStatusUnit("Polyphen_2_HVAR",
            "/_view/predictions/polyphen2_hvar[]",
            title = "Polyphen",
            conversion = [["split_re", r"[\s\,]"], "clear", "uniq"],
            default_value = "N/A",
            tooltip = "HumVar (HVAR) is PolyPhen-2 classifier "
            "trained on known human variation (disease mutations vs."
            " common neutral variants)")
        filters.multiStatusUnit("Polyphen_2_HDIV",
            "/_view/predictions/polyphen2_hdiv[]",
            title = "Polyphen HDIV (High sensitivity)",
            conversion = [["split_re", r"[\s\,]"], "clear", "uniq"],
            default_value = "N/A",
            tooltip = "HumDiv (HDIV) classifier is trained on a smaller "
            "number of select extreme effect disease mutations vs. "
            "divergence with close homologs (e.g. primates), which is "
            "supposed to consist of mostly neutral mutations.")

        filters.multiStatusUnit("SIFT", "/_view/predictions/sift[]",
            default_value = "N/A",
            tooltip = "Sort intolerated from tolerated (An amino acid at a "
            "position is tolerated | The most frequentest amino acid "
            "being tolerated). D: Deleterious T: tolerated")
        # The trailing space after "HMM." keeps the two concatenated
        # sentences of the tooltip from running together.
        filters.multiStatusUnit("FATHMM", "/_view/predictions/fathmm[]",
            default_value = "N/A",
            tooltip = "Functional analysis through hidden markov model HMM. "
            "D: Deleterious; T: Tolerated")
        filters.multiStatusUnit("PrimateAI",
            "/_view/predictions/primate_ai_pred[]",
            default_value = "N/A",
            tooltip = "Prediction of PrimateAI score based on the authors "
            "recommendation, “T(olerated)” or “D(amaging)”. "
            "The score cutoff between “D” and “T” is 0.803.")
        filters.floatValueUnit("GERP_score", "/_view/bioinformatics/gerp_rs",
            render_mode = "linear,>", default_value = 0, title = "GERP Score")

    with filters.viewGroup("Pharmacogenomics"):
        filters.multiStatusUnit("Diseases",
            "/_filters/pharmacogenomics_diseases[]", default_value = "N/A")
        filters.multiStatusUnit("Chemicals",
            "/_filters/pharmacogenomics_chemicals[]", default_value = "N/A")

    with filters.viewGroup("Expression"):
        filters.multiStatusUnit("Mostly_Expressed_in",
            "/_filters/top_tissues[]", default_value = "N/A")

    # required = {"debug"}
    with filters.viewGroup("Debug_Info"):
        filters.intValueUnit("Severity", "/_filters/severity",
            default_value = -1)

    return filters
コード例 #3
0
                s_value = "other"
            clinvar_trusted_simplified[trusted_map[submitter]] = s_value
    rec_data["_view"]["databases"]["clinvar_trusted"] = clinvar_trusted
    rec_data["_view"]["databases"]["clinvar_trusted_simplified"] = (
        clinvar_trusted_simplified)

#===============================================
def sample_has_variant(sample):
    """Tell whether a sample record carries the variant.

    The "genotype" entry of *sample* is inspected: a missing or empty
    genotype is handed back unchanged (a falsy value); otherwise the
    result is True only when the genotype contains neither "HOM_REF"
    nor "NO_CALL".
    """
    call = sample.get("genotype")
    if not call:
        # Nothing to inspect: pass the falsy genotype straight through.
        return call
    return all(marker not in call for marker in ("HOM_REF", "NO_CALL"))

def is_none(value):
    """Report whether *value* equals the literal string "None"."""
    none_marker = "None"
    return value == none_marker


# Register the two helper predicates on FilterPrepareSetH under string names.
# NOTE(review): presumably these are the names that "filter" conversion
# steps refer to - confirm against FilterPrepareSetH.
FilterPrepareSetH.regNamedFunction("has_variant", sample_has_variant)
FilterPrepareSetH.regNamedFunction("is_none", is_none)
#===============================================
def defineFilterSchema(metadata_record):
    data_schema = metadata_record.get("data_schema")
    if data_schema == "FAVOR":
        return FavorSchema.defineFilterSchema(metadata_record)
    assert data_schema is None or data_schema == "CASE", (
        "Bad data schema: " + data_schema)

    filters = FilterPrepareSetH(metadata_record)

    cohorts = metadata_record.get("cohorts")
    with filters.viewGroup("Inheritance"):
        if cohorts:
            filters.multiStatusUnit("Variant_in",
コード例 #4
0
ファイル: flt_schema.py プロジェクト: ForomePlatform/anfisa
def defineFilterSchema(metadata_record):
    """Build the filter-unit schema for the dataset described by metadata.

    When ``metadata_record["data_schema"]`` is "FAVOR" the work is delegated
    to ``FavorSchema.defineFilterSchema``.  Otherwise the schema must be
    absent or "CASE", and a ``FilterPrepareSetH`` is created and populated
    with units, declared group by group in the order written below.

    Cohort-specific units ("Variant_in" and the whole "Cohorts" group) are
    only declared when the metadata carries a non-empty "cohorts" entry.

    Args:
        metadata_record: mapping read with ``.get``; the keys used here are
            "data_schema" and "cohorts" (each cohort item must have "name").

    Returns:
        The populated ``FilterPrepareSetH`` (or whatever the FAVOR schema
        builder returns for FAVOR data).

    Raises:
        AssertionError: on an unexpected data schema, or if no transcript
            ID unit has been registered by the end of the function.
    """
    data_schema = metadata_record.get("data_schema")
    if data_schema == "FAVOR":
        return FavorSchema.defineFilterSchema(metadata_record)
    assert data_schema is None or data_schema == "CASE", (
        "Bad data schema: " + data_schema)

    filters = FilterPrepareSetH(metadata_record, anfisaVariables)

    cohorts = metadata_record.get("cohorts")
    with filters.viewGroup("Inheritance"):
        if cohorts:
            filters.multiStatusUnit("Variant_in",
                "/_filters/cohort_has_variant[]")
        filters.multiStatusUnit("Callers",
            "/_view/bioinformatics/called_by[]")
        filters.statusUnit("Proband_Zygosity",
            "/_view/bioinformatics/zygosity")
        filters.intValueUnit("Num_Samples", "/_filters/has_variant",
            conversion = ["len"], default_value = 0)
        filters.multiStatusUnit("Has_Variant", "/_filters/has_variant[]")

    if cohorts:
        with filters.viewGroup("Cohorts"):
            filters.floatValueUnit("ALL_AF",
                "/_view/cohorts/ALL", default_value = 0)
            filters.floatValueUnit("ALL_AF2",
                "/_view/cohorts/ALL2", default_value = 0)
            # Two float units (AF and AF2) per cohort listed in the metadata;
            # the cohort name is embedded in both unit name and data path.
            for ch_info in cohorts:
                ch_name = ch_info["name"]
                filters.floatValueUnit(f"Cohort_{ch_name}_AF",
                    f"/_view/cohorts/{ch_name}/AF",
                    default_value = 0)
                filters.floatValueUnit(f"Cohort_{ch_name}_AF2",
                    f"/_view/cohorts/{ch_name}/AF2",
                    default_value = 0)

    with filters.viewGroup("Variant"):
        filters.statusUnit("Variant_Class", "/__data/variant_class")
        filters.statusUnit("Most_Severe_Consequence",
               "/__data/most_severe_consequence",
               variants = sConsequenceVariants,
               default_value = "undefined")
        filters.multiStatusUnit("Canonical_Annotation",
            "/_view/general/canonical_annotation[]",
            default_value = "undefined")
        filters.statusUnit("Multiallelic", "/_filters/multiallelic")
        filters.statusUnit("Altered_VCF", "/_filters/altered_vcf")
        # filters.intValueUnit("Number_ALTs",
        #     "/_filters/alts",
        #     conversion = ["len"], default_value = 0)

        #filters.intValueUnit("zyg_len", "/__data/zygosity",
        #   conversion = ["len"], default_value = 0)

    with filters.viewGroup("Genes"):
        # The "Symbol" unit object is kept because the panels unit below
        # is built on top of it.
        genes_unit = filters.multiStatusUnit("Symbol",
            "/_view/general/genes[]", compact_mode = True)
        filters.panelsUnit("Panels", genes_unit, "Symbol",
            view_path = "/_view/general/gene_panels")
        filters.multiStatusUnit("EQTL_Gene", "/_filters/eqtl_gene[]",
            default_value = "None")
        filters.intValueUnit("Num_Genes", "/_view/general/genes",
            conversion = ["len"], default_value = 0)
        filters.intValueUnit("Num_Transcripts",
            "/__data/transcript_consequences",
            conversion = ["len"], default_value = 0)

    with filters.viewGroup("Transcripts"):
        filters.transcriptMultisetUnit("Transcript_consequence",
            "transcript_annotations", variants = sConsequenceVariants,
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_canonical", "is_canonical",
            bool_check_value = "True", default_value = "False")
        filters.transcriptStatusUnit("Transcript_GENCODE_Basic",
            "gencode_basic", bool_check_value = "True",
            default_value = "False")
        filters.transcriptStatusUnit("Transcript_biotype", "biotype",
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_worst", "is_worst",
            bool_check_value = "True", default_value = "False")
        # NOTE(review): this is the only unit declared with
        # transcript_id_mode = True; presumably it is what satisfies the
        # getTranscriptIdUnitName() assertion at the end - confirm.
        filters.transcriptStatusUnit("Transcript_id", "id",
            default_value = "undefined", transcript_id_mode = True)
        # NOTE(review): the unit name "Transctript_Gene" looks misspelled,
        # but it is a runtime identifier and is left as is.
        tr_genes_unit = filters.transcriptStatusUnit("Transctript_Gene",
            "gene", default_value = "undefined")
        filters.transcriptPanelsUnit("Transcript_Gene_Panels",
            tr_genes_unit, "Symbol", view_name = "tr_gene_panels")
        filters.transcriptStatusUnit("Transcript_source", "transcript_source",
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_codon_pos", "codonpos",
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_region", "region",
            default_value = "undefined")
        filters.transcriptStatusUnit("Transcript_CDS", "cds",
            default_value = "-")
        filters.transcriptStatusUnit("Transcript_masked", "masked_region",
            default_value = "No")
        filters.transcriptIntValueUnit("Transcript_dist_from_exon",
            "dist_from_exon", default_value = -1)
        # filters.transcriptStatusUnit("Transcript_strand", "strand",
        #     default_value = "undefined")

    with filters.viewGroup("Transcript_Predictions"):
        # NOTE(review): "Transcript_PolypPhen_HDIV" is spelled differently
        # from its HVAR sibling; it is a runtime identifier, left as is.
        filters.transcriptStatusUnit(
            "Transcript_PolypPhen_HDIV", "polyphen2_hdiv_prediction")
        filters.transcriptStatusUnit(
            "Transcript_PolyPhen_HVAR", "polyphen2_hvar_prediction")
        filters.transcriptStatusUnit(
            "Transcript_SIFT", "sift_prediction")
        filters.transcriptStatusUnit(
            "Transcript_SIFT_4G", "sift_4g_prediction")
        filters.transcriptStatusUnit(
            "Transcript_FATHMM", "fathmm_prediction")

    with filters.viewGroup("Coordinates"):
        filters.statusUnit("Chromosome", "/_filters/chromosome",
            variants = ["chr1", "chr2", "chr3", "chr4", "chr5",
            "chr6", "chr7", "chr8", "chr9", "chr10",
            "chr11", "chr12", "chr13", "chr14", "chr15",
            "chr16", "chr17", "chr18", "chr19", "chr20",
            "chr21", "chr22", "chr23", "chrX", "chrY", "undefined"],
            default_value = "undefined")

        filters.intValueUnit("Start_Pos", "/__data/start",
            default_value = sys.maxsize)
        filters.intValueUnit("End_Pos", "/__data/end",
            default_value = 0)
        filters.intValueUnit("Dist_from_Exon", "/_filters/dist_from_exon",
            default_value = 0)
        filters.intValueUnit("Dist_from_Exon_Canonical",
            "/_filters/dist_from_exon_canonical",
            default_value = 0, conversion = ["min"])
        filters.intValueUnit("Dist_from_Exon_Worst",
            "/_filters/dist_from_exon_worst",
            default_value = 0, conversion = ["min"])
        filters.multiStatusUnit("Region_Canonical",
            "/__data/region_canonical[]",
            default_value = "Other")
        filters.multiStatusUnit("Region_Worst", "/__data/region_worst[]",
            default_value = "Other")
        filters.transcriptStatusUnit("Region", "region",
                                     default_value = "undefined")
        # Uses the named function "is_none" as a filter step and maps the
        # value "None" to "Unmapped"; anything else falls back to "Mapped".
        filters.statusUnit("In_hg19", "/_view/general/hg19",
            conversion = [["filter", "is_none"]],
            value_map= {"None": "Unmapped"}, default_value = "Mapped")

    with filters.viewGroup("gnomAD"):
        filters.floatValueUnit("gnomAD_AF",
            "/_filters/gnomad_af_fam",
            diap = (0., 1.), default_value = 0.)
        filters.floatValueUnit("gnomAD_AF_Exomes",
            "/_filters/gnomad_db_exomes_af",
            diap = (0., 1.), default_value = 0.)
        filters.floatValueUnit("gnomAD_AF_Genomes",
            "/_filters/gnomad_db_genomes_af",
            diap = (0., 1.), default_value = 0.)
        filters.floatValueUnit("gnomAD_AF_Proband",
            "/_filters/gnomad_af_pb",
            diap = (0., 1.), default_value = 0.)
        filters.floatValueUnit("gnomAD_PopMax_AF",
            "/_filters/gnomad_popmax_af",
            diap = (0., 1.), default_value = 0.)
        filters.statusUnit("gnomAD_PopMax",
            "/_filters/gnomad_popmax", default_value = "None")
        filters.intValueUnit("gnomAD_PopMax_AN",
            "/_filters/gnomad_popmax_an", default_value = 0)
        filters.floatValueUnit("gnomAD_PopMax_AF_Inbred",
            "/_filters/gnomad_raw_popmax_af",
            diap = (0., 1.), default_value = 0.)
        filters.statusUnit("gnomAD_PopMax_Inbred",
            "/_filters/gnomad_raw_popmax", default_value = "None")
        filters.intValueUnit("gnomAD_PopMax_AN_Inbred",
            "/_filters/gnomad_raw_popmax_an", default_value = 0)
        filters.intValueUnit("gnomAD_Hom",
            "/_filters/gnomad_hom", default_value = 0)
        filters.intValueUnit("gnomAD_Hem",
            "/_filters/gnomad_hem", default_value = 0)

    with filters.viewGroup("Databases"):
        # Fixed list of (label, data path) pairs, extended below with one
        # entry per trusted ClinVar submitter.
        presence_in_db = [
            ("ClinVar", "/_view/databases/clinVar"),
            ("GnomAD", "/_filters/gnomad_af_fam"),
            ("HGMD", "/__data/hgmd_pmids[]"),
            ("OMIM", "/_view/databases/omim")]
        # Submitter labels are the values of the "Clinvar_Trusted_Submitters"
        # standard item, taken in sorted order.
        for submitter in sorted(filters.getStdItem(
                "item-dict", "Clinvar_Trusted_Submitters").getData().values()):
            presence_in_db.append((submitter,
                "/_view/databases/clinvar_trusted/%s" % submitter))
        filters.presenceUnit("Presence_in_Databases", presence_in_db)

        filters.multiStatusUnit("ClinVar_Submitters",
            "/_view/databases/clinVar_submitters[]", compact_mode = True)
        filters.intValueUnit("Number_submitters",
            "/_view/databases/clinVar_submitters",
            conversion = ["len"], default_value = 0)

        filters.multiStatusUnit("PMIDs",
            "/_view/databases/references[]", compact_mode = True)
        filters.intValueUnit("Number_pmid",
            "/_view/databases/references",
            conversion = ["len"], default_value = 0)

        # filters.multiStatusUnit("beacons",
        #     "/__data/beacon_names")

    with filters.viewGroup("Call_Quality"):
        filters.floatValueUnit("Proband_GQ", "/_filters/proband_gq",
            default_value = -1)
        filters.floatValueUnit("Min_GQ", "/_filters/min_gq",
            default_value = -1)
        # Conversion pipeline: keep samples accepted by the named function
        # "has_variant", take their "genotype_quality", then the maximum.
        filters.intValueUnit("Max_GQ", "/_view/quality_samples",
            default_value = 0,
            conversion = [
                ["filter", "has_variant"],
                ["property", "genotype_quality"],
                "max"])

        # NOTE(review): ["skip", 1] presumably drops the first sample entry
        # before counting negative genotype qualities - confirm the exact
        # semantics of "skip" and "negative" in the conversion engine.
        filters.intValueUnit("Num_NO_CALL", "/_view/quality_samples",
            default_value = 0,
            conversion = [
                ["skip", 1],
                ["property", "genotype_quality"],
                "negative", "len"])
        filters.intValueUnit("QUAL", "/_filters/qual",
            default_value = -1)
        filters.floatValueUnit("QD", "/_filters/qd",
            default_value = -1.)
        filters.floatValueUnit("FS", "/_filters/fs",
            default_value = 0.)
        filters.multiStatusUnit("FT", "/_filters/filters[]")

    with filters.viewGroup("Predictions"):
        # research_only = True
        filters.statusUnit("HGMD_Benign", "/_filters/hgmd_benign",
            default_value = "Not in HGMD",
            value_map = {"True": "Benign", "False": "VUS or Pathogenic"})
        filters.multiStatusUnit("HGMD_Tags", "/_view/databases/hgmd_tags[]",
            default_value = "None")

        # research_only = True
        filters.statusUnit("Clinvar_Benign", "/_filters/clinvar_benign",
            default_value = "Not in ClinVar",
            value_map = {"True": "Benign", "False": "VUS or Pathogenic"})
        filters.multiStatusUnit("ClinVar_Significance",
            "/__data/clinvar_significance[]")
        # Register a per-record pre-transform: it calls clinvarPreTransform
        # with the record data and the trusted-submitters mapping (looked up
        # again each time the lambda runs; rec_no is ignored).
        # NOTE(review): presumably this is what fills the
        # /_view/databases/clinvar_trusted* paths read just below - confirm.
        filters.regPreTransform(lambda rec_no, rec_data:
            clinvarPreTransform(rec_data, filters.getStdItem(
                "item-dict", "Clinvar_Trusted_Submitters").getData()))

        filters.multiStatusUnit("Clinvar_Trusted_Significance",
            "/_view/databases/clinvar_trusted",
            conversion = ["values", ["split", ','], "clear", "uniq"])
        filters.multiStatusUnit("Clinvar_Trusted_Simplified",
            "/_view/databases/clinvar_trusted_simplified",
            conversion = ["values", ["split", ','], "clear", "uniq"])

        filters.statusUnit("Clinvar_stars", "/_filters/clinvar_stars",
            default_value = "No data")
        filters.intValueUnit("Number_of_clinvar_submitters",
            "/_filters/num_clinvar_submitters",
            default_value = 0)
        filters.statusUnit("Clinvar_review_status",
            "/_filters/clinvar_review_status",
            default_value = "No data")
        filters.statusUnit("Clinvar_criteria_provided",
            "/_filters/clinvar_criteria_provided",
            default_value = "Not Provided")
        filters.statusUnit("Clinvar_conflicts",
            "/_filters/clinvar_conflicts",
            default_value = "Criteria not Provided")
        filters.multiStatusUnit("Clinvar_acmg_guidelines",
            "/_filters/clinvar_acmg_guidelines[]",
            default_value = "None")

        # One status unit per trusted submitter, each reading that
        # submitter's property out of the clinvar_trusted mapping.
        for submitter in sorted(filters.getStdItem(
                "item-dict", "Clinvar_Trusted_Submitters").getData().values()):
            filters.statusUnit(f"ClinVar_Significance_{submitter}",
                "/_view/databases/clinvar_trusted",
                conversion = [["property", submitter]],
                default_value = "None")

        #filters.statusUnit("Clinvar_Trusted_Benign",
        #    "/_filters/clinvar_trusted_benign",
        #    default_value = "No data",
        #    value_map = {"True": "Benign by Trusted submitters",
        #        "False": "Unknown"})

        filters.statusUnit("splice_altering", "/_filters/splice_altering",
            default_value = "No altering")
        filters.floatValueUnit("splice_ai_dsmax", "/_filters/splice_ai_dsmax",
            default_value = 0)

        # The PolyPhen values are split on whitespace or commas before
        # being cleaned and de-duplicated.
        filters.multiStatusUnit("Polyphen_2_HVAR",
            "/_view/predictions/polyphen2_hvar[]",
            conversion = [["split_re", r"[\s\,]"], "clear", "uniq"],
            default_value = "N/A")
        filters.multiStatusUnit("Polyphen_2_HDIV",
            "/_view/predictions/polyphen2_hdiv[]",
            conversion = [["split_re", r"[\s\,]"], "clear", "uniq"],
            default_value = "N/A")

        filters.multiStatusUnit("SIFT", "/_view/predictions/sift[]",
            default_value = "N/A")
        filters.multiStatusUnit("FATHMM", "/_view/predictions/fathmm[]",
            default_value = "N/A")
        filters.multiStatusUnit("PrimateAI",
            "/_view/predictions/primate_ai_pred[]",
            default_value = "N/A")
        filters.floatValueUnit("GERP_score", "/_view/bioinformatics/gerp_rs",
            default_value = 0)

    with filters.viewGroup("Pharmacogenomics"):
        filters.multiStatusUnit("Diseases",
            "/_filters/pharmacogenomics_diseases[]", default_value = "N/A")
        filters.multiStatusUnit("Chemicals",
            "/_filters/pharmacogenomics_chemicals[]", default_value = "N/A")

    with filters.viewGroup("Expression"):
        filters.multiStatusUnit("Mostly_Expressed_in",
            "/_filters/top_tissues[]", default_value = "N/A")

    # required = {"debug"}
    with filters.viewGroup("Debug_Info"):
        filters.intValueUnit("Severity", "/_filters/severity",
            default_value = -1)

    # Sanity check: some unit above must have registered itself as the
    # transcript ID unit.
    assert filters.getTranscriptIdUnitName() is not None, (
        "Transcript ID unit is not set")

    return filters
コード例 #5
0
def defineFilterSchema():
    """Build and return the filter-unit schema as a ``FilterPrepareSetH``.

    Units are declared group by group (Inheritance, Variant, Genes,
    Transcripts, Coordinates, gnomAD, Databases, Call_Quality, Predictions,
    Debug_Info) in the order written below; each call names the unit, the
    data path (or transcript field) it reads, and display options.

    Returns:
        The populated ``FilterPrepareSetH`` instance.
    """
    filters = FilterPrepareSetH()

    with filters.viewGroup("Inheritance"):
        filters.statusUnit("Proband_Zygosity",
                           "/view/bioinformatics/zygosity",
                           title="Proband Zygosity")
        # The "x_cond" entry is a condition selecting chromosome "chrX".
        filters.zygositySpecialUnit(
            "Inheritance_Mode",
            "/data/zygosity",
            config={"x_cond": ConditionMaker.condEnum("Chromosome", ["chrX"])},
            title="Inheritance Mode")
        filters.multiStatusUnit("Callers",
                                "/view/bioinformatics/called_by[]",
                                title="Called by")
        filters.multiStatusUnit("Has_Variant", "/_filters/has_variant[]")

    with filters.viewGroup("Variant"):
        filters.statusUnit(
            "Variant_Class",
            "/data/variant_class",
            tooltip=("Variant class as returned by VEP. "
                     "The class of a variant is based on Sequence "
                     "Ontology and is called according to its component "
                     "alleles and its mapping to the reference genome. "
                     "https://useast.ensembl.org/info/genome/variation/"
                     "prediction/classification.html#classes"))
        filters.statusUnit("Most_Severe_Consequence",
                           "/data/most_severe_consequence",
                           variants=sConsequenceVariants,
                           default_value="undefined")
        filters.statusUnit("Canonical_Annotation",
                           "/view/general/canonical_annotation",
                           default_value="undefined")
        filters.intValueUnit("Number_ALTs",
                             "/_filters/alts",
                             title="Number of Alternative alleles",
                             conversion=_conv_len,
                             default_value=0)

        #filters.intValueUnit("zyg_len", "/data/zygosity",
        #   conversion = _conv_len, default_value = 0)

    with filters.viewGroup("Genes"):
        # The "Symbol" unit object is kept because the panels unit below
        # is built on top of it.
        genes_unit = filters.multiStatusUnit("Symbol",
                                             "/view/general/genes[]",
                                             compact_mode=True)
        filters.panelStatusUnit("Panels",
                                genes_unit,
                                view_path="/view/general/gene_panels")
        #filters.multiStatusUnit("Transcripts",
        #    "/data/transcript_consequences[]", compact_mode = True,
        #    conversion = lambda arr:
        #        [el["transcript_id"] for el in arr] if arr else [])
        filters.intValueUnit("Num_Genes",
                             "/view/general/genes",
                             title="Number of overlapping genes",
                             conversion=_conv_len,
                             default_value=0)
        filters.intValueUnit("Num_Transcripts",
                             "/data/transcript_consequences",
                             title="Number of transcripts at the position",
                             conversion=_conv_len,
                             default_value=0)

    # NOTE(review): the "transctipt*" method names and the "Transctipt_" /
    # "Transctript_" unit names below look misspelled, but they are
    # project API / runtime identifiers and are kept unchanged.
    with filters.viewGroup("Transcripts"):
        filters.transctiptMultisetUnit("Transctipt_consequence",
                                       "consequence_terms",
                                       variants=sConsequenceVariants)
        filters.transctiptStatusUnit("Transcript_canonical",
                                     "canonical",
                                     bool_check_value="1",
                                     default_value="False")
        filters.transctiptStatusUnit("Transcript_biotype",
                                     "biotype",
                                     default_value="undefined")
        filters.transctiptStatusUnit(
            "Transcript_worst",
            "consequence_terms",
            bool_check_value="${Most_Severe_Consequence}",
            default_value="False")
        filters.transctiptStatusUnit("Transcript_id",
                                     "transcript_id",
                                     default_value="undefined")
        filters.transctiptStatusUnit("Transctript_gene_id",
                                     "gene_id",
                                     default_value="undefined")
        filters.transctiptStatusUnit("Transcript_source",
                                     "source",
                                     default_value="undefined")
        filters.transctiptStatusUnit("Transcript_strand",
                                     "strand",
                                     default_value="undefined")

    with filters.viewGroup("Coordinates"):
        filters.statusUnit("Chromosome",
                           "/_filters/chromosome",
                           variants=[
                               "chr1", "chr2", "chr3", "chr4", "chr5", "chr6",
                               "chr7", "chr8", "chr9", "chr10", "chr11",
                               "chr12", "chr13", "chr14", "chr15", "chr16",
                               "chr17", "chr18", "chr19", "chr20", "chr21",
                               "chr22", "chr23", "chrX", "chrY", "undefined"
                           ],
                           default_value="undefined")

        filters.intValueUnit("Start_Pos",
                             "/data/start",
                             title="Start Position",
                             render_mode="neighborhood",
                             default_value=sys.maxsize)
        filters.intValueUnit("End_Pos",
                             "/data/end",
                             title="End Position",
                             default_value=0,
                             render_mode="neighborhood")
        filters.intValueUnit(
            "Dist_from_Exon",
            "/_filters/dist_from_exon",
            title="Distance From Intron/Exon Boundary (Canonical)",
            default_value=0,
            render_mode="log,<")
        filters.statusUnit("Region",
                           "/data/region_canonical",
                           title="Region (Canonical)",
                           default_value="Other")

    with filters.viewGroup("gnomAD"):
        filters.floatValueUnit("gnomAD_AF",
                               "/_filters/gnomad_af_fam",
                               diap=(0., 1.),
                               default_value=0.,
                               title="gnomAD Allele Frequency (family)",
                               tooltip="gnomAD Overall Allele Frequency",
                               render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Exomes",
                               "/_filters/gnomad_db_exomes_af",
                               diap=(0., 1.),
                               default_value=0.,
                               title="gnomAD Exome Allele Frequency (family)",
                               render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Genomes",
                               "/_filters/gnomad_db_genomes_af",
                               diap=(0., 1.),
                               default_value=0.,
                               title="gnomAD Genome Allele Frequency (family)",
                               render_mode="log,<")
        filters.floatValueUnit("gnomAD_AF_Proband",
                               "/_filters/gnomad_af_pb",
                               diap=(0., 1.),
                               default_value=0.,
                               title="gnomAD Allele Frequency (proband)",
                               tooltip="gnomAD Overall Allele Frequency "
                               "for the allele present in proband",
                               render_mode="log,<")
        filters.floatValueUnit(
            "gnomAD_PopMax_AF",
            "/_filters/gnomad_popmax_af",
            tooltip="Maximum allele frequency across all populations",
            diap=(0., 1.),
            default_value=0.,
            title="gnomAD PopMax Allele Frequency",
            render_mode="log,<")
        filters.statusUnit(
            "gnomAD_PopMax",
            "/_filters/gnomad_popmax",
            default_value="None",
            title="gnomAD PopMax Ancestry",
            tooltip="Population that has the maximum allele frequency")
        filters.intValueUnit(
            "gnomAD_PopMax_AN",
            "/_filters/gnomad_popmax_an",
            default_value=0,
            title="gnomAD: Number of alleles in PopMax Ancestry",
            render_mode="log,>")
        filters.intValueUnit("gnomAD_Hom",
                             "/_filters/gnomad_hom",
                             default_value=0,
                             title="gnomAD: Number of homozygous",
                             render_mode="log,>")
        filters.intValueUnit("gnomAD_Hem",
                             "/_filters/gnomad_hem",
                             default_value=0,
                             title="gnomAD: Number of hemizygous",
                             render_mode="log,>")

    with filters.viewGroup("Databases"):
        # Each pair is (label, data path) for one database.
        filters.presenceUnit(
            "Presence_in_Databases",
            [("ClinVar", "/view/databases/clinVar"),
             ("LMM", "/view/databases/lmm_significance"),
             ("GeneDx", "/view/databases/gene_dx_significance"),
             ("GnomAD", "/_filters/gnomad_af_fam"),
             ("HGMD", "/view/databases/hgmd_pmids[]"),
             ("OMIM", "/view/databases/omim")],
            title="Presence in Databases")

        filters.multiStatusUnit("ClinVar_Submitters",
                                "/view/databases/clinVar_submitters[]",
                                title="ClinVar Submitters",
                                compact_mode=True)
        filters.intValueUnit("Number_submitters",
                             "/view/databases/clinVar_submitters",
                             title="Number of ClinVar Submitters",
                             conversion=_conv_len,
                             default_value=0)
        filters.intValueUnit("Number_pmid",
                             "/view/databases/hgmd_pmids",
                             title="Number of PMIDs in HGMD",
                             conversion=_conv_len,
                             default_value=0)

        # filters.multiStatusUnit("beacons",
        #     "/data/beacon_names",
        #     title = "Observed at")

    with filters.viewGroup("Call_Quality"):
        filters.floatValueUnit("Proband_GQ",
                               "/_filters/proband_gq",
                               title="Genotype Quality (GQ) for Proband",
                               render_mode="linear,>",
                               default_value=1000)
        # NOTE(review): the title below carries an unmatched ")"; it is
        # user-facing text and is kept byte-for-byte.
        filters.floatValueUnit("Min_GQ",
                               "/_filters/min_gq",
                               title="Minimum GQ for the family)",
                               render_mode="linear,>",
                               default_value=1000)
        filters.floatValueUnit("QD",
                               "/_filters/qd",
                               title="Quality by Depth",
                               render_mode="linear,>",
                               default_value=100000.)
        # The title is passed by keyword, like every sibling unit, instead
        # of relying on positional parameter order.
        filters.floatValueUnit("FS",
                               "/_filters/fs",
                               title="Fisher Strand Bias",
                               render_mode="linear,<",
                               default_value=0.)
        filters.multiStatusUnit("FT", "/_filters/filters[]", title="FILTER")

    with filters.viewGroup("Predictions"):
        filters.statusUnit(
            "HGMD_Benign",
            "/_filters/hgmd_benign",
            title="Categorized Benign in HGMD",
            default_value="Not in HGMD",
            research_only=True,
            render_mode="replace(True/Benign, False/Not Benign)")
        filters.multiStatusUnit("HGMD_Tags",
                                "/view/databases/hgmd_tags[]",
                                default_value="None")

        filters.statusUnit(
            "Clinvar_Benign",
            "/_filters/clinvar_benign",
            default_value="Not in ClinVar",
            title="Categorized Benign in ClinVar by all submitters",
            research_only=True)
        filters.multiStatusUnit("ClinVar_Significance",
                                "/data/clinvar_significance[]",
                                title="Clinical Significance in ClinVar")
        filters.statusUnit("Clinvar_stars",
                           "/_filters/clinvar_stars",
                           default_value="No data",
                           title="ClinVar Stars")
        filters.intValueUnit("Number_of_clinvar_submitters",
                             "/_filters/num_clinvar_submitters",
                             render_mode="log,>",
                             default_value=0,
                             title="ClinVar: Number of Submitters")
        filters.statusUnit("Clinvar_review_status",
                           "/_filters/clinvar_review_status",
                           default_value="No data",
                           title="ClinVar Review Status")
        filters.statusUnit("Clinvar_criteria_provided",
                           "/_filters/clinvar_criteria_provided",
                           default_value="Not Provided",
                           title="ClinVar Criteria")
        filters.statusUnit("Clinvar_conflicts",
                           "/_filters/clinvar_conflicts",
                           default_value="Criteria not Provided",
                           title="ClinVar Conflicts")
        filters.multiStatusUnit("Clinvar_acmg_guidelines",
                                "/_filters/clinvar_acmg_guidelines[]",
                                default_value="None")

        filters.statusUnit(
            "Clinvar_Trusted_Benign",
            "/_filters/clinvar_trusted_benign",
            default_value="No data",
            title="Categorized Benign by Clinvar Trusted Submitters")
        filters.multiStatusUnit("LMM_Significance",
                                "/data/lmm",
                                title="Clinical Significance by LMM")
        filters.multiStatusUnit("GeneDx_Significance",
                                "/data/gene_dx",
                                title="Clinical Significance by GeneDx")

        filters.statusUnit("splice_altering",
                           "/_filters/splice_altering",
                           default_value="No altering",
                           title="Splice AI splice altering")
        filters.floatValueUnit("splice_ai_dsmax",
                               "/_filters/splice_ai_dsmax",
                               render_mode="linear,>",
                               default_value=0,
                               title="Splice AI splice altering score")

        filters.multiStatusUnit("Polyphen",
                                "/view/predictions/polyphen[]",
                                default_value="N/A")
        filters.multiStatusUnit("SIFT",
                                "/view/predictions/sift[]",
                                default_value="N/A")

        # Raw strings: "\s" and "\," are not valid string escapes, so the
        # non-raw spelling triggers an invalid-escape warning.  The runtime
        # value of the pattern is unchanged.
        filters.multiStatusUnit("Polyphen_2_HVAR",
                                "/view/predictions/polyphen2_hvar[]",
                                separators=r"[\s\,]",
                                default_value="N/A")
        filters.multiStatusUnit("Polyphen_2_HDIV",
                                "/view/predictions/polyphen2_hdiv[]",
                                separators=r"[\s\,]",
                                default_value="N/A")

        filters.floatValueUnit("GERP_score",
                               "/view/bioinformatics/gerp_rs",
                               render_mode="linear,>",
                               default_value=0,
                               title="GERP Score")

    with filters.viewGroup("Debug_Info"):
        filters.intValueUnit("Severity",
                             "/_filters/severity",
                             research_only=True,
                             default_value=-1)

    return filters
コード例 #6
0
ファイル: sec_ws.py プロジェクト: ForomePlatform/anfisa
    def execIt(self):
        """Build a derived ("ws") dataset on disk and load it into the vault.

        The record selection comes from self.mEval: for the "filter"
        solution kind it is evaluated from the filter condition, for any
        other kind it is taken from collectRecSeq().  The selected records
        are copied from the base dataset self.mDS into a new directory
        named after self.mWSName, followed by dsinfo.json, doc/info.html
        and an "active" marker file.  Progress is reported via setStatus().

        Returns:
            {"ws": <dataset name>} on success; None (with a status message
            set) when the name is rejected, when the filter selects fewer
            than one record or reaches the "max.ws.size" option, or when
            the target directory exists and could not be replaced.
        """
        if not self.correctWSName(self.mWSName):
            self.setStatus("Incorrect derived dataset name")
            return None

        ws_name = self.mWSName
        base_ds = self.mDS
        evaluator = self.mEval

        self.setStatus("Preparing to create derived dataset")
        logging.info("Prepare dataset derivation: %s" % ws_name)

        # The receipt records how this dataset was derived from its base.
        receipt = {
            "kind": evaluator.getSolKind(),
            "base": base_ds.getName(),
            "root": base_ds.getRootDSName()
        }

        if evaluator.getSolKind() != "filter":
            # Non-filter solution: the evaluator itself supplies the records.
            if evaluator.getDTreeName():
                receipt["dtree-name"] = evaluator.getDTreeName()
            selected_recs, point_seq = evaluator.collectRecSeq()
            receipt["p-presentation"] = point_seq
            receipt["dtree-code"] = evaluator.getCode()
        else:
            if evaluator.getFilterName():
                receipt["filter-name"] = evaluator.getFilterName()
            condition = evaluator.getCondition()
            rec_count = base_ds.getEvalSpace().evalTotalCounts(condition)[0]
            # Accept only 1 <= rec_count < configured maximum.
            if not (1 <= rec_count
                    < AnfisaConfig.configOption("max.ws.size")):
                self.setStatus("Size is incorrect: %d" % rec_count)
                return None
            selected_recs = base_ds.getEvalSpace().evalRecSeq(
                condition, rec_count)
            receipt["f-presentation"] = evaluator.getPresentation()
            receipt["conditions"] = evaluator.getCondDataSeq()
        receipt["eval-update-info"] = evaluator.getUpdateInfo()

        rec_no_seq = sorted(selected_recs)
        rec_total = len(rec_no_seq)

        ws_dir = base_ds.getDataVault().getDir() + "/" + ws_name
        if os.path.exists(ws_dir) and self.mForceMode:
            # Force mode: drop the previous dataset (unloading it first
            # if the vault currently holds it) before recreating it.
            if base_ds.getDataVault().getDS(ws_name):
                base_ds.getDataVault().unloadDS(ws_name, "ws")
            shutil.rmtree(ws_dir)
        if os.path.exists(ws_dir):
            self.setStatus("Dataset already exists")
            return None

        # Work on private copies so the base dataset's schemas and
        # metadata are not modified.
        view_schema = deepcopy(base_ds.getViewSchema())
        flt_schema = deepcopy(base_ds.getFltSchema())
        meta_rec = deepcopy(base_ds.getDataInfo().get("meta"))
        filter_set = FilterPrepareSetH(meta_rec, anfisaVariables)
        filter_set.setupFromInfo(flt_schema)
        trans_prep = TransformPreparator_WS(flt_schema, base_ds, False)

        os.mkdir(ws_dir)
        logging.info("Fill dataset %s datafiles..." % ws_name)

        with DataDiskStorageWriter(
                False, ws_dir, filter_set, trans_prep) as ws_out:
            record_iter = base_ds.getRecStorage().iterRecords(rec_no_seq)
            for _, rec_data in record_iter:
                ws_out.saveRecord(rec_data)
                # Periodic progress report, every self.mReportLines records.
                if ws_out.getTotal() % self.mReportLines == 0:
                    self.setStatus("Extracting records: %d/%d" %
                        (ws_out.getTotal(), rec_total))

        self.setStatus("Finishing...")
        logging.info("Finalizing derivation %s" % ws_name)

        total_item_count = trans_prep.finishUp()

        date_loaded = datetime.now().isoformat()
        mongo_agent = base_ds.getApp().getMongoConnector().getDSAgent(
            ws_name, "ws")
        mongo_agent.updateCreationDate(date_loaded)

        if "versions" in meta_rec:
            app_version = base_ds.getApp().getVersionCode()
            meta_rec["versions"]["Anfisa load"] = app_version

        # The new receipt goes first, ahead of the base dataset's receipts.
        prior_receipts = base_ds.getDataInfo().get("receipts")
        receipts = [receipt] + (
            prior_receipts[:] if prior_receipts is not None else [])

        ds_info = {
            "name": ws_name,
            "kind": "ws",
            "view_schema": view_schema,
            "flt_schema": flt_schema,
            "total": rec_total,
            "total_items": total_item_count,
            "mongo": ws_name,
            "base": base_ds.getName(),
            "root": base_ds.getRootDSName(),
            "modes": ["secondary"],
            "meta": meta_rec,
            "doc": [],
            "zygosity_var": base_ds.getDataInfo()["zygosity_var"],
            "receipts": receipts,
            "date_loaded": date_loaded}

        with open(ws_dir + "/dsinfo.json", "w", encoding="utf-8") as outp:
            outp.write(
                json.dumps(ds_info, sort_keys=True, indent=4) + "\n")

        os.mkdir(ws_dir + "/doc")
        with open(ws_dir + "/doc/info.html", "w", encoding="utf-8") as outp:
            reportDS(outp, ds_info, mongo_agent, base_ds.getDataInfo())

        # The "active" marker file holds just a single newline.
        with open(ws_dir + "/active", "w", encoding="utf-8") as outp:
            outp.write("\n")

        base_ds.getDataVault().loadDS(ws_name, "ws")

        self.setStatus("Done")
        return {"ws": ws_name}