Beispiel #1
0
    class Input:
        """Inputs accepted by GatkHaplotypeCallerGvcf."""

        # Data objects the process operates on.
        bam = DataField("alignment:bam", label="Analysis ready BAM file")
        ref_seq = DataField("seq:nucleotide", label="Reference sequence")

        # Toggle referenced by the ``hidden`` expression of ``options`` below.
        advanced = BooleanField(
            default=False,
            label="Show advanced options",
            description="Inspect and modify parameters.",
        )

        class Options:
            """Fields grouped under ``options``."""

            intervals = DataField(
                "bed",
                required=False,
                label=(
                    "Use intervals BED file to limit the analysis to the "
                    "specified parts of the genome."
                ),
            )

            contamination = FloatField(
                default=0,
                label="Contamination fraction",
                description=(
                    "Fraction of contamination in sequencing data (for all "
                    "samples) to aggressively remove."
                ),
            )

        options = GroupField(Options, hidden="!advanced", label="Options")
Beispiel #2
0
    class Input:
        """Input fields for SlamdunkAllPaired."""

        # Data objects: paired-end reads, reference FASTA and BED regions.
        reads = DataField("reads:fastq:paired", label="Reads")
        transcriptome = DataField(
            "seq:nucleotide",
            label="FASTA file containing sequences for aligning.",
        )
        regions = DataField(
            "bed",
            label="BED file with coordinates of regions of interest.",
        )

        filter_multimappers = BooleanField(
            label="Filter multimappers",
            description=(
                "If true filter and reassign multimappers based on provided "
                "BED file with regions of interest."
            ),
            default=True,
        )

        max_alignments = IntegerField(
            label="Maximum number of multimapper alignments",
            # Adjacent literals are joined without a separator, so every
            # fragment except the last must end with its own space.
            description=(
                "The maximum number of alignments that will be reported for a "
                "multi-mapping read (i.e. reads with multiple alignments of "
                "equal best scores)."
            ),
            default=1,
        )

        read_length = IntegerField(
            label="Maximum read length",
            description="Maximum length of reads in the input FASTQ file.",
            default=150,
        )
Beispiel #3
0
    class Input:
        """Declare two data inputs of type ``test``."""

        data = DataField("test", label="My input data")
        # Second input is explicitly marked as not required.
        data2 = DataField(
            "test",
            required=False,
            label="My second non-required input data",
        )
Beispiel #4
0
    class Input:
        """Inputs accepted by the WgsPreprocess process."""

        # Sample reads and the reference resources supplied with them.
        reads = DataField("reads:fastq:paired", label="Input sample")
        ref_seq = DataField("seq:nucleotide", label="Reference sequence")
        bwa_index = DataField("index:bwa", label="BWA genome index")
        known_sites = ListField(
            DataField("variants:vcf"),
            label="Known sites of variation (VCF)",
        )

        # Toggle referenced by the ``hidden`` expression of
        # ``advanced_options`` below.
        advanced = BooleanField(
            default=False,
            label="Show advanced options",
            description="Inspect and modify parameters.",
        )

        class AdvancedOptions:
            """Fields grouped under ``advanced_options``."""

            pixel_distance = IntegerField(
                default=2500,
                label="--OPTICAL_DUPLICATE_PIXEL_DISTANCE",
                description=(
                    "Set the optical pixel distance, e.g. "
                    "distance between clusters. Modify this parameter to "
                    "ensure compatibility with older Illumina platforms."
                ),
            )

        advanced_options = GroupField(
            AdvancedOptions,
            hidden="!advanced",
            label="Advanced options",
        )
Beispiel #5
0
    class Input:
        """Input fields for SlamdunkAllPaired."""

        # Data objects: paired-end reads, reference FASTA and BED regions.
        reads = DataField("reads:fastq:paired", label="Reads")
        ref_seq = DataField("seq:nucleotide", label="FASTA file")
        regions = DataField(
            "bed", label="BED file with coordinates of regions of interest"
        )

        filter_multimappers = BooleanField(
            label="Filter multimappers",
            description=(
                "If true filter and reassign multimappers based on provided "
                "BED file with regions of interest"
            ),
            default=True,
        )

        max_alignments = IntegerField(
            label="Maximum number of multimapper alignments",
            # Adjacent literals are joined without a separator, so every
            # fragment except the last must end with its own space.
            description=(
                "The maximum number of alignments that will be reported for a "
                "multi-mapping read (i.e. reads with multiple alignments of "
                "equal best scores)"
            ),
            default=1,
        )

        read_length = IntegerField(
            label="Maximum read length",
            description="Maximum length of reads in the input FASTQ file",
            default=150,
        )
Beispiel #6
0
    class Input:
        """Declare inputs of several field kinds plus a nested group."""

        my_field = StringField(label="My field")
        my_list = ListField(StringField(), label="My list")
        input_data = DataField("test:save", label="My input data")
        input_entity_data = DataField("entity", label="My entity data")
        # Same data type string as ``input_data``, passed by keyword here.
        bar = DataField(data_type="test:save", label="My bar")
        url = UrlField(UrlField.DOWNLOAD, label="My URL")
        integer = IntegerField(label="My integer")
        my_float = FloatField(label="My float")
        my_json = JsonField(label="Blah blah")
        # Non-required fields: one with a default value and one without.
        my_optional = StringField(
            default="default value",
            required=False,
            label="Optional",
        )
        my_optional_no_default = StringField(
            required=False,
            label="Optional no default",
        )

        class MyGroup:
            # Fields grouped under ``my_group``. ``bar`` here is a separate
            # attribute from the outer ``bar`` because it lives in this class.
            foo = IntegerField(label="Foo")
            bar = StringField(label="Bar")
            group_optional_no_default = StringField(
                required=False,
                label="Group optional no default",
            )

        my_group = GroupField(MyGroup, label="My group")
Beispiel #7
0
    class Input:
        """Input fields for BsConversionRate."""

        mr = DataField(
            "alignment:bam:walt",
            label="Aligned reads from bisulfite sequencing",
            # Adjacent literals are joined without a separator, so every
            # fragment except the last must end with its own space.
            description=(
                "Bisulfite specific alignment such as WALT is required as .mr "
                "file type is used. Duplicates should be removed to reduce "
                "any bias introduced by incomplete conversion on PCR "
                "duplicate reads."
            ),
        )
        skip = BooleanField(
            label="Skip Bisulfite conversion rate step",
            description="Bisulfite conversion rate step can be skipped.",
            default=False,
        )
        sequence = DataField(
            "seq:nucleotide",
            label="Unmethylated control sequence",
            description=(
                "Separate unmethylated control sequence FASTA file is "
                "required to estimate bisulfite conversion rate."
            ),
            required=False,
        )
        count_all = BooleanField(
            label="Count all cytosines including CpGs", default=True
        )
        read_length = IntegerField(label="Average read length", default=150)
        max_mismatch = FloatField(
            label="Maximum fraction of mismatches", required=False
        )
        a_rich = BooleanField(label="Reads are A-rich", default=False)
Beispiel #8
0
    class Input:
        """Input fields for AlleyoopRates."""

        # Reference sequences in FASTA format.
        ref_seq = DataField(
            "seq:nucleotide",
            label="FASTA file containing sequences for aligning",
        )
        # Alignment produced by Slamdunk (data type ``alignment:bam:slamdunk``).
        slamdunk = DataField(
            "alignment:bam:slamdunk",
            label="Slamdunk results",
        )
Beispiel #9
0
    class Input:
        """Inputs accepted by the ROSE2 process."""

        # Two alternative peak sources; each one's ``hidden`` expression
        # names the other.
        input_macs = DataField(
            "chipseq:callpeak",
            required=False,
            hidden="input_upload",
            label="BED/narrowPeak file (MACS results)",
        )
        input_upload = DataField(
            "bed",
            required=False,
            hidden="input_macs || use_filtered_bam",
            label="BED file (Upload)",
        )
        use_filtered_bam = BooleanField(
            default=False,
            hidden="input_upload",
            label="Use Filtered BAM File",
            description=(
                "Use filtered BAM file from a MACS2 object to rank "
                "enhancers by. Only applicable if input is MACS2."
            ),
        )

        # Explicit BAM inputs; both carry ``hidden="use_filtered_bam"``.
        rankby = DataField(
            "alignment:bam",
            required=False,
            hidden="use_filtered_bam",
            label="BAM file",
            description="BAM file to rank enhancers by.",
        )
        # NOTE(review): description is identical to ``rankby``'s - confirm it
        # is the intended text for the control file.
        control = DataField(
            "alignment:bam",
            required=False,
            hidden="use_filtered_bam",
            label="Control BAM File",
            description="BAM file to rank enhancers by.",
        )

        # Numeric and masking parameters.
        tss = IntegerField(
            default=0,
            label="TSS exclusion",
            description=(
                "Enter a distance from TSS to exclude. 0 = no TSS exclusion."
            ),
        )
        stitch = IntegerField(
            required=False,
            label="Stitch",
            description=(
                "Enter a max linking distance for stitching. If not "
                "given, optimal stitching parameter will be determined"
                " automatically."
            ),
        )
        mask = DataField(
            "bed",
            required=False,
            label="Masking BED file",
            description=(
                "Mask a set of regions from analysis. Provide a BED of"
                " masking regions."
            ),
        )
Beispiel #10
0
    class Input:
        """Input fields to process ChipQC."""

        # Data objects: alignment, called peaks and an optional blacklist.
        alignment = DataField(
            data_type="alignment:bam",
            label="Aligned reads",
        )
        peaks = DataField(
            data_type="chipseq:callpeak",
            label="Called peaks",
        )
        blacklist = DataField(
            data_type="bed",
            label="Blacklist regions",
            description=(
                "BED file containing genomic regions that should be "
                "excluded from the analysis."
            ),
            required=False,
        )
        calculate_enrichment = BooleanField(
            label="Calculate enrichment",
            description=(
                "Calculate enrichment of signal in known genomic "
                "annotation. By default annotation is provided from "
                "the TranscriptDB package specified by genome build "
                "which should match one of the supported annotations "
                "(hg19, hg38, hg18, mm10, mm9, rn4, ce6, dm3). If "
                "annotation is not supported the analysis is skipped."
            ),
            default=False,
        )

        class Advanced:
            """Add advanced list of options."""

            quality_threshold = IntegerField(
                label="Mapping quality threshold",
                description=(
                    "Only reads with mapping quality scores above "
                    "this threshold will be used for some statistics."
                ),
                default=15,
            )
            profile_window = IntegerField(
                label="Window size",
                description=(
                    "An integer indicating the width of the window "
                    "used for peak profiles. Peaks will be centered "
                    "on their summits and include half of the window "
                    "size upstream and half downstream of this point."
                ),
                default=400,
            )
            # Declared as a string field; the default uses ``start:end`` form.
            shift_size = StringField(
                label="Shift size",
                description=(
                    "Vector of values to try when computing optimal "
                    "shift sizes. It should be specified as "
                    "consecutive numbers vector with start:end"
                ),
                default="1:300",
            )

        advanced = GroupField(
            Advanced,
            label="Advanced parameters",
        )
Beispiel #11
0
    class Input:
        """Inputs accepted by the Bamclipper process."""

        alignment = DataField("alignment:bam", label="Alignment BAM file")
        # The BEDPE file is explicitly marked as not required.
        bedpe = DataField("bedpe", required=False, label="BEDPE file")
        skip = BooleanField(
            default=False,
            label="Skip Bamclipper step",
            description="Use this option to skip Bamclipper step.",
        )
Beispiel #12
0
    class Input:
        """Input fields to process ImportScRNA10x."""

        reads = DataField(
            data_type="screads:10x:",
            label="10x reads data object",
        )
        genome_index = DataField(
            data_type="genomeindex:10x:",
            label="10x genome index data object",
        )
        chemistry = StringField(
            label="Chemistry",
            required=False,
            default="auto",
            description=(
                "Assay configuration. By default the assay configuration is detected "
                "automatically, which is the recommended mode. You should only specify "
                "chemistry if there is an error in automatic detection."
            ),
            # NOTE(review): the short chemistry names sit in the second tuple
            # slot here, while other ``choices`` lists in this code base put
            # the stored value first - confirm the intended order against the
            # code that consumes this field.
            choices=[
                ("auto", "auto"),
                ("Single Cell 3'", "threeprime"),
                ("Single Cell 5'", "fiveprime"),
                ("Single Cell 3' v1", "SC3Pv1"),
                ("Single Cell 3' v2", "SC3Pv2"),
                ("Single Cell 3' v3", "SC3Pv3"),
                # Was "C5P-PE"; every sibling entry uses the "SC" prefix.
                ("Single Cell 5' paired-end", "SC5P-PE"),
                ("Single Cell 5' R2-only", "SC5P-R2"),
            ],
        )
        trim_r1 = IntegerField(
            label="Trim R1",
            required=False,
            description=(
                "Hard-trim the input R1 sequence to this length. Note that the length "
                "includes the Barcode and UMI sequences so do not set this below 26 for "
                "Single Cell 3' v2 or Single Cell 5'. This and \"Trim R2\" are useful for "
                "determining the optimal read length for sequencing."
            ),
        )
        trim_r2 = IntegerField(
            label="Trim R2",
            required=False,
            description="Hard-trim the input R2 sequence to this length.",
        )
        expected_cells = IntegerField(
            label="Expected number of recovered cells",
            default=3000,
        )
        force_cells = IntegerField(
            label="Force cell number",
            required=False,
            description=(
                "Force pipeline to use this number of cells, bypassing the cell "
                "detection algorithm. Use this if the number of cells estimated by Cell "
                "Ranger is not consistent with the barcode rank plot."
            ),
        )
Beispiel #13
0
    class Input:
        """Inputs accepted by GatkGenotypeGVCFs."""

        # Data objects the process operates on.
        gvcfs = ListField(DataField("variants:gvcf"), label="Input data (GVCF)")
        ref_seq = DataField("seq:nucleotide", label="Reference sequence")
        intervals = DataField("bed", label="Intervals file (.bed)")
        dbsnp = DataField("variants:vcf", label="dbSNP file")

        # Toggle referenced by the ``hidden`` expression of
        # ``advanced_options`` below.
        advanced = BooleanField(
            default=False,
            label="Show advanced options",
            description="Inspect and modify parameters.",
        )

        class AdvancedOptions:
            """Fields grouped under ``advanced_options``."""

            batch_size = IntegerField(
                default=0,
                label="Batch size",
                description=(
                    "Batch size controls the number of samples "
                    "for which readers are open at once and therefore provides "
                    "a way to minimize memory consumption. However, it can "
                    "take longer to complete. Use the consolidate flag if more "
                    "than a hundred batches were used. This will improve feature "
                    "read time. batchSize=0 means no batching "
                    "(i.e. readers for all samples will be opened at once)."
                ),
            )

            consolidate = BooleanField(
                default=False,
                label="Consolidate",
                description=(
                    "Boolean flag to enable consolidation. If "
                    "importing data in batches, a new fragment is created for "
                    "each batch. In case thousands of fragments are created, "
                    "GenomicsDB feature readers will try to open ~20x as many "
                    "files. Also, internally GenomicsDB would consume more "
                    "memory to maintain bookkeeping data from all fragments. "
                    "Use this flag to merge all fragments into one. Merging "
                    "can potentially improve read performance, however overall "
                    "benefit might not be noticeable as the top Java layers "
                    "have significantly higher overheads. This flag has no "
                    "effect if only one batch is used."
                ),
            )

        advanced_options = GroupField(
            AdvancedOptions,
            hidden="!advanced",
            label="Advanced options",
        )
Beispiel #14
0
    class Input:
        """Inputs accepted by the Bamclipper process."""

        alignment = DataField("alignment:bam", label="Alignment BAM file")
        # The BEDPE file is explicitly marked as not required.
        bedpe = DataField("bedpe", required=False, label="BEDPE file")
        skip = BooleanField(
            default=False,
            label="Skip Bamclipper step",
            description="Use this option to skip Bamclipper step.",
        )
Beispiel #15
0
    class Input:
        """Inputs accepted by the CellRangerMkref process."""

        # Both data type strings end with a trailing colon; kept verbatim.
        genome = DataField(label="Reference genome", data_type="genome:fasta:")
        annotation = DataField(label="Annotation", data_type="annotation:gtf:")
Beispiel #16
0
    class Input:
        """Inputs accepted by the CellRangerMkref process."""

        # Both data type strings end with a trailing colon; kept verbatim.
        genome = DataField(label="Reference genome", data_type="seq:nucleotide:")
        annotation = DataField(label="Annotation", data_type="annotation:gtf:")
Beispiel #17
0
    class Input:
        """Inputs accepted by InsertSizeMetrics."""

        # Data objects the process operates on.
        bam = DataField("alignment:bam", label="Alignment BAM file")
        genome = DataField("seq:nucleotide", label="Genome")

        minimum_fraction = FloatField(
            default=0.05,
            # The label's trailing space is part of the original text.
            label="Minimum fraction of reads in a category to be considered ",
            description=(
                "When generating the histogram, discard any data "
                "categories (out of FR, TANDEM, RF) that have fewer than this "
                "fraction of overall reads (Range: 0 and 0.5)."
            ),
        )

        include_duplicates = BooleanField(
            default=False,
            label=(
                "Include reads marked as duplicates in the insert size histogram"
            ),
        )

        deviations = FloatField(
            default=10.0,
            label="Deviations limit",
            description=(
                "Generate mean, standard deviation and plots by trimming "
                "the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. "
                "This is done because insert size data typically includes enough "
                "anomalous values from chimeras and other artifacts to make the "
                "mean and standard deviation grossly misleading regarding the real "
                "distribution."
            ),
        )

        validation_stringency = StringField(
            default="STRICT",
            choices=[
                ("STRICT", "STRICT"),
                ("LENIENT", "LENIENT"),
                ("SILENT", "SILENT"),
            ],
            label="Validation stringency",
            description=(
                "Validation stringency for all SAM files read by this "
                "program. Setting stringency to SILENT can improve "
                "performance when processing a BAM file in which "
                "variable-length data (read, qualities, tags) do not "
                "otherwise need to be decoded. Default is STRICT."
            ),
        )

        assume_sorted = BooleanField(
            default=False,
            label="Sorted BAM file",
            description=(
                "If True, the sort order in the header file will be ignored."
            ),
        )
    class Input:
        """Input fields for AlleyoopSnpEval."""

        # Reference sequences in FASTA format.
        ref_seq = DataField(
            "seq:nucleotide",
            label="FASTA file containing sequences for aligning",
        )
        regions = DataField(
            "bed",
            label="BED file with coordinates of regions of interest",
        )
        # Alignment produced by Slamdunk (data type ``alignment:bam:slamdunk``).
        slamdunk = DataField(
            "alignment:bam:slamdunk",
            label="Slamdunk results",
        )
        read_length = IntegerField(
            label="Maximum read length",
            description="Maximum length of reads in the input FASTQ file",
            default=150,
        )
    class Input:
        """Inputs accepted by AlignmentSummary."""

        # Data objects; the adapter sequences are explicitly not required.
        bam = DataField("alignment:bam", label="Alignment BAM file")
        genome = DataField("seq:nucleotide", label="Genome")
        adapters = DataField(
            "seq:nucleotide",
            required=False,
            label="Adapter sequences",
        )

        validation_stringency = StringField(
            default="STRICT",
            choices=[
                ("STRICT", "STRICT"),
                ("LENIENT", "LENIENT"),
                ("SILENT", "SILENT"),
            ],
            label="Validation stringency",
            description=(
                "Validation stringency for all SAM files read by this "
                "program. Setting stringency to SILENT can improve "
                "performance when processing a BAM file in which "
                "variable-length data (read, qualities, tags) do not "
                "otherwise need to be decoded. Default is STRICT."
            ),
        )

        insert_size = IntegerField(default=100000, label="Maximum insert size")

        # The default "null" is the choice labelled "Unspecified".
        pair_orientation = StringField(
            default="null",
            choices=[
                ("null", "Unspecified"),
                ("FR", "FR"),
                ("RF", "RF"),
                ("TANDEM", "TANDEM"),
            ],
            label="Pair orientation",
        )

        bisulfite = BooleanField(
            default=False,
            label="BAM file consists of bisulfite sequenced reads",
        )

        assume_sorted = BooleanField(
            default=False,
            label="Sorted BAM file",
            description=(
                "If true the sort order in the header file will be ignored."
            ),
        )
Beispiel #20
0
    class Input:
        """Input fields to perform Base quality score recalibration."""

        bam = DataField("alignment:bam", label="BAM file containing reads")
        reference = DataField("seq:nucleotide", label="Reference genome file")
        known_sites = ListField(
            DataField(
                data_type="variants:vcf",
                description=(
                    "One or more databases of known polymorphic sites used to exclude regions around known "
                    "polymorphisms from analysis."
                ),
            ),
            label="List of known sites of variation",
        )
        intervals = DataField(
            data_type="bed",
            required=False,
            label="One or more genomic intervals over which to operate.",
            description=(
                "This field is optional, but it can speed up the process by restricting calculations to "
                "specific genome regions."
            ),
        )
        # The default is an empty string.
        read_group = StringField(
            label="Replace read groups in BAM",
            description=(
                "Replace read groups in a BAM file. This argument enables the user to replace all read groups "
                "in the INPUT file with a single new read group and assign all reads to this read group in "
                "the OUTPUT BAM file. Addition or replacement is performed using Picard's "
                "AddOrReplaceReadGroups tool. Input should take the form of -name=value delimited by a "
                '";", e.g. "-ID=1;-LB=GENIALIS;-PL=ILLUMINA;-PU=BARCODE;-SM=SAMPLENAME1". See tool\'s '
                "documentation for more information on tag names. Note that PL, LB, PU and SM are required "
                "fields. See caveats of rewriting read groups in the documentation."
            ),
            default="",
        )
        validation_stringency = StringField(
            label="Validation stringency",
            description=(
                "Validation stringency for all SAM files read by this program. Setting stringency to SILENT "
                "can improve performance when processing a BAM file in which variable-length data (read, "
                "qualities, tags) do not otherwise need to be decoded. Default is STRICT. This setting is "
                "used in BaseRecalibrator and ApplyBQSR processes."
            ),
            choices=[
                ("STRICT", "STRICT"),
                ("LENIENT", "LENIENT"),
                ("SILENT", "SILENT"),
            ],
            default="STRICT",
        )
Beispiel #21
0
    class Input:
        """Inputs accepted by the MergeFastqPaired process."""

        # A list of paired-end reads objects; the data type string's trailing
        # colon is kept verbatim.
        reads = ListField(
            DataField(data_type="reads:fastq:paired:"), label="Reads data objects"
        )
Beispiel #22
0
    class Input:
        """Inputs accepted by the MergeFastqSingle process."""

        # A list of single-end reads objects; the data type string's trailing
        # colon is kept verbatim.
        reads = ListField(
            DataField(data_type="reads:fastq:single:"), label="Reads data objects"
        )
Beispiel #23
0
        class Options:
            """Strandedness and Phred score option fields."""

            stranded = StringField(
                default="non_specific",
                choices=[
                    ("non_specific", "Strand non-specific"),
                    ("forward", "Strand-specific forward"),
                    ("reverse", "Strand-specific reverse"),
                    ("auto", "Detect automatically"),
                ],
                label="Assay type",
            )

            # The next two fields share the same ``hidden`` expression, which
            # tests ``options.stranded`` against 'auto'.
            cdna_index = DataField(
                "index:salmon",
                required=False,
                hidden="options.stranded != 'auto'",
                label="cDNA index file",
            )

            n_reads = IntegerField(
                default=5000000,
                hidden="options.stranded != 'auto'",
                label="Number of reads in subsampled alignment file",
            )

            # Non-required integer fields without defaults.
            maxPhredScore = IntegerField(required=False, label="Max Phred Score")

            adjustPhredScore = IntegerField(
                required=False,
                label="Adjust Phred Score",
            )
Beispiel #24
0
    class Input:
        """Inputs accepted by the MapMicroarrayProbes process."""

        expressions = ListField(
            DataField("microarray:normalized"), label="Normalized expressions"
        )

        # The mapping file is explicitly marked as not required.
        mapping_file = FileField(
            required=False,
            label="File with probe ID mappings",
            description=(
                "The file should be tab-separated and contain two columns with their column names. The first "
                "column should contain Gene IDs and the second one should contain probe names. Supported file extensions "
                "are .tab.*, .tsv.*, .txt.*"
            ),
        )

        # ``source`` and ``build`` are both explicitly not required; their
        # descriptions say when a value is expected.
        source = StringField(
            required=False,
            allow_custom_choice=True,
            choices=[
                ("AFFY", "AFFY"),
                ("DICTYBASE", "DICTYBASE"),
                ("ENSEMBL", "ENSEMBL"),
                ("NCBI", "NCBI"),
                ("UCSC", "UCSC"),
            ],
            label="Gene ID source",
            description=(
                "Gene ID source used for probe mapping is required when using a custom file."
            ),
        )
        build = StringField(
            required=False,
            label="Genome build",
            description=(
                "Genome build of mapping file is required when using a custom file."
            ),
        )
    class Input:
        """Input fields to process ClusterTimeCourse."""

        expressions = ListField(
            DataField("expression"),
            relation_type="series",
            label="Time series relation",
            description=(
                "Select time course to which the expressions belong to."
            ),
        )
        genes = ListField(
            StringField(),
            label="Gene subset",
            required=False,
            description="Select at least two genes or leave this field empty.",
        )
        # ``gene_species`` and ``gene_source`` both carry ``hidden="!genes"``.
        gene_species = StringField(
            label="Species",
            description=(
                "Species to which the selected genes belong to. "
                "This field is required if gene subset is set."
            ),
            required=False,
            hidden="!genes",
            allow_custom_choice=True,
            choices=[
                ("Dictyostelium discoideum", "Dictyostelium discoideum"),
                # Restored from the censored text "H**o sapiens".
                ("Homo sapiens", "Homo sapiens"),
                ("Macaca mulatta", "Macaca mulatta"),
                ("Mus musculus", "Mus musculus"),
                ("Rattus norvegicus", "Rattus norvegicus"),
            ],
        )
        gene_source = StringField(
            label="Gene ID database of selected genes",
            description="This field is required if gene subset is set.",
            required=False,
            hidden="!genes",
        )
        distance = StringField(
            label="Distance metric",
            choices=[
                ("spearman", "Spearman"),
                ("pearson", "Pearson"),
            ],
            default="spearman",
        )
        linkage = StringField(
            label="Linkage method",
            choices=[
                ("single", "single"),
                ("average", "average"),
                ("complete", "complete"),
            ],
            default="average",
        )
        ordering = BooleanField(
            label="Use optimal ordering",
            description=(
                "Results in a more intuitive tree structure, "
                "but may slow down the clustering on large datasets"
            ),
            default=False,
        )
Beispiel #26
0
    class Input:
        """Input fields to process MicroarrayExpression."""

        exp_unmapped = DataField(
            "microarray:normalized",
            label="Unmapped normalized expressions",
            description=(
                "Unmapped normalized expression with the original probe IDs."
            ),
        )
        exp = FileField(
            label="Normalized and mapped expressions file",
            # Adjacent literals are joined without a separator, so every
            # fragment except the last must end with its own space.
            description=(
                "Files should have two columns one with GeneIDs and the other one with expression values. "
                "Expected column names are 'Gene' and 'Expression'. "
                "Supported file extensions are .tab.*, .tsv.*, .txt.*"
            ),
        )
        source = StringField(
            label="Gene ID source",
            allow_custom_choice=True,
            choices=[
                ("AFFY", "AFFY"),
                ("DICTYBASE", "DICTYBASE"),
                ("ENSEMBL", "ENSEMBL"),
                ("NCBI", "NCBI"),
                ("UCSC", "UCSC"),
            ],
        )
        build = StringField(label="Genome build")
        probe_mapping = StringField(label="Probe to transcript mapping used")
Beispiel #27
0
    class Input:
        """Inputs accepted by CollectRrbsMetrics."""

        # Data objects the process operates on.
        bam = DataField("alignment:bam", label="Alignment BAM file")
        genome = DataField("seq:nucleotide", label="Genome")

        # Thresholds.
        min_quality = IntegerField(
            default=20,
            label=(
                "Threshold for base quality of a C base before it is considered"
            ),
        )

        next_base_quality = IntegerField(
            default=10,
            label=(
                "Threshold for quality of a base next to a C before the C base is considered"
            ),
        )

        # NOTE(review): the attribute name looks like a misspelling of
        # ``min_length``; it is left as is because renaming an input field
        # would change the interface seen by callers.
        min_lenght = IntegerField(default=5, label="Minimum read length")

        mismatch_rate = FloatField(
            default=0.1,
            label=(
                "Maximum fraction of mismatches in a read to be considered (Range: 0 and 1)"
            ),
        )

        validation_stringency = StringField(
            default="STRICT",
            choices=[
                ("STRICT", "STRICT"),
                ("LENIENT", "LENIENT"),
                ("SILENT", "SILENT"),
            ],
            label="Validation stringency",
            description=(
                "Validation stringency for all SAM files read by this "
                "program. Setting stringency to SILENT can improve "
                "performance when processing a BAM file in which "
                "variable-length data (read, qualities, tags) do not "
                "otherwise need to be decoded. Default is STRICT."
            ),
        )

        assume_sorted = BooleanField(
            default=False,
            label="Sorted BAM file",
            description=(
                "If true the sort order in the header file will be ignored."
            ),
        )
Beispiel #28
0
    class Input:
        """Declare the single input of MethylationArraySesame: an IDAT file."""

        # The only input: a data object of type "methylationarray:idat".
        idat_file = DataField(
            "methylationarray:idat",
            description="Illumina methylation array BeadChip raw IDAT file.",
            label="Illumina methylation array IDAT file",
        )
Beispiel #29
0
    class Input:
        """Declare the inputs: an alignment, a GTF annotation and an options group."""

        alignment = DataField(data_type="alignment:bam", label="Alignment")
        annotation = DataField(data_type="annotation:gtf", label="GTF annotation")

        class Options:
            """Fields exposed through the ``options`` group."""

            # Assay strandedness; "auto" is the value that the ``hidden``
            # expressions on the next two fields compare against.
            stranded = StringField(
                choices=[
                    ("non_specific", "Strand non-specific"),
                    ("forward", "Strand-specific forward"),
                    ("reverse", "Strand-specific reverse"),
                    ("auto", "Detect automatically"),
                ],
                default="non_specific",
                label="Assay type",
            )

            # Both fields below carry the expression
            # "options.stranded != 'auto'" in ``hidden`` -- presumably they
            # are only shown when automatic detection is selected.
            cdna_index = DataField(
                data_type="index:salmon",
                hidden="options.stranded != 'auto'",
                required=False,
                label="cDNA index file",
            )
            n_reads = IntegerField(
                hidden="options.stranded != 'auto'",
                default=5000000,
                label="Number of reads in subsampled alignment file",
            )

            # Optional Phred score settings with no default. The camelCase
            # names are part of the input schema and are kept unchanged.
            maxPhredScore = IntegerField(required=False, label="Max Phred Score")
            adjustPhredScore = IntegerField(required=False, label="Adjust Phred Score")

        options = GroupField(Options, label="Options")
Beispiel #30
0
    class Input:
        """Input fields to perform Base quality score recalibration.

        Declares the BAM file and reference genome, a list of known-variant
        VCFs, a BED file of genomic intervals, a read-group replacement
        string and the validation stringency selector.
        """

        bam = DataField('alignment:bam', label='BAM file containing reads')
        reference = DataField('genome:fasta', label='Reference genome file')

        # List of VCF data objects; the description sits on the inner field.
        known_sites = ListField(
            DataField(
                data_type='variants:vcf',
                description=(
                    'One or more databases of known polymorphic sites used to exclude regions around known '
                    'polymorphisms from analysis.'
                ),
            ),
            label='List of known sites of variation',
        )

        # NOTE(review): the description calls this field optional, but
        # ``required=False`` is not set here -- confirm the intended behaviour
        # against the code that consumes ``intervals`` before changing it.
        intervals = DataField(
            data_type='bed',
            label='One or more genomic intervals over which to operate.',
            description=(
                'This field is optional, but it can speed up the process by restricting calculations to '
                'specific genome regions.'
            ),
        )

        # Help text fixes: added the missing space after "BAM file." and
        # corrected "require fields" to "required fields".
        read_group = StringField(
            label='Replace read groups in BAM',
            description=(
                'Replace read groups in a BAM file. This argument enables the user to replace all read groups '
                'in the INPUT file with a single new read group and assign all reads to this read group in '
                'the OUTPUT BAM file. Addition or replacement is performed using Picard\'s '
                'AddOrReplaceReadGroups tool. Input should take the form of -name=value delimited by a '
                '";", e.g. "-ID=1;-LB=GENIALIS;-PL=ILLUMINA;-PU=BARCODE;-SM=SAMPLENAME1". See tool\'s '
                'documentation for more information on tag names. Note that PL, LB, PU and SM are required '
                'fields. See caveats of rewriting read groups in the documentation.'
            ),
            default='',
        )

        # One of three fixed stringency levels; STRICT is the default.
        validation_stringency = StringField(
            label='Validation stringency',
            description=(
                'Validation stringency for all SAM files read by this program. Setting stringency to SILENT '
                'can improve performance when processing a BAM file in which variable-length data (read, '
                'qualities, tags) do not otherwise need to be decoded. Default is STRICT. This setting is '
                'used in BaseRecalibrator and ApplyBQSR processes.'
            ),
            choices=[
                ('STRICT', 'STRICT'),
                ('LENIENT', 'LENIENT'),
                ('SILENT', 'SILENT'),
            ],
            default='STRICT',
        )