class Advanced:
    """Advanced options for sample naming and MultiQC report configuration."""

    # Sample-name construction.
    dirs = BooleanField(
        label="--dirs",
        description="Prepend directory to sample names.",
        default=True,
    )
    dirs_depth = IntegerField(
        label="--dirs-depth",
        description=(
            "Prepend a specified number of directories to sample names. "
            "Enter a negative number (default) to take from start of path."
        ),
        default=-1,
    )
    fullnames = BooleanField(
        label="--fullnames",
        description="Disable the sample name cleaning (leave as full file name).",
        default=False,
    )

    # Report configuration.
    config = BooleanField(
        label="Use configuration file",
        description="Use Genialis configuration file for MultiQC report.",
        default=True,
    )
    cl_config = StringField(
        label="--cl-config",
        description=(
            "Enter text with command-line configuration options to override "
            "the defaults (e.g. custom_logo_url: https://www.genialis.com)."
        ),
        required=False,
    )
class Input:
    """Input fields for BsConversionRate.

    Long descriptions are assembled from adjacent string literals; every
    fragment except the last ends with a space so that words at the seams
    do not run together in the rendered help text.
    """

    mr = DataField(
        "alignment:bam:walt",
        label="Aligned reads from bisulfite sequencing",
        description="Bisulfite specific alignment such as WALT is required as "
        ".mr file type is used. Duplicates should be removed to reduce any "
        "bias introduced by incomplete conversion on PCR duplicate reads.",
    )
    skip = BooleanField(
        label="Skip Bisulfite conversion rate step",
        description="Bisulfite conversion rate step can be skipped.",
        default=False,
    )
    # Optional input; the description states when it is needed.
    sequence = DataField(
        "seq:nucleotide",
        label="Unmethylated control sequence",
        description="Separate unmethylated control sequence FASTA file is "
        "required to estimate bisulfite conversion rate.",
        required=False,
    )
    count_all = BooleanField(
        label="Count all cytosines including CpGs",
        default=True,
    )
    read_length = IntegerField(label="Average read length", default=150)
    max_mismatch = FloatField(
        label="Maximum fraction of mismatches",
        required=False,
    )
    a_rich = BooleanField(label="Reads are A-rich", default=False)
class Input:
    """Input fields for InsertSizeMetrics."""

    bam = DataField("alignment:bam", label="Alignment BAM file")
    genome = DataField("seq:nucleotide", label="Genome")

    minimum_fraction = FloatField(
        # The label must not carry trailing whitespace: it is user-facing text.
        label="Minimum fraction of reads in a category to be considered",
        description="When generating the histogram, discard any data "
        "categories (out of FR, TANDEM, RF) that have fewer than this "
        "fraction of overall reads (Range: 0 and 0.5).",
        default=0.05,
    )
    include_duplicates = BooleanField(
        label="Include reads marked as duplicates in the insert size histogram",
        default=False,
    )
    deviations = FloatField(
        label="Deviations limit",
        description="Generate mean, standard deviation and plots by trimming "
        "the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. "
        "This is done because insert size data typically includes enough "
        "anomalous values from chimeras and other artifacts to make the "
        "mean and standard deviation grossly misleading regarding the real "
        "distribution.",
        default=10.0,
    )
    validation_stringency = StringField(
        label="Validation stringency",
        description="Validation stringency for all SAM files read by this "
        "program. Setting stringency to SILENT can improve "
        "performance when processing a BAM file in which "
        "variable-length data (read, qualities, tags) do not "
        "otherwise need to be decoded. Default is STRICT.",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        default="STRICT",
    )
    assume_sorted = BooleanField(
        label="Sorted BAM file",
        description="If True, the sort order in the header file will be ignored.",
        default=False,
    )
class Input:
    """Inputs of the AlignmentSummary process."""

    # Data objects.
    bam = DataField("alignment:bam", label="Alignment BAM file")
    genome = DataField("seq:nucleotide", label="Genome")
    adapters = DataField(
        "seq:nucleotide",
        required=False,
        label="Adapter sequences",
    )

    # Tool parameters.
    validation_stringency = StringField(
        label="Validation stringency",
        default="STRICT",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        description=(
            "Validation stringency for all SAM files read by this program. "
            "Setting stringency to SILENT can improve performance when "
            "processing a BAM file in which variable-length data (read, "
            "qualities, tags) do not otherwise need to be decoded. "
            "Default is STRICT."
        ),
    )
    insert_size = IntegerField(default=100000, label="Maximum insert size")
    pair_orientation = StringField(
        label="Pair orientation",
        choices=[
            ("null", "Unspecified"),
            ("FR", "FR"),
            ("RF", "RF"),
            ("TANDEM", "TANDEM"),
        ],
        default="null",
    )
    bisulfite = BooleanField(
        default=False,
        label="BAM file consists of bisulfite sequenced reads",
    )
    assume_sorted = BooleanField(
        label="Sorted BAM file",
        default=False,
        description="If true the sort order in the header file will be ignored.",
    )
class Input:
    """Input fields to process ClusterTimeCourse."""

    expressions = ListField(
        DataField("expression"),
        relation_type="series",
        label="Time series relation",
        description="Select time course to which the expressions belong to.",
    )
    genes = ListField(
        StringField(),
        label="Gene subset",
        required=False,
        description="Select at least two genes or leave this field empty.",
    )
    # The two gene_* fields below are hidden while no gene subset is given.
    gene_species = StringField(
        label="Species",
        description="Species to which the selected genes belong to. "
        "This field is required if gene subset is set.",
        required=False,
        hidden="!genes",
        allow_custom_choice=True,
        choices=[
            ("Dictyostelium discoideum", "Dictyostelium discoideum"),
            # Value and label must both be the real species name; the
            # previous "H**o sapiens" was a mangled string.
            ("Homo sapiens", "Homo sapiens"),
            ("Macaca mulatta", "Macaca mulatta"),
            ("Mus musculus", "Mus musculus"),
            ("Rattus norvegicus", "Rattus norvegicus"),
        ],
    )
    gene_source = StringField(
        label="Gene ID database of selected genes",
        description="This field is required if gene subset is set.",
        required=False,
        hidden="!genes",
    )
    distance = StringField(
        label="Distance metric",
        choices=[
            ("spearman", "Spearman"),
            ("pearson", "Pearson"),
        ],
        default="spearman",
    )
    linkage = StringField(
        label="Linkage method",
        choices=[
            ("single", "single"),
            ("average", "average"),
            ("complete", "complete"),
        ],
        default="average",
    )
    ordering = BooleanField(
        label="Use optimal ordering",
        description="Results in a more intuitive tree structure, "
        "but may slow down the clustering on large datasets",
        default=False,
    )
class Input:
    """Inputs of the WgsPreprocess process."""

    reads = DataField("reads:fastq:paired", label="Input sample")
    ref_seq = DataField("seq:nucleotide", label="Reference sequence")
    bwa_index = DataField("index:bwa", label="BWA genome index")
    known_sites = ListField(
        DataField("variants:vcf"),
        label="Known sites of variation (VCF)",
    )

    # Toggle that reveals the advanced option group below.
    advanced = BooleanField(
        label="Show advanced options",
        default=False,
        description="Inspect and modify parameters.",
    )

    class AdvancedOptions:
        """Parameters of the advanced option group."""

        pixel_distance = IntegerField(
            label="--OPTICAL_DUPLICATE_PIXEL_DISTANCE",
            description=(
                "Set the optical pixel distance, e.g. distance between clusters. "
                "Modify this parameter to ensure compatibility with older "
                "Illumina platforms."
            ),
            default=2500,
        )

    advanced_options = GroupField(
        AdvancedOptions,
        hidden="!advanced",
        label="Advanced options",
    )
class InsertSizeMetrics:
    """Parameter group for InsertSizeMetrics."""

    minimum_fraction = FloatField(
        label="Minimum fraction of reads in a category to be considered",
        description=(
            "When generating the histogram, discard any data categories "
            "(out of FR, TANDEM, RF) that have fewer than this fraction of "
            "overall reads (Range: 0 and 0.5)."
        ),
        default=0.05,
    )
    include_duplicates = BooleanField(
        default=False,
        label="Include reads marked as duplicates in the insert size histogram",
    )
    deviations = FloatField(
        label="Deviations limit",
        description=(
            "Generate mean, standard deviation and plots by trimming the data "
            "down to MEDIAN + DEVIATIONS * MEDIAN_ABSOLUTE_DEVIATION. "
            "This is done because insert size data typically includes enough "
            "anomalous values from chimeras and other artifacts to make the "
            "mean and standard deviation grossly misleading regarding the "
            "real distribution."
        ),
        default=10.0,
    )
class Input:
    """Input fields for SlamdunkAllPaired.

    Multi-fragment descriptions rely on implicit string concatenation, so
    each fragment but the last ends with a space.
    """

    reads = DataField("reads:fastq:paired", label="Reads")
    ref_seq = DataField("seq:nucleotide", label="FASTA file")
    regions = DataField(
        "bed",
        label="BED file with coordinates of regions of interest",
    )
    filter_multimappers = BooleanField(
        label="Filter multimappers",
        description="If true filter and reassign multimappers based on "
        "provided BED file with regions of interest",
        default=True,
    )
    max_alignments = IntegerField(
        label="Maximum number of multimapper alignments",
        description="The maximum number of alignments that will be reported "
        "for a multi-mapping read (i.e. reads with multiple alignments of "
        "equal best scores)",
        default=1,
    )
    read_length = IntegerField(
        label="Maximum read length",
        description="Maximum length of reads in the input FASTQ file",
        default=150,
    )
class Input:
    """Inputs of the ImportSra process."""

    sra_accession = ListField(StringField(), label="SRA accession(s)")

    # Toggle that reveals the advanced option group below.
    show_advanced = BooleanField(default=False, label="Show advanced options")

    class Advanced:
        """Parameters of the advanced option group."""

        # Download behaviour.
        prefetch = BooleanField(default=True, label="Prefetch SRA file")
        max_size_prefetch = StringField(
            label="Maximum file size to download in KB",
            description=(
                "A unit prefix can be used instead of a value in KB "
                "(e.g. 1024M or 1G)."
            ),
            default="20G",
        )

        # Optional numeric limits; unset unless the user fills them in.
        min_spot_id = IntegerField(required=False, label="Minimum spot ID")
        max_spot_id = IntegerField(required=False, label="Maximum spot ID")
        min_read_len = IntegerField(required=False, label="Minimum read length")

        # Dump switches, all off by default.
        clip = BooleanField(default=False, label="Clip adapter sequences")
        aligned = BooleanField(default=False, label="Dump only aligned sequences")
        unaligned = BooleanField(
            default=False,
            label="Dump only unaligned sequences",
        )

    advanced = GroupField(
        Advanced,
        hidden="!show_advanced",
        label="Advanced options",
    )
class Input:
    """Inputs: a GEO series accession plus an optional advanced group."""

    gse_accession = StringField(
        label="GEO accession",
        description="Enter a GEO series accession number.",
    )

    # Toggle that reveals the advanced option group below.
    show_advanced = BooleanField(default=False, label="Show advanced options")

    class Advanced:
        """Parameters of the advanced option group."""

        # Download behaviour.
        prefetch = BooleanField(default=True, label="Prefetch SRA file")
        max_size_prefetch = StringField(
            label="Maximum file size to download in KB",
            description=(
                "A unit prefix can be used instead of a value in KB "
                "(e.g. 1024M or 1G)."
            ),
            default="20G",
        )

        # Optional numeric limits; unset unless the user fills them in.
        min_spot_id = IntegerField(required=False, label="Minimum spot ID")
        max_spot_id = IntegerField(required=False, label="Maximum spot ID")
        min_read_len = IntegerField(required=False, label="Minimum read length")

        # Dump switches, all off by default.
        clip = BooleanField(default=False, label="Clip adapter sequences")
        aligned = BooleanField(default=False, label="Dump only aligned sequences")
        unaligned = BooleanField(
            default=False,
            label="Dump only unaligned sequences",
        )

        # Custom probe mapping; per the descriptions, source and build are
        # needed when a custom mapping file is supplied.
        mapping_file = FileField(
            label="File with probe ID mappings",
            required=False,
            description=(
                "The file should be tab-separated and contain two columns with "
                "their column names. The first column should contain Gene IDs "
                "and the second one should contain probe names. Supported file "
                "extensions are .tab.*, .tsv.*, .txt.*"
            ),
        )
        source = StringField(
            label="Gene ID source",
            required=False,
            allow_custom_choice=True,
            choices=[
                ("AFFY", "AFFY"),
                ("DICTYBASE", "DICTYBASE"),
                ("ENSEMBL", "ENSEMBL"),
                ("NCBI", "NCBI"),
                ("UCSC", "UCSC"),
            ],
            description=(
                "Gene ID source used for probe mapping is required when using "
                "a custom file."
            ),
        )
        build = StringField(
            label="Genome build",
            required=False,
            description=(
                "Genome build of mapping file is required when using a custom "
                "file."
            ),
        )

    advanced = GroupField(
        Advanced,
        hidden="!show_advanced",
        label="Advanced options",
    )
class Input:
    """Inputs of the GatkHaplotypeCallerGvcf process."""

    bam = DataField("alignment:bam", label="Analysis ready BAM file")
    ref_seq = DataField("seq:nucleotide", label="Reference sequence")

    # Toggle that reveals the option group below.
    advanced = BooleanField(
        label="Show advanced options",
        default=False,
        description="Inspect and modify parameters.",
    )

    class Options:
        """Parameters of the option group."""

        intervals = DataField(
            "bed",
            required=False,
            label=(
                "Use intervals BED file to limit the analysis to the specified "
                "parts of the genome."
            ),
        )
        contamination = FloatField(
            label="Contamination fraction",
            description=(
                "Fraction of contamination in sequencing data (for all samples) "
                "to aggressively remove."
            ),
            default=0,
        )

    options = GroupField(Options, hidden="!advanced", label="Options")
class Input:
    """Input fields for SlamdunkAllPaired.

    Multi-fragment descriptions rely on implicit string concatenation, so
    each fragment but the last ends with a space.
    """

    reads = DataField('reads:fastq:paired', label='Reads')
    transcriptome = DataField(
        'seq:nucleotide',
        label='FASTA file containing sequences for aligning.',
    )
    regions = DataField(
        'bed',
        label='BED file with coordinates of regions of interest.',
    )
    filter_multimappers = BooleanField(
        label='Filter multimappers',
        description='If true filter and reassign multimappers based on '
        'provided BED file with regions of interest.',
        default=True,
    )
    max_alignments = IntegerField(
        label='Maximum number of multimapper alignments',
        description='The maximum number of alignments that will be reported '
        'for a multi-mapping read (i.e. reads with multiple alignments of '
        'equal best scores).',
        default=1,
    )
    read_length = IntegerField(
        label='Maximum read length',
        description='Maximum length of reads in the input FASTQ file.',
        default=150,
    )
class Options:
    """Option group: quality thresholds, coverage caps, sampling, stringency."""

    # Quality thresholds.
    min_map_quality = IntegerField(
        default=20,
        label="Minimum mapping quality for a read to contribute coverage",
    )
    min_quality = IntegerField(
        label="Minimum base quality for a base to contribute coverage",
        default=20,
        description=(
            "N bases will be treated as having a base quality of negative "
            "infinity and will therefore be excluded from coverage regardless "
            "of the value of this parameter."
        ),
    )

    # Coverage caps.
    coverage_cap = IntegerField(
        label="Maximum coverage cap",
        default=250,
        description=(
            "Treat positions with coverage exceeding this value as if they "
            "had coverage at this set value."
        ),
    )
    accumulation_cap = IntegerField(
        label="Ignore positions with coverage above this value",
        default=100000,
        description=(
            "At positions with coverage exceeding this value, completely "
            "ignore reads that accumulate beyond this value"
        ),
    )

    count_unpaired = BooleanField(
        default=False,
        label="Count unpaired reads and paired reads with one end unmapped",
    )
    sample_size = IntegerField(
        default=10000,
        label="Sample Size used for Theoretical Het Sensitivity sampling",
    )
    validation_stringency = StringField(
        label="Validation stringency",
        default="STRICT",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        description=(
            "Validation stringency for all SAM files read by this program. "
            "Setting stringency to SILENT can improve performance when "
            "processing a BAM file in which variable-length data (read, "
            "qualities, tags) do not otherwise need to be decoded. "
            "Default is STRICT."
        ),
    )
class AdvancedOptions:
    """Advanced option group: batching and fragment consolidation."""

    batch_size = IntegerField(
        label="Batch size",
        description=(
            "Batch size controls the number of samples for which readers are "
            "open at once and therefore provides a way to minimize memory "
            "consumption. However, it can take longer to complete. "
            "Use the consolidate flag if more than a hundred batches were "
            "used. This will improve feature read time. "
            "batchSize=0 means no batching (i.e. readers for all samples will "
            "be opened at once)."
        ),
        default=0,
    )
    consolidate = BooleanField(
        label="Consolidate",
        description=(
            "Boolean flag to enable consolidation. "
            "If importing data in batches, a new fragment is created for each "
            "batch. In case thousands of fragments are created, GenomicsDB "
            "feature readers will try to open ~20x as many files. "
            "Also, internally GenomicsDB would consume more memory to maintain "
            "bookkeeping data from all fragments. "
            "Use this flag to merge all fragments into one. "
            "Merging can potentially improve read performance, however overall "
            "benefit might not be noticeable as the top Java layers have "
            "significantly higher overheads. "
            "This flag has no effect if only one batch is used."
        ),
        default=False,
    )
class Advanced:
    """Advanced option group for SRA prefetch and dump settings."""

    # Download behaviour.
    prefetch = BooleanField(default=True, label="Prefetch SRA file")
    max_size_prefetch = StringField(
        label="Maximum file size to download in KB",
        description=(
            "A unit prefix can be used instead of a value in KB "
            "(e.g. 1024M or 1G)."
        ),
        default="20G",
    )

    # Optional numeric limits; unset unless the user fills them in.
    min_spot_id = IntegerField(required=False, label="Minimum spot ID")
    max_spot_id = IntegerField(required=False, label="Maximum spot ID")
    min_read_len = IntegerField(required=False, label="Minimum read length")

    # Dump switches, all off by default.
    clip = BooleanField(default=False, label="Clip adapter sequences")
    aligned = BooleanField(default=False, label="Dump only aligned sequences")
    unaligned = BooleanField(
        default=False,
        label="Dump only unaligned sequences",
    )
class Advanced:
    """Group of advanced options."""

    # Example field; its label and description spell out the text conventions.
    boolean_field2 = BooleanField(
        default=False,
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
class Input:
    """Input fields to process ROSE2.

    Several fields hide one another through their ``hidden`` expressions
    (MACS results vs. uploaded BED, filtered BAM vs. explicit BAM files).
    """

    input_macs = DataField(
        "chipseq:callpeak",
        label="BED/narrowPeak file (MACS results)",
        required=False,
        hidden="input_upload",
    )
    input_upload = DataField(
        "bed",
        label="BED file (Upload)",
        required=False,
        hidden="input_macs || use_filtered_bam",
    )
    use_filtered_bam = BooleanField(
        label="Use Filtered BAM File",
        default=False,
        hidden="input_upload",
        description=(
            "Use filtered BAM file from a MACS2 object to rank "
            "enhancers by. Only applicable if input is MACS2."
        ),
    )
    rankby = DataField(
        "alignment:bam",
        label="BAM file",
        required=False,
        hidden="use_filtered_bam",
        description="BAM file to rank enhancers by.",
    )
    control = DataField(
        "alignment:bam",
        label="Control BAM File",
        required=False,
        hidden="use_filtered_bam",
        # Previously a verbatim copy of the ``rankby`` description, which
        # made the two BAM inputs indistinguishable in the form.
        # NOTE(review): wording follows the ROSE2 usage notes for the control
        # BAM option; confirm against the tool version in use.
        description="Control BAM file to use as background when ranking "
        "enhancers.",
    )
    tss = IntegerField(
        label="TSS exclusion",
        default=0,
        description="Enter a distance from TSS to exclude. 0 = no TSS exclusion.",
    )
    stitch = IntegerField(
        label="Stitch",
        required=False,
        description=(
            "Enter a max linking distance for stitching. If not "
            "given, optimal stitching parameter will be determined"
            " automatically."
        ),
    )
    mask = DataField(
        "bed",
        label="Masking BED file",
        required=False,
        description=(
            "Mask a set of regions from analysis. Provide a BED of"
            " masking regions."
        ),
    )
class Input:
    """Input fields to process ChipQC."""

    alignment = DataField(
        data_type="alignment:bam",
        label="Aligned reads",
    )
    peaks = DataField(
        data_type="chipseq:callpeak",
        label="Called peaks",
    )
    blacklist = DataField(
        data_type="bed",
        label="Blacklist regions",
        description="BED file containing genomic regions that should be "
        "excluded from the analysis.",
        required=False,
    )
    calculate_enrichment = BooleanField(
        label="Calculate enrichment",
        description="Calculate enrichment of signal in known genomic "
        "annotation. By default annotation is provided from "
        "the TranscriptDB package specified by genome build "
        "which should match one of the supported annotations "
        "(hg19, hg38, hg18, mm10, mm9, rn4, ce6, dm3). If "
        "annotation is not supported the analysis is skipped.",
        default=False,
    )

    class Advanced:
        """Add advanced list of options."""

        quality_threshold = IntegerField(
            label="Mapping quality threshold",
            description="Only reads with mapping quality scores above "
            "this threshold will be used for some statistics.",
            default=15,
        )
        profile_window = IntegerField(
            label="Window size",
            description="An integer indicating the width of the window "
            "used for peak profiles. Peaks will be centered "
            "on their summits and include half of the window "
            "size upstream and half downstream of this point.",
            default=400,
        )
        # Stored as a string in "start:end" form (see the default below).
        shift_size = StringField(
            label="Shift size",
            description="Vector of values to try when computing optimal "
            "shift sizes. It should be specified as "
            "consecutive numbers vector with start:end",
            default="1:300",
        )

    advanced = GroupField(
        Advanced,
        label="Advanced parameters",
    )
class Input:
    """Input fields to process MarkDuplicates.

    Multi-fragment descriptions rely on implicit string concatenation, so
    each fragment but the last ends with a space.
    """

    bam = DataField("alignment:bam", label="Alignment BAM file")
    skip = BooleanField(
        label="Skip MarkDuplicates step",
        description="MarkDuplicates step can be skipped.",
        default=False,
    )
    remove_duplicates = BooleanField(
        label="Remove duplicates",
        description="If true do not write duplicates to the output file "
        "instead of writing them with appropriate flags set.",
        default=False,
    )
    validation_stringency = StringField(
        label="Validation stringency",
        description="Validation stringency for all SAM files read by this "
        "program. Setting stringency to SILENT can improve "
        "performance when processing a BAM file in which "
        "variable-length data (read, qualities, tags) do not "
        "otherwise need to be decoded. Default is STRICT.",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        default="STRICT",
    )
    # The empty-string choice is the default and is labelled
    # "as in BAM header (default)".
    assume_sort_order = StringField(
        label="Assume sort order",
        description="If not null (default), assume that the input file "
        "has this order even if the header says otherwise. "
        "Possible values are unsorted, queryname, coordinate "
        "and unknown.",
        choices=[
            ("", "as in BAM header (default)"),
            ("unsorted", "unsorted"),
            ("queryname", "queryname"),
            ("coordinate", "coordinate"),
            ("duplicate", "duplicate"),
            ("unknown", "unknown"),
        ],
        default="",
    )
class Input:
    """Inputs of the Bamclipper process."""

    alignment = DataField("alignment:bam", label="Alignment BAM file")
    bedpe = DataField("bedpe", required=False, label="BEDPE file")

    # Switch for skipping the step (see its description).
    skip = BooleanField(
        label="Skip Bamclipper step",
        default=False,
        description="Use this option to skip Bamclipper step.",
    )
class Options:
    """Option group holding the beta prior switch."""

    beta_prior = BooleanField(
        label="Beta prior",
        description=(
            "Whether or not to put a zero-mean normal prior on the "
            "non-intercept coefficients."
        ),
        default=False,
    )
class Input:
    """Inputs of the GatkGenotypeGVCFs process."""

    gvcfs = ListField(DataField("variants:gvcf"), label="Input data (GVCF)")
    ref_seq = DataField("seq:nucleotide", label="Reference sequence")
    intervals = DataField("bed", label="Intervals file (.bed)")
    dbsnp = DataField("variants:vcf", label="dbSNP file")

    # Toggle that reveals the advanced option group below.
    advanced = BooleanField(
        label="Show advanced options",
        default=False,
        description="Inspect and modify parameters.",
    )

    class AdvancedOptions:
        """Batching and fragment consolidation parameters."""

        batch_size = IntegerField(
            label="Batch size",
            description=(
                "Batch size controls the number of samples for which readers "
                "are open at once and therefore provides a way to minimize "
                "memory consumption. However, it can take longer to complete. "
                "Use the consolidate flag if more than a hundred batches were "
                "used. This will improve feature read time. "
                "batchSize=0 means no batching (i.e. readers for all samples "
                "will be opened at once)."
            ),
            default=0,
        )
        consolidate = BooleanField(
            label="Consolidate",
            description=(
                "Boolean flag to enable consolidation. "
                "If importing data in batches, a new fragment is created for "
                "each batch. In case thousands of fragments are created, "
                "GenomicsDB feature readers will try to open ~20x as many "
                "files. Also, internally GenomicsDB would consume more memory "
                "to maintain bookkeeping data from all fragments. "
                "Use this flag to merge all fragments into one. "
                "Merging can potentially improve read performance, however "
                "overall benefit might not be noticeable as the top Java "
                "layers have significantly higher overheads. "
                "This flag has no effect if only one batch is used."
            ),
            default=False,
        )

    advanced_options = GroupField(
        AdvancedOptions,
        hidden="!advanced",
        label="Advanced options",
    )
class Input:
    """Inputs of the Bamclipper process."""

    alignment = DataField("alignment:bam", label="Alignment BAM file")
    bedpe = DataField("bedpe", required=False, label="BEDPE file")

    # Switch for skipping the step (see its description).
    skip = BooleanField(
        default=False,
        label="Skip Bamclipper step",
        description="Use this option to skip Bamclipper step.",
    )
class FilterOptions:
    """Gene filtering option group.

    Each boolean switch is followed by a parameter that is hidden while the
    switch is off (see the ``hidden`` expressions).
    """

    # Expression-count filter.
    count = BooleanField(
        default=True,
        label="Filter genes based on expression count",
    )
    min_count_sum = IntegerField(
        label="Minimum gene expression count summed over all samples",
        description=(
            "Filter genes in the expression matrix input. "
            "Remove genes where the expression count sum over all samples is "
            "below the threshold."
        ),
        default=10,
        hidden="!filter_options.count",
    )

    # Cook's distance filter.
    cook = BooleanField(
        default=False,
        label="Filter genes based on Cook's distance",
    )
    cooks_cutoff = FloatField(
        label="Threshold on Cook's distance",
        description=(
            "If one or more samples have Cook's distance larger than the "
            "threshold set here, the p-value for the row is set to NA. "
            "If left empty, the default threshold of 0.99 quantile of the "
            "F(p, m-p) distribution is used, where p is the number of "
            "coefficients being fitted and m is the number of samples. "
            "This test excludes Cook's distance of samples belonging to "
            "experimental groups with only two samples."
        ),
        required=False,
        hidden="!filter_options.cook",
    )

    # Independent filtering.
    independent = BooleanField(
        default=False,
        label="Apply independent gene filtering",
    )
    alpha = FloatField(
        label=(
            "Significance cut-off used for optimizing independent gene "
            "filtering"
        ),
        description="The value should be set to adjusted p-value cut-off (FDR).",
        default=0.1,
        hidden="!filter_options.independent",
    )
class Input:
    """Input fields to process MarkDuplicates.

    Multi-fragment descriptions rely on implicit string concatenation, so
    each fragment but the last ends with a space.
    """

    bam = DataField('alignment:bam', label='Alignment BAM file')
    skip = BooleanField(
        label='Skip MarkDuplicates step',
        description='MarkDuplicates step can be skipped.',
        default=False,
    )
    remove_duplicates = BooleanField(
        label='Remove duplicates',
        description='If true do not write duplicates to the output file '
        'instead of writing them with appropriate flags set.',
        default=False,
    )
    validation_stringency = StringField(
        label='Validation stringency',
        description='Validation stringency for all SAM files read by this '
        'program. Setting stringency to SILENT can improve '
        'performance when processing a BAM file in which '
        'variable-length data (read, qualities, tags) do not '
        'otherwise need to be decoded. Default is STRICT.',
        choices=[
            ('STRICT', 'STRICT'),
            ('LENIENT', 'LENIENT'),
            ('SILENT', 'SILENT'),
        ],
        default='STRICT',
    )
    # The empty-string choice is the default and is labelled
    # 'as in BAM header (default)'.
    assume_sort_order = StringField(
        label='Assume sort order',
        description='If not null (default), assume that the input file '
        'has this order even if the header says otherwise. '
        'Possible values are unsorted, queryname, coordinate '
        'and unknown.',
        choices=[
            ('', 'as in BAM header (default)'),
            ('unsorted', 'unsorted'),
            ('queryname', 'queryname'),
            ('coordinate', 'coordinate'),
            ('duplicate', 'duplicate'),
            ('unknown', 'unknown'),
        ],
        default='',
    )
class Input:
    """Inputs of the CollectRrbsMetrics process."""

    bam = DataField("alignment:bam", label="Alignment BAM file")
    genome = DataField("seq:nucleotide", label="Genome")

    # Quality thresholds.
    min_quality = IntegerField(
        default=20,
        label="Threshold for base quality of a C base before it is considered",
    )
    next_base_quality = IntegerField(
        default=10,
        label=(
            "Threshold for quality of a base next to a C before the C base is "
            "considered"
        ),
    )

    # NOTE: the field name is misspelled ("lenght"); it is part of the public
    # input schema and is therefore kept as is.
    min_lenght = IntegerField(default=5, label="Minimum read length")
    mismatch_rate = FloatField(
        default=0.1,
        label=(
            "Maximum fraction of mismatches in a read to be considered "
            "(Range: 0 and 1)"
        ),
    )

    validation_stringency = StringField(
        label="Validation stringency",
        default="STRICT",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        description=(
            "Validation stringency for all SAM files read by this program. "
            "Setting stringency to SILENT can improve performance when "
            "processing a BAM file in which variable-length data (read, "
            "qualities, tags) do not otherwise need to be decoded. "
            "Default is STRICT."
        ),
    )
    assume_sorted = BooleanField(
        label="Sorted BAM file",
        default=False,
        description="If true the sort order in the header file will be ignored.",
    )
class Input:
    """Inputs of the EdgeR process."""

    # Sample groups to compare.
    case = ListField(
        DataField("expression"),
        description="Case samples (replicates)",
        label="Case",
    )
    control = ListField(
        DataField("expression"),
        description="Control samples (replicates)",
        label="Control",
    )

    count_filter = IntegerField(
        label="Raw counts filtering threshold",
        description=(
            "Filter genes in the expression matrix input. "
            "Remove genes where the number of counts in all samples is below "
            "the threshold."
        ),
        default=10,
    )

    # Gene-set creation; the two thresholds are hidden unless it is enabled.
    create_sets = BooleanField(
        label="Create gene sets",
        default=False,
        description=(
            "After calculating differential gene expressions create gene sets "
            "for up-regulated genes, down-regulated genes and all genes."
        ),
    )
    logfc = FloatField(
        label="Log2 fold change threshold for gene sets",
        default=1.0,
        hidden="!create_sets",
        description=(
            "Genes above Log2FC are considered as up-regulated and genes "
            "below -Log2FC as down-regulated."
        ),
    )
    fdr = FloatField(
        label="FDR threshold for gene sets",
        hidden="!create_sets",
        default=0.05,
    )
class QualityTrimming:
    """Quality trimming options.

    Every description fragment is a complete, single-line string literal;
    a literal must never be broken by a raw newline, which would leave the
    string unterminated.
    """

    quality = IntegerField(
        label="Quality cutoff",
        description="Trim low-quality ends from reads based on phred score.",
        default=20,
    )
    nextseq = IntegerField(
        label="NextSeq/NovaSeq trim cutoff",
        description="NextSeq/NovaSeq-specific quality "
        "trimming. Trims also dark cycles appearing as "
        "high-quality G bases. This will set a specific "
        "quality cutoff, but qualities of G bases are ignored. "
        "This can not be used with Quality cutoff and will "
        "override it.",
        required=False,
    )
    # Choice values are the literal "--phred33" / "--phred64" flag strings.
    phred = StringField(
        label="Phred score encoding",
        description="Use either ASCII+33 quality scores as "
        "Phred scores (Sanger/Illumina 1.9+ encoding) or "
        "ASCII+64 quality scores (Illumina 1.5 encoding) for "
        "quality trimming",
        choices=[
            ("--phred33", "ASCII+33"),
            ("--phred64", "ASCII+64"),
        ],
        default="--phred33",
    )
    min_length = IntegerField(
        label="Minimum length after trimming",
        description="Discard reads that became shorter than "
        "selected length because of either quality or adapter "
        "trimming. Both reads of a read-pair need to be longer "
        "than specified length to be printed out to validated "
        "paired-end files. If only one read became too short "
        "there is the possibility of keeping such unpaired "
        "single-end reads with Retain unpaired. A value of 0 "
        "disables filtering based on length.",
        default=20,
    )
    max_n = IntegerField(
        label="Maximum number of Ns",
        description="Read exceeding this limit will result in "
        "the entire pair being removed from the trimmed output "
        "files.",
        required=False,
    )
    retain_unpaired = BooleanField(
        label="Retain unpaired reads after trimming",
        description="If only one of the two paired-end reads "
        "became too short, the longer read will be written.",
        default=False,
    )
    # The two cutoffs below are hidden unless ``retain_unpaired`` is enabled.
    unpaired_len_1 = IntegerField(
        label="Unpaired read length cutoff for mate 1",
        default=35,
        hidden="!quality_trim.retain_unpaired",
    )
    unpaired_len_2 = IntegerField(
        label="Unpaired read length cutoff for mate 2",
        default=35,
        hidden="!quality_trim.retain_unpaired",
    )
    clip_r1 = IntegerField(
        label="Trim bases from 5' end of read 1",
        description="This may be useful if the qualities were "
        "very poor, or if there is some sort of unwanted bias "
        "at the 5' end.",
        required=False,
    )
    clip_r2 = IntegerField(
        label="Trim bases from 5' end of read 2",
        description="This may be useful if the qualities were "
        "very poor, or if there is some sort of unwanted bias "
        "at the 5' end. For paired-end bisulfite sequencing, "
        "it is recommended to remove the first few bp because "
        "the end-repair reaction may introduce a bias towards "
        "low methylation.",
        required=False,
    )
    three_prime_r1 = IntegerField(
        label="Trim bases from 3' end of read 1",
        description="Remove bases from the 3' end of read 1 "
        "after adapter/quality trimming has been performed. "
        "This may remove some unwanted bias from the 3' end "
        "that is not directly related to adapter sequence or "
        "basecall quality.",
        required=False,
    )
    three_prime_r2 = IntegerField(
        label="Trim bases from 3' end of read 2",
        description="Remove bases from the 3' end of read 2 "
        "after adapter/quality trimming has been performed. "
        "This may remove some unwanted bias from the 3' end "
        "that is not directly related to adapter sequence or "
        "basecall quality.",
        required=False,
    )
class Input:
    """Input fields to process MarkDuplicates.

    Multi-fragment descriptions rely on implicit string concatenation, so
    each fragment but the last ends with a space.
    """

    bam = DataField("alignment:bam", label="Alignment BAM file")
    skip = BooleanField(
        label="Skip MarkDuplicates step",
        description="MarkDuplicates step can be skipped.",
        default=False,
    )
    remove_duplicates = BooleanField(
        label="Remove duplicates",
        description="If true do not write duplicates to the output file "
        "instead of writing them with appropriate flags set.",
        default=False,
    )
    validation_stringency = StringField(
        label="Validation stringency",
        description="Validation stringency for all SAM files read by this "
        "program. Setting stringency to SILENT can improve "
        "performance when processing a BAM file in which "
        "variable-length data (read, qualities, tags) do not "
        "otherwise need to be decoded. Default is STRICT.",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        default="STRICT",
    )
    # The empty-string choice is the default and is labelled
    # "as in BAM header (default)".
    assume_sort_order = StringField(
        label="Assume sort order",
        description="If not null (default), assume that the input file "
        "has this order even if the header says otherwise. "
        "Possible values are unsorted, queryname, coordinate "
        "and unknown.",
        choices=[
            ("", "as in BAM header (default)"),
            ("unsorted", "unsorted"),
            ("queryname", "queryname"),
            ("coordinate", "coordinate"),
            ("duplicate", "duplicate"),
            ("unknown", "unknown"),
        ],
        default="",
    )

    class BigWigOptions:
        """Options for calculating BigWig."""

        bigwig_binsize = IntegerField(
            label="BigWig bin size",
            description="Size of the bins, in bases, for the output of the "
            "bigwig/bedgraph file. Default is 50.",
            default=50,
        )
        bigwig_timeout = IntegerField(
            label="BigWig timeout",
            description="Number of seconds before creation of BigWig timeouts. "
            "Default is after 480 seconds (8 minutes).",
            default=480,
        )

    bigwig_opts = GroupField(BigWigOptions, label="BigWig options")
class Input:
    """Input fields to process Deseq.

    Two option groups are exposed: ``options`` (the beta prior switch) and
    ``filter_options`` (gene filtering). The ``hidden`` expressions inside
    ``FilterOptions`` refer to the group by the name ``filter_options``.
    """

    case = ListField(
        DataField("expression"),
        label="Case",
        description="Case samples (replicates)",
    )
    control = ListField(
        DataField("expression"),
        label="Control",
        description="Control samples (replicates)",
    )
    create_sets = BooleanField(
        label="Create gene sets",
        description="After calculating differential gene "
        "expressions create gene sets for up-regulated genes, "
        "down-regulated genes and all genes.",
        default=False,
    )
    # The two thresholds below are hidden unless ``create_sets`` is enabled.
    logfc = FloatField(
        label="Log2 fold change threshold for gene sets",
        description="Genes above Log2FC are considered as "
        "up-regulated and genes below -Log2FC as down-regulated.",
        default=1.0,
        hidden="!create_sets",
    )
    fdr = FloatField(
        label="FDR threshold for gene sets",
        default=0.05,
        hidden="!create_sets",
    )

    class Options:
        """Options."""

        beta_prior = BooleanField(
            label="Beta prior",
            default=False,
            description="Whether or not to put a zero-mean normal prior "
            "on the non-intercept coefficients.",
        )

    class FilterOptions:
        """Filtering options."""

        count = BooleanField(
            label="Filter genes based on expression count",
            default=True,
        )
        min_count_sum = IntegerField(
            label="Minimum gene expression count summed over all samples",
            default=10,
            description="Filter genes in the expression matrix input. "
            "Remove genes where the expression count sum over all samples "
            "is below the threshold.",
            hidden="!filter_options.count",
        )
        cook = BooleanField(
            label="Filter genes based on Cook's distance",
            default=False,
        )
        cooks_cutoff = FloatField(
            label="Threshold on Cook's distance",
            required=False,
            description="If one or more samples have Cook's distance "
            "larger than the threshold set here, the p-value for the row "
            "is set to NA. If left empty, the default threshold of 0.99 "
            "quantile of the F(p, m-p) distribution is used, where p is "
            "the number of coefficients being fitted and m is the number "
            "of samples. This test excludes Cook's distance of samples "
            "belonging to experimental groups with only two samples.",
            hidden="!filter_options.cook",
        )
        independent = BooleanField(
            label="Apply independent gene filtering",
            default=False,
        )
        alpha = FloatField(
            label="Significance cut-off used for optimizing independent "
            "gene filtering",
            default=0.1,
            description="The value should be set to adjusted p-value "
            "cut-off (FDR).",
            hidden="!filter_options.independent",
        )

    # Each label describes the group it is attached to: ``Options`` holds the
    # analysis setting (beta prior), ``FilterOptions`` holds the gene filters.
    options = GroupField(Options, label="Differential expression analysis options")
    filter_options = GroupField(FilterOptions, label="Gene filtering options")