Beispiel #1
0
 def inputs(self):
     """Return the ToolInput list: gtf, bam, output_dir, sample, then self.additional_args."""
     gtf_input = ToolInput(
         "gtf",
         File,
         doc="The input GTF file containing features to check the bam against",
         position=1,
     )
     bam_input = ToolInput(
         "bam",
         BamBai,
         doc="The input SAM/BAM file containing reads to process",
         position=2,
     )
     outdir_input = ToolInput(
         "output_dir",
         String(optional=True),
         doc="Output directory",
         default=".",
         position=3,
     )
     sample_input = ToolInput(
         "sample",
         String(optional=True),
         doc="The name of the current sample. Default: The bam's filename",
         prefix="--sample",
         position=4,
     )
     return [
         gtf_input,
         bam_input,
         outdir_input,
         sample_input,
         *self.additional_args,
     ]
Beispiel #2
0
 def inputs(self) -> List[ToolInput]:
     """Return the version, BAM, output-folder and reference-folder inputs, then self.additional_hygea_args."""
     version_input = ToolInput("piscesVersion", String())
     bam_input = ToolInput(
         "inputBam",
         BamBai(),
         doc="Input BAM file",
         position=4,
         prefix="-b",
         shell_quote=False,
     )
     outdir_input = ToolInput(
         "outputDir",
         String(),
         doc="Output Folder",
         position=4,
         prefix="--outfolder",
         shell_quote=False,
     )
     genome_input = ToolInput(
         "referenceFolder",
         Directory(),
         doc="Folder containing reference genome files",
         position=5,
         prefix="--genomefolders",
         shell_quote=False,
     )
     return [
         version_input,
         bam_input,
         outdir_input,
         genome_input,
         *self.additional_hygea_args,
     ]
Beispiel #3
0
 def inputs(self):
     """Return the output, bam and intervals inputs, then the inherited inputs and Gatk4SplitReadsBase.additional_args."""
     collected = [
         ToolInput(
             "outputFilename",
             String(),
             default=".",
             prefix="--output",
             doc="The directory to output SAM/BAM/CRAM files. Default value: '.' ",
         ),
         ToolInput(
             "bam",
             BamBai,
             position=1,
             prefix="--input",
             secondaries_present_as={".bai": "^.bai"},
             doc="(-I:String) BAM/SAM/CRAM file containing reads  This argument must be specified at least once.",
         ),
         ToolInput(
             tag="intervals",
             input_type=Bed(optional=True),
             prefix="--intervals",
             doc="(-L:String) One or more genomic intervals over which to operate This argument may be specified 0 or more times. Default value: null. ",
         ),
     ]
     # Parent inputs and the class-level extras come after the tool-specific ones.
     collected.extend(super().inputs())
     collected.extend(Gatk4SplitReadsBase.additional_args)
     return collected
Beispiel #4
0
 def inputs(self):
     """Return the inherited inputs, the class-level additional_args, then the counts, iteration and model-output inputs."""
     collected = [
         *super(Gatk4LearnReadOrientationModelBase, self).inputs(),
         *Gatk4LearnReadOrientationModelBase.additional_args,
     ]
     collected.append(
         ToolInput(
             "f1r2CountsFiles",
             Array(TarFileGz),
             prefix="-I",
             prefix_applies_to_all_elements=True,
             position=0,
             doc="Counts for the read orientation of fragments",
         )
     )
     collected.append(
         ToolInput(
             "numEmIterations",
             Int(optional=True),
             prefix="--num-em-iterations",
             position=1,
             default=30,  # Sebastian thinks this is best
             doc="Amount of iterations for the em process before it bails",
         )
     )
     collected.append(
         ToolInput(
             "modelFileOut",
             Filename(extension=".tar.gz"),
             prefix="-O",
             position=3,
         )
     )
     return collected
Beispiel #5
0
 def inputs(self):
     """Return the inherited inputs, SamToolsViewBase.additional_inputs, then sam, reference, outputFilename and regions."""
     inherited = super(SamToolsViewBase, self).inputs()
     extras = SamToolsViewBase.additional_inputs

     sam_input = ToolInput("sam", UnionType(Sam(), Bam(), Cram()), position=10)
     reference_input = ToolInput(
         "reference",
         FastaWithDict(optional=True),
         prefix="-T",
         position=6,
         doc=(
             "A FASTA format reference FILE, optionally compressed by bgzip and ideally indexed "
             "by samtools faidx. If an index is not present, one will be generated for you."
         ),
     )
     # The output name is derived from the "sam" input with its extension removed.
     output_name = Filename(
         prefix=InputSelector("sam", remove_file_extension=True),
         extension=".bam",
     )
     output_input = ToolInput(
         "outputFilename",
         output_name,
         prefix="-o",
         position=5,
         doc="Output to FILE [stdout].",
     )
     regions_input = ToolInput(
         "regions",
         Array(String, optional=True),
         position=11,
         doc=(
             "Region specifications after the input filename to restrict output to only those alignments which "
             "overlap the specified region(s). Use of region specifications requires a coordinate-sorted and "
             "indexed input file (in BAM or CRAM format)"
         ),
     )
     return [
         *inherited,
         *extras,
         sam_input,
         reference_input,
         output_input,
         regions_input,
     ]
    def inputs(self) -> List[ToolInput]:
        """Return the ToolInput list for this tool.

        The list holds the paired FASTQ input, the optional adapter
        sequences (one "-a" per element), the R1 ("-o") and R2 ("-p")
        output filenames, followed by ``self.additional_args``.
        """
        return [
            ToolInput("fastq", FastqGzPair, position=5),
            ToolInput(
                "adapter",
                input_type=Array(String(), optional=True),
                prefix="-a",
                prefix_applies_to_all_elements=True,
                doc=
                "Sequence of an adapter ligated to the 3' end (paired data: of the first read). "
                "The adapter and subsequent bases are trimmed. If a '$' character is appended ('anchoring'), "
                "the adapter is only found if it is a suffix of the read.",
            ),
            ToolInput(
                "outputFilename",
                Filename(suffix="-R1", extension=".fastq.gz"),
                prefix="-o",
                doc=
                "Write trimmed reads to FILE. FASTQ or FASTA format is chosen depending on input. "
                "The summary report is sent to standard output. Use '{name}' in FILE to demultiplex "
                "reads into multiple files. Default: write to standard output",
            ),
            ToolInput(
                "secondReadFile",
                Filename(suffix="-R2", extension=".fastq.gz"),
                prefix="-p",
                doc="Write second read in a pair to FILE.",
            ),
            *self.additional_args,
        ]
Beispiel #7
0
 def inputs(self):
     """Return the ToolInput list for this tool.

     The inherited inputs and Gatk4CalculateContaminationBase.additional_args
     come first, followed by the pileup table ("-I"), the tumor segmentation
     output ("--tumor-segmentation") and the contamination output ("-O").
     Both output filenames are derived from the "pileupTable" input with its
     file extension removed.
     """
     return [
         *super().inputs(),
         *Gatk4CalculateContaminationBase.additional_args,
         ToolInput(
             "pileupTable",
             File(),
             prefix="-I",
             doc="pileup table from summarize pileup",
         ),
         ToolInput(
             "segmentationFileOut",
             Filename(
                 prefix=InputSelector("pileupTable",
                                      remove_file_extension=True),
                 extension=".mutect2_segments",
             ),
             prefix="--tumor-segmentation",
             # Previously documented as "Reference sequence file", which was a
             # copy-paste error: this argument names an output table.
             doc="The output table containing segmentation of the tumor by minor allele fraction",
         ),
         ToolInput(
             "contaminationFileOut",
             Filename(
                 prefix=InputSelector("pileupTable",
                                      remove_file_extension=True),
                 extension=".mutect2_contamination",
             ),
             position=2,
             prefix="-O",
         ),
     ]
 def inputs(self) -> List[ToolInput]:
     """Return three optional VCF-flavoured inputs at position 1, then the reference and output filename inputs."""
     # The three variant inputs differ only in tag and type.
     variant_kinds = (
         ("vcf", Vcf),
         ("compressedTabixVcf", VcfTabix),
         ("compressedVcf", CompressedVcf),
     )
     variant_inputs = [
         ToolInput(tag, kind(optional=True), position=1, shell_quote=False)
         for tag, kind in variant_kinds
     ]
     reference_input = ToolInput(
         "reference",
         FastaWithDict(),
         position=4,
         prefix="-r",
         shell_quote=False,
     )
     output_input = ToolInput(
         "outputFilename",
         Filename(extension=".vcf", suffix=".norm"),
         position=6,
         prefix="-o",
         shell_quote=False,
     )
     return [*variant_inputs, reference_input, output_input]
 def inputs(self):
     """Return one required string input ("inputs") and three optional ones, at positions 0 to 3."""
     required = ToolInput("inputs", String(), position=0)
     optional_slots = (("input2", 1), ("input3", 2), ("input4", 3))
     optionals = [
         ToolInput(tag, String(optional=True), position=slot)
         for tag, slot in optional_slots
     ]
     return [required, *optionals]
Beispiel #10
0
 def inputs(self):
     """Return the vcf, outputFilename, outputType and tempDir inputs."""
     vcf_input = ToolInput(
         "vcf",
         UnionType(Vcf, CompressedVcf),
         doc="The VCF file to sort",
         position=1,
     )
     output_input = ToolInput(
         "outputFilename",
         Filename(suffix=".sorted", extension=".vcf.gz"),
         doc="(-o) output file name [stdout]",
         prefix="--output-file",
     )
     type_input = ToolInput(
         "outputType",
         String(optional=True),
         doc="(-O) b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]",
         default="z",
         prefix="--output-type",
     )
     tempdir_input = ToolInput(
         "tempDir",
         String(optional=True),
         doc="(-T) temporary files [/tmp/bcftools-sort.XXXXXX/]",
         prefix="--temp-dir",
     )
     return [vcf_input, output_input, type_input, tempdir_input]
Beispiel #11
0
 def inputs(self):
     """Return self.additional_inputs followed by the optional inputBam, inputBed, inputFile and genome inputs."""
     collected = [*self.additional_inputs]
     collected.append(
         ToolInput(
             "inputBam",
             Bam(optional=True),
             doc="Input bam file. Note: BAM _must_ be sorted by position. A 'samtools sort <BAM>' should suffice.",
             prefix="-ibam",
         )
     )
     collected.append(
         ToolInput(
             "inputBed",
             File(optional=True),
             doc="Input bed file. Must be grouped by chromosome. A simple 'sort -k 1,1 <BED> > <BED>.sorted' will suffice.",
             prefix="-iBed",
         )
     )
     collected.append(
         ToolInput(
             "inputFile",
             File(optional=True),
             doc="Input file, can be gff/vcf.",
             prefix="-i",
         )
     )
     collected.append(
         ToolInput(
             "genome",
             File(optional=True),
             doc="Genome file. The genome file should tab delimited and structured as follows: <chromName><TAB><chromSize>.",
             prefix="-g",
         )
     )
     return collected
Beispiel #12
0
 def inputs(self):
     """Return the bam, reference, outputPrefix, intervals and excludeIntervals inputs, then self.additional_args."""
     bam_input = ToolInput(
         "bam",
         BamBai(),
         position=10,
         prefix="-I",
         secondaries_present_as={".bai": "^.bai"},
         doc="Input file containing sequence  data (BAM or CRAM)",
     )
     reference_input = ToolInput(
         "reference",
         FastaWithDict(),
         prefix="-R",
         doc="Reference sequence file",
     )
     prefix_input = ToolInput(
         "outputPrefix",
         String(),
         prefix="-o",
         doc="An output file created by the walker. Will overwrite contents if file exists",
     )
     include_input = ToolInput(
         "intervals",
         File(optional=True),
         prefix="-L",
         doc="One or more genomic intervals over which to operate",
     )
     exclude_input = ToolInput(
         "excludeIntervals",
         File(optional=True),
         prefix="--excludeIntervals",
         doc="One or more genomic intervals to exclude from processing",
     )
     return [
         bam_input,
         reference_input,
         prefix_input,
         include_input,
         exclude_input,
         *self.additional_args,
     ]
Beispiel #13
0
 def inputs(self):
     """Return the inherited inputs, then bam, reference, outputFilename, recalFile, intervals and self.additional_args."""
     collected = list(super(Gatk4ApplyBqsrBase, self).inputs())
     collected += [
         ToolInput(
             "bam",
             BamBai(),
             position=10,
             prefix="-I",
             secondaries_present_as={".bai": "^.bai"},
             doc="The SAM/BAM/CRAM file containing reads.",
         ),
         ToolInput(
             "reference",
             FastaWithDict(),
             prefix="-R",
             doc="Reference sequence",
         ),
         ToolInput(
             "outputFilename",
             Filename(extension=".bam"),
             prefix="-O",
             doc="Write output to this file",
         ),
         ToolInput(
             "recalFile",
             Tsv(optional=True),
             prefix="--bqsr-recal-file",
             doc="Input recalibration table for BQSR",
         ),
         ToolInput(
             "intervals",
             Bed(optional=True),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
         ),
     ]
     collected.extend(self.additional_args)
     return collected
Beispiel #14
0
 def inputs(self):
     """Return the inherited inputs, then bam, outputFilename, outputHistogram and the class-level additional_args."""

     def bam_stem():
         # A fresh selector per use, as each Filename gets its own instance.
         return InputSelector("bam", remove_file_extension=True)

     inherited = super(Gatk4CollectInsertSizeMetricsBase, self).inputs()
     bam_input = ToolInput(
         "bam",
         BamBai(optional=False),
         position=10,
         prefix="-I",
         doc="Input SAM or BAM file.  Required.",
     )
     metrics_input = ToolInput(
         "outputFilename",
         Filename(prefix=bam_stem(), extension=".txt", suffix=".metrics"),
         prefix="-O",
         doc="File to write the output to.  Required.",
     )
     histogram_input = ToolInput(
         "outputHistogram",
         Filename(prefix=bam_stem(), extension=".pdf", suffix=".histogram"),
         prefix="-H",
         doc="File to write insert size Histogram chart to.  Required. ",
     )
     return [
         *inherited,
         bam_input,
         metrics_input,
         histogram_input,
         *Gatk4CollectInsertSizeMetricsBase.additional_args,
     ]
Beispiel #15
0
 def inputs(self):
     """Return the kmer_size, index and reference inputs, at positions 1 to 3."""
     kmer_input = ToolInput(
         "kmer_size",
         Int(optional=True),
         position=1,
         prefix="-k",
         doc="k-mer (odd) length (default: 31, max value: 31)",
     )
     index_input = ToolInput(
         "index",
         Filename(extension=".kidx"),
         position=2,
         prefix="-i",
         doc="Filename for the kallisto index to be constructed",
     )
     reference_input = ToolInput(
         "reference",
         Fasta,
         position=3,
         localise_file=True,
         doc="Filename for a reference transcriptome",
     )
     # Not exposed here:
     # --make-unique           Replace repeated target names with unique names
     return [kmer_input, index_input, reference_input]
Beispiel #16
0
 def inputs(self):
     """Return the inherited inputs, then bam, reference, outputPrefix, intervals and self.additional_args."""
     collected = [*super(Gatk4DepthOfCoverageBase, self).inputs()]
     collected.append(
         ToolInput(
             "bam",
             BamBai(),
             prefix="-I",
             secondaries_present_as={".bai": "^.bai"},
             doc="The SAM/BAM/CRAM file containing reads.",
         )
     )
     collected.append(
         ToolInput(
             "reference",
             FastaWithDict(),
             prefix="-R",
             doc="Reference sequence",
         )
     )
     collected.append(
         ToolInput(
             "outputPrefix",
             String(),
             prefix="-O",
             doc="An output file created by the walker. Will overwrite contents if file exists",
         )
     )
     collected.append(
         ToolInput(
             "intervals",
             Array(Bed),
             prefix="--intervals",
             prefix_applies_to_all_elements=True,
             doc="-L (BASE) One or more genomic intervals over which to operate",
         )
     )
     collected.extend(self.additional_args)
     return collected
Beispiel #17
0
 def inputs(self):
     """Return the inherited inputs, then bam, outputFilename, sortOrder and Gatk4SortSamBase.additional_args."""
     inherited = super(Gatk4SortSamBase, self).inputs()
     bam_input = ToolInput(
         "bam",
         Bam(),
         position=10,
         prefix="-I",
         doc="The SAM/BAM/CRAM file to sort.",
     )
     # The output name is built from the "bam" input with its extension removed.
     sorted_name = Filename(
         prefix=InputSelector("bam", remove_file_extension=True),
         suffix=".sorted",
         extension=".bam",
     )
     output_input = ToolInput(
         "outputFilename",
         sorted_name,
         position=10,
         prefix="-O",
         doc="The sorted SAM/BAM/CRAM output file.",
     )
     order_input = ToolInput(
         "sortOrder",
         String(),
         position=10,
         prefix="-SO",
         doc=(
             "The --SORT_ORDER argument is an enumerated type (SortOrder), which can have one of "
             "the following values: [unsorted, queryname, coordinate, duplicate, unknown]"
         ),
     )
     return [
         *inherited,
         bam_input,
         output_input,
         order_input,
         *Gatk4SortSamBase.additional_args,
     ]
Beispiel #18
0
 def inputs(self):
     """Return the inherited inputs, the class-level additional_args, then bam, sites, intervals and pileupTableOut."""
     collected = [
         *super().inputs(),
         *Gatk4GetPileUpSummariesBase.additional_args,
     ]
     collected += [
         ToolInput(
             "bam",
             Array(BamBai()),
             position=0,
             prefix="-I",
             prefix_applies_to_all_elements=True,
             doc="The SAM/BAM/CRAM file containing reads.",
         ),
         ToolInput(
             "sites",
             VcfTabix(),
             prefix="-V",
             doc="sites of common biallelic variants",
         ),
         ToolInput(
             "intervals",
             VcfTabix(optional=True),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
         ),
         ToolInput(
             "pileupTableOut",
             Filename(extension=".txt"),
             position=1,
             prefix="-O",
         ),
     ]
     return collected
Beispiel #19
0
 def inputs(self):
     """Return self.additional_inputs followed by the bam, outputFilename and annotationFile inputs."""
     extras = self.additional_inputs
     bam_input = ToolInput(
         "bam",
         Array(Bam),
         doc="A list of SAM or BAM format files. They can be either name or location sorted. If no files provided, <stdin> input is expected. Location-sorted paired-end reads are automatically sorted by read names.",
         position=10,
     )
     output_input = ToolInput(
         "outputFilename",
         Filename(extension=".txt"),
         doc="Name of output file including read counts. A separate file including summary statistics of counting results is also included in the output ('<string>.summary'). Both files are in tab delimited format.",
         prefix="-o",
     )
     annotation_input = ToolInput(
         "annotationFile",
         File,
         doc="Name of an annotation file. GTF/GFF format by default. See -F option for more format information. Inbuilt annotations (SAF format) is available in 'annotation' directory of the package. Gzipped file is also accepted.",
         prefix="-a",
     )
     return [*extras, bam_input, output_input, annotation_input]
 def inputs(self) -> List[ToolInput]:
     """Return intervals, outputFilename, bam and reference, then the VarDictGermlineBase vardict and var2vcf inputs."""
     collected = [
         ToolInput("intervals", Bed(), shell_quote=False, position=2),
         ToolInput(
             "outputFilename",
             Filename(extension=".vcf", suffix=".vardict"),
             shell_quote=False,
             position=6,
             prefix=">",
         ),
         ToolInput(
             "bam",
             BamBai(),
             shell_quote=False,
             position=1,
             prefix="-b",
             doc="The indexed BAM file",
         ),
         ToolInput(
             "reference",
             FastaFai(),
             shell_quote=False,
             position=1,
             prefix="-G",
             doc=(
                 "The reference fasta. Should be indexed (.fai). "
                 "Defaults to: /ngs/reference_data/genomes/Hsapiens/hg19/seq/hg19.fa"
             ),
         ),
     ]
     collected.extend(VarDictGermlineBase.vardict_inputs)
     collected.extend(VarDictGermlineBase.var2vcf_inputs)
     return collected
Beispiel #21
0
 def tumor_normal_inputs():
     """Return the tumor, tumorName, normal and normalName inputs, in that order."""
     reads_doc = "BAM/SAM/CRAM file containing reads"

     tumor_bam = ToolInput(
         "tumor",
         BamBai(),
         prefix="-I",
         position=6,
         doc=reads_doc,
     )
     tumor_name = ToolInput(
         "tumorName",
         String(),
         prefix="-tumor",
         position=6,
         doc="BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode.",
     )
     normal_bam = ToolInput(
         "normal",
         BamBai(),
         prefix="-I",
         position=5,
         doc=reads_doc,
     )
     normal_name = ToolInput(
         "normalName",
         String(),
         prefix="-normal",
         position=6,
         doc="BAM sample name of normal. May be URL-encoded as output by GetSampleName with -encode.",
     )
     return [tumor_bam, tumor_name, normal_bam, normal_name]
Beispiel #22
0
 def inputs(self):
     """Return the inherited inputs, then callVCF, truthVCF, outputBasename and self.additional_args."""
     inherited = super().inputs()
     call_input = ToolInput(
         "callVCF",
         VcfTabix(),
         doc="The VCF containing the call sample",
         prefix="--CALL_VCF",
     )
     truth_input = ToolInput(
         "truthVCF",
         VcfIdx(),
         doc="The VCF containing the truth sample",
         prefix="--TRUTH_VCF",
     )
     basename_input = ToolInput(
         "outputBasename",
         Filename(),
         doc=(
             "Basename for the three metrics files that are to be written. Resulting files will be:"
             "(1) .genotype_concordance_summary_metrics, "
             "(2) .genotype_concordance_detail_metrics, "
             "(3) .genotype_concordance_contingency_metrics."
         ),
         prefix="--OUTPUT",
     )
     # *super(Gatk4GenotypeConcordanceBase, self).inputs(),
     return [
         *inherited,
         call_input,
         truth_input,
         basename_input,
         *self.additional_args,
     ]
Beispiel #23
0
 def inputs(self):
     """Return the inherited inputs, then bams, sampleName, outputFilename and self.additional_args."""
     collected = list(super().inputs())
     collected.append(
         ToolInput(
             "bams",
             Array(BamBai()),
             position=10,
             prefix="-I",
             prefix_applies_to_all_elements=True,
             doc="The SAM/BAM file to sort.",
         )
     )
     collected.append(
         ToolInput(
             "sampleName",
             String(optional=True),
             doc="Used for naming purposes only",
         )
     )
     # The merged output is named after the "sampleName" input.
     merged_name = Filename(
         prefix=InputSelector("sampleName"),
         suffix=".merged",
         extension=".bam",
     )
     collected.append(
         ToolInput(
             "outputFilename",
             merged_name,
             position=10,
             prefix="-O",
             doc="SAM/BAM file to write merged result to",
         )
     )
     collected.extend(self.additional_args)
     return collected
Beispiel #24
0
    def inputs(self):
        """Return the ToolInput list: the reference FASTA (position 1) plus the optional blockSize ("-b") and algorithm ("-a") options."""
        return [
            ToolInput("reference", Fasta, position=1, localise_file=True),
            # The "-p" index prefix option is currently disabled:
            # ToolInput(
            #     "prefix",
            #     String(optional=True),
            #     prefix="-p",
            #     doc="prefix of the index [same as fasta name]",
            # ),
            ToolInput(
                "blockSize",
                Int(optional=True),
                prefix="-b",
                doc="block size for the bwtsw algorithm (effective with -a bwtsw) [10000000]",
            ),
            ToolInput(
                "algorithm",
                String(optional=True),
                prefix="-a",
                # The doc text below is a runtime string; it starts at column 0 on purpose.
                doc="""\
BWT construction algorithm: bwtsw, is or rb2 [auto]
    - is	IS linear-time algorithm for constructing suffix array. It requires 5.37N memory where N is the size of the database. IS is moderately fast, but does not work with database larger than 2GB. IS is the default algorithm due to its simplicity. The current codes for IS algorithm are reimplemented by Yuta Mori.
    - bwtsw	Algorithm implemented in BWT-SW. This method works with the whole human genome.
""",
            ),
        ]
Beispiel #25
0
 def inputs(self):
     """Return the ubam, bam, reference and outputFilename inputs (all at position 10), then self.additional_args."""
     unmapped_input = ToolInput(
         "ubam",
         BamBai(),
         position=10,
         prefix="--UNMAPPED_BAM",
         prefix_applies_to_all_elements=True,
         doc="Original SAM or BAM file of unmapped reads, which must be in queryname order.",
     )
     aligned_input = ToolInput(
         "bam",
         Array(BamBai()),
         position=10,
         prefix="--ALIGNED_BAM",
         prefix_applies_to_all_elements=True,
         doc="SAM or BAM file(s) with alignment data.",
     )
     reference_input = ToolInput(
         "reference",
         FastaWithDict(optional=True),
         position=10,
         prefix="--REFERENCE_SEQUENCE",
         doc="Reference sequence file.",
     )
     output_input = ToolInput(
         "outputFilename",
         Filename(extension=".bam"),
         position=10,
         prefix="--OUTPUT",
         doc="Merged SAM or BAM file to write to.",
     )
     return [
         unmapped_input,
         aligned_input,
         reference_input,
         output_input,
         *self.additional_args,
     ]
Beispiel #26
0
 def inputs(self):
     """Return the ToolInput list for this tool.

     The list holds the input bam ("-I"), the output BAM filename ("-O")
     and the metrics filename ("-M"), followed by the inherited inputs and
     ``self.additional_args``.
     """
     return [
         ToolInput(
             "bam",
             BamBai(),
             prefix="-I",
             position=10,
             secondaries_present_as={".bai": "^.bai"},
             doc=
             "One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
         ),
         ToolInput(
             "outputFilename",
             Filename(extension=".bam"),
             position=10,
             prefix="-O",
             # -O is the marked-records output; its doc was previously swapped with -M.
             doc="The output file to write marked records to.",
         ),
         ToolInput(
             "metricsFilename",
             Filename(extension=".metrics.txt"),
             position=10,
             prefix="-M",
             # -M is the duplication metrics file; its doc was previously swapped with -O.
             doc="File to write duplication metrics to",
         ),
         *super(Gatk4MarkDuplicatesBase, self).inputs(),
         *self.additional_args,
     ]
Beispiel #27
0
    def inputs(self):
        """Return the ToolInput list for this tool.

        The list holds the input bam array ("-I"), the optional
        outputPrefix, the output BAM filename ("-O") and the metrics
        filename ("-M"), followed by the inherited inputs and
        ``self.additional_args``. Both generated filenames use the
        outputPrefix input when provided, otherwise the literal "generated".
        """
        # Would be good to include this in the prefix:
        #   If(InputSelector("bam").length().equals(1), InputSelector("bam")[0].basename(), None)

        prefix = FirstOperator([InputSelector("outputPrefix"), "generated"])
        return [
            ToolInput(
                "bam",
                Array(Bam),
                prefix="-I",
                position=10,
                # secondaries_present_as={".bai": "^.bai"},
                doc=
                "One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
            ),
            ToolInput("outputPrefix", String(optional=True)),
            ToolInput(
                "outputFilename",
                Filename(prefix=prefix, suffix=".markduped", extension=".bam"),
                position=10,
                prefix="-O",
                # -O is the marked-records output; its doc was previously swapped with -M.
                doc="The output file to write marked records to.",
            ),
            ToolInput(
                "metricsFilename",
                Filename(prefix=prefix, suffix=".metrics", extension=".txt"),
                position=10,
                prefix="-M",
                # -M is the duplication metrics file; its doc was previously swapped with -O.
                doc="File to write duplication metrics to",
            ),
            *super().inputs(),
            *self.additional_args,
        ]
Beispiel #28
0
 def inputs(self):
     """Return the flagstat, collectInsertSizeMetrics, coverage and outputPrefix inputs, then self.additional_args."""
     flagstat_input = ToolInput(
         "flagstat",
         File(),
         doc="output of samtools flagstat on bam",
         prefix="--flagstat",
     )
     insert_metrics_input = ToolInput(
         "collectInsertSizeMetrics",
         File,
         doc="output of CollectInsertMetrics (GATK or Picard) on bam",
         prefix="--collect_insert_metrics",
     )
     coverage_input = ToolInput(
         "coverage",
         File(),
         doc="output of bedtools coverageBed for targeted bam; bedtools genomeCoverageBed for whole genome bam",
         prefix="--coverage",
     )
     summary_input = ToolInput(
         "outputPrefix",
         Filename(extension=".csv"),
         doc="prefix of output summary csv",
         prefix="-o",
     )
     return [
         flagstat_input,
         insert_metrics_input,
         coverage_input,
         summary_input,
         *self.additional_args,
     ]
Beispiel #29
0
 def inputs(self):
     """Return the files (position 2), optional files2 (position 3) and outputFilename (position 1) inputs."""
     required_files = ToolInput(
         "files",
         Array(File()),
         localise_file=True,
         position=2,
     )
     optional_files = ToolInput(
         "files2",
         Array(File(), optional=True),
         localise_file=True,
         position=3,
     )
     archive_name = ToolInput(
         "outputFilename",
         Filename(extension=".tar"),
         position=1,
     )
     return [required_files, optional_files, archive_name]
Beispiel #30
0
 def inputs(self):
     """Return the reference, reads, mates and outputFilename inputs, then BwaMemBase.additional_inputs."""
     reference_input = ToolInput("reference", FastaBwa(), position=9)
     reads_input = ToolInput(
         "reads",
         Array(FastqGz()),
         doc=None,
         position=10,
     )
     mates_input = ToolInput(
         "mates",
         FastqGzPair(optional=True),
         doc=None,
         position=11,
     )
     output_input = ToolInput("outputFilename", Filename(extension=".sam"))
     return [
         reference_input,
         reads_input,
         mates_input,
         output_input,
         *BwaMemBase.additional_inputs,
     ]