Python TextFileの例、janis_unix.TextFile Pythonの例

コード例 #1

0

ファイルを表示

ファイル: base.py プロジェクト: mmYeung/janis-bioinformatics

 def outputs(self):
     """Declare the two TextFile outputs of this tool.

     Each output's glob is an InputSelector on the input that names the
     corresponding file ("outputGeneFile" and "outputRegionFile").
     """
     gene_file = ToolOutput(
         "geneFileOut", TextFile(), glob=InputSelector("outputGeneFile")
     )
     region_file = ToolOutput(
         "regionFileOut", TextFile(), glob=InputSelector("outputRegionFile")
     )
     return [gene_file, region_file]

コード例 #2

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     File inputs are resolved inside the "wgsgermline_data" folder of the
     bioinformatics test-data path; the "out" output is checked through
     TextFile.basic_test.
     """

     def data_file(filename):
         # The test-data root is looked up on every call rather than cached.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     case_input = {
         "positions": data_file("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "reference": data_file("Homo_sapiens_assembly38.chr17.fasta"),
         "bam": data_file("NA12878-BRCA1.markduped.bam"),
         "countOrphans": True,
         "noBAQ": True,
         "maxDepth": 10000,
         "minBQ": 0,
     }
     expected = TextFile.basic_test(
         "out",
         19900,
         "chr17\t43044391\tG\t19\tA,A,,A.a,,A,,A..,,a\tDJCJ:FHDDBJBBJJIDDB",
         187,
         "53c3e03c20730ff45411087444379b1b",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #3

0

ファイルを表示

ファイル: base.py プロジェクト: Akmazad/janis-bioinformatics

 def tests(self):
     """Return the single "basic" test case for this tool.

     Both inputs ("inputBam" and "genome") are local files under the
     "wgsgermline_data" test-data folder; the "out" output is checked
     through TextFile.basic_test.
     """

     def data_file(filename):
         # The test-data root is looked up on every call rather than cached.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     case_input = {
         "inputBam": data_file("NA12878-BRCA1.markduped.bam.bam"),
         "genome": data_file("NA12878-BRCA1.genome_file.txt"),
     }
     expected = TextFile.basic_test(
         "out",
         7432,
         "chr17\t0\t83144233\t83257441\t0.99864",
         220,
         "f2007353bbd18f0a04eae9499d7c6a91",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #4

0

ファイルを表示

 def tests(self):
     """Return the "basic" and "minimal" test cases for this tool.

     Both cases take the same pair of remote FASTQ reads and a single
     thread. The "basic" case combines the FastqGzPair checks on "out" with
     array-wrapped TextFile checks on "datafile"; the "minimal" case uses
     self.minimal_test().
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def make_input():
         # A fresh dict (and list) per test case so the cases share no state.
         return {
             "reads": [
                 f"{remote_dir}/{filename}"
                 for filename in (
                     "NA12878-BRCA1_R1.fastq.gz",
                     "NA12878-BRCA1_R2.fastq.gz",
                 )
             ],
             "threads": 1,
         }

     basic_input = make_input()
     pair_checks = FastqGzPair.basic_test("out", 824000, 408000, 416000)
     datafile_checks = Array.array_wrapper(
         [TextFile.basic_test("datafile", 81000)]
     )
     basic_case = TTestCase(
         name="basic",
         input=basic_input,
         output=pair_checks + datafile_checks,
     )

     minimal_case = TTestCase(
         name="minimal",
         input=make_input(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]

コード例 #5

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     File inputs are URLs below the remote "wgsgermline_data" folder; the
     "out" output is checked through TextFile.basic_test.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(filename):
         # Append a file name to the remote test-data folder URL.
         return f"{remote_dir}/{filename}"

     case_input = {
         "positions": remote("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "reference": remote("Homo_sapiens_assembly38.chr17.fasta"),
         "bam": remote("NA12878-BRCA1.markduped.bam"),
         "countOrphans": True,
         "noBAQ": True,
         "maxDepth": 10000,
         "minBQ": 0,
     }
     expected = TextFile.basic_test(
         "out",
         19900,
         "chr17\t43044391\tG\t19\tA,A,,A.a,,A,,A..,,a\tDJCJ:FHDDBJBBJJIDDB",
         187,
         "53c3e03c20730ff45411087444379b1b",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #6

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     The case feeds one local merged BAM (as a one-element list) plus Java
     and indexing options, and expects the concatenation of the BamBai
     checks on "out" and the TextFile checks on "metrics".
     """

     def data_file(filename):
         # The test-data root is looked up on every call rather than cached.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     case_input = {
         "bam": [data_file("NA12878-BRCA1.merged.bam")],
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "createIndex": True,
         "tmpDir": "./tmp",
     }
     bam_checks = BamBai.basic_test(
         "out",
         2829000,
         3780,
         data_file("NA12878-BRCA1.markduped.bam.flagstat"),
     )
     metrics_checks = TextFile.basic_test(
         "metrics",
         3700,
         "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
         112,
     )
     return [
         TTestCase(
             name="basic",
             input=case_input,
             output=bam_checks + metrics_checks,
         )
     ]

コード例 #7

0

ファイルを表示

 def tests(self):
     """Return the "basic" and "minimal" test cases for this tool.

     Both cases take the same local chr17 reference FASTA. "basic" checks
     the "out" output through TextFile.basic_test; "minimal" uses
     self.minimal_test().
     """

     def reference_input():
         # A fresh dict per case; the path is resolved each time it is built.
         return {
             "reference": os.path.join(
                 BioinformaticsTool.test_data_path(),
                 "wgsgermline_data",
                 "Homo_sapiens_assembly38.chr17.fasta",
             ),
         }

     basic_case = TTestCase(
         name="basic",
         input=reference_input(),
         output=TextFile.basic_test("out", 15, "chr17\t83257441\n", 1),
     )
     minimal_case = TTestCase(
         name="minimal",
         input=reference_input(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]

コード例 #8

0

ファイルを表示

 def outputs(self):
     """Declare the four outputs of this tool.

     "out" and "stats" are globbed from the "outputFilename" input (the
     latter with a ".stats" suffix), "f1f2r_out" from
     "f1r2TarGz_outputFilename", and the optional "bam" from
     "outputBamName" with its ".bai" secondary presented as "^.bai".
     """
     vcf_out = ToolOutput(
         "out",
         VcfTabix,
         glob=InputSelector("outputFilename"),
         doc="To determine type",
     )
     stats_out = ToolOutput(
         "stats",
         TextFile(extension=".stats"),
         glob=InputSelector("outputFilename") + ".stats",
         doc="To determine type",
     )
     f1r2_out = ToolOutput(
         "f1f2r_out",
         TarFileGz,
         glob=InputSelector("f1r2TarGz_outputFilename"),
         doc="To determine type",
     )
     bam_out = ToolOutput(
         "bam",
         BamBai(optional=True),
         glob=InputSelector("outputBamName"),
         doc="File to which assembled haplotypes should be written",
         secondaries_present_as={".bai": "^.bai"},
     )
     return [vcf_out, stats_out, f1r2_out, bam_out]

コード例 #9

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     All file inputs are URLs below the remote "wgsgermline_data" folder.
     The "out" output is checked for minimum size, line count and md5, and
     against the remote expected performance-summary CSV.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(filename):
         # Append a file name to the remote test-data folder URL.
         return f"{remote_dir}/{filename}"

     case_input = {
         "flagstat": remote("NA12878-BRCA1.markduped.bam.flagstat"),
         "collectInsertSizeMetrics": remote(
             "NA12878-BRCA1.markduped.metrics.txt"
         ),
         "coverage": remote("NA12878-BRCA1.genomeCoverageBed.stdout"),
         "rmdupFlagstat": remote("NA12878-BRCA1.markduped.bam.bam.flagstat"),
         "genome": True,
     }
     expected = TextFile.basic_test(
         tag="out",
         min_size=948,
         line_count=2,
         md5="575354942cfb8d0367725f9020181443",
         expected_file_path=remote("NA12878-BRCA1_performance_summary.csv"),
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #10

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     The case feeds one remote merged BAM (as a one-element list) plus Java
     and indexing options, and expects the concatenation of the BamBai
     checks on "out" and the TextFile checks on "metrics".
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_input = {
         "bam": [f"{remote_dir}/NA12878-BRCA1.merged.bam"],
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "createIndex": True,
         "tmpDir": "./tmp",
     }
     bam_checks = BamBai.basic_test(
         "out",
         2829000,
         3780,
         f"{remote_dir}/NA12878-BRCA1.markduped.bam.flagstat",
     )
     metrics_checks = TextFile.basic_test(
         "metrics",
         3700,
         "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
         112,
     )
     return [
         TTestCase(
             name="basic",
             input=case_input,
             output=bam_checks + metrics_checks,
         )
     ]

コード例 #11

0

ファイルを表示

ファイル: base.py プロジェクト: mmYeung/janis-bioinformatics

 def tests(self):
     """Return the single "basic" test case for this tool.

     The expected text for the "out" output is read from a local metrics
     file with its first five lines dropped; the "outHistogram" output only
     has to reach a minimum file size.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     metrics_path = os.path.join(
         BioinformaticsTool.test_data_path(),
         "NA12878-BRCA1.markduped.metrics.txt",
     )
     # The first 5 lines of the file include headers that change with every
     # run (time, etc), so they are skipped before the content is captured.
     with open(metrics_path, "r") as metrics:
         for _ in range(5):
             next(metrics)
         expected_content = metrics.read()

     case_input = {
         "bam": f"{remote_dir}/NA12878-BRCA1.markduped.bam",
         "javaOptions": ["-Xmx6G"],
     }
     expected = TextFile.basic_test("out", 7260, expected_content, 905) + [
         TTestExpectedOutput(
             tag="outHistogram",
             preprocessor=TTestPreprocessor.FileSize,
             operator=operator.ge,
             expected_value=15600,
         ),
     ]
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #12

0

ファイルを表示

ファイル: base.py プロジェクト: Akmazad/janis-bioinformatics

 def tests(self):
     """Return the single "basic" test case for this tool.

     The expected text for the "out" output is read from the local metrics
     file with its first five lines dropped; the "outHistogram" output only
     has to reach a minimum file size.
     """

     def data_file(filename):
         # The test-data root is looked up on every call rather than cached.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     # The first 5 lines of the file include headers that change with every
     # run (time, etc), so they are skipped before the content is captured.
     with open(data_file("NA12878-BRCA1.markduped.metrics.txt"), "r") as metrics:
         for _ in range(5):
             next(metrics)
         expected_content = metrics.read()

     case_input = {
         "bam": data_file("NA12878-BRCA1.markduped.bam"),
         "javaOptions": ["-Xmx6G"],
     }
     expected = TextFile.basic_test("out", 7260, expected_content, 905) + [
         TTestExpectedOutput(
             tag="outHistogram",
             preprocessor=TTestPreprocessor.FileSize,
             operator=operator.ge,
             expected_value=15600,
         ),
     ]
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #13

0

ファイルを表示

ファイル: base.py プロジェクト: Akmazad/janis-bioinformatics

 def outputs(self):
     """Declare the tool's outputs: the "out" text file and a PDF histogram.

     Each glob is an InputSelector on the input naming that file
     ("outputFilename" and "outputHistogram").
     """
     text_out = ToolOutput(
         "out", TextFile(), glob=InputSelector("outputFilename")
     )
     histogram_out = ToolOutput(
         "outHistogram",
         File(extension=".pdf"),
         glob=InputSelector("outputHistogram"),
     )
     return [text_out, histogram_out]

コード例 #14

0

ファイルを表示

ファイル: base.py プロジェクト: mmYeung/janis-bioinformatics

 def outputs(self):
     """Declare the three TextFile outputs, each located by a wildcard glob."""
     # (output tag, wildcard pattern) in declaration order.
     tagged_patterns = (
         ("meg", "*_CloneFinder.meg"),
         ("clonalFractions", "*_CloneFinder.txt"),
         ("summary", "*_summary.txt"),
     )
     return [
         ToolOutput(
             tag,
             TextFile(),
             glob=WildcardSelector(pattern),
             doc="To determine type",
         )
         for tag, pattern in tagged_patterns
     ]

コード例 #15

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     The pileup table comes from the remote "wgssomatic_data" folder; the
     expected output is the concatenation of the TextFile checks on
     "contOut" and on "segOut".
     """
     parent_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
     somatic_data = f"{parent_dir}/wgssomatic_data"

     case_input = {
         "javaOptions": ["-Xmx6G"],
         "pileupTable": f"{somatic_data}/generated.txt",
         "segmentationFileOut": "generated.txt.mutect2_segments",
     }
     contamination_checks = TextFile.basic_test(
         "contOut",
         59,
         "sample\tcontamination\terror\nNA12878-NA24385-mixture\t0.0\t0.0",
     )
     segment_checks = TextFile.basic_test(
         "segOut",
         125,
         "contig\tstart\tend\tminor_allele_fraction\nchr17\t43045941\t43098543\t0.28541019662496847",
     )
     return [
         TTestCase(
             name="basic",
             input=case_input,
             output=contamination_checks + segment_checks,
         ),
     ]

コード例 #16

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     All file inputs (BAM, reference, four known-sites VCFs and the
     intervals BED) are local files under the "wgsgermline_data" test-data
     folder; the "out" output is checked through TextFile.basic_test.
     """

     def data_file(filename):
         # The test-data root is looked up on every call rather than cached.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     known_site_names = (
         "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
         "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
         "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
         "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
     )
     case_input = {
         "bam": data_file("NA12878-BRCA1.markduped.bam"),
         "reference": data_file("Homo_sapiens_assembly38.chr17.fasta"),
         "knownSites": [data_file(name) for name in known_site_names],
         "intervals": data_file("BRCA1.hg38.bed"),
         "javaOptions": ["-Xmx12G"],
     }
     expected = TextFile.basic_test(
         "out", 1131758, "#:GATKReport.v1.1:5", 10376
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #17

0

ファイルを表示

ファイル: base.py プロジェクト: mmYeung/janis-bioinformatics

 def inputs(self):
     """Declare the tool's inputs: a positional SNV text file and the "-o" output folder."""
     snv_input = ToolInput(
         tag="snvInput",
         input_type=TextFile(),
         position=2,
         doc="tab seperated file of snvs to find clones for",
     )
     output_folder = ToolInput(
         tag="outputFolder",
         input_type=Filename(),
         prefix="-o",
         doc="folder to write output to (default: current working dir)",
     )
     return [snv_input, output_folder]

コード例 #18

0

ファイルを表示

ファイル: base.py プロジェクト: mmYeung/janis-bioinformatics

 def tests(self):
     """Return the single "basic" test case for this tool.

     The only input is a remote mark-duplicated BAM; the "out" output is
     checked through TextFile.basic_test.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_input = {
         "bam": f"{remote_dir}/NA12878-BRCA1.markduped.bam",
     }
     expected = TextFile.basic_test(
         "out",
         410,
         "19486 + 0 in total (QC-passed reads + QC-failed reads)",
         13,
         "ddbcfe52e60b925d222fb8bc1517a7a0",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #19

0

ファイルを表示

 def outputs(self):
     """Declare the seven TextFile outputs derived from the "outputPrefix" input.

     "sample" globs the prefix itself; every other output globs the prefix
     with a fixed suffix appended.
     """
     # (output tag, suffix appended to the output prefix) in declaration order.
     suffixed_outputs = (
         (
             "sampleCumulativeCoverageCounts",
             ".sample_cumulative_coverage_counts",
         ),
         (
             "sampleCumulativeCoverageProportions",
             ".sample_cumulative_coverage_proportions",
         ),
         ("sampleIntervalStatistics", ".sample_interval_statistics"),
         ("sampleIntervalSummary", ".sample_interval_summary"),
         ("sampleStatistics", ".sample_statistics"),
         ("sampleSummary", ".sample_summary"),
     )

     declared = [
         ToolOutput(
             "sample",
             TextFile(),
             glob=InputSelector("outputPrefix"),
             doc="",
         )
     ]
     declared.extend(
         ToolOutput(
             tag,
             TextFile(),
             glob=InputSelector("outputPrefix") + suffix,
             doc="",
         )
         for tag, suffix in suffixed_outputs
     )
     return declared

コード例 #20

0

ファイルを表示

ファイル: base.py プロジェクト: mmYeung/janis-bioinformatics

 def outputs(self):
     """Declare the seven TextFile outputs derived from the "outputPrefix" input.

     "out_sample" is optional and globs the prefix itself; every other
     output globs the prefix with a fixed suffix appended.
     """
     # (output tag, suffix appended to the output prefix, doc text) in
     # declaration order.
     suffixed_outputs = (
         (
             "out_sampleCumulativeCoverageCounts",
             ".sample_cumulative_coverage_counts",
             "coverage histograms (# locus with >= X coverage), aggregated over all bases",
         ),
         (
             "out_sampleCumulativeCoverageProportions",
             ".sample_cumulative_coverage_proportions",
             "proprotions of loci with >= X coverage, aggregated over all bases",
         ),
         (
             "out_sampleIntervalStatistics",
             ".sample_interval_statistics",
             "total, mean, median, quartiles, and threshold proportions, aggregated per interval",
         ),
         (
             "out_sampleIntervalSummary",
             ".sample_interval_summary",
             "2x2 table of # of intervals covered to >= X depth in >=Y samples",
         ),
         (
             "out_sampleStatistics",
             ".sample_statistics",
             "coverage histograms (# locus with X coverage), aggregated over all bases",
         ),
         (
             "out_sampleSummary",
             ".sample_summary",
             "total, mean, median, quartiles, and threshold proportions, aggregated over all bases",
         ),
     )

     declared = [
         ToolOutput(
             "out_sample",
             TextFile(optional=True),
             glob=InputSelector("outputPrefix"),
             doc="per locus coverage",
         )
     ]
     declared.extend(
         ToolOutput(
             tag,
             TextFile(),
             glob=InputSelector("outputPrefix") + suffix,
             doc=description,
         )
         for tag, suffix, description in suffixed_outputs
     )
     return declared

コード例 #21

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     Both inputs ("inputBam" and "genome") are URLs below the remote
     "wgsgermline_data" folder; the "out" output is checked through
     TextFile.basic_test.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_input = {
         "inputBam": f"{remote_dir}/NA12878-BRCA1.markduped.bam.bam",
         "genome": f"{remote_dir}/NA12878-BRCA1.genome_file.txt",
     }
     expected = TextFile.basic_test(
         "out",
         7432,
         "chr17\t0\t83144233\t83257441\t0.99864",
         220,
         "f2007353bbd18f0a04eae9499d7c6a91",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #22

0

ファイルを表示

ファイル: generatebedtoolscoveragegenomefile.py プロジェクト: mmYeung/janis-bioinformatics

 def tests(self):
     """Return the "basic" and "minimal" test cases for this tool.

     Both cases take the same remote chr17 reference FASTA. "basic" checks
     the "out" output through TextFile.basic_test; "minimal" uses
     self.minimal_test().
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def reference_input():
         # A fresh dict per test case so the cases share no state.
         return {
             "reference": f"{remote_dir}/Homo_sapiens_assembly38.chr17.fasta",
         }

     basic_case = TTestCase(
         name="basic",
         input=reference_input(),
         output=TextFile.basic_test("out", 15, "chr17\t83257441\n", 1),
     )
     minimal_case = TTestCase(
         name="minimal",
         input=reference_input(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]

コード例 #23

0

ファイルを表示

ファイル: base_4_1.py プロジェクト: matthdsm/janis-bioinformatics

 def outputs(self):
     """Declare the three outputs of this tool.

     "out" and "stats" are globbed from the "outputFilename" input (the
     latter with a ".stats" suffix) and "f1f2r_out" from
     "f1r2TarGz_outputFilename".
     """
     vcf_out = ToolOutput(
         "out",
         VcfTabix,
         glob=InputSelector("outputFilename"),
         doc="To determine type",
     )
     stats_out = ToolOutput(
         "stats",
         TextFile(extension=".stats"),
         glob=InputSelector("outputFilename") + ".stats",
         doc="To determine type",
     )
     f1r2_out = ToolOutput(
         "f1f2r_out",
         TarFileGz,
         glob=InputSelector("f1r2TarGz_outputFilename"),
         doc="To determine type",
     )
     return [vcf_out, stats_out, f1r2_out]

コード例 #24

0

ファイルを表示

ファイル: base.py プロジェクト: Akmazad/janis-bioinformatics

 def tests(self):
     """Return the single "basic" test case for this tool.

     The only input is a local mark-duplicated BAM under the
     "wgsgermline_data" test-data folder; the "out" output is checked
     through TextFile.basic_test.
     """
     bam_path = os.path.join(
         BioinformaticsTool.test_data_path(),
         "wgsgermline_data",
         "NA12878-BRCA1.markduped.bam",
     )
     case_input = {"bam": bam_path}
     expected = TextFile.basic_test(
         "out",
         410,
         "19486 + 0 in total (QC-passed reads + QC-failed reads)",
         13,
         "ddbcfe52e60b925d222fb8bc1517a7a0",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #25

0

ファイルを表示

ファイル: base.py プロジェクト: Akmazad/janis-bioinformatics

 def tests(self):
     """Return the single "basic" test case for this tool.

     All file inputs are local files under the "wgsgermline_data" test-data
     folder. The "out" output is checked for minimum size, line count and
     md5, and against the local expected performance-summary CSV.
     """

     def data_file(filename):
         # The test-data root is looked up on every call rather than cached.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     case_input = {
         "flagstat": data_file("NA12878-BRCA1.markduped.bam.flagstat"),
         "collectInsertSizeMetrics": data_file(
             "NA12878-BRCA1.markduped.metrics.txt"
         ),
         "coverage": data_file("NA12878-BRCA1.genomeCoverageBed.stdout"),
         "rmdupFlagstat": data_file(
             "NA12878-BRCA1.markduped.bam.bam.flagstat"
         ),
         "genome": True,
     }
     expected = TextFile.basic_test(
         tag="out",
         min_size=948,
         line_count=2,
         md5="575354942cfb8d0367725f9020181443",
         expected_file_path=data_file("NA12878-BRCA1_performance_summary.csv"),
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #26

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     The BAM and sites VCF come from the remote "wgssomatic_data" folder and
     the intervals BED from "wgsgermline_data"; the "out" output is checked
     through TextFile.basic_test with an md5.
     """
     parent_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
     germline_data = f"{parent_dir}/wgsgermline_data"
     somatic_data = f"{parent_dir}/wgssomatic_data"

     mixture_bam = (
         f"{somatic_data}/NA12878-NA24385-mixture.markduped.recalibrated.bam"
     )
     case_input = {
         "javaOptions": ["-Xmx48G"],
         "bam": [mixture_bam],
         "sites": f"{somatic_data}/af-only-gnomad.hg38.BRCA1.vcf.gz",
         "intervals": f"{germline_data}/BRCA1.hg38.bed",
     }
     expected = TextFile.basic_test(
         "out",
         2592,
         md5="54672b8b13d46aaef25c56351c82a3f4",
     )
     return [
         TTestCase(name="basic", input=case_input, output=expected),
     ]

コード例 #27

0

ファイルを表示

 def tests(self):
     """Return the single "basic" test case for this tool.

     All file inputs (BAM, reference, four known-sites VCFs and the
     intervals BED) are URLs below the remote "wgsgermline_data" folder; the
     "out" output is checked through TextFile.basic_test.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(filename):
         # Append a file name to the remote test-data folder URL.
         return f"{remote_dir}/{filename}"

     known_site_names = (
         "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
         "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
         "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
         "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
     )
     case_input = {
         "bam": remote("NA12878-BRCA1.markduped.bam"),
         "reference": remote("Homo_sapiens_assembly38.chr17.fasta"),
         "knownSites": [remote(name) for name in known_site_names],
         "intervals": remote("BRCA1.hg38.bed"),
         "javaOptions": ["-Xmx12G"],
     }
     expected = TextFile.basic_test(
         "out", 1131758, "#:GATKReport.v1.1:5", 10376
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]

コード例 #28

0

ファイルを表示

ファイル: base.py プロジェクト: mmYeung/janis-bioinformatics

 def outputs(self):
     """Declare the single "out" TextFile output, globbed from the "outputFilename" input."""
     text_out = ToolOutput(
         "out",
         TextFile(),
         glob=InputSelector("outputFilename"),
     )
     return [text_out]

コード例 #29

0

ファイルを表示

ファイル: base_1_2.py プロジェクト: mmYeung/janis-bioinformatics

 def inputs(self):
     return [
         ToolInput(
             tag="bams",
             input_type=Array(BamBai),
             prefix="-b",
             prefix_applies_to_all_elements=True,
             doc="Add FILE to the set of BAM files to be analyzed.",
         ),
         ToolInput(
             tag="bamList",
             input_type=TextFile(optional=True),
             prefix="-L",
             doc="A file containing a list of BAM files to be analyzed.",
         ),
         ToolInput(
             tag="reference",
             input_type=FastaFai(),
             prefix="-f",
             doc=
             " Use FILE as the reference sequence for analysis. An index file (FILE.fai) will be created if none exists. If neither --targets nor --region are specified, FreeBayes will analyze every position in this reference.",
         ),
         ToolInput(
             tag="targetsFile",
             prefix="-t",
             input_type=Bed(optional=True),
             doc=" Limit analysis to targets listed in the BED-format FILE.",
         ),
         ToolInput(
             tag="region",
             prefix="-r",
             input_type=String(optional=True),
             doc=
             "<chrom>:<start_position>-<end_position> Limit analysis to the specified region, 0-base coordinates, end_position not included (same as BED format). Either '-' or '..' maybe used as a separator.",
         ),
         ToolInput(
             tag="samplesFile",
             prefix="-s",
             input_type=TextFile(optional=True),
             doc=
             "FILE  Limit analysis to samples listed (one per line) in the FILE. By default FreeBayes will analyze all samples in its input BAM files.",
         ),
         ToolInput(
             tag="popFile",
             prefix="--populations",
             input_type=TextFile(optional=True),
             doc=
             "FILE Each line of FILE should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations.",
         ),
         ToolInput(
             tag="cnvFile",
             prefix="-A",
             input_type=TextFile(optional=True),
             doc=
             "FILE Read a copy number map from the BED file FILE, which has either a sample-level ploidy: sample name, copy number or a region-specific format: reference sequence, start, end, sample name, copy number ... for each region in each sample which does not have the default copy number as set by --ploidy.",
         ),
         ToolInput(
             tag="outputFilename",
             prefix="-v",
             input_type=Filename(extension=".vcf"),
             doc="FILE Output VCF-format results to FILE. (default: stdout)",
         ),
         ToolInput(
             tag="gvcfFlag",
             prefix="--gvcf",
             input_type=Boolean(optional=True),
             default=False,
             doc=
             "Write gVCF output, which indicates coverage in uncalled regions.",
         ),
         ToolInput(
             tag="gvcfChunkSize",
             prefix="--gvcf-chunk",
             input_type=Int(optional=True),
             doc=
             " When writing gVCF output emit a record for every NUM bases.",
         ),
         ToolInput(
             tag="candidateVcf",
             prefix="-@",
             input_type=File(optional=True),
             doc=
             " Use variants reported in VCF file as input to the algorithm. Variants in this file will included in the output even if there is not enough support in the data to pass input filters.",
         ),
         ToolInput(
             tag="restrictSitesFlag",
             prefix="-l",
             input_type=Boolean(optional=True),
             doc=
             "Only provide variant calls and genotype likelihoods for sites and alleles which are provided in the VCF input, and provide output in the VCF for all input alleles, not just those which have support in the data.",
         ),
         ToolInput(
             tag="candidateHaploVcf",
             prefix="--haplotype-basis-alleles",
             input_type=File(optional=True),
             doc=
             "When specified, only variant alleles provided in this input VCF will be used for the construction of complex or haplotype alleles.",
         ),
         ToolInput(
             tag="reportHapAllelesFlag",
             prefix="--report-all-haplotype-alleles",
             input_type=Boolean(optional=True),
             doc=
             "At sites where genotypes are made over haplotype alleles, provide information about all alleles in output, not only those which are called.",
         ),
         ToolInput(
             tag="monomorphicFlag",
             prefix="--report-monomorphic",
             input_type=Boolean(optional=True),
             doc=
             " Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes. Loci which do not have any potential alternates have '.' for ALT.",
         ),
         ToolInput(
             tag="polyMoprhProbFlag",
             prefix="-P",
             input_type=Float(optional=True),
             default=0.0,
             doc=
             "Report sites if the probability that there is a polymorphism at the site is greater than N. default: 0.0. Note that post-filtering is generally recommended over the use of this parameter.",
         ),
         ToolInput(
             tag="strictFlag",
             prefix="--strict-vcf",
             input_type=Boolean(optional=True),
             doc="Generate strict VCF format (FORMAT/GQ will be an int)",
         ),
         ToolInput(
             tag="theta",
             prefix="-T",
             input_type=Float(),
             default=0.001,
             doc=
             "The expected mutation rate or pairwise nucleotide diversity among the population under analysis. This serves as the single parameter to the Ewens Sampling Formula prior model default: 0.001",
         ),
         ToolInput(
             tag="ploidy",
             prefix="-p",
             input_type=Int(),
             default=2,
             doc="Sets the default ploidy for the analysis to N. default: 2",
         ),
         ToolInput(
             tag="pooledDiscreteFlag",
             prefix="-J",
             input_type=Boolean(optional=True),
             doc=
             "Assume that samples result from pooled sequencing. Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy.",
         ),
         ToolInput(
             tag="pooledContinousFlag",
             prefix="-K",
             input_type=Boolean(optional=True),
             doc=
             "Output all alleles which pass input filters, regardles of genotyping outcome or model.",
         ),
         ToolInput(
             tag="addRefFlag",
             prefix="-Z",
             input_type=Boolean(optional=True),
             doc=
             "This flag includes the reference allele in the analysis as if it is another sample from the same population.",
         ),
         ToolInput(
             tag="refQual",
             prefix="--reference-quality",
             input_type=String(),
             default="100,60",
             doc=
             "--reference-quality MQ,BQ  Assign mapping quality of MQ to the reference allele at each site and base quality of BQ. default: 100,60",
         ),
         ToolInput(
             tag="ignoreSNPsFlag",
             prefix="-I",
             input_type=Boolean(optional=True),
             doc="Ignore SNP alleles.",
         ),
         ToolInput(
             tag="ignoreINDELsFlag",
             prefix="-i",
             input_type=Boolean(optional=True),
             doc="Ignore insertion and deletion alleles.",
         ),
         ToolInput(
             tag="ignoreMNPsFlag",
             prefix="-X",
             input_type=Boolean(optional=True),
             doc="Ignore multi-nuceotide polymorphisms, MNPs.",
         ),
         ToolInput(
             tag="ignoreComplexVarsFlag",
             prefix="-u",
             input_type=Boolean(optional=True),
             doc="Ignore complex events (composites of other classes).",
         ),
         ToolInput(
             tag="maxNumOfAlleles",
             prefix="-n",
             input_type=Int(),
             default=0,
             doc=
             "Evaluate only the best N SNP alleles, ranked by sum of supporting quality scores. (Set to 0 to use all; default: all)",
         ),
         ToolInput(
             tag="maxNumOfComplexVars",
             prefix="-E",
             input_type=Int(optional=True),
             doc="",
         ),
         ToolInput(
             tag="haplotypeLength",
             prefix="--haplotype-length",
             input_type=Int(),
             default=3,
             doc=
             "Allow haplotype calls with contiguous embedded matches of up to this length. Set N=-1 to disable clumping. (default: 3)",
         ),
         ToolInput(
             tag="minRepSize",
             prefix="--min-repeat-size",
             input_type=Int(),
             default=5,
             doc=
             "When assembling observations across repeats, require the total repeat length at least this many bp. (default: 5)",
         ),
         ToolInput(
             tag="minRepEntropy",
             prefix="--min-repeat-entropy",
             input_type=Int(),
             default=1,
             doc=
             "To detect interrupted repeats, build across sequence until it has  entropy > N bits per bp. Set to 0 to turn off. (default: 1)",
         ),
         ToolInput(
             tag="noPartObsFlag",
             prefix="--no-partial-observations",
             input_type=Boolean(optional=True),
             doc=
             "Exclude observations which do not fully span the dynamically-determined detection window. (default, use all observations, dividing partial support across matching haplotypes when generating haplotypes.)",
         ),
         ToolInput(
             tag="noNormaliseFlag",
             prefix="-O",
             input_type=Boolean(optional=True),
             doc=
             "Turn off left-alignment of indels, which is enabled by default.",
         ),
         ToolInput(
             tag="useDupFlag",
             prefix="-4",
             input_type=Boolean(),
             default=False,
             doc=
             "Include duplicate-marked alignments in the analysis. default: exclude duplicates marked as such in alignments",
         ),
         ToolInput(
             tag="minMappingQual",
             prefix="-m",
             input_type=Int(),
             default=1,
             doc=
             " Exclude alignments from analysis if they have a mapping quality less than Q. default: 1",
         ),
         ToolInput(
             tag="minBaseQual",
             prefix="-q",
             input_type=Int(),
             default=0,
             doc=
             " -q --min-base-quality Q Exclude alleles from analysis if their supporting base quality is less than Q. default: 0",
         ),
         ToolInput(
             tag="minSupQsum",
             prefix="-R",
             input_type=Int(),
             default=0,
             doc=
             " -R --min-supporting-allele-qsum Q Consider any allele in which the sum of qualities of supporting observations is at least Q. default: 0",
         ),
         ToolInput(
             tag="minSupMQsum",
             prefix="-Y",
             input_type=Int(),
             default=0,
             doc=
             " -Y --min-supporting-mapping-qsum Q Consider any allele in which and the sum of mapping qualities of supporting reads is at least Q. default: 0",
         ),
         ToolInput(
             tag="minSupBQthres",
             prefix="-Q",
             input_type=Int(),
             default=10,
             doc=
             " -Q --mismatch-base-quality-threshold Q Count mismatches toward --read-mismatch-limit if the base quality of the mismatch is >= Q. default: 10",
         ),
         ToolInput(
             tag="readMisMatchLim",
             prefix="-U",
             input_type=Int(optional=True),
             doc=
             " -U --read-mismatch-limit N Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold. default: ~unbounded",
         ),
         ToolInput(
             tag="maxMisMatchFrac",
             prefix="-z",
             input_type=Float(),
             default=1.0,
             doc=
             " -z --read-max-mismatch-fraction N Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold default: 1.0",
         ),
         ToolInput(
             tag="readSNPLim",
             prefix="-$",
             input_type=Int(optional=True),
             doc=
             " -$ --read-snp-limit N Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold. default: ~unbounded",
         ),
         ToolInput(
             tag="readINDELLim",
             prefix="-e",
             input_type=Int(optional=True),
             doc=
             " -e --read-indel-limit N Exclude reads with more than N separate gaps. default: ~unbounded",
         ),
         ToolInput(
             tag="standardFilterFlag",
             prefix="-0",
             input_type=Boolean(optional=True),
             doc=
             " -0 --standard-filters Use stringent input base and mapping quality filters Equivalent to -m 30 -q 20 -R 0 -S 0",
         ),
         ToolInput(
             tag="minAltFrac",
             prefix="-F",
             input_type=Float(),
             default=0.05,
             doc=
             " -F --min-alternate-fraction N Require at least this fraction of observations supporting an alternate allele within a single individual in the in order to evaluate the position. default: 0.05",
         ),
         ToolInput(
             tag="minAltCount",
             prefix="-C",
             input_type=Int(),
             default=2,
             doc=
             " -C --min-alternate-count N Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position. default: 2",
         ),
         ToolInput(
             tag="minAltQSum",
             prefix="-3",
             input_type=Int(),
             default=0,
             doc=
             " -3 --min-alternate-qsum N Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position. default: 0",
         ),
         ToolInput(
             tag="minAltTotal",
             prefix="-G",
             input_type=Int(),
             default=1,
             doc=
             " -G --min-alternate-total N Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis. default: 1",
         ),
         ToolInput(
             tag="minCov",
             prefix="--min-coverage",
             input_type=Int(),
             default=0,
             doc=
             " --min-coverage N Require at least this coverage to process a site. default: 0",
         ),
         ToolInput(
             tag="maxCov",
             prefix="--max-coverage",
             input_type=Int(optional=True),
             doc=
             " --max-coverage N Do not process sites with greater than this coverage. default: no limit",
         ),
         ToolInput(
             tag="noPopPriorsFlag",
             prefix="-k",
             input_type=Boolean(optional=True),
             doc=
             " -k --no-population-priors Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors.",
         ),
         ToolInput(
             tag="noHWEPriorsFlag",
             prefix="-w",
             input_type=Boolean(optional=True),
             doc=
             " -w --hwe-priors-off Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency.",
         ),
         ToolInput(
             tag="noBinOBSPriorsFlag",
             prefix="-V",
             input_type=Boolean(optional=True),
             doc=
             " -V --binomial-obs-priors-off Disable incorporation of prior expectations about observations. Uses read placement probability, strand balance probability, and read position (5'-3') probability.",
         ),
         ToolInput(
             tag="noABPriorsFlag",
             prefix="-a",
             input_type=Boolean(optional=True),
             doc=
             " -a --allele-balance-priors-off Disable use of aggregate probability of observation balance between alleles as a component of the priors.",
         ),
         ToolInput(
             tag="obsBiasFile",
             prefix="--observation-bias",
             input_type=TextFile(optional=True),
             doc=
             " --observation-bias FILE Read length-dependent allele observation biases from FILE. The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias.",
         ),
         ToolInput(
             tag="baseQualCap",
             prefix="--base-quality-cap",
             input_type=Int(optional=True),
             doc=
             " --base-quality-cap Q Limit estimated observation quality by capping base quality at Q.",
         ),
         ToolInput(
             tag="probContamin",
             prefix="--prob-contamination",
             input_type=Float(),
             default=0.000000001,
             doc=
             " --prob-contamination F An estimate of contamination to use for all samples. default: 10e-9",
         ),
         ToolInput(
             tag="legGLScalc",
             prefix="--legacy-gls",
             input_type=Boolean(optional=True),
             doc=
             " --legacy-gls Use legacy (polybayes equivalent) genotype likelihood calculations",
         ),
         ToolInput(
             tag="contaminEst",
             prefix="--contamination-estimates",
             input_type=TextFile(optional=True),
             doc=
             " --contamination-estimates FILE A file containing per-sample estimates of contamination, such as those generated by VerifyBamID. The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates.",
         ),
         ToolInput(
             tag="repoprtMaxGLFlag",
             prefix="--report-genotype-likelihood-max",
             input_type=Boolean(optional=True),
             doc=
             " --report-genotype-likelihood-max Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods.",
         ),
         ToolInput(
             tag="genotypingMaxIter",
             prefix="-B",
             input_type=Int(),
             default=1000,
             doc=
             " -B --genotyping-max-iterations N Iterate no more than N times during genotyping step. default: 1000.",
         ),
         ToolInput(
             tag="genotypingMaxBDepth",
             prefix="--genotyping-max-banddepth",
             input_type=Int(),
             default=6,
             doc=
             " --genotyping-max-banddepth N Integrate no deeper than the Nth best genotype by likelihood when genotyping. default: 6.",
         ),
         ToolInput(
             tag="postIntegrationLim",
             prefix="-W",
             input_type=String(),
             default="1,3",
             doc=
             " -W --posterior-integration-limits N,M Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood. default: 1,3.",
         ),
         ToolInput(
             tag="excludeUnObsGT",
             prefix="-N",
             input_type=Boolean(optional=True),
             doc=
             " -N --exclude-unobserved-genotypes Skip sample genotypings for which the sample has no supporting reads.",
         ),
         ToolInput(
             tag="gtVarThres",
             prefix="-S",
             input_type=Int(optional=True),
             doc=
             " -S --genotype-variant-threshold N Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample. default: ~unbounded",
         ),
         ToolInput(
             tag="useMQFlag",
             prefix="-j",
             input_type=Boolean(optional=True),
             doc=
             " -j --use-mapping-quality Use mapping quality of alleles when calculating data likelihoods.",
         ),
         ToolInput(
             tag="harmIndelQualFlag",
             prefix="-H",
             input_type=Boolean(optional=True),
             doc=
             " -H --harmonic-indel-quality Use a weighted sum of base qualities around an indel, scaled by the distance from the indel. By default use a minimum BQ in flanking sequence.",
         ),
         ToolInput(
             tag="readDepFact",
             prefix="-D",
             input_type=Float(),
             default=0.9,
             doc=
             " -D --read-dependence-factor N Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations. default: 0.9",
         ),
         ToolInput(
             tag="gtQuals",
             prefix="-=",
             input_type=Boolean(optional=True),
             doc=
             " -= --genotype-qualities Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output.",
         ),
     ]