def add_addbamstats(self, normal_bam_source, tumor_bam_source):
    """Add the somatic ``addbamstats`` step and publish its result.

    The step receives the normal / tumor names, the two supplied BAM
    sources, the reference and the ``vc_gatk_uncompressvcf`` output cast to
    ``Vcf``. Its output is exposed as ``out_variants_bamstats`` in the
    ``variants`` folder, named ``{tumor_name}--{normal_name}``.
    """
    bamstats_tool = AddBamStatsSomatic_0_1_0(
        normal_id=self.normal_name,
        tumor_id=self.tumor_name,
        normal_bam=normal_bam_source,
        tumor_bam=tumor_bam_source,
        reference=self.reference,
        vcf=self.vc_gatk_uncompressvcf.out.as_type(Vcf),
    )
    self.step("addbamstats", bamstats_tool)

    pair_name = StringFormatter(
        "{tumor_name}--{normal_name}",
        tumor_name=self.tumor_name,
        normal_name=self.normal_name,
    )
    self.output(
        "out_variants_bamstats",
        source=self.addbamstats.out,
        output_folder=["variants"],
        output_name=pair_name,
        doc="Final vcf from GATK",
    )
def add_strelka_variantcaller(self, bam_source):
    """Add the germline Strelka steps and publish the called variants.

    Registers ``generate_manta_config`` followed by ``vc_strelka``, then
    exposes ``vc_strelka.out`` as ``out_variants_strelka`` in the
    ``variants`` folder, named ``{sample_name}_strelka``.
    """
    # Strelka
    self.step("generate_manta_config", GenerateMantaConfig())

    germline_caller = IlluminaGermlineVariantCaller(
        bam=bam_source,
        reference=self.reference,
        intervals=self.strelka_intervals,
        manta_config=self.generate_manta_config.out,
    )
    self.step("vc_strelka", germline_caller)

    strelka_name = StringFormatter(
        "{sample_name}_strelka",
        sample_name=self.sample_name,
    )
    self.output(
        "out_variants_strelka",
        source=self.vc_strelka.out,
        output_folder=["variants"],
        output_name=strelka_name,
        doc="Variants from the Strelka variant caller",
    )
def add_strelka_variantcaller(self, normal_bam_source, tumor_bam_source):
    """Add the somatic Strelka steps and publish the compressed variants.

    Registers ``generate_manta_config``, ``vc_strelka`` and
    ``vc_strelka_compress`` (BGZip of the Strelka output). The compressed
    result, cast to ``CompressedVcf``, is exposed as
    ``out_variants_strelka`` in the ``vcf`` folder.
    """
    self.step("generate_manta_config", GenerateMantaConfig())

    somatic_caller = IlluminaSomaticVariantCaller(
        normal_bam=normal_bam_source,
        tumor_bam=tumor_bam_source,
        intervals=self.strelka_intervals,
        reference=self.reference,
        manta_config=self.generate_manta_config.out,
    )
    self.step("vc_strelka", somatic_caller)

    self.step("vc_strelka_compress", BGZipLatest(file=self.vc_strelka.out))

    strelka_name = StringFormatter(
        "{tumor_name}--{normal_name}_strelka",
        tumor_name=self.tumor_name,
        normal_name=self.normal_name,
    )
    self.output(
        "out_variants_strelka",
        source=self.vc_strelka_compress.out.as_type(CompressedVcf),
        output_folder=["vcf"],
        output_name=strelka_name,
        doc="Variants from the Strelka variant caller",
    )
def test_string_formatter(self):
    # A StringFormatter used as the default of an optional workflow input
    # should show up in the translated CWL step as a null-check expression
    # that falls back to the formatted string.
    workflow = WorkflowBuilder("wf")
    workflow.input("sampleName", str)
    workflow.input("platform", str)
    workflow.input(
        "readGroupHeaderLine",
        String(optional=True),
        default=StringFormatter(
            "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
            name=InputSelector("sampleName"),
            pl=InputSelector("platform"),
        ),
    )
    workflow.step("print", EchoTestTool(inp=workflow.readGroupHeaderLine))
    workflow.output("out", source=workflow.print)

    translated, _ = cwl.CwlTranslator.translate_workflow(
        workflow, with_container=False, allow_empty_container=True
    )
    step_inputs = translated.save()["steps"][0]["in"]
    self.assertEqual(4, len(step_inputs))

    # The formatter expression is read from the last step input.
    actual_expression = step_inputs[-1]["valueFrom"]
    expected_expression = (
        "$((inputs._print_inp_readGroupHeaderLine != null) "
        "? inputs._print_inp_readGroupHeaderLine "
        ': "@RG\\\\tID:{name}\\\\tSM:{name}\\\\tLB:{name}\\\\tPL:{pl}"'
        ".replace(/\\{name\\}/g, inputs._print_inp_sampleName)"
        ".replace(/\\{pl\\}/g, inputs._print_inp_platform))"
    )
    self.assertEqual(expected_expression, actual_expression)
def test_string_formatter_one_input_selector_param(self):
    # A formatter with a single InputSelector argument should translate to a
    # CWL expression with one `.replace(...)` call on the format string.
    b = StringFormatter("an input {arg}", arg=InputSelector("random_input"))
    res = cwl.get_input_value_from_potential_selector_or_generator(
        b, code_environment=False
    )
    # Raw string: "\{" and "\}" are not valid Python escape sequences, so a
    # plain literal triggers an invalid-escape warning. The raw literal has
    # exactly the same value (backslash followed by the brace).
    self.assertEqual(
        r'$("an input {arg}".replace(/\{arg\}/g, inputs.random_input))', res
    )
def arguments(self):
    """Return the fixed command-line arguments for this tool.

    The arguments spell out a three-stage shell pipeline:
    ``bwa mem ... | k8 /opt/conda/bin/bwa-postalt.js ... | samtools view ...``.
    Ordering on the command line is driven by each argument's ``position``.
    """
    read_group_header = StringFormatter(
        "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
        name=InputSelector("sampleName"),
        pl=InputSelector("platformTechnology"),
    )

    # BWA MEM command
    bwa_mem_args = [
        ToolArgument("bwa", position=0, shell_quote=False),
        ToolArgument("mem", position=1, shell_quote=False),
        ToolArgument(
            read_group_header,
            prefix="-R",
            position=3,
            doc="Complete read group header line.",
        ),
        ToolArgument(
            CpuSelector(),
            prefix="-t",
            position=3,
            shell_quote=False,
            doc="Number of threads. (default = 1)",
        ),
        ToolArgument("|", position=6, shell_quote=False),
    ]

    # Alt Aware Post Processing command
    postalt_args = [
        ToolArgument("k8", position=7, shell_quote=False),
        ToolArgument("/opt/conda/bin/bwa-postalt.js", position=7, shell_quote=False),
    ]

    # Samtools View command
    samtools_view_args = [
        ToolArgument("|", position=10, shell_quote=False),
        ToolArgument("samtools", position=11, shell_quote=False),
        ToolArgument("view", position=12, shell_quote=False),
        ToolArgument(
            InputSelector("reference"),
            prefix="-T",
            position=13,
            shell_quote=False,
        ),
        ToolArgument(
            CpuSelector(),
            position=13,
            prefix="--threads",
            doc="(@) Number of additional threads to use [0]",
            shell_quote=False,
        ),
        ToolArgument(
            "-h",
            position=13,
            shell_quote=False,
            doc="Include header in the output",
        ),
        ToolArgument(
            "-b",
            position=13,
            shell_quote=False,
            doc="Output in the BAM format.",
        ),
    ]

    return bwa_mem_args + postalt_args + samtools_view_args
def add_vardict_variantcaller(self, normal_bam_source, tumor_bam_source):
    """Add the somatic VarDict steps and publish merged and split variants.

    Steps registered, in order:

    * ``generate_vardict_headerlines`` - header lines from the reference
    * ``vc_vardict`` - VarDict caller, scattered over ``intervals``
    * ``vc_vardict_merge`` - gathers the per-interval VCFs
    * ``vc_vardict_compress_for_sort`` - BGZip of the merged VCF
    * ``vc_vardict_sort_combined`` - bcftools sort of the compressed VCF
    * ``vc_vardict_uncompress_for_combine`` - uncompressed copy of the
      sorted VCF (not used as an output in this method)

    Outputs: ``out_variants_vardict`` (sorted, merged) and
    ``out_variants_vardict_split`` (raw per-interval ``vc_vardict`` output).
    """
    self.step(
        "generate_vardict_headerlines",
        GenerateVardictHeaderLines(reference=self.reference),
    )
    self.step(
        "vc_vardict",
        VardictSomaticVariantCaller(
            normal_bam=normal_bam_source,
            tumor_bam=tumor_bam_source,
            normal_name=self.normal_name,
            tumor_name=self.tumor_name,
            header_lines=self.generate_vardict_headerlines.out,
            intervals=self.vardict_intervals,
            reference=self.reference,
            allele_freq_threshold=self.allele_freq_threshold,
            minMappingQual=self.minMappingQual,
            filter=self.filter,
        ),
        scatter="intervals",
    )
    self.step("vc_vardict_merge", Gatk4GatherVcfs_4_1_3(vcfs=self.vc_vardict.out))
    self.step(
        "vc_vardict_compress_for_sort",
        BGZipLatest(file=self.vc_vardict_merge.out),
    )
    self.step(
        "vc_vardict_sort_combined",
        BcfToolsSort_1_9(
            vcf=self.vc_vardict_compress_for_sort.out.as_type(CompressedVcf)
        ),
    )
    self.step(
        "vc_vardict_uncompress_for_combine",
        UncompressArchive(file=self.vc_vardict_sort_combined.out),
    )

    self.output(
        "out_variants_vardict",
        source=self.vc_vardict_sort_combined.out,
        output_folder=["vcf"],
        output_name=StringFormatter(
            "{tumor_name}--{normal_name}_vardict",
            tumor_name=self.tumor_name,
            normal_name=self.normal_name,
        ),
        doc="Merged variants from the VarDict caller",
    )
    self.output(
        "out_variants_vardict_split",
        source=self.vc_vardict.out,
        output_folder=["vcf", "VardictByInterval"],
        # The source is the scattered VarDict step, so the description names
        # VarDict (it previously said "GATK", a copy-paste slip).
        doc="Unmerged variants from the VarDict caller (by interval)",
    )
def arguments(self):
    """Return the fixed ``java -Xmx..G -jar <trimmer.jar>`` prefix arguments.

    The heap size is three quarters of the ``MemorySelector`` value, and the
    jar path is built from the ``agentVersion`` input.
    """
    heap_flag = StringFormatter("-Xmx{memory}G", memory=MemorySelector() * 3 / 4)
    trimmer_jar = StringFormatter(
        "/app/agent_{AGENT_VERSION}/lib/trimmer.jar",
        AGENT_VERSION=InputSelector("agentVersion"),
    )
    return [
        ToolArgument("java", position=0, shell_quote=False),
        ToolArgument(heap_flag, position=1, shell_quote=False),
        ToolArgument(trimmer_jar, prefix="-jar", position=2, shell_quote=False),
    ]
def test_string_formatter_one_input_selector_param(self):
    # A formatter with a single InputSelector argument should unwrap to a
    # CWL expression with one `.replace(...)` call on the format string.
    b = StringFormatter("an input {arg}", arg=InputSelector("random_input"))
    res = cwl.CwlTranslator.unwrap_expression(
        b,
        code_environment=False,
        inputs_dict={"random_input": ToolInput("random_input", str)},
    )
    # Raw string: "\{" and "\}" are not valid Python escape sequences, so a
    # plain literal triggers an invalid-escape warning. The raw literal has
    # exactly the same value (backslash followed by the brace).
    self.assertEqual(
        r'$("an input {arg}".replace(/\{arg\}/g, inputs.random_input))', res
    )
def add_gridss(self, normal_bam_source, tumor_bam_source):
    """Add the GRIDSS step and publish its assembly and variant outputs.

    Both outputs land in ``sv/gridss`` and share the
    ``{tumor_name}--{normal_name}_gridss`` naming pattern.
    """

    def _gridss_name():
        # A fresh formatter per output, as each output owns its own name.
        return StringFormatter(
            "{tumor_name}--{normal_name}_gridss",
            tumor_name=self.tumor_name,
            normal_name=self.normal_name,
        )

    # GRIDSS
    gridss_tool = Gridss_2_6_2(
        bams=[normal_bam_source, tumor_bam_source],
        reference=self.reference,
        blacklist=self.gridss_blacklist,
    )
    self.step("vc_gridss", gridss_tool)

    # GRIDSS
    self.output(
        "out_gridss_assembly",
        source=self.vc_gridss.assembly,
        output_folder=["sv", "gridss"],
        output_name=_gridss_name(),
        doc="Assembly returned by GRIDSS",
    )
    self.output(
        "out_variants_gridss",
        source=self.vc_gridss.out,
        output_folder=["sv", "gridss"],
        output_name=_gridss_name(),
        doc="Variants from the GRIDSS variant caller",
    )
def arguments(self):
    """Return the fixed arguments: a TMPDIR export and the Pisces binary path.

    The binary path is built from the ``piscesVersion`` input.
    """
    export_tmpdir = ToolArgument(
        "export TMPDIR=/tmp;", position=1, shell_quote=False
    )
    pisces_binary = StringFormatter(
        "/app/Pisces_v{PISCES_VERSION}/Pisces",
        PISCES_VERSION=InputSelector("piscesVersion"),
    )
    return [
        export_tmpdir,
        ToolArgument(pisces_binary, position=3, shell_quote=False),
    ]
def test_string_formatter_two_param(self):
    # vardict input format
    b = StringFormatter(
        "{tumorName}:{normalName}",
        tumorName=InputSelector("tumorInputName"),
        normalName=InputSelector("normalInputName"),
    )
    res = cwl.get_input_value_from_potential_selector_or_generator(b)
    # Raw strings: "\{" and "\}" are not valid Python escape sequences, so
    # plain literals trigger invalid-escape warnings. The raw literals have
    # exactly the same value.
    expected = (
        r'$("{tumorName}:{normalName}"'
        r".replace(/\{tumorName\}/g, inputs.tumorInputName)"
        r".replace(/\{normalName\}/g, inputs.normalInputName))"
    )
    self.assertEqual(expected, res)
def outputs(self):
    """Return the tool outputs: the HTML report and its ``_data`` directory.

    Both paths are built from the ``outdir`` and ``filename`` inputs.
    """
    html_report = ToolOutput(
        "out_html",
        File,
        selector=StringFormatter(
            "{output_dir}/{filename}.html",
            output_dir=InputSelector("outdir"),
            filename=InputSelector("filename"),
        ),
    )
    data_directory = ToolOutput(
        "out_multiqc_data",
        Directory,
        selector=StringFormatter(
            "{output_dir}/{filename}_data",
            output_dir=InputSelector("outdir"),
            filename=InputSelector("filename"),
        ),
    )
    return [html_report, data_directory]
def constructor(self):
    """Declare the ``variants`` input and the vep -> vepfilter -> vepvcf steps."""
    # TODO: work out 'target_gene_file'

    # [
    #   vep
    #   + vepfilter
    #   + report_vep_cleanup
    #   + report_vep_text,
    #   + vepvcf +
    #   vepfiltervcf
    #   , [
    #       chr_rename + liftover + oncotator_format,
    #       report_vep_vcf_cleanup + report_vep
    #   ]
    # ]

    self.input("variants", Vcf())

    # First VEP pass over the raw variants (vcf=False, fixed output filename).
    vep_tool = VepCacheLatest(
        inputFile=self.variants,
        symbol=True,
        filterCommon=True,
        sift="b",
        polyphen="b",
        outputFilename="generated.txt",
        vcf=False,
    )
    self.step("vep", vep_tool)

    # Filter the VEP output; the gene file is still the placeholder "FILE".
    symbol_filter = StringFormatter(
        "SYMBOL in {target_gene_file}", target_gene_file="FILE"
    )
    vepfilter_tool = FilterVep_98_3(
        input_file=self.vep.out,
        format="tab",
        filter=symbol_filter,
    )
    self.step("vepfilter", vepfilter_tool)

    # Second VEP pass, fed by the vepfilter step.
    vepvcf_tool = VepCacheLatest(
        inputFile=self.vepfilter,
        symbol=True,
        filterCommon=True,
        alleleNumber=True,
        sift="b",
        polyphen="b",
    )
    self.step("vepvcf", vepvcf_tool)
def arguments(self):
    """Return the fixed arguments: TMPDIR export, ``dotnet`` and the DLL path.

    The ``VariantQualityRecalibration.dll`` path is built from the
    ``piscesVersion`` input.
    """
    recalibration_dll = StringFormatter(
        "/app/Pisces_v{PISCES_VERSION}/VariantQualityRecalibration.dll",
        PISCES_VERSION=InputSelector("piscesVersion"),
    )
    fixed_args = [
        ToolArgument("export TMPDIR=/tmp;", position=1, shell_quote=False),
        ToolArgument("dotnet", position=2, shell_quote=False),
    ]
    fixed_args.append(
        ToolArgument(recalibration_dll, position=3, shell_quote=False)
    )
    return fixed_args
def outputs(self) -> List[ToolOutput]:
    """Return the single ``out`` file output.

    The glob is ``{output_dir}/{tumor_name}--{normal_name}.pdf``, resolved
    from the ``output_dir``, ``tumor_name`` and ``normal_name`` inputs.
    """
    pdf_glob = StringFormatter(
        "{output_dir}/{tumor_name}--{normal_name}.pdf",
        output_dir=InputSelector("output_dir"),
        tumor_name=InputSelector("tumor_name"),
        normal_name=InputSelector("normal_name"),
    )
    return [ToolOutput("out", File, glob=pdf_glob)]
def add_combine_variants(self, normal_bam_source, tumor_bam_source):
    """Combine the GATK, Strelka and VarDict calls and publish the result.

    Steps registered, in order: ``combine_variants``, ``combined_compress``,
    ``combined_sort``, ``combined_uncompress`` and ``combined_addbamstats``.
    The final step's output is exposed as ``out_variants`` in the ``vcf``
    folder.
    """
    caller_vcfs = [
        self.vc_gatk_uncompressvcf.out.as_type(Vcf),
        self.vc_strelka.out,
        self.vc_vardict_uncompress_for_combine.out.as_type(Vcf),
    ]
    combine_tool = CombineVariants_0_0_8(
        normal=self.normal_name,
        tumor=self.tumor_name,
        vcfs=caller_vcfs,
        type="somatic",
        columns=["AD", "DP", "GT"],
    )
    self.step("combine_variants", combine_tool)

    # compress -> sort -> uncompress
    self.step("combined_compress", BGZipLatest(file=self.combine_variants.out))
    sort_tool = BcfToolsSort_1_9(
        vcf=self.combined_compress.out.as_type(CompressedVcf)
    )
    self.step("combined_sort", sort_tool)
    self.step("combined_uncompress", UncompressArchive(file=self.combined_sort.out))

    bamstats_tool = AddBamStatsSomatic_0_1_0(
        normal_id=self.normal_name,
        tumor_id=self.tumor_name,
        normal_bam=normal_bam_source,
        tumor_bam=tumor_bam_source,
        vcf=self.combined_uncompress.out.as_type(Vcf),
        reference=self.reference,
    )
    self.step("combined_addbamstats", bamstats_tool)

    combined_name = StringFormatter(
        "{tumor_name}--{normal_name}_combined",
        tumor_name=self.tumor_name,
        normal_name=self.normal_name,
    )
    self.output(
        "out_variants",
        source=self.combined_addbamstats.out,
        output_folder=["vcf"],
        output_name=combined_name,
        doc="Combined variants from GATK, VarDict and Strelka callers",
    )
def add_arriba(self):
    """Add the ``arriba`` and ``sortsam`` steps and publish their outputs.

    Both steps read ``star.out_unsorted_bam`` (asserted non-null). Outputs:
    the sorted BAM, the arriba fusions and the discarded fusions.
    """
    arriba_tool = Arriba_1_2_0(
        aligned_inp=self.star.out_unsorted_bam.assert_not_null(),
        blacklist=self.blacklist,
        fusion_transcript=True,
        peptide_sequence=True,
        reference=self.reference,
        gtf_file=self.gtf,
        contigs=self.contigs,
    )
    self.step("arriba", arriba_tool)

    sortsam_tool = Gatk4SortSamLatest(
        bam=self.star.out_unsorted_bam.assert_not_null(),
        sortOrder="coordinate",
        createIndex=True,
    )
    self.step("sortsam", sortsam_tool)

    self.output("out_arriba_bam", source=self.sortsam.out, output_name=self.name)

    fusion_name = StringFormatter("{sample_name}_fusion", sample_name=self.name)
    self.output(
        "out_arriba_fusion",
        source=self.arriba.out,
        output_name=fusion_name,
    )

    discarded_name = StringFormatter(
        "{sample_name}_fusion_discarded", sample_name=self.name
    )
    self.output(
        "out_arriba_fusion_discarded",
        source=self.arriba.out_discarded,
        output_name=discarded_name,
    )
def arguments(self):
    """Return the fixed arguments for the ``bwa mem ... | samtools view ...`` pipeline.

    Ordering on the command line is driven by each argument's ``position``,
    not by its place in the returned list.
    """
    return [
        ToolArgument("bwa", position=0, shell_quote=False),
        ToolArgument("mem", position=1, shell_quote=False),
        ToolArgument("|", position=5, shell_quote=False),
        ToolArgument("samtools", position=6, shell_quote=False),
        ToolArgument("view", position=7, shell_quote=False),
        ToolArgument(
            InputSelector("reference"), prefix="-T", position=8, shell_quote=False
        ),
        ToolArgument(
            CpuSelector(),
            position=8,
            shell_quote=False,
            prefix="--threads",
            doc="(-@) Number of additional threads to use [0]",
        ),
        ToolArgument(
            "-h",
            position=8,
            shell_quote=False,
            doc="Include the header in the output.",
        ),
        ToolArgument(
            "-b", position=8, shell_quote=False, doc="Output in the BAM format."
        ),
        ToolArgument(
            StringFormatter(
                "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
                name=InputSelector("sampleName"),
                pl=InputSelector("platformTechnology"),
            ),
            prefix="-R",
            position=2,
            # Adjacent literals are concatenated verbatim, so every fragment
            # must carry its own trailing space ("TAB " was missing one,
            # which rendered as "TABin the output SAM").
            doc="Complete read group header line. ’\\t’ can be used in STR and will be converted to a TAB "
            "in the output SAM. The read group ID will be attached to every read in the output. "
            "An example is ’@RG\\tID:foo\\tSM:bar’. (Default=null) "
            "https://gatkforums.broadinstitute.org/gatk/discussion/6472/read-groups",
        ),
        ToolArgument(
            CpuSelector(),
            prefix="-t",
            position=2,
            shell_quote=False,
            doc="Number of threads. (default = 1)",
        ),
    ]
def outputs(self):
    """Return the six file outputs located under ``{output_dir}/{sample}.*``.

    Every selector is resolved from the ``output_dir`` and ``sample`` inputs;
    only ``out_coverage_tsv`` is declared optional.
    """

    def _in_output_dir(pattern):
        # Fresh formatter (and fresh selectors) for each output.
        return StringFormatter(
            pattern,
            output_dir=InputSelector("output_dir"),
            sample=InputSelector("sample"),
        )

    gene_fragments = ToolOutput(
        "out_gene_fragments",
        File,
        selector=_in_output_dir("{output_dir}/{sample}.gene_fragments.gct"),
    )
    gene_reads = ToolOutput(
        "out_gene_reads",
        File,
        selector=_in_output_dir("{output_dir}/{sample}.gene_reads.gct"),
    )
    gene_tpm = ToolOutput(
        "out_gene_tpm",
        File,
        selector=_in_output_dir("{output_dir}/{sample}.gene_tpm.gct"),
    )
    metrics_tsv = ToolOutput(
        "out_metrics_tsv",
        Tsv,
        selector=_in_output_dir("{output_dir}/{sample}.metrics.tsv"),
    )
    coverage_tsv = ToolOutput(
        "out_coverage_tsv",
        Tsv(optional=True),
        selector=_in_output_dir("{output_dir}/{sample}.coverage.tsv"),
    )
    exon_reads = ToolOutput(
        "out_exon_reads",
        File,
        selector=_in_output_dir("{output_dir}/{sample}.exon_reads.gct"),
    )
    return [
        gene_fragments,
        gene_reads,
        gene_tpm,
        metrics_tsv,
        coverage_tsv,
        exon_reads,
    ]
def add_snp_pileup(self):
    """Add the ``snp_pileup`` step, prefixed ``{tumor}--{normal}``."""
    pileup_prefix = StringFormatter(
        "{tumor}--{normal}",
        tumor=self.tumor_name,
        normal=self.normal_name,
    )
    pileup_tool = FacetsSnpPileup_2_0_8(
        normal_bam=self.normal_bam,
        tumor_bam=self.tumor_bam,
        output_prefix=pileup_prefix,
        vcf_file=self.snps_dbsnp,
        pseudo_snps=self.pseudo_snps,
        max_depth=self.max_depth,
    )
    self.step("snp_pileup", pileup_tool)
def test_create_single_file_path_from_operator(self):
    # A file-to-create whose name is a StringFormatter should become a single
    # Dirent whose entryname is the translated CWL expression.
    command = CommandToolBuilder(
        **self.initial_params,
        files_to_create=[
            (
                StringFormatter("{name}.txt", name=InputSelector("name")),
                "this is contents",
            )
        ],
    )
    req = CwlTranslator.build_initial_workdir_from_tool(command).listing
    self.assertEqual(1, len(req))
    self.assertIsInstance(req[0], cwlgen.Dirent)
    # Raw string: "\{" and "\}" are not valid Python escape sequences, so a
    # plain literal triggers an invalid-escape warning. The raw literal has
    # exactly the same value.
    self.assertEqual(
        r'$("{name}.txt".replace(/\{name\}/g, inputs.name))', req[0].entryname
    )
    self.assertEqual("this is contents", req[0].entry)
def arguments(self) -> List[ToolArgument]:
    """Return the fixed arguments: ``configManta.py``, then ``;<runDir>/runWorkflow.py -j <cpus>``.

    The ``;`` is emitted unquoted, so the configure command and the
    generated ``runWorkflow.py`` end up on one command line.
    """
    run_workflow = StringFormatter(";") + InputSelector("runDir") + "/runWorkflow.py"
    configure_arg = ToolArgument("configManta.py", position=0, shell_quote=False)
    run_arg = ToolArgument(run_workflow, position=2, shell_quote=False)
    jobs_arg = ToolArgument(
        CpuSelector(None),
        position=3,
        shell_quote=False,
        prefix="-j",
        doc="(-j) number of jobs, must be an integer or 'unlimited' "
        "(default: Estimate total cores on this node for local mode, 128 for sge mode)",
    )
    return [configure_arg, run_arg, jobs_arg]
def arguments(self):
    """Return the fixed arguments: the configure script, then ``;<rundir>/runWorkflow.py --jobs <cpus>``.

    The ``;`` is emitted unquoted, so the configure command and the
    generated ``runWorkflow.py`` end up on one command line.
    """
    run_workflow = StringFormatter(";") + InputSelector("rundir") + "/runWorkflow.py"
    configure_arg = ToolArgument("configureStrelkaSomaticWorkflow.py", position=0)
    run_arg = ToolArgument(run_workflow, position=2, shell_quote=False)
    jobs_arg = ToolArgument(
        CpuSelector(None),
        prefix="--jobs",
        position=3,
        shell_quote=False,
        doc=" (-j JOBS) number of jobs, must be an integer or 'unlimited' "
        "(default: Estimate total cores on this node for local mode, 128 for sge mode)",
    )
    return [configure_arg, run_arg, jobs_arg]
def arguments(self):
    """Return the leading JVM / GATK arguments for this tool.

    Negative positions place, in order: the ``-Xmx`` heap flag (three
    quarters of the ``MemorySelector`` value), the GATK jar, and
    ``-T <gatk_command()>``.
    """
    heap_flag = StringFormatter(
        "-Xmx{memory}G",
        memory=MemorySelector() * 3 / 4,
    )
    heap_arg = ToolArgument(heap_flag, position=-3, shell_quote=False)
    jar_arg = ToolArgument(
        "-jar /usr/GenomeAnalysisTK.jar", position=-2, shell_quote=False
    )
    walker = self.gatk_command()
    walker_arg = ToolArgument(f"-T {walker}", position=-1, shell_quote=False)
    return [heap_arg, jar_arg, walker_arg]
def test_string_formatter_two_param(self):
    # vardict input format
    b = StringFormatter(
        "{tumorName}:{normalName}",
        tumorName=InputSelector("tumorInputName"),
        normalName=InputSelector("normalInputName"),
    )
    inputs_dict = {
        "tumorInputName": ToolInput("tumorInputName", str),
        "normalInputName": ToolInput("normalInputName", str),
    }
    res = cwl.CwlTranslator.unwrap_expression(
        b, code_environment=False, inputs_dict=inputs_dict
    )
    # Raw strings: "\{" and "\}" are not valid Python escape sequences, so
    # plain literals trigger invalid-escape warnings. The raw literals have
    # exactly the same value.
    expected = (
        r'$("{tumorName}:{normalName}"'
        r".replace(/\{tumorName\}/g, inputs.tumorInputName)"
        r".replace(/\{normalName\}/g, inputs.normalInputName))"
    )
    self.assertEqual(expected, res)
def add_circos_plot(self):
    """Add the ``circos_plot`` step and publish the generated plot.

    The step consumes ``vc_facets.out_hisens_rds`` and
    ``vc_strelka.tumor_sv``; its output is exposed as ``out_circos_plot``
    in the ``circos_plot`` folder.
    """
    circos_tool = CircosPlot_0_1_2(
        tumor_name=self.tumor_name,
        normal_name=self.normal_name,
        facets_file=self.vc_facets.out_hisens_rds,
        sv_file=self.vc_strelka.tumor_sv,
    )
    self.step("circos_plot", circos_tool)

    plot_name = StringFormatter(
        "{tumor_name}--{normal_name}_circos_plot.pdf",
        tumor_name=self.tumor_name,
        normal_name=self.normal_name,
    )
    self.output(
        "out_circos_plot",
        source=self.circos_plot.out,
        output_folder="circos_plot",
        output_name=plot_name,
    )
def arguments(self):
    """Return the single ``--java-options`` argument.

    The value is ``-Xmx{memory}G {compression} {otherargs}`` where:

    * ``memory`` is three quarters of the ``MemorySelector`` value;
    * ``compression`` is ``-Dsamjdk.compress_level=<compression_level>``
      when that input is defined, otherwise an empty string;
    * ``otherargs`` is the space-joined ``javaOptions`` input, with an empty
      list as the fallback operand of ``FirstOperator``.
    """
    heap_size = MemorySelector() * 3 / 4
    compression_flag = If(
        IsDefined(InputSelector("compression_level")),
        "-Dsamjdk.compress_level=" + InputSelector("compression_level"),
        "",
    )
    extra_options = JoinOperator(
        FirstOperator([InputSelector("javaOptions"), []]), " "
    )
    java_options = StringFormatter(
        "-Xmx{memory}G {compression} {otherargs}",
        memory=heap_size,
        compression=compression_flag,
        otherargs=extra_options,
    )
    return [ToolArgument(java_options, prefix="--java-options", position=-1)]
def add_addbamstats(self, bam_source):
    """Add the germline ``vc_gatk_addbamstats`` step and publish its result.

    The step receives the supplied BAM source, the ``vc_gatk_uncompress``
    output cast to ``Vcf`` and the reference. Its output is exposed as
    ``out_variants_bamstats`` in the ``variants`` folder, named after the
    sample.
    """
    bamstats_tool = AddBamStatsGermline_0_1_0(
        bam=bam_source,
        vcf=self.vc_gatk_uncompress.out.as_type(Vcf),
        reference=self.reference,
    )
    self.step("vc_gatk_addbamstats", bamstats_tool)

    sample_label = StringFormatter(
        "{sample_name}",
        sample_name=self.sample_name,
    )
    self.output(
        "out_variants_bamstats",
        source=self.vc_gatk_addbamstats.out,
        output_folder=["variants"],
        output_name=sample_label,
        doc="Final vcf from GATK",
    )
def test_string_formatter_stepinput(self):
    # A StringFormatter passed directly as a step input should show up in the
    # translated CWL step as a `valueFrom` expression.
    workflow = WorkflowBuilder("wf")
    workflow.input("sampleName", str)
    workflow.input("platform", str)

    read_group_header = StringFormatter(
        "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
        name=workflow.sampleName,
        pl=workflow.platform,
    )
    workflow.step("print", EchoTestTool(inp=read_group_header))
    workflow.output("out", source=workflow.print)

    translated, _ = cwl.CwlTranslator.translate_workflow(
        workflow, with_container=False, allow_empty_container=True
    )
    step_inputs = translated.save()["steps"][0]["in"]
    self.assertEqual(3, len(step_inputs))

    # The formatter expression is read from the last step input.
    actual_expression = step_inputs[-1]["valueFrom"]
    expected_expression = (
        '$("@RG\\\\tID:{name}\\\\tSM:{name}\\\\tLB:{name}\\\\tPL:{pl}"'
        ".replace(/\\{name\\}/g, inputs._print_inp_sampleName)"
        ".replace(/\\{pl\\}/g, inputs._print_inp_platform))"
    )
    self.assertEqual(expected_expression, actual_expression)