def test_newstyle_task(self):
    """Build the four-task pipeline with the new-style Pipeline API,
    run it, and check the merged final output matches expectations."""
    test_pipeline = Pipeline("test")
    test_pipeline.files(task1, [[None, tempdir + "a.1"], [None, tempdir + "b.1"]])\
        .follows(mkdir(tempdir))
    test_pipeline.files(task2, [[None, tempdir + "c.1"], [None, tempdir + "d.1"]])\
        .follows(mkdir(tempdir))
    # replace_inputs swaps each job's input for (matched file, task2 outputs,
    # glob of this test script) before task3 runs
    test_pipeline.transform(
        task_func=task3,
        input=task1,
        filter=regex(r"(.+)"),
        replace_inputs=ruffus.inputs(((r"\1"), task2, "test_transform_inputs.*y")),
        output=r"\1.output")
    test_pipeline.merge(task4, (task3), tempdir + "final.output")
    test_pipeline.run([task4], multiprocess=10, verbose=0)

    correct_output = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
        tempdir=tempdir)
    with open(tempdir + "final.output") as ff:
        real_output = ff.read()
    self.assertEqual(correct_output, real_output)
def test_newstyle_no_re_match(self):
    """inputs(...) with more than one argument is an error: build a
    transform that does so and verify ruffus raises one of the two
    expected exception types."""
    try:
        test_pipeline = Pipeline("test")
        test_pipeline.transform(
            task_func=task_2,
            input=None,
            filter=regex(tempdir + "b"),
            replace_inputs=inputs(tempdir + "a", tempdir + "b"),
            output="task_1.output")
        test_pipeline.run(multiprocess=10, verbose=0)
    except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
        print("\tExpected exception thrown 1")
        return
    except ruffus.ruffus_exceptions.error_inputs_multiple_args:
        print("\tExpected exception thrown 2")
        return
    # neither exception fired: the misuse went undetected
    raise Exception("Inputs(...) with multiple arguments should have thrown an exception")
def test_newstyle_no_re_match(self):
    """Verify that passing multiple arguments to inputs(...) raises
    one of the two dedicated ruffus exceptions; fail otherwise."""
    try:
        pipeline_under_test = Pipeline("test")
        pipeline_under_test.transform(task_func=task_2,
                                      input=None,
                                      filter=regex(tempdir + "b"),
                                      replace_inputs=inputs(
                                          tempdir + "a", tempdir + "b"),
                                      output="task_1.output")
        pipeline_under_test.run(multiprocess=10, verbose=0)
    except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
        print("\tExpected exception thrown 1")
        return
    except ruffus.ruffus_exceptions.error_inputs_multiple_args:
        print("\tExpected exception thrown 2")
        return
    raise Exception(
        "Inputs(...) with multiple arguments should have thrown an exception"
    )
@transform(task1, suffix(".1"), ".2")
def task2(infiles, outfiles, *extra_params):
    """
    Second task
    """
    # record job start / finish so the test harness can replay ordering
    with open(tempdir + "jobs.start", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))


#
#    task3
#
@transform(task2, regex('(.*).2'), inputs([r"\1.2", tempdir + "a.1"]), r'\1.3')
@posttask(lambda: do_write(test_file, "Task 3 Done\n"))
def task3(infiles, outfiles, *extra_params):
    """
    Third task
    """
    with open(tempdir + "jobs.start", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))


#
#    task4
#
cmd_dict = CMD_DICT.copy() cmd_dict['infile'] = input_file cmd_dict['outfile'] = output_file pmsg('Interval Creation', input_file, output_file) gatk_cmd = '%(gatk)s --analysis_type RealignerTargetCreator ' + \ '--reference_sequence %(reference)s ' + \ '--DBSNP %(dbsnp)s ' + \ '--input_file %(infile)s ' + \ '--out %(outfile)s' call(gatk_cmd, cmd_dict) # Realign around possible indels @follows(mkdir('realigned')) @transform(create_intervals, regex(r'^intervals/(.+)\.intervals$'), inputs([r'deduped/\1.deduped.bam', r'intervals/\1.intervals']), r'realigned/\1.realigned.bam') def local_realignment(input_files, output_file): '''Realign reads around candidate indels''' cmd_dict = CMD_DICT.copy() cmd_dict['bam_file'] = input_files[0] cmd_dict['indel_intervals'] = input_files[1] cmd_dict['outfile'] = output_file pmsg('Local Realignment', ', '.join(input_files), output_file) gatk_cmd = '%(gatk)s --analysis_type IndelRealigner ' + \ '--reference_sequence %(reference)s ' + \ '--DBSNP %(dbsnp)s ' + \ '--input_file %(bam_file)s ' + \ '--targetIntervals %(indel_intervals)s ' + \ '--out %(outfile)s' call(gatk_cmd, cmd_dict)
# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
@follows(mkdir(tempdir))
@ruffus.files([[None, tempdir + "a.1"], [None, tempdir + "b.1"]])
def task1(i, o):
    """First task: create empty marker files a.1 / b.1."""
    touch(o)


@follows(mkdir(tempdir))
@ruffus.files([[None, tempdir + "c.1"], [None, tempdir + "d.1"]])
def task2(i, o):
    """Second task: create empty marker files c.1 / d.1."""
    touch(o)


@transform(task1, regex(r"(.+)"),
           ruffus.inputs(((r"\1"), task2, "test_transform_inputs.*y")),
           r"\1.output")
def task3(i, o):
    """Write the sorted, comma-joined input names into the single output file.

    Bug fix: the original looped `for f in o` — but `o` is the one output
    file NAME (a string), so that iterated its characters and re-ran
    `open(o, "w")` once per character, rewriting the same file repeatedly
    (the loop variable `f` was never used).  Write the file exactly once;
    the final file content is unchanged.
    """
    names = ",".join(sorted(i))
    with open(o, "w") as ff:
        ff.write(names)


@merge((task3), tempdir + "final.output")
def task4(i, o):
    """Concatenate every task3 output as 'name:content;' into one file."""
    with open(o, "w") as o_file:
        for f in sorted(i):
            with open(f) as ff:
                o_file.write(f + ":" + ff.read() + ";")
@follows(mkdir('sai'), mkdir('logs'))
@transform(copy_sequence, regex(r'^fastq/(.+)_sequence\.fastq\.gz$'),
           r'sai/\1.sai')
def fastq_to_sai(input_file, output_file):
    '''Convert FASTQ files to SAI files.'''
    cmd_dict = CMD_DICT.copy()
    cmd_dict['infile'] = input_file
    cmd_dict['outfile'] = output_file
    pmsg('Aligning sequences', cmd_dict['infile'], cmd_dict['outfile'])
    bwacmd = '%(bwa)s aln -t %(threads)s -f %(outfile)s %(reference)s %(infile)s'
    call(bwacmd, cmd_dict)


# Merge paired ends to SAM
@follows(mkdir('sam'))
@transform(fastq_to_sai,
           regex(r'^sai/(\w+)_s_(\d)(_1)?\.sai$'),
           inputs([r'sai/\1_s_\2*.sai', r'fastq/\1_s_\2*.fastq.gz']),
           r'sam/\1_s_\2.sam')
def make_sam(input_files, output_file):
    '''Convert SAI files and FASTQ files to SAM files.'''
    def saicmp(x, y):
        '''Compare function for moving sai files to front of list'''
        # NOTE(review): cmp() is Python-2-only; this module appears to be
        # legacy Py2 code — confirm before running under Python 3.
        if x.endswith('sai') and not y.endswith('sai'):
            return -1
        elif y.endswith('sai') and not x.endswith('sai'):
            return 1
        else:
            return cmp(x, y)
    cmd_dict = CMD_DICT.copy()
    assert type(input_files) is type([])
    # NOTE(review): make_sam's body is truncated in this chunk and
    # continues beyond the visible source.
# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 # imports # 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 # 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 # Tasks # 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 try: @transform(None, regex(tempdir + "b"), inputs(tempdir + "a", tempdir + "b"), "task_1.output") def task_1(i, o): for f in o: open(f, 'w') except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args: print("\tExpected exception thrown 1") except ruffus.ruffus_exceptions.error_inputs_multiple_args: print("\tExpected exception thrown 2") def task_2(i, o): for f in o: open(f, 'w') class Test_task_mkdir(unittest.TestCase):
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 import unittest import json #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 # Tasks #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 try: @transform(None, regex(tempdir + "b"), inputs(tempdir + "a", tempdir + "b"), "task_1.output") def task_1 (i, o): for f in o: open(f, 'w') except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args: print("\tExpected exception thrown 1") except ruffus.ruffus_exceptions.error_inputs_multiple_args: print("\tExpected exception thrown 2") def task_2 (i, o): for f in o: open(f, 'w') class Test_task_mkdir(unittest.TestCase):
def main():
    """Drive the proforma ruffus pipeline: parse CLI options, register
    the test tasks, print the flowchart and run the pipeline."""
    #########
    # SETUP #
    #########

    # catch jgi logon and password from cli
    parser = ruffus.cmdline.get_argparse(
        description='5 accessions variant calling pipeline.')
    parser.add_argument('--email', '-e', help='Logon email address for JGI',
                        type=str, dest='jgi_logon')
    parser.add_argument('--password', '-p', help='JGI password',
                        type=str, dest='jgi_password')
    options = parser.parse_args()
    jgi_logon = options.jgi_logon
    jgi_password = options.jgi_password

    ##################
    # PIPELINE STEPS #
    ##################

    # initialise pipeline
    main_pipeline = ruffus.Pipeline.pipelines["main"]

    # test originate job
    test_originate_files = ['ruffus/foo.txt', 'ruffus/bar.txt']
    test_originate = main_pipeline.originate(
        name='test_originate',
        task_func=functions.generate_job_function(
            job_script='src/test_originate',
            job_name='test_originate',
            job_type='originate'),
        output=test_originate_files)

    # test download job -- requires JGI credentials
    if not (jgi_logon and jgi_password):
        raise ValueError('Supply jgi_logon and jgi_password')
    test_download = main_pipeline.originate(
        name='test_download',
        task_func=functions.generate_job_function(
            job_script='src/test_download',
            job_name='test_download',
            job_type='download'),
        output='ruffus/download.txt',
        extras=[jgi_logon, jgi_password])

    # test transform with multiple outputs (e.g. bamfile, FASTA etc)
    test_transform = main_pipeline.transform(
        name="test_transform",
        task_func=functions.generate_job_function(
            job_script='src/test_transform',
            job_name='test_transform',
            job_type='transform'),
        input=test_originate,
        filter=ruffus.suffix(".txt"),
        output=["_transformed.txt", "_transformed.bam"])

    # Transform ONLY the bam files produced by test_transform
    # The filtering here is a bit crazy. `input` has to be an object, not
    # ruffus.output_from(). `replace_inputs` should use `ruffus.inputs()` to
    # match the files, but `filter` has to match the first file produced by
    # the previous step, NOT necessarily the file that will be transformed!
    test_selective_transform = main_pipeline.transform(
        name="test_selective_transform",
        task_func=functions.generate_job_function(
            job_script='src/test_selective_transform',
            job_name='test_selective_transform',
            job_type='transform'),
        input=test_transform,
        replace_inputs=ruffus.inputs(r"\1.bam"),
        filter=ruffus.suffix(".txt"),
        output=".bof")

    test_merge = main_pipeline.merge(
        name='test_merge',
        task_func=functions.generate_job_function(
            job_script='src/test_merge',
            job_name='test_merge',
            job_type='merge'),
        input=test_transform,
        output='ruffus/foobar_merge.txt')

    ###################
    # RUFFUS COMMANDS #
    ###################

    # print the flowchart
    ruffus.pipeline_printout_graph(
        "ruffus/flowchart.pdf", "pdf",
        pipeline_name="Ruffus proforma pipeline")

    # run the pipeline
    ruffus.cmdline.run(options, multithread=8)
@transform(copy_sequence, regex(r'^fastq/(.+)_sequence\.fastq\.gz$'),
           r'sai/\1.sai')
def fastq_to_sai(input_file, output_file):
    '''Convert FASTQ files to SAI files.'''
    cmd_dict = CMD_DICT.copy()
    cmd_dict['infile'] = input_file
    cmd_dict['outfile'] = output_file
    pmsg('Aligning sequences', cmd_dict['infile'], cmd_dict['outfile'])
    bwacmd = '%(bwa)s aln -t %(threads)s -f %(outfile)s %(reference)s %(infile)s'
    call(bwacmd, cmd_dict)


# Merge paired ends to SAM
@follows(mkdir('sam'))
@transform(fastq_to_sai,
           regex(r'^sai/(\w+)_s_(\d)(_1)?\.sai$'),
           inputs([r'sai/\1_s_\2*.sai', r'fastq/\1_s_\2*.fastq.gz']),
           r'sam/\1_s_\2.sam')
def make_sam(input_files, output_file):
    '''Convert SAI files and FASTQ files to SAM files.'''
    def saicmp(x, y):
        '''Compare function for moving sai files to front of list'''
        # NOTE(review): cmp() is Python-2-only; legacy code — confirm the
        # interpreter version before relying on this sort helper.
        if x.endswith('sai') and not y.endswith('sai'):
            return -1
        elif y.endswith('sai') and not x.endswith('sai'):
            return 1
        else:
            return cmp(x, y)
    cmd_dict = CMD_DICT.copy()
    assert type(input_files) is type([])
    pmsg('Generating SAM file', ', '.join(input_files), output_file)
    # NOTE(review): make_sam's body is truncated in this chunk and
    # continues beyond the visible source.
def task2(infiles, outfiles, *extra_params):
    """
    Second task
    """
    # log job start / finish for the test harness
    with open(tempdir + "jobs.start", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))


#
#    task3
#
@transform(task2, regex('(.*).2'), inputs([r"\1.2", tempdir + "a.1"]), r'\1.3')
@posttask(lambda: do_write(test_file, "Task 3 Done\n"))
def task3(infiles, outfiles, *extra_params):
    """
    Third task
    """
    with open(tempdir + "jobs.start", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as oo:
        oo.write('job = %s\n' % json.dumps([infiles, outfiles]))


#
#    task4