def test_newstyle_ruffus(self):
    """Assemble the random-numbers/variance pipeline on a Pipeline object and run it.

    Four tasks are registered without decorators (files -> split ->
    transform -> merge), the pipeline is run, and the test fails with an
    ``Exception`` if ``variance.result`` is missing from ``tempdir``
    afterwards.
    """
    pipeline = Pipeline("test")
    numbers_list = tempdir + "random_numbers.list"

    # Stage 1: produce the list of random numbers once tempdir exists.
    make_numbers = pipeline.files(create_random_numbers, None, numbers_list)
    make_numbers.follows(mkdir(tempdir))

    # Stage 2: split the list into chunk files.
    chunker = pipeline.split(
        task_func=step_4_split_numbers_into_chunks,
        input=numbers_list,
        output=tempdir + "*.chunks",
    )
    chunker.follows(create_random_numbers)

    # Stage 3: one ".sums" file per ".chunks" file.
    pipeline.transform(
        task_func=step_5_calculate_sum_of_squares,
        input=step_4_split_numbers_into_chunks,
        filter=suffix(".chunks"),
        output=".sums",
    )

    # Stage 4: merge everything into the final result, with post-task hooks.
    merger = pipeline.merge(
        task_func=step_6_calculate_variance,
        input=step_5_calculate_sum_of_squares,
        output=os.path.join(tempdir, "variance.result"),
    )
    merger = merger.posttask(lambda: sys.stdout.write(" hooray\n"))
    merger.posttask(
        print_hooray_again,
        print_whoppee_again,
        touch_file(os.path.join(tempdir, "done")),
    )

    pipeline.run(multiprocess=50, verbose=0)

    output_file = os.path.join(tempdir, "variance.result")
    if os.path.exists(output_file):
        return
    raise Exception("Missing %s" % output_file)
def create_pipeline(self):
    """Build and return a fresh, uniquely named Pipeline without decorators.

    Every call increments the module-level ``count_pipelines`` counter and
    uses its new value in the pipeline name ("test <n>").
    """
    global count_pipelines
    count_pipelines += 1

    pipeline = Pipeline("test %d" % count_pipelines)

    # Both transform tasks are registered with the same arguments; only the
    # task function differs.
    for func in (transform1, transform_raise_error):
        pipeline.transform(
            task_func=func,
            input=input_file,
            filter=suffix('.txt'),
            output='.output',
            extras=[runtime_data],
        )

    pipeline.split(task_func=split1, input=input_file, output=split1_outputs)
    pipeline.merge(task_func=merge2, input=split1, output=merge2_output)
    return pipeline
def test_newstyle_ruffus(self):
    """Run the variance pipeline built through the object-oriented API.

    Registers the files, split, transform and merge tasks on a ``Pipeline``
    named "test", runs it, and raises ``Exception`` when the merged
    ``variance.result`` file does not exist in ``tempdir``.
    """
    random_numbers_path = tempdir + "random_numbers.list"
    test_pipeline = Pipeline("test")

    # create_random_numbers runs after tempdir has been created.
    files_task = test_pipeline.files(create_random_numbers, None, random_numbers_path)
    files_task.follows(mkdir(tempdir))

    split_task = test_pipeline.split(
        task_func=step_4_split_numbers_into_chunks,
        input=random_numbers_path,
        output=tempdir + "*.chunks",
    )
    split_task.follows(create_random_numbers)

    test_pipeline.transform(
        task_func=step_5_calculate_sum_of_squares,
        input=step_4_split_numbers_into_chunks,
        filter=suffix(".chunks"),
        output=".sums",
    )

    merge_task = test_pipeline.merge(
        task_func=step_6_calculate_variance,
        input=step_5_calculate_sum_of_squares,
        output=os.path.join(tempdir, "variance.result"),
    )
    # Post-task hooks are attached in the same order as the original chain.
    with_first_hook = merge_task.posttask(lambda: sys.stdout.write(" hooray\n"))
    with_first_hook.posttask(
        print_hooray_again,
        print_whoppee_again,
        touch_file(os.path.join(tempdir, "done")),
    )

    test_pipeline.run(multiprocess=50, verbose=0)

    output_file = os.path.join(tempdir, "variance.result")
    result_is_missing = not os.path.exists(output_file)
    if result_is_missing:
        raise Exception("Missing %s" % output_file)
def test_newstyle_ruffus(self):
    """Run the split -> align -> percentage-identity -> combine pipeline.

    The tasks are registered on a ``Pipeline`` object rather than through
    decorators. Each stage writes a progress line to ``verbose_output`` from
    a post-task hook. The test raises ``Exception`` if
    ``all.combine_results`` is absent from ``tempdir`` after the run.
    """
    pipeline = Pipeline("test")

    # Split the original fasta file into pieces plus a success flag file.
    splitter = pipeline.split(
        task_func=split_fasta_file,
        input=tempdir + "original.fa",
        output=[
            tempdir + "files.split.success",
            tempdir + "files.split.*.fa",
        ],
    )
    splitter.posttask(lambda: verbose_output.write(" Split into %d files\n" % 10))

    # fa -> aln
    aligner = pipeline.transform(
        task_func=align_sequences,
        input=split_fasta_file,
        filter=suffix(".fa"),
        output=".aln",
    )
    aligner.posttask(lambda: verbose_output.write(" Sequences aligned\n"))

    # Take every result of align_sequences and replace the ".aln" suffix.
    identity_outputs = [
        r".pcid",          # .pcid suffix for the result
        r".pcid_success",  # .pcid_success to indicate job completed
    ]
    scorer = pipeline.transform(
        task_func=percentage_identity,
        input=align_sequences,
        filter=suffix(".aln"),
        output=identity_outputs,
    )
    scorer.posttask(lambda: verbose_output.write(" %Identity calculated\n"))

    combiner = pipeline.merge(
        task_func=combine_results,
        input=percentage_identity,
        output=[
            tempdir + "all.combine_results",
            tempdir + "all.combine_results_success",
        ],
    )
    combiner.posttask(lambda: verbose_output.write(" Results recombined\n"))

    pipeline.run(multiprocess=50, verbose=0)

    if os.path.exists(tempdir + "all.combine_results"):
        return
    raise Exception("Missing %s" % (tempdir + "all.combine_results"))
def test_newstyle_ruffus(self):
    """Exercise ``Pipeline.printout_graph`` on an originate/split/subdivide pipeline.

    When ``self.graph_viz_present`` is false only a ``.dot`` flowchart is
    requested. Otherwise dot, jpg and svg flowcharts are requested, an
    unrecognised file extension is expected to raise ``CalledProcessError``,
    and the same file name is then accepted once the format ("svg") is
    passed explicitly.
    """
    print(" Run pipeline normally...")

    pipeline = Pipeline("test")
    pipeline.originate(make_start, [tempdir + 'start'])
    pipeline.split(split_start, make_start, tempdir + '*.split')
    pipeline.subdivide(
        subdivide_start,
        split_start,
        formatter(),
        tempdir + '{basename[0]}_*.subdivided',
        tempdir + '{basename[0]}',
    )

    if not self.graph_viz_present:
        pipeline.printout_graph(
            tempdir + "flowchart.dot",
            target_tasks=[subdivide_start],
            forcedtorun_tasks=[split_start],
            no_key_legend=True,
        )
        return

    pipeline.printout_graph(tempdir + "flowchart.dot")
    pipeline.printout_graph(
        tempdir + "flowchart.jpg",
        target_tasks=[subdivide_start],
        forcedtorun_tasks=[split_start],
        no_key_legend=True,
    )
    pipeline.printout_graph(tempdir + "flowchart.svg", no_key_legend=False)

    # Unknown format
    try:
        pipeline.printout_graph(tempdir + "flowchart.unknown", no_key_legend=False)
    except CalledProcessError:
        # Expected: the extension cannot be mapped to an output format.
        pass
    else:
        raise Exception("Failed to throw exception for test_pipeline.printout_graph unknown extension ")

    # Same file name, but with the output format given explicitly.
    pipeline.printout_graph(tempdir + "flowchart.unknown", "svg", no_key_legend=False)
def test_newstyle_mkdir_run(self):
    """Register mkdir-decorated tasks on a Pipeline object, then run the tasks.

    A split task and a transform task are added to a ``Pipeline`` named
    "test", with several ``mkdir`` prerequisites chained on (plain paths and
    formatter-based patterns). The temporary directory is cleaned before
    ``pipeline_run`` is invoked.
    """
    pipeline = Pipeline("test")

    initial_outputs = [
        tempdir + "/" + letter + "_name.tmp1" for letter in "abcd"
    ]
    pipeline.split(
        task_func=generate_initial_files1,
        input=1,
        output=initial_outputs,
    )

    transform_task = pipeline.transform(
        task_func=test_transform,
        input=generate_initial_files1,
        filter=formatter(),
        output="{path[0]}/{basename[0]}.dir/{basename[0]}.tmp2",
    )
    transform_task = transform_task.mkdir(tempdir + "/test1")
    transform_task = transform_task.mkdir(tempdir + "/test2")
    transform_task.mkdir(
        generate_initial_files1,
        formatter(),
        ["{path[0]}/{basename[0]}.dir", 3, "{path[0]}/{basename[0]}.dir2"],
    )

    mkdir_task = pipeline.mkdir(test_transform2, tempdir + "/test3")
    mkdir_task.mkdir(
        generate_initial_files1,
        formatter(),
        "{path[0]}/{basename[0]}.dir2",
    )

    cleanup_tmpdir()

    # NOTE(review): the run is directed at the pipeline named "main", not the
    # "test" pipeline assembled above -- confirm this is intentional.
    pipeline_run(
        [test_transform, test_transform2],
        verbose=0,
        multiprocess=2,
        pipeline="main",
    )
def test_newstyle_ruffus(self):
    """Check forced re-runs of an originate/split/subdivide pipeline.

    The pipeline is built on a ``Pipeline`` object named "test" and run six
    times. Odd-numbered steps run the pipeline (normally, then twice with
    ``forcedtorun_tasks``); even-numbered steps re-run it unchanged. After
    every run ``self.check_file_exists_or_not_as_expected`` is called with
    two lists of file names (presumably "must exist" and "must not exist
    yet" -- the helper is defined elsewhere; verify there).

    ``forcedtorun_tasks`` is deliberately given in two forms: a list holding
    the qualified task name ``"test::make_start"`` and the task function
    ``make_start`` itself.
    """
    test_pipeline = Pipeline("test")
    test_pipeline.originate(task_func=make_start,
                            output=[tempdir + 'start'])
    test_pipeline.split(task_func=split_start,
                        input=make_start,
                        output=tempdir + '*.split')
    test_pipeline.subdivide(task_func=subdivide_start,
                            input=split_start,
                            filter=formatter(),
                            output=tempdir + '{basename[0]}_*.subdivided',
                            extras=[tempdir + '{basename[0]}'])

    # Files newly expected after the 1st, 2nd, 3rd and 4th generating run.
    expected_files_after_1_runs = ["start", "0.split", "0_0.subdivided"]
    expected_files_after_2_runs = [
        "1.split", "0_1.subdivided", "1_0.subdivided"]
    expected_files_after_3_runs = [
        "2.split", "0_2.subdivided", "1_1.subdivided", "2_0.subdivided"]
    expected_files_after_4_runs = [
        "3.split", "0_3.subdivided", "1_2.subdivided", "2_1.subdivided",
        "3_0.subdivided"]

    print(" 1 Run pipeline normally...")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(expected_files_after_1_runs,
                                              expected_files_after_2_runs)

    print(" 2 Check that running again does nothing. (All up to date).")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(expected_files_after_1_runs,
                                              expected_files_after_2_runs)

    # Pause before forcing a re-run (presumably so file timestamps differ
    # between runs -- TODO confirm).
    time.sleep(2)

    print(" 3 Running again with forced tasks to generate more files...")
    test_pipeline.run(forcedtorun_tasks=["test::make_start"],
                      multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs,
        expected_files_after_3_runs)

    # The message below was previously broken across two physical lines
    # inside the string literal; it now matches steps 2 and 6.
    print(" 4 Check that running again does nothing. (All up to date).")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs,
        expected_files_after_3_runs)

    time.sleep(2)

    print(" 5 Running again with forced tasks to generate even more files...")
    test_pipeline.run(forcedtorun_tasks=make_start,
                      multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs
        + expected_files_after_3_runs,
        expected_files_after_4_runs)

    print(" 6 Check that running again does nothing. (All up to date).")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs
        + expected_files_after_3_runs,
        expected_files_after_4_runs)
def test_newstyle_ruffus(self):
    """Run a split + collate pipeline built on a Pipeline object.

    ``prepare_files`` is registered as a split task producing ``*.animal``
    files and ``summarise_by_grouping`` as a collate task that groups them
    by regular expression into ``.results`` files. Each task appends a line
    to ``task.done`` through a post-task hook. After the run,
    ``check_species_correct()`` validates the outcome.
    """
    pipeline = Pipeline("test")

    prepare_task = pipeline.split(
        task_func=prepare_files,
        input=None,
        output=tempdir + '*.animal',
    )
    prepare_task = prepare_task.follows(mkdir(tempdir, tempdir + "test"))
    prepare_task.posttask(lambda: do_write(tempdir + "task.done", "Task 1 Done\n"))

    summarise_task = pipeline.collate(
        task_func=summarise_by_grouping,
        input=prepare_files,
        filter=regex(r'(.*/).*\.(.*)\.animal'),
        output=r'\1\2.results',
    )
    summarise_task.posttask(lambda: do_write(tempdir + "task.done", "Task 2 Done\n"))

    pipeline.run(multiprocess=10, verbose=0)
    check_species_correct()
def test_newstyle_ruffus(self):
    """Verify that forcing ``make_start`` to re-run yields additional files.

    An originate -> split -> subdivide pipeline is registered on a
    ``Pipeline`` object named "test". It is run six times: runs 1, 3 and 5
    are expected to create new files (runs 3 and 5 via
    ``forcedtorun_tasks``), while runs 2, 4 and 6 repeat the run unchanged.
    After each run ``self.check_file_exists_or_not_as_expected`` receives
    two lists of file names (presumably files that must exist and files
    that must not exist yet -- the helper is defined elsewhere; verify
    there).

    The forced task is specified once as ``["test::make_start"]`` (qualified
    name in a list) and once as the function object ``make_start``.
    """
    test_pipeline = Pipeline("test")
    test_pipeline.originate(
        task_func=make_start,
        output=[tempdir + 'start'],
    )
    test_pipeline.split(
        task_func=split_start,
        input=make_start,
        output=tempdir + '*.split',
    )
    test_pipeline.subdivide(
        task_func=subdivide_start,
        input=split_start,
        filter=formatter(),
        output=tempdir + '{basename[0]}_*.subdivided',
        extras=[tempdir + '{basename[0]}'],
    )

    # New files expected from each successive file-generating run.
    expected_files_after_1_runs = ["start", "0.split", "0_0.subdivided"]
    expected_files_after_2_runs = ["1.split", "0_1.subdivided", "1_0.subdivided"]
    expected_files_after_3_runs = ["2.split", "0_2.subdivided", "1_1.subdivided",
                                   "2_0.subdivided"]
    expected_files_after_4_runs = ["3.split", "0_3.subdivided", "1_2.subdivided",
                                   "2_1.subdivided", "3_0.subdivided"]

    print(" 1 Run pipeline normally...")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs,
        expected_files_after_2_runs,
    )

    print(" 2 Check that running again does nothing. (All up to date).")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs,
        expected_files_after_2_runs,
    )

    # Wait before the forced re-run (presumably to separate file
    # modification times between runs -- TODO confirm).
    time.sleep(2)

    print(" 3 Running again with forced tasks to generate more files...")
    test_pipeline.run(
        forcedtorun_tasks=["test::make_start"],
        multiprocess=10,
        verbose=TEST_VERBOSITY,
    )
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs,
        expected_files_after_3_runs,
    )

    # This message used to be split across two physical lines inside the
    # string literal; it is now a single-line literal like steps 2 and 6.
    print(" 4 Check that running again does nothing. (All up to date).")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs,
        expected_files_after_3_runs,
    )

    time.sleep(2)

    print(" 5 Running again with forced tasks to generate even more files...")
    test_pipeline.run(
        forcedtorun_tasks=make_start,
        multiprocess=10,
        verbose=TEST_VERBOSITY,
    )
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs
        + expected_files_after_3_runs,
        expected_files_after_4_runs,
    )

    print(" 6 Check that running again does nothing. (All up to date).")
    test_pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(
        expected_files_after_1_runs + expected_files_after_2_runs
        + expected_files_after_3_runs,
        expected_files_after_4_runs,
    )
def test_newstyle_mkdir_run(self):
    """Build a "test" Pipeline with chained mkdir prerequisites and run the tasks.

    The split task writes four ``*_name.tmp1`` files under ``tempdir``; the
    transform task has three ``mkdir`` calls chained onto it, and a
    stand-alone ``mkdir`` task for ``test_transform2`` gets one more. The
    temporary directory is cleaned up before ``pipeline_run`` is called.
    """
    test_pipeline = Pipeline("test")

    tmp1_files = []
    for prefix in "abcd":
        tmp1_files.append(tempdir + "/" + prefix + "_name.tmp1")

    test_pipeline.split(task_func=generate_initial_files1,
                        input=1,
                        output=tmp1_files)

    transformed = test_pipeline.transform(
        task_func=test_transform,
        input=generate_initial_files1,
        filter=formatter(),
        output="{path[0]}/{basename[0]}.dir/{basename[0]}.tmp2")
    # Directory prerequisites, attached in the original chaining order.
    with_test1 = transformed.mkdir(tempdir + "/test1")
    with_test2 = with_test1.mkdir(tempdir + "/test2")
    with_test2.mkdir(generate_initial_files1,
                     formatter(),
                     ["{path[0]}/{basename[0]}.dir",
                      3,
                      "{path[0]}/{basename[0]}.dir2"])

    standalone_mkdir = test_pipeline.mkdir(test_transform2, tempdir + "/test3")
    standalone_mkdir.mkdir(generate_initial_files1,
                           formatter(),
                           "{path[0]}/{basename[0]}.dir2")

    cleanup_tmpdir()

    # NOTE(review): pipeline_run is pointed at the pipeline called "main",
    # not at the "test" pipeline created here -- confirm this is intended.
    tasks_to_run = [test_transform, test_transform2]
    pipeline_run(tasks_to_run, verbose=0, multiprocess=2, pipeline="main")
def test_newstyle_ruffus(self):
    """Draw flowcharts of a Pipeline-object pipeline in several formats.

    Depending on ``self.graph_viz_present`` either a full set of flowcharts
    (dot, jpg, svg) is written, including a check that an unknown file
    extension raises ``CalledProcessError`` unless the format is given
    explicitly, or only a ``.dot`` flowchart is produced.
    """
    print(" Run pipeline normally...")

    test_pipeline = Pipeline("test")
    test_pipeline.originate(make_start, [tempdir + 'start'])
    test_pipeline.split(split_start, make_start, tempdir + '*.split')
    test_pipeline.subdivide(subdivide_start,
                            split_start,
                            formatter(),
                            tempdir + '{basename[0]}_*.subdivided',
                            tempdir + '{basename[0]}')

    draw = test_pipeline.printout_graph

    if not self.graph_viz_present:
        # Only the dot format is requested in this case.
        draw(tempdir + "flowchart.dot",
             target_tasks=[subdivide_start],
             forcedtorun_tasks=[split_start],
             no_key_legend=True)
        return

    draw(tempdir + "flowchart.dot")
    draw(tempdir + "flowchart.jpg",
         target_tasks=[subdivide_start],
         forcedtorun_tasks=[split_start],
         no_key_legend=True)
    draw(tempdir + "flowchart.svg", no_key_legend=False)

    # Unknown format
    try:
        draw(tempdir + "flowchart.unknown", no_key_legend=False)
    except CalledProcessError:
        # This is the expected outcome for an unrecognised extension.
        pass
    else:
        raise Exception(
            "Failed to throw exception for test_pipeline.printout_graph unknown extension "
        )

    # An explicit output format makes the unknown extension acceptable.
    draw(tempdir + "flowchart.unknown", "svg", no_key_legend=False)