def test_newstyle_ruffus(self):
        """
        Build the split / align / percentage-identity / combine pipeline with
        the object-oriented ``Pipeline`` syntax, run it, and raise if the
        combined results file does not exist afterwards.
        """
        pipeline = Pipeline("test")

        # Split: outputs are one flag file plus a glob of split fasta files.
        split_stage = pipeline.split(
            task_func=split_fasta_file,
            input=tempdir + "original.fa",
            output=[tempdir + "files.split.success",
                    tempdir + "files.split.*.fa"])
        split_stage.posttask(
            lambda: verbose_output.write("    Split into %d files\n" % 10))

        # Align: the ".fa" suffix of each split output is replaced by ".aln".
        align_stage = pipeline.transform(
            task_func=align_sequences,
            input=split_fasta_file,
            filter=suffix(".fa"),
            output=".aln")
        align_stage.posttask(
            lambda: verbose_output.write("    Sequences aligned\n"))

        # Percentage identity: each ".aln" maps to a result file and a
        # success-flag file.
        pcid_stage = pipeline.transform(
            task_func=percentage_identity,
            input=align_sequences,
            filter=suffix(".aln"),
            output=[r".pcid", r".pcid_success"])
        pcid_stage.posttask(
            lambda: verbose_output.write("    %Identity calculated\n"))

        # Merge: all percentage-identity outputs go to one combine task.
        merge_stage = pipeline.merge(
            task_func=combine_results,
            input=percentage_identity,
            output=[tempdir + "all.combine_results",
                    tempdir + "all.combine_results_success"])
        merge_stage.posttask(
            lambda: verbose_output.write("    Results recombined\n"))

        pipeline.run(multiprocess=50, verbose=0)

        combined = tempdir + "all.combine_results"
        if not os.path.exists(combined):
            raise Exception("Missing %s" % combined)
    def test_transform_with_missing_formatter_args_b(self):
        """
        Use a ``formatter()`` transform whose extras reference fields that
        are never supplied, and check that both ``printout`` and ``run``
        (via a stream logger) report the unmatched ``{dynamic_message}``.
        """
        expected_report = "Unmatched field {dynamic_message}"

        pipeline = Pipeline("test")

        initial_outputs = [os.path.join(tempdir, ff + ".tmp") for ff in "abcd"]
        originate_task = pipeline.originate(task_func=generate_initial_files,
                                            output=initial_outputs)
        originate_task.mkdir(tempdir)

        pipeline.transform(task_func=transform_with_missing_formatter_args,
                           input=generate_initial_files,
                           filter=formatter(),
                           output="{path[0]}/{basename[0]}.task1",
                           extras=['echo {dynamic_message} > {some_file}'])

        # 1) the problem must show up in the printed pipeline description
        printed = StringIO()
        pipeline.printout(printed,
                          [transform_with_missing_formatter_args],
                          verbose=4,
                          wrap_width=10000,
                          pipeline="test")
        self.assertIn(expected_report, printed.getvalue())

        # 2) ... and in what gets logged to a stream while running
        logged = StringIO()
        stream_logger = t_stream_logger(logged)
        pipeline.run([transform_with_missing_formatter_args],
                     verbose=5,
                     pipeline="test",
                     logger=stream_logger)
        self.assertIn(expected_report, logged.getvalue())
Exemple #3
0
    def test_newstyle_ruffus(self):
        """
        Chain files -> split -> transform -> merge on a fresh ``Pipeline``,
        run it, and raise if ``variance.result`` is missing from ``tempdir``.
        """
        numbers_list = tempdir + "random_numbers.list"
        variance_result = os.path.join(tempdir, "variance.result")

        pipeline = Pipeline("test")

        numbers_task = pipeline.files(create_random_numbers, None, numbers_list)
        numbers_task.follows(mkdir(tempdir))

        # The split takes the list as a plain file name, so the dependency on
        # the task producing it is declared with follows().
        chunk_task = pipeline.split(task_func=step_4_split_numbers_into_chunks,
                                    input=numbers_list,
                                    output=tempdir + "*.chunks")
        chunk_task.follows(create_random_numbers)

        pipeline.transform(task_func=step_5_calculate_sum_of_squares,
                           input=step_4_split_numbers_into_chunks,
                           filter=suffix(".chunks"),
                           output=".sums")

        variance_task = pipeline.merge(task_func=step_6_calculate_variance,
                                       input=step_5_calculate_sum_of_squares,
                                       output=variance_result)
        variance_task.posttask(lambda: sys.stdout.write("     hooray\n"))
        variance_task.posttask(print_hooray_again,
                               print_whoppee_again,
                               touch_file(os.path.join(tempdir, "done")))

        pipeline.run(multiprocess=50, verbose=0)
        if not os.path.exists(variance_result):
            raise Exception("Missing %s" % variance_result)
Exemple #4
0
    def test_newstyle_task(self):
        """
        Run a transform whose inputs are replaced through ``ruffus.inputs``
        and compare the merged ``final.output`` file with the expected text.
        """
        pipeline = Pipeline("test")

        first_files = pipeline.files(
            task1, [[None, tempdir + "a.1"], [None, tempdir + "b.1"]])
        first_files.follows(mkdir(tempdir))

        second_files = pipeline.files(
            task2, [[None, tempdir + "c.1"], [None, tempdir + "d.1"]])
        second_files.follows(mkdir(tempdir))

        # Every task1 output matched by the regex is replaced by the matched
        # name itself, the outputs of task2 and a wildcard file name.
        pipeline.transform(task_func=task3,
                           input=task1,
                           filter=regex(r"(.+)"),
                           replace_inputs=ruffus.inputs(
                               (r"\1", task2, "test_transform_inputs.*y")),
                           output=r"\1.output")
        pipeline.merge(task4, task3, tempdir + "final.output")

        pipeline.run([task4], multiprocess=10, verbose=0)

        expected_text = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
            tempdir=tempdir)
        with open(tempdir + "final.output") as result_file:
            actual_text = result_file.read()
        self.assertEqual(expected_text, actual_text)
Exemple #5
0
    def test_newstyle_ruffus(self):
        """
        Assemble the random-numbers pipeline (files, split, transform, merge)
        without decorators, run it with 50 processes, and raise when the
        variance result file is absent.
        """
        pipeline = Pipeline("test")

        (pipeline
         .files(create_random_numbers, None, tempdir + "random_numbers.list")
         .follows(mkdir(tempdir)))

        (pipeline
         .split(task_func=step_4_split_numbers_into_chunks,
                input=tempdir + "random_numbers.list",
                output=tempdir + "*.chunks")
         .follows(create_random_numbers))

        pipeline.transform(
            task_func=step_5_calculate_sum_of_squares,
            input=step_4_split_numbers_into_chunks,
            filter=suffix(".chunks"),
            output=".sums")

        # Two posttask registrations: a lambda first, then two functions and
        # a touch_file marker.
        (pipeline
         .merge(task_func=step_6_calculate_variance,
                input=step_5_calculate_sum_of_squares,
                output=os.path.join(tempdir, "variance.result"))
         .posttask(lambda: sys.stdout.write("     hooray\n"))
         .posttask(print_hooray_again,
                   print_whoppee_again,
                   touch_file(os.path.join(tempdir, "done"))))

        pipeline.run(multiprocess=50, verbose=0)

        result_path = os.path.join(tempdir, "variance.result")
        if not os.path.exists(result_path):
            raise Exception("Missing %s" % result_path)
Exemple #6
0
 def test_newstyle_ruffus(self):
     """
     Originate ``a.1`` and ``b.1``, feed them through two transforms (one
     keeping the ``.1`` suffix, one producing ``.linked.1``), combine both
     in a final transform to ``.3``, and run with exception logging on.
     """
     pipeline = Pipeline("test")
     pipeline.originate(start_task, ["a.1", "b.1"])

     # Both intermediate tasks read the outputs of start_task.
     pipeline.transform(same_file_name_task,
                        start_task,
                        suffix(".1"),
                        ".1")
     pipeline.transform(linked_file_name_task,
                        start_task,
                        suffix(".1"),
                        ".linked.1")

     # The final task takes the outputs of both intermediate tasks.
     pipeline.transform(final_task,
                        [linked_file_name_task, same_file_name_task],
                        suffix(".1"),
                        ".3")
     pipeline.run(log_exceptions=True, verbose=0)
    def test_newstyle_ruffus(self):
        """
        Build a pipeline that uses ``output_dir`` on mkdir / transform /
        subdivide, run it, and raise if ``summary.5`` in ``data_dir`` does not
        match ``expected_active_text``.
        """
        summary_path = os.path.join(data_dir, "summary.5")

        pipeline = Pipeline("test")

        pipeline.mkdir(data_dir, work_dir)
        origin_outputs = [os.path.join(data_dir, "%s.1" % aa) for aa in "abcd"]
        pipeline.originate(task_func=task1, output=origin_outputs)

        # mkdir driven by a suffix filter, with results placed in work_dir
        pipeline.mkdir(filter=suffix(".1"), output=".dir", output_dir=work_dir)

        pipeline.transform(
            task_func=task2,
            input=task1,
            filter=suffix(".1"),
            output=[".1", ".bak"],
            extras=["extra.tst", 4, r"orig_dir=\1"],
            output_dir=work_dir)

        pipeline.subdivide(task3,
                           task2,
                           suffix(".1"),
                           r"\1.*.2",
                           [r"\1.a.2", r"\1.b.2"],
                           output_dir=data_dir)
        pipeline.transform(task4, task3, suffix(".2"), ".3",
                           output_dir=work_dir)
        pipeline.merge(task5, task4, summary_path)
        pipeline.run(multiprocess=50, verbose=0)

        with open(summary_path) as summary_file:
            active_text = summary_file.read()
        if active_text != expected_active_text:
            mismatch = (expected_active_text, active_text)
            raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" % mismatch)
Exemple #8
0
 def test_newstyle_ruffus(self):
     """
     Register ``parallel_task`` with six parameter sets and require that
     running the pipeline raises ``RethrownJobError``.
     """
     job_parameters = [["A", 1], ["B", 3], ["C", 3],
                       ["D", 4], ["E", 4], ["F", 4]]
     pipeline = Pipeline("test")
     pipeline.parallel(parallel_task, job_parameters)
     try:
         pipeline.run(multiprocess=50, verbose=0)
     except ruffus.ruffus_exceptions.RethrownJobError:
         # the expected outcome: nothing more to check
         pass
     else:
         raise Exception("Missing exception")
Exemple #9
0
 def test_newstyle_ruffus(self):
     """
     Run ``parallel_task`` over six parameter sets in a new-style pipeline;
     the test passes only if the run ends in ``RethrownJobError``.
     """
     pipeline = Pipeline("test")
     pipeline.parallel(
         parallel_task,
         [['A', 1], ['B', 3], ['C', 3], ['D', 4], ['E', 4], ['F', 4]])

     job_error_seen = False
     try:
         pipeline.run(multiprocess=50, verbose=0)
     except ruffus.ruffus_exceptions.RethrownJobError:
         job_error_seen = True

     if not job_error_seen:
         raise Exception("Missing exception")
 def test_newstyle_simpler(self):
     """
     Chain originate -> transform -> transform -> merge, passing the logger
     proxy and logging mutex to every task as extras, then run the pipeline.
     """
     pipeline = Pipeline("test")
     pipeline.originate(task1,
                        input_file_names,
                        extras=[logger_proxy, logging_mutex])
     pipeline.transform(task2,
                        task1,
                        suffix(".1"),
                        ".2",
                        extras=[logger_proxy, logging_mutex])
     pipeline.transform(task3,
                        task2,
                        suffix(".2"),
                        ".3",
                        extras=[logger_proxy, logging_mutex])
     pipeline.merge(task4,
                    task3,
                    final_file_name,
                    extras=[logger_proxy, logging_mutex])
     # Disabled variant of the merge call that passes the extras as a dict:
     #pipeline.merge(task4, task3, final_file_name, extras = {"logger_proxy": logger_proxy, "logging_mutex": logging_mutex})
     pipeline.run(multiprocess=500, verbose=0)
Exemple #11
0
 def test_newstyle_ruffus(self):
     """
     Expect ``RethrownJobError`` from a pipeline whose only task is
     ``parallel_task`` applied to six parameter sets.
     """
     parameter_sets = [['A', 1], ['B', 3], ['C', 3],
                       ['D', 4], ['E', 4], ['F', 4]]

     pipeline = Pipeline("test")
     pipeline.parallel(parallel_task, parameter_sets)

     try:
         pipeline.run(multiprocess=50, verbose=0)
     except ruffus.ruffus_exceptions.RethrownJobError:
         return None
     else:
         # run() came back normally, which this test treats as an error
         raise Exception("Missing exception")
    def test_newstyle_mkdir(self):
        """
        Attach several ``mkdir`` dependencies to one task, run the pipeline,
        and assert that each of the paths ``a`` .. ``e`` exists below
        ``tempdir`` (resolved relative to this file's directory).
        """
        pipeline = Pipeline("test")
        pipeline.follows(task_which_makes_directories,
                         mkdir(directories),
                         mkdir(tempdir + 'c'),
                         mkdir(tempdir + 'd', tempdir + 'e'),
                         mkdir(tempdir + 'e'))   # 'e' is requested twice
        pipeline.run(multiprocess=10, verbose=0)

        here = os.path.dirname(__file__)
        for leaf in 'abcde':
            created = os.path.exists(os.path.join(here, tempdir, leaf))
            self.assertTrue(created)
    def test_newstyle_task(self):
        """
        Register ``task1`` through ``files`` with the parameters in ``a``,
        run the pipeline with a string-saving logger, and check that the
        "@files() was empty" warning was captured.
        """
        test_pipeline = Pipeline("test")
        test_pipeline.files(task1, a)

        save_to_str_logger = t_save_to_str_logger()
        test_pipeline.run(multiprocess=10, logger=save_to_str_logger, verbose=1)
        # assertIn shows the expected text and the captured warnings when it
        # fails, instead of the bare "False is not true" from assertTrue.
        self.assertIn("@files() was empty", save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
    def test_newstyle_collate(self):
        """
        Build the originate / subdivide / transform / transform / collate
        pipeline with the object-oriented syntax (no decorators), check that
        ``printout`` reports a job needing an update because of missing
        files, then run the pipeline.
        """
        pipeline = Pipeline("test")

        origin = pipeline.originate(task_func=generate_initial_files,
                                    output=original_files)
        origin.mkdir(tempdir, tempdir+"/test")

        # The number captured from each original file name is reused in the
        # output names and also passed to the task as an extra argument.
        subdivided = pipeline.subdivide(
            task_func=split_fasta_file,
            input=generate_initial_files,
            filter=regex(r".*\/original_(\d+).fa"),
            output=[tempdir + r"/files.split.\1.success",   # flag file
                    tempdir + r"/files.split.\1.*.fa"],     # glob pattern
            extras=[r"\1"])
        subdivided.posttask(
            lambda: sys.stderr.write("\tSplit into %d files each\n" % JOBS_PER_TASK))

        aligned = pipeline.transform(
            task_func=align_sequences,
            input=split_fasta_file,
            filter=suffix(".fa"),
            output=".aln")
        aligned.posttask(lambda: sys.stderr.write("\tSequences aligned\n"))

        # ".aln" is replaced by a result suffix and a success-flag suffix.
        identity = pipeline.transform(
            task_func=percentage_identity,
            input=align_sequences,
            filter=suffix(".aln"),
            output=[r".pcid", r".pcid_success"])
        identity.posttask(
            lambda: sys.stderr.write("\t%Identity calculated\n"))

        # Results are grouped by the first number captured from the name.
        collated = pipeline.collate(
            task_func=combine_results,
            input=percentage_identity,
            filter=regex(r".*files.split\.(\d+)\.\d+.pcid"),
            output=[tempdir + r"/\1.all.combine_results",
                    tempdir + r"/\1.all.combine_results_success"])
        collated.posttask(lambda: sys.stderr.write("\tResults recombined\n"))

        # Clean up, inspect the printed pipeline state, then run.
        self.cleanup_tmpdir()
        report = StringIO()
        pipeline.printout(report, [combine_results],
                          verbose=5,
                          wrap_width=10000)
        needs_update = re.search('Job needs update:.*Missing files.*',
                                 report.getvalue(),
                                 re.DOTALL)
        self.assertTrue(needs_update is not None)
        pipeline.run(verbose=0)
    def test_newstyle_task (self):
        """
        Register ``task1`` through ``files`` with the parameters in ``a``,
        run the pipeline while logging to a string-saving logger, and check
        that the "@files() was empty" warning was recorded.
        """
        test_pipeline = Pipeline("test")
        test_pipeline.files(task1, a)

        save_to_str_logger = t_save_to_str_logger()
        test_pipeline.run(multiprocess = 10,
                            logger = save_to_str_logger,
                            verbose = 1)
        # assertIn reports the searched text and the actual warnings on
        # failure; assertTrue(... in ...) would only say "False is not true".
        self.assertIn("@files() was empty", save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
    def test_newstyle_collate(self):
        """
        Construct the collate test pipeline on the fly with ``Pipeline``
        methods rather than decorators, verify via ``printout`` that a job
        is reported as needing an update due to missing files, and run it.
        """
        test_pipeline = Pipeline("test")

        # --- originate -----------------------------------------------------
        originate_task = test_pipeline.originate(
            task_func=generate_initial_files,
            output=original_files)
        originate_task.mkdir(tempdir, tempdir+"/test")

        # --- subdivide: flag file + glob of pieces per original file --------
        split_outputs = [tempdir + r"/files.split.\1.success",
                         tempdir + r"/files.split.\1.*.fa"]
        subdivide_task = test_pipeline.subdivide(
            task_func=split_fasta_file,
            input=generate_initial_files,
            filter=regex(r".*\/original_(\d+).fa"),
            output=split_outputs,
            extras=[r"\1"])
        subdivide_task.posttask(
            lambda: sys.stderr.write("\tSplit into %d files each\n" % JOBS_PER_TASK))

        # --- transform: .fa -> .aln ------------------------------------------
        align_task = test_pipeline.transform(
            task_func=align_sequences,
            input=split_fasta_file,
            filter=suffix(".fa"),
            output=".aln")
        align_task.posttask(
            lambda: sys.stderr.write("\tSequences aligned\n"))

        # --- transform: .aln -> .pcid and .pcid_success ----------------------
        identity_task = test_pipeline.transform(
            task_func=percentage_identity,
            input=align_sequences,
            filter=suffix(".aln"),
            output=[r".pcid", r".pcid_success"])
        identity_task.posttask(
            lambda: sys.stderr.write("\t%Identity calculated\n"))

        # --- collate: group by the first captured number ---------------------
        combined_outputs = [tempdir + r"/\1.all.combine_results",
                            tempdir + r"/\1.all.combine_results_success"]
        collate_task = test_pipeline.collate(
            task_func=combine_results,
            input=percentage_identity,
            filter=regex(r".*files.split\.(\d+)\.\d+.pcid"),
            output=combined_outputs)
        collate_task.posttask(
            lambda: sys.stderr.write("\tResults recombined\n"))

        # --- cleanup, printout and run ---------------------------------------
        self.cleanup_tmpdir()
        printed = StringIO()
        test_pipeline.printout(printed,
                               [combine_results],
                               verbose=5,
                               wrap_width=10000)
        match = re.search('Job needs update:.*Missing files.*',
                          printed.getvalue(),
                          re.DOTALL)
        self.assertTrue(match is not None)
        test_pipeline.run(verbose=0)
Exemple #17
0
    def test_newstyle_mkdir(self):
        """
        Give ``task_which_makes_directories`` four ``mkdir`` dependencies,
        run the pipeline, and assert that ``a`` .. ``e`` all exist below
        ``tempdir`` (taken relative to the directory of this file).
        """
        directory_requests = (
            mkdir(directories),
            mkdir(tempdir + 'c'),
            mkdir(tempdir + 'd', tempdir + 'e'),
            mkdir(tempdir + 'e'),
        )

        pipeline = Pipeline("test")
        pipeline.follows(task_which_makes_directories, *directory_requests)
        pipeline.run(multiprocess=10, verbose=0)

        for name in 'abcde':
            expected_path = os.path.join(
                os.path.dirname(__file__), tempdir, name)
            self.assertTrue(os.path.exists(expected_path))
    def test_newstyle_no_re_match (self):
        """
        Run a transform whose ``regex("b")`` filter is applied to the output
        of ``task_1`` (``tempdir + "a"``) and check that the logger captured
        the "no file names matched" warning.
        """
        test_pipeline = Pipeline("test")
        test_pipeline.originate(task_1, tempdir + "a").mkdir(tempdir)
        test_pipeline.transform(task_2, task_1, regex("b"), "task_2.output")

        save_to_str_logger = t_save_to_str_logger()
        test_pipeline.run(multiprocess = 10, logger = save_to_str_logger, verbose = 1)
        print(save_to_str_logger.warning_str)
        # assertIn reports the missing text and the captured warnings when it
        # fails; assertTrue(... in ...) would only say "False is not true".
        self.assertIn("no file names matched", save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
    def test_newstyle_no_re_match(self):
        """
        Build a transform filtered by ``regex("b")`` on top of an originate
        task producing ``tempdir + "a"``, run it with a string-saving logger,
        and check for the "no file names matched" warning.
        """
        test_pipeline = Pipeline("test")
        test_pipeline.originate(task_1, tempdir + "a").mkdir(tempdir)
        test_pipeline.transform(task_2, task_1, regex("b"), "task_2.output")

        save_to_str_logger = t_save_to_str_logger()
        test_pipeline.run(
            multiprocess=10, logger=save_to_str_logger, verbose=1)
        print(save_to_str_logger.warning_str)
        # assertIn gives a diagnostic failure message that includes the
        # captured warning text, unlike assertTrue(... in ...).
        self.assertIn(
            "no file names matched", save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
Exemple #20
0
    def test_newstyle_ruffus(self):
        """
        Build a four-task chain in which the last transform takes its input
        from ``runtime_parameter("a")``, then run the pipeline supplying
        ``runtime_files`` under that key.
        """
        pipeline = Pipeline("test")

        origin_outputs = [tempdir + 'a.1'] + runtime_files
        pipeline.originate(task_func=task1, output=origin_outputs)
        pipeline.transform(task2, task1, suffix(".1"), ".2")
        pipeline.transform(task_func=task3,
                           input=task2,
                           filter=suffix(".2"),
                           output=".3")

        # task4 is not linked to task3 through its input, so the ordering is
        # declared explicitly with follows().
        final_task = pipeline.transform(task_func=task4,
                                        input=runtime_parameter("a"),
                                        filter=suffix(".3"),
                                        output=".4")
        final_task.follows(task3)

        pipeline.run(verbose=0, runtime_data={"a": runtime_files})
Exemple #21
0
    def test_newstyle_ruffus(self):
        """
        Run originate -> transform -> transform plus a final transform fed
        by the run-time parameter ``"a"``, whose value (``runtime_files``) is
        handed to ``run`` through ``runtime_data``.
        """
        runtime_key = "a"
        pipeline = Pipeline("test")

        pipeline.originate(
            task_func=task1,
            output=[tempdir + 'a.1'] + runtime_files)
        pipeline.transform(task2, task1, suffix(".1"), ".2")
        pipeline.transform(
            task_func=task3,
            input=task2,
            filter=suffix(".2"),
            output=".3")
        # Input comes from runtime data; follows() orders it after task3.
        (pipeline
         .transform(task_func=task4,
                    input=runtime_parameter(runtime_key),
                    filter=suffix(".3"),
                    output=".4")
         .follows(task3))

        pipeline.run(verbose=0, runtime_data={runtime_key: runtime_files})
Exemple #22
0
    def test_newstyle_ruffus(self):
        """
        Split into ``*.animal`` files, collate them into ``.results`` files
        by regex groups, run the pipeline, and call
        ``check_species_correct()``.
        """
        done_file = tempdir + "task.done"
        pipeline = Pipeline("test")

        prepare = pipeline.split(task_func=prepare_files,
                                 input=None,
                                 output=tempdir + '*.animal')
        prepare.follows(mkdir(tempdir, tempdir + "test"))
        prepare.posttask(lambda: do_write(done_file, "Task 1 Done\n"))

        # Output name = captured directory prefix + second captured group.
        summarise = pipeline.collate(task_func=summarise_by_grouping,
                                     input=prepare_files,
                                     filter=regex(r'(.*/).*\.(.*)\.animal'),
                                     output=r'\1\2.results')
        summarise.posttask(lambda: do_write(done_file, "Task 2 Done\n"))

        pipeline.run(multiprocess=10, verbose=0)
        check_species_correct()
Exemple #23
0
 def test_newstyle_task(self):
     """
     Construct a five-task pipeline (files, transform, files, files, files)
     on the fly without decorators and run it.
     """
     pipeline = Pipeline("test")

     first = pipeline.files(task1, None, tempdir + 'a.1')
     first.follows(mkdir(tempdir))

     pipeline.transform(
         task_func=task2,
         input=task1,
         filter=regex(r".*"),
         output=tempdir + 'b.1')
     pipeline.files(task3, task2, tempdir + 'c.1')

     # task4 has no input files (None), so it is ordered after task3
     # explicitly.
     fourth = pipeline.files(
         task4, [[None, tempdir + 'd.1'], [None, tempdir + 'e.1']])
     fourth.follows(task3)

     pipeline.files(task5, task4, tempdir + "f.1")
     pipeline.run(multiprocess=10, verbose=0)
 def test_newstyle_no_re_match(self):
     """
     Pass ``inputs(...)`` with two arguments to a transform and require one
     of the two "multiple args" ruffus exceptions; raise otherwise.
     """
     try:
         # Pipeline construction stays inside the try block: either building
         # or running the pipeline may raise the expected error.
         pipeline = Pipeline("test")
         pipeline.transform(
             task_func=task_2,
             input=None,
             filter=regex(tempdir + "b"),
             replace_inputs=inputs(tempdir + "a", tempdir + "b"),
             output="task_1.output")
         pipeline.run(multiprocess=10, verbose=0)
     except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
         print("\tExpected exception thrown 1")
     except ruffus.ruffus_exceptions.error_inputs_multiple_args:
         print("\tExpected exception thrown 2")
     else:
         raise Exception("Inputs(...) with multiple arguments should have thrown an exception")
 def test_newstyle_task(self):
     """
     Assemble the task1 .. task5 pipeline with ``Pipeline`` methods instead
     of decorators, then run it with ten processes.
     """
     test_pipeline = Pipeline("test")

     (test_pipeline
      .files(task1, None, tempdir + 'a.1')
      .follows(mkdir(tempdir)))

     test_pipeline.transform(task_func=task2,
                             input=task1,
                             filter=regex(r".*"),
                             output=tempdir + 'b.1')

     test_pipeline.files(task3, task2, tempdir + 'c.1')

     # Two jobs without inputs, scheduled after task3.
     (test_pipeline
      .files(task4, [[None, tempdir + 'd.1'], [None, tempdir + 'e.1']])
      .follows(task3))

     test_pipeline.files(task5, task4, tempdir + "f.1")

     test_pipeline.run(multiprocess=10, verbose=0)
Exemple #26
0
    def test_newstyle_ruffus(self):
        """
        Pipeline of a split producing ``*.animal`` files followed by a
        collate into ``.results`` files; after running,
        ``check_species_correct()`` is called.
        """
        test_pipeline = Pipeline("test")

        # Each task writes a line to task.done once it has finished.
        (test_pipeline
         .split(task_func=prepare_files,
                input=None,
                output=tempdir + '*.animal')
         .follows(mkdir(tempdir, tempdir + "test"))
         .posttask(lambda: do_write(tempdir + "task.done", "Task 1 Done\n")))

        (test_pipeline
         .collate(task_func=summarise_by_grouping,
                  input=prepare_files,
                  filter=regex(r'(.*/).*\.(.*)\.animal'),
                  output=r'\1\2.results')
         .posttask(lambda: do_write(tempdir + "task.done", "Task 2 Done\n")))

        test_pipeline.run(multiprocess=10, verbose=0)
        check_species_correct()
    def test_newstyle_mkdir(self):
        """
        Attach ``mkdir`` dependencies given as ``unicode`` paths, touch ``f``
        as a posttask, originate ``g`` and ``h``, run the pipeline, and
        assert that every path ``a`` .. ``h`` exists below ``tempdir``.
        """
        pipeline = Pipeline("test")

        directory_task = pipeline.follows(
            task_which_makes_directories,
            mkdir(directories),
            mkdir(unicode(tempdir + "c")),
            mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")),
            mkdir(unicode(tempdir + "e")))
        directory_task.posttask(touch_file(unicode(tempdir + "f")))

        pipeline.originate(task_which_makes_files,
                           [tempdir + "g", tempdir + "h"])
        pipeline.run(multiprocess=10, verbose=0)

        here = os.path.dirname(__file__)
        for leaf in 'abcdefgh':
            leaf_path = os.path.join(here, tempdir, leaf)
            self.assertTrue(os.path.exists(leaf_path))
    def test_newstyle_ruffus(self):
        """
        Build the simulation pipeline (setup, ``files`` simulation, collate
        into ``.mean`` files), run it, and raise if ``000.mean`` or
        ``001.mean`` is missing from ``working_dir``.
        """
        pipeline = Pipeline("test")

        pipeline.follows(setup_simulation_data,
                         mkdir(gene_data_dir, simulation_data_dir))

        simulation = pipeline.files(gwas_simulation, generate_simulation_params)
        simulation.follows(setup_simulation_data)
        simulation.follows(
            mkdir(working_dir, os.path.join(working_dir, "simulation_results")))

        # Simulation results are grouped by the first captured number.
        summary = pipeline.collate(
            statistical_summary,
            gwas_simulation,
            regex(r"simulation_results/(\d+).\d+.simulation_res"),
            r"\1.mean")
        summary.posttask(lambda: sys.stdout.write("\nOK\n"))

        pipeline.run(multiprocess=50, verbose=0)

        for mean_name in ("000.mean", "001.mean"):
            mean_path = os.path.join(working_dir, mean_name)
            if not os.path.exists(mean_path):
                raise Exception("Missing %s" % mean_path)
Exemple #29
0
    def test_newstyle_mkdir(self):
        """
        Exercise ``mkdir`` with ``unicode`` path arguments plus a
        ``touch_file`` posttask and an originate task, then assert that
        ``a`` .. ``h`` all exist below ``tempdir`` after the run.
        """
        directory_requests = (
            mkdir(directories),
            mkdir(unicode(tempdir + "c")),
            mkdir(unicode(tempdir + "d"),
                  unicode(tempdir + "e")),
            mkdir(unicode(tempdir + "e")),
        )

        pipeline = Pipeline("test")

        making_dirs = pipeline.follows(task_which_makes_directories,
                                       *directory_requests)
        making_dirs.posttask(touch_file(unicode(tempdir + "f")))

        file_outputs = [tempdir + "g", tempdir + "h"]
        pipeline.originate(task_which_makes_files, file_outputs)
        pipeline.run(multiprocess=10, verbose=0)

        for name in 'abcdefgh':
            expected_path = os.path.join(
                os.path.dirname(__file__), tempdir, name)
            self.assertTrue(os.path.exists(expected_path))
 def test_newstyle_no_re_match(self):
     """
     A transform given ``inputs(...)`` with two arguments must raise one of
     the two "multiple args" ruffus exceptions; anything else is an error.
     """
     expected_error_seen = False
     try:
         # Everything that may raise the expected error is kept in the try.
         test_pipeline = Pipeline("test")
         name_filter = regex(tempdir + "b")
         replaced = inputs(tempdir + "a", tempdir + "b")
         test_pipeline.transform(task_func=task_2,
                                 input=None,
                                 filter=name_filter,
                                 replace_inputs=replaced,
                                 output="task_1.output")
         test_pipeline.run(multiprocess=10, verbose=0)
     except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
         print("\tExpected exception thrown 1")
         expected_error_seen = True
     except ruffus.ruffus_exceptions.error_inputs_multiple_args:
         print("\tExpected exception thrown 2")
         expected_error_seen = True

     if not expected_error_seen:
         raise Exception(
             "Inputs(...) with multiple arguments should have thrown an exception"
         )
Exemple #31
0
    def test_newstyle_ruffus(self):
        """
        Run the simulation pipeline built with ``Pipeline`` methods and raise
        when either expected ``.mean`` file is absent from ``working_dir``.
        """
        test_pipeline = Pipeline("test")

        test_pipeline.follows(
            setup_simulation_data,
            mkdir(gene_data_dir, simulation_data_dir))

        results_dir = os.path.join(working_dir, "simulation_results")
        (test_pipeline
         .files(gwas_simulation, generate_simulation_params)
         .follows(setup_simulation_data)
         .follows(mkdir(working_dir, results_dir)))

        # Outputs are named after the first number captured by the regex.
        (test_pipeline
         .collate(statistical_summary,
                  gwas_simulation,
                  regex(r"simulation_results/(\d+).\d+.simulation_res"),
                  r"\1.mean")
         .posttask(lambda: sys.stdout.write("\nOK\n")))

        test_pipeline.run(multiprocess=50, verbose=0)

        expected_paths = [os.path.join(working_dir, oo)
                          for oo in ("000.mean", "001.mean")]
        for results_file_name in expected_paths:
            if not os.path.exists(results_file_name):
                raise Exception("Missing %s" % results_file_name)
Exemple #32
0
 def test_newstyle_simpler(self):
     """
     Build originate -> transform -> transform -> merge, handing the logger
     proxy and the logging mutex to each task via ``extras``, and run it.
     """
     pipeline = Pipeline("test")

     pipeline.originate(
         task1, input_file_names, extras=[logger_proxy, logging_mutex])
     pipeline.transform(
         task2, task1, suffix(".1"), ".2",
         extras=[logger_proxy, logging_mutex])
     pipeline.transform(
         task3, task2, suffix(".2"), ".3",
         extras=[logger_proxy, logging_mutex])
     pipeline.merge(
         task4, task3, final_file_name,
         extras=[logger_proxy, logging_mutex])
     # Disabled variant of the merge call that passes the extras as a dict:
     #pipeline.merge(task4, task3, final_file_name, extras = {"logger_proxy": logger_proxy, "logging_mutex": logging_mutex})

     pipeline.run(multiprocess=500, verbose=0)
    def test_newstyle_ruffus(self):
        """
        Alternative (object-oriented) syntax: build a pipeline using
        ``output_dir`` on several tasks, run it, then raise if the content of
        ``summary.5`` differs from ``expected_active_text``.
        """
        pipeline = Pipeline("test")

        pipeline.mkdir(data_dir, work_dir)
        pipeline.originate(task_func=task1,
                           output=[os.path.join(data_dir, "%s.1" % aa)
                                   for aa in "abcd"])

        # mkdir task driven by a suffix filter, writing into work_dir
        pipeline.mkdir(filter=suffix(".1"),
                       output=".dir",
                       output_dir=work_dir)

        pipeline.transform(task_func=task2,
                           input=task1,
                           filter=suffix(".1"),
                           output=[".1", ".bak"],
                           extras=["extra.tst", 4, r"orig_dir=\1"],
                           output_dir=work_dir)

        pipeline.subdivide(
            task3, task2, suffix(".1"),
            r"\1.*.2", [r"\1.a.2", r"\1.b.2"],
            output_dir=data_dir)
        pipeline.transform(
            task4, task3, suffix(".2"), ".3",
            output_dir=work_dir)
        pipeline.merge(task5, task4, os.path.join(data_dir, "summary.5"))
        pipeline.run(multiprocess=50, verbose=0)

        with open(os.path.join(data_dir, "summary.5")) as summary_file:
            active_text = summary_file.read()
        if active_text != expected_active_text:
            details = (expected_active_text, active_text)
            raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" % details)
    def test_transform_with_missing_formatter_args_b(self):
        """
        A ``formatter()`` transform with extras referencing fields that are
        never supplied must report the missing ``{dynamic_message}`` key both
        in ``printout`` output and in the log stream written during ``run``.
        """
        expected_report = "Missing key = {dynamic_message}"

        pipeline = Pipeline("test")

        originate_task = pipeline.originate(
            task_func=generate_initial_files,
            output=[os.path.join(tempdir, ff + ".tmp") for ff in "abcd"])
        originate_task.mkdir(tempdir)

        pipeline.transform(
            task_func=transform_with_missing_formatter_args,
            input=generate_initial_files,
            filter=formatter(),
            output="{path[0]}/{basename[0]}.task1",
            extras=['echo {dynamic_message} > {some_file}'])

        # check the printed description of the pipeline
        printed = StringIO()
        pipeline.printout(printed,
                          [transform_with_missing_formatter_args],
                          verbose=4,
                          wrap_width=10000,
                          pipeline="test")
        self.assertIn(expected_report, printed.getvalue())

        # check what is logged to a stream while running
        logged = StringIO()
        stream_logger = t_stream_logger(logged)
        pipeline.run([transform_with_missing_formatter_args],
                     verbose=5,
                     pipeline="test",
                     logger=stream_logger)
        self.assertIn(expected_report, logged.getvalue())
    def test_newstyle_task(self):
        # Wires task1/task2 (files) -> task3 (transform with replaced inputs)
        # -> task4 (merge), runs up to task4, and compares the text written to
        # "final.output" under tempdir with the expected string below.
        test_pipeline = Pipeline("test")

        task1_jobs = [[None, tempdir + "a.1"], [None, tempdir + "b.1"]]
        task2_jobs = [[None, tempdir + "c.1"], [None, tempdir + "d.1"]]
        test_pipeline.files(task1, task1_jobs).follows(mkdir(tempdir))
        test_pipeline.files(task2, task2_jobs).follows(mkdir(tempdir))

        # Each task1 output is matched whole; its inputs are then replaced by
        # the matched name, everything from task2, and a glob pattern.
        whole_name = regex(r"(.+)")
        substituted_inputs = ruffus.inputs(
            (r"\1", task2, "test_transform_inputs.*y"))
        test_pipeline.transform(task_func=task3,
                                input=task1,
                                filter=whole_name,
                                replace_inputs=substituted_inputs,
                                output=r"\1.output")

        merged_path = tempdir + "final.output"
        test_pipeline.merge(task4, task3, merged_path)

        test_pipeline.run([task4], multiprocess=10, verbose=0)

        correct_output = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
            tempdir=tempdir)
        with open(merged_path) as merged_file:
            real_output = merged_file.read()
        self.assertEqual(correct_output, real_output)
    def test_newstyle_ruffus(self):
        # Runs an originate -> split -> subdivide pipeline six times,
        # alternating a (possibly forced) run with a plain re-run, and after
        # every run hands two file lists to
        # self.check_file_exists_or_not_as_expected (defined elsewhere;
        # presumably first list must exist, second must not -- confirm there).
        test_pipeline = Pipeline("test")
        test_pipeline.originate(task_func=make_start,
                                output=[tempdir + 'start'])
        test_pipeline.split(task_func=split_start,
                            input=make_start,
                            output=tempdir + '*.split')
        test_pipeline.subdivide(task_func=subdivide_start,
                                input=split_start,
                                filter=formatter(),
                                output=tempdir + '{basename[0]}_*.subdivided',
                                extras=[tempdir + '{basename[0]}'])

        # files_by_run[k] holds the files first expected after run k + 1.
        files_by_run = [
            ["start", "0.split", "0_0.subdivided"],
            ["1.split", "0_1.subdivided", "1_0.subdivided"],
            ["2.split", "0_2.subdivided", "1_1.subdivided", "2_0.subdivided"],
            ["3.split", "0_3.subdivided", "1_2.subdivided", "2_1.subdivided",
             "3_0.subdivided"],
        ]

        up_to_date_banner = (
            "     Check that running again does nothing. (All up to date).")

        # (message to print, number of leading batches passed as the first
        #  list, extra keyword arguments for run()).
        schedule = (
            ("     Run pipeline normally...", 1, {}),
            (up_to_date_banner, 1, {}),
            ("     Running again with forced tasks to generate more files...",
             2, {"forcedtorun_tasks": ["test::make_start"]}),
            (up_to_date_banner, 2, {}),
            ("     Running again with forced tasks to generate even more files...",
             3, {"forcedtorun_tasks": make_start}),
            (up_to_date_banner, 3, {}),
        )

        for banner, batches_done, forcing in schedule:
            print(banner)
            test_pipeline.run(multiprocess=10, verbose=0, **forcing)
            first_list = [name
                          for batch in files_by_run[:batches_done]
                          for name in batch]
            second_list = files_by_run[batches_done]
            self.check_file_exists_or_not_as_expected(first_list, second_list)
    def test_newstyle_ruffus(self):
        # Runs an originate -> split -> subdivide pipeline six times at
        # TEST_VERBOSITY, pausing two seconds before each forced run, and
        # after every run passes two file lists to
        # self.check_file_exists_or_not_as_expected (defined elsewhere;
        # presumably first list must exist, second must not -- confirm there).
        test_pipeline = Pipeline("test")
        test_pipeline.originate(task_func=make_start,
                                output=[tempdir + "start"])
        test_pipeline.split(task_func=split_start,
                            input=make_start,
                            output=tempdir + "*.split")
        test_pipeline.subdivide(task_func=subdivide_start,
                                input=split_start,
                                filter=formatter(),
                                output=tempdir + "{basename[0]}_*.subdivided",
                                extras=[tempdir + "{basename[0]}"])

        def run_and_verify(first_list, second_list, **run_options):
            # One pipeline run followed by the file check.
            test_pipeline.run(multiprocess=10,
                              verbose=TEST_VERBOSITY,
                              **run_options)
            self.check_file_exists_or_not_as_expected(first_list, second_list)

        batch_1 = ["start", "0.split", "0_0.subdivided"]
        batch_2 = ["1.split", "0_1.subdivided", "1_0.subdivided"]
        batch_3 = ["2.split", "0_2.subdivided", "1_1.subdivided",
                   "2_0.subdivided"]
        batch_4 = ["3.split", "0_3.subdivided", "1_2.subdivided",
                   "2_1.subdivided", "3_0.subdivided"]

        print("     1 Run pipeline normally...")
        run_and_verify(batch_1, batch_2)
        print("     2 Check that running again does nothing. (All up to date).")
        run_and_verify(batch_1, batch_2)
        time.sleep(2)

        print("     3 Running again with forced tasks to generate more files...")
        run_and_verify(batch_1 + batch_2, batch_3,
                       forcedtorun_tasks=["test::make_start"])
        print("     4 Check that running again does nothing. (All up to date).")
        run_and_verify(batch_1 + batch_2, batch_3)
        time.sleep(2)

        print("     5 Running again with forced tasks to generate even more files...")
        run_and_verify(batch_1 + batch_2 + batch_3, batch_4,
                       forcedtorun_tasks=make_start)
        print("     6 Check that running again does nothing. (All up to date).")
        run_and_verify(batch_1 + batch_2 + batch_3, batch_4)