def main(argv=None):
    """Assemble and execute the template workflow.

    Builds a three-stage ruffus pipeline (create sample files ->
    per-file mean -> merged means) plus an ``all`` target, then hands
    control to ``P.run_workflow``.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments; falls back to ``sys.argv`` when omitted.
    """
    if argv is None:
        argv = sys.argv

    options, args = P.initialize(
        argv,
        config_file="template.yml",
        defaults={
            "min_value": 0.0,
            "num_samples": 1000,
            "mu": 0.0,
            "sigma": 1.0})

    workflow = ruffus.Pipeline("template_pipeline")

    files_task = workflow.originate(
        task_func=create_files,
        output=["sample_{:02}.txt".format(idx) for idx in range(10)])

    mean_task = workflow.transform(
        task_func=compute_mean,
        input=files_task,
        filter=ruffus.suffix(".txt"),
        output=".mean")

    combined_task = workflow.merge(
        task_func=combine_means,
        input=mean_task,
        output="means.txt")

    # primary targets
    workflow.merge(
        task_func=P.EmptyRunner("all"),
        input=combined_task,
        output="all")

    E.debug("starting workflow")
    return P.run_workflow(options, args)
def test_pipeline_action_show(capsys, build_pipeline):
    """The ``show`` action prints the list of tasks that would run."""
    P.initialize(argv=["toolname", "show", "all"])
    P.run_workflow(E.get_args(), pipeline=build_pipeline)
    stdout = capsys.readouterr().out
    assert "Tasks which will be run" in stdout
def test_pipeline_action_state(capsys, build_pipeline):
    """The ``state`` action emits a tab-separated task/state table."""
    P.initialize(argv=["toolname", "state"])
    P.run_workflow(E.get_args(), pipeline=build_pipeline)
    stdout = capsys.readouterr().out
    assert stdout.startswith("function\tactive")
def main(argv):
    """Construct and run the benchmark workflow.

    Reads the configuration (``benchmark.yml``), then assembles a ruffus
    pipeline in stages: tool runners, optional external data, optional
    collation and splitting, metric computation per runner group, metric
    aggregation, upload, export and a final ``all`` target. Finally the
    workflow is executed via ``P.run_workflow``.

    Parameters
    ----------
    argv : list of str
        Command-line arguments passed to ``P.initialize``.

    Raises
    ------
    KeyError
        If no metrics section can be resolved for a runner group, or an
        unknown export section is configured.
    ValueError
        If a configured metrics section yields no metric tasks.
    """
    options = P.initialize(argv, config_file="benchmark.yml")

    # compatibility with cgatcore < 0.6.3, where initialize() returned
    # an (options, args) tuple instead of a single options object
    if isinstance(options, tuple):
        options = options[0]

    params = P.get_params()

    with arvados_enabled(always_mount=options.always_mount):
        mountpoint = params.get("mount_point", None)
        if mountpoint:
            redirect_defaults2mountpoint(mountpoint)

        # A selection of command line arguments are added to PARAMS
        # as 'extras' not implemented in ruffus 2.6.3
        kwargs = collections.defaultdict(dict)
        if options.only_info:
            kwargs["extras"].update({'only_info': True})
            P.PARAMS["only_info"] = True
        if options.is_test:
            kwargs["extras"].update({'is_test': True})
            P.PARAMS["is_test"] = True

        E.debug("construction of workflow started")

        pipeline = ruffus.Pipeline('benchmark')

        # Tool execution
        suffix, tool_runners = add_tools_to_pipeline(pipeline,
                                                     map_tool_to_runner,
                                                     config=P.PARAMS,
                                                     **kwargs)

        E.debug("added {} tools to workflow".format(len(tool_runners)))

        # Optionally, add externally computed files as pseudo-tools:
        if "external" in P.PARAMS["setup"]:
            external_runners = add_external_data_to_pipeline(
                pipeline,
                config=P.PARAMS,
                **kwargs)
            tool_runners.extend(external_runners)

        # Optionally, combine tool runs into aggregate outputs. The type
        # of the output is preserved (VCF -> VCF, etc.) For example, call
        # individual members in a trio and then build a combined VCF to
        # analyse mendelian inconsistencies.
        if "collate" in P.PARAMS["setup"]:
            collate_runners = add_collations_to_pipeline(
                pipeline,
                map_collate_to_runner,
                P.PARAMS["setup"]["collate"],
                tasks=tool_runners,
                config=P.PARAMS)
            if P.PARAMS["setup"].get("only_collate", False):
                tool_runners = []
            if P.PARAMS["setup"].get("no_collate_metrics", False):
                collate_runners = []
            E.debug("added {} collators to workflow".format(
                len(collate_runners)))
        else:
            collate_runners = []

        # Optionally, split up the output before applying additional
        # analyses. The type of the output is preserved (VCF -> VCF, etc).
        # For example, identify false positives, false negatives and true
        # positives and collect metrics individually.
        if "split" in P.PARAMS["setup"]:
            split_runners = add_splits_to_pipeline(
                pipeline,
                map_split_to_runner,
                tool_runners,
                P.PARAMS["setup"]["split"],
                tasks=tool_runners,
                config=P.PARAMS)
            if P.PARAMS["setup"].get("only_split", False):
                tool_runners = []
            E.debug("added {} splitters to workflow".format(
                len(split_runners)))
        else:
            split_runners = []

        # Attach metrics to each non-empty runner group. Group-specific
        # metric sections (collate_metrics / split_metrics) take
        # precedence over the generic 'metrics' section.
        metric_runners = []
        for prefix, runners in zip(
                ["tool", "collate", "split"],
                [tool_runners, collate_runners, split_runners]):
            if not runners:
                continue

            if prefix == "collate" and "collate_metrics" in P.PARAMS["setup"]:
                metrics = P.PARAMS["setup"]["collate_metrics"]
            elif prefix == "split" and "split_metrics" in P.PARAMS["setup"]:
                metrics = P.PARAMS["setup"]["split_metrics"]
            elif "metrics" in P.PARAMS["setup"]:
                metrics = P.PARAMS["setup"]["metrics"]
            else:
                raise KeyError(
                    "configuration file requires a 'setup:metrics' section")

            # Metric execution
            mm = add_metrics_to_pipeline(pipeline,
                                         metrics,
                                         map_metric_to_runner,
                                         runners,
                                         suffix=suffix,
                                         prefix=prefix + "_",
                                         config=P.PARAMS,
                                         **kwargs)
            if len(mm) == 0:
                raise ValueError(
                    "workflow construction error: "
                    "no metric tasks result for metrics {}".format(metrics))
            metric_runners.extend(mm)
            E.debug("added {} {}_metrics to workflow".format(len(mm), prefix))

        # add plot task
        if "aggregate" in P.PARAMS["setup"]:
            aggregate_metrics = add_collations_to_pipeline(
                pipeline,
                map_collate_to_runner,
                P.PARAMS["setup"]["aggregate"],
                metric_runners,
                config=P.PARAMS)
            E.debug("added metric aggregation to workflow")
        else:
            aggregate_metrics = []

        add_upload_to_pipeline(pipeline,
                               metric_runners + aggregate_metrics,
                               P.PARAMS)
        # BUG FIX: original called "...".format(prefix) on a string with
        # no placeholder, silently discarding the argument and relying on
        # the loop variable leaking out of the metrics loop.
        E.debug("added upload to workflow")

        # add export task
        export = P.PARAMS["setup"].get(
            "export", ["tools", "collate", "split"])
        map_export2runner = {"collate": collate_runners,
                             "tools": tool_runners,
                             "split": split_runners}

        export_runners = []
        for section in export:
            try:
                export_runners.extend(map_export2runner[section])
            except KeyError:
                raise KeyError("unknown export section: {}".format(section))

        add_export_to_pipeline(pipeline,
                               export_runners,
                               suffix=suffix,
                               config=P.PARAMS)

        E.debug("added export to workflow")

        add_all_task_to_pipeline(pipeline,
                                 metric_runners + aggregate_metrics)

        # Collate output files to facilitate analysis
        if "collation" in P.PARAMS:
            # NOTE(review): the original bound the result to an unused
            # local ('collators'); only the pipeline side effect matters.
            add_collations_to_pipeline(pipeline,
                                       map_collate_to_runner,
                                       P.PARAMS["collation"],
                                       config=P.PARAMS)

        E.debug("construction of workflow completed")

        E.debug("starting workflow")
        P.run_workflow(options, pipeline=pipeline)