class TestRules(unittest.TestCase):
    """Test rules"""

    def setUp(self):
        self.workflow = Workflow("foo")
        name = self.workflow.add_rule(name="bar")
        self.workflow._rules["bar"].set_output(*(), **{"foo": "bar"})
        self.workflow._rules["bar"].set_params(*(), **{"cmd": "foo", "options": ["foo", "bar"]})

    def test_create_rule(self):
        self.assertListEqual(["bar"], [x.name for x in self.workflow.rules])
        create_rule_from_existing(name="foo", template="bar", workflow=self.workflow)
        self.assertListEqual(["bar", "foo"], [x.name for x in self.workflow.rules])

    @raises(NoRulesException)
    def test_create_rule_empty_workflow(self):
        create_rule_from_existing(name="foo", template="bar", workflow=Workflow("foo"))

    @raises(AssertionError)
    def test_create_rule_wrong_workflow(self):
        create_rule_from_existing(name="foo", template="bar", workflow=None)

    @raises(UnknownRuleException)
    def test_create_rule_wrong_template(self):
        create_rule_from_existing(name="bar", template="foo", workflow=self.workflow)

    def test_create_rule_add_output(self):
        create_rule_from_existing(name="foo", template="bar", workflow=self.workflow,
                                  **{'output': ((), {'bar': 'foo'})})
        self.assertDictEqual({'bar': 'foo'}, dict(self.workflow.get_rule("foo").output))

    def test_create_rule_add_params(self):
        create_rule_from_existing(name="foo", template="bar", workflow=self.workflow,
                                  **{'params': ((), {'cmd': 'bar', 'options': ['foo']})})
        self.assertDictEqual({'cmd': 'bar', 'options': ['foo']},
                             dict(self.workflow.get_rule("foo").params))
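# For context, a standalone use of the helper exercised by the tests above,
# reconstructed from the calls in the test case (hypothetical sketch; Workflow
# and create_rule_from_existing are the names the test module imports):
wf = Workflow("demo")
wf.add_rule(name="bar")
wf._rules["bar"].set_output(foo="bar")
create_rule_from_existing(name="foo", template="bar", workflow=wf,
                          output=((), {"bar": "foo"}))
print(dict(wf.get_rule("foo").output))  # expected: {'bar': 'foo'}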
def bash_completion(snakefile="Snakefile"):
    # bash invokes `complete -C` commands as: <prog> <command> <word> <prevword>,
    # so sys.argv needs at least three entries before sys.argv[2] can be read
    # (the original guard of >= 2 would raise IndexError for exactly two args).
    if not len(sys.argv) >= 3:
        print("Calculate bash completion for snakemake. This tool shall not be invoked by hand.")
        sys.exit(1)
    prefix = sys.argv[2]
    if prefix.startswith("-"):
        # complete command line options
        opts = [action.option_strings[0]
                for action in get_argument_parser()._actions
                if action.option_strings and action.option_strings[0].startswith(prefix)]
        print(*opts, sep="\n")
    else:
        # complete file names first, then targets defined in the Snakefile
        files = glob.glob("{}*".format(prefix))
        if files:
            print(*files, sep="\n")
        elif os.path.exists(snakefile):
            workflow = Workflow(snakefile=snakefile,
                                snakemakepath=get_snakemake_path())
            workflow.include(snakefile)
            workflow_files = sorted(set(
                file for file in workflow.concrete_files
                if file.startswith(prefix)))
            if workflow_files:
                print(*workflow_files, sep="\n")
            rules = [rule.name for rule in workflow.rules
                     if rule.name.startswith(prefix)]
            if rules:
                print(*rules, sep="\n")
    sys.exit(0)
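# A quick way to exercise the completion hook outside of bash is to fake the
# argv layout bash supplies to `complete -C` commands (hypothetical driver;
# "--li" is just an example prefix):
if __name__ == "__main__":
    sys.argv = ["snakemake-bash-completion", "snakemake", "--li", "snakemake"]
    try:
        bash_completion()  # prints matching options, e.g. --list..., then exits
    except SystemExit:
        pass  # the hook always exits; swallow that when driving it by hand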
def load_rule_args(snakefile, rule_name, default_wildcards=None, change_dir=False):
    """
    Returns a rule object for some default arguments.

    Example usage:
    ```
    try:
        snakemake
    except NameError:
        snakefile_path = os.getcwd() + "/Snakefile"
        snakemake = load_rule_args(
            snakefile=snakefile_path,
            rule_name='create_prediction_target',
            default_wildcards={
                'ds_dir': 'full_data_samplefilter'
            }
        )
    ```
    """
    # save current working dir for later
    cwd = os.getcwd()
    try:
        if default_wildcards is None:
            default_wildcards = dict()
        # change to snakefile directory
        os.chdir(os.path.dirname(snakefile))
        # load workflow
        workflow = Workflow(snakefile=snakefile)
        workflow.include(snakefile)
        # get rule
        rule = workflow.get_rule(rule_name)
        smk_input = dict(rule.expand_input(default_wildcards)[0])
        smk_output = dict(rule.expand_output(default_wildcards)[0])
        smk_params = dict(rule.expand_params(
            default_wildcards, rule.input, rule.output, AttrDict(rule.resources)))
        # set up rule arguments
        retval = SnakemakeRuleArgs(
            input=smk_input,
            params=smk_params,
            output=smk_output,
            wildcards=default_wildcards)
        return retval
    finally:
        if not change_dir:
            # change back to previous working directory
            os.chdir(cwd)
def snake(args):
    chunk_path = 'mtsv-chunk'
    fm_build_path = 'mtsv-build'
    print(chunk_path)
    print(fm_build_path)
    print(args)
    print(partition(args))
    workflow = Workflow("__file__", overwrite_workdir=args.working_dir)
    if args.cluster_cfg is not None:
        workflow.cluster_cfg = args.cluster_cfg
    snakemake.workflow.rules = Rules()
    snakemake.workflow.config = dict()
def get_workflow(self):
    """Make sure there is a workflow object.

    TODO:
     * allow multiple workflows?
     * what kind of options to allow?
     * allow options every time or just the first?
    """
    if self.workflow is None:
        # Create a new workflow object with some basic defaults.
        # Create a blank file just so snakemake has something to hang on to
        # (this file cannot be read from on some Windows systems...).
        self.tempfiles['root'] = tempfile.NamedTemporaryFile('w')
        self.workflow = Workflow(snakefile=self.tempfiles['root'].name)
    return self.workflow
def create_workflow(snakefile):
    workflow = Workflow(snakefile=snakefile, use_conda=True)
    # initialize success up front; the original only assigned it in the
    # except branch, which raised NameError on the happy path
    success = True
    try:
        workflow.include(snakefile, overwrite_first_rule=True,
                         print_compilation=False)
        workflow.check()
    except (Exception, BaseException) as ex:
        print_exception(ex, workflow.linemaps)
        success = False
    return workflow, success
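# Hedged usage sketch for the helper above (assumes a Snakefile in the
# current directory):
workflow, ok = create_workflow("Snakefile")
if ok:
    for rule in workflow.rules:
        print(rule.name)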
def load_rule_args(snakefile, rule_name, default_wildcards=None,
                   change_dir=False, create_dir=True, root=None):
    """
    Returns a rule object for some default arguments.

    Example usage:
    ```
    snakefile_path = os.getcwd() + "/Snakefile"
    try:
        snakemake
    except NameError:
        snakemake = load_rule_args(
            snakefile=snakefile_path,
            rule_name='create_prediction_target',
            default_wildcards={
                'ds_dir': 'full_data_samplefilter'
            },
            # root = "./"  # path relative to snakefile
        )
    ```
    """
    # save current working dir for later
    cwd = os.getcwd()
    if root is None:
        root = os.path.dirname(snakefile)
    elif not os.path.isabs(root):
        root = os.path.join(os.path.dirname(snakefile), root)
    log.info("root dir: %s", root)
    try:
        if default_wildcards is None:
            default_wildcards = dict()
        # change to root directory
        os.chdir(root)
        # load workflow
        workflow = Workflow(snakefile=snakefile)
        workflow.include(snakefile)
        # get rule
        rule = workflow.get_rule(rule_name)
        smk_resources = AttrDict(rule.resources)
        smk_input = dict(rule.expand_input(default_wildcards)[0])
        smk_output = dict(rule.expand_output(default_wildcards)[0])
        smk_params = dict(rule.expand_params(
            default_wildcards, rule.input, rule.output, smk_resources))
        # make paths in snakemake inputs and outputs absolute
        smk_input = map_custom_wd(workflow, smk_input, root)
        smk_output = map_custom_wd(workflow, smk_output, root)
        if create_dir:
            mk_dirs(smk_output)
        # set up rule arguments
        retval = SnakemakeRuleArgs(
            resources=smk_resources,
            input=smk_input,
            params=smk_params,
            output=smk_output,
            wildcards=default_wildcards)
        return retval
    finally:
        if not change_dir:
            # change back to previous working directory
            os.chdir(cwd)
logger.info("rule '{}' up to date".format(dest))

parser = argparse.ArgumentParser("Copy/sync rules to a given directory")
parser.add_argument('Snakefile', help="Snakefile to import")
parser.add_argument('-n', '--dry-run', action="store_true", help="Dry run")
parser.add_argument('-d', '--outdir', action="store", default=os.curdir,
                    help="Output directory")
args = parser.parse_args()

snakefile = os.path.abspath(args.Snakefile)
workflow = Workflow(snakefile=snakefile)
success = True
try:
    workflow.include(snakefile, overwrite_first_rule=True,
                     print_compilation=False)
    workflow.check()
except (Exception, BaseException) as ex:
    print_exception(ex, workflow.linemaps)
    success = False

# Map the rules included from snakemake_rules
DEST = args.outdir
rules = {x: os.path.join(DEST, os.path.relpath(x, SNAKEMAKE_RULES_PATH))
         for x in workflow.included if x.startswith(SNAKEMAKE_RULES_PATH)}
def snakemake(snakefile, listrules=False, cores=1, resources=None,
              workdir=None, targets=None, dryrun=False, touch=False,
              forcetargets=False, forceall=False, forcerun=None,
              prioritytargets=None, stats=None, printreason=False,
              printshellcmds=False, printdag=False, printrulegraph=False,
              nocolor=False, quiet=False, keepgoing=False, cluster=None,
              immediate_submit=False, standalone=False, ignore_ambiguity=False,
              snakemakepath=None, lock=True, unlock=False,
              cleanup_metadata=None, force_incomplete=False,
              ignore_incomplete=False, list_version_changes=False,
              list_code_changes=False, list_input_changes=False,
              list_params_changes=False, summary=False, output_wait=3,
              print_compilation=False, debug=False, notemp=False,
              nodeps=False, jobscript=None, timestamp=False):
    """
    Run snakemake on a given snakefile.

    Note: at the moment, this function is not thread-safe!

    Arguments
    snakefile    -- the snakefile.
    listrules    -- list rules.
    cores        -- maximum number of parallel jobs (default: 1).
    workdir      -- working directory (default: current directory).
    targets      -- execute these targets (default: first rule in snakefile).
    dryrun       -- print the rules that would be executed, but do not execute them.
    forcetargets -- force the selected targets to be executed.
    forceall     -- force all rules to be executed.
    lock         -- lock the working directory.
    """
    init_logger(nocolor=nocolor, stdout=dryrun, debug=debug, timestamp=timestamp)

    if not os.path.exists(snakefile):
        logger.error("Error: Snakefile \"{}\" not present.".format(snakefile))
        return False

    if workdir:
        olddir = os.getcwd()
    workflow = Workflow(snakefile=snakefile, snakemakepath=snakemakepath,
                        jobscript=jobscript)

    if standalone:
        try:
            # set the process group
            os.setpgrp()
        except:
            # ignore: if it does not work we can still work without it
            pass

    success = True
    try:
        workflow.include(snakefile, workdir=workdir,
                         overwrite_first_rule=True,
                         print_compilation=print_compilation)
        workflow.check()

        if not print_compilation:
            if listrules:
                workflow.list_rules()
            else:
                if not printdag and not printrulegraph:
                    # handle subworkflows
                    subsnakemake = partial(
                        snakemake, cores=cores, resources=resources,
                        dryrun=dryrun, touch=touch, printreason=printreason,
                        printshellcmds=printshellcmds, nocolor=nocolor,
                        quiet=quiet, keepgoing=keepgoing, cluster=cluster,
                        immediate_submit=immediate_submit,
                        standalone=standalone,
                        ignore_ambiguity=ignore_ambiguity,
                        snakemakepath=snakemakepath, lock=lock, unlock=unlock,
                        cleanup_metadata=cleanup_metadata,
                        force_incomplete=force_incomplete,
                        ignore_incomplete=ignore_incomplete,
                        output_wait=output_wait, debug=debug, notemp=notemp,
                        nodeps=nodeps, jobscript=jobscript,
                        timestamp=timestamp)
                    for subworkflow in workflow.subworkflows:
                        logger.warning("Executing subworkflow {}.".format(
                            subworkflow.name))
                        if not subsnakemake(subworkflow.snakefile,
                                            workdir=subworkflow.workdir,
                                            targets=subworkflow.targets):
                            success = False
                    if workflow.subworkflows:
                        logger.warning("Executing main workflow.")
                if success:
                    success = workflow.execute(
                        targets=targets, dryrun=dryrun, touch=touch,
                        cores=cores, forcetargets=forcetargets,
                        forceall=forceall, forcerun=forcerun,
                        prioritytargets=prioritytargets, quiet=quiet,
                        keepgoing=keepgoing, printshellcmds=printshellcmds,
                        printreason=printreason,
                        printrulegraph=printrulegraph, printdag=printdag,
                        cluster=cluster, immediate_submit=immediate_submit,
                        ignore_ambiguity=ignore_ambiguity, workdir=workdir,
                        stats=stats, force_incomplete=force_incomplete,
                        ignore_incomplete=ignore_incomplete,
                        list_version_changes=list_version_changes,
                        list_code_changes=list_code_changes,
                        list_input_changes=list_input_changes,
                        list_params_changes=list_params_changes,
                        summary=summary, output_wait=output_wait,
                        nolock=not lock, unlock=unlock, resources=resources,
                        notemp=notemp, nodeps=nodeps,
                        cleanup_metadata=cleanup_metadata)
    except (Exception, BaseException) as ex:
        print_exception(ex, workflow.linemaps)
        success = False
    if workdir:
        os.chdir(olddir)
    if workflow.persistence:
        workflow.persistence.unlock()
    return success
def _load_snakefile(self, file_path):
    """Load the Snakefile."""
    # log at info level; the original logged this status message as an error
    logger.info("loading snakefile {}".format(file_path))
    workflow = Workflow(snakefile=file_path)
    workflow.include(file_path)
    return workflow.rules
def snakemake(snakefile, listrules=False, list_target_rules=False, cores=1,
              nodes=1, local_cores=1, resources=dict(), config=dict(),
              configfile=None, config_args=None, workdir=None, targets=None,
              dryrun=False, touch=False, forcetargets=False, forceall=False,
              forcerun=[], prioritytargets=[], stats=None, printreason=False,
              printshellcmds=False, printdag=False, printrulegraph=False,
              printd3dag=False, nocolor=False, quiet=False, keepgoing=False,
              cluster=None, cluster_config=None, cluster_sync=None, drmaa=None,
              jobname="snakejob.{rulename}.{jobid}.sh", immediate_submit=False,
              standalone=False, ignore_ambiguity=False, snakemakepath=None,
              lock=True, unlock=False, cleanup_metadata=None,
              force_incomplete=False, ignore_incomplete=False,
              list_version_changes=False, list_code_changes=False,
              list_input_changes=False, list_params_changes=False,
              list_resources=False, summary=False, detailed_summary=False,
              latency_wait=3, benchmark_repeats=1, wait_for_files=None,
              print_compilation=False, debug=False, notemp=False, nodeps=False,
              keep_target_files=False, allowed_rules=None, jobscript=None,
              timestamp=False, greediness=None, no_hooks=False,
              overwrite_shellcmd=None, updated_files=None, log_handler=None,
              keep_logger=False, verbose=False):
    """Run snakemake on a given snakefile.

    This function provides access to the whole snakemake functionality. It is
    not thread-safe.

    Args:
        snakefile (str): the path to the snakefile
        listrules (bool): list rules (default False)
        list_target_rules (bool): list target rules (default False)
        cores (int): the number of provided cores (ignored when using cluster support) (default 1)
        nodes (int): the number of provided cluster nodes (ignored without cluster support) (default 1)
        local_cores (int): the number of provided local cores if in cluster mode (ignored without cluster support) (default 1)
        resources (dict): provided resources, a dictionary assigning integers to resource names, e.g. {"gpu": 1, "io": 5} (default {})
        config (dict): override values for workflow config
        workdir (str): path to working directory (default None)
        targets (list): list of targets, e.g. rule or file names (default None)
        dryrun (bool): only dry-run the workflow (default False)
        touch (bool): only touch all output files if present (default False)
        forcetargets (bool): force given targets to be re-created (default False)
        forceall (bool): force all output files to be re-created (default False)
        forcerun (list): list of files and rules that shall be re-created/re-executed (default [])
        prioritytargets (list): list of targets that shall be run with maximum priority (default [])
        stats (str): path to file that shall contain stats about the workflow execution (default None)
        printreason (bool): print the reason for the execution of each job (default False)
        printshellcmds (bool): print the shell command of each job (default False)
        printdag (bool): print the dag in the graphviz dot language (default False)
        printrulegraph (bool): print the graph of rules in the graphviz dot language (default False)
        printd3dag (bool): print a D3.js compatible JSON representation of the DAG (default False)
        nocolor (bool): do not print colored output (default False)
        quiet (bool): do not print any default job information (default False)
        keepgoing (bool): keep going upon errors (default False)
        cluster (str): submission command of a cluster or batch system to use, e.g. qsub (default None)
        cluster_config (str): configuration file for cluster options (default None)
        cluster_sync (str): blocking cluster submission command (like SGE 'qsub -sync y') (default None)
        drmaa (str): if not None use DRMAA for cluster support, str specifies native args passed to the cluster when submitting a job
        jobname (str): naming scheme for cluster job scripts (default "snakejob.{rulename}.{jobid}.sh")
        immediate_submit (bool): immediately submit all cluster jobs, regardless of dependencies (default False)
        standalone (bool): kill all processes very rudely in case of failure (do not use this if you use this API) (default False)
        ignore_ambiguity (bool): ignore ambiguous rules and always take the first possible one (default False)
        snakemakepath (str): path to the snakemake executable (default None)
        lock (bool): lock the working directory when executing the workflow (default True)
        unlock (bool): just unlock the working directory (default False)
        cleanup_metadata (bool): just cleanup metadata of output files (default False)
        force_incomplete (bool): force the re-creation of incomplete files (default False)
        ignore_incomplete (bool): ignore incomplete files (default False)
        list_version_changes (bool): list output files with changed rule version (default False)
        list_code_changes (bool): list output files with changed rule code (default False)
        list_input_changes (bool): list output files with changed input files (default False)
        list_params_changes (bool): list output files with changed params (default False)
        list_resources (bool): list resources used in the workflow (default False)
        summary (bool): list a summary of all output files and their status (default False). If no option is specified, a basic summary will be output. If 'detailed' is added as an option, e.g. --summary detailed, extra info about the input and shell commands will be included.
        detailed_summary (bool): list summary of all input and output files and their status (default False)
        latency_wait (int): how many seconds to wait for an output file to appear after the execution of a job, e.g. to handle filesystem latency (default 3)
        benchmark_repeats (int): number of repeated runs of a job if declared for benchmarking (default 1)
        wait_for_files (list): wait for given files to be present before executing the workflow
        print_compilation (bool): print the compilation of the snakefile (default False)
        debug (bool): allow to use the debugger within rules
        notemp (bool): ignore temp file flags, e.g. do not delete output files marked as temp after use (default False)
        nodeps (bool): ignore dependencies (default False)
        keep_target_files (bool): do not adjust the paths of given target files relative to the working directory
        allowed_rules (set): restrict allowed rules to the given set; if None or empty, all rules are used
        jobscript (str): path to a custom shell script template for cluster jobs (default None)
        timestamp (bool): print time stamps in front of any output (default False)
        greediness (float): set the greediness of scheduling. This value between 0 and 1 determines how carefully jobs are selected for execution. The default value (0.5 if prioritytargets are used, 1.0 else) provides the best speed and still acceptable scheduling quality.
        overwrite_shellcmd (str): a shell command that shall be executed instead of those given in the workflow. This is for debugging purposes only.
        updated_files (list): a list that will be filled with the files that are updated or created during the workflow execution
        verbose (bool): show additional debug output (default False)
        log_handler (function): redirect snakemake output to this custom log handler, a function that takes a log message dictionary (see below) as its only argument (default None)

        The log message dictionary for the log handler has the following entries:

        :level: the log level ("info", "error", "debug", "progress", "job_info")
        :level="info", "error" or "debug":
            :msg: the log message
        :level="progress":
            :done: number of already executed jobs
            :total: number of total jobs
        :level="job_info":
            :input: list of input files of a job
            :output: list of output files of a job
            :log: path to log file of a job
            :local: whether a job is executed locally (i.e. ignoring cluster)
            :msg: the job message
            :reason: the job reason
            :priority: the job priority
            :threads: the threads of the job

    Returns:
        bool: True if workflow execution was successful.
    """
    if updated_files is None:
        updated_files = list()

    if cluster or cluster_sync or drmaa:
        cores = sys.maxsize
    else:
        nodes = sys.maxsize

    if cluster_config:
        cluster_config = load_configfile(cluster_config)
    else:
        cluster_config = dict()

    if not keep_logger:
        setup_logger(handler=log_handler, quiet=quiet,
                     printreason=printreason, printshellcmds=printshellcmds,
                     nocolor=nocolor, stdout=dryrun, debug=verbose,
                     timestamp=timestamp)

    if greediness is None:
        greediness = 0.5 if prioritytargets else 1.0
    elif not (0 <= greediness <= 1.0):
        logger.error("Error: greediness must be a float between 0 and 1.")
        return False

    if not os.path.exists(snakefile):
        logger.error("Error: Snakefile \"{}\" not present.".format(snakefile))
        return False
    snakefile = os.path.abspath(snakefile)

    cluster_mode = ((cluster is not None) + (cluster_sync is not None) +
                    (drmaa is not None))
    if cluster_mode > 1:
        logger.error("Error: cluster and drmaa args are mutually exclusive")
        return False
    if debug and (cores > 1 or cluster_mode):
        logger.error("Error: debug mode cannot be used with more than one "
                     "core or cluster execution.")
        return False

    overwrite_config = dict()
    if configfile:
        overwrite_config.update(load_configfile(configfile))
    if config:
        overwrite_config.update(config)

    if workdir:
        olddir = os.getcwd()
        if not os.path.exists(workdir):
            logger.info("Creating specified working directory {}.".format(workdir))
            os.makedirs(workdir)
        workdir = os.path.abspath(workdir)
        os.chdir(workdir)
    workflow = Workflow(snakefile=snakefile, snakemakepath=snakemakepath,
                        jobscript=jobscript,
                        overwrite_shellcmd=overwrite_shellcmd,
                        overwrite_config=overwrite_config,
                        overwrite_workdir=workdir,
                        overwrite_configfile=configfile,
                        config_args=config_args, debug=debug)

    if standalone:
        try:
            # set the process group
            os.setpgrp()
        except:
            # ignore: if it does not work we can still work without it
            pass

    success = True
    try:
        workflow.include(snakefile, overwrite_first_rule=True,
                         print_compilation=print_compilation)
        workflow.check()

        if not print_compilation:
            if listrules:
                workflow.list_rules()
            elif list_target_rules:
                workflow.list_rules(only_targets=True)
            elif list_resources:
                workflow.list_resources()
            else:
                # handle subworkflows
                subsnakemake = partial(
                    snakemake, cores=cores, nodes=nodes,
                    local_cores=local_cores, resources=resources,
                    dryrun=dryrun, touch=touch, printreason=printreason,
                    printshellcmds=printshellcmds, nocolor=nocolor,
                    quiet=quiet, keepgoing=keepgoing, cluster=cluster,
                    cluster_config=cluster_config, cluster_sync=cluster_sync,
                    drmaa=drmaa, jobname=jobname,
                    immediate_submit=immediate_submit, standalone=standalone,
                    ignore_ambiguity=ignore_ambiguity,
                    snakemakepath=snakemakepath, lock=lock, unlock=unlock,
                    cleanup_metadata=cleanup_metadata,
                    force_incomplete=force_incomplete,
                    ignore_incomplete=ignore_incomplete,
                    latency_wait=latency_wait,
                    benchmark_repeats=benchmark_repeats, verbose=verbose,
                    notemp=notemp, nodeps=nodeps, jobscript=jobscript,
                    timestamp=timestamp, greediness=greediness,
                    no_hooks=no_hooks, overwrite_shellcmd=overwrite_shellcmd,
                    config=config, config_args=config_args, keep_logger=True)
                success = workflow.execute(
                    targets=targets, dryrun=dryrun, touch=touch, cores=cores,
                    nodes=nodes, local_cores=local_cores,
                    forcetargets=forcetargets, forceall=forceall,
                    forcerun=forcerun, prioritytargets=prioritytargets,
                    quiet=quiet, keepgoing=keepgoing,
                    printshellcmds=printshellcmds, printreason=printreason,
                    printrulegraph=printrulegraph, printdag=printdag,
                    cluster=cluster, cluster_config=cluster_config,
                    cluster_sync=cluster_sync, jobname=jobname, drmaa=drmaa,
                    printd3dag=printd3dag, immediate_submit=immediate_submit,
                    ignore_ambiguity=ignore_ambiguity, stats=stats,
                    force_incomplete=force_incomplete,
                    ignore_incomplete=ignore_incomplete,
                    list_version_changes=list_version_changes,
                    list_code_changes=list_code_changes,
                    list_input_changes=list_input_changes,
                    list_params_changes=list_params_changes, summary=summary,
                    latency_wait=latency_wait,
                    benchmark_repeats=benchmark_repeats,
                    wait_for_files=wait_for_files,
                    detailed_summary=detailed_summary, nolock=not lock,
                    unlock=unlock, resources=resources, notemp=notemp,
                    nodeps=nodeps, keep_target_files=keep_target_files,
                    cleanup_metadata=cleanup_metadata,
                    subsnakemake=subsnakemake, updated_files=updated_files,
                    allowed_rules=allowed_rules, greediness=greediness,
                    no_hooks=no_hooks)
    except BrokenPipeError:
        # Ignore this exception and stop. It occurs if snakemake output is
        # piped into less and less quits before reading the whole output.
        # In such a case, snakemake shall stop scheduling and quit with error 1.
        success = False
    except (Exception, BaseException) as ex:
        print_exception(ex, workflow.linemaps)
        success = False
    if workdir:
        os.chdir(olddir)
    if workflow.persistence:
        workflow.persistence.unlock()
    if not keep_logger:
        logger.cleanup()
    return success
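# Given the breadth of that signature, a minimal call is worth spelling out.
# Hedged sketch: the Snakefile path and target name are placeholders.
ok = snakemake("Snakefile", targets=["all"], dryrun=True,
               printshellcmds=True, printreason=True)
print("plan ok:", ok)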
def _create_snakemake_dag(snakefile: str,
                          configfiles: Optional[List[str]] = None,
                          **kwargs: Any) -> DAG:
    """Create ``snakemake.dag.DAG`` instance.

    The code of this function comes from the Snakemake codebase and is
    adapted to fulfil REANA's purpose of getting the needed metadata.

    :param snakefile: Path to Snakefile.
    :type snakefile: string
    :param configfiles: List of config file paths.
    :type configfiles: List
    :param kwargs: Snakemake args.
    :type kwargs: Any
    """
    overwrite_config = dict()
    if configfiles is None:
        configfiles = []
    for f in configfiles:
        # get values to override. Later configfiles override earlier ones.
        overwrite_config.update(load_configfile(f))
    # convert provided paths to absolute paths
    configfiles = list(map(os.path.abspath, configfiles))

    workflow = Workflow(
        snakefile=snakefile,
        overwrite_configfiles=configfiles,
        overwrite_config=overwrite_config,
    )
    workflow.include(snakefile=snakefile, overwrite_first_rule=True)
    workflow.check()

    # Code copied and adapted from `snakemake.workflow.Workflow.execute()`
    # in order to build the DAG and calculate the job dependencies.
    # https://github.com/snakemake/snakemake/blob/75a544ba528b30b43b861abc0ad464db4d6ae16f/snakemake/workflow.py#L525
    def rules(items):
        return map(workflow._rules.__getitem__,
                   filter(workflow.is_rule, items))

    if kwargs.get("keep_target_files"):
        def files(items):
            return filterfalse(workflow.is_rule, items)
    else:
        def files(items):
            relpath = (lambda f: f
                       if os.path.isabs(f) or f.startswith("root://")
                       else os.path.relpath(f))
            return map(relpath, filterfalse(workflow.is_rule, items))

    targets = kwargs.get("targets")
    if not targets:
        targets = ([workflow.first_rule]
                   if workflow.first_rule is not None else list())
    prioritytargets = kwargs.get("prioritytargets", [])
    forcerun = kwargs.get("forcerun", [])
    until = kwargs.get("until", [])
    omit_from = kwargs.get("omit_from", [])

    priorityrules = set(rules(prioritytargets))
    priorityfiles = set(files(prioritytargets))
    forcerules = set(rules(forcerun))
    forcefiles = set(files(forcerun))
    untilrules = set(rules(until))
    untilfiles = set(files(until))
    omitrules = set(rules(omit_from))
    omitfiles = set(files(omit_from))

    targetrules = set(chain(
        rules(targets),
        filterfalse(Rule.has_wildcards, priorityrules),
        filterfalse(Rule.has_wildcards, forcerules),
        filterfalse(Rule.has_wildcards, untilrules),
    ))
    targetfiles = set(chain(files(targets), priorityfiles, forcefiles,
                            untilfiles))

    dag = DAG(
        workflow,
        workflow.rules,
        targetrules=targetrules,
        targetfiles=targetfiles,
        omitfiles=omitfiles,
        omitrules=omitrules,
    )
    workflow.persistence = Persistence(dag=dag)
    dag.init()
    dag.update_checkpoint_dependencies()
    dag.check_dynamic()
    return dag
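# Hedged usage sketch (file names are placeholders; iterating DAG.jobs is
# part of the snakemake API that the function above returns):
dag = _create_snakemake_dag("Snakefile", configfiles=["config.yaml"])
for job in dag.jobs:
    print(job.name, sorted(job.output))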
#!/usr/bin/env python3
import sys, os
import unittest

from snakemake.workflow import Workflow

"""
Can I unit test a Snakefile? Of course I can!
Or, at least, I can test any functions defined at the top level.
Importing the functions from the Snakefile requires parsing it with the
Snakemake internals, and setting a couple of environment things.
"""
os.environ['TOOLBOX'] = 'dummy'

sf = os.path.join(os.path.dirname(__file__), '..', 'Snakefile.qc')
wf = Workflow(sf, overwrite_config=dict(runid='170221_K00166_0183_AHHT3HBBXX'))
wf.include(sf)

# I can now import top-level functions like so:
split_fq_name = wf.globals['split_fq_name']

class T(unittest.TestCase):

    def test_split_fq_name(self):
        """Test the function which claims to work as follows...

        Break out components from the name of a FASTQ file.
        eg. 10749/10749DMpool03/170221_K00166_0183_AHHT3HBBXX_8_10749DM0001L01_1.fastq.gz
        eg. 170221_K00166_0183_AHHT3HBBXX_1_unassigned_1.fastq.gz
        """
        # eg. 1
        self.assertEqual(
            split_fq_name(
    n = 0
    with open(input.fastq) as f:
        for _ in f:
            n += 1
    with open(output.counts, 'w') as f:
        print(n / 4, file=f)
"""

from snakemake.workflow import Workflow, Rules
import snakemake.workflow
from snakemake import shell
from snakemake.logging import setup_logger

setup_logger()

workflow = Workflow(__file__)
snakemake.workflow.rules = Rules()
snakemake.workflow.config = dict()

### Output from snakemake --print-compilation follows (reformatted)

workflow.include("pipeline.conf")

shell.prefix("set -euo pipefail;")

@workflow.rule(name='all', lineno=6, snakefile='.../Snakefile')
@workflow.input("reads.counts")
@workflow.norun()
@workflow.run
def __all(input, output, params, wildcards, threads, resources, log, version):
    pass  # norun rules compile to an empty body
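# To actually run the hand-assembled workflow one would call its execute()
# method; a hedged sketch using the argument names seen in the older
# Workflow.execute() calls elsewhere in these examples, with 'all' being the
# rule registered just above:
workflow.check()
workflow.execute(targets=['all'], dryrun=True, cores=1)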