Example #1
 def format_wildcards(self, string, **variables):
     """ Format a string with variables from the job. """
     _variables = dict()
     _variables.update(self.rule.workflow.globals)
     _variables.update(
         dict(
             input=self.input,
             output=self.output,
             params=self.params,
             wildcards=self._format_wildcards,
             threads=self.threads,
             resources=self.resources,
             log=self.log,
             jobid=self.jobid,
             version=self.rule.version,
             name=self.name,
             rule=self.rule.name,
             rulename=self.rule.name,
             bench_iteration=None,
         ))
     _variables.update(variables)
     try:
         return format(string, **_variables)
     except NameError as ex:
         raise RuleException("NameError: " + str(ex), rule=self.rule)
     except IndexError as ex:
         raise RuleException("IndexError: " + str(ex), rule=self.rule)
Example #2
 def decorate(ruleinfo):
     if ruleinfo.input:
         rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
     if ruleinfo.output:
         rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
     if ruleinfo.params:
         rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
     if ruleinfo.threads:
         if not isinstance(ruleinfo.threads, int) and not callable(
                 ruleinfo.threads):
             raise RuleException(
                 "Threads value has to be an integer or a callable.",
                 rule=rule)
         rule.resources["_cores"] = ruleinfo.threads
     if ruleinfo.shadow_depth:
         if ruleinfo.shadow_depth not in (True, "shallow", "full"):
             raise RuleException(
                 "Shadow must either be 'shallow', 'full', "
                 "or True (equivalent to 'full')",
                 rule=rule)
         if ruleinfo.shadow_depth is True:
             rule.shadow_depth = 'full'
         else:
             rule.shadow_depth = ruleinfo.shadow_depth
     if ruleinfo.resources:
         args, resources = ruleinfo.resources
         if args:
             raise RuleException("Resources have to be named.")
         if not all(
                 map(lambda r: isinstance(r, int) or callable(r),
                     resources.values())):
             raise RuleException(
                 "Resource values have to be integers or callables.",
                 rule=rule)
         rule.resources.update(resources)
     if ruleinfo.priority:
         if (not isinstance(ruleinfo.priority, int)
                 and not isinstance(ruleinfo.priority, float)):
             raise RuleException("Priority values have to be numeric.",
                                 rule=rule)
         rule.priority = ruleinfo.priority
     if ruleinfo.version:
         rule.version = ruleinfo.version
     if ruleinfo.log:
         rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
     if ruleinfo.message:
         rule.message = ruleinfo.message
     if ruleinfo.benchmark:
         rule.benchmark = ruleinfo.benchmark
     rule.norun = ruleinfo.norun
     rule.docstring = ruleinfo.docstring
     rule.run_func = ruleinfo.func
     rule.shellcmd = ruleinfo.shellcmd
     ruleinfo.func.__name__ = "__{}".format(name)
     self.globals[ruleinfo.func.__name__] = ruleinfo.func
     setattr(rules, name, rule)
     return ruleinfo.func
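
The decorate() examples share a validation idiom: reject malformed directive values up front and attach the offending rule to the exception. A rough, standalone sketch of that idiom; RuleError and validate_directives are illustrative names, not part of Snakemake.

class RuleError(Exception):
    """Stand-in for RuleException: an error tied to a specific rule."""
    def __init__(self, message, rule=None):
        super().__init__(message)
        self.rule = rule

def validate_directives(rule_name, threads=None, resources=None, priority=None):
    """Validate directive values the way the decorate() examples do."""
    if threads is not None and not (isinstance(threads, int) or callable(threads)):
        raise RuleError("Threads value has to be an integer or a callable.", rule=rule_name)
    if resources is not None and not all(
            isinstance(r, int) or callable(r) for r in resources.values()):
        raise RuleError("Resource values have to be integers or callables.", rule=rule_name)
    if priority is not None and not isinstance(priority, (int, float)):
        raise RuleError("Priority values have to be numeric.", rule=rule_name)

# Usage: well-formed values pass silently; a bad value raises RuleError.
validate_directives("map_reads", threads=4, resources={"mem_mb": 8000}, priority=1)
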
Example #3
 def message(self):
     """ Return the message for this job. """
     try:
         return (self.format_wildcards(self.rule.message)
             if self.rule.message else None)
     except AttributeError as ex:
         raise RuleException(str(ex), rule=self.rule)
     except KeyError as ex:
         raise RuleException("Unknown variable in message "
             "of shell command: {}".format(str(ex)), rule=self.rule)
Example #4
 def shellcmd(self):
     """ Return the shell command. """
     try:
         return (self.format_wildcards(self.rule.shellcmd)
             if self.rule.shellcmd else None)
     except AttributeError as ex:
         raise RuleException(str(ex), rule=self.rule)
     except KeyError as ex:
         raise RuleException("Unknown variable when printing "
             "shell command: {}".format(str(ex)), rule=self.rule)
Example #5
 def check_caching(self):
     if self.name in self.workflow.cache_rules:
         if len(self.output) != 1:
             raise RuleException(
                 "Only rules with exactly 1 output file may be cached.",
                 rule=self)
         if self.dynamic_output:
             raise RuleException(
                 "Rules with dynamic output files may not be cached.",
                 rule=self)
Example #6
 def check_output(self, job, wait=3):
     """ Raise exception if output files of job are missing. """
     for f in job.expanded_output:
         if not f.exists:
             logger.warning(
                 "Output file {} not present. Waiting {} "
                 "seconds to ensure that this is not because of filesystem "
                 "latency.".format(f, wait))
             while not f.exists and wait > 0:
                 wait -= 1
                 time.sleep(1)
             if not f.exists:
                 raise MissingOutputException("Output file {} not "
                                              "produced by rule {}.".format(
                                                  f, job.rule.name),
                                              lineno=job.rule.lineno,
                                              snakefile=job.rule.snakefile)
     input_maxtime = job.input_maxtime
     if input_maxtime is not None:
         output_mintime = job.output_mintime
         if output_mintime is not None and output_mintime < input_maxtime:
             raise RuleException(
                 "Output files {} are older than input "
                 "files. Did you extract an archive? Make sure that output "
                 "files have a more recent modification date than the "
                 "archive, e.g. by using 'touch'.".format(", ".join(
                     job.expanded_output)),
                 rule=job.rule)
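
Example #6 tolerates filesystem latency before declaring an output missing. A minimal standard-library sketch of the same idea; wait_for_file and MissingOutputError are illustrative names, not the Snakemake API.

import os
import time

class MissingOutputError(Exception):
    """Stand-in for MissingOutputException."""

def wait_for_file(path, wait=3):
    """Poll for a file to appear, then fail loudly if it never does."""
    while not os.path.exists(path) and wait > 0:
        time.sleep(1)
        wait -= 1
    if not os.path.exists(path):
        raise MissingOutputError("Output file {} was not produced.".format(path))
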
Example #7
 def _apply_wildcards(newitems,
                      olditems,
                      wildcards,
                      wildcards_obj,
                      concretize=apply_wildcards,
                      ruleio=None):
     for name, item in olditems.allitems():
         start = len(newitems)
         if callable(item):
             item = item(wildcards_obj)
             if not_iterable(item):
                 item = [item]
             for item_ in item:
                 if not isinstance(item_, str):
                     raise RuleException(
                         "Input function did not return str or list of str.",
                         rule=self)
                 concrete = concretize(item_, wildcards)
                 newitems.append(concrete)
                 if ruleio is not None:
                     ruleio[concrete] = item_
         else:
             if not_iterable(item):
                 item = [item]
             for item_ in item:
                 concrete = concretize(item_, wildcards)
                 newitems.append(concrete)
                 if ruleio is not None:
                     ruleio[concrete] = item_
         if name:
             newitems.set_name(name, start, end=len(newitems))
Example #8
def run_wrapper(run, input, output, params, wildcards, threads, resources, log, linemaps):
    """
    Wrapper around the run method that handles directory creation and
    output file deletion on error.

    Arguments
    run       -- the run method
    input     -- list of input files
    output    -- list of output files
    params    -- list of additional parameters
    wildcards -- the wildcards resolved for this job
    threads   -- number of usable threads
    resources -- resources allotted to the job
    log       -- path to the log file
    linemaps  -- line number maps used for error reporting
    """

    if log is None:
        log = Unformattable(errormsg="log used but undefined")
    try:
        # execute the actual run method.
        run(input, output, params, wildcards, threads, resources, log)
    except (Exception, BaseException) as ex:
        # this ensures that the exception can be re-raised in the parent thread
        lineno, file = get_exception_origin(ex, linemaps)
        raise RuleException(format_error(
            ex, lineno, linemaps=linemaps, snakefile=file,
            show_traceback=True))
Example #9
    def check_wildcards(self, wildcards):
        missing_wildcards = self.wildcard_names - set(wildcards.keys())

        if missing_wildcards:
            raise RuleException(
                "Could not resolve wildcards in rule {}:\n{}".format(
                    self.name, "\n".join(missing_wildcards)),
                lineno=self.lineno,
                snakefile=self.snakefile)
Example #10
 def check_caching(self):
     if self.name in self.workflow.cache_rules:
         if len(self.output) == 0:
             raise RuleException(
                 "Rules without output files cannot be cached.", rule=self)
         if len(self.output) > 1:
             prefixes = set(out.multiext_prefix for out in self.output)
             if None in prefixes or len(prefixes) > 1:
                 raise RuleException(
                     "Rules with multiple output files must define them as a single multiext() "
                     '(e.g. multiext("path/to/index", ".bwt", ".ann")). '
                     "The rationale is that multiple output files can only be unambiguously resolved "
                     "if they can be distinguished by a fixed set of extensions (i.e. mime types).",
                     rule=self,
                 )
         if self.dynamic_output:
             raise RuleException(
                 "Rules with dynamic output files may not be cached.",
                 rule=self)
Example #11
 def decorate(ruleinfo):
     if ruleinfo.input:
         rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
     if ruleinfo.output:
         rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
     if ruleinfo.params:
         rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
     if ruleinfo.threads:
         if not isinstance(ruleinfo.threads, int):
             raise RuleException("Threads value has to be an integer.",
                                 rule=rule)
         rule.resources["_cores"] = ruleinfo.threads
     if ruleinfo.resources:
         args, resources = ruleinfo.resources
         if args:
             raise RuleException("Resources have to be named.")
         if not all(
                 map(lambda r: isinstance(r, int), resources.values())):
             raise RuleException(
                 "Resource values have to be integers.", rule=rule)
         rule.resources.update(resources)
     if ruleinfo.priority:
         if (not isinstance(ruleinfo.priority, int)
                 and not isinstance(ruleinfo.priority, float)):
             raise RuleException("Priority values have to be numeric.",
                                 rule=rule)
         rule.priority = ruleinfo.priority
     if ruleinfo.version:
         rule.version = ruleinfo.version
     if ruleinfo.log:
         rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
     if ruleinfo.message:
         rule.message = ruleinfo.message
     if ruleinfo.benchmark:
         rule.benchmark = ruleinfo.benchmark
     rule.norun = ruleinfo.norun
     rule.docstring = ruleinfo.docstring
     rule.run_func = ruleinfo.func
     rule.shellcmd = ruleinfo.shellcmd
     ruleinfo.func.__name__ = "__{}".format(name)
     self.globals[ruleinfo.func.__name__] = ruleinfo.func
     setattr(rules, name, rule)
     return ruleinfo.func
Example #12
def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
                version, benchmark, benchmark_repeats, rule, conda_env, linemaps, debug=False,
                shadow_dir=None):
    """
    Wrapper around the run method that handles exceptions and benchmarking.

    Arguments
    run               -- the run method
    input             -- list of input files
    output            -- list of output files
    params            -- list of additional parameters
    wildcards         -- the wildcards resolved for this job
    threads           -- number of usable threads
    resources         -- resources allotted to the job
    log               -- list of log files
    version           -- version of the rule, if any
    benchmark         -- path to the benchmark file, or None
    benchmark_repeats -- how often to repeat the run for benchmarking
    rule (str)        -- rule name
    conda_env         -- conda environment to activate, if any
    linemaps          -- line number maps used for error reporting
    debug             -- whether to reattach stdin for interactive debugging
    shadow_dir        -- optional shadow directory root
    """
    if os.name == "posix" and debug:
        sys.stdin = open('/dev/stdin')

    try:
        runs = 1 if benchmark is None else benchmark_repeats
        wallclock = []
        for i in range(runs):
            w = time.time()
            # execute the actual run method.
            with change_working_directory(shadow_dir):
                run(input, output, params, wildcards, threads, resources, log,
                    version, rule, conda_env)
            w = time.time() - w
            wallclock.append(w)

    except (KeyboardInterrupt, SystemExit) as e:
        # re-raise keyboard interrupts and system exits unchanged so the
        # scheduler records an error without wrapping them in a RuleException
        raise e
    except (Exception, BaseException) as ex:
        log_verbose_traceback(ex)
        # this ensures that the exception can be re-raised in the parent thread
        lineno, file = get_exception_origin(ex, linemaps)
        raise RuleException(format_error(ex, lineno,
                                         linemaps=linemaps,
                                         snakefile=file,
                                         show_traceback=True))

    if benchmark is not None:
        try:
            with open(benchmark, "w") as f:
                print("s", "h:m:s", sep="\t", file=f)
                for t in wallclock:
                    print(t, str(datetime.timedelta(seconds=t)), sep="\t", file=f)
        except (Exception, BaseException) as ex:
            raise WorkflowError(ex)
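
Example #12's benchmarking loop reduces to: time each repeat, collect wallclock seconds, and write a tab-separated file with raw seconds plus a human-readable duration. A standalone sketch under that reading; the benchmark function below is illustrative, not Snakemake's.

import datetime
import time

def benchmark(func, repeats, path):
    """Run func repeatedly, writing one wallclock measurement per line to a TSV."""
    wallclock = []
    for _ in range(repeats):
        start = time.time()
        func()
        wallclock.append(time.time() - start)
    with open(path, "w") as f:
        print("s", "h:m:s", sep="\t", file=f)
        for t in wallclock:
            print(t, datetime.timedelta(seconds=t), sep="\t", file=f)

# Usage: benchmark a trivial workload three times.
benchmark(lambda: sum(range(10**6)), 3, "benchmark.tsv")
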
Example #13
    def update(self, jobs, file=None, visited=None, skip_until_dynamic=False):
        """ Update the DAG by adding given jobs and their dependencies. """
        if visited is None:
            visited = set()
        producer = None
        exceptions = list()
        jobs = sorted(jobs, reverse=not self.ignore_ambiguity)
        cycles = list()

        for i, job in enumerate(jobs):
            if file in job.input:
                cycles.append(job)
                continue
            if job in visited:
                cycles.append(job)
                continue
            try:
                self.update_(job,
                             visited=set(visited),
                             skip_until_dynamic=skip_until_dynamic)
                # TODO this might fail if a rule discarded here is needed
                # elsewhere
                if i > 0:
                    if job < jobs[i - 1] or self.ignore_ambiguity:
                        break
                    elif producer is not None:
                        raise AmbiguousRuleException(file, job.rule,
                                                     jobs[i - 1].rule)
                producer = job
            except (MissingInputException, CyclicGraphException) as ex:
                exceptions.append(ex)
            except RuntimeError as ex:
                if str(ex).startswith("maximum recursion depth exceeded"):
                    ex = RuleException(
                        "Maximum recursion depth exceeded. "
                        "Maybe you have a cyclic dependency due to infinitely "
                        "filled wildcards?\nProblematic "
                        "input file:\n{}".format(file),
                        rule=job.rule)
                raise ex
        if producer is None:
            if cycles:
                job = cycles[0]
                raise CyclicGraphException(job.rule, file, rule=job.rule)
            if exceptions:
                raise exceptions[0]
        return producer
Example #14
 def format_wildcards(self, string, **variables):
     """ Format a string with variables from the job. """
     _variables = dict()
     _variables.update(self.rule.workflow.globals)
     _variables.update(variables)
     try:
         return format(string,
                   input=self.input,
                   output=self.output,
                   params=self.params,
                   wildcards=self._format_wildcards,
                   threads=self.threads,
                   resources=self.resources,
                   log=self.log, **_variables)
     except NameError as ex:
         raise RuleException("NameError: " + str(ex), rule=self.rule)
Example #15
    def init(self):
        """ Initialise the DAG. """
        for job in map(self.rule2job, self.targetrules):
            job = self.update([job])
            self.targetjobs.add(job)

        exceptions = defaultdict(list)
        for file in self.targetfiles:
            try:
                job = self.update(self.file2jobs(file), file=file)
                self.targetjobs.add(job)
            except MissingRuleException as ex:
                exceptions[file].append(ex)

        if exceptions:
            raise RuleException(include=chain(*exceptions.values()))
        self.update_needrun()
Example #16
    def check_output(self, job, wait=3):
        """ Raise exception if output files of job are missing. """
        try:
            wait_for_files(job.expanded_output, latency_wait=wait)
        except IOError as e:
            raise MissingOutputException(str(e), rule=job.rule)

        input_maxtime = job.input_maxtime
        if input_maxtime is not None:
            output_mintime = job.output_mintime
            if output_mintime is not None and output_mintime < input_maxtime:
                raise RuleException(
                    "Output files {} are older than input "
                    "files. Did you extract an archive? Make sure that output "
                    "files have a more recent modification date than the "
                    "archive, e.g. by using 'touch'.".format(", ".join(
                        job.expanded_output)),
                    rule=job.rule)
Example #17
        def decorate(ruleinfo):
            if ruleinfo.wildcard_constraints:
                rule.set_wildcard_constraints(
                    *ruleinfo.wildcard_constraints[0],
                    **ruleinfo.wildcard_constraints[1])
            if ruleinfo.input:
                rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
            if ruleinfo.output:
                rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
            if ruleinfo.params:
                rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
            # handle default resources
            rule.resources = copy.deepcopy(self.default_resources)
            if ruleinfo.threads:
                if not isinstance(ruleinfo.threads, int) and not callable(
                        ruleinfo.threads):
                    raise RuleException(
                        "Threads value has to be an integer or a callable.",
                        rule=rule)
                rule.resources["_cores"] = ruleinfo.threads
            if ruleinfo.shadow_depth:
                if ruleinfo.shadow_depth not in (True, "shallow", "full",
                                                 "minimal"):
                    raise RuleException(
                        "Shadow must either be 'minimal', 'shallow', 'full', "
                        "or True (equivalent to 'full')",
                        rule=rule)
                if ruleinfo.shadow_depth is True:
                    rule.shadow_depth = 'full'
                    logger.warning(
                        "Shadow is set to True in rule {} (equivalent to 'full'). It's encouraged to use the more explicit options 'minimal|shallow|full' instead."
                        .format(rule))
                else:
                    rule.shadow_depth = ruleinfo.shadow_depth
            if ruleinfo.resources:
                args, resources = ruleinfo.resources
                if args:
                    raise RuleException("Resources have to be named.")
                if not all(
                        map(lambda r: isinstance(r, int) or callable(r),
                            resources.values())):
                    raise RuleException(
                        "Resource values have to be integers or callables.",
                        rule=rule)
                rule.resources.update(resources)
            if ruleinfo.priority:
                if (not isinstance(ruleinfo.priority, int)
                        and not isinstance(ruleinfo.priority, float)):
                    raise RuleException("Priority values have to be numeric.",
                                        rule=rule)
                rule.priority = ruleinfo.priority
            if ruleinfo.version:
                rule.version = ruleinfo.version
            if ruleinfo.log:
                rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
            if ruleinfo.message:
                rule.message = ruleinfo.message
            if ruleinfo.benchmark:
                rule.benchmark = ruleinfo.benchmark
            if not self.run_local and ruleinfo.group is not None:
                rule.group = ruleinfo.group
            if ruleinfo.wrapper:
                if self.use_conda:
                    rule.conda_env = snakemake.wrapper.get_conda_env(
                        ruleinfo.wrapper, prefix=self.wrapper_prefix)
                # TODO retrieve suitable singularity image

            if ruleinfo.conda_env and self.use_conda:
                if not (ruleinfo.script or ruleinfo.wrapper
                        or ruleinfo.shellcmd):
                    raise RuleException(
                        "Conda environments are only allowed "
                        "with shell, script, or wrapper directives "
                        "(not with run).",
                        rule=rule)
                if not (urllib.parse.urlparse(ruleinfo.conda_env).scheme
                        or os.path.isabs(ruleinfo.conda_env)):
                    ruleinfo.conda_env = os.path.join(self.current_basedir,
                                                      ruleinfo.conda_env)
                rule.conda_env = ruleinfo.conda_env

            if self.use_singularity:
                invalid_rule = not (ruleinfo.script or ruleinfo.wrapper
                                    or ruleinfo.shellcmd)
                if ruleinfo.singularity_img:
                    if invalid_rule:
                        raise RuleException(
                            "Singularity directive is only allowed "
                            "with shell, script or wrapper directives "
                            "(not with run).",
                            rule=rule)
                    rule.singularity_img = ruleinfo.singularity_img
                elif self.global_singularity_img:
                    if not invalid_rule:
                        # skip rules with run directive
                        rule.singularity_img = self.global_singularity_img

            if self.use_docker:
                #invalid_rule = not ruleinfo.shellcmd
                invalid_rule = not (ruleinfo.script or ruleinfo.shellcmd)
                if ruleinfo.docker_img:
                    if invalid_rule:
                        raise RuleException(
                            "docker_img directive is only allowed "
                            "with shell or script directives "
                            "(not with run/wrapper).",
                            rule=rule)
                    rule.docker_img = ruleinfo.docker_img
                #elif self.global_singularity_img:
                #    if not invalid_rule:
                #        # skip rules with run directive
                #        rule.singularity_img = self.global_singularity_img

            rule.norun = ruleinfo.norun
            rule.docstring = ruleinfo.docstring
            rule.run_func = ruleinfo.func
            rule.shellcmd = ruleinfo.shellcmd
            rule.script = ruleinfo.script
            rule.wrapper = ruleinfo.wrapper
            rule.cwl = ruleinfo.cwl
            rule.restart_times = self.restart_times
            rule.basedir = self.current_basedir

            ruleinfo.func.__name__ = "__{}".format(rule.name)
            self.globals[ruleinfo.func.__name__] = ruleinfo.func
            setattr(rules, rule.name, RuleProxy(rule))
            if checkpoint:
                checkpoints.register(rule)
            return ruleinfo.func
Example #18
    def expand(self, rule, ruleinfo):
        """Recursively expand wildcards within RuleInfo object"""
        fields = list(
            filter(None.__ne__, filter(self.expands_field, ruleinfo_fields)))
        # normalize field values and create namedlist dictionary
        args = {}
        for field in fields:
            attr = getattr(ruleinfo, field)
            if isinstance(attr, tuple):
                if len(attr) != 2:
                    raise Exception("Internal Error")
                # flatten named lists
                for key in attr[1]:
                    if is_container(attr[1][key]):
                        attr[1][key] = list(flatten(attr[1][key]))
                # flatten unnamed and overwrite tuples
                # also turn attr[0] into a list, making it mutable
                attr = (list(flatten(attr[0])), attr[1])

                setattr(ruleinfo, field, attr)
                args[field] = NamedList(fromtuple=attr)
            else:
                args[field] = NamedList()
                args[field].append(attr)

        # build graph of expansion dependencies
        deps = networkx().DiGraph()
        for field, nlist in args.items():
            for n, value in enumerate(nlist):
                if not isinstance(value, str):  # only strings can be expanded
                    continue
                s = "{}[{}]".format(field, n)
                # create node for value itself
                deps.add_node(s, core=True, name=field, idx=n)
                # node depends on wildcards contained in value
                deps.add_edges_from((s, t) for t in get_names(value)
                                    if t.split(".")[0].split("[")[0] in fields)
                # field node depends on all of its value nodes
                deps.add_edge(field, s)
            # create edges field.name -> field[n]
            for name, (i, j) in nlist.get_names():
                s = "{}.{}".format(field, name)
                if j is None:
                    j = i + 1
                deps.add_edges_from(
                    (s, "{}[{}]".format(field, n)) for n in range(i, j))

        # sort variables so that they can be expanded in order
        try:
            nodes = list(
                reversed([
                    node for node in
                    networkx().algorithms.dag.topological_sort(deps)
                    if deps.out_degree(node) > 0 and 'core' in deps.nodes[node]
                ]))
        except networkx().NetworkXUnfeasible:
            raise CircularReferenceException(deps, rule)

        # expand variables
        for node in nodes:
            name = deps.nodes[node]['name']
            idx = deps.nodes[node]['idx']
            value = args[name][idx]
            if isinstance(value, str):
                try:
                    value2 = partial_format(value, **args)
                except FormattingError as e:
                    raise RuleException(
                        "Unable to resolve wildcard '{{{}}}' in parameter {} "
                        "in rule {}".format(e.attr, node, rule.name))
                except IndexError as e:
                    raise RuleException(
                        "Unable to format '{}' using '{}'".format(value, args))
                args[name][idx] = value2
                if ymp.print_rule == 1:
                    log.error("{}::{}: {} => {}".format(
                        rule.name, node, value, value2))

        # update ruleinfo
        for name in fields:
            attr = getattr(ruleinfo, name)
            if isinstance(attr, tuple):
                if len(attr) != 2:
                    raise Exception("Internal Error")
                args[name].update_tuple(attr)
            else:
                setattr(ruleinfo, name, args[name][0])
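
Example #18 orders placeholder expansion with a dependency graph, so values that reference other fields are expanded only after those fields. A small sketch of that ordering, assuming networkx is available (the example itself imports it lazily); expand_in_order and the field names in the usage line are made up.

import re
import networkx as nx

def expand_in_order(fields):
    """Expand {field} placeholders, resolving cross-references in dependency order."""
    deps = nx.DiGraph()
    deps.add_nodes_from(fields)
    for name, value in fields.items():
        for ref in re.findall(r"{(\w+)}", value):
            if ref in fields:
                deps.add_edge(name, ref)  # name depends on ref
    try:
        # reversed topological order puts dependencies first
        order = reversed(list(nx.topological_sort(deps)))
    except nx.NetworkXUnfeasible:
        raise ValueError("circular reference between fields")
    for name in order:
        fields[name] = fields[name].format(**fields)
    return fields

# Usage: "output" references "sample", so "sample" is expanded first.
print(expand_in_order({"sample": "S1", "output": "{sample}.bam"}))
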
Example #19
 def check_string_type(f):
     if not isinstance(f, str):
         raise RuleException(
             "Input function did not return str or list of str.",
             rule=self)
Example #20
    def prepare(self):
        """
        Prepare execution of job.
        This includes creation of directories and deletion of previously
        created dynamic files.
        Creates a shadow directory for the job if specified.
        """

        self.check_protected_output()

        unexpected_output = self.dag.reason(self).missing_output.intersection(
            self.existing_output)
        if unexpected_output:
            logger.warning(
                "Warning: the following output files of rule {} were not "
                "present when the DAG was created:\n{}".format(
                    self.rule, unexpected_output))

        self.remove_existing_output()

        for f, f_ in zip(self.output, self.rule.output):
            f.prepare()

        self.download_remote_input()

        for f in self.log:
            f.prepare()
        if self.benchmark:
            self.benchmark.prepare()

        if not self.is_shadow:
            return

        # Create shadow directory structure
        self.shadow_dir = tempfile.mkdtemp(
            dir=self.rule.workflow.persistence.shadow_path)
        cwd = os.getcwd()

        if self.rule.shadow_depth == "minimal":
            # Re-create the directory structure in the shadow directory
            for (f,
                 d) in set([(item, os.path.dirname(item))
                            for sublist in [self.input, self.output, self.log]
                            if sublist is not None for item in sublist]):
                if d and not os.path.isabs(d):
                    rel_path = os.path.relpath(d)
                    # Only create subdirectories
                    if not rel_path.split(os.path.sep)[0] == "..":
                        os.makedirs(os.path.join(self.shadow_dir, rel_path),
                                    exist_ok=True)
                    else:
                        raise RuleException(
                            "The following file name references a parent directory relative to your workdir.\n"
                            'This isn\'t supported for shadow: "minimal". Consider using an absolute path instead.\n{}'
                            .format(f),
                            rule=self.rule,
                        )

            # Symlink the input files
            for rel_path in set([
                    os.path.relpath(f) for f in self.input
                    if not os.path.isabs(f)
            ]):
                link = os.path.join(self.shadow_dir, rel_path)
                original = os.path.relpath(rel_path, os.path.dirname(link))
                os.symlink(original, link)

        # Shallow: simply symlink everything in the working directory.
        elif self.rule.shadow_depth == "shallow":
            for source in os.listdir(cwd):
                link = os.path.join(self.shadow_dir, source)
                os.symlink(os.path.abspath(source), link)
        elif self.rule.shadow_depth == "full":
            snakemake_dir = os.path.join(cwd, ".snakemake")
            for dirpath, dirnames, filenames in os.walk(cwd):
                # Must exclude .snakemake and its children to avoid infinite
                # loop of symlinks.
                if os.path.commonprefix([snakemake_dir,
                                         dirpath]) == snakemake_dir:
                    continue
                for dirname in dirnames:
                    if dirname == ".snakemake":
                        continue
                    relative_source = os.path.relpath(
                        os.path.join(dirpath, dirname))
                    shadow = os.path.join(self.shadow_dir, relative_source)
                    os.mkdir(shadow)

                for filename in filenames:
                    source = os.path.join(dirpath, filename)
                    relative_source = os.path.relpath(source)
                    link = os.path.join(self.shadow_dir, relative_source)
                    os.symlink(source, link)
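
The 'shallow' branch of Example #20 boils down to: create a fresh temporary directory and symlink every top-level entry of the working directory into it, so the job can run there without touching the real tree. A standard-library sketch; make_shallow_shadow is a made-up name.

import os
import tempfile

def make_shallow_shadow(workdir):
    """Mirror the top level of workdir into a temporary directory via symlinks."""
    shadow_dir = tempfile.mkdtemp(prefix="shadow_")
    for entry in os.listdir(workdir):
        link = os.path.join(shadow_dir, entry)
        os.symlink(os.path.abspath(os.path.join(workdir, entry)), link)
    return shadow_dir

# Usage: shadow the current working directory.
print(make_shallow_shadow(os.getcwd()))
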
Example #21
    def expand_wildcards(self, wildcards=None):
        """
        Expand wildcards depending on the requested output
        or given wildcards dict.
        """
        def concretize_iofile(f, wildcards):
            if not isinstance(f, _IOFile):
                return IOFile(f, rule=self)
            else:
                return f.apply_wildcards(wildcards,
                                         fill_missing=f in self.dynamic_input,
                                         fail_dynamic=self.dynamic_output)

        def concretize_param(p, wildcards):
            if isinstance(p, str):
                return apply_wildcards(p, wildcards)
            return p

        def check_string_type(f):
            if not isinstance(f, str):
                raise RuleException(
                    "Input function did not return str or list of str.",
                    rule=self)

        def _apply_wildcards(newitems,
                             olditems,
                             wildcards,
                             wildcards_obj,
                             concretize=apply_wildcards,
                             check_return_type=check_string_type,
                             ruleio=None,
                             no_flattening=False):
            for name, item in olditems.allitems():
                start = len(newitems)
                is_iterable = True

                if callable(item):
                    try:
                        item = item(wildcards_obj)
                    except (Exception, BaseException) as e:
                        raise InputFunctionException(e,
                                                     rule=self,
                                                     wildcards=wildcards)

                if not_iterable(item) or no_flattening:
                    item = [item]
                    is_iterable = False
                for item_ in item:
                    check_return_type(item_)
                    concrete = concretize(item_, wildcards)
                    newitems.append(concrete)
                    if ruleio is not None:
                        ruleio[concrete] = item_

                if name:
                    newitems.set_name(
                        name,
                        start,
                        end=len(newitems) if is_iterable else None)

        if wildcards is None:
            wildcards = dict()
        missing_wildcards = self.wildcard_names - set(wildcards.keys())

        if missing_wildcards:
            raise RuleException(
                "Could not resolve wildcards in rule {}:\n{}".format(
                    self.name, "\n".join(missing_wildcards)),
                lineno=self.lineno,
                snakefile=self.snakefile)

        ruleio = dict()

        try:
            input = InputFiles()
            wildcards_obj = Wildcards(fromdict=wildcards)
            _apply_wildcards(input,
                             self.input,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_iofile,
                             ruleio=ruleio)

            params = Params()
            # When applying wildcards to params, the return type need not be
            # a string, so the check is disabled.
            _apply_wildcards(params,
                             self.params,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_param,
                             check_return_type=lambda x: None,
                             no_flattening=True)

            output = OutputFiles(
                o.apply_wildcards(wildcards) for o in self.output)
            output.take_names(self.output.get_names())

            dependencies = {
                None if f is None else f.apply_wildcards(wildcards): rule
                for f, rule in self.dependencies.items()
            }

            ruleio.update(dict((f, f_) for f, f_ in zip(output, self.output)))

            log = Log()
            _apply_wildcards(log,
                             self.log,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_iofile)

            benchmark = self.benchmark.apply_wildcards(
                wildcards) if self.benchmark else None
            return input, output, params, log, benchmark, ruleio, dependencies
        except WildcardError as ex:
            # this can only happen if an input contains an unresolved wildcard.
            raise RuleException(
                "Wildcards in input, params, log or benchmark file of rule {} cannot be "
                "determined from output files:\n{}".format(self, str(ex)),
                lineno=self.lineno,
                snakefile=self.snakefile)
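
Example #21 treats each input item uniformly: call it if it is a function (wrapping any error), then insist on a string or an iterable of strings before concretizing with the wildcards. A self-contained sketch of that flow; InputFunctionError and resolve_input are illustrative names, and a plain dict stands in for the wildcards object.

class InputFunctionError(Exception):
    """Stand-in for InputFunctionException."""

def resolve_input(item, wildcards):
    """Return a flat list of concrete strings for a literal or callable input item."""
    if callable(item):
        try:
            item = item(wildcards)
        except Exception as ex:
            raise InputFunctionError(ex)
    if isinstance(item, str):
        item = [item]
    concrete = []
    for element in item:
        if not isinstance(element, str):
            raise TypeError("Input function did not return str or list of str.")
        concrete.append(element.format(**wildcards))
    return concrete

# Usage: the callable sees the wildcards and returns a pattern to concretize.
print(resolve_input(lambda wc: "{sample}.fastq", {"sample": "S1"}))
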
Example #22
        def decorate(ruleinfo):
            if ruleinfo.wildcard_constraints:
                rule.set_wildcard_constraints(
                    *ruleinfo.wildcard_constraints[0],
                    **ruleinfo.wildcard_constraints[1])
            if ruleinfo.input:
                rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
            if ruleinfo.output:
                rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
            if ruleinfo.params:
                rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
            # handle default resources
            if self.default_resources is not None:
                rule.resources = copy.deepcopy(self.default_resources.parsed)
            if ruleinfo.threads is not None:
                if (not isinstance(ruleinfo.threads, int)
                        and not isinstance(ruleinfo.threads, float)
                        and not callable(ruleinfo.threads)):
                    raise RuleException(
                        "Threads value has to be an integer, float, or a callable.",
                        rule=rule,
                    )
                if name in self.overwrite_threads:
                    rule.resources["_cores"] = self.overwrite_threads[name]
                else:
                    if isinstance(ruleinfo.threads, float):
                        ruleinfo.threads = int(ruleinfo.threads)
                    rule.resources["_cores"] = ruleinfo.threads
            if ruleinfo.shadow_depth:
                if ruleinfo.shadow_depth not in (True, "shallow", "full",
                                                 "minimal"):
                    raise RuleException(
                        "Shadow must either be 'minimal', 'shallow', 'full', "
                        "or True (equivalent to 'full')",
                        rule=rule,
                    )
                if ruleinfo.shadow_depth is True:
                    rule.shadow_depth = "full"
                    logger.warning(
                        "Shadow is set to True in rule {} (equivalent to 'full'). It's encouraged to use the more explicit options 'minimal|shallow|full' instead."
                        .format(rule))
                else:
                    rule.shadow_depth = ruleinfo.shadow_depth
            if ruleinfo.resources:
                args, resources = ruleinfo.resources
                if args:
                    raise RuleException("Resources have to be named.")
                if not all(
                        map(lambda r: isinstance(r, int) or callable(r),
                            resources.values())):
                    raise RuleException(
                        "Resource values have to be integers or callables.",
                        rule=rule)
                rule.resources.update(resources)
            if ruleinfo.priority:
                if not isinstance(ruleinfo.priority, int) and not isinstance(
                        ruleinfo.priority, float):
                    raise RuleException("Priority values have to be numeric.",
                                        rule=rule)
                rule.priority = ruleinfo.priority
            if ruleinfo.version:
                rule.version = ruleinfo.version
            if ruleinfo.log:
                rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
            if ruleinfo.message:
                rule.message = ruleinfo.message
            if ruleinfo.benchmark:
                rule.benchmark = ruleinfo.benchmark
            if not self.run_local and ruleinfo.group is not None:
                rule.group = ruleinfo.group
            if ruleinfo.wrapper:
                rule.conda_env = snakemake.wrapper.get_conda_env(
                    ruleinfo.wrapper, prefix=self.wrapper_prefix)
                # TODO retrieve suitable singularity image

            if ruleinfo.env_modules:
                # If using environment modules and they are defined for the rule,
                # ignore conda and singularity directive below.
                # The reason is that this is likely intended in order to use
                # a software stack specifically compiled for a particular
                # HPC cluster.
                invalid_rule = not (ruleinfo.script or ruleinfo.wrapper
                                    or ruleinfo.shellcmd or ruleinfo.notebook)
                if invalid_rule:
                    raise RuleException(
                        "envmodules directive is only allowed with "
                        "shell, script, notebook, or wrapper directives (not with run)",
                        rule=rule,
                    )
                from snakemake.deployment.env_modules import EnvModules

                rule.env_modules = EnvModules(*ruleinfo.env_modules)

            if ruleinfo.conda_env:
                if not (ruleinfo.script or ruleinfo.wrapper
                        or ruleinfo.shellcmd or ruleinfo.notebook):
                    raise RuleException(
                        "Conda environments are only allowed "
                        "with shell, script, notebook, or wrapper directives "
                        "(not with run).",
                        rule=rule,
                    )
                if not (urllib.parse.urlparse(ruleinfo.conda_env).scheme
                        or os.path.isabs(ruleinfo.conda_env)):
                    ruleinfo.conda_env = os.path.join(self.current_basedir,
                                                      ruleinfo.conda_env)
                rule.conda_env = ruleinfo.conda_env

            invalid_rule = not (ruleinfo.script or ruleinfo.wrapper
                                or ruleinfo.shellcmd or ruleinfo.notebook)
            if ruleinfo.container_img:
                if invalid_rule:
                    raise RuleException(
                        "Singularity directive is only allowed "
                        "with shell, script, notebook or wrapper directives "
                        "(not with run).",
                        rule=rule,
                    )
                rule.container_img = ruleinfo.container_img
            elif self.global_container_img:
                if not invalid_rule:
                    # skip rules with run directive
                    rule.container_img = self.global_container_img

            rule.norun = ruleinfo.norun
            rule.docstring = ruleinfo.docstring
            rule.run_func = ruleinfo.func
            rule.shellcmd = ruleinfo.shellcmd
            rule.script = ruleinfo.script
            rule.notebook = ruleinfo.notebook
            rule.wrapper = ruleinfo.wrapper
            rule.cwl = ruleinfo.cwl
            rule.restart_times = self.restart_times
            rule.basedir = self.current_basedir

            if ruleinfo.cache is True:
                if not self.enable_cache:
                    logger.warning(
                        "Workflow defines that rule {} is eligible for caching between workflows "
                        "(use the --cache argument to enable this).".format(
                            rule.name))
                else:
                    self.cache_rules.add(rule.name)
            elif not (ruleinfo.cache is False):
                raise WorkflowError(
                    "Invalid argument for 'cache:' directive. Only True is allowed. "
                    "To deactivate caching, remove the directive.",
                    rule=rule,
                )

            ruleinfo.func.__name__ = "__{}".format(rule.name)
            self.globals[ruleinfo.func.__name__] = ruleinfo.func
            setattr(rules, rule.name, RuleProxy(rule))
            if checkpoint:
                checkpoints.register(rule)
            return ruleinfo.func
Example #23
    def expand_wildcards(self, wildcards=None):
        """
        Expand wildcards depending on the requested output
        or given wildcards dict.
        """
        def concretize_iofile(f, wildcards):
            if not isinstance(f, _IOFile):
                return IOFile(f, rule=self)
            else:
                return f.apply_wildcards(wildcards,
                                         fill_missing=f in self.dynamic_input,
                                         fail_dynamic=self.dynamic_output)

        def _apply_wildcards(newitems,
                             olditems,
                             wildcards,
                             wildcards_obj,
                             concretize=apply_wildcards,
                             ruleio=None):
            for name, item in olditems.allitems():
                start = len(newitems)
                if callable(item):
                    item = item(wildcards_obj)
                    if not_iterable(item):
                        item = [item]
                    for item_ in item:
                        if not isinstance(item_, str):
                            raise RuleException(
                                "Input function did not return str or list of str.",
                                rule=self)
                        concrete = concretize(item_, wildcards)
                        newitems.append(concrete)
                        if ruleio is not None:
                            ruleio[concrete] = item_
                else:
                    if not_iterable(item):
                        item = [item]
                    for item_ in item:
                        concrete = concretize(item_, wildcards)
                        newitems.append(concrete)
                        if ruleio is not None:
                            ruleio[concrete] = item_
                if name:
                    newitems.set_name(name, start, end=len(newitems))

        if wildcards is None:
            wildcards = dict()
        # TODO validate
        missing_wildcards = self.wildcard_names - set(wildcards.keys())

        if missing_wildcards:
            raise RuleException(
                "Could not resolve wildcards in rule {}:\n{}".format(
                    self.name, "\n".join(missing_wildcards)),
                lineno=self.lineno,
                snakefile=self.snakefile)

        ruleio = dict()

        try:
            input = InputFiles()
            wildcards_obj = Wildcards(fromdict=wildcards)
            _apply_wildcards(input,
                             self.input,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_iofile,
                             ruleio=ruleio)

            params = Params()
            _apply_wildcards(params, self.params, wildcards, wildcards_obj)

            output = OutputFiles(
                o.apply_wildcards(wildcards) for o in self.output)
            output.take_names(self.output.get_names())

            ruleio.update(dict((f, f_) for f, f_ in zip(output, self.output)))

            log = self.log.apply_wildcards(wildcards) if self.log else None
            return input, output, params, log, ruleio
        except WildcardError as ex:
            # this can only happen if an input contains an unresolved wildcard.
            raise RuleException(
                "Wildcards in input or log file of rule {} cannot be "
                "determined from output files:\n{}".format(self, str(ex)),
                lineno=self.lineno,
                snakefile=self.snakefile)