Ejemplo n.º 1
0
    def lint_not_used_params(
        self,
        rule,
        valid_names={"input", "output", "log", "params", "wildcards", "threads"},
        regex=re.compile("{{(?P<name>{}).*?}}".format(NAME_PATTERN)),
    ):
        """Report shell-command placeholders that do not start with a rule directive name.

        Every brace-delimited placeholder found by ``regex`` in ``rule.shellcmd``
        is inspected. A placeholder is reported when its leading identifier is
        not in ``valid_names``, unless it has a character on both sides and
        either the preceding one is ``{`` or the following one is ``}``.

        Args:
            rule: Object exposing the shell command text as ``shellcmd``.
            valid_names: Identifiers that may legitimately start a placeholder.
            regex: Compiled pattern with a ``name`` group locating placeholders.

        Yields:
            One ``Lint`` per offending placeholder.
        """
        shellcmd = rule.shellcmd
        if not shellcmd:
            return

        cmd_length = len(shellcmd)
        advice = (
            "It is recommended to pass all files as input and output, and non-file parameters "
            "via the params directive. Otherwise, provenance tracking is less accurate."
        )

        for placeholder in regex.finditer(shellcmd):
            name = placeholder.group("name")
            if name in valid_names:
                continue

            left = placeholder.start() - 1
            right = placeholder.end()
            has_both_neighbours = left >= 0 and right < cmd_length

            # An adjacent extra brace on either side means the placeholder is
            # not reported (presumably brace escaping such as "{{name}}").
            if has_both_neighbours and (
                shellcmd[left] == "{" or shellcmd[right] == "}"
            ):
                continue

            yield Lint(
                title="Shell command directly uses variable {} from outside of the rule".format(
                    name
                ),
                body=advice,
                links=[links.params],
            )
Ejemplo n.º 2
0
 def lint_dynamic(self, rule):
     """Report every output or input file of ``rule`` that carries the "dynamic" flag.

     Output files are checked first, then input files; one ``Lint`` is
     yielded per flagged file.
     """
     flagged_files = (
         path
         for path in chain(rule.output, rule.input)
         if is_flagged(path, "dynamic")
     )
     for _ in flagged_files:
         yield Lint(
             title="The dynamic flag is deprecated",
             body="Use checkpoints instead, which are more powerful and less error-prone.",
             links=[links.checkpoints],
         )
Ejemplo n.º 3
0
 def lint_version(self, rule):
     """Report use of the version directive when ``rule.version`` is set (truthy)."""
     if not rule.version:
         return

     explanation = (
         "It was meant for documenting tool version, but this has been replaced "
         "by using the conda or container directive."
     )
     yield Lint(
         title="The version directive is deprecated",
         body=explanation,
         links=[links.package_management, links.containers],
     )
Ejemplo n.º 4
0
 def lint_iofile_by_index(self, rule, regex=re.compile(r"(input|output)\[[0-9]+\]")):
     """Report shell commands that access input or output files by numeric index.

     Args:
         rule: Object exposing the shell command text as ``shellcmd``
             (may be empty or ``None``, in which case nothing is reported).
         regex: Compiled pattern matching ``input[<digits>]`` or
             ``output[<digits>]``. Written as a raw string so that ``\\[`` and
             ``\\]`` reach the regex engine without relying on Python keeping
             unknown string escapes intact.

     Yields:
         At most one ``Lint``, regardless of how many indexed accesses occur.
     """
     if rule.shellcmd and regex.search(rule.shellcmd):
         yield Lint(
             title="Do not access input and output files individually by index in shell commands",
             body="When individual access to input or output files is needed (i.e., just writing '{input}' "
             "is impossible), use names ('{input.somename}') instead of index based access.",
             links=[links.rules],
         )
Ejemplo n.º 5
0
 def lint_tab_usage(self, snakefile, regex=re.compile(r"^ *\t", re.MULTILINE)):
     """Report every line of ``snakefile`` whose indentation contains a tab.

     Args:
         snakefile: Full text that is searched for tab indentation.
         regex: Compiled pattern matching optional leading spaces followed by
             a tab. It is compiled with ``re.MULTILINE`` so that ``^`` anchors
             at the start of every line; without the flag only the very first
             line of the text could ever match.

     Yields:
         One ``Lint`` per offending line, with the line number in its title.
     """
     for match in regex.finditer(snakefile):
         line = get_line(match, snakefile)
         yield Lint(
             title="Tab usage in line {}.".format(line),
             body="Both Python and Snakemake can get confused when mixing tabs and spaces for indentation. "
             "It is recommended to only use spaces for indentation.",
         )
Ejemplo n.º 6
0
 def lint_log_directive(self, rule):
     """Report a rule that has neither a log directive nor the ``norun`` attribute set."""
     if rule.log or rule.norun:
         return

     rationale = (
         "Without a log directive, all output will be printed "
         "to the terminal. In distributed environments, this means "
         "that errors are harder to discover. In local environments, "
         "output of concurrent jobs will be mixed and become unreadable."
     )
     yield Lint(
         title="No log directive defined",
         body=rationale,
         links=[links.log],
     )
Ejemplo n.º 7
0
 def lint_singularity(self, snakefile, regex=re.compile("singularity:")):
     """Report each occurrence of the ``singularity:`` directive in ``snakefile``.

     One ``Lint`` is yielded per match of ``regex``, with the line number
     of the match in its title.
     """
     title_template = (
         "Deprecated singularity directive used for container definition in line {}."
     )
     for occurrence in regex.finditer(snakefile):
         line_no = get_line(occurrence, snakefile)
         yield Lint(
             title=title_template.format(line_no),
             body="Use the container directive instead (it is agnostic of the underlying container runtime).",
             links=[links.containers],
         )
Ejemplo n.º 8
0
 def lint_absolute_paths(self, snakefile, regex=re.compile(ABS_PATH_PATTERN)):
     """Report each absolute path that ``regex`` finds in ``snakefile``.

     The pattern is expected to expose the path through a ``path`` group;
     both the path and the line number of the match go into the lint title.
     """
     advice = (
         "Do not define absolute paths inside of the workflow, since this "
         "renders your workflow irreproducible on other machines. "
         "Use path relative to the working directory instead, or make the path "
         "configurable via a config file."
     )
     for hit in regex.finditer(snakefile):
         line_no = get_line(hit, snakefile)
         found_path = hit.group("path")
         yield Lint(
             title='Absolute path "{}" in line {}'.format(found_path, line_no),
             body=advice,
             links=[links.config],
         )
Ejemplo n.º 9
0
    def lint_long_run(self, rule):
        """Report run directives whose compiled code exceeds a size threshold.

        Args:
            rule: Object exposing ``is_run`` and, for run rules, ``run_func``
                (a Python function whose ``__code__`` is inspected).

        Yields:
            At most one ``Lint`` suggesting to move the code into a script or
            notebook.
        """
        # Check is_run first: only then is run_func inspected, so a rule
        # without a usable run function cannot raise AttributeError here.
        if not rule.is_run:
            return

        # co_code is the raw bytecode, so the threshold of 70 is measured in
        # bytes of bytecode rather than in source lines.
        func_code = rule.run_func.__code__.co_code
        if len(func_code) > 70:
            yield Lint(
                title="Migrate long run directives into scripts or notebooks",
                body="Long run directives hamper workflow readability. Use the script or notebook directive instead. "
                "Note that the script or notebook directive does not involve boilerplate. Similar to run, you "
                "will have direct access to params, input, output, and wildcards. "
                "Only use the run directive for a handful of lines.",
                links=[links.external_scripts, links.notebooks],
            )
Ejemplo n.º 10
0
 def lint_missing_software_definition(self, rule):
     """Report a rule that defines neither a conda environment nor a container.

     Nothing is reported when ``rule.norun`` or ``rule.is_run`` is set, or
     when ``rule.conda_env`` or ``rule.container_img`` is set. Otherwise
     exactly one ``Lint`` is yielded; its wording depends on whether the
     rule declares ``env_modules``.
     """
     if rule.norun or rule.is_run or rule.conda_env or rule.container_img:
         return

     if rule.env_modules:
         title = "Additionally specify a conda environment or container for each rule, environment modules are not enough"
         body = (
             "While environment modules allow to document and deploy the required software on a certain "
             "platform, they lock your workflow in there, disabling easy reproducibility on other machines "
             "that don't have exactly the same environment modules. Hence env modules (which might be beneficial "
             "in certain cluster environments), should allways be complemented with equivalent conda "
             "environments."
         )
     else:
         title = "Specify a conda environment or container for each rule."
         body = (
             "This way, the used software for each specific step is documented, and "
             "the workflow can be executed on any machine without prerequisites."
         )

     yield Lint(
         title=title,
         body=body,
         links=[links.package_management, links.containers],
     )
Ejemplo n.º 11
0
 def lint_mixed_func_and_rules(
     self,
     snakefile,
     rule_regex=re.compile("rule .+?:"),
     func_regex=re.compile("def .+?:"),
 ):
     """Report a snakefile in which both ``rule_regex`` and ``func_regex`` match.

     At most one ``Lint`` is yielded, no matter how many rules or
     functions the text contains.
     """
     if not rule_regex.search(snakefile):
         return
     if not func_regex.search(snakefile):
         return

     recommendation = (
         "Small one-liner functions used only once should be "
         "defined as lambda expressions. Other functions should be collected "
         "in a common module, e.g. 'rules/common.smk'. This makes the workflow "
         "steps more readable."
     )
     yield Lint(
         title="Mixed rules and functions in same snakefile.",
         body=recommendation,
         links=[links.includes],
     )
Ejemplo n.º 12
0
 def lint_params_prefix(self, rule):
     """Report string params that are a prefix of one of the rule's input or output files.

     Only non-empty ``str`` param values are considered, and they are only
     compared against input/output entries that are themselves ``str``.
     One ``Lint`` is yielded per offending param.
     """
     for param, value in rule.params.items():
         if not isinstance(value, str) or not value:
             continue

         file_paths = (
             entry
             for entry in chain(rule.input, rule.output)
             if isinstance(entry, str)
         )
         if not any(path.startswith(value) for path in file_paths):
             continue

         yield Lint(
             title="Param {} is a prefix of input or output file but hardcoded".format(
                 param
             ),
             body="If this is meant to represent a file path prefix, it will fail when running "
             "workflow in environments without a shared filesystem. "
             "Instead, provide a function that infers the appropriate prefix from the input or "
             "output file, e.g.: lambda w, input: os.path.splitext(input[0])[0]",
             links=[links.params, links.input_functions],
         )
Ejemplo n.º 13
0
 def lint_path_add(
     self,
     snakefile,
     regex1=re.compile(
         "{name} *\\+ *{path}".format(name=NAME_PATTERN, path=PATH_PATTERN)
     ),
     regex2=re.compile(
         "{path} *\\+ *{name}".format(path=PATH_PATTERN, name=NAME_PATTERN)
     ),
 ):
     """Report places where a name and a path are joined with ``+`` in ``snakefile``.

     All matches of ``regex1`` (name + path) are reported first, followed
     by all matches of ``regex2`` (path + name); each yields one ``Lint``
     with the line number of the match in its title.
     """
     hits = chain(regex1.finditer(snakefile), regex2.finditer(snakefile))
     explanation = (
         "This becomes quickly unreadable. Usually, it is better to endure some "
         "redundancy against having a more readable workflow. Hence, just repeat common "
         'prefixes. If path composition is unavoidable, use pathlib or (python >= 3.6) string formatting with f"...". '
     )
     for hit in hits:
         line_no = get_line(hit, snakefile)
         yield Lint(
             title="Path composition with '+' in line {}".format(line_no),
             body=explanation,
         )
Ejemplo n.º 14
0
 def lint_envvars(
     self,
     snakefile,
     regex=re.compile(
         r"os\.environ\[(?P<quote>['\"])(?P<name>.+?)?(?P=quote)\]"
     ),
 ):
     """Report ``os.environ[...]`` lookups whose variable is not listed in the workflow's envvars.

     Args:
         snakefile: Full text that is searched for environment lookups.
         regex: Compiled pattern with a ``name`` group capturing the quoted
             variable name. The name is matched lazily so that several
             lookups on one line are found as separate matches instead of
             being swallowed into a single bogus name, and the dot in
             ``os.environ`` is escaped so only a literal dot matches.

     Yields:
         One ``Lint`` per lookup whose name is not in ``self.workflow.envvars``.
     """
     for match in regex.finditer(snakefile):
         name = match.group("name")
         if name in self.workflow.envvars:
             continue

         # The line number is only needed for the message, so it is computed
         # after the membership check.
         line = get_line(match, snakefile)
         yield Lint(
             title="Environment variable {} used but not asserted with envvars directive in line {}.".format(
                 name, line
             ),
             body="Asserting existence of environment variables with the envvars directive ensures proper error "
             "messages if the user fails to invoke a workflow with all required environment variables defined. "
             "Further, it allows snakemake to pass them on in case of distributed execution.",
             links=[links.envvars],
         )