Example #1
    def spawn_jobscript(self, job, jobscript, **kwargs):
        overwrite_workdir = ""
        if self.workflow.overwrite_workdir:
            overwrite_workdir = "--directory {} ".format(
                self.workflow.overwrite_workdir)
        overwrite_config = ""
        if self.workflow.overwrite_configfile:
            overwrite_config = "--configfile {} ".format(
                self.workflow.overwrite_configfile)
        if self.workflow.config_args:
            overwrite_config += "--config {} ".format(
                " ".join(self.workflow.config_args))

        target = job.output if job.output else job.rule.name
        format = partial(str.format,
                         job=job,
                         overwrite_workdir=overwrite_workdir,
                         overwrite_config=overwrite_config,
                         workflow=self.workflow,
                         cores=self.cores,
                         properties=job.json(),
                         latency_wait=self.latency_wait,
                         benchmark_repeats=self.benchmark_repeats,
                         target=target, **kwargs)
        try:
            exec_job = format(self.exec_job)
            with open(jobscript, "w") as f:
                print(format(self.jobscript, exec_job=exec_job), file=f)
        except KeyError as e:
            raise WorkflowError(
                "Error formatting jobscript: {} not found\n"
                "Make sure that your custom jobscript it up to date.".format(e))
        os.chmod(jobscript, os.stat(jobscript).st_mode | stat.S_IXUSR)
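The two templates expanded here, self.exec_job and self.jobscript, ship with Snakemake, and the placeholders they may contain correspond to the keys bound via partial(str.format, ...) above. A rough, illustrative sketch of such templates (not the exact bundled defaults):

# Illustrative templates only; the real ones are bundled with Snakemake.
exec_job_template = (
    "snakemake {target} --snakefile {workflow.snakefile} "
    "--force -j{cores} --keep-target-files "
    "{overwrite_workdir}{overwrite_config}"
    "--latency-wait {latency_wait} --benchmark-repeats {benchmark_repeats}"
)
jobscript_template = "#!/bin/sh\n# properties = {properties}\n{exec_job}\n"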
Example #2
    def __init__(self, filename, job_a, job_b, lineno=None, snakefile=None):
        from snakemake import utils

        wildcards_a = utils.format("{}", job_a._format_wildcards)
        wildcards_b = utils.format("{}", job_b._format_wildcards)
        super().__init__(
            "Rules {job_a} and {job_b} are ambiguous for the file {f}.\n"
            "Consider starting rule output with a unique prefix, constrain "
            "your wildcards, or use the ruleorder directive.\n"
            "Wildcards:\n"
            "\t{job_a}: {wildcards_a}\n"
            "\t{job_b}: {wildcards_b}\n"
            "Expected input files:\n"
            "\t{job_a}: {job_a.input}\n"
            "\t{job_b}: {job_b.input}"
            "Expected output files:\n"
            "\t{job_a}: {job_a.output}\n"
            "\t{job_b}: {job_b.output}".format(
                job_a=job_a,
                job_b=job_b,
                f=filename,
                wildcards_a=wildcards_a,
                wildcards_b=wildcards_b,
            ),
            lineno=lineno,
            snakefile=snakefile,
        )
        self.rule1, self.rule2 = job_a.rule, job_b.rule
Example #3
def get_source(path, basedir="."):
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    # TODO this should probably be removed again. It does not work for report and hash!
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        (root_path, file_path, version) = split_git_path(path)
        path = path.rstrip("@" + version)
    else:
        sourceurl = path

    if source is None:
        with urlopen(sourceurl) as source:
            source = source.read()

    language = get_language(path, source)

    return path, source, language
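get_source returns the normalized path (as a URL), the raw source bytes, and the language inferred by get_language. A hedged usage sketch; the script path and base directory are hypothetical:

# Hypothetical call; "scripts/plot.R" is an example path relative to basedir.
path, source, language = get_source("scripts/plot.R", basedir="/home/user/workflow")
# path     -> "file:///home/user/workflow/scripts/plot.R"
# source   -> raw bytes of the script
# language -> "r", inferred from the .R suffix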
Example #4
def get_source(path, basedir="."):
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path)
        (root_path, file_path, version) = split_git_path(path)
        path = path.rstrip("@" + version)
    else:
        sourceurl = path

    language = None
    if path.endswith(".py"):
        language = "python"
    elif path.endswith(".R"):
        language = "r"
    elif path.endswith(".Rmd"):
        language = "rmarkdown"
    elif path.endswith(".jl"):
        language = "julia"

    if source is None:
        with urlopen(sourceurl) as source:
            return path, source.read(), language
    else:
        return path, source, language
Example #5
 def format_wildcards(self, string, **variables):
     """ Format a string with variables from the job. """
     _variables = dict()
     _variables.update(self.dag.workflow.globals)
     _variables.update(
         dict(
             input=self.input,
             output=self.output,
             threads=self.threads,
             jobid=self.jobid,
             name=self.name,
             rule="GROUP",
             rulename="GROUP",
             resources=self.resources,
         )
     )
     _variables.update(variables)
     try:
         return format(string, **_variables)
     except NameError as ex:
         raise WorkflowError(
             "NameError with group job {}: {}".format(self.jobid, str(ex))
         )
     except IndexError as ex:
         raise WorkflowError(
             "IndexError with group job {}: {}".format(self.jobid, str(ex))
         )
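Placeholders in the string are resolved from the workflow globals plus the group-level values assembled above, and formatting errors are re-raised as WorkflowError with the group jobid. A minimal hedged sketch of a call, where group_job stands for an instance of the class this method belongs to:

# Hedged sketch; group_job is assumed to be a group job instance from the DAG.
msg = group_job.format_wildcards(
    "group {name} (jobid {jobid}) running with {threads} threads"
)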
Example #6
def get_source(path, basedir=".", wildcards=None, params=None):
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = smart_join(basedir, path, abspath=True)
        if is_local_file(path):
            path = "file://" + path
    if wildcards is not None and params is not None:
        # Format path if wildcards are given.
        path = format(path, wildcards=wildcards, params=params)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        (root_path, file_path, version) = split_git_path(path)
        path = path.rstrip("@" + version)
    else:
        sourceurl = path

    if source is None:
        with urlopen(sourceurl) as source:
            source = source.read()

    language = get_language(path, source)

    return path, source, language
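When both wildcards and params are given, the path itself may contain placeholders that are expanded before the file is fetched. A hedged sketch; the sample wildcard and script layout are hypothetical:

# Hypothetical: a per-sample script selected via a wildcard.
path, source, language = get_source(
    "scripts/{wildcards.sample}_qc.py",
    basedir="/home/user/workflow",
    wildcards=wildcards,   # e.g. Wildcards(sample="A")
    params=params,
)
# With sample == "A" this fetches .../scripts/A_qc.py and reports language "python".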
Example #7
 def format_wildcards(self, string, **variables):
     """ Format a string with variables from the job. """
     _variables = dict()
     _variables.update(self.rule.workflow.globals)
     _variables.update(
         dict(
             input=self.input,
             output=self.output,
             params=self.params,
             wildcards=self._format_wildcards,
             threads=self.threads,
             resources=self.resources,
             log=self.log,
             jobid=self.jobid,
             version=self.rule.version,
             name=self.name,
             rule=self.rule.name,
             rulename=self.rule.name,
             bench_iteration=None,
         ))
     _variables.update(variables)
     try:
         return format(string, **_variables)
     except NameError as ex:
         raise RuleException("NameError: " + str(ex), rule=self.rule)
     except IndexError as ex:
         raise RuleException("IndexError: " + str(ex), rule=self.rule)
Example #8
    def format_job_pattern(self, pattern, job=None, **kwargs):
        overwrite_workdir = []
        if self.workflow.overwrite_workdir:
            overwrite_workdir.extend(("--directory", self.workflow.overwrite_workdir))

        overwrite_config = []
        if self.workflow.overwrite_configfile:
            overwrite_config.extend(("--configfile", self.workflow.overwrite_configfile))
        if self.workflow.config_args:
            overwrite_config.append("--config")
            overwrite_config.extend(self.workflow.config_args)

        printshellcmds = ""
        if self.workflow.printshellcmds:
            printshellcmds = "-p"

        target = job.output if job.output else job.rule.name

        return format(pattern,
                      job=job,
                      overwrite_workdir=overwrite_workdir,
                      overwrite_config=overwrite_config,
                      printshellcmds=printshellcmds,
                      workflow=self.workflow,
                      cores=self.cores,
                      benchmark_repeats=self.benchmark_repeats,
                      target=target,
                      **kwargs)
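Unlike the string-building spawn_jobscript variants above, this method collects the extra CLI arguments as lists and leaves joining to the format helper, which renders sequence values space-separated. A hedged sketch of a pattern that could be passed in (the flags and the executor variable are illustrative, not the shipped defaults):

# Illustrative pattern; real patterns are supplied by the executor code.
pattern = (
    "snakemake {target} --snakefile {workflow.snakefile} "
    "-j {cores} {overwrite_workdir} {overwrite_config} {printshellcmds}"
)
cmd = executor.format_job_pattern(pattern, job=job)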
Example #9
def get_source(
    path,
    sourcecache: sourcecache.SourceCache,
    basedir=None,
    wildcards=None,
    params=None,
):
    if wildcards is not None and params is not None:
        if isinstance(path, SourceFile):
            path = path.get_path_or_uri()
        # Format path if wildcards are given.
        path = infer_source_file(format(path, wildcards=wildcards, params=params))

    if basedir is not None:
        basedir = infer_source_file(basedir)

    source_file = infer_source_file(path, basedir)
    with sourcecache.open(source_file) as f:
        source = f.read()

    language = get_language(source_file, source)

    is_local = isinstance(source_file, LocalSourceFile)

    return source_file, source, language, is_local
Example #10
def cwl(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    use_singularity,
    bench_record,
    jobid,
    runtime_sourcecache_path,
):
    """
    Load cwl from the given basedir + path and execute it.
    """
    if shutil.which("cwltool") is None:
        raise WorkflowError(
            "'cwltool' must be in PATH in order to execute cwl directive.")

    if not path.startswith("http"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, wildcards=wildcards)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    else:
        sourceurl = path

    def file_spec(f):
        if isinstance(f, str):
            return {"path": os.path.abspath(f), "class": "File"}
        return [file_spec(f_) for f_ in f]

    inputs = dict()
    inputs.update({name: file_spec(f) for name, f in input.items()})
    inputs.update({name: p for name, p in params.items()})
    inputs.update({name: f for name, f in output.items()})
    inputs.update({name: f for name, f in log.items()})

    args = "--singularity" if use_singularity else ""

    with tempfile.NamedTemporaryFile(mode="w") as input_file:
        json.dump(inputs, input_file)
        input_file.flush()
        cmd = "cwltool {} {} {}".format(args, sourceurl, input_file.name)
        shell(cmd, bench_record=bench_record)
Example #11
class shell:
    _process_args = {}
    _process_prefix = ""
    _process_suffix = ""

    @classmethod
    def executable(cls, cmd):
        if os.path.split(cmd)[-1] == "bash":
            cls._process_prefix = "set -euo pipefail; "
        cls._process_args["executable"] = cmd

    @classmethod
    def prefix(cls, prefix):
        cls._process_prefix = format(prefix, stepout=2)

    @classmethod
    def suffix(cls, suffix):
        cls._process_suffix = format(suffix, stepout=2)

    def __new__(cls,
                cmd,
                *args,
                async=False,
                iterable=False,
                read=False,
                **kwargs):
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")
        cmd = format(cmd, *args, stepout=2, **kwargs)

        logger.shellcmd(cmd)

        stdout = sp.PIPE if iterable or async or read else STDOUT

        close_fds = sys.platform != 'win32'

        proc = sp.Popen("{} {} {}".format(cls._process_prefix, cmd.rstrip(),
                                          cls._process_suffix),
                        bufsize=-1,
                        shell=True,
                        stdout=stdout,
                        close_fds=close_fds,
                        **cls._process_args)

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd)
        if read:
            ret = proc.stdout.read()
        elif async:
            return proc
        retcode = proc.wait()
        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
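Here format is called with stepout=2, so unresolved placeholders are looked up in the namespace of whoever called shell(); inside a Snakemake rule that means names such as input and output resolve without being passed explicitly. A hedged sketch of typical use, assuming input and output are local variables in the calling frame:

# Hedged sketch; {input} and {output} resolve from the caller's frame via stepout=2.
shell.executable("/bin/bash")        # a bash executable also sets "set -euo pipefail; " as prefix
shell.prefix("set -euo pipefail; ")  # the prefix can also be set (or overridden) explicitly
shell("wc -l {input} > {output}")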
Example #12
    def spawn_jobscript(self, job, jobscript, **kwargs):
        overwrite_workdir = ""
        if self.workflow.overwrite_workdir:
            overwrite_workdir = "--directory {} ".format(
                self.workflow.overwrite_workdir)
        overwrite_config = ""
        if self.workflow.overwrite_configfile:
            overwrite_config = "--configfile {} ".format(
                self.workflow.overwrite_configfile)
        if self.workflow.config_args:
            overwrite_config += "--config {} ".format(" ".join(
                self.workflow.config_args))

        target = job.output if job.output else job.rule.name
        wait_for_files = list(job.local_input) + [self.tmpdir]
        if job.shadow_dir:
            wait_for_files.append(job.shadow_dir)
        format = partial(str.format,
                         job=job,
                         overwrite_workdir=overwrite_workdir,
                         overwrite_config=overwrite_config,
                         workflow=self.workflow,
                         cores=self.cores,
                         properties=json.dumps(
                             job.properties(cluster=self.cluster_params(job))),
                         latency_wait=self.latency_wait,
                         benchmark_repeats=self.benchmark_repeats,
                         target=target,
                         wait_for_files=" ".join(wait_for_files),
                         **kwargs)
        try:
            exec_job = format(self.exec_job)
            with open(jobscript, "w") as f:
                print(format(self.jobscript, exec_job=exec_job), file=f)
        except KeyError as e:
            raise WorkflowError(
                "Error formatting jobscript: {} not found\n"
                "Make sure that your custom jobscript it up to date.".format(
                    e))
        os.chmod(jobscript, os.stat(jobscript).st_mode | stat.S_IXUSR)
Example #13
 def format_wildcards(self, string, **variables):
     """ Format a string with variables from the job. """
     _variables = dict()
     _variables.update(self.dag.workflow.globals)
     _variables.update(dict(input=self.input,
                            output=self.output,
                            threads=self.threads,
                            resources=self.resources))
     _variables.update(variables)
     try:
         return format(string, **_variables)
     except NameError as ex:
         raise WorkflowError("NameError: " + str(ex))
     except IndexError as ex:
         raise WorkflowError("IndexError: " + str(ex))
Example #14
 def format_wildcards(self, string, **variables):
     """ Format a string with variables from the job. """
     _variables = dict()
     _variables.update(self.rule.workflow.globals)
     _variables.update(variables)
     try:
         return format(string,
                       input=self.input,
                       output=self.output,
                       params=self.params,
                       wildcards=self._format_wildcards,
                       threads=self.threads,
                       resources=self.resources,
                       log=self.log, **_variables)
     except NameError as ex:
         raise RuleException("NameError: " + str(ex), rule=self.rule)
Example #15
def get_source(path, basedir="."):
    import nbformat

    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path)
        (root_path, file_path, version) = split_git_path(path)
        path = path.rstrip("@" + version)
    else:
        sourceurl = path

    if source is None:
        with urlopen(sourceurl) as source:
            source = source.read()

    language = None
    if path.endswith(".py"):
        language = "python"
    elif path.endswith(".ipynb"):
        language = "jupyter"
    elif path.endswith(".R"):
        language = "r"
    elif path.endswith(".Rmd"):
        language = "rmarkdown"
    elif path.endswith(".jl"):
        language = "julia"

    # detect kernel language for Jupyter Notebooks
    if language == "jupyter":
        nb = nbformat.reads(source, as_version=nbformat.NO_CONVERT)
        kernel_language = nb["metadata"]["language_info"]["name"]

        language += "_" + kernel_language.lower()

    return path, source, language
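For notebooks, the reported language carries the kernel as a suffix, e.g. "jupyter_python" for a Python kernel. A small hedged sketch of the kernel lookup this relies on, built with nbformat only (the notebook here is constructed in memory for illustration):

# Hedged illustration of the kernel-language lookup used above.
import nbformat

nb = nbformat.v4.new_notebook()
nb["metadata"]["language_info"] = {"name": "Python"}
text = nbformat.writes(nb)

parsed = nbformat.reads(text, as_version=nbformat.NO_CONVERT)
kernel_language = parsed["metadata"]["language_info"]["name"]
print("jupyter_" + kernel_language.lower())   # -> jupyter_python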
Example #16
File: util.py Project: shafferm/ymp
def R(code="", **kwargs):
    """Execute R code

    This function executes the R code given as a string. Additional arguments
    are injected into the R environment. The value of the last R statement
    is returned.

    The function requires rpy2 to be installed.

    Args:
        code (str): R code to be executed
        **kwargs (dict): variables to inject into R globalenv
    Returns:
        value of last R statement

    >>> R("1*1", input=input)
    """
    try:
        import rpy2.robjects as robjects
        from rpy2.rlike.container import TaggedList
        from rpy2.rinterface import RNULLType
    except ImportError:
        raise ValueError("Python 3 package rpy2 needs to be installed to use"
                         "the R function.")

    activate_R()

    # translate Namedlists into rpy2's TaggedList to have named lists in R
    for key in kwargs:
        value = kwargs[key]
        if isinstance(value, Namedlist):
            kwargs[key] = TaggedList([y for x, y in value.allitems()],
                                     [x for x, y in value.allitems()])

    code = format(textwrap.dedent(code), stepout=2)
    # wrap code in function to preserve clean global env and execute
    rval = robjects.r("function({}){{ {} }}"
                      "".format(",".join(kwargs), code))(**kwargs)

    # Reduce vectors of length 1 to scalar, as implicit in R.
    if isinstance(rval, RNULLType):
        rval = None
    if rval and len(rval) == 1:
        return rval[0]
    return rval
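Because the code string is passed through format with stepout=2 and the kwargs become named arguments of the generated R function, Python values are available inside the R code by name. A hedged usage sketch, assuming rpy2 is installed and input is a Snakemake Namedlist:

# Hedged sketch; assumes rpy2 is installed and `input` is a Snakemake Namedlist.
n_rows = R("""
    tbl <- read.csv(input[[1]])
    nrow(tbl)
""", input=input)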
Example #17
class shell:
    _process_args = {}
    _process_prefix = ""

    @classmethod
    def executable(cls, cmd):
        cls._process_args["executable"] = cmd

    @classmethod
    def prefix(cls, prefix):
        cls._process_prefix = format(prefix, stepout=2)

    def __new__(cls,
                cmd,
                *args,
                async=False,
                iterable=False,
                read=False,
                **kwargs):
        cmd = format(cmd, *args, stepout=2, **kwargs)
        stdout = sp.PIPE if iterable or async or read else STDOUT

        close_fds = sys.platform != 'win32'
        proc = sp.Popen(cls._process_prefix + cmd,
                        bufsize=-1,
                        shell=True,
                        stdout=stdout,
                        close_fds=close_fds,
                        **cls._process_args)

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd)
        if read:
            ret = proc.stdout.read()
        elif async:
            return proc
        retcode = proc.wait()
        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
Example #18
    def run(
        self, job, callback=None, submit_callback=None, error_callback=None):
        super()._run(job)
        workdir = os.getcwd()
        jobid = self.dag.jobid(job)
        properties = job.json()

        jobscript = self.get_jobscript(job)
        jobfinished = os.path.join(self.tmpdir, "{}.jobfinished".format(jobid))
        jobfailed = os.path.join(self.tmpdir, "{}.jobfailed".format(jobid))
        with open(jobscript, "w") as f:
            print(format(self.jobscript, workflow=self.workflow, cores=self.cores), file=f)
        os.chmod(jobscript, os.stat(jobscript).st_mode | stat.S_IXUSR)

        deps = " ".join(self.external_jobid[f] for f in job.input if f in self.external_jobid)
        submitcmd = job.format_wildcards(self.submitcmd, dependencies=deps)
        try:
            ext_jobid = subprocess.check_output(
                '{submitcmd} "{jobscript}"'.format(
                    submitcmd=submitcmd,
                    jobscript=jobscript),
                shell=True).decode().split("\n")
        except subprocess.CalledProcessError as ex:
            raise WorkflowError("Error executing jobscript (exit code {}):\n{}".format(ex.returncode, ex.output.decode()), rule=job.rule)
        if ext_jobid and ext_jobid[0]:
            ext_jobid = ext_jobid[0]
            self.external_jobid.update((f, ext_jobid) for f in job.output)
            logger.debug("Submitted job {} with external jobid {}.".format(jobid, ext_jobid))

        thread = threading.Thread(
            target=self._wait_for_job,
            args=(job, callback, error_callback,
                jobscript, jobfinished, jobfailed))
        thread.daemon = True
        thread.start()
        self.threads.append(thread)

        submit_callback(job)
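The submit command is the user-supplied --cluster string; since it is expanded with job.format_wildcards, it can reference job properties as well as the extra dependencies variable passed here. A hedged example of such a command string (the scheduler flags are assumptions, not a recommended default):

# Illustrative --cluster command; the qsub flags shown are assumptions.
submitcmd_template = "qsub -N {rule} -pe smp {threads} -hold_jid '{dependencies}'"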
Example #19
    return counts.astype(np.uint64)


def write_sig_dists(counts, filename):
    with gzip.open(filename, 'wb') as outf:
        outf.write(struct.pack('<III', 4, counts.shape[0], counts.shape[1]))
        outf.write(counts.astype(np.uint32).tostring())


counts_aggr = None

for signals, taginfo, out in zip(input.signals, input.taginfo, output.taginfo):
    cmd = format('{BINDIR}/tailseq-polya-ruler {wildcards.tile} {signals} \
        {input.score_cutoffs} {CONF[polyA_finder][signal_analysis_trigger]} \
        {CONF[polyA_ruler][downhill_extension_weight]} \
        {taginfo} {CONF[polyA_seeder][dist_sampling_bins]} \
        {CONF[polyA_ruler][signal_resampling_gap]} \
        {output.sigdists} | {BGZIP_CMD} -c > {out}',
                 wildcards=wildcards,
                 input=input,
                 output=output)
    shell(cmd)

    counts_new = load_sig_dists(output.sigdists)
    if counts_aggr is None:
        counts_aggr = counts_new
    else:
        counts_aggr += counts_new

write_sig_dists(counts_aggr, output.sigdists)
Example #20
def script(path, basedir, input, output, params, wildcards, threads, resources,
           log, config, rulename, conda_env):
    """
    Load a script from the given basedir + path and execute it.
    Supports Python 3 and R.
    """
    if not path.startswith("http"):
        if path.startswith("file://"):
            path = path[7:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)

    try:
        with urlopen(path) as source:
            if path.endswith(".py"):
                snakemake = Snakemake(input, output, params, wildcards,
                                      threads, resources, log, config, rulename)
                snakemake = pickle.dumps(snakemake)
                # obtain search path for current snakemake module
                # the module is needed for unpickling in the script
                searchpath = os.path.dirname(os.path.dirname(__file__))
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                import sys; sys.path.insert(0, "{}"); import pickle; snakemake = pickle.loads({})
                ######## Original script #########
                """).format(searchpath, snakemake)
            elif path.endswith(".R"):
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                library(methods)
                Snakemake <- setClass(
                    "Snakemake",
                    slots = c(
                        input = "list",
                        output = "list",
                        params = "list",
                        wildcards = "list",
                        threads = "numeric",
                        log = "list",
                        resources = "list",
                        config = "list",
                        rule = "character"
                    )
                )
                snakemake <- Snakemake(
                    input = {},
                    output = {},
                    params = {},
                    wildcards = {},
                    threads = {},
                    log = {},
                    resources = {},
                    config = {},
                    rule = {}
                )
                ######## Original script #########
                """).format(REncoder.encode_namedlist(input),
                           REncoder.encode_namedlist(output),
                           REncoder.encode_namedlist(params),
                           REncoder.encode_namedlist(wildcards), threads,
                           REncoder.encode_namedlist(log),
                           REncoder.encode_namedlist({
                               name: value
                               for name, value in resources.items()
                               if name != "_cores" and name != "_nodes"
                           }), REncoder.encode_dict(config), REncoder.encode_value(rulename))
            else:
                raise ValueError(
                    "Unsupported script: Expecting either Python (.py) or R (.R) script.")

            dir = ".snakemake/scripts"
            os.makedirs(dir, exist_ok=True)
            with tempfile.NamedTemporaryFile(
                suffix="." + os.path.basename(path),
                prefix="",
                dir=dir,
                delete=False) as f:
                f.write(preamble.encode())
                f.write(source.read())
            if path.endswith(".py"):
                py_exec = sys.executable
                if conda_env is not None:
                    py = os.path.join(conda_env, "bin", "python")
                    if os.path.exists(py):
                        out = subprocess.check_output([py, "--version"],
                                                      stderr=subprocess.STDOUT,
                                                      universal_newlines=True)
                        ver = tuple(map(int, PY_VER_RE.match(out).group("ver_min").split(".")))
                        if ver >= MIN_PY_VERSION:
                            # Python version is new enough, make use of environment
                            # to execute script
                            py_exec = "python"
                        else:
                            logger.info("Conda environment defines Python "
                                        "version < {}.{}. Using Python of the "
                                        "master process to execute "
                                        "script.".format(*MIN_PY_VERSION))
                # use the same Python as the running process or the one from the environment
                shell("{py_exec} {f.name}")
            elif path.endswith(".R"):
                shell("Rscript {f.name}")
            os.remove(f.name)

    except URLError as e:
        raise WorkflowError(e)
Example #21
    def __new__(cls,
                cmd,
                *args,
                iterable=False,
                read=False,
                bench_record=None,
                **kwargs):
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")
        cmd = format(cmd, *args, stepout=2, **kwargs)
        context = inspect.currentframe().f_back.f_locals
        # add kwargs to context (overwriting the locals of the caller)
        context.update(kwargs)

        stdout = sp.PIPE if iterable or read else STDOUT

        close_fds = sys.platform != "win32"

        jobid = context.get("jobid")
        if not context.get("is_shell"):
            logger.shellcmd(cmd)

        env_prefix = ""
        conda_env = context.get("conda_env", None)
        singularity_img = context.get("singularity_img", None)
        env_modules = context.get("env_modules", None)
        shadow_dir = context.get("shadow_dir", None)

        cmd = "{} {} {}".format(cls._process_prefix, cmd.strip(),
                                cls._process_suffix).strip()

        if env_modules:
            cmd = env_modules.shellcmd(cmd)
            logger.info(
                "Activating environment modules: {}".format(env_modules))

        if conda_env:
            cmd = Conda(singularity_img).shellcmd(conda_env, cmd)

        if singularity_img:
            args = context.get("singularity_args", "")
            cmd = singularity.shellcmd(
                singularity_img,
                cmd,
                args,
                shell_executable=cls._process_args["executable"],
                container_workdir=shadow_dir,
            )
            logger.info(
                "Activating singularity image {}".format(singularity_img))
        if conda_env:
            logger.info("Activating conda environment: {}".format(conda_env))

        proc = sp.Popen(cmd,
                        bufsize=-1,
                        shell=True,
                        stdout=stdout,
                        universal_newlines=iterable or None,
                        close_fds=close_fds,
                        **cls._process_args)

        if jobid is not None:
            with cls._lock:
                cls._processes[jobid] = proc

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd)
        if read:
            ret = proc.stdout.read()
        if bench_record is not None:
            from snakemake.benchmark import benchmarked

            with benchmarked(proc.pid, bench_record):
                retcode = proc.wait()
        else:
            retcode = proc.wait()

        if jobid is not None:
            with cls._lock:
                del cls._processes[jobid]

        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
Example #22
 def suffix(cls, suffix):
     cls._process_suffix = format(suffix, stepout=2)
Example #23
    def __new__(cls,
                cmd,
                *args,
                iterable=False,
                read=False,
                bench_record=None,
                **kwargs):
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")
        cmd = format(cmd, *args, stepout=2, **kwargs)
        context = inspect.currentframe().f_back.f_locals
        # add kwargs to context (overwriting the locals of the caller)
        context.update(kwargs)

        stdout = sp.PIPE if iterable or read else STDOUT

        close_fds = sys.platform != "win32"

        jobid = context.get("jobid")
        if not context.get("is_shell"):
            logger.shellcmd(cmd)

        env_prefix = ""
        conda_env = context.get("conda_env", None)
        container_img = context.get("container_img", None)
        env_modules = context.get("env_modules", None)
        shadow_dir = context.get("shadow_dir", None)

        cmd = "{} {} {}".format(cls._process_prefix, cmd.strip(),
                                cls._process_suffix).strip()

        if env_modules:
            cmd = env_modules.shellcmd(cmd)
            logger.info(
                "Activating environment modules: {}".format(env_modules))

        if conda_env:
            cmd = Conda(container_img).shellcmd(conda_env, cmd)

        if container_img:
            args = context.get("singularity_args", "")
            cmd = singularity.shellcmd(
                container_img,
                cmd,
                args,
                shell_executable=cls._process_args["executable"],
                container_workdir=shadow_dir,
            )
            logger.info(
                "Activating singularity image {}".format(container_img))
        if conda_env:
            logger.info("Activating conda environment: {}".format(conda_env))

        threads = str(context.get("threads", 1))
        # environment variable lists for linear algebra libraries taken from:
        # https://stackoverflow.com/a/53224849/2352071
        # https://github.com/xianyi/OpenBLAS/tree/59243d49ab8e958bb3872f16a7c0ef8c04067c0a#setting-the-number-of-threads-using-environment-variables
        envvars = dict(os.environ)
        envvars["OMP_NUM_THREADS"] = threads
        envvars["GOTO_NUM_THREADS"] = threads
        envvars["OPENBLAS_NUM_THREADS"] = threads
        envvars["MKL_NUM_THREADS"] = threads
        envvars["VECLIB_MAXIMUM_THREADS"] = threads
        envvars["NUMEXPR_NUM_THREADS"] = threads

        use_shell = True

        if ON_WINDOWS and cls.get_executable():
            # If executable is set on Windows shell mode can not be used
            # and the executable should be prepended the command together
            # with a command prefix (e.g. -c for bash).
            use_shell = False
            cmd = '"{}" {} {}'.format(cls.get_executable(),
                                      cls._win_command_prefix, argvquote(cmd))

        proc = sp.Popen(
            cmd,
            bufsize=-1,
            shell=use_shell,
            stdout=stdout,
            universal_newlines=iterable or read or None,
            close_fds=close_fds,
            **cls._process_args,
            env=envvars,
        )

        if jobid is not None:
            with cls._lock:
                cls._processes[jobid] = proc

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd)
        if read:
            ret = proc.stdout.read()
        if bench_record is not None:
            from snakemake.benchmark import benchmarked

            with benchmarked(proc.pid, bench_record):
                retcode = proc.wait()
        else:
            retcode = proc.wait()

        if jobid is not None:
            with cls._lock:
                del cls._processes[jobid]

        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
Example #24
    def __new__(
        cls, cmd, *args, iterable=False, read=False, bench_record=None, **kwargs
    ):
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")
        cmd = format(cmd, *args, stepout=2, **kwargs)
        context = inspect.currentframe().f_back.f_locals
        print(context)

        stdout = sp.PIPE if iterable or read else STDOUT

        close_fds = sys.platform != "win32"

        jobid = context.get("jobid")
        if not context.get("is_shell"):
            logger.shellcmd(cmd)

        env_prefix = ""
        conda_env = context.get("conda_env", None)
        singularity_img = context.get("singularity_img", None)
        shadow_dir = context.get("shadow_dir", None)

        cmd = "{} {} {}".format(
            format(cls._process_prefix, *args, stepout=2, **kwargs),
            cmd.strip(),
            format(cls._process_suffix, *args, stepout=2, **kwargs),
        ).strip()
        print(cmd)

        conda = None
        if conda_env:
            cmd = Conda(singularity_img).shellcmd(conda_env, cmd)

        if singularity_img:
            args = context.get("singularity_args", "")
            cmd = singularity.shellcmd(
                singularity_img,
                cmd,
                args,
                shell_executable=cls._process_args["executable"],
                container_workdir=shadow_dir,
            )
            logger.info("Activating singularity image {}".format(singularity_img))
        if conda_env:
            logger.info("Activating conda environment: {}".format(conda_env))

        proc = sp.Popen(
            cmd,
            bufsize=-1,
            shell=True,
            stdout=stdout,
            universal_newlines=iterable or None,
            close_fds=close_fds,
            **cls._process_args
        )

        if jobid is not None:
            with cls._lock:
                cls._processes[jobid] = proc

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd)
        if read:
            ret = proc.stdout.read()
        if bench_record is not None:
            from benchmark import benchmarked

            gpu = None
            try:
                gpu = [int(x) for x in context.get("params").devices.split(",")]
                print("[snakeshell] Attempting to benchmark GPU process with GPUtil on devices: %s" % gpu)
            except Exception as e:
                pass

            rt_bench_path = None
            try:
                rt_bench_path = context.get("output").rtbench
                print("[snakeshell] Attempting to benchmark in real-time to: %s" % rt_bench_path)
            except Exception as e:
                pass

            with benchmarked(proc.pid, bench_record, gpus=gpu, rt_path=rt_bench_path, interval=15):
                retcode = proc.wait()
        else:
            retcode = proc.wait()

        if jobid is not None:
            with cls._lock:
                del cls._processes[jobid]

        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
Example #25
    counts = np.fromstring(inpf.read(), np.uint32).reshape((cycles, bins))
    return counts.astype(np.uint64)

def write_sig_dists(counts, filename):
    with gzip.open(filename, 'wb') as outf:
        outf.write(struct.pack('<III', 4, counts.shape[0], counts.shape[1]))
        outf.write(counts.astype(np.uint32).tostring())

counts_aggr = None

for signals, taginfo, out in zip(input.signals, input.taginfo,
                                 output.taginfo):
    cmd = format('{BINDIR}/tailseq-polya-ruler {wildcards.tile} {signals} \
        {input.score_cutoffs} {CONF[polyA_finder][signal_analysis_trigger]} \
        {CONF[polyA_ruler][downhill_extension_weight]} \
        {taginfo} {CONF[polyA_seeder][dist_sampling_bins]} \
        {CONF[polyA_ruler][signal_resampling_gap]} \
        {output.sigdists} | {BGZIP_CMD} -c > {out}', wildcards=wildcards,
        input=input, output=output)
    shell(cmd)

    counts_new = load_sig_dists(output.sigdists)
    if counts_aggr is None:
        counts_aggr = counts_new
    else:
        counts_aggr += counts_new

write_sig_dists(counts_aggr, output.sigdists)

Example #26
def script(basedir, path, input, output, params, wildcards, threads, resources,
           log, config):
    """
    Load a script from the given basedir + path and execute it.
    Supports Python 3 and R.
    """
    if not path.startswith("http"):
        if path.startswith("file://"):
            path = path[7:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)

    try:
        with urlopen(path) as source:
            if path.endswith(".py"):
                try:
                    exec(compile(source.read().decode(), path, "exec"), {
                        "snakemake": Snakemake(input, output, params, wildcards,
                                               threads, resources, log, config)
                    })
                except (Exception, BaseException) as ex:
                    raise WorkflowError("".join(traceback.format_exception(type(ex), ex, ex.__traceback__)))
            elif path.endswith(".R"):
                try:
                    import rpy2.robjects as robjects
                except ImportError:
                    raise ValueError(
                        "Python 3 package rpy2 needs to be installed to use the R function.")
                with urlopen(path) as source:
                    preamble = """
                    Snakemake <- setClass(
                        "Snakemake",
                        slots = c(
                            input = "list",
                            output = "list",
                            params = "list",
                            wildcards = "list",
                            threads = "numeric",
                            log = "list",
                            resources = "list",
                            config = "list"
                        )
                    )
                    snakemake <- Snakemake(
                        input = {},
                        output = {},
                        params = {},
                        wildcards = {},
                        threads = {},
                        log = {},
                        resources = {},
                        config = {}
                    )
                    """.format(REncoder.encode_namedlist(input),
                               REncoder.encode_namedlist(output),
                               REncoder.encode_namedlist(params),
                               REncoder.encode_namedlist(wildcards), threads,
                               REncoder.encode_namedlist(log),
                               REncoder.encode_namedlist({
                                   name: value
                                   for name, value in resources.items()
                                   if name != "_cores" and name != "_nodes"
                               }), REncoder.encode_dict(config))
                    logger.debug(preamble)
                    source = preamble + source.read().decode()
                    robjects.r(source)
            else:
                raise ValueError(
                    "Unsupported script: Expecting either Python (.py) or R (.R) script.")
    except URLError as e:
        raise WorkflowError(e)
Example #27
def script(path, basedir, input, output, params, wildcards, threads, resources,
           log, config, rulename, conda_env, singularity_img, singularity_args,
           bench_record, jobid, bench_iteration, shadow_dir):
    """
    Load a script from the given basedir + path and execute it.
    Supports Python 3 and R.
    """
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        (root_path, file_path, version) = split_git_path(path)
        dir = ".snakemake/wrappers"
        os.makedirs(dir, exist_ok=True)
        new_path = os.path.join(dir,
                                version + "-" + "-".join(file_path.split("/")))
        with open(new_path, 'w') as wrapper:
            wrapper.write(git_content(path))
            sourceurl = "file:" + new_path
            path = path.rstrip("@" + version)
    else:
        sourceurl = path

    f = None
    try:
        with urlopen(sourceurl) as source:
            if path.endswith(".py"):
                wrapper_path = path[7:] if path.startswith("file://") else path
                snakemake = Snakemake(input, output, params, wildcards,
                                      threads, resources, log, config,
                                      rulename, bench_iteration,
                                      os.path.dirname(wrapper_path))
                snakemake = pickle.dumps(snakemake)
                # Obtain search path for current snakemake module.
                # The module is needed for unpickling in the script.
                # We append it at the end (as a fallback).
                searchpath = SNAKEMAKE_SEARCHPATH
                if singularity_img is not None:
                    searchpath = singularity.SNAKEMAKE_MOUNTPOINT
                searchpath = '"{}"'.format(searchpath)
                # For local scripts, add their location to the path in case they use path-based imports
                if path.startswith("file://"):
                    searchpath += ', "{}"'.format(os.path.dirname(path[7:]))
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                import sys; sys.path.extend([{searchpath}]); import pickle; snakemake = pickle.loads({snakemake}); from snakemake.logging import logger; logger.printshellcmds = {printshellcmds}; __real_file__ = __file__; __file__ = {file_override};
                ######## Original script #########
                """).format(searchpath=escape_backslash(searchpath),
                            snakemake=snakemake,
                            printshellcmds=logger.printshellcmds,
                            file_override=repr(os.path.realpath(wrapper_path)))
            elif path.endswith(".R") or path.endswith(".Rmd"):
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                library(methods)
                Snakemake <- setClass(
                    "Snakemake",
                    slots = c(
                        input = "list",
                        output = "list",
                        params = "list",
                        wildcards = "list",
                        threads = "numeric",
                        log = "list",
                        resources = "list",
                        config = "list",
                        rule = "character",
                        bench_iteration = "numeric",
                        scriptdir = "character",
                        source = "function"
                    )
                )
                snakemake <- Snakemake(
                    input = {},
                    output = {},
                    params = {},
                    wildcards = {},
                    threads = {},
                    log = {},
                    resources = {},
                    config = {},
                    rule = {},
                    bench_iteration = {},
                    scriptdir = {},
                    source = function(...){{
                        wd <- getwd()
                        setwd(snakemake@scriptdir)
                        source(...)
                        setwd(wd)
                    }}
                )

                ######## Original script #########
                """).format(
                    REncoder.encode_namedlist(input),
                    REncoder.encode_namedlist(output),
                    REncoder.encode_namedlist(params),
                    REncoder.encode_namedlist(wildcards), threads,
                    REncoder.encode_namedlist(log),
                    REncoder.encode_namedlist({
                        name: value
                        for name, value in resources.items()
                        if name != "_cores" and name != "_nodes"
                    }), REncoder.encode_dict(config),
                    REncoder.encode_value(rulename),
                    REncoder.encode_numeric(bench_iteration),
                    REncoder.encode_value(
                        os.path.dirname(path[7:]) if path.
                        startswith("file://") else os.path.dirname(path)))
            else:
                raise ValueError(
                    "Unsupported script: Expecting either Python (.py), R (.R) or RMarkdown (.Rmd) script."
                )

            dir = ".snakemake/scripts"
            os.makedirs(dir, exist_ok=True)

            with tempfile.NamedTemporaryFile(suffix="." +
                                             os.path.basename(path),
                                             dir=dir,
                                             delete=False) as f:
                if not path.endswith(".Rmd"):
                    f.write(preamble.encode())
                    f.write(source.read())
                else:
                    # Insert Snakemake object after the RMarkdown header
                    code = source.read().decode()
                    pos = next(islice(re.finditer(r"---\n", code), 1,
                                      2)).start() + 3
                    f.write(str.encode(code[:pos]))
                    preamble = textwrap.dedent("""
                        ```{r, echo=FALSE, message=FALSE, warning=FALSE}
                        %s
                        ```
                        """ % preamble)
                    f.write(preamble.encode())
                    f.write(str.encode(code[pos:]))

            if path.endswith(".py"):
                py_exec = sys.executable
                if conda_env is not None:
                    py = os.path.join(conda_env, "bin", "python")
                    if os.path.exists(py):
                        out = subprocess.check_output([py, "--version"],
                                                      stderr=subprocess.STDOUT,
                                                      universal_newlines=True)
                        ver = tuple(
                            map(
                                int,
                                PY_VER_RE.match(out).group("ver_min").split(
                                    ".")))
                        if ver >= MIN_PY_VERSION:
                            # Python version is new enough, make use of environment
                            # to execute script
                            py_exec = "python"
                        else:
                            logger.warning(
                                "Conda environment defines Python "
                                "version < {0}.{1}. Using Python of the "
                                "master process to execute "
                                "script. Note that this cannot be avoided, "
                                "because the script uses data structures from "
                                "Snakemake which are Python >={0}.{1} "
                                "only.".format(*MIN_PY_VERSION))
                if singularity_img is not None:
                    # use python from image
                    py_exec = "python"
                # use the same Python as the running process or the one from the environment
                shell("{py_exec} {f.name:q}", bench_record=bench_record)
            elif path.endswith(".R"):
                if conda_env is not None and "R_LIBS" in os.environ:
                    logger.warning("R script job uses conda environment but "
                                   "R_LIBS environment variable is set. This "
                                   "is likely not intended, as R_LIBS can "
                                   "interfere with R packages deployed via "
                                   "conda. Consider running `unset R_LIBS` or "
                                   "remove it entirely before executing "
                                   "Snakemake.")
                shell("Rscript --vanilla {f.name:q}",
                      bench_record=bench_record)
            elif path.endswith(".Rmd"):
                if len(output) != 1:
                    raise WorkflowError(
                        "RMarkdown scripts (.Rmd) may only have a single output file."
                    )
                out = os.path.abspath(output[0])
                shell(
                    "Rscript --vanilla -e 'rmarkdown::render(\"{f.name}\", output_file=\"{out}\", quiet=TRUE, knit_root_dir = \"{workdir}\", params = list(rmd=\"{f.name}\"))'",
                    bench_record=bench_record,
                    workdir=os.getcwd())

    except URLError as e:
        raise WorkflowError(e)
    finally:
        if f:
            os.remove(f.name)
Example #28
def script(path, basedir, input, output, params, wildcards, threads, resources,
           log, config, rulename, conda_env, singularity_img, singularity_args,
           bench_record, jobid):
    """
    Load a script from the given basedir + path and execute it.
    Supports Python 3 and R.
    """
    if not path.startswith("http"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    else:
        sourceurl = path

    f = None
    try:
        with urlopen(sourceurl) as source:
            if path.endswith(".py"):
                snakemake = Snakemake(input, output, params, wildcards,
                                      threads, resources, log, config,
                                      rulename)
                snakemake = pickle.dumps(snakemake)
                # Obtain search path for current snakemake module.
                # The module is needed for unpickling in the script.
                # We append it at the end (as a fallback).
                searchpath = os.path.dirname(os.path.dirname(__file__))
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                import sys; sys.path.append("{}"); import pickle; snakemake = pickle.loads({}); from snakemake.logging import logger; logger.printshellcmds = {}
                ######## Original script #########
                """).format(escape_backslash(searchpath), snakemake,
                            logger.printshellcmds)
            elif path.endswith(".R") or path.endswith(".Rmd"):
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                library(methods)
                Snakemake <- setClass(
                    "Snakemake",
                    slots = c(
                        input = "list",
                        output = "list",
                        params = "list",
                        wildcards = "list",
                        threads = "numeric",
                        log = "list",
                        resources = "list",
                        config = "list",
                        rule = "character"
                    )
                )
                snakemake <- Snakemake(
                    input = {},
                    output = {},
                    params = {},
                    wildcards = {},
                    threads = {},
                    log = {},
                    resources = {},
                    config = {},
                    rule = {}
                )
                ######## Original script #########
                """).format(
                    REncoder.encode_namedlist(input),
                    REncoder.encode_namedlist(output),
                    REncoder.encode_namedlist(params),
                    REncoder.encode_namedlist(wildcards), threads,
                    REncoder.encode_namedlist(log),
                    REncoder.encode_namedlist({
                        name: value
                        for name, value in resources.items()
                        if name != "_cores" and name != "_nodes"
                    }), REncoder.encode_dict(config),
                    REncoder.encode_value(rulename))
            else:
                raise ValueError(
                    "Unsupported script: Expecting either Python (.py), R (.R) or RMarkdown (.Rmd) script."
                )

            if path.startswith("file://"):
                # in case of local path, use the same directory
                dir = os.path.dirname(path)[7:]
                prefix = ".snakemake."
            else:
                dir = ".snakemake/scripts"
                prefix = ""
                os.makedirs(dir, exist_ok=True)

            with tempfile.NamedTemporaryFile(suffix="." +
                                             os.path.basename(path),
                                             prefix=prefix,
                                             dir=dir,
                                             delete=False) as f:
                if not path.endswith(".Rmd"):
                    f.write(preamble.encode())
                    f.write(source.read())
                else:
                    # Insert Snakemake object after the RMarkdown header
                    code = source.read().decode()
                    pos = code.rfind("---")
                    f.write(str.encode(code[:pos + 3]))
                    preamble = textwrap.dedent("""
                        ```{r, echo=FALSE, message=FALSE, warning=FALSE}
                        %s
                        ```
                        """ % preamble)
                    f.write(preamble.encode())
                    f.write(str.encode(code[pos + 3:]))

            if path.endswith(".py"):
                py_exec = sys.executable
                if conda_env is not None:
                    py = os.path.join(conda_env, "bin", "python")
                    if os.path.exists(py):
                        out = subprocess.check_output([py, "--version"],
                                                      stderr=subprocess.STDOUT,
                                                      universal_newlines=True)
                        ver = tuple(
                            map(
                                int,
                                PY_VER_RE.match(out).group("ver_min").split(
                                    ".")))
                        if ver >= MIN_PY_VERSION:
                            # Python version is new enough, make use of environment
                            # to execute script
                            py_exec = "python"
                        else:
                            logger.warning(
                                "Conda environment defines Python "
                                "version < {0}.{1}. Using Python of the "
                                "master process to execute "
                                "script. Note that this cannot be avoided, "
                                "because the script uses data structures from "
                                "Snakemake which are Python >={0}.{1} "
                                "only.".format(*MIN_PY_VERSION))
                if singularity_img is not None:
                    # use python from image
                    py_exec = "python"
                # run the script with the Python interpreter selected above
                shell("{py_exec} {f.name}", bench_record=bench_record)
            elif path.endswith(".R"):
                shell("Rscript {f.name}", bench_record=bench_record)
            elif path.endswith(".Rmd"):
                if len(output) != 1:
                    raise WorkflowError(
                        "RMarkdown scripts (.Rmd) may only have a single output file."
                    )
                out = os.path.abspath(output[0])
                shell(
                    "Rscript -e 'rmarkdown::render(\"{f.name}\", output_file=\"{out}\", quiet=TRUE, params = list(rmd=\"{f.name}\"))'",
                    bench_record=bench_record)

    except URLError as e:
        raise WorkflowError(e)
    finally:
        if f:
            os.remove(f.name)
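For context, the function above is the script executor that a Snakefile's script directive ultimately calls into. A minimal usage sketch, assuming a standard Snakemake setup (the rule name, paths, and script file are hypothetical):

# Snakefile (hypothetical rule and paths)
rule uppercase:
    input:
        "data/measurements.txt"
    output:
        "results/measurements.upper.txt"
    script:
        "scripts/uppercase.py"

# scripts/uppercase.py -- the preamble injected above makes `snakemake` available
with open(snakemake.input[0]) as infile, open(snakemake.output[0], "w") as outfile:
    outfile.write(infile.read().upper())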
Example No. 29
0
    @classmethod
    def suffix(cls, suffix):
        cls._process_suffix = format(suffix, stepout=2)
Example No. 30
0
    def __new__(cls,
                cmd,
                *args,
                iterable=False,
                read=False,
                bench_record=None,
                **kwargs):
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")
        cmd = format(cmd, *args, stepout=2, **kwargs)
        context = inspect.currentframe().f_back.f_locals
        # add kwargs to context (overwriting the locals of the caller)
        context.update(kwargs)

        stdout = sp.PIPE if iterable or read else STDOUT

        close_fds = sys.platform != "win32"

        jobid = context.get("jobid")
        if not context.get("is_shell"):
            logger.shellcmd(cmd)

        conda_env = context.get("conda_env", None)
        container_img = context.get("container_img", None)
        env_modules = context.get("env_modules", None)
        shadow_dir = context.get("shadow_dir", None)

        cmd = " ".join((cls._process_prefix, cmd, cls._process_suffix)).strip()

        if env_modules:
            cmd = env_modules.shellcmd(cmd)
            logger.info(
                "Activating environment modules: {}".format(env_modules))

        if conda_env:
            cmd = Conda(container_img).shellcmd(conda_env, cmd)

        tmpdir = None
        # If the command would exceed the maximum argument length, write it to
        # a temporary script and execute that script instead.
        if len(cmd.replace("'", r"'\''")) + 2 > MAX_ARG_LEN:
            tmpdir = tempfile.mkdtemp(dir=".snakemake", prefix="shell_tmp.")
            script = os.path.join(os.path.abspath(tmpdir), "script.sh")
            with open(script, "w") as script_fd:
                print(cmd, file=script_fd)
            os.chmod(script,
                     os.stat(script).st_mode | stat.S_IXUSR | stat.S_IRUSR)
            cmd = '"{}" "{}"'.format(cls.get_executable() or "/bin/sh", script)

        if container_img:
            args = context.get("singularity_args", "")
            cmd = singularity.shellcmd(
                container_img,
                cmd,
                args,
                envvars=None,
                shell_executable=cls._process_args["executable"],
                container_workdir=shadow_dir,
            )
            logger.info(
                "Activating singularity image {}".format(container_img))
        if conda_env:
            logger.info("Activating conda environment: {}".format(conda_env))

        threads = str(context.get("threads", 1))
        # environment variable lists for linear algebra libraries taken from:
        # https://stackoverflow.com/a/53224849/2352071
        # https://github.com/xianyi/OpenBLAS/tree/59243d49ab8e958bb3872f16a7c0ef8c04067c0a#setting-the-number-of-threads-using-environment-variables
        envvars = dict(os.environ)
        envvars["OMP_NUM_THREADS"] = threads
        envvars["GOTO_NUM_THREADS"] = threads
        envvars["OPENBLAS_NUM_THREADS"] = threads
        envvars["MKL_NUM_THREADS"] = threads
        envvars["VECLIB_MAXIMUM_THREADS"] = threads
        envvars["NUMEXPR_NUM_THREADS"] = threads
        if conda_env and cls.conda_block_conflicting_envvars:
            # remove envvars that conflict with conda
            for var in ["R_LIBS", "PYTHONPATH", "PERLLIB", "PERL5LIB"]:
                try:
                    del envvars[var]
                except KeyError:
                    pass

        use_shell = True
        if ON_WINDOWS and cls.get_executable():
            # If an executable is set on Windows, shell mode cannot be used;
            # the executable has to be prepended to the command together with
            # a command prefix (e.g. -c for bash).
            use_shell = False
            cmd = '"{}" {} {}'.format(cls.get_executable(),
                                      cls._win_command_prefix, argvquote(cmd))

        proc = sp.Popen(
            cmd,
            bufsize=-1,
            shell=use_shell,
            stdout=stdout,
            universal_newlines=iterable or read or None,
            close_fds=close_fds,
            **cls._process_args,
            env=envvars,
        )

        if jobid is not None:
            with cls._lock:
                cls._processes[jobid] = proc

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd, tmpdir)
        if read:
            ret = proc.stdout.read()
        if bench_record is not None:
            from snakemake.benchmark import benchmarked

            with benchmarked(proc.pid, bench_record):
                retcode = proc.wait()
        else:
            retcode = proc.wait()

        if tmpdir:
            shutil.rmtree(tmpdir)

        if jobid is not None:
            with cls._lock:
                del cls._processes[jobid]

        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
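As a rough illustration of the iterable and read switches handled above, the shell helper can be driven like this (a sketch, assuming snakemake is installed; file names are arbitrary):

from snakemake.shell import shell

# Plain invocation: returns None and raises CalledProcessError on a non-zero exit.
shell("echo hello > greeting.txt")

# read=True captures stdout as text (universal_newlines is enabled above).
listing = shell("ls -1", read=True)

# iterable=True lazily yields stdout line by line.
for line in shell("ls -1", iterable=True):
    print(line)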
Example No. 31
0
    @classmethod
    def prefix(cls, prefix):
        cls._process_prefix = format(prefix, stepout=2)
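Examples No. 29 and No. 31 are the class-level hooks behind shell.suffix() and shell.prefix(). A hedged sketch of how they are typically combined (the prefix and suffix strings are just one common choice):

from snakemake.shell import shell

# Every subsequent shell() call is assembled as: <prefix> <cmd> <suffix>.
shell.prefix("set -euo pipefail; ")
shell.suffix(" 2>> shell.log")

shell("grep pattern input.txt > hits.txt")
# effectively runs: set -euo pipefail; grep pattern input.txt > hits.txt 2>> shell.log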
Example No. 32
0
    def __new__(cls,
                cmd,
                *args,
                iterable=False,
                read=False,
                bench_record=None,
                **kwargs):
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")

        if ON_WINDOWS and not cls.get_executable():
            # If bash is not used on Windows, quoting must be handled in a special way
            kwargs["quote_func"] = cmd_exe_quote

        cmd = format(cmd, *args, stepout=2, **kwargs)

        stdout = sp.PIPE if iterable or read else STDOUT

        close_fds = sys.platform != "win32"

        func_context = inspect.currentframe().f_back.f_locals

        if func_context.get(RULEFUNC_CONTEXT_MARKER):
            # If this comes from a rule, we expect certain information to be passed
            # implicitly via the rule func context, which is added here.
            context = func_context
        else:
            # Otherwise, context is just filled via kwargs.
            context = dict()
        # add kwargs to context (overwriting the locals of the caller)
        context.update(kwargs)

        jobid = context.get("jobid")
        if not context.get("is_shell"):
            logger.shellcmd(cmd)

        conda_env = context.get("conda_env", None)
        conda_base_path = context.get("conda_base_path", None)
        container_img = context.get("container_img", None)
        env_modules = context.get("env_modules", None)
        shadow_dir = context.get("shadow_dir", None)
        resources = context.get("resources", {})
        singularity_args = context.get("singularity_args", "")
        threads = context.get("threads", 1)

        cmd = " ".join((cls._process_prefix, cmd, cls._process_suffix)).strip()

        if env_modules:
            cmd = env_modules.shellcmd(cmd)
            logger.info(
                "Activating environment modules: {}".format(env_modules))

        if conda_env:
            if ON_WINDOWS and not cls.get_executable():
                # If we use cmd.exe directly on Windows, we need to prepend the batch activation script.
                cmd = Conda(container_img,
                            prefix_path=conda_base_path).shellcmd_win(
                                conda_env, cmd)
            else:
                cmd = Conda(container_img,
                            prefix_path=conda_base_path).shellcmd(
                                conda_env, cmd)

        tmpdir = None
        # If the command would exceed the maximum argument length, write it to
        # a temporary script and execute that script instead.
        if len(cmd.replace("'", r"'\''")) + 2 > MAX_ARG_LEN:
            tmpdir = tempfile.mkdtemp(dir=".snakemake", prefix="shell_tmp.")
            script = os.path.join(os.path.abspath(tmpdir), "script.sh")
            with open(script, "w") as script_fd:
                print(cmd, file=script_fd)
            os.chmod(script,
                     os.stat(script).st_mode | stat.S_IXUSR | stat.S_IRUSR)
            cmd = '"{}" "{}"'.format(cls.get_executable() or "/bin/sh", script)

        if container_img:
            cmd = singularity.shellcmd(
                container_img,
                cmd,
                singularity_args,
                envvars=None,
                shell_executable=cls._process_args["executable"],
                container_workdir=shadow_dir,
                is_python_script=context.get("is_python_script", False),
            )
            logger.info(
                "Activating singularity image {}".format(container_img))
        if conda_env:
            logger.info("Activating conda environment: {}".format(
                os.path.relpath(conda_env)))

        tmpdir_resource = resources.get("tmpdir", None)
        # environment variable lists for linear algebra libraries taken from:
        # https://stackoverflow.com/a/53224849/2352071
        # https://github.com/xianyi/OpenBLAS/tree/59243d49ab8e958bb3872f16a7c0ef8c04067c0a#setting-the-number-of-threads-using-environment-variables
        envvars = dict(os.environ)
        threads = str(threads)
        envvars["OMP_NUM_THREADS"] = threads
        envvars["GOTO_NUM_THREADS"] = threads
        envvars["OPENBLAS_NUM_THREADS"] = threads
        envvars["MKL_NUM_THREADS"] = threads
        envvars["VECLIB_MAXIMUM_THREADS"] = threads
        envvars["NUMEXPR_NUM_THREADS"] = threads

        if tmpdir_resource:
            envvars["TMPDIR"] = tmpdir_resource
            envvars["TMP"] = tmpdir_resource
            envvars["TEMPDIR"] = tmpdir_resource
            envvars["TEMP"] = tmpdir_resource

        if "additional_envvars" in kwargs:
            env = kwargs["additional_envvars"]
            if not isinstance(env, dict) or not all(
                    isinstance(v, str) for v in env.values()):
                raise WorkflowError(
                    "The given environment variables for the shell command have to be "
                    "a dict of strings, but the following was provided instead:\n{}".format(env))
            envvars.update(env)

        if conda_env and cls.conda_block_conflicting_envvars:
            # remove envvars that conflict with conda
            for var in ["R_LIBS", "PYTHONPATH", "PERLLIB", "PERL5LIB"]:
                try:
                    del envvars[var]
                except KeyError:
                    pass

        use_shell = True
        if ON_WINDOWS and cls.get_executable():
            # If an executable is set on Windows, shell mode cannot be used;
            # the executable has to be prepended to the command together with
            # a command prefix (e.g. -c for bash).
            use_shell = False
            cmd = '"{}" {} {}'.format(cls.get_executable(),
                                      cls._win_command_prefix, argvquote(cmd))

        proc = sp.Popen(
            cmd,
            bufsize=-1,
            shell=use_shell,
            stdout=stdout,
            universal_newlines=iterable or read or None,
            close_fds=close_fds,
            **cls._process_args,
            env=envvars,
        )

        if jobid is not None:
            with cls._lock:
                cls._processes[jobid] = proc

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd, tmpdir)
        if read:
            ret = proc.stdout.read()
        if bench_record is not None:
            from snakemake.benchmark import benchmarked

            with benchmarked(proc.pid, bench_record):
                retcode = proc.wait()
        else:
            retcode = proc.wait()

        if tmpdir:
            shutil.rmtree(tmpdir)

        if jobid is not None:
            with cls._lock:
                del cls._processes[jobid]

        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
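This newer variant also exports a rule's tmpdir resource into the command's environment. A sketch of what that enables on the workflow side (rule and paths are hypothetical):

rule sort_big_file:
    input:
        "data/big.txt"
    output:
        "results/big.sorted.txt"
    resources:
        tmpdir="/scratch/mytmp"  # exported to the command as TMPDIR/TMP/TEMPDIR/TEMP
    shell:
        # GNU sort places its temporary files under $TMPDIR
        "sort {input} > {output}"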
Example No. 33
0
def report(
    text,
    path,
    stylesheet=None,
    defaultenc="utf8",
    template=None,
    metadata=None,
    **files
):
    if stylesheet is None:
        stylesheet = os.path.join(os.path.dirname(__file__), "report.css")
    outmime, _ = mimetypes.guess_type(path)
    if outmime != "text/html":
        raise ValueError("Path to report output has to be an HTML file.")
    definitions = textwrap.dedent(
        """
    .. role:: raw-html(raw)
       :format: html

    """
    )

    metadata = textwrap.dedent(
        """

    .. container::
       :name: metadata

       {metadata}{date}

    """
    ).format(
        metadata=metadata + " | " if metadata else "",
        date=datetime.date.today().isoformat(),
    )

    text = format(textwrap.dedent(text), stepout=3)

    attachments = []
    if files:
        attachments = [
            textwrap.dedent(
                """
            .. container::
               :name: attachments

            """
            )
        ]
        for name, _files in sorted(files.items()):
            if not isinstance(_files, list):
                _files = [_files]
            links = []
            for file in sorted(_files):
                data = data_uri_from_file(file)
                links.append(
                    ':raw-html:`<a href="{data}" download="{filename}" draggable="true">{filename}</a>`'.format(
                        data=data, filename=os.path.basename(file)
                    )
                )
            links = "\n\n              ".join(links)
            attachments.append(
                """
       .. container::
          :name: {name}

          {name}:
              {links}
                """.format(
                    name=name, links=links
                )
            )

    text = definitions + text + "\n\n" + "\n\n".join(attachments) + metadata

    overrides = dict()
    if template is not None:
        overrides["template"] = template
    if stylesheet is not None:
        overrides["stylesheet_path"] = stylesheet
    with open(path, "w") as html:
        publish_file(
            source=io.StringIO(text),
            destination=html,
            writer_name="html",
            settings_overrides=overrides,
        )
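Example No. 33 is the legacy RST-based report helper; from a rule's run block it can be used roughly as follows (file names and metadata are hypothetical):

from snakemake.utils import report

rule make_report:
    input:
        "results/summary.tsv"
    output:
        "report.html"
    run:
        report("""
        Analysis report
        ===============

        The summary table is attached as T1_.
        """, output[0], metadata="Author: Jane Doe", T1=input[0])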