Example #1
0
File: job.py Project: Nollde/law
import os
import time
import re
import subprocess
import logging

import six

from law.config import Config
from law.job.base import BaseJobManager, BaseJobFileFactory
from law.util import interruptable_popen, make_list, make_unique, quote_cmd

# logger named after this module
logger = logging.getLogger(__name__)

# config instance, used below to read chunk sizes when the class body is evaluated
_cfg = Config.instance()


class LSFJobManager(BaseJobManager):
    # NOTE(review): presumably the job manager for the LSF batch system, judging from the class
    # name and the "Job <id> is submitted" pattern below -- confirm against BaseJobManager

    # chunking settings
    # the submit chunk size is fixed to 0 here, whereas the cancel and query chunk sizes are read
    # as integers from the "job" config section
    chunk_size_submit = 0
    chunk_size_cancel = _cfg.get_expanded_int("job", "lsf_chunk_size_cancel")
    chunk_size_query = _cfg.get_expanded_int("job", "lsf_chunk_size_query")

    # matches a line such as "Job <1234> is submitted ..." and captures the numeric job id
    submission_job_id_cre = re.compile(r"^Job <(\d+)> is submitted.+$")

    def __init__(self, queue=None, emails=False, threads=1):
        # initialize the base class first, it is called without arguments
        super(LSFJobManager, self).__init__()

        # remember the queue passed to the constructor
        self.queue = queue
Example #2
0
    def cmd(self, proxy_cmd):
        """
        Builds and returns the ``singularity exec`` command string that runs *proxy_cmd*, a list
        of command parts, inside the image ``self.image``. Environment variables are exported in
        front of the proxy command and host paths are made available through ``-B`` bind options.
        Note that *proxy_cmd* is extended in place by ``--local-scheduler`` when
        ``force_local_scheduler()`` returns a true value and the flag is not yet contained.
        """
        cfg = Config.instance()

        # start with the singularity arguments configured on the task, or the defaults
        # TODO: this looks pretty random
        args = make_list(getattr(self.task, "singularity_args", self.default_singularity_args))

        # an image-specific config section is preferred over the generic one
        section = "singularity_" + self.image
        if not cfg.has_section(section):
            section = "singularity"
        forward_dir = cfg.get(section, "forward_dir")
        python_dir = cfg.get(section, "python_dir")
        bin_dir = cfg.get(section, "bin_dir")
        stagein_dir = cfg.get(section, "stagein_dir")
        stageout_dir = cfg.get(section, "stageout_dir")

        def dst(*parts):
            # path composed of the forward directory and the stringified parts
            return os.path.join(forward_dir, *[str(part) for part in parts])

        # sources that were already registered as a bind
        mounted_srcs = []

        def mount(*vol):
            host_src = vol[0]

            # a source is never bound twice
            if host_src in mounted_srcs:
                return
            mounted_srcs.append(host_src)

            # sources that do not exist yet are created as directories
            if not (os.path.isfile(host_src) or os.path.exists(host_src)):
                os.makedirs(host_src)

            # register the bind option
            binding = ":".join(vol)
            args.extend(("-B", binding))

        # variables to export inside the container, the insertion order is the export order
        env = OrderedDict()

        # sandboxing variables
        env["LAW_SANDBOX"] = self.key
        env["LAW_SANDBOX_SWITCHED"] = "1"

        # staging directories
        if self.stagein_info:
            stagein_dst = dst(stagein_dir)
            env["LAW_SANDBOX_STAGEIN_DIR"] = "{}".format(stagein_dst)
            mount(self.stagein_info.stage_dir.path, stagein_dst)
        if self.stageout_info:
            stageout_dst = dst(stageout_dir)
            env["LAW_SANDBOX_STAGEOUT_DIR"] = "{}".format(stageout_dst)
            mount(self.stageout_info.stage_dir.path, stageout_dst)

        # extend the path variables by the forwarded directories
        env["PATH"] = os.pathsep.join(["$PATH", dst("bin")])
        env["PYTHONPATH"] = os.pathsep.join(["$PYTHONPATH", dst(python_dir)])

        # bind law and its dependencies, packages as directories and plain modules as files
        for mod in law_deps:
            mod_file = mod.__file__
            mod_dir = os.path.dirname(mod_file)
            mod_name = os.path.splitext(os.path.basename(mod_file))[0]
            if mod_name == "__init__":
                mount(mod_dir, dst(python_dir, os.path.basename(mod_dir)))
            else:
                file_name = mod_name + ".py"
                mount(os.path.join(mod_dir, file_name), dst(python_dir, file_name))

        # the law cli directory contains a law executable, so it is added to PATH
        cli_dir = dst(python_dir, "law", "cli")
        env["PATH"] = os.pathsep.join([env["PATH"], cli_dir])

        # forward the law config file
        if cfg.config_file:
            law_cfg_dst = dst("law.cfg")
            mount(cfg.config_file, law_cfg_dst)
            env["LAW_CONFIG_FILE"] = law_cfg_dst

        # forward the last existing luigi config file
        luigi_cfg_paths = luigi.configuration.LuigiConfigParser._config_paths
        luigi_cfg = next((p for p in reversed(luigi_cfg_paths) if os.path.exists(p)), None)
        if luigi_cfg is not None:
            luigi_cfg_dst = dst("luigi.cfg")
            mount(luigi_cfg, luigi_cfg_dst)
            env["LUIGI_CONFIG_PATH"] = luigi_cfg_dst

        # variables from the config come first, those from the task take precedence
        for extra_env in (self.get_config_env(), self.get_task_env()):
            env.update(extra_env)

        # volumes from the config and from the task, the latter take precedence
        vols = {}
        vols.update(self.get_config_volumes())
        vols.update(self.get_task_volumes())
        for hdir, cdir in six.iteritems(vols):
            if cdir:
                # resolve the placeholders for the forwarded python and bin directories
                cdir = cdir.replace("${PY}", dst(python_dir))
                cdir = cdir.replace("${BIN}", dst(bin_dir))
                mount(hdir, cdir)
            else:
                mount(hdir)

        # handle scheduling within the container
        ls_flag = "--local-scheduler"
        if self.force_local_scheduler() and ls_flag not in proxy_cmd:
            proxy_cmd.append(ls_flag)
        if ls_flag not in proxy_cmd:
            # without local scheduling, worker information is passed on when available
            worker_id = getattr(self.task, "_worker_id", None)
            if worker_id:
                env["LAW_SANDBOX_WORKER_ID"] = worker_id
            worker_task = getattr(self.task, "_worker_task", None)
            if worker_task:
                env["LAW_SANDBOX_WORKER_TASK"] = worker_task

        # one export statement per variable
        pre_cmds = ["export {}=\"{}\"".format(key, value) for key, value in env.items()]

        # assemble the full command
        return "singularity exec {args} {image} bash -l -c '{pre_cmd}; {proxy_cmd}'".format(
            args=" ".join(args),
            image=self.image,
            pre_cmd="; ".join(pre_cmds),
            proxy_cmd=" ".join(proxy_cmd))
Example #3
0
File: remote.py Project: yrath/law
from law.util import make_list, makedirs_perm, human_bytes, create_hash, user_owns_file, io_lock

logger = logging.getLogger(__name__)

# gfal2 is optional: when the import fails, a stand-in object is installed further below that
# raises on any attribute access
try:
    import gfal2

    HAS_GFAL2 = True

    # a marker attribute on the gfal2 module ensures that its logger is configured only once
    if not getattr(gfal2, "_law_configured_logging", False):
        gfal2._law_configured_logging = True

        gfal2_logger = logging.getLogger("gfal2")
        gfal2_logger.addHandler(logging.StreamHandler())

        # a level given as a string is translated into the logging attribute of that name, with
        # WARNING as the fallback for unknown names
        level = Config.instance().get_expanded("target", "gfal2_log_level")
        level = (
            getattr(logging, level, logging.WARNING)
            if isinstance(level, six.string_types)
            else level
        )
        gfal2_logger.setLevel(level)

except ImportError:
    HAS_GFAL2 = False

    class gfal2Dummy(object):
        """
        Stand-in for the gfal2 module that raises an exception on every attribute lookup that is
        not resolved otherwise.
        """

        def __getattr__(self, attr):
            msg = "trying to access 'gfal2.{}', but gfal2 is not installed".format(attr)
            raise Exception(msg)

    gfal2 = gfal2Dummy()
Example #4
0
    def cmd(self, proxy_cmd):
        """
        Builds and returns the quoted ``singularity exec`` command that runs *proxy_cmd*, a list
        of command parts, inside the image ``self.image``. When ``self.forward_env`` is set, law,
        its dependencies and the config files are made available inside the container, either
        through bind mounts or, when ``self.allow_binds`` is not set, by pointing the environment
        to the host paths. An exception is raised when staging directories or volumes are
        requested while binds are not allowed. Note that *proxy_cmd* is extended in place by
        ``--local-scheduler`` when ``force_local_scheduler()`` returns a true value and the flag
        is not yet contained.
        """
        cfg = Config.instance()

        # arguments of the singularity exec command
        args = ["-e"]

        # append the arguments returned by the task's singularity_args callable, or the defaults
        task_args_getter = getattr(self.task, "singularity_args", None)
        if callable(task_args_getter):
            args.extend(make_list(task_args_getter()))
        else:
            args.extend(make_list(self.default_singularity_args))

        # variables to set inside the container
        env = self._get_env()

        # python should not write byte code files
        env["PYTHONDONTWRITEBYTECODE"] = "1"

        # directories used to build forwarded paths
        section = self.get_config_section()
        forward_dir, python_dir, bin_dir, stagein_dir, stageout_dir = [
            cfg.get_expanded(section, option)
            for option in ("forward_dir", "python_dir", "bin_dir", "stagein_dir", "stageout_dir")
        ]

        def dst(*parts):
            # path composed of the forward directory and the stringified parts
            return os.path.join(forward_dir, *[str(part) for part in parts])

        # sources that were already registered as a bind
        mounted_srcs = []

        def mount(*vol):
            host_src = vol[0]

            # a source is never bound twice
            if host_src in mounted_srcs:
                return
            mounted_srcs.append(host_src)

            # sources that do not exist yet are created as directories
            if not (os.path.isfile(host_src) or os.path.exists(host_src)):
                os.makedirs(host_src)

            # register the bind option
            binding = ":".join(vol)
            args.extend(("-B", binding))

        if self.forward_env:
            binds_allowed = self.allow_binds

            # path variables, extended by forwarded directories only when they can be bound
            if binds_allowed:
                env["PATH"] = os.pathsep.join(["$PATH", dst("bin")])
                env["PYTHONPATH"] = os.pathsep.join(["$PYTHONPATH", dst(python_dir)])
            else:
                env["PATH"] = "$PATH"
                env["PYTHONPATH"] = "$PYTHONPATH"

            # make law and its dependencies available
            for mod in law_deps:
                mod_file = mod.__file__
                mod_dir = os.path.dirname(mod_file)
                mod_name = os.path.splitext(os.path.basename(mod_file))[0]
                if mod_name == "__init__":
                    # packages are forwarded as directories
                    vsrc = mod_dir
                    vdst = dst(python_dir, os.path.basename(mod_dir))
                else:
                    # plain modules are forwarded as single files
                    file_name = mod_name + ".py"
                    vsrc = os.path.join(mod_dir, file_name)
                    vdst = dst(python_dir, file_name)

                if binds_allowed:
                    mount(vsrc, vdst)
                    continue

                # without binds, the parent directory of the source is added to PYTHONPATH once
                dep_dir = os.path.dirname(vsrc)
                known_dirs = env["PYTHONPATH"].split(os.pathsep)
                if dep_dir not in known_dirs:
                    env["PYTHONPATH"] = os.pathsep.join([env["PYTHONPATH"], dep_dir])

            # the law cli directory contains a law executable, so it is added to PATH
            if binds_allowed:
                cli_dir = dst(python_dir, "law", "cli")
            else:
                cli_dir = os.path.join(law_src_path(), "cli")
            env["PATH"] = os.pathsep.join([env["PATH"], cli_dir])

            # forward the law config file
            if cfg.config_file:
                if binds_allowed:
                    law_cfg_dst = dst("law.cfg")
                    mount(cfg.config_file, law_cfg_dst)
                    env["LAW_CONFIG_FILE"] = law_cfg_dst
                else:
                    env["LAW_CONFIG_FILE"] = cfg.config_file

            # forward the last existing luigi config file
            luigi_cfg_paths = luigi.configuration.LuigiConfigParser._config_paths
            luigi_cfg = next((p for p in reversed(luigi_cfg_paths) if os.path.exists(p)), None)
            if luigi_cfg is not None:
                if binds_allowed:
                    luigi_cfg_dst = dst("luigi.cfg")
                    mount(luigi_cfg, luigi_cfg_dst)
                    env["LUIGI_CONFIG_PATH"] = luigi_cfg_dst
                else:
                    env["LUIGI_CONFIG_PATH"] = luigi_cfg

        # staging relies on binds
        staging_requested = self.stagein_info or self.stageout_info
        if staging_requested and not self.allow_binds:
            raise Exception("Cannot use stage-in or -out if binds are not allowed.")

        # add staging directories
        if self.stagein_info:
            stagein_dst = dst(stagein_dir)
            env["LAW_SANDBOX_STAGEIN_DIR"] = stagein_dst
            mount(self.stagein_info.stage_dir.path, stagein_dst)
        if self.stageout_info:
            stageout_dst = dst(stageout_dir)
            env["LAW_SANDBOX_STAGEOUT_DIR"] = stageout_dst
            mount(self.stageout_info.stage_dir.path, stageout_dst)

        # volumes defined in the config and by the task rely on binds as well
        vols = self._get_volumes()
        if vols and not self.allow_binds:
            raise Exception("Cannot forward volumes to Sandbox if binds are not allowed.")

        for hdir, cdir in six.iteritems(vols):
            if cdir:
                # resolve the placeholders for the forwarded python and bin directories
                cdir = cdir.replace("${PY}", dst(python_dir))
                cdir = cdir.replace("${BIN}", dst(bin_dir))
                mount(hdir, cdir)
            else:
                mount(hdir)

        # arguments needed for both env loading and executing the job
        args.extend(self.common_args())

        # commands that set up the environment
        setup_cmds = self._build_setup_cmds(env)

        # handle scheduling within the container
        ls_flag = "--local-scheduler"
        if self.force_local_scheduler() and ls_flag not in proxy_cmd:
            proxy_cmd.append(ls_flag)

        # the setup commands and the proxy command are chained into a single bash command
        cmd_parts = ["singularity", "exec"]
        cmd_parts.extend(args)
        cmd_parts.extend([self.image, "bash", "-l", "-c"])
        cmd_parts.append("; ".join(flatten(setup_cmds, " ".join(proxy_cmd))))

        return quote_cmd(cmd_parts)
Example #5
0
def get_config(name):
    """
    Returns the config value addressed by *name*, which has the format ``"section.option"``.
    """
    # split at the first dot only, so everything after it is treated as the option
    pieces = name.split(".", 1)
    section, option = pieces

    cfg = Config.instance()
    return cfg.get(section, option)
Example #6
0
logger = logging.getLogger(__name__)


# gfal2 is optional: when the import fails, a stand-in object is installed further below that
# raises on any attribute access
try:
    import gfal2

    HAS_GFAL2 = True

    # a marker attribute on the gfal2 module ensures that its logger is configured only once
    if not getattr(gfal2, "_law_configured_logging", False):
        gfal2._law_configured_logging = True

        gfal2_logger = logging.getLogger("gfal2")
        gfal2_logger.addHandler(logging.StreamHandler())

        # a level given as a string is translated into the logging attribute of that name, with
        # WARNING as the fallback for unknown names
        level = Config.instance().get("target", "gfal2_log_level")
        level = (
            getattr(logging, level, logging.WARNING)
            if isinstance(level, six.string_types)
            else level
        )
        gfal2_logger.setLevel(level)

except ImportError:
    HAS_GFAL2 = False

    class gfal2Dummy(object):
        """
        Stand-in for the gfal2 module that raises an exception on every attribute lookup that is
        not resolved otherwise.
        """

        def __getattr__(self, attr):
            msg = "trying to access 'gfal2.{}', but gfal2 is not installed".format(attr)
            raise Exception(msg)

    gfal2 = gfal2Dummy()

Example #7
0
File: index.py Project: yrath/law
def execute(args):
    """
    Executes the *index* subprogram with parsed commandline *args*.

    The location of the index file is read from the ``index_file`` option of the ``core`` config
    section. Depending on *args*, one of three actions is performed:

    - ``args.location``: the location of the index file is printed.
    - ``args.remove``: the index file is removed if it exists.
    - otherwise: all modules listed in the ``modules`` config section, plus those in
      ``args.modules``, are imported and one line per indexable task class is written to the index
      file in the format ``module_id:task_family:param param ...``.

    Import errors of single modules are printed but do not abort the indexing. ``args.verbose``
    enables progress and summary output, ``args.no_externals`` excludes external tasks.
    """
    cfg = Config.instance()
    index_file = cfg.get_expanded("core", "index_file")

    # just print the file location?
    if args.location:
        print(index_file)
        return

    # just remove the index file?
    if args.remove:
        if os.path.exists(index_file):
            os.remove(index_file)
            print("removed index file {}".format(index_file))
        return

    # get modules to lookup
    lookup = [m.strip() for m in cfg.keys("modules")]
    if args.modules:
        lookup += args.modules

    print("loading tasks from {} module(s)".format(len(lookup)))

    # loop through modules, import everything to load tasks
    for modid in lookup:
        if not modid:
            continue

        if args.verbose:
            sys.stdout.write("loading module '{}'".format(modid))

        try:
            import_module(modid)
        except Exception as e:
            # a failing module must not abort the indexing, so the error is only reported
            if not args.verbose:
                print("Error in module '{}': {}".format(colored(modid, "red"), str(e)))
            else:
                print("\n\nError in module '{}':".format(colored(modid, "red")))
                traceback.print_exc()
            continue

        if args.verbose:
            print(", {}".format(colored("done", style="bright")))

    # determine tasks to write into the index file by walking the class tree below Task
    # breadth-first
    seen_families = set()
    task_classes = []
    lookup = [Task]
    while lookup:
        cls = lookup.pop(0)
        lookup.extend(cls.__subclasses__())

        # skip already seen task families
        if cls.task_family in seen_families:
            continue
        seen_families.add(cls.task_family)

        # skip when explicitly excluded
        if cls.exclude_index:
            continue

        # skip external tasks
        is_external_task = issubclass(cls, ExternalTask)
        if args.no_externals and is_external_task:
            continue

        # skip non-external tasks without run implementation, the run attribute is looked up only
        # once and defensively so that a class without it is skipped instead of raising
        run_func = getattr(cls, "run", None)
        run_is_callable = callable(run_func)
        run_is_abstract = getattr(run_func, "__isabstractmethod__", False)
        if not is_external_task and (not run_is_callable or run_is_abstract):
            continue

        task_classes.append(cls)

    def get_task_params(cls):
        # names of all luigi parameters of cls that are not excluded from the index, with
        # underscores replaced by dashes
        exclude = getattr(cls, "exclude_params_index", set())
        params = []
        for attr in dir(cls):
            member = getattr(cls, attr)
            if isinstance(member, luigi.Parameter) and not multi_match(attr, exclude, any):
                params.append(attr.replace("_", "-"))
        return params

    def index_line(cls, params):
        # format: "module_id:task_family:param param ..."
        return "{}:{}:{}".format(cls.__module__, cls.task_family, " ".join(params))

    stats = OrderedDict()

    # make sure the directory of the index file exists, the directory name is empty when the index
    # file is a bare file name, in which case there is nothing to create
    index_dir = os.path.dirname(index_file)
    if index_dir and not os.path.exists(index_dir):
        os.makedirs(index_dir)

    # write the index file
    with open(index_file, "w") as f:
        for cls in task_classes:
            # get params
            params = get_task_params(cls)

            # fill stats
            if cls.__module__ not in stats:
                stats[cls.__module__] = []
            stats[cls.__module__].append((cls.task_family, params))

            f.write(index_line(cls, params) + "\n")

    # print stats
    if args.verbose:
        for mod, data in six.iteritems(stats):
            print("\nmodule '{}', {} task(s):".format(colored(mod, style="bright"), len(data)))
            for task_family, _ in data:
                print("    - {}".format(colored(task_family, "green")))
        print("")

    print("written {} task(s) to index file '{}'".format(len(task_classes), index_file))