import os
import time
import re
import subprocess
import logging

import six

from law.config import Config
from law.job.base import BaseJobManager, BaseJobFileFactory
from law.util import interruptable_popen, make_list, make_unique, quote_cmd


logger = logging.getLogger(__name__)

_cfg = Config.instance()


class LSFJobManager(BaseJobManager):

    # chunking settings
    chunk_size_submit = 0
    chunk_size_cancel = _cfg.get_expanded_int("job", "lsf_chunk_size_cancel")
    chunk_size_query = _cfg.get_expanded_int("job", "lsf_chunk_size_query")

    submission_job_id_cre = re.compile(r"^Job <(\d+)> is submitted.+$")

    def __init__(self, queue=None, emails=False, threads=1):
        super(LSFJobManager, self).__init__()

        self.queue = queue
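# Hedged usage sketch, not part of the law sources: the class-level
# "submission_job_id_cre" pattern above extracts the numeric job id from
# bsub's confirmation line. The sample output below is an assumption about
# what LSF prints on submission.
sample = "Job <123456> is submitted to queue <normal>."
m = LSFJobManager.submission_job_id_cre.match(sample)
print(m.group(1) if m else None)  # -> 123456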
def cmd(self, proxy_cmd):
    cfg = Config.instance()

    # get args for the singularity command as configured in the task
    # TODO: this looks pretty random
    args = make_list(getattr(self.task, "singularity_args", self.default_singularity_args))

    # helper to build forwarded paths
    section = "singularity_" + self.image
    section = section if cfg.has_section(section) else "singularity"
    forward_dir = cfg.get(section, "forward_dir")
    python_dir = cfg.get(section, "python_dir")
    bin_dir = cfg.get(section, "bin_dir")
    stagein_dir = cfg.get(section, "stagein_dir")
    stageout_dir = cfg.get(section, "stageout_dir")

    def dst(*args):
        return os.path.join(forward_dir, *(str(arg) for arg in args))

    # helper for mounting a volume
    volume_srcs = []

    def mount(*vol):
        src = vol[0]

        # make sure the same source directory is not mounted twice
        if src in volume_srcs:
            return
        volume_srcs.append(src)

        # ensure that source directories exist
        if not os.path.isfile(src) and not os.path.exists(src):
            os.makedirs(src)

        # store the mount point
        args.extend(["-B", ":".join(vol)])

    # environment variables to set
    env = OrderedDict()

    # sandboxing variables
    env["LAW_SANDBOX"] = self.key
    env["LAW_SANDBOX_SWITCHED"] = "1"
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = dst(stagein_dir)
        mount(self.stagein_info.stage_dir.path, dst(stagein_dir))
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = dst(stageout_dir)
        mount(self.stageout_info.stage_dir.path, dst(stageout_dir))

    # adjust path variables
    env["PATH"] = os.pathsep.join(["$PATH", dst("bin")])
    env["PYTHONPATH"] = os.pathsep.join(["$PYTHONPATH", dst(python_dir)])

    # forward python directories of law and dependencies
    for mod in law_deps:
        path = os.path.dirname(mod.__file__)
        name, ext = os.path.splitext(os.path.basename(mod.__file__))
        if name == "__init__":
            vsrc = path
            vdst = dst(python_dir, os.path.basename(path))
        else:
            vsrc = os.path.join(path, name + ".py")
            vdst = dst(python_dir, name + ".py")
        mount(vsrc, vdst)

    # forward the law cli dir to bin as it contains a law executable
    env["PATH"] = os.pathsep.join([env["PATH"], dst(python_dir, "law", "cli")])

    # forward the law config file
    if cfg.config_file:
        mount(cfg.config_file, dst("law.cfg"))
        env["LAW_CONFIG_FILE"] = dst("law.cfg")

    # forward the luigi config file
    for p in luigi.configuration.LuigiConfigParser._config_paths[::-1]:
        if os.path.exists(p):
            mount(p, dst("luigi.cfg"))
            env["LUIGI_CONFIG_PATH"] = dst("luigi.cfg")
            break

    # add env variables defined in the config and by the task
    env.update(self.get_config_env())
    env.update(self.get_task_env())

    # forward volumes defined in the config and by the task
    vols = {}
    vols.update(self.get_config_volumes())
    vols.update(self.get_task_volumes())
    for hdir, cdir in six.iteritems(vols):
        if not cdir:
            mount(hdir)
        else:
            cdir = cdir.replace("${PY}", dst(python_dir)).replace("${BIN}", dst(bin_dir))
            mount(hdir, cdir)

    # handle scheduling within the container
    ls_flag = "--local-scheduler"
    if self.force_local_scheduler() and ls_flag not in proxy_cmd:
        proxy_cmd.append(ls_flag)
    if ls_flag not in proxy_cmd:
        if getattr(self.task, "_worker_id", None):
            env["LAW_SANDBOX_WORKER_ID"] = self.task._worker_id
        if getattr(self.task, "_worker_task", None):
            env["LAW_SANDBOX_WORKER_TASK"] = self.task._worker_task

    # build commands to add env variables
    pre_cmds = []
    for tpl in env.items():
        pre_cmds.append("export {}=\"{}\"".format(*tpl))

    # build the final command
    cmd = "singularity exec {args} {image} bash -l -c '{pre_cmd}; {proxy_cmd}'".format(
        args=" ".join(args),
        image=self.image,
        pre_cmd="; ".join(pre_cmds),
        proxy_cmd=" ".join(proxy_cmd),
    )

    return cmd
from law.util import (
    make_list, makedirs_perm, human_bytes, create_hash, user_owns_file, io_lock,
)


logger = logging.getLogger(__name__)


# try to import gfal2
try:
    import gfal2
    HAS_GFAL2 = True

    # configure gfal2 logging
    if not getattr(gfal2, "_law_configured_logging", False):
        gfal2._law_configured_logging = True

        gfal2_logger = logging.getLogger("gfal2")
        gfal2_logger.addHandler(logging.StreamHandler())
        level = Config.instance().get_expanded("target", "gfal2_log_level")
        if isinstance(level, six.string_types):
            level = getattr(logging, level, logging.WARNING)
        gfal2_logger.setLevel(level)

except ImportError:
    HAS_GFAL2 = False

    class gfal2Dummy(object):

        def __getattr__(self, attr):
            raise Exception("trying to access 'gfal2.{}', but gfal2 is not installed".format(attr))

    gfal2 = gfal2Dummy()
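# Minimal demo of the fallback above (self-contained copy of the dummy class):
# any attribute access raises, so code paths that never touch gfal2 keep
# working, while remote operations fail with an explicit message.
class _Gfal2Dummy(object):

    def __getattr__(self, attr):
        raise Exception("trying to access 'gfal2.{}', but gfal2 is not installed".format(attr))

try:
    _Gfal2Dummy().creat_context()  # creat_context is the real gfal2 entry point
except Exception as e:
    print(e)  # trying to access 'gfal2.creat_context', but gfal2 is not installed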
def cmd(self, proxy_cmd):
    cfg = Config.instance()

    # singularity exec command arguments
    args = ["-e"]

    # add args configured on the task
    args_getter = getattr(self.task, "singularity_args", None)
    args += make_list(args_getter() if callable(args_getter) else self.default_singularity_args)

    # environment variables to set
    env = self._get_env()

    # prevent python from writing byte code files
    env["PYTHONDONTWRITEBYTECODE"] = "1"

    # helper to build forwarded paths
    section = self.get_config_section()
    forward_dir = cfg.get_expanded(section, "forward_dir")
    python_dir = cfg.get_expanded(section, "python_dir")
    bin_dir = cfg.get_expanded(section, "bin_dir")
    stagein_dir = cfg.get_expanded(section, "stagein_dir")
    stageout_dir = cfg.get_expanded(section, "stageout_dir")

    def dst(*args):
        return os.path.join(forward_dir, *(str(arg) for arg in args))

    # helper for mounting a volume
    volume_srcs = []

    def mount(*vol):
        src = vol[0]

        # make sure the same source directory is not mounted twice
        if src in volume_srcs:
            return
        volume_srcs.append(src)

        # ensure that source directories exist
        if not os.path.isfile(src) and not os.path.exists(src):
            os.makedirs(src)

        # store the mount point
        args.extend(["-B", ":".join(vol)])

    if self.forward_env:
        # adjust path variables
        if self.allow_binds:
            env["PATH"] = os.pathsep.join(["$PATH", dst("bin")])
            env["PYTHONPATH"] = os.pathsep.join(["$PYTHONPATH", dst(python_dir)])
        else:
            env["PATH"] = "$PATH"
            env["PYTHONPATH"] = "$PYTHONPATH"

        # forward python directories of law and dependencies
        for mod in law_deps:
            path = os.path.dirname(mod.__file__)
            name, ext = os.path.splitext(os.path.basename(mod.__file__))
            if name == "__init__":
                vsrc = path
                vdst = dst(python_dir, os.path.basename(path))
            else:
                vsrc = os.path.join(path, name + ".py")
                vdst = dst(python_dir, name + ".py")
            if self.allow_binds:
                mount(vsrc, vdst)
            else:
                dep_path = os.path.dirname(vsrc)
                if dep_path not in env["PYTHONPATH"].split(os.pathsep):
                    env["PYTHONPATH"] = os.pathsep.join([env["PYTHONPATH"], dep_path])

        # forward the law cli dir to bin as it contains a law executable
        if self.allow_binds:
            env["PATH"] = os.pathsep.join([env["PATH"], dst(python_dir, "law", "cli")])
        else:
            cli_dir = os.path.join(law_src_path(), "cli")
            env["PATH"] = os.pathsep.join([env["PATH"], cli_dir])

        # forward the law config file
        if cfg.config_file:
            if self.allow_binds:
                mount(cfg.config_file, dst("law.cfg"))
                env["LAW_CONFIG_FILE"] = dst("law.cfg")
            else:
                env["LAW_CONFIG_FILE"] = cfg.config_file

        # forward the luigi config file
        for p in luigi.configuration.LuigiConfigParser._config_paths[::-1]:
            if os.path.exists(p):
                if self.allow_binds:
                    mount(p, dst("luigi.cfg"))
                    env["LUIGI_CONFIG_PATH"] = dst("luigi.cfg")
                else:
                    env["LUIGI_CONFIG_PATH"] = p
                break

    if (self.stagein_info or self.stageout_info) and not self.allow_binds:
        raise Exception("Cannot use stage-in or -out if binds are not allowed.")

    # add staging directories
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = dst(stagein_dir)
        mount(self.stagein_info.stage_dir.path, dst(stagein_dir))
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = dst(stageout_dir)
        mount(self.stageout_info.stage_dir.path, dst(stageout_dir))

    # forward volumes defined in the config and by the task
    vols = self._get_volumes()
    if vols and not self.allow_binds:
        raise Exception("Cannot forward volumes to Sandbox if binds are not allowed.")
    for hdir, cdir in six.iteritems(vols):
        if not cdir:
            mount(hdir)
        else:
            cdir = cdir.replace("${PY}", dst(python_dir)).replace("${BIN}", dst(bin_dir))
            mount(hdir, cdir)

    # extend by arguments needed for both env loading and executing the job
    args.extend(self.common_args())

    # build commands to set up the environment
    setup_cmds = self._build_setup_cmds(env)

    # handle scheduling within the container
    ls_flag = "--local-scheduler"
    if self.force_local_scheduler() and ls_flag not in proxy_cmd:
        proxy_cmd.append(ls_flag)

    # build the final command
    cmd = quote_cmd(["singularity", "exec"] + args + [
        self.image, "bash", "-l", "-c",
        "; ".join(flatten(setup_cmds, " ".join(proxy_cmd))),
    ])

    return cmd
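# Hedged sketch of the final command's shape, with assumed values for the
# image name and sandbox key: setup commands and the proxy command are joined
# with "; " and run in a login shell inside the container, so the exported
# environment is visible to the task. law.util.quote_cmd and flatten are
# approximated here with shlex.quote and plain list concatenation.
import shlex

setup_cmds = ['export LAW_SANDBOX="singularity::myimage"']
proxy_cmd = ["law", "run", "MyTask", "--local-scheduler"]

inner = "; ".join(setup_cmds + [" ".join(proxy_cmd)])
parts = ["singularity", "exec", "-e", "myimage", "bash", "-l", "-c", inner]
print(" ".join(shlex.quote(p) for p in parts))
# -> singularity exec -e myimage bash -l -c 'export ...; law run MyTask --local-scheduler'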
def get_config(name):
    # split a dotted name like "section.option" once and look it up in the law config
    section, option = name.split(".", 1)
    return Config.instance().get(section, option)
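# Usage sketch: the name is split at the first dot only, so anything after it
# passes through unchanged as the option. The option below is one of the job
# settings read elsewhere in this section.
print(get_config("job.lsf_chunk_size_query"))
# equivalent to Config.instance().get("job", "lsf_chunk_size_query")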
def execute(args):
    """
    Executes the *index* subprogram with parsed commandline *args*.
    """
    cfg = Config.instance()
    index_file = cfg.get_expanded("core", "index_file")

    # just print the file location?
    if args.location:
        print(index_file)
        return

    # just remove the index file?
    if args.remove:
        if os.path.exists(index_file):
            os.remove(index_file)
            print("removed index file {}".format(index_file))
        return

    # get modules to lookup
    lookup = [m.strip() for m in cfg.keys("modules")]
    if args.modules:
        lookup += args.modules

    print("loading tasks from {} module(s)".format(len(lookup)))

    # loop through modules, import everything to load tasks
    for modid in lookup:
        if not modid:
            continue

        if args.verbose:
            sys.stdout.write("loading module '{}'".format(modid))

        try:
            import_module(modid)
        except Exception as e:
            if not args.verbose:
                print("Error in module '{}': {}".format(colored(modid, "red"), str(e)))
            else:
                print("\n\nError in module '{}':".format(colored(modid, "red")))
                traceback.print_exc()
            continue

        if args.verbose:
            print(", {}".format(colored("done", style="bright")))

    # determine tasks to write into the index file
    seen_families = []
    task_classes = []
    lookup = [Task]
    while lookup:
        cls = lookup.pop(0)
        lookup.extend(cls.__subclasses__())

        # skip already seen task families
        if cls.task_family in seen_families:
            continue
        seen_families.append(cls.task_family)

        # skip when explicitly excluded
        if cls.exclude_index:
            continue

        # skip external tasks
        is_external_task = issubclass(cls, ExternalTask)
        if args.no_externals and is_external_task:
            continue

        # skip non-external tasks without run implementation
        run_is_callable = callable(getattr(cls, "run", None))
        run_is_abstract = getattr(cls.run, "__isabstractmethod__", False)
        if not is_external_task and (not run_is_callable or run_is_abstract):
            continue

        task_classes.append(cls)

    def get_task_params(cls):
        params = []
        for attr in dir(cls):
            member = getattr(cls, attr)
            if isinstance(member, luigi.Parameter):
                exclude = getattr(cls, "exclude_params_index", set())
                if not multi_match(attr, exclude, any):
                    params.append(attr.replace("_", "-"))
        return params

    def index_line(cls, params):
        # format: "module_id:task_family:param param ..."
        return "{}:{}:{}".format(cls.__module__, cls.task_family, " ".join(params))

    stats = OrderedDict()

    # write the index file
    if not os.path.exists(os.path.dirname(index_file)):
        os.makedirs(os.path.dirname(index_file))
    with open(index_file, "w") as f:
        for cls in task_classes:
            # get params
            params = get_task_params(cls)

            # fill stats
            if cls.__module__ not in stats:
                stats[cls.__module__] = []
            stats[cls.__module__].append((cls.task_family, params))

            f.write(index_line(cls, params) + "\n")

    # print stats
    if args.verbose:
        for mod, data in six.iteritems(stats):
            print("\nmodule '{}', {} task(s):".format(colored(mod, style="bright"), len(data)))
            for task_family, _ in data:
                print(" - {}".format(colored(task_family, "green")))
        print("")

    print("written {} task(s) to index file '{}'".format(len(task_classes), index_file))
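# Runnable sketch of the index line format written above, using a made-up
# task class and parameters; underscores in parameter names become dashes,
# matching the CLI style.
class _FakeTask(object):
    __module__ = "analysis.tasks"  # hypothetical module
    task_family = "MyTask"  # hypothetical task family

params = ["n-events", "dataset"]
print("{}:{}:{}".format(_FakeTask.__module__, _FakeTask.task_family, " ".join(params)))
# -> analysis.tasks:MyTask:n-events dataset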