def __init__(self, base, bases=None, gfal_options=None, transfer_config=None,
        atomic_contexts=False, retries=0, retry_delay=0):
    """
    Sets up the interface with one or more *base* paths and an optional mapping *bases* whose
    values are converted to lists of paths as well. *gfal_options* and *transfer_config* default
    to empty dictionaries. Note that a non-empty *transfer_config* passed by the caller is stored
    by reference and receives the default entries ``"checksum_check"``, ``"overwrite"`` and
    ``"nbstreams"`` in place when they are missing. *atomic_contexts*, *retries* and
    *retry_delay* are stored unchanged.
    """
    object.__init__(self)

    # cache for gfal context objects and transfer parameters per pid for thread safety
    self._contexts = {}
    self._transfer_parameters = {}

    # base(s) are always kept as lists (round-robin use)
    self.base = make_list(base)
    self.bases = {}
    if bases:
        self.bases = {key: make_list(value) for key, value in six.iteritems(bases)}

    # gfal options
    self.gfal_options = gfal_options or {}

    # transfer config, completed with defaults for missing entries
    config = transfer_config or {}
    for key, default in (("checksum_check", False), ("overwrite", True), ("nbstreams", 1)):
        config.setdefault(key, default)
    self.transfer_config = config

    # remaining settings
    self.atomic_contexts = atomic_contexts
    self.retries = retries
    self.retry_delay = retry_delay
def fetch(self, rpaths, lpaths=None, cache=True, **kwargs):
    """
    Fetches one or multiple remote paths *rpaths* by calling :py:meth:`_fetch` once per path,
    using the instance's pool when it is set and a sequential loop otherwise. *lpaths* are the
    corresponding local paths and default to *None* per remote path. *cache* can be a single
    value that is applied to all paths, or a sequence with one value per path; it is ignored
    (treated as *False*) when the instance has no cache. All *kwargs* are forwarded to
    :py:meth:`_fetch`.

    When *rpaths* is a list, tuple or set, a list of results is returned (empty for empty input).
    Otherwise, the single result is returned, or *None* when there is nothing to fetch. A
    *ValueError* is raised when the numbers of *rpaths*, *lpaths* and cache values do not match.
    """
    # caching is only considered when the instance has a cache
    caches = cache if self.cache else False
    multi = isinstance(rpaths, (list, tuple, set))

    # normalize arguments to lists of equal length
    rpaths = make_list(rpaths)
    n = len(rpaths)
    lpaths = [None] * n if lpaths is None else make_list(lpaths)
    caches = make_list(caches)
    if len(caches) == 1:
        caches *= n

    if n != len(lpaths):
        raise ValueError("rpaths and lpaths count must match")
    if n != len(caches):
        raise ValueError("rpaths and cache count must match")
    if not n:
        return [] if multi else None

    def fetch_one(tpl):
        return self._fetch(tpl[0], tpl[1], cache=tpl[2], **kwargs)

    tpls = list(zip(rpaths, lpaths, caches))
    if not self.pool:
        # build a real list, the builtin map() is lazy in python 3 and its return value
        # cannot be indexed below
        results = [fetch_one(tpl) for tpl in tpls]
    else:
        results = self.pool.map(fetch_one, tpls)

    return results if multi else results[0]
def put(self, lpaths, rpaths, cache=True, **kwargs):
    """
    Uploads one or multiple local paths *lpaths* to the remote paths *rpaths* by calling
    :py:meth:`_put` once per pair, using the instance's pool when it is set and a sequential
    loop otherwise. *cache* can be a single value that is applied to all pairs, or a sequence
    with one value per pair; it is ignored (treated as *False*) when the instance has no cache.
    All *kwargs* are forwarded to :py:meth:`_put`.

    Nothing is returned. A *ValueError* is raised when the numbers of *rpaths*, *lpaths* and
    cache values do not match.
    """
    # caching is only considered when the instance has a cache
    caches = cache if self.cache else False

    # normalize arguments to lists of equal length
    rpaths = make_list(rpaths)
    lpaths = make_list(lpaths)
    n = len(rpaths)
    caches = make_list(caches)
    if len(caches) == 1:
        caches *= n

    if n != len(lpaths):
        raise ValueError("rpaths and lpaths count must match")
    if n != len(caches):
        raise ValueError("rpaths and cache count must match")
    if not n:
        return

    def put_one(tpl):
        return self._put(tpl[0], tpl[1], cache=tpl[2], **kwargs)

    tpls = list(zip(lpaths, rpaths, caches))
    if not self.pool:
        # use an explicit loop, the builtin map() is lazy in python 3 and would never
        # trigger the uploads when its result is discarded
        for tpl in tpls:
            put_one(tpl)
    else:
        self.pool.map(put_one, tpls)
def fetch(self, rpaths=None, pattern=None, lpaths=None, lbase=None, **kwargs):
    """
    Fetches files located below this target's path through ``self.fs.fetch`` and returns a
    dictionary that maps the relative remote paths to the values returned by the file system.

    When *rpaths* is *None*, the paths are taken from :py:meth:`listdir` called with *pattern*
    and *lpaths* is disregarded. Otherwise, *rpaths* are stripped of surrounding slashes.
    *lpaths* may contain :py:class:`FileSystemTarget` instances which are replaced by their
    paths. When no *lpaths* are used but *lbase* is given, local paths are built by joining
    *lbase* with each relative remote path. All *kwargs* are forwarded to ``self.fs.fetch``.
    """
    if rpaths is not None:
        rpaths = [rpath.strip("/") for rpath in make_list(rpaths)]
    else:
        # no explicit selection, use the directory listing and drop any local paths
        rpaths = self.listdir(pattern=pattern)
        lpaths = None

    # remote paths relative to the file system
    full_rpaths = [os.path.join(self.path, rpath) for rpath in rpaths]

    if lpaths is not None:
        lpaths = make_list(lpaths)
        # targets are represented by their paths
        for idx, entry in enumerate(lpaths):
            if isinstance(entry, FileSystemTarget):
                lpaths[idx] = entry.path
    elif lbase is not None:
        lpaths = [os.path.join(lbase, rpath) for rpath in rpaths]

    fetched = self.fs.fetch(full_rpaths, lpaths=lpaths, **kwargs)

    return dict(zip(rpaths, fetched))
def __init__(self, base=None, bases=None, retries=0, retry_delay=0, random_base=True, **kwargs):
    """
    Stores one or more *base* paths and an optional mapping *bases* whose values are converted
    to lists of paths. Environment variables in all paths are expanded. An exception is raised
    when no *base* is given. *retries*, *retry_delay* and *random_base* are stored unchanged,
    additional *kwargs* are accepted but not used here.
    """
    super(RemoteFileInterface, self).__init__()

    # normalize base(s) to lists
    base_paths = make_list(base or [])
    named_bases = {}
    if bases:
        named_bases = {key: make_list(paths) for key, paths in six.iteritems(bases)}

    # at least one base is expected
    if not base_paths:
        raise Exception(
            "{} expected at least one base path, received none".format(
                self.__class__.__name__))

    # expand variables in base and bases
    self.base = [os.path.expandvars(path) for path in base_paths]
    self.bases = {
        key: [os.path.expandvars(path) for path in paths]
        for key, paths in six.iteritems(named_bases)
    }

    # remaining settings
    self.retries = retries
    self.retry_delay = retry_delay
    self.random_base = random_base
def submit(self, job_file, ce=None, delegation_id=None, retries=0, retry_delay=3, silent=False):
    """
    Submits the job described by *job_file* with ``glite-ce-job-submit`` and returns the job id
    taken from the last line of the command output. *ce* and *delegation_id* default to the
    instance attributes of the same name and may be sequences; in that case one CE (and the
    delegation id at the same position) is picked randomly per attempt. The command is run in
    the directory of *job_file*.

    A submission counts as failed when the return code is non-zero or when the reported job id
    does not match ``submission_job_id_cre``. Failed submissions are repeated up to *retries*
    times with *retry_delay* seconds in between. After that, *None* is returned when *silent*
    is *True*, otherwise an exception is raised. A *ValueError* is raised for an empty *ce*, and
    an exception when the numbers of CEs and delegation ids differ.
    """
    # default arguments
    if ce is None:
        ce = self.ce
    if delegation_id is None:
        delegation_id = self.delegation_id

    # check arguments
    if not ce:
        raise ValueError("ce must not be empty")

    # prepare round robin for ces and delegations
    ce = make_list(ce)
    if delegation_id:
        delegation_id = make_list(delegation_id)
        if len(ce) != len(delegation_id):
            raise Exception("numbers of CEs ({}) and delegation ids ({}) do not match".format(
                len(ce), len(delegation_id)))

    # get the job file location as the submission command is run in the same directory
    job_file_dir, job_file_name = os.path.split(os.path.abspath(job_file))

    # define the actual submission in a loop to simplify retries
    while True:
        # build the command
        i = random.randint(0, len(ce) - 1)
        cmd = ["glite-ce-job-submit", "-r", ce[i]]
        if delegation_id:
            cmd += ["-D", delegation_id[i]]
        cmd += [job_file_name]
        cmd = quote_cmd(cmd)

        # run the command
        # glite prints everything to stdout
        logger.debug("submit glite job with command '{}'".format(cmd))
        code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE, stderr=sys.stderr, cwd=job_file_dir)

        # in some cases, the return code is 0 but the ce did not respond with a valid id
        if code == 0:
            job_id = out.strip().split("\n")[-1].strip()
            if not self.submission_job_id_cre.match(job_id):
                code = 1
                out = "bad job id '{}' from output:\n{}".format(job_id, out)

        # retry or done?
        if code == 0:
            return job_id

        # the placeholders expect the job file first, followed by the return code
        logger.debug("submission of glite job '{}' failed with code {}:\n{}".format(
            job_file, code, out))
        if retries > 0:
            retries -= 1
            time.sleep(retry_delay)
            continue
        elif silent:
            return None
        else:
            raise Exception("submission of glite job '{}' failed:\n{}".format(
                job_file, out))
def req_params(cls, inst, _exclude=None, _prefer_cli=None, **kwargs):
    """
    Returns the parameters that *inst* and this class have in common, as determined by
    ``luigi.util.common_params``, after applying the following steps. Parameters matching
    *_exclude*, ``exclude_params_req`` / ``exclude_params_req_get`` of this class or
    ``exclude_params_req`` / ``exclude_params_req_set`` of *inst* are removed. Then, *kwargs*
    are added. Finally, parameters listed in *_prefer_cli* are removed again when a command line
    parser instance exists and the global command line values contain a key consisting of the
    task family, an underscore and the parameter name.
    """
    # parameters shared between the instance and this class
    params = luigi.util.common_params(inst, cls)

    # collect all exclusion patterns
    excluded = set(make_list(_exclude)) if _exclude is not None else set()
    excluded.update(cls.exclude_params_req, cls.exclude_params_req_get)
    excluded.update(inst.exclude_params_req, inst.exclude_params_req_set)

    # drop excluded parameters
    for name in list(params.keys()):
        if multi_match(name, excluded, any):
            params.pop(name)

    # explicit values
    params.update(kwargs)

    # nothing else to do when no parameter is preferably taken from the command line
    if not _prefer_cli:
        return params

    # names of parameters that were set for this class on the command line
    prefix = cls.get_task_family() + "_"
    cli_names = []
    if luigi.cmdline_parser.CmdlineParser.get_instance():
        cli_names = [
            key[len(prefix):]
            for key in global_cmdline_values().keys()
            if key.startswith(prefix)
        ]

    for name in make_list(_prefer_cli):
        if name in params and name in cli_names:
            params.pop(name)

    return params
def put(self, srcpath, dstpath):
    """
    Copies one or multiple source paths *srcpath* to the destination paths *dstpath* at the
    same position using ``shutil.copy2``. A *ValueError* is raised when the numbers of source
    and destination paths differ.
    """
    sources = make_list(srcpath)
    destinations = make_list(dstpath)

    if len(sources) != len(destinations):
        raise ValueError("srcpath(s) and dstpath(s) must have equal lengths")

    for src, dst in zip(sources, destinations):
        shutil.copy2(src, dst)
def _law_run_inst(cls, inst, _exclude=None, _replace=None, _global=None, _run_kwargs=None):
    """
    Runs this task through ``law_run`` with the command line arguments of *inst* and returns
    its result. The arguments are obtained from ``inst.cli_args`` with *_exclude* and
    *_replace* forwarded, and flattened into a single list. Values in *_global* are appended as
    strings, *_run_kwargs* are passed to ``law_run`` as keyword arguments.
    """
    # flatten the items of the cli arguments into one list
    args = []
    for item in inst.cli_args(exclude=_exclude, replace=_replace).items():
        args = args + make_list(item)

    # global parameters go last
    if _global:
        args.extend(str(arg) for arg in make_list(_global))

    # the task family is the first element of the command
    cmd = [cls.get_task_family()] + args

    run_kwargs = _run_kwargs or {}
    return law_run(cmd, **run_kwargs)
def cancel(self, job_id, pool=None, scheduler=None, silent=False):
    """
    Cancels one or multiple htcondor jobs given by *job_id* with ``condor_rm``. *pool* and
    *scheduler* default to the instance attributes of the same name and are added to the
    command via ``-pool`` and ``-name`` when set. When the command exits with a non-zero code,
    an exception containing its stderr output is raised, unless *silent* is *True*.
    """
    # fall back to instance defaults
    pool = self.pool if pool is None else pool
    scheduler = self.scheduler if scheduler is None else scheduler

    # assemble the command
    parts = ["condor_rm"]
    if pool:
        parts.extend(["-pool", pool])
    if scheduler:
        parts.extend(["-name", scheduler])
    parts.extend(make_list(job_id))
    cmd = quote_cmd(parts)

    # execute it
    logger.debug("cancel htcondor job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # done when successful or when failures should be ignored
    if code == 0 or silent:
        return

    raise Exception(
        "cancellation of htcondor job(s) '{}' failed with code {}:\n{}"
        .format(job_id, code, err))
def submit_batch(self, job_files, threads=None, callback=None, **kwargs):
    """
    Submits all jobs in *job_files* by calling :py:meth:`submit` in a thread pool with
    *threads* workers (at least one), which defaults to the instance attribute of the same
    name. A callable *callback* is invoked after each successful submission with the job number
    (starting from 0) and the result object. All *kwargs* are forwarded to :py:meth:`submit`.

    The returned list contains the return values of the :py:meth:`submit` calls in the order of
    *job_files*. When a submission raised an exception, the exception object takes its place.
    """
    # default arguments
    threads = threads or self.threads

    def make_callback(index):
        # bind the job number, or disable the callback when it cannot be called
        if not callable(callback):
            return None

        def notify(result):
            return callback(index, result)

        return notify

    # schedule all submissions and wait for them to finish
    pool = ThreadPool(max(threads, 1))
    pending = []
    for index, job_file in enumerate(job_files):
        pending.append(
            pool.apply_async(self.submit, (job_file,), kwargs, callback=make_callback(index)))
    pool.close()
    pool.join()

    # collect return values or errors
    outputs = []
    for res in pending:
        try:
            outputs.extend(make_list(res.get()))
        except Exception as e:
            outputs.append(e)

    return outputs
def cleanup(self, job_id, job_list=None, silent=False):
    """
    Cleans up one or multiple arc jobs given by *job_id* with ``arcclean``. *job_list* defaults
    to the instance attribute of the same name and is added to the command via ``-j`` when set.
    When the command exits with a non-zero code, an exception containing its stdout output is
    raised, unless *silent* is *True*.
    """
    # fall back to the instance default
    job_list = self.job_list if job_list is None else job_list

    # assemble the command
    parts = ["arcclean"]
    if job_list:
        parts.extend(["-j", job_list])
    parts.extend(make_list(job_id))
    cmd = quote_cmd(parts)

    # execute it, stderr is passed through while stdout is captured
    logger.debug("cleanup arc job(s) with command '{}'".format(cmd))
    code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=sys.stderr)

    # done when successful or when failures should be ignored
    if code == 0 or silent:
        return

    # the captured stdout is used as the error description
    raise Exception(
        "cleanup of arc job(s) '{}' failed with code {}:\n{}".format(
            job_id, code, out))
def cancel(self, job_id, queue=None, silent=False):
    """
    Cancels one or multiple lsf jobs given by *job_id* with ``bkill``. *queue* defaults to the
    instance attribute of the same name and is added to the command via ``-q`` when set. When
    the command exits with a non-zero code, an exception containing the job id(s), the return
    code and the stderr output is raised, unless *silent* is *True*.
    """
    # default arguments
    if queue is None:
        queue = self.queue

    # build the command
    cmd = ["bkill"]
    if queue:
        cmd += ["-q", queue]
    cmd += make_list(job_id)
    cmd = quote_cmd(cmd)

    # run it
    logger.debug("cancel lsf job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # check success
    if code != 0 and not silent:
        # the placeholders expect the job id(s) first, followed by the return code
        raise Exception(
            "cancellation of lsf job(s) '{}' failed with code {}:\n{}".format(
                job_id, code, err))
def submit_batch(self, job_files, threads=None, callback=None, **kwargs):
    """
    Submits all jobs in *job_files* by calling :py:meth:`submit` in a thread pool with
    *threads* workers (at least one), which defaults to the instance attribute of the same
    name. A callable *callback* is invoked after each successful submission with the result
    object and the job number (starting from 0), in that order. All *kwargs* are forwarded to
    :py:meth:`submit`.

    The returned list contains the return values of the :py:meth:`submit` calls in the order of
    *job_files*. When a submission raised an exception, the exception object takes its place.
    """
    # default arguments
    threads = threads or self.threads

    def make_callback(index):
        # bind the job number, or disable the callback when it cannot be called
        if not callable(callback):
            return None

        def notify(result):
            return callback(result, index)

        return notify

    # schedule all submissions and wait for them to finish
    pool = ThreadPool(max(threads, 1))
    pending = []
    for index, job_file in enumerate(job_files):
        pending.append(
            pool.apply_async(self.submit, (job_file,), kwargs, callback=make_callback(index)))
    pool.close()
    pool.join()

    # collect return values or errors
    outputs = []
    for res in pending:
        try:
            outputs.extend(make_list(res.get()))
        except Exception as e:
            outputs.append(e)

    return outputs
def cli_args(self, exclude=None, replace=None):
    """
    Returns the command line arguments as built by the super class, with *replace* forwarded
    and all names in *exclude* as well as the interactive parameters of this task excluded.
    """
    excluded = set(make_list(exclude)) if exclude is not None else set()

    # interactive parameters are never part of the arguments
    excluded.update(self.interactive_params)

    return super(Task, self).cli_args(exclude=excluded, replace=replace)
def query(self, job_id, silent=False):
    """
    Queries the status of one or multiple glite jobs given by *job_id* with
    ``glite-ce-job-status`` and parses the output with :py:meth:`parse_query_output`.

    When *job_id* is a list or tuple, the parsed data for all jobs is returned, where jobs
    missing in the response get a failed status entry created by :py:meth:`job_status_dict`.
    For a single *job_id*, only its entry is returned. When the command fails or a single job
    is not found in the response, *None* is returned when *silent* is *True*, and an exception
    is raised otherwise.
    """
    multi = isinstance(job_id, (list, tuple))
    job_ids = make_list(job_id)

    # assemble and execute the command, stderr is passed through while stdout is captured
    cmd = ["glite-ce-job-status", "-n", "-L", "0"]
    cmd = cmd + job_ids
    logger.debug("query glite job(s) with command '{}'".format(cmd))
    code, out, _ = interruptable_popen(cmd, stdout=subprocess.PIPE, stderr=sys.stderr)

    # failed command
    if code != 0:
        if silent:
            return None
        # the captured stdout is used as the error description
        raise Exception("status query of glite job(s) '{}' failed:\n{}".format(job_id, out))

    # status per job
    query_data = self.parse_query_output(out)

    # handle requested jobs that are missing in the response
    for requested_id in job_ids:
        if requested_id in query_data:
            continue
        if multi:
            query_data[requested_id] = self.job_status_dict(job_id=requested_id,
                status=self.FAILED, error="job not found in query response")
            continue
        if silent:
            return None
        raise Exception("glite job(s) '{}' not found in query response".format(
            job_id))

    if multi:
        return query_data
    return query_data[job_id]
def _docker_run_cmd(self):
    """
    Returns the leading part of the "docker run" command, as a list, that is shared by
    environment requests and the actual run. It always contains ``--rm`` and ``--pid host``.
    When a task is set, its ``sandbox_user()`` (expected to be a 2-tuple or list, an exception
    is raised otherwise) is added via ``-u`` as ``"<first>:<second>"``, followed by the values
    returned by the task's ``docker_args()`` method when it exists.
    """
    # --rm flag, and the pid namespace of the host so that killing the outer process will
    # stop the container
    cmd = ["docker", "run", "--rm", "--pid", "host"]

    task = self.task
    if not task:
        return cmd

    # user flag
    user = task.sandbox_user()
    if user:
        valid = isinstance(user, (tuple, list)) and len(user) == 2
        if not valid:
            raise Exception("sandbox_user() must return 2-tuple")
        cmd += ["-u", "{}:{}".format(*user)]

    # arguments configured on the task
    get_args = getattr(task, "docker_args", None)
    if callable(get_args):
        cmd += make_list(get_args())

    return cmd
def cancel(self, job_id, partition=None, silent=False):
    """
    Cancels one or multiple slurm jobs given by *job_id* with ``scancel``. *partition* defaults
    to the instance attribute of the same name and is added to the command via ``--partition``
    when set. When the command exits with a non-zero code, an exception containing its stderr
    output is raised, unless *silent* is *True*.
    """
    # fall back to the instance default
    partition = self.partition if partition is None else partition

    # assemble the command
    parts = ["scancel"]
    if partition:
        parts.extend(["--partition", partition])
    parts.extend(make_list(job_id))
    cmd = quote_cmd(parts)

    # execute it
    logger.debug("cancel slurm job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # done when successful or when failures should be ignored
    if code == 0 or silent:
        return

    raise Exception(
        "cancellation of slurm job(s) '{}' failed with code {}:\n{}".
        format(job_id, code, err))
def submit(self, job_file, ce=None, job_list=None, retries=0, retry_delay=3, silent=False):
    """
    Submits the job described by *job_file* with ``arcsub`` and returns the job id, i.e., the
    first group of ``submission_job_id_cre`` matched against the stripped command output. *ce*
    and *job_list* default to the instance attributes of the same name. *ce* may be a sequence
    from which one element is chosen randomly per attempt, *job_list* is added via ``-j`` when
    set. The command is run in the directory of *job_file*.

    A submission counts as failed when the return code is non-zero or when no job id is found
    in the output. Failed submissions are repeated up to *retries* times with *retry_delay*
    seconds in between. After that, *None* is returned when *silent* is *True*, otherwise an
    exception is raised. A *ValueError* is raised for an empty *ce*.
    """
    # fall back to instance defaults
    ce = ce or self.ce
    job_list = job_list or self.job_list

    if not ce:
        raise ValueError("ce must not be empty")
    ce = make_list(ce)

    # the submission command is executed in the directory of the job file
    job_file_dir, job_file_name = os.path.split(os.path.abspath(job_file))

    # one iteration per submission attempt
    remaining = retries
    while True:
        # assemble the command
        cmd = ["arcsub", "-c", random.choice(ce)]
        if job_list:
            cmd.extend(["-j", job_list])
        cmd.append(job_file_name)

        # execute it, stderr is passed through while stdout is captured
        logger.debug("submit arc job with command '{}'".format(cmd))
        code, out, _ = interruptable_popen(cmd, stdout=subprocess.PIPE, stderr=sys.stderr,
            cwd=job_file_dir)

        # a zero return code is only a success when the output contains a valid job id
        if code == 0:
            match = self.submission_job_id_cre.match(out.strip())
            if match:
                return match.group(1)
            out = "cannot find job id output:\n{}".format(out)

        # failure handling, either retry, give up silently or raise
        failure = "submission of arc job '{}' failed:\n{}".format(
            job_file, out)
        logger.debug(failure)
        if remaining > 0:
            remaining -= 1
            time.sleep(retry_delay)
        elif silent:
            return None
        else:
            raise Exception(failure)
def hadd_cmd(input_paths, output_path):
    """
    Returns the quoted ``hadd`` command that combines *input_paths* into *output_path*. It uses
    ``-n 0``, passes ``cwd.path`` via ``-d`` and inserts ``hadd_args`` from the enclosing scope
    before the output path when they are set.
    """
    parts = ["hadd", "-n", "0", "-d", cwd.path]

    # optional extra arguments
    if hadd_args:
        parts += make_list(hadd_args)

    # output first, inputs last
    parts += [output_path]
    parts += input_paths

    return quote_cmd(parts)
def split_remote_kwargs(cls, kwargs, include=None, skip=None):
    """
    Splits the keyword arguments *kwargs* into two dictionaries and returns them in a tuple.
    The first one contains the arguments related to potential remote file operations, namely
    ``"cache"``, ``"prefer_cache"``, ``"retries"`` and ``"retry_delay"``. The second one is
    *kwargs* itself, from which the extracted arguments were removed in place.

    *include* can be a list of additional argument keys to extract, *skip* a list of keys that
    should remain in *kwargs*.
    """
    include = make_list(include) if include else []
    skip = make_list(skip) if skip else []

    candidates = ["cache", "prefer_cache", "retries", "retry_delay"] + include

    # move matching entries over, one at a time
    transfer_kwargs = {}
    for name in candidates:
        if name not in kwargs or name in skip:
            continue
        transfer_kwargs[name] = kwargs.pop(name)

    return transfer_kwargs, kwargs
def req_params(cls, inst, _exclude=None, _prefer_cli=None, **kwargs):
    """
    Returns the parameters as built by the super class, with *_prefer_cli* and *kwargs*
    forwarded and all names in *_exclude* as well as the interactive parameters of *inst*
    excluded.
    """
    excluded = set(make_list(_exclude)) if _exclude is not None else set()

    # interactive parameters are never passed on
    excluded.update(inst.interactive_params)

    return super(Task, cls).req_params(inst, _exclude=excluded, _prefer_cli=_prefer_cli,
        **kwargs)
def cli_args(self, exclude=None, replace=None):
    """
    Returns the command line arguments as built by the super class, with *replace* forwarded
    and all names in *exclude* excluded. In addition, ``exclude_params_branch`` is excluded
    when this task is a branch, and ``exclude_params_workflow`` otherwise.
    """
    excluded = set(make_list(exclude)) if exclude is not None else set()

    # the additional exclusions depend on whether this is a branch or a workflow
    if not self.is_branch():
        excluded |= self.exclude_params_workflow
    else:
        excluded |= self.exclude_params_branch

    return super(BaseWorkflow, self).cli_args(exclude=excluded, replace=replace)
def cancel(self, job_id, silent=False):
    """
    Cancels one or multiple glite jobs given by *job_id* with ``glite-ce-job-cancel -N``. When
    the command exits with a non-zero code, an exception containing its stdout output is
    raised, unless *silent* is *True*.
    """
    # assemble the command
    cmd = ["glite-ce-job-cancel", "-N"]
    cmd = cmd + make_list(job_id)

    # execute it, stderr is passed through while stdout is captured
    logger.debug("cancel glite job(s) with command '{}'".format(cmd))
    code, out, _ = interruptable_popen(cmd, stdout=subprocess.PIPE, stderr=sys.stderr)

    # done when successful or when failures should be ignored
    if code == 0 or silent:
        return

    # the captured stdout is used as the error description
    raise Exception("cancellation of glite job(s) '{}' failed:\n{}".format(job_id, out))
def cmd(self, task, task_cmd):
    """
    Returns the full "docker run" command string that executes *task_cmd* via ``bash -c``
    inside the image of this sandbox. The docker arguments are taken from the ``docker_args``
    attribute of *task*, falling back to ``default_docker_args``, and are extended by volume
    mounts that forward the law executable, the python sources of six, luigi and law, and the
    first existing luigi config file (searched in reverse order of luigi's config paths) into
    ``/law_forward`` in the container. *task_cmd* is prefixed with exports that adjust ``PATH``,
    ``PYTHONPATH`` and ``LUIGI_CONFIG_PATH`` accordingly and disable byte code writing.
    """
    # arguments for the docker command as configured on the task
    docker_args = make_list(
        getattr(task, "docker_args", self.default_docker_args))

    # root of all forwarded paths inside the container
    forward_root = "/law_forward"

    def forwarded(*parts):
        return os.path.join(forward_root, *(str(part) for part in parts))

    def mount(src, dst_path):
        # register a volume mapping
        docker_args.extend(["-v", "%s:%s" % (src, dst_path)])

    # path variables that need to be extended in the container
    pathvars = defaultdict(list)

    # the law executable
    pathvars["PATH"].append(forwarded("bin"))
    docker_args.extend(["-v", which("law") + ":" + forwarded("bin", "law")])

    # python sources of law and its dependencies, packages are forwarded as directories,
    # plain modules as single .py files
    pathvars["PYTHONPATH"].append(forwarded("py"))
    for module in (six, luigi, law):
        module_file = module.__file__
        module_dir = os.path.dirname(module_file)
        stem, _ = os.path.splitext(os.path.basename(module_file))
        if stem != "__init__":
            mount(os.path.join(module_dir, stem) + ".py", forwarded("py", stem) + ".py")
        else:
            mount(module_dir, forwarded("py", os.path.basename(module_dir)))

    # prepend the exports of adjusted path variables to the task command
    for var, entries in pathvars.items():
        export = "export %s=$%s:%s; " % (var, var, ":".join(entries))
        task_cmd = export + task_cmd

    # the luigi config file, only the first existing one is forwarded
    for config_path in luigi.configuration.LuigiConfigParser._config_paths[::-1]:
        if not os.path.exists(config_path):
            continue
        mount(config_path, forwarded("luigi.cfg"))
        task_cmd = "export LUIGI_CONFIG_PATH=%s; " % forwarded("luigi.cfg") + task_cmd
        break

    # prevent python from writing byte code files
    task_cmd = "export PYTHONDONTWRITEBYTECODE=1; " + task_cmd

    template = "docker run {docker_args} {image} bash -c '{task_cmd}'"
    return template.format(docker_args=" ".join(docker_args), image=self.image,
        task_cmd=task_cmd)
def _singularity_exec_cmd(self):
    """
    Returns the leading part of the "singularity exec" command as a list. When a task is set
    and has a callable ``singularity_args`` attribute, the values it returns are appended.
    """
    cmd = ["singularity", "exec"]

    task = self.task
    if not task:
        return cmd

    # arguments configured on the task
    get_args = getattr(task, "singularity_args", None)
    if callable(get_args):
        cmd += make_list(get_args())

    return cmd
def query(self, job_id, queue=None, silent=False):
    """
    Queries the status of one or multiple lsf jobs given by *job_id* with ``bjobs`` and parses
    the output with :py:meth:`parse_query_output`. *queue* defaults to the instance attribute
    of the same name and is added to the command via ``-q`` when set. ``-noheader`` is added
    when the ``lsf_v912`` attribute is set.

    When *job_id* is a list or tuple, the parsed data for all jobs is returned, where jobs
    missing in the response get a failed status entry created by :py:meth:`job_status_dict`.
    For a single *job_id*, only its entry is returned. When the command fails or a single job
    is not found in the response, *None* is returned when *silent* is *True*, and an exception
    is raised otherwise.
    """
    # fall back to the instance default
    queue = self.queue if queue is None else queue

    chunking = isinstance(job_id, (list, tuple))
    job_ids = make_list(job_id)

    # assemble the command
    parts = ["bjobs"]
    if self.lsf_v912:
        parts.append("-noheader")
    if queue:
        parts.extend(["-q", queue])
    parts.extend(job_ids)
    cmd = quote_cmd(parts)

    # execute it
    logger.debug("query lsf job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # failed command
    if code != 0:
        if silent:
            return None
        raise Exception(
            "status query of lsf job(s) '{}' failed with code {}:\n{}".
            format(job_id, code, err))

    # status per job
    query_data = self.parse_query_output(out)

    # handle requested jobs that are missing in the response
    for requested_id in job_ids:
        if requested_id in query_data:
            continue
        if chunking:
            query_data[requested_id] = self.job_status_dict(
                job_id=requested_id,
                status=self.FAILED,
                error="job not found in query response")
            continue
        if silent:
            return None
        raise Exception(
            "lsf job(s) '{}' not found in query response".
            format(job_id))

    if chunking:
        return query_data
    return query_data[job_id]
def __init__(self, base, bases=None, gfal_options=None, transfer_config=None,
        atomic_contexts=False, retries=0, retry_delay=0, random_base=True):
    """
    Sets up the interface with one or more *base* paths and an optional mapping *bases* whose
    values are converted to lists of paths as well. Environment variables in all paths are
    expanded. *gfal_options* and *transfer_config* default to empty dictionaries. Note that a
    non-empty *transfer_config* passed by the caller is stored by reference and receives the
    default entries ``"checksum_check"``, ``"overwrite"`` and ``"nbstreams"`` in place when
    they are missing. *atomic_contexts*, *retries*, *retry_delay* and *random_base* are stored
    unchanged.
    """
    object.__init__(self)

    # cache for gfal context objects and transfer parameters per pid for thread safety
    self._contexts = {}
    self._transfer_parameters = {}

    # base(s) are always kept as lists (random selection)
    base_paths = make_list(base)
    named_bases = {}
    if bases:
        named_bases = {key: make_list(paths) for key, paths in six.iteritems(bases)}

    # expand variables in base and bases
    self.base = [os.path.expandvars(path) for path in base_paths]
    self.bases = {
        key: [os.path.expandvars(path) for path in paths]
        for key, paths in six.iteritems(named_bases)
    }

    # gfal options
    self.gfal_options = gfal_options or {}

    # transfer config, completed with defaults for missing entries
    config = transfer_config or {}
    for key, default in (("checksum_check", False), ("overwrite", True), ("nbstreams", 1)):
        config.setdefault(key, default)
    self.transfer_config = config

    # remaining settings
    self.atomic_contexts = atomic_contexts
    self.retries = retries
    self.retry_delay = retry_delay
    self.random_base = random_base
def query(self, job_id, job_list=None, silent=False):
    """
    Queries the status of one or multiple arc jobs given by *job_id* with ``arcstat`` and
    parses the output with :py:meth:`parse_query_output`. *job_list* defaults to the instance
    attribute of the same name and is added to the command via ``-j`` when set.

    When *job_id* is a list or tuple, the parsed data for all jobs is returned, where jobs
    missing in the response get a failed status entry created by :py:meth:`job_status_dict`.
    For a single *job_id*, only its entry is returned. When the command fails or a single job
    is not found in the response, *None* is returned when *silent* is *True*, and an exception
    is raised otherwise.
    """
    # fall back to the instance default
    job_list = self.job_list if job_list is None else job_list

    chunking = isinstance(job_id, (list, tuple))
    job_ids = make_list(job_id)

    # assemble the command
    parts = ["arcstat"]
    if job_list:
        parts.extend(["-j", job_list])
    parts.extend(job_ids)
    cmd = quote_cmd(parts)

    # execute it, stderr is merged into the captured stdout
    logger.debug("query arc job(s) with command '{}'".format(cmd))
    code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # failed command
    if code != 0:
        if silent:
            return None
        # the captured output is used as the error description
        raise Exception(
            "status query of arc job(s) '{}' failed with code {}:\n{}".
            format(job_id, code, out))

    # status per job
    query_data = self.parse_query_output(out)

    # handle requested jobs that are missing in the response
    for requested_id in job_ids:
        if requested_id in query_data:
            continue
        if chunking:
            query_data[requested_id] = self.job_status_dict(
                job_id=requested_id,
                status=self.FAILED,
                error="job not found in query response")
            continue
        if silent:
            return None
        raise Exception(
            "arc job(s) '{}' not found in query response".
            format(job_id))

    if chunking:
        return query_data
    return query_data[job_id]
def put(self, lpaths, rpaths=None, **kwargs):
    """
    Uploads one or multiple local paths *lpaths* below this target's path through
    ``self.fs.put``. :py:class:`FileSystemTarget` instances in *lpaths* are replaced by their
    paths. When *rpaths* is *None*, each file is placed directly below this target's path using
    the basename of its local path. Otherwise, *rpaths* can contain :py:class:`RemoteTarget`
    instances whose paths are used as they are, or relative paths that are stripped of
    surrounding slashes and joined with this target's path. All *kwargs* are forwarded to
    ``self.fs.put``.
    """
    # targets are represented by their paths
    lpaths = make_list(lpaths)
    for idx, entry in enumerate(lpaths):
        if isinstance(entry, FileSystemTarget):
            lpaths[idx] = entry.path

    if rpaths is not None:
        rpaths = make_list(rpaths)
        full_rpaths = [
            rpath.path if isinstance(rpath, RemoteTarget)
            else os.path.join(self.path, rpath.strip("/"))
            for rpath in rpaths
        ]
    else:
        # default to the local basenames
        full_rpaths = []
        for lpath in lpaths:
            basename = os.path.basename(lpath.rstrip("/"))
            full_rpaths.append(os.path.join(self.path, basename))

    self.fs.put(lpaths, full_rpaths, **kwargs)