def cmd(self, proxy_cmd):
    """
    Builds and returns the single shell command string that runs *proxy_cmd* (a list of command
    fragments) inside the bash sandbox defined by ``self.script``.
    """
    # environment variables to set
    env = self._get_env()

    # add staging directories
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = self.stagein_info.stage_dir.path
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = self.stageout_info.stage_dir.path

    # get the bash command
    bash_cmd = self._bash_cmd()

    # build commands to setup the environment
    setup_cmds = self._build_setup_cmds(env)

    # handle scheduling within the container
    # force the local scheduler flag unless it is already present
    ls_flag = "--local-scheduler"
    if self.force_local_scheduler() and ls_flag not in proxy_cmd:
        proxy_cmd.append(ls_flag)

    # build the final command: source the sandbox script, run setup commands, then the proxy
    cmd = quote_cmd(bash_cmd + [
        "-c",
        "; ".join(
            flatten("source \"{}\"".format(self.script), setup_cmds, quote_cmd(proxy_cmd)))
    ])

    return cmd
def _run_bash(self, cmd):
    """Run *cmd* through a piped bash subshell of the attached shell object."""
    # normalize a sequence of arguments into a single shell string
    if not isinstance(cmd, six.string_types):
        cmd = quote_cmd(cmd)

    # wrap in an explicit bash invocation and execute with piped output
    wrapped = quote_cmd(["bash", "-c", cmd])
    self.shell.system_piped(wrapped)
def env(self):
    """
    Returns the environment of the docker container as a dictionary, cached per image name.
    """
    # strategy: create a tempfile, forward it to a container, let python dump its full env,
    # close the container and load the env file
    if self.image not in self._envs:
        tmp = LocalFileTarget(is_tmp=".env")
        tmp.touch()

        # path of the env file as seen inside the container
        env_file = os.path.join("/tmp", tmp.unique_basename)

        # get the docker run command
        docker_run_cmd = self._docker_run_cmd()

        # mount the env file
        docker_run_cmd.extend(["-v", "{}:{}".format(tmp.path, env_file)])

        # build commands to setup the environment
        setup_cmds = self._build_setup_cmds(self._get_env())

        # build the python command that dumps the environment
        py_cmd = "import os,pickle;" \
            + "pickle.dump(dict(os.environ),open('{}','wb'),protocol=2)".format(env_file)

        # build the full command
        cmd = quote_cmd(docker_run_cmd + [
            self.image,
            "bash", "-l", "-c",
            "; ".join(
                flatten(setup_cmds, quote_cmd(["python", "-c", py_cmd]))),
        ])

        # run it
        code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if code != 0:
            raise Exception(
                "docker sandbox env loading failed:\n{}".format(out))

        # load the environment from the tmp file
        env = tmp.load(formatter="pickle")

        # cache
        self._envs[self.image] = env

    return self._envs[self.image]
def cleanup(self, job_id, job_list=None, silent=False):
    """
    Cleans up one or multiple arc jobs given by *job_id* via ``arcclean``. Raises an exception
    on failure unless *silent* is True.
    """
    # fall back to the configured job list file
    job_list = self.job_list if job_list is None else job_list

    # assemble the arcclean invocation
    parts = ["arcclean"]
    if job_list:
        parts += ["-j", job_list]
    parts += make_list(job_id)
    cmd = quote_cmd(parts)

    # execute the command
    logger.debug("cleanup arc job(s) with command '{}'".format(cmd))
    code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=sys.stderr)

    # evaluate the outcome unless silenced
    # NOTE: errors are read from stdout here, stderr is passed through
    if code != 0 and not silent:
        raise Exception(
            "cleanup of arc job(s) '{}' failed with code {}:\n{}".format(job_id, code, out))
def checksum(self):
    """
    Returns the checksum of the local CMSSW setup, computing and caching it on first access.
    Returns *None* when checksumming is disabled; a custom checksum takes precedence.
    """
    # checksumming might be disabled entirely
    if not self.cmssw_checksumming:
        return None

    # a user-defined checksum wins
    if self.custom_checksum != NO_STR:
        return self.custom_checksum

    # compute once, then serve from cache
    if self._checksum is None:
        parts = [rel_path(__file__, "scripts", "cmssw_checksum.sh"), self.get_cmssw_path()]
        if self.exclude != NO_STR:
            parts.append(self.exclude)
        checksum_cmd = quote_cmd(parts)

        code, out, _ = interruptable_popen(checksum_cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE)
        if code != 0:
            raise Exception("cmssw checksum calculation failed")

        self._checksum = out.strip()

    return self._checksum
def cmd(self, proxy_cmd):
    """
    Builds and returns the single shell command string that runs *proxy_cmd* (a proxy command
    object providing ``add_arg`` and ``build``) inside the bash sandbox given by
    ``self.script``.
    """
    # environment variables to set
    env = self._get_env()

    # add staging directories
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = self.stagein_info.stage_dir.path
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = self.stageout_info.stage_dir.path

    # get the bash command
    bash_cmd = self._bash_cmd()

    # build commands to setup the environment
    setup_cmds = self._build_setup_cmds(env)

    # handle local scheduling within the container
    if self.force_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)

    # build the final command: source the sandbox script, run setup commands, then the proxy
    cmd = quote_cmd(bash_cmd + [
        "-c",
        "; ".join(
            flatten("source \"{}\" \"\"".format(self.script), setup_cmds, proxy_cmd.build())),
    ])

    return cmd
def _arc_proxy_info(args=None, proxy_file=None, silent=False):
    """
    Runs ``arcproxy`` with *args* (defaults to ``["--info"]``) and returns the tuple
    (returncode, stdout, stderr). When *proxy_file* is *None*, the default proxy file is used;
    an empty string skips the ``--proxy`` argument entirely. Raises on failure unless *silent*.
    """
    if args is None:
        args = ["--info"]
    cmd = ["arcproxy"] + (args or [])

    # resolve the proxy file: None selects the default, empty string omits --proxy
    if proxy_file is None:
        proxy_file = get_arc_proxy_file()
    if proxy_file:
        proxy_file = os.path.expandvars(os.path.expanduser(proxy_file))
        cmd += ["--proxy", proxy_file]

    code, out, err = interruptable_popen(quote_cmd(cmd), shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # arcproxy does not use proper exit codes but writes into stderr in case of an error
    if err:
        code = 1

    if code != 0 and not silent:
        raise Exception("arcproxy failed: {}".format(err))

    return code, out, err
def cancel(self, job_id, queue=None, silent=False):
    """
    Cancels one or multiple lsf jobs given by *job_id* via ``bkill``. Raises an exception on
    failure unless *silent* is True.
    """
    # default arguments
    if queue is None:
        queue = self.queue

    # build the command
    cmd = ["bkill"]
    if queue:
        cmd += ["-q", queue]
    cmd += make_list(job_id)
    cmd = quote_cmd(cmd)

    # run it
    logger.debug("cancel lsf job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # check success
    if code != 0 and not silent:
        # fixed: job_id and code were previously passed to format() in swapped order,
        # producing a message like "job(s) '255' failed with code 1234" (cf. slurm/htcondor)
        raise Exception("cancellation of lsf job(s) '{}' failed with code {}:\n{}".format(
            job_id, code, err))
def cancel(self, job_id, partition=None, silent=False):
    """
    Cancels one or multiple slurm jobs given by *job_id* via ``scancel``. Raises an exception
    on failure unless *silent* is True.
    """
    # fall back to the configured partition
    partition = self.partition if partition is None else partition

    # assemble the scancel invocation
    parts = ["scancel"]
    if partition:
        parts += ["--partition", partition]
    parts += make_list(job_id)
    cmd = quote_cmd(parts)

    # execute it
    logger.debug("cancel slurm job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # complain unless silenced
    if code != 0 and not silent:
        raise Exception("cancellation of slurm job(s) '{}' failed with code {}:\n{}".format(
            job_id, code, err))
def cmd(self, proxy_cmd):
    """
    Builds and returns the shell command string that runs *proxy_cmd* inside the virtual
    environment sandbox.
    """
    # environment variables to export in the sandbox
    env = self._get_env()

    # expose staging directories to the contained process
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = self.stagein_info.stage_dir.path
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = self.stageout_info.stage_dir.path

    # command that activates the venv
    venv_cmd = self._venv_cmd()

    # commands that export the environment
    setup_cmds = self._build_setup_cmds(env)

    # enforce the local scheduler when required
    if self.force_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)

    # chain activation, setup commands and the proxy command itself
    return "; ".join([quote_cmd(venv_cmd)] + setup_cmds + [proxy_cmd.build()])
def encode_params(cls, params):
    """
    Encodes a list of command line parameters *params* into a string via
    :py:func:`law.util.quote_cmd` followed by base64 encoding.
    """
    # "-" acts as a placeholder for an empty parameter list
    quoted = quote_cmd(params) or "-"
    encoded = base64.b64encode(six.b(quoted))
    return encoded.decode("utf-8") if six.PY3 else encoded
def submit(self, job_file, ce=None, delegation_id=None, retries=0, retry_delay=3, silent=False):
    """
    Submits the glite job described by *job_file* via ``glite-ce-job-submit`` and returns the
    job id. CEs (and matching delegation ids) are picked randomly per attempt. Retries up to
    *retries* times with *retry_delay* seconds in between; returns *None* on failure when
    *silent* is True, otherwise raises.
    """
    # default arguments
    if ce is None:
        ce = self.ce
    if delegation_id is None:
        delegation_id = self.delegation_id

    # check arguments
    if not ce:
        raise ValueError("ce must not be empty")

    # prepare round robin for ces and delegations
    ce = make_list(ce)
    if delegation_id:
        delegation_id = make_list(delegation_id)
        if len(ce) != len(delegation_id):
            raise Exception("numbers of CEs ({}) and delegation ids ({}) do not match".format(
                len(ce), len(delegation_id)))

    # get the job file location as the submission command is run in the same directory
    job_file_dir, job_file_name = os.path.split(os.path.abspath(job_file))

    # define the actual submission in a loop to simplify retries
    while True:
        # build the command
        i = random.randint(0, len(ce) - 1)
        cmd = ["glite-ce-job-submit", "-r", ce[i]]
        if delegation_id:
            cmd += ["-D", delegation_id[i]]
        cmd += [job_file_name]
        cmd = quote_cmd(cmd)

        # run the command
        # glite prints everything to stdout
        logger.debug("submit glite job with command '{}'".format(cmd))
        code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE, stderr=sys.stderr, cwd=job_file_dir)

        # in some cases, the return code is 0 but the ce did not respond with a valid id
        if code == 0:
            job_id = out.strip().split("\n")[-1].strip()
            if not self.submission_job_id_cre.match(job_id):
                code = 1
                out = "bad job id '{}' from output:\n{}".format(job_id, out)

        # retry or done?
        if code == 0:
            return job_id
        else:
            # fixed: job_file and code were previously passed to format() in swapped order
            logger.debug("submission of glite job '{}' failed with code {}:\n{}".format(
                job_file, code, out))
            if retries > 0:
                retries -= 1
                time.sleep(retry_delay)
                continue
            elif silent:
                return None
            else:
                raise Exception("submission of glite job '{}' failed:\n{}".format(
                    job_file, out))
def cancel(self, job_id, pool=None, scheduler=None, silent=False):
    """
    Cancels one or multiple htcondor jobs given by *job_id* via ``condor_rm``. Raises an
    exception on failure unless *silent* is True.
    """
    # fall back to configured pool and scheduler
    pool = self.pool if pool is None else pool
    scheduler = self.scheduler if scheduler is None else scheduler

    # assemble the condor_rm invocation
    parts = ["condor_rm"]
    if pool:
        parts += ["-pool", pool]
    if scheduler:
        parts += ["-name", scheduler]
    parts += make_list(job_id)
    cmd = quote_cmd(parts)

    # execute it
    logger.debug("cancel htcondor job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # complain unless silenced
    if code != 0 and not silent:
        raise Exception("cancellation of htcondor job(s) '{}' failed with code {}:\n{}".format(
            job_id, code, err))
def hadd_cmd(input_paths, output_path):
    """Build the quoted hadd command that merges *input_paths* into *output_path*."""
    parts = ["hadd", "-n", "0"]
    # honor the enclosing force flag
    if force:
        parts.append("-f")
    # work inside the enclosing temporary directory
    parts += ["-d", cwd.path]
    parts.append(output_path)
    parts += input_paths
    return quote_cmd(parts)
def hadd_cmd(input_paths, output_path):
    """Build the quoted hadd command that merges *input_paths* into *output_path*."""
    parts = ["hadd", "-n", "0"]
    # work inside the enclosing temporary directory
    parts += ["-d", cwd.path]
    # user-supplied extra hadd arguments
    if hadd_args:
        parts += make_list(hadd_args)
    parts.append(output_path)
    parts += input_paths
    return quote_cmd(parts)
def env(self):
    """
    Returns the environment of the bash sandbox as an ordered dictionary, cached per script.
    """
    # strategy: create a tempfile, let python dump its full env in a subprocess and load the
    # env file again afterwards
    script = self.script
    if script not in self._envs:
        with tmp_file() as tmp:
            tmp_path = os.path.realpath(tmp[1])

            # get the bash command
            bash_cmd = self._bash_cmd()

            # build commands to setup the environment
            setup_cmds = self._build_setup_cmds(self._get_env())

            # build the python command that dumps the environment
            py_cmd = "import os,pickle;" \
                + "pickle.dump(dict(os.environ),open('{}','wb'),protocol=2)".format(tmp_path)

            # build the full command
            cmd = quote_cmd(bash_cmd + [
                "-c",
                "; ".join(
                    flatten("source \"{}\" \"\"".format(
                        self.script), setup_cmds, quote_cmd(["python", "-c", py_cmd]))),
            ])

            # run it
            returncode = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
            if returncode != 0:
                raise Exception("bash sandbox env loading failed")

            # load the environment from the tmp file
            # the env was pickled by (potentially) py2 inside the sandbox, so decode as utf-8
            # when loading under py3
            pickle_kwargs = {"encoding": "utf-8"} if six.PY3 else {}
            with open(tmp_path, "rb") as f:
                env = collections.OrderedDict(
                    six.moves.cPickle.load(f, **pickle_kwargs))

        # cache it
        self._envs[script] = env

    return self._envs[script]
def env(self):
    """
    Returns the environment of the docker container as a dictionary, cached per image name.
    """
    # strategy: create a tempfile, forward it to a container, let python dump its full env,
    # close the container and load the env file
    if self.image not in self._envs:
        with tmp_file() as tmp:
            tmp_path = os.path.realpath(tmp[1])

            # path of the env file inside the container, derived from the host tmp path
            env_path = os.path.join("/tmp", str(hash(tmp_path))[-8:])

            # build commands to setup the environment
            setup_cmds = self._build_setup_cmds(self._get_env())

            # arguments to configure the environment
            args = ["-v", "{}:{}".format(tmp_path, env_path)
                    ] + self.common_args()

            # build the command
            py_cmd = "import os,pickle;" \
                + "pickle.dump(dict(os.environ),open('{}','wb'),protocol=2)".format(env_path)
            cmd = quote_cmd(["docker", "run"] + args + [
                self.image,
                "bash", "-l", "-c",
                "; ".join(
                    flatten(setup_cmds, quote_cmd(["python", "-c", py_cmd
                                                   ]))),
            ])

            # run it
            returncode = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
            if returncode != 0:
                raise Exception("docker sandbox env loading failed")

            # load the environment from the tmp file
            with open(tmp_path, "rb") as f:
                env = six.moves.cPickle.load(f)

        # cache
        self._envs[self.image] = env

    return self._envs[self.image]
def query(self, job_id, queue=None, silent=False):
    """
    Queries the status of one or multiple lsf jobs given by *job_id* via ``bjobs``. Returns a
    dict of status dicts when *job_id* is a sequence, otherwise a single status dict. Returns
    *None* instead of raising when *silent* is True.
    """
    # default arguments
    if queue is None:
        queue = self.queue

    # remember whether a sequence of ids was passed to shape the return value accordingly
    chunking = isinstance(job_id, (list, tuple))
    job_ids = make_list(job_id)

    # build the command
    cmd = ["bjobs"]
    if self.lsf_v912:
        cmd.append("-noheader")
    if queue:
        cmd += ["-q", queue]
    cmd += job_ids
    cmd = quote_cmd(cmd)

    # run it
    logger.debug("query lsf job(s) with command '{}'".format(cmd))
    code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # handle errors
    if code != 0:
        if silent:
            return None
        else:
            raise Exception(
                "status query of lsf job(s) '{}' failed with code {}:\n{}".format(
                    job_id, code, err))

    # parse the output and extract the status per job
    query_data = self.parse_query_output(out)

    # compare to the requested job ids and perform some checks
    for _job_id in job_ids:
        if _job_id not in query_data:
            if not chunking:
                if silent:
                    return None
                else:
                    raise Exception(
                        "lsf job(s) '{}' not found in query response".format(job_id))
            else:
                # mark missing jobs as failed instead of raising when chunking
                query_data[_job_id] = self.job_status_dict(
                    job_id=_job_id, status=self.FAILED,
                    error="job not found in query response")

    return query_data if chunking else query_data[job_id]
def query(self, job_id, job_list=None, silent=False):
    """
    Queries the status of one or multiple arc jobs given by *job_id* via ``arcstat``. Returns a
    dict of status dicts when *job_id* is a sequence, otherwise a single status dict. Returns
    *None* instead of raising when *silent* is True.
    """
    # default arguments
    if job_list is None:
        job_list = self.job_list

    # remember whether a sequence of ids was passed to shape the return value accordingly
    chunking = isinstance(job_id, (list, tuple))
    job_ids = make_list(job_id)

    # build the command
    cmd = ["arcstat"]
    if job_list:
        cmd += ["-j", job_list]
    cmd += job_ids
    cmd = quote_cmd(cmd)

    # run it
    logger.debug("query arc job(s) with command '{}'".format(cmd))
    code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # handle errors
    if code != 0:
        if silent:
            return None
        else:
            # errors are read from stdout (stderr is redirected there above)
            raise Exception(
                "status query of arc job(s) '{}' failed with code {}:\n{}".format(
                    job_id, code, out))

    # parse the output and extract the status per job
    query_data = self.parse_query_output(out)

    # compare to the requested job ids and perform some checks
    for _job_id in job_ids:
        if _job_id not in query_data:
            if not chunking:
                if silent:
                    return None
                else:
                    raise Exception(
                        "arc job(s) '{}' not found in query response".format(job_id))
            else:
                # mark missing jobs as failed instead of raising when chunking
                query_data[_job_id] = self.job_status_dict(
                    job_id=_job_id, status=self.FAILED,
                    error="job not found in query response")

    return query_data if chunking else query_data[job_id]
def build(self, skip_run=False):
    """
    Builds and returns the full command string. When *skip_run* is True, the run command is
    omitted and only the arguments are rendered.
    """
    # start with the run command
    cmd = [] if skip_run else self.build_run_cmd()

    # add arguments and insert dummy key value separators which are replaced with "=" later
    for key, value in self.args:
        cmd.extend([key, self.arg_sep, value])

    # quote each part individually, then collapse the separator into "="
    cmd = " ".join(quote_cmd([c]) for c in cmd)
    cmd = cmd.replace(" " + self.arg_sep + " ", "=")

    return cmd
def bundle(self, dst_path):
    """
    Creates the repository bundle at *dst_path* by invoking the ``bundle_repository.sh``
    script. Raises an exception when the script fails.
    """
    bundle_cmd = quote_cmd([
        rel_path(__file__, "scripts", "bundle_repository.sh"),
        self.get_repo_path(),
        get_path(dst_path),
        " ".join(self.exclude_files),
        " ".join(self.include_files),
    ])
    code = interruptable_popen(bundle_cmd, shell=True, executable="/bin/bash")[0]
    if code != 0:
        raise Exception("repository bundling failed")
def bundle(self, dst_path):
    """
    Creates the CMSSW bundle at *dst_path* by invoking the ``bundle_cmssw.sh`` script. Raises
    an exception when the script fails.
    """
    parts = [
        rel_path(__file__, "scripts", "bundle_cmssw.sh"),
        self.get_cmssw_path(),
        get_path(dst_path),
    ]
    # optional exclusion pattern
    if self.exclude != NO_STR:
        parts.append(self.exclude)

    code = interruptable_popen(quote_cmd(parts), shell=True, executable="/bin/bash")[0]
    if code != 0:
        raise Exception("cmssw bundling failed")
def submit(self, job_file, pool=None, scheduler=None, retries=0, retry_delay=3, silent=False):
    """
    Submits the htcondor job described by *job_file* via ``condor_submit`` and returns the list
    of job ids. Retries up to *retries* times with *retry_delay* seconds in between; returns
    *None* on failure when *silent* is True, otherwise raises.
    """
    # default arguments
    if pool is None:
        pool = self.pool
    if scheduler is None:
        scheduler = self.scheduler

    # get the job file location as the submission command is run in the same directory
    job_file_dir, job_file_name = os.path.split(os.path.abspath(job_file))

    # build the command
    cmd = ["condor_submit"]
    if pool:
        cmd += ["-pool", pool]
    if scheduler:
        cmd += ["-name", scheduler]
    cmd += [job_file_name]
    cmd = quote_cmd(cmd)

    # define the actual submission in a loop to simplify retries
    while True:
        # run the command
        logger.debug("submit htcondor job with command '{}'".format(cmd))
        code, out, err = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=job_file_dir)

        # get the job id(s)
        if code == 0:
            last_line = out.strip().split("\n")[-1].strip()
            m = self.submission_job_id_cre.match(last_line)
            if m:
                # expand into "<cluster>.<index>" ids, one per submitted job
                job_ids = ["{}.{}".format(m.group(2), i) for i in range(int(m.group(1)))]
            else:
                code = 1
                err = "cannot parse htcondor job id(s) from output:\n{}".format(out)

        # retry or done?
        if code == 0:
            return job_ids
        else:
            logger.debug("submission of htcondor job '{}' failed with code {}:\n{}".format(
                job_file, code, err))
            if retries > 0:
                retries -= 1
                time.sleep(retry_delay)
                continue
            elif silent:
                return None
            else:
                raise Exception("submission of htcondor job '{}' failed:\n{}".format(
                    job_file, err))
def cleanup(self, job_id, silent=False):
    """
    Cleans up one or multiple glite jobs given by *job_id* via ``glite-ce-job-purge``. Raises
    an exception on failure unless *silent* is True.
    """
    # build the command
    cmd = ["glite-ce-job-purge", "-N"] + make_list(job_id)
    cmd = quote_cmd(cmd)

    # run it
    logger.debug("cleanup glite job(s) with command '{}'".format(cmd))
    code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=sys.stderr)

    # check success
    if code != 0 and not silent:
        # glite prints everything to stdout
        # fixed: job_id and code were previously passed to format() in swapped order
        raise Exception("cleanup of glite job(s) '{}' failed with code {}:\n{}".format(
            job_id, code, out))
def _voms_proxy_info(args=None, proxy_file=None, silent=False):
    """
    Runs ``voms-proxy-info`` with *args* and returns the tuple (returncode, stdout, stderr).
    When *proxy_file* is *None*, the default proxy file is used; an empty string skips the
    ``--file`` argument entirely. Raises on failure unless *silent*.
    """
    cmd = ["voms-proxy-info"] + (args or [])

    # resolve the proxy file: None selects the default, empty string omits --file
    if proxy_file is None:
        proxy_file = get_voms_proxy_file()
    if proxy_file:
        proxy_file = os.path.expandvars(os.path.expanduser(proxy_file))
        cmd += ["--file", proxy_file]

    code, out, err = interruptable_popen(quote_cmd(cmd), shell=True, executable="/bin/bash",
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if code != 0 and not silent:
        raise Exception("voms-proxy-info failed: {}".format(err))

    return code, out, err
def cli_args(self, exclude=None, replace=None):
    """
    Returns this task's parameters as a flat list of command line arguments. Parameter names
    matching *exclude* are skipped, and values in *replace* override the task's current values.
    """
    exclude = set() if exclude is None else exclude
    replace = {} if replace is None else replace

    # interactive parameters never appear on the command line
    exclude |= set(self.interactive_params)

    args = []
    for name, param in self.get_params():
        if multi_match(name, exclude, any):
            continue
        # serialize the (possibly replaced) value and append the quoted "--name value" pair
        raw = replace.get(name, getattr(self, name))
        args.append("--{}".format(name.replace("_", "-")))
        args.append(quote_cmd([param.serialize(raw)]))

    return args
def checksum(self):
    """
    Returns the repository checksum, computing and caching it on first access. A custom
    checksum takes precedence when set.
    """
    # a user-defined checksum wins
    if self.custom_checksum != NO_STR:
        return self.custom_checksum

    # compute once, then serve from cache
    if self._checksum is None:
        checksum_cmd = quote_cmd([
            rel_path(__file__, "scripts", "repository_checksum.sh"),
            self.get_repo_path(),
        ])
        code, out, _ = interruptable_popen(checksum_cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE)
        if code != 0:
            raise Exception("repository checksum calculation failed")
        self._checksum = out.strip()

    return self._checksum
def env(self):
    """
    Returns the environment of the singularity container as a dictionary, cached per image.
    """
    # strategy: unlike docker, singularity might not allow binding of paths that do not exist
    # in the container, so create a tmp directory on the host system and bind it as /tmp, let
    # python dump its full env into a file, and read the file again on the host system
    if self.image not in self._envs:
        tmp_dir = LocalDirectoryTarget(is_tmp=True)
        tmp_dir.touch()

        tmp = tmp_dir.child("env", type="f")
        tmp.touch()

        # determine whether volume binding is allowed
        # a task-level callback takes precedence over the config value
        allow_binds_cb = getattr(self.task, "singularity_allow_binds", None)
        if callable(allow_binds_cb):
            allow_binds = allow_binds_cb()
        else:
            cfg = Config.instance()
            allow_binds = cfg.get_expanded(self.get_config_section(), "allow_binds")

        # arguments to configure the environment
        args = ["-e"]
        if allow_binds:
            args.extend(["-B", "{}:/tmp".format(tmp_dir.path)])
            env_file = "/tmp/{}".format(tmp.basename)
        else:
            # without binding, the container is assumed to see the host path directly
            env_file = tmp.path

        # get the singularity exec command
        singularity_exec_cmd = self._singularity_exec_cmd() + args

        # build commands to setup the environment
        setup_cmds = self._build_setup_cmds(self._get_env())

        # build the python command that dumps the environment
        py_cmd = "import os,pickle;" \
            + "pickle.dump(dict(os.environ),open('{}','wb'),protocol=2)".format(env_file)

        # build the full command
        cmd = quote_cmd(singularity_exec_cmd + [
            self.image,
            "bash", "-l", "-c",
            "; ".join(
                flatten(setup_cmds, quote_cmd(["python", "-c", py_cmd]))),
        ])

        # run it
        code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if code != 0:
            raise Exception(
                "singularity sandbox env loading failed:\n{}".format(out))

        # load the environment from the tmp file
        env = tmp.load(formatter="pickle")

        # cache
        self._envs[self.image] = env

    return self._envs[self.image]
def submit(self, job_file, job_list=None, ce=None, retries=0, retry_delay=3, silent=False):
    """
    Submits one or multiple arc jobs given by *job_file* via ``arcsub`` and returns the job
    id(s) (a list when *job_file* is a sequence, otherwise a single id). The CE is chosen
    randomly from *ce* per attempt. Retries up to *retries* times with *retry_delay* seconds in
    between; returns *None* on failure when *silent* is True, otherwise raises.
    """
    # default arguments
    if job_list is None:
        job_list = self.job_list
    if ce is None:
        ce = self.ce

    # check arguments
    if not ce:
        raise ValueError("ce must not be empty")
    ce = make_list(ce)

    # arc supports multiple jobs to be submitted with a single arcsub call,
    # so job_file can be a sequence of files
    # when this is the case, we have to make the assumption that their input files are all
    # absolute, or they are relative but all in the same directory
    chunking = isinstance(job_file, (list, tuple))
    job_files = make_list(job_file)
    job_file_dir = os.path.dirname(os.path.abspath(job_files[0]))
    job_file_names = [os.path.basename(jf) for jf in job_files]

    # define the actual submission in a loop to simplify retries
    while True:
        # build the command
        cmd = ["arcsub", "-c", random.choice(ce)]
        if job_list:
            cmd += ["-j", job_list]
        cmd += job_file_names
        cmd = quote_cmd(cmd)

        # run the command
        logger.debug("submit arc job(s) with command '{}'".format(cmd))
        code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE, stderr=sys.stderr, cwd=job_file_dir)

        # in some cases, the return code is 0 but the ce did not respond valid job ids
        job_ids = []
        if code == 0:
            # extract job ids from each output line
            for line in out.strip().split("\n"):
                m = self.submission_job_id_cre.match(line.strip())
                if m:
                    job_id = m.group(1)
                    job_ids.append(job_id)
            if not job_ids:
                code = 1
                out = "cannot find job id(s) in output:\n{}".format(out)
            elif len(job_ids) != len(job_files):
                raise Exception(
                    "number of job ids in output ({}) does not match number of "
                    "jobs to submit ({}) in output:\n{}".format(
                        len(job_ids), len(job_files), out))

        # retry or done?
        if code == 0:
            return job_ids if chunking else job_ids[0]
        else:
            logger.debug(
                "submission of arc job(s) '{}' failed with code {}:\n{}".format(
                    job_files, code, out))
            if retries > 0:
                retries -= 1
                time.sleep(retry_delay)
                continue
            elif silent:
                return None
            else:
                raise Exception(
                    "submission of arc job(s) '{}' failed:\n{}".format(
                        job_files, out))
def cmd(self, proxy_cmd):
    """
    Builds and returns the full ``docker run`` command string that executes *proxy_cmd* (a list
    of command line fragments) inside the container, with law, its dependencies, config files
    and configured volumes mounted into the container.
    """
    cfg = Config.instance()

    # docker run command arguments
    args = []

    # add args configured on the task
    args_getter = getattr(self.task, "docker_args", None)
    args += make_list(args_getter() if callable(args_getter) else self.
                      default_docker_args)

    # container name
    args.extend([
        "--name",
        "{}_{}".format(self.task.task_id, str(uuid.uuid4())[:8])
    ])

    # container hostname
    args.extend(["-h", "{}".format(socket.gethostname())])

    # helper to build forwarded paths
    section = self.get_config_section()
    forward_dir = cfg.get_expanded(section, "forward_dir")
    python_dir = cfg.get_expanded(section, "python_dir")
    bin_dir = cfg.get_expanded(section, "bin_dir")
    stagein_dir = cfg.get_expanded(section, "stagein_dir")
    stageout_dir = cfg.get_expanded(section, "stageout_dir")

    def dst(*args):
        # build a path below the in-container forward directory
        return os.path.join(forward_dir, *(str(arg) for arg in args))

    # helper for mounting a volume
    volume_srcs = []

    def mount(*vol):
        src = vol[0]

        # make sure, the same source directory is not mounted twice
        if src in volume_srcs:
            return
        volume_srcs.append(src)

        # ensure that source directories exist
        if not os.path.isfile(src) and not os.path.exists(src):
            os.makedirs(src)

        # store the mount point
        args.extend(["-v", ":".join(vol)])

    # environment variables to set
    env = self._get_env()

    # add staging directories
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = dst(stagein_dir)
        mount(self.stagein_info.stage_dir.path, dst(stagein_dir))
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = dst(stageout_dir)
        mount(self.stageout_info.stage_dir.path, dst(stageout_dir))

    # prevent python from writing byte code files
    env["PYTHONDONTWRITEBYTECODE"] = "1"

    # adjust path variables
    env["PATH"] = os.pathsep.join(["$PATH", dst("bin")])
    env["PYTHONPATH"] = os.pathsep.join(["$PYTHONPATH", dst(python_dir)])

    # forward python directories of law and dependencies
    for mod in law_deps:
        path = os.path.dirname(mod.__file__)
        name, ext = os.path.splitext(os.path.basename(mod.__file__))
        if name == "__init__":
            # packages are mounted as whole directories
            vsrc = path
            vdst = dst(python_dir, os.path.basename(path))
        else:
            # single-file modules are mounted as files
            vsrc = os.path.join(path, name + ".py")
            vdst = dst(python_dir, name + ".py")
        mount(vsrc, vdst)

    # forward the law cli dir to bin as it contains a law executable
    env["PATH"] = os.pathsep.join(
        [env["PATH"], dst(python_dir, "law", "cli")])

    # forward the law config file
    if cfg.config_file:
        mount(cfg.config_file, dst("law.cfg"))
        env["LAW_CONFIG_FILE"] = dst("law.cfg")

    # forward the luigi config file
    for p in luigi.configuration.LuigiConfigParser._config_paths[::-1]:
        if os.path.exists(p):
            mount(p, dst("luigi.cfg"))
            env["LUIGI_CONFIG_PATH"] = dst("luigi.cfg")
            break

    # forward volumes defined in the config and by the task
    vols = self._get_volumes()
    for hdir, cdir in six.iteritems(vols):
        if not cdir:
            mount(hdir)
        else:
            # resolve the ${PY} and ${BIN} placeholders in the container path
            cdir = cdir.replace("${PY}", dst(python_dir)).replace(
                "${BIN}", dst(bin_dir))
            mount(hdir, cdir)

    # extend by arguments needed for both env loading and executing the job
    args.extend(self.common_args())

    # build commands to setup the environment
    setup_cmds = self._build_setup_cmds(env)

    # handle scheduling within the container
    ls_flag = "--local-scheduler"
    if self.force_local_scheduler() and ls_flag not in proxy_cmd:
        proxy_cmd.append(ls_flag)
    if ls_flag not in proxy_cmd:
        # when the scheduler runs on the host system, we need to set the network interface to the
        # host system and set the correct luigi scheduler host as seen by the container
        if self.scheduler_on_host():
            args.extend(["--network", "host"])
            proxy_cmd.extend(
                ["--scheduler-host", "{}".format(self.get_host_ip())])

    # build the final command
    cmd = quote_cmd(["docker", "run"] + args + [
        self.image,
        "bash", "-l", "-c",
        "; ".join(
            flatten(setup_cmds, " ".join(proxy_cmd)))
    ])

    return cmd