def create_job_file(self, job_num, branches):
    """
    Create the LSF job file for job *job_num* which processes the given *branches*.

    The job config is assembled from the ``lsf_*`` hooks of the task, passed through
    ``task.lsf_job_config`` for customization, and then handed to the job file factory.

    Returns a dict with the created job file under ``"job"`` and the absolute path of the custom
    log file under ``"log"``. The latter is *None* unless a custom log file is configured and the
    output directory is a ``LocalDirectoryTarget``.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix

    def _postfix(path):
        # let the factory derive the postfixed variant of a file name
        return self.job_file_factory.postfix_file(path, postfix)

    def pf(s):
        # tag a render variable value with the postfix marker
        # NOTE(review): presumably resolved by the job file factory during rendering - confirm
        return "__law_job_postfix__:{}".format(s)

    # collect task parameters
    proxy_cmd = ProxyCommand(
        task.as_branch(branches[0]),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.lsf_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for key, value in OrderedDict(task.lsf_cmdline_args()).items():
        proxy_cmd.add_arg(key, value, overwrite=True)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.lsf_wrapper_file())
    config.command = "bash {} {}".format(
        _postfix(os.path.basename(wrapper_file)), job_args.join())

    # meta infos
    config.job_name = task.task_id
    config.emails = True

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]

    # add the bootstrap file
    bootstrap_file = task.lsf_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.lsf_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # logging
    # we do not use lsf's logging mechanism since it requires that the submission directory
    # is present when it retrieves logs, and therefore we rely on the job.sh script
    config.stdout = None
    config.stderr = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.custom_log_file = log_file
        config.render_variables["log_file"] = pf(log_file)

    # we can use lsf's file stageout only when the output directory is local
    # otherwise, one should use the stageout_file and stageout manually
    output_dir = task.lsf_output_directory()
    output_dir_is_local = isinstance(output_dir, LocalDirectoryTarget)
    if output_dir_is_local:
        config.absolute_paths = True
        config.cwd = output_dir.path

    # task hook
    config = task.lsf_job_config(config, job_num, branches)

    # when the output dir is not local, direct output files are not possible; this is enforced
    # after the task hook so that files added by the hook are dropped as well
    if not output_dir_is_local and config.output_files:
        del config.output_files[:]

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # build the job file and get the sanitized config
    job_file, config = self.job_file_factory(**config.__dict__)

    # determine the absolute custom log file if set
    abs_log_file = None
    if config.custom_log_file and output_dir_is_local:
        abs_log_file = output_dir.child(config.custom_log_file, type="f").path

    # return job and log files
    return {"job": job_file, "log": abs_log_file}
def create_proxy_cmd(self):
    """
    Return a ``ProxyCommand`` for the task of this instance.

    Task arguments listed in the task's ``exclude_params_sandbox`` attribute and the global
    ``workers`` argument are excluded from the command.
    """
    task = self.task
    return ProxyCommand(
        task,
        exclude_task_args=task.exclude_params_sandbox,
        exclude_global_args=["workers"],
    )
def create_job_file(self, job_num, branches):
    """
    Create the glite job file for job *job_num* which processes the given *branches*.

    The job config is assembled from the ``glite_*`` hooks of the task, passed through
    ``task.glite_job_config`` for customization, and then handed to the job file factory.

    Returns a dict with the created job file under ``"job"`` and, under ``"log"``, the custom log
    file joined onto the output uri, or *None* when no custom log file is configured.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix

    def pf(s):
        # tag a render variable value with the postfix marker
        # NOTE(review): presumably resolved by the job file factory during rendering - confirm
        return "__law_job_postfix__:{}".format(s)

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.glite_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    proxy_cmd = ProxyCommand(
        task.as_branch(branches[0]),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.glite_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for key, value in OrderedDict(task.glite_cmdline_args()).items():
        proxy_cmd.add_arg(key, value, overwrite=True)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.output_uri = task.glite_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file, guarded like the other optional files so that an empty return
    # value neither ends up in the input files nor breaks the basename lookup
    bootstrap_file = task.glite_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.glite_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log file
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.custom_log_file = log_file
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook
    config = task.glite_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # build the job file and get the sanitized config
    job_file, config = self.job_file_factory(**config.__dict__)

    # determine the custom log file uri if set
    abs_log_file = None
    if config.custom_log_file:
        abs_log_file = os.path.join(config.output_uri, config.custom_log_file)

    # return job and log files
    return {"job": job_file, "log": abs_log_file}
def create_job_file(self, job_num, branches):
    """
    Assemble the glite job file for job *job_num* covering the given *branches*.

    The config is filled from the ``glite_*`` hooks of the task, run through
    ``task.glite_job_config`` and finally passed to the job file factory together with a postfix
    derived from the branch range.

    Returns a dict holding the created job file under ``"job"`` and, under ``"log"``, the custom
    log file joined onto the output uri, or *None* when no custom log file is configured.
    """
    task = self.task

    # postfix encodes the branches as a pythonic range, e.g. [0, 1, 2, 4] -> "_0To5"
    first_branch, last_branch = branches[0], branches[-1]
    postfix = "_{}To{}".format(first_branch, last_branch + 1)

    # start from a fresh config with empty containers
    config = self.job_file_factory.Config()
    config.input_files = DeprecatedInputFiles()
    config.output_files = []
    config.render_variables = {}
    config.custom_content = []

    # the wrapper file is the executable that the remote job runs
    executable = get_path(task.glite_wrapper_file())
    config.executable = executable
    config.input_files["executable_file"] = executable

    # ship the default law job script in addition when a different wrapper is used
    default_job_script = law_src_path("job", "law_job.sh")
    if executable != default_job_script:
        config.input_files["job_file"] = default_job_script

    # command line parameters of the task, built for the first branch
    proxy_cmd = ProxyCommand(
        task.as_branch(first_branch),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.glite_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for arg_name, arg_value in OrderedDict(task.glite_cmdline_args()).items():
        proxy_cmd.add_arg(arg_name, arg_value, overwrite=True)

    # arguments passed to the job script
    attempt = self.submission_data.attempts.get(job_num, 0)
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(job_num, attempt),
    )
    config.arguments = job_args.join()

    # optional input files: bootstrap file, stageout file and the dashboard hook file
    optional_files = (
        ("bootstrap_file", task.glite_bootstrap_file),
        ("stageout_file", task.glite_stageout_file),
        ("dashboard_file", self.dashboard.remote_hook_file),
    )
    for file_key, get_file in optional_files:
        file_path = get_file()
        if file_path:
            config.input_files[file_key] = file_path

    # stdout and stderr go to a common log file, but only when logs are transferred
    log_file = "stdall.txt" if task.transfer_logs else None
    config.stdout = log_file
    config.stderr = log_file
    if log_file:
        config.custom_log_file = log_file

    # meta infos
    config.output_uri = task.glite_output_uri()

    # let the task customize the config
    config = task.glite_job_config(config, job_num, branches)

    # create the job file, the factory also returns the sanitized config
    job_file, config = self.job_file_factory(postfix=postfix, **config.__dict__)

    # uri of the custom log file, if there is one
    if config.custom_log_file:
        abs_log_file = os.path.join(config.output_uri, config.custom_log_file)
    else:
        abs_log_file = None

    return {"job": job_file, "log": abs_log_file}
def create_job_file(self, job_num, branches):
    """
    Assemble the htcondor job file for job *job_num* covering the given *branches*.

    The config is filled from the ``htcondor_*`` hooks of the task, run through
    ``task.htcondor_job_config`` and finally passed to the job file factory together with a
    postfix derived from the branch range.

    Returns a dict holding the created job file under ``"job"`` and, under ``"log"``, the custom
    log file joined onto the path of the output directory. The latter is *None* unless the output
    directory is a ``LocalDirectoryTarget`` and a custom log file is configured.
    """
    task = self.task

    # postfix encodes the branches as a pythonic range, e.g. [0, 1, 2, 4] -> "_0To5"
    first_branch, last_branch = branches[0], branches[-1]
    postfix = "_{}To{}".format(first_branch, last_branch + 1)

    # start from a fresh config with empty containers
    config = self.job_file_factory.Config()
    config.input_files = DeprecatedInputFiles()
    config.output_files = []
    config.render_variables = {}
    config.custom_content = []

    # the wrapper file is the executable that the remote job runs
    executable = get_path(task.htcondor_wrapper_file())
    config.executable = executable
    config.input_files["executable_file"] = executable

    # ship the default law job script in addition when a different wrapper is used
    default_job_script = law_src_path("job", "law_job.sh")
    if executable != default_job_script:
        config.input_files["job_file"] = default_job_script

    # command line parameters of the task, built for the first branch
    proxy_cmd = ProxyCommand(
        task.as_branch(first_branch),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.htcondor_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for arg_name, arg_value in OrderedDict(task.htcondor_cmdline_args()).items():
        proxy_cmd.add_arg(arg_name, arg_value, overwrite=True)

    # arguments passed to the job script
    attempt = self.submission_data.attempts.get(job_num, 0)
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(job_num, attempt),
    )
    config.arguments = job_args.join()

    # optional input files: bootstrap file, stageout file and the dashboard hook file
    optional_files = (
        ("bootstrap_file", task.htcondor_bootstrap_file),
        ("stageout_file", task.htcondor_stageout_file),
        ("dashboard_file", self.dashboard.remote_hook_file),
    )
    for file_key, get_file in optional_files:
        file_path = get_file()
        if file_path:
            config.input_files[file_key] = file_path

    # logging
    # htcondor's own logging mechanism is not used since it might require that the submission
    # directory is present when it retrieves logs, so a custom log file is used instead
    config.log = config.stdout = config.stderr = None
    if task.transfer_logs:
        config.custom_log_file = "stdall.txt"

    # with a local output dir, the job can run within that directory for easier output file
    # handling and absolute paths can be used for input files
    output_dir = task.htcondor_output_directory()
    is_local = isinstance(output_dir, LocalDirectoryTarget)
    if is_local:
        config.absolute_paths = True
        config.custom_content.append(("initialdir", output_dir.path))

    # let the task customize the config
    config = task.htcondor_job_config(config, job_num, branches)

    # direct output files are not possible without a local output dir
    if not is_local:
        del config.output_files[:]

    # create the job file, the factory also returns the sanitized config
    job_file, config = self.job_file_factory(postfix=postfix, **config.__dict__)

    # location of the custom local log file, if any
    if is_local and config.custom_log_file:
        abs_log_file = os.path.join(output_dir.path, config.custom_log_file)
    else:
        abs_log_file = None

    return {"job": job_file, "log": abs_log_file}