def prepare_stageout(self, tmp_dir):
    """
    Prepare the stage-out of the task outputs selected by the sandbox stage-out mask of the task.

    The stage-out directory is created as a child of *tmp_dir*, with its name read from the
    ``"stageout_dir"`` option in the config section of the sandbox. A ``StageInfo`` object is
    returned that holds the masked outputs, the stage-out directory, and a dictionary mapping each
    flattened output to its counterpart obtained with ``os.environ`` patched to ``self.task.env``.
    *None* is returned when the mask is empty, or when no output remains after masking.
    """
    # nothing to stage out when the task defines no mask
    mask = self.task.sandbox_stageout()
    if not mask:
        return None

    # resolve the outputs twice: once as seen from outside, and once as seen from within the
    # sandbox, i.e., with the environment patched to the one of the task
    outer_outputs = self.task.output()
    with patch_object(os, "environ", self.task.env, lock=True):
        inner_outputs = self.task.output()

    # reduce both structures to the parts selected by the mask
    outer_outputs = mask_struct(mask, outer_outputs)
    inner_outputs = mask_struct(mask, inner_outputs)
    if not outer_outputs:
        return None

    # create the stage-out directory inside the temporary directory
    config = Config.instance()
    config_section = self.sandbox_inst.get_config_section()
    stageout_dir_name = config.get_expanded(config_section, "stageout_dir")
    stageout_dir = tmp_dir.child(stageout_dir_name, type="d")
    stageout_dir.touch()

    # build the lookup output -> sandbox output, pairing the flattened structures by position
    output_lookup = {
        outer: inner
        for outer, inner in zip(flatten(outer_outputs), flatten(inner_outputs))
    }

    return StageInfo(outer_outputs, stageout_dir, output_lookup)
def stagein(self, tmp_dir):
    """
    Stage-in the task inputs selected by the sandbox stage-in mask of the task.

    Each masked input is copied via ``copy_to_local`` to a staged target created by
    ``make_staged_target`` inside the stage-in directory, which is a child of *tmp_dir* named
    after the ``"stagein_dir_name"`` option in the config section of the sandbox. A ``StageInfo``
    object is returned that holds the masked inputs, the stage-in directory, and the structure of
    staged inputs. *None* is returned when the directory name is not configured, when the mask is
    empty, or when no input remains after masking.
    """
    # staging is disabled when no stage-in directory name is configured
    config = Config.instance()
    config_section = self.sandbox_inst.get_config_section()
    stagein_dir_name = config.get_expanded(config_section, "stagein_dir_name")
    if not stagein_dir_name:
        return None

    # nothing to stage in when the task defines no mask
    mask = self.task.sandbox_stagein()
    if not mask:
        return None

    # resolve the inputs twice: once as seen from outside, and once as seen from within the
    # sandbox, i.e., with the environment patched to the one of the task
    outer_inputs = self.task.input()
    with patch_object(os, "environ", self.task.env, lock=True):
        inner_inputs = self.task.input()

    # reduce both structures to the parts selected by the mask
    outer_inputs = mask_struct(mask, outer_inputs)
    inner_inputs = mask_struct(mask, inner_inputs)
    if not outer_inputs:
        return None

    # build the lookup input -> sandbox input, pairing the flattened structures by position
    input_lookup = {
        outer: inner
        for outer, inner in zip(flatten(outer_inputs), flatten(inner_inputs))
    }

    # create the stage-in directory inside the temporary directory
    stagein_dir = tmp_dir.child(stagein_dir_name, type="d")
    stagein_dir.touch()

    def copy_into_stage_dir(target):
        # copy a single input to its staged location and return the staged target
        staged = make_staged_target(stagein_dir, input_lookup[target])
        logger.debug("stage-in {} to {}".format(target.path, staged.path))
        target.copy_to_local(staged)
        return staged

    def stage_collection(func, collection, **kwargs):
        # stage the targets of a collection one by one
        # NOTE(review): the result of map_struct is not returned here, so this mapping yields
        # None for collections -- confirm whether that is intended
        map_struct(func, collection.targets, **kwargs)

    # create the structure of staged inputs, with target collections handled separately
    collection_mappings = {TargetCollection: stage_collection}
    staged_inputs = map_struct(
        copy_into_stage_dir, outer_inputs, custom_mappings=collection_mappings)

    n_staged = len(stagein_dir.listdir())
    logger.info("staged-in {} file(s)".format(n_staged))

    return StageInfo(outer_inputs, stagein_dir, staged_inputs)
def live_task_id(self):
    """
    The task id depends on the task family and parameters, and is generated by luigi once in the
    constructor. As the latter may change, this property returns the id computed from the current
    set of parameters.
    """
    # take a snapshot of the current parameter values, which replaces param_kwargs only while the
    # string representation of the parameters is created
    current_params = {}
    for name in self.param_kwargs:
        current_params[name] = getattr(self, name)

    # only_public was introduced in 2.8.0, so pass it only when to_str_params accepts it
    # NOTE(review): if getargspec refers to inspect.getargspec, it is no longer available as of
    # Python 3.11 -- confirm where the name is imported from
    to_str_kwargs = dict(only_significant=True)
    accepted_args = getargspec(self.to_str_params).args
    if "only_public" in accepted_args:
        to_str_kwargs["only_public"] = True

    with patch_object(self, "param_kwargs", current_params):
        params_repr = self.to_str_params(**to_str_kwargs)

    # build the task id from the task family and the parameter representation
    return luigi.task.task_id_str(self.get_task_family(), params_repr)
def _fetch_output(self, args):
    """
    Call ``fetch_task_output`` with this object followed by the positional arguments in *args* and
    return its result. For the duration of the call, the ``global_retries`` attribute of
    ``law.target.remote`` is patched to 0.
    """
    # local import of the module whose attribute is patched
    import law.target.remote as remote_module

    # presumably this disables retries of remote target operations -- confirm against
    # law.target.remote
    no_retries = patch_object(remote_module, "global_retries", 0, lock=True)
    with no_retries:
        return fetch_task_output(self, *args)