def stageout(self, stageout_info):
    """
    Copies the outputs found in the stage directory of *stageout_info* to their actual
    target locations. For every target in ``stageout_info.targets``, the corresponding
    sandbox target is looked up in ``stageout_info.stage_targets`` and its staged
    representation is created with ``make_staged_target``. Staged targets that do not
    exist are reported with a warning and skipped.
    """
    stage_dir = stageout_info.stage_dir
    sandbox_lookup = stageout_info.stage_targets

    def copy_back(target):
        staged = make_staged_target(stage_dir, sandbox_lookup[target])
        logger.debug("stage-out {} to {}".format(staged.path, target))

        # nothing to copy when the output was not produced in the stage directory
        if not staged.exists():
            logger.warning(
                "could not find output target at {} for stage-out".format(staged.path))
            return

        target.copy_from_local(staged)

    def descend_collection(func, collection, **kwargs):
        # only the side effect of copying matters here, so nothing is returned
        map_struct(func, collection.targets, **kwargs)

    mappings = {TargetCollection: descend_collection}
    map_struct(copy_back, stageout_info.targets, custom_mappings=mappings)

    n_staged = len(stageout_info.stage_dir.listdir())
    logger.info("staged-out {} file(s)".format(n_staged))
def stageout(self, stageout_info):
    """
    Moves staged outputs to their proper locations. For every target in
    ``stageout_info.targets``, the staged representation inside
    ``stageout_info.stage_dir`` is created with ``make_staged_target`` and, if it exists,
    moved to the target. Missing staged targets are silently skipped.
    """
    stage_dir = stageout_info.stage_dir

    def relocate(target):
        staged = make_staged_target(stage_dir, target)
        if not staged.exists():
            return
        staged.move(target)

    map_struct(relocate, stageout_info.targets)
def make_staged_target_struct(stage_dir, struct):
    """
    Returns a copy of *struct* in which every target is replaced by the result of
    ``make_staged_target(stage_dir, target)``. Target collections are rebuilt as instances
    of their own class from their mapped targets and the keyword arguments returned by
    their ``_copy_kwargs()`` method.
    """
    def stage_single(target):
        return make_staged_target(stage_dir, target)

    def stage_collection(func, collection, **kwargs):
        inner = map_struct(func, collection.targets, **kwargs)
        collection_cls = collection.__class__
        return collection_cls(inner, **collection._copy_kwargs())

    mappings = {TargetCollection: stage_collection}
    return map_struct(stage_single, struct, custom_mappings=mappings)
def _staged_output(self):
    """
    Returns the outputs of this task where targets selected by
    :py:meth:`sandbox_stageout_mask` are replaced by their staged representations within
    ``_sandbox_stageout_dir``.
    """
    # obtain the plain output method, explicitly requesting the non-proxied attribute
    output_func = self.__getattribute__("output", proxy=False)
    plain_outputs = output_func()

    # build the struct of staged outputs, mirroring the structure of the plain outputs
    def to_staged(target):
        return make_staged_target(_sandbox_stageout_dir, target)

    staged_outputs = map_struct(to_staged, plain_outputs)

    # use the mask to deeply select between staged and plain outputs
    return mask_struct(self.sandbox_stageout_mask(), staged_outputs, plain_outputs)
def stagein(self, tmp_dir):
    """
    Copies the masked inputs of the task into a stage-in directory below *tmp_dir* and
    returns a ``StageInfo`` object built from the masked inputs, the stage-in directory and
    the struct of staged inputs. *None* is returned when no ``stagein_dir_name`` is
    configured for the sandbox, when the task defines no stage-in mask, or when the masked
    inputs are empty.
    """
    # check if the stage-in dir is set
    cfg = Config.instance()
    section = self.sandbox_inst.get_config_section()
    stagein_dir_name = cfg.get_expanded(section, "stagein_dir_name")
    if not stagein_dir_name:
        return None

    # get the sandbox stage-in mask
    stagein_mask = self.task.sandbox_stagein()
    if not stagein_mask:
        return None

    # determine inputs as seen from outside and within the sandbox, the latter being
    # evaluated while os.environ is patched with the task's env
    inputs = self.task.input()
    with patch_object(os, "environ", self.task.env, lock=True):
        sandbox_inputs = self.task.input()

    # apply the mask to both structs
    inputs = mask_struct(stagein_mask, inputs)
    sandbox_inputs = mask_struct(stagein_mask, sandbox_inputs)
    if not inputs:
        return None

    # create a lookup for input -> sandbox input
    sandbox_targets = dict(zip(flatten(inputs), flatten(sandbox_inputs)))

    # create the stage-in directory
    stagein_dir = tmp_dir.child(stagein_dir_name, type="d")
    stagein_dir.touch()

    # copy a single input into the stage-in directory and return its staged representation
    def stagein_target(target):
        sandbox_target = sandbox_targets[target]
        staged_target = make_staged_target(stagein_dir, sandbox_target)
        logger.debug("stage-in {} to {}".format(target.path, staged_target.path))
        target.copy_to_local(staged_target)
        return staged_target

    # rebuild collections from their staged targets, otherwise they would show up as None
    # in the struct of staged inputs that is passed on to the StageInfo below
    def map_collection(func, collection, **kwargs):
        staged_targets = map_struct(func, collection.targets, **kwargs)
        return collection.__class__(staged_targets, **collection._copy_kwargs())

    staged_inputs = map_struct(
        stagein_target, inputs, custom_mappings={TargetCollection: map_collection})

    logger.info("staged-in {} file(s)".format(len(stagein_dir.listdir())))

    return StageInfo(inputs, stagein_dir, staged_inputs)
def prepare_stageout(self):
    """
    Prepares the stage-out of task outputs selected by the task's stage-out mask. A
    temporary local directory is created and each masked output is mapped to a staged
    target inside it via ``make_staged_target``. Returns a ``StageInfo`` object built from
    the masked outputs, the temporary directory and the staged outputs, or *None* when the
    masked outputs are empty.
    """
    masked_outputs = mask_struct(self.task.sandbox_stageout_mask(), self.task.output())
    if not masked_outputs:
        return None

    # temporary directory that hosts the staged outputs
    stage_dir = LocalDirectoryTarget(is_tmp=True)
    stage_dir.touch()

    # map output targets to local targets in the temporary directory
    def to_staged(target):
        return make_staged_target(stage_dir, target)

    staged_outputs = map_struct(to_staged, masked_outputs)

    return StageInfo(masked_outputs, stage_dir, staged_outputs)
def stagein(self):
    """
    Stages the task inputs selected by the task's stage-in mask. A temporary local
    directory is created and each masked input is copied to a staged target inside it.
    Returns a ``StageInfo`` object built from the masked inputs, the temporary directory
    and the staged inputs, or *None* when the masked inputs are empty.
    """
    masked_inputs = mask_struct(self.task.sandbox_stagein_mask(), self.task.input())
    if not masked_inputs:
        return None

    # temporary directory that hosts the staged inputs
    stage_dir = LocalDirectoryTarget(is_tmp=True)
    stage_dir.touch()

    # copy each input and hand back its local representation in the temporary directory
    def copy_to_stage(target):
        staged = make_staged_target(stage_dir, target)
        target.copy(staged)
        return staged

    staged_inputs = map_struct(copy_to_stage, masked_inputs)

    return StageInfo(masked_inputs, stage_dir, staged_inputs)
def localize_file_targets(struct, *args, **kwargs):
    """
    Takes an arbitrary *struct* of targets, opens the contexts returned by their
    :py:meth:`FileSystemFileTarget.localize` implementations and yields their localized
    representations in the same structure as passed in *struct*. Objects without a callable
    ``localize`` attribute are passed through unchanged. All *args* and *kwargs* are
    forwarded to every ``localize`` call.

    When the context is closed, the contexts of all localized targets are closed. When no
    exception was raised while the localized targets were in use but one or more contexts
    failed to close, the first of these exceptions is raised. When opening one of the
    contexts fails, all contexts that were already opened are closed before the error is
    re-raised.
    """
    managers = []

    def enter(target):
        if not callable(getattr(target, "localize", None)):
            return target

        manager = target.localize(*args, **kwargs)
        localized = manager.__enter__()
        # register the manager only after it was entered successfully, so that __exit__ is
        # never invoked on a context that was not opened
        managers.append(manager)
        return localized

    def exit_all(exc_info):
        # close all opened contexts, collect exceptions instead of stopping at the first one
        errors = []
        for manager in managers:
            try:
                manager.__exit__(*exc_info)
            except Exception as e:
                errors.append(e)
        return errors

    # localize all targets, maintain the structure
    try:
        localized_targets = map_struct(enter, struct)
    except BaseException:
        # do not leak contexts that were opened before the failure, errors raised during
        # this cleanup are dropped in favor of the original exception
        exit_all(sys.exc_info())
        raise

    # prepare exception info
    exc = None
    exc_info = (None, None, None)

    try:
        yield localized_targets

    except (Exception, KeyboardInterrupt) as e:
        exc = e
        exc_info = sys.exc_info()
        raise

    finally:
        exit_exc = exit_all(exc_info)

        # when there was no exception during the actual yield and
        # an exception occurred in one of the exit methods, raise the first one
        if exc is None and exit_exc:
            raise exit_exc[0]
def output(self):
    """
    Returns the output of this merge node. In forest mode, the result of
    :py:meth:`merge_output` is returned as is. Otherwise, when that result is a list, tuple
    or target collection, the entry at ``tree_index`` is selected. The root node returns
    this selected output directly, whereas all other nodes return intermediate file targets
    in the same structure, named according to ``node_format`` and located in the directory
    of the selected output.
    """
    merged = self.merge_output()

    if self.is_forest():
        return merged

    if isinstance(merged, (list, tuple, TargetCollection)):
        merged = merged[self.tree_index]

    if self.is_root():
        return merged

    # determine the directory in which intermediate outputs are stored
    if isinstance(merged, SiblingFileCollection):
        intermediate_dir = merged.dir
    else:
        first_output = flatten(merged)[0]
        if not isinstance(first_output, FileSystemTarget):
            msg = "cannot determine directory for intermediate merged outputs from '{}'"
            raise Exception(msg.format(merged))
        intermediate_dir = first_output.parent

    # helper that derives the intermediate target belonging to a leaf output
    def to_intermediate(leaf_output):
        stem, suffix = os.path.splitext(leaf_output.basename)
        node_name = self.node_format.format(
            name=stem,
            ext=suffix,
            tree=self.tree_index,
            branch=self.branch,
            depth=self.tree_depth,
        )
        return intermediate_dir.child(node_name, type="f")

    # keep the structure of the selected output
    if isinstance(merged, TargetCollection):
        return merged.map(to_intermediate)
    return map_struct(to_intermediate, merged)
def map_collection(func, collection, **kwargs):
    """
    Custom mapping for target collections: applies *func* to the targets of *collection*
    through ``map_struct``, forwarding *kwargs*, and returns a new instance of the
    collection's class that is built from the mapped targets and the keyword arguments
    returned by ``collection._copy_kwargs()``.
    """
    mapped = map_struct(func, collection.targets, **kwargs)
    collection_cls = collection.__class__
    copy_kwargs = collection._copy_kwargs()
    return collection_cls(mapped, **copy_kwargs)
def map_collection(func, collection, **kwargs):
    """
    Custom mapping for target collections: applies *func* to the targets of *collection*
    through ``map_struct``, forwarding *kwargs*. The mapped result is discarded and *None*
    is returned, so this variant is only useful for the side effects of *func*.
    """
    inner_targets = collection.targets
    map_struct(func, inner_targets, **kwargs)