Exemple #1
0
    def prepare_stageout(self, tmp_dir):
        """
        Prepares the stage-out of task outputs and returns a *StageInfo* object built from the
        masked outputs, the stage-out directory created below *tmp_dir*, and a mapping from each
        output to its counterpart obtained with ``os.environ`` patched to the task's ``env``.
        *None* is returned when the task defines no stage-out mask or when no output survives the
        masking.
        """
        # nothing to do without a stage-out mask
        mask = self.task.sandbox_stageout()
        if not mask:
            return None

        # evaluate the outputs twice: once in the current environment (outside view) and once
        # with os.environ replaced by the task's env (view from within the sandbox)
        outer_outputs = self.task.output()
        with patch_object(os, "environ", self.task.env, lock=True):
            inner_outputs = self.task.output()

        # reduce both structures to the masked subset
        outer_outputs = mask_struct(mask, outer_outputs)
        inner_outputs = mask_struct(mask, inner_outputs)
        if not outer_outputs:
            return None

        # look up the configured stage-out directory name and create it below tmp_dir
        config = Config.instance()
        config_section = self.sandbox_inst.get_config_section()
        dir_name = config.get_expanded(config_section, "stageout_dir")
        stageout_dir = tmp_dir.child(dir_name, type="d")
        stageout_dir.touch()

        # lookup: output -> sandbox output, paired by position in the flattened structures
        output_lookup = dict(zip(flatten(outer_outputs), flatten(inner_outputs)))

        return StageInfo(outer_outputs, stageout_dir, output_lookup)
Exemple #2
0
    def stagein(self, tmp_dir):
        """
        Copies the masked task inputs into a stage-in directory below *tmp_dir* and returns a
        *StageInfo* object built from the masked inputs, the stage-in directory and the structure
        of staged targets. *None* is returned when no ``stagein_dir_name`` is configured, when the
        task defines no stage-in mask, or when no input survives the masking.
        """
        # check if the stage-in dir is set
        cfg = Config.instance()
        section = self.sandbox_inst.get_config_section()
        stagein_dir_name = cfg.get_expanded(section, "stagein_dir_name")
        if not stagein_dir_name:
            return None

        # get the sandbox stage-in mask
        stagein_mask = self.task.sandbox_stagein()
        if not stagein_mask:
            return None

        # determine inputs as seen from outside and within the sandbox, the latter by evaluating
        # them with os.environ replaced by the task's env
        inputs = self.task.input()
        with patch_object(os, "environ", self.task.env, lock=True):
            sandbox_inputs = self.task.input()

        # apply the mask to both structs
        inputs = mask_struct(stagein_mask, inputs)
        sandbox_inputs = mask_struct(stagein_mask, sandbox_inputs)
        if not inputs:
            return None

        # create a lookup for input -> sandbox input, paired by position in the flattened structs
        sandbox_targets = dict(zip(flatten(inputs), flatten(sandbox_inputs)))

        # create the stage-in directory
        stagein_dir = tmp_dir.child(stagein_dir_name, type="d")
        stagein_dir.touch()

        # copy a single input target to its staged location and return the staged target
        def stagein_target(target):
            sandbox_target = sandbox_targets[target]
            staged_target = make_staged_target(stagein_dir, sandbox_target)
            logger.debug("stage-in {} to {}".format(target.path,
                                                    staged_target.path))
            target.copy_to_local(staged_target)
            return staged_target

        # custom mapping for target collections: map over the contained targets and hand the
        # mapped structure back instead of dropping it (the helper used to return None implicitly)
        # NOTE(review): presumably map_struct uses the return value of a custom mapping as the
        # mapped element - confirm against its documentation
        def map_collection(func, collection, **kwargs):
            return map_struct(func, collection.targets, **kwargs)

        # create the structure of staged inputs
        staged_inputs = map_struct(
            stagein_target,
            inputs,
            custom_mappings={TargetCollection: map_collection})

        logger.info("staged-in {} file(s)".format(len(stagein_dir.listdir())))

        return StageInfo(inputs, stagein_dir, staged_inputs)
Exemple #3
0
    def live_task_id(self):
        """
        Returns the task id computed from the *current* parameter values. The regular task id is
        derived from the task family and the parameters and is generated by luigi only once in the
        constructor, so it does not follow later parameter changes. This one is rebuilt on every
        call.
        """
        # snapshot the current attribute value of every parameter
        current_params = dict((name, getattr(self, name)) for name in self.param_kwargs)

        # always restrict to significant parameters; only_public was introduced in 2.8.0, so it
        # is passed only when to_str_params accepts it
        to_str_kwargs = {"only_significant": True}
        accepted_args = getargspec(self.to_str_params).args
        if "only_public" in accepted_args:
            to_str_kwargs["only_public"] = True

        # param_kwargs is patched only for the duration of the string conversion
        with patch_object(self, "param_kwargs", current_params):
            serialized_params = self.to_str_params(**to_str_kwargs)

        # build the id from the task family and the serialized parameters
        return luigi.task.task_id_str(self.get_task_family(), serialized_params)
Exemple #4
0
 def _fetch_output(self, args):
     """
     Calls ``fetch_task_output`` for this task with the unpacked *args* and returns its result.
     For the duration of the call, ``global_retries`` of ``law.target.remote`` is patched to 0.
     """
     import law.target.remote as remote_module

     with patch_object(remote_module, "global_retries", 0, lock=True):
         return fetch_task_output(self, *args)