def open(self, path, mode, cache=None, **kwargs):
    """
    Generator giving access to the file at *path* through a local copy, either for reading
    (*mode* ``"r"``) or for writing (*mode* ``"w"``). It yields exactly once and is meant to be
    driven as a context manager (the wrapping decorator is not visible in this chunk).

    *cache* controls whether the cache of this file system is used. When *None*, the cache is used
    if one is configured (``self.cache`` is not *None*). When caching is requested but no cache is
    configured, it is silently disabled. All additional *kwargs* are forwarded to
    ``_cached_copy``, except for the private ``_yield_path`` flag: when it is *True*, the path of
    the local copy is yielded instead of an open file object.

    In read mode, the file is copied to a local location first (the cache, or a temporary file
    when not caching). In write mode, a temporary local file is provided and, when it exists after
    the caller's block finished without an exception, it is copied to *path*.

    An opened file object is closed in all cases, also when the caller's block raises. An
    *Exception* is raised for any *mode* other than ``"r"`` and ``"w"``.
    """
    # resolve the effective cache flag
    if cache is None:
        cache = self.cache is not None
    elif cache and self.cache is None:
        cache = False

    path = self.abspath(path)

    # private flag, must not be forwarded to _cached_copy
    yield_path = kwargs.pop("_yield_path", False)

    if mode == "r":
        if cache:
            # the cached copy call returns the location of the local copy
            lpath = self._cached_copy(path, None, cache=True, **kwargs)
            lpath = remove_scheme(lpath)
        else:
            # no caching, copy into a temporary local file
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = tmp.path
            self._cached_copy(path, add_scheme(lpath, "file"), cache=False, **kwargs)

        try:
            if yield_path:
                yield lpath
            else:
                # the with block closes the file even when the caller's block raises
                with open(lpath, "r") as f:
                    yield f
        finally:
            # drop the reference to the temporary target
            # NOTE(review): presumably this triggers removal of the temporary file, confirm
            # against LocalFileTarget
            if not cache:
                del tmp

    elif mode == "w":
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        lpath = tmp.path

        try:
            if yield_path:
                yield lpath
            else:
                # the with block closes the file even when the caller's block raises
                with open(lpath, "w") as f:
                    yield f

            # only reached when the caller's block succeeded, so failed writes are not uploaded
            if tmp.exists():
                self._cached_copy(add_scheme(lpath, "file"), path, cache=cache, **kwargs)
        finally:
            del tmp

    else:
        raise Exception("unknown mode {}, use r or w".format(mode))
def localize(self, mode="r", perm=None, dir_perm=None, tmp_dir=None, **kwargs):
    """
    Generator that yields a local file target representing this target, meant to be driven as a
    context manager (the wrapping decorator is not visible in this chunk).

    In *mode* ``"r"``, the file is opened through the file system of this target with *perm* and
    all *kwargs* forwarded, and a local target pointing to the resulting local path is yielded.

    In *mode* ``"w"`` or ``"a"``, a temporary local target is created (in *tmp_dir* when given)
    and yielded. In append mode, the content of this target is copied into it first when this
    target exists. After the caller's block, the temporary target is copied back with *perm*,
    *dir_perm* and *kwargs* when it exists, otherwise a warning is logged. The temporary target is
    removed in any case. An *Exception* is raised for any other *mode*.
    """
    if mode not in ("r", "w", "a"):
        raise Exception("unknown mode '{}', use 'r', 'w' or 'a'".format(mode))

    logger.debug("localizing file target {!r} with mode '{}'".format(self, mode))

    # read mode: hand out the path provided by the file system as a local target
    if mode == "r":
        with self.fs.open(self.path, "r", _yield_path=True, perm=perm, **kwargs) as lpath:
            yield LocalFileTarget(lpath)
        return

    # write or append mode: work on a temporary local target
    local_tmp = LocalFileTarget(is_tmp=self.ext(n=1) or True, tmp_dir=tmp_dir)

    # appending requires the current content to be present locally
    if mode == "a" and self.exists():
        self.copy_to_local(local_tmp)

    try:
        yield local_tmp

        if not local_tmp.exists():
            logger.warning(
                "cannot move non-existing localized file target {!r}".format(self))
        else:
            self.copy_from_local(local_tmp, perm=perm, dir_perm=dir_perm, **kwargs)
    finally:
        local_tmp.remove()
def localize(self, mode="r", perm=None, parent_perm=None, **kwargs):
    """
    Generator that yields a local file target representing this target, meant to be driven as a
    context manager (the wrapping decorator is not visible in this chunk).

    In *mode* ``"w"``, a temporary local target is yielded. When it exists after the caller's
    block, it is copied to this target with *parent_perm* passed as ``dir_perm`` plus all
    *kwargs*, and ``chmod`` is called with *perm*. Otherwise a warning is logged.

    In *mode* ``"r"``, the file is opened through the file system of this target with *kwargs*
    forwarded, and a local target pointing to the resulting local path is yielded. An *Exception*
    is raised for any other *mode*.
    """
    if mode not in ("r", "w"):
        raise Exception("unknown mode '{}', use r or w".format(mode))

    if mode == "w":
        local_tmp = LocalFileTarget(is_tmp=self.ext() or True)
        try:
            yield local_tmp

            if not local_tmp.exists():
                logger.warning(
                    "cannot move non-existing localized file target {!r}".format(self))
            else:
                self.copy_from_local(local_tmp, dir_perm=parent_perm, **kwargs)
                self.chmod(perm)
        finally:
            # drop the reference to the temporary target
            del local_tmp
        return

    # read mode
    with self.fs.open(self.path, "r", _yield_path=True, **kwargs) as lpath:
        yield LocalFileTarget(lpath)
def hadd_task(task, inputs, output, cwd=None, local=False, force=True, hadd_args=None):
    """
    Merges root files via ``hadd`` on behalf of *task*, e.g. for tasks inheriting from
    :py:class:`law.contrib.tasks.MergeCascade`.

    *inputs* should be a sequence of local targets representing the files to merge into *output*.
    Plain path strings are accepted for both and are wrapped into local file targets. *cwd* is the
    working directory in which hadd is invoked, given as a directory target or a path string. When
    empty, a temporary directory is used. Messages and steps are published through the
    :py:meth:`law.Task.publish_message` and :py:meth:`law.Task.publish_step` methods of *task*.

    When *local* is *True*, input and output targets are assumed to be local and merging is based
    on their local paths. Otherwise, the inputs are fetched into *cwd* first and the output target
    is localized for writing. When *force* is *True*, an existing output file is removed before
    merging (NOTE(review): only the local branch reads *force*). *hadd_args* can be a sequence of
    additional arguments that are added to the hadd command.

    With exactly one input, no hadd call is made and the single file is used as the output
    instead. An *Exception* is raised when hadd exits with a non-zero code.
    """
    # wrap plain path strings into local file targets
    targets = []
    for inp in inputs:
        if isinstance(inp, six.string_types):
            inp = LocalFileTarget(inp)
        targets.append(inp)
    inputs = targets

    if isinstance(output, six.string_types):
        output = LocalFileTarget(output)

    # resolve the working directory, falling back to a temporary one
    if not cwd:
        cwd = LocalDirectoryTarget(is_tmp=True)
    elif isinstance(cwd, six.string_types):
        cwd = LocalDirectoryTarget(cwd)
    cwd.touch()

    # helper to build the quoted hadd command
    def hadd_cmd(input_paths, output_path):
        parts = ["hadd", "-n", "0", "-d", cwd.path]
        if hadd_args:
            parts += make_list(hadd_args)
        parts.append(output_path)
        parts.extend(input_paths)
        return quote_cmd(parts)

    # helper to run a hadd command in bash and fail on a non-zero exit code
    def run_hadd(cmd, **popen_kwargs):
        exit_code = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            **popen_kwargs)[0]
        if exit_code != 0:
            raise Exception("hadd failed")

    # helper to publish the size of the merged file
    def report_size(target):
        size = human_bytes(target.stat().st_size, fmt=True)
        task.publish_message("merged file size: {}".format(size))

    if local:
        # when local, there is no need to download inputs
        input_paths = [inp.path for inp in inputs]

        with task.publish_step("merging ...", runtime=True):
            # clear the output if necessary
            if output.exists() and force:
                output.remove()

            if len(inputs) != 1:
                run_hadd(hadd_cmd(input_paths, output.path))
            else:
                # a single file is just copied
                output.copy_from_local(inputs[0])

        report_size(output)
        return

    # when not local, the inputs are fetched into the cwd first
    with task.publish_step("fetching inputs ...", runtime=True):
        def fetch(inp):
            inp.copy_to_local(cwd.child(inp.unique_basename, type="f"), cache=False)
            return inp.unique_basename

        def callback(i):
            task.publish_message("fetch file {} / {}".format(i + 1, len(inputs)))

        bases = map_verbose(fetch, inputs, every=5, callback=callback)

    # merge into the localized output
    with output.localize("w", cache=False) as tmp_out:
        with task.publish_step("merging ...", runtime=True):
            if len(bases) == 1:
                # a single fetched file is used directly by pointing the localized output to it
                tmp_out.path = cwd.child(bases[0]).path
            else:
                # input names are relative to the cwd, so hadd runs there
                run_hadd(hadd_cmd(bases, tmp_out.path), cwd=cwd.path)

        report_size(tmp_out)