Exemple #1
0
    def __init__(self, path=None, is_tmp=False, **kwargs):
        """Set up a local target, either at *path* or at a fresh temporary path.

        Temporary paths are built here rather than by luigi, which would use
        the tmp dir taken from the environment.

        :param path: Target location. When given, its scheme is removed, user
            and environment variables are expanded, and ``self.fs.abspath`` is
            applied.
        :param is_tmp: Only consulted for path creation when *path* is empty.
            Any truthy value requests a random path inside the configured
            ``target.tmp_dir``; a string is additionally appended as file
            extension (a leading dot is added when missing).
        :param kwargs: Forwarded to ``FileSystemTarget.__init__``.
        :raises Exception: When neither *path* nor *is_tmp* is set.
        """
        if path:
            # explicit location: drop the scheme, expand "~" and variables
            unschemed = remove_scheme(path)
            expanded = os.path.expandvars(os.path.expanduser(unschemed))
            path = self.fs.abspath(expanded)
        elif not is_tmp:
            raise Exception("either path or is_tmp must be set")
        else:
            # look up the configured tmp dir and create it when missing
            configured = Config.instance().get_expanded("target", "tmp_dir")
            tmp_dir = os.path.realpath(configured)
            if not self.fs.exists(tmp_dir):
                perm = Config.instance().get("target", "tmp_dir_permission")
                self.fs.mkdir(tmp_dir, perm=int(perm) if perm else perm)

            # draw random names until one is not taken yet
            path = os.path.join(tmp_dir, "luigi-tmp-%09d" % (random.randint(0, 999999999),))
            while self.fs.exists(path):
                path = os.path.join(tmp_dir, "luigi-tmp-%09d" % (random.randint(0, 999999999),))

            # a string value of is_tmp doubles as the file extension
            if isinstance(is_tmp, six.string_types):
                if not is_tmp.startswith("."):
                    is_tmp = "." + is_tmp
                path = path + is_tmp

        luigi.LocalTarget.__init__(self, path=path, is_tmp=is_tmp)
        FileSystemTarget.__init__(self, self.path, **kwargs)
Exemple #2
0
 def uri(b):
     """Join base *b* with the sanitized ``path`` of the enclosing scope.

     Leading slashes of the sanitized path and trailing slashes of the joined
     result are stripped. The scheme is removed from the result unless the
     enclosing ``scheme`` flag is truthy.
     """
     relative = self.sanitize_path(path).lstrip("/")
     joined = os.path.join(b, relative).rstrip("/")
     return joined if scheme else remove_scheme(joined)
Exemple #3
0
 def move_to_local(self, dst=None, perm=None, dir_perm=None, **kwargs):
     """Move this target to a local destination and return the resulting path.

     A truthy *dst* is resolved with ``get_path``, made absolute on the local
     file system and prefixed with the ``"file"`` scheme before it is handed
     to ``FileSystemFileTarget.move_to``; a falsy *dst* is passed on as is.
     *perm*, *dir_perm* and *kwargs* are forwarded unchanged.

     :return: The value returned by ``move_to`` with its scheme removed.
     """
     if dst:
         local_abs = self.fs.local_fs.abspath(get_path(dst))
         dst = add_scheme(local_abs, "file")

     move_opts = dict(perm=perm, dir_perm=dir_perm, **kwargs)
     moved = FileSystemFileTarget.move_to(self, dst, **move_opts)
     return remove_scheme(moved)
Exemple #4
0
    def open(self, path, mode, cache=None, **kwargs):
        """Generator that yields a local file object (or path) mirroring *path*.

        The function yields exactly once, so it is presumably wrapped by a
        context-manager decorator that is not visible here (TODO confirm).

        In mode ``"r"`` the file is first copied to a local location (the
        cache when caching is active, a temporary target otherwise) and that
        local copy is yielded. In mode ``"w"`` a temporary local target is
        yielded and, once the consumer is done without raising, copied to
        *path* if it exists.

        The local file object is managed by a ``with`` statement so that it is
        closed even when the consumer raises while the generator is suspended;
        previously the handle was only closed on the non-raising path.

        :param path: Path of the file, normalized through ``self.abspath``.
        :param mode: Either ``"r"`` or ``"w"``.
        :param cache: ``None`` selects caching whenever ``self.cache`` is set;
            a truthy value is downgraded to ``False`` when there is no cache.
        :param kwargs: Forwarded to ``self._cached_copy``. The private key
            ``_yield_path`` is popped first; when truthy, the local path is
            yielded instead of an open file object.
        :raises Exception: When *mode* is neither ``"r"`` nor ``"w"``.
        """
        if cache is None:
            cache = self.cache is not None
        elif cache and self.cache is None:
            cache = False

        path = self.abspath(path)

        yield_path = kwargs.pop("_yield_path", False)

        if mode == "r":
            if cache:
                lpath = self._cached_copy(path, None, cache=True, **kwargs)
                lpath = remove_scheme(lpath)
            else:
                # keep the extension of the remote file for the temporary copy
                tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
                lpath = tmp.path

                self._cached_copy(path,
                                  add_scheme(lpath, "file"),
                                  cache=False,
                                  **kwargs)
            try:
                if yield_path:
                    yield lpath
                else:
                    # the with-block closes the handle on success and on error
                    with open(lpath, "r") as f:
                        yield f
            finally:
                if not cache:
                    # drop the reference to the temporary target; presumably
                    # its finalizer removes the file (TODO confirm)
                    del tmp

        elif mode == "w":
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = tmp.path

            try:
                if yield_path:
                    yield lpath
                else:
                    # closing before the copy below ensures buffered data is
                    # written; on error the handle is closed and the copy is
                    # skipped because the exception propagates
                    with open(lpath, "w") as f:
                        yield f

                if tmp.exists():
                    self._cached_copy(add_scheme(lpath, "file"),
                                      path,
                                      cache=cache,
                                      **kwargs)
            finally:
                # see the note in the read branch about releasing tmp
                del tmp

        else:
            raise Exception("unknown mode {}, use r or w".format(mode))
Exemple #5
0
    def open(self, path, mode, cache=None, **kwargs):
        """Return a ``RemoteFileProxy`` around a local copy of *path*.

        For mode ``"r"`` the file is copied to a local place first (the cache
        when caching is active, a temporary target otherwise). For mode
        ``"w"`` a temporary local target is opened; the callback passed as
        ``success_fn`` copies it to *path* and removes it, the one passed as
        ``failure_fn`` only removes it.

        :param path: Path of the file, normalized through ``self.abspath``.
        :param mode: Either ``"r"`` or ``"w"``.
        :param cache: ``None`` selects caching whenever ``self.cache`` is set;
            a truthy value is downgraded to ``False`` when there is no cache.
        :param kwargs: Forwarded to ``self._cached_copy``. The private key
            ``_yield_path`` is popped first; when truthy, the proxy wraps the
            local path instead of an open file object.
        :raises Exception: When *mode* is neither ``"r"`` nor ``"w"``.
        """
        if cache is None:
            cache = self.cache is not None
        elif cache and self.cache is None:
            cache = False

        yield_path = kwargs.pop("_yield_path", False)

        path = self.abspath(path)
        tmp = None

        if mode not in ("r", "w"):
            raise Exception("unknown mode {}, use r or w".format(mode))

        if mode == "r":
            if cache:
                cached = self._cached_copy(path, None, cache=True, **kwargs)
                lpath = remove_scheme(cached)
            else:
                # keep the extension of the remote file for the temporary copy
                tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
                lpath = tmp.path
                local_uri = add_scheme(lpath, "file")
                self._cached_copy(path, local_uri, cache=False, **kwargs)

            def discard_read_tmp():
                # cached copies stay in place, only temporary ones are removed
                if cache:
                    return
                if tmp.exists():
                    tmp.remove()

            handle = lpath if yield_path else open(lpath, "r")
            return RemoteFileProxy(handle,
                                   success_fn=discard_read_tmp,
                                   failure_fn=discard_read_tmp)

        # write mode
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        lpath = tmp.path

        def discard_write_tmp():
            if tmp.exists():
                tmp.remove()

        def publish_and_discard():
            # copy the written file to its destination, then always clean up
            try:
                if tmp.exists():
                    local_uri = add_scheme(lpath, "file")
                    self._cached_copy(local_uri, path, cache=cache, **kwargs)
            finally:
                discard_write_tmp()

        handle = lpath if yield_path else open(lpath, "w")
        return RemoteFileProxy(handle,
                               success_fn=publish_and_discard,
                               failure_fn=discard_write_tmp)
Exemple #6
0
    def open(self, path, mode, perm=None, dir_perm=None, cache=None, **kwargs):
        """Return a ``RemoteFileProxy`` around a local copy of *path*.

        Modes starting with ``"r"`` copy the file to a local place first (the
        cache when caching is active, a temporary target otherwise). Every
        other mode opens a temporary local target; the callback passed as
        ``success_fn`` copies it to *path* and removes it, the one passed as
        ``failure_fn`` only removes it.

        :param path: Path of the file, normalized through ``self.abspath``.
        :param mode: Mode string handed to the builtin ``open``.
        :param perm: Forwarded to ``self.copy`` when writing back.
        :param dir_perm: Forwarded to ``self.copy`` when writing back.
        :param cache: Ignored (treated as ``False``) when ``self.cache`` is
            ``None``; ``None`` falls back to ``self.use_cache``; anything else
            is converted with ``bool``.
        :param kwargs: Forwarded to the copy calls. The private key
            ``_yield_path`` is popped first; when truthy, the proxy wraps the
            local path instead of an open file object.
        """
        if self.cache is None:
            cache = False
        else:
            cache = self.use_cache if cache is None else bool(cache)

        yield_path = kwargs.pop("_yield_path", False)
        path = self.abspath(path)
        tmp = None
        read_mode = mode.startswith("r")

        if not read_mode:
            # write or update: work on a temporary file, publish on success
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = tmp.path

            def discard_written():
                tmp.remove(silent=True)

            def publish_and_discard():
                # stays True when exists() itself fails, so that removal is
                # still attempted in that case
                present = True
                try:
                    present = tmp.exists()
                    if present:
                        copy_opts = dict(perm=perm, dir_perm=dir_perm, cache=cache)
                        self.copy(tmp.uri(), path, **dict(copy_opts, **kwargs))
                finally:
                    if present:
                        tmp.remove(silent=True)

            handle = lpath if yield_path else open(lpath, mode)
            return RemoteFileProxy(handle,
                                   success_fn=publish_and_discard,
                                   failure_fn=discard_written)

        # read: fetch into the cache or into a temporary file
        if cache:
            fetched = self._cached_copy(path, None, cache=cache, **kwargs)
        else:
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            fetched = self.copy(path, tmp.uri(), cache=cache, **kwargs)
        lpath = remove_scheme(fetched)

        def discard_fetched():
            # cached copies stay in place, only temporary ones are removed
            if cache:
                return
            if tmp and tmp.exists():
                tmp.remove()

        handle = lpath if yield_path else open(lpath, mode)
        return RemoteFileProxy(handle,
                               success_fn=discard_fetched,
                               failure_fn=discard_fetched)
Exemple #7
0
    def __init__(self,
                 path=None,
                 fs=LocalFileSystem.default_instance,
                 is_tmp=False,
                 tmp_dir=None,
                 **kwargs):
        """Set up a local target, either at *path* or at a fresh temporary path.

        Temporary paths are built here rather than by luigi, which would use
        the tmp dir taken from the environment.

        :param path: Target location. When given, it is resolved with
            ``get_path``, stripped of its scheme, expanded (user and
            environment variables) and made absolute via ``fs.abspath``.
        :param fs: File system to use. A string is wrapped as
            ``LocalFileSystem(fs)``.
        :param is_tmp: Only consulted for path creation when *path* is empty.
            Any truthy value requests a random path inside the tmp dir; a
            string is additionally appended as file extension (a leading dot
            is added when missing).
        :param tmp_dir: Directory for temporary paths, resolved with
            ``get_path``. Defaults to the configured ``target.tmp_dir``.
        :param kwargs: Forwarded to ``FileSystemTarget.__init__``.
        :raises Exception: When neither *path* nor *is_tmp* is set.
        """
        if isinstance(fs, six.string_types):
            fs = LocalFileSystem(fs)

        if path:
            # make sure path is neither a target nor carries a scheme, then
            # normalize it
            plain = remove_scheme(get_path(path))
            expanded = os.path.expandvars(os.path.expanduser(plain))
            path = fs.abspath(expanded)
        elif not is_tmp:
            raise Exception("either path or is_tmp must be set")
        else:
            # pick the tmp dir (argument first, config second), create if needed
            cfg = Config.instance()
            if tmp_dir:
                tmp_dir = get_path(tmp_dir)
            else:
                configured = cfg.get_expanded("target", "tmp_dir")
                tmp_dir = os.path.realpath(configured)
            if not fs.exists(tmp_dir):
                fs.mkdir(tmp_dir, perm=cfg.get_expanded_int("target", "tmp_dir_perm"))

            # draw random names until one is not taken yet
            path = os.path.join(
                tmp_dir, "luigi-tmp-{:09d}".format(random.randint(0, 999999999)))
            while fs.exists(path):
                path = os.path.join(
                    tmp_dir, "luigi-tmp-{:09d}".format(random.randint(0, 999999999)))

            # a string value of is_tmp doubles as the file extension
            if isinstance(is_tmp, six.string_types):
                if not is_tmp.startswith("."):
                    is_tmp = "." + is_tmp
                path = path + is_tmp

        luigi.LocalTarget.__init__(self, path=path, is_tmp=is_tmp)
        FileSystemTarget.__init__(self, self.path, fs=fs, **kwargs)
Exemple #8
0
 def uri(b):
     """Join base *b* with the sanitized ``path`` of the enclosing scope.

     Leading slashes of the sanitized path and trailing slashes of the joined
     result are stripped. The scheme is kept only when the enclosing
     ``scheme`` flag is truthy.
     """
     tail = self.sanitize_path(path).lstrip("/")
     full = os.path.join(b, tail).rstrip("/")
     if scheme:
         return full
     return remove_scheme(full)
Exemple #9
0
 def _unscheme(self, path):
     """Strip the scheme from *path* only when it is ``"file"``.

     Paths with any other scheme (as reported by ``get_scheme``) are returned
     unchanged.
     """
     if get_scheme(path) == "file":
         return remove_scheme(path)
     return path
Exemple #10
0
    def _cached_copy(self,
                     src,
                     dst,
                     cache=None,
                     prefer_cache=False,
                     validate=None,
                     **kwargs):
        """Copy *src* to *dst*, routing the transfer through ``self.cache`` when enabled.

        The copy direction is classified by a two-letter mode built from
        whether source and destination are local (``"l"``) or remote
        (``"r"``); a missing destination is encoded as ``"c"`` (cache only).
        Caching is switched off for the modes ``"ll"``, ``"lc"`` and ``"rr"``.

        :param src: Source path, made absolute via ``self.abspath``.
        :param dst: Destination path, or a falsy value to copy into the cache
            only. A truthy value is made absolute via ``self.abspath``.
        :param cache: Caching request, resolved through ``self._use_cache``.
        :param prefer_cache: When true and *src* is already in the cache, the
            cached file is used without comparing it to the remote file.
        :param validate: Forwarded to ``self._atomic_copy`` for remote-to-cache
            and uncached copies.
        :param kwargs: Forwarded to ``self._atomic_copy`` and ``self.stat``.
        :return: With caching: *dst* for the modes ``"lr"`` and ``"rl"``, the
            ``file``-scheme cache path of *src* for ``"rc"``. Without caching:
            the full destination (including scheme) when the destination is
            local, otherwise *dst*.
        :raises Exception: When *dst* is empty although caching is disabled.
        """
        cache = self._use_cache(cache)

        # ensure absolute paths
        src = self.abspath(src)
        dst = self.abspath(dst) if dst else None

        # determine the copy mode for code readability
        # (remote-remote: "rr", remote-local: "rl", remote-cache: "rc", ...)
        src_local = self.is_local(src)
        dst_local = dst and self.is_local(dst)
        # the flags are used as string indices: a false value picks "r", a true one "l"
        mode = "rl"[src_local] + ("rl"[dst_local] if dst is not None else "c")

        # disable caching when the mode is local-local, local-cache or remote-remote
        if mode in ("ll", "lc", "rr"):
            cache = False

        # dst can be None, but in this case, caching should be enabled
        if dst is None and not cache:
            raise Exception(
                "copy destination must not be empty when caching is disabled")

        # paths including scheme and base
        full_src = src if has_scheme(src) else self.gfal.url(src,
                                                             cmd="filecopy")
        full_dst = None
        if dst:
            full_dst = dst if has_scheme(dst) else self.gfal.url(
                dst, cmd="filecopy")

        if cache:
            # NOTE(review): the third assignment rebinds kwargs_no_retries to the
            # original kwargs, so the copy with retries=0 built in the two lines
            # before is discarded and the stat calls below receive the caller's
            # kwargs unchanged. Confirm whether this override is intentional.
            kwargs_no_retries = kwargs.copy()
            kwargs_no_retries["retries"] = 0
            kwargs_no_retries = kwargs

            # handle 3 cases: lr, rl, rc
            if mode == "lr":
                # strategy: copy to remote, copy to cache, sync stats

                # copy to remote, no need to validate as we need the stat anyway
                self._atomic_copy(src, full_dst, validate=False, **kwargs)
                rstat = self.stat(dst, **kwargs_no_retries)

                # remove the cache entry
                if dst in self.cache:
                    self.cache.remove(dst)

                # allocate cache space and copy to cache
                lstat = _local_fs.stat(src)
                self.cache.allocate(lstat.st_size)
                full_cdst = add_scheme(self.cache.cache_path(dst), "file")
                with self.cache.lock(dst):
                    self._atomic_copy(src, full_cdst, validate=False)
                    # stamp the cache entry with the current time and the remote mtime
                    self.cache.touch(dst, (int(time.time()), rstat.st_mtime))

                return dst

            else:  # rl, rc
                # strategy: copy to cache when not up to date, sync stats, opt. copy to local

                # build the full cache path of the src file
                full_csrc = add_scheme(self.cache.cache_path(src), "file")

                # if the file is cached and prefer_cache is true,
                # return the cache path, no questions asked
                # otherwise, check if the file is there and up to date
                if not prefer_cache or src not in self.cache:
                    with self.cache.lock(src):
                        # in cache and outdated?
                        # (outdated: cached and remote mtime differ by more than 1)
                        rstat = self.stat(src, **kwargs_no_retries)
                        if src in self.cache and abs(
                                self.cache.mtime(src) - rstat.st_mtime) > 1:
                            self.cache.remove(src, lock=False)
                        # in cache at all?
                        if src not in self.cache:
                            self.cache.allocate(rstat.st_size)
                            self._atomic_copy(full_src,
                                              full_csrc,
                                              validate=validate,
                                              **kwargs)
                            self.cache.touch(
                                src, (int(time.time()), rstat.st_mtime))

                if mode == "rl":
                    # copy to local without permission bits
                    copy_no_perm(remove_scheme(full_csrc),
                                 remove_scheme(full_dst))
                    return dst
                else:  # rc
                    return full_csrc

        else:
            # simply copy and return the dst path
            self._atomic_copy(full_src, full_dst, validate=validate, **kwargs)

            return full_dst if dst_local else dst
Exemple #11
0
 def copy_to_local(self, dst=None, **kwargs):
     """Copy this target to a local destination and return the resulting path.

     A truthy *dst* is resolved with ``get_path``, made absolute on the local
     file system and prefixed with the ``"file"`` scheme; a falsy *dst* is
     passed on as is. *kwargs* are forwarded to
     ``FileSystemFileTarget.copy_to``.

     :return: The value returned by ``copy_to`` with its scheme removed.
     """
     if dst:
         local_abs = self.fs.local_fs.abspath(get_path(dst))
         target = add_scheme(local_abs, "file")
     else:
         target = dst

     copied = FileSystemFileTarget.copy_to(self, target, **kwargs)
     return remove_scheme(copied)
Exemple #12
0
 def move_from_local(self, *args, **kwargs):
     """Call ``self.move_from`` with all arguments and strip the scheme from its result."""
     moved = self.move_from(*args, **kwargs)
     return remove_scheme(moved)
Exemple #13
0
 def move_to_local(self, *args, **kwargs):
     """Call ``self.move_to`` with all arguments and strip the scheme from its result."""
     moved = self.move_to(*args, **kwargs)
     return remove_scheme(moved)
Exemple #14
0
 def move_to_local(self, dst=None, **kwargs):
     """Move this target to a local destination and return the resulting path.

     A truthy *dst* is resolved with ``get_path``, made absolute on the local
     file system and prefixed with the ``"file"`` scheme; a falsy *dst* is
     passed on as is. *kwargs* are forwarded to ``self.move_to``.

     :return: The value returned by ``move_to`` with its scheme removed.
     """
     target = dst
     if target:
         local_abs = self.fs.local_fs.abspath(get_path(target))
         target = add_scheme(local_abs, "file")

     return remove_scheme(self.move_to(target, **kwargs))