Example #1
def fetch_local_list(args, recursive = None):
    def _get_filelist_local(loc_list, local_uri, cache):
        info(u"Compiling list of local files...")

        if deunicodise(local_uri.basename()) == "-":
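            # "-" means read from stdin: register a placeholder entry
            # whose size and mtime are unknown (-1).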
            loc_list["-"] = {
                'full_name_unicode' : '-',
                'full_name' : '-',
                'size' : -1,
                'mtime' : -1,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            if cfg.follow_symlinks:
                filelist = _fswalk_follow_symlinks(local_path)
            else:
                filelist = _fswalk_no_symlinks(local_path)
            single_file = False
        else:
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        for root, dirs, files in filelist:
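            # Rewrite the absolute walk root so listing keys become paths
            # relative to the URI's basename.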
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    'dev'   : sr.st_dev,
                    'inode' : sr.st_ino,
                    'uid' : sr.st_uid,
                    'gid' : sr.st_gid,
                    'sr': sr # save it all, may need it in preserve_attrs_list
                    ## TODO: Possibly more to save here...
                }
                if 'md5' in cfg.sync_checks:
                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                    if md5 is None:
                        try:
                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
                        except IOError:
                            continue
                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        if cfg.cache_file:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case = False)
    single_file = False

    if type(args) not in (list, tuple):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if uri.type != 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## single_file is True if and only if the user specified a single
    ## local URI and that URI refers to a FILE. It is False if the URI
    ## referred to a DIR, even when that dir contained only one FILE;
    ## that is not a case of single_file == True.
    if len(local_list) > 1:
        single_file = False

    _maintain_cache(cache, local_list)

    return local_list, single_file
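
A minimal usage sketch for the version above (hypothetical directory name; assumes the s3cmd runtime this module imports, with Config() already initialised from a valid configuration):

# Hypothetical caller: walk a directory tree and inspect the result.
local_list, single_file = fetch_local_list("backup/photos", recursive=True)
for rel_name in local_list:
    entry = local_list[rel_name]
    print(rel_name, entry['size'], entry['mtime'])
print("single file:", single_file)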
Example #2
    def _maintain_cache(cache, local_list):
        # if getting the file list from files_from, it is going to be
        # a subset of the actual tree.  We should not purge content
        # outside of that subset as we don't know if it's valid or
        # not.  Leave it to a non-files_from run to purge.
        if cfg.cache_file and len(cfg.files_from) == 0:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'],
                                       local_list[i]['inode'],
                                       local_list[i]['mtime'],
                                       local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case=False)
    single_file = False

    if type(args) not in (list, tuple, set):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive
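
The _maintain_cache logic above is a mark-and-sweep pass: mark every cache entry for purge, unmark the ones still matched by local_list, then purge whatever stayed marked. A toy re-implementation of the idea, using a plain dict in place of s3cmd's HashCache (the class and its fields here are illustrative, not the real HashCache API):

class ToyCache:
    """Mark-and-sweep cache keyed the way HashCache is used above:
    (dev, inode, mtime, size) -> md5."""
    def __init__(self):
        self.entries = {}
        self._doomed = set()

    def mark_all_for_purge(self):
        # Assume everything is stale until proven otherwise.
        self._doomed = set(self.entries)

    def unmark_for_purge(self, dev, inode, mtime, size):
        # A file with these attributes still exists; keep its entry.
        self._doomed.discard((dev, inode, mtime, size))

    def purge(self):
        # Only entries for vanished (or changed) files remain marked.
        for key in self._doomed:
            del self.entries[key]
        self._doomed = set()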
Example #3
def fetch_local_list(args, is_src = False, recursive = None):

    def _fetch_local_list_info(loc_list):
        len_loc_list = len(loc_list)
        total_size = 0
        info(u"Running stat() and reading/calculating MD5 values on %d files, this may take some time..." % len_loc_list)
        counter = 0
        for relative_file in loc_list:
            counter += 1
            if counter % 1000 == 0:
                info(u"[%d/%d]" % (counter, len_loc_list))

            if relative_file == '-': continue

            full_name = loc_list[relative_file]['full_name']
            try:
                sr = os.stat_result(os.stat(deunicodise(full_name)))
            except OSError as e:
                if e.errno == errno.ENOENT:
                    # file was removed async to us getting the list
                    continue
                else:
                    raise
            loc_list[relative_file].update({
                'size' : sr.st_size,
                'mtime' : sr.st_mtime,
                'dev'   : sr.st_dev,
                'inode' : sr.st_ino,
                'uid' : sr.st_uid,
                'gid' : sr.st_gid,
                'sr': sr # save it all, may need it in preserve_attrs_list
                ## TODO: Possibly more to save here...
            })
            total_size += sr.st_size
            if 'md5' in cfg.sync_checks:
                md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                if md5 is None:
                    try:
                        md5 = loc_list.get_md5(relative_file) # this does the file I/O
                    except IOError:
                        continue
                    cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5, sr.st_size)
        return total_size


    def _get_filelist_local(loc_list, local_uri, cache):
        info(u"Compiling list of local files...")

        if local_uri.basename() == "-":
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except Exception:
                uid = 0
                gid = 0
            loc_list["-"] = {
                'full_name' : '-',
                'size' : -1,
                'mtime' : -1,
                'uid' : uid,
                'gid' : gid,
                'dev' : 0,
                'inode': 0,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = local_uri.basename()
            local_path = local_uri.path()
            if is_src and len(cfg.files_from):
                filelist = _get_filelist_from_file(cfg, local_path)
                single_file = False
            else:
                if cfg.follow_symlinks:
                    filelist = _fswalk_follow_symlinks(local_path)
                else:
                    filelist = _fswalk_no_symlinks(local_path)
                single_file = False
        else:
            local_base = ""
            local_path = local_uri.dirname()
            filelist = [( local_path, [], [local_uri.basename()] )]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(deunicodise(full_name)):
                    if os.path.exists(deunicodise(full_name)):
                        warning(u"Skipping over non regular file: %s" % full_name)
                    continue
                if os.path.islink(deunicodise(full_name)):
                    if not cfg.follow_symlinks:
                        warning(u"Skipping over symbolic link: %s" % full_name)
                        continue
                relative_file = os.path.join(rel_root, f)
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                loc_list[relative_file] = {
                    'full_name' : full_name,
                }

        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        # if getting the file list from files_from, it is going to be
        # a subset of the actual tree.  We should not purge content
        # outside of that subset as we don't know if it's valid or
        # not.  Leave it to a non-files_from run to purge.
        if cfg.cache_file and len(cfg.files_from) == 0:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case = False)
    single_file = False

    if type(args) not in (list, tuple, set):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if uri.type != 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## single_file is True if and only if the user specified a single
    ## local URI and that URI refers to a FILE. It is False if the URI
    ## referred to a DIR, even when that dir contained only one FILE;
    ## that is not a case of single_file == True.
    if len(local_list) > 1:
        single_file = False

    local_list, exclude_list = filter_exclude_include(local_list)
    total_size = _fetch_local_list_info(local_list)
    _maintain_cache(cache, local_list)
    return local_list, single_file, exclude_list, total_size
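
The md5 branch in _fetch_local_list_info only reads a file when the cache misses on its (st_dev, st_ino, st_mtime, st_size) tuple, so touching, truncating, or replacing a file invalidates its cached digest. A self-contained sketch of that lookup discipline (cached_md5 and the dict-backed cache are illustrative, not the real HashCache interface):

import hashlib

def cached_md5(entries, path, sr):
    """Return the file's MD5, hitting the disk only on a cache miss."""
    key = (sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
    digest = entries.get(key)
    if digest is None:
        with open(path, 'rb') as f:
            digest = hashlib.md5(f.read()).hexdigest()  # the only file I/O
        entries[key] = digest
    return digest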
Example #4
    def _maintain_cache(cache, local_list):
        # if getting the file list from files_from, it is going to be
        # a subset of the actual tree.  We should not purge content
        # outside of that subset as we don't know if it's valid or
        # not.  Leave it to a non-files_from run to purge.
        if cfg.cache_file and len(cfg.files_from) == 0:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case = False)
    single_file = False

    if type(args) not in (list, tuple):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive
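
One possible tightening of the args normalisation above: isinstance() is the idiomatic type check and, unlike comparing type(args) against a tuple of types, it also accepts subclasses. A hedged alternative, not what the code above does:

if not isinstance(args, (list, tuple)):
    args = [args]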
Example #5
def fetch_local_list(args, is_src=False, recursive=None):
    def _fetch_local_list_info(loc_list):
        len_loc_list = len(loc_list)
        total_size = 0
        info(
            u"Running stat() and reading/calculating MD5 values on %d files, this may take some time..."
            % len_loc_list)
        counter = 0
        for relative_file in loc_list:
            counter += 1
            if counter % 1000 == 0:
                info(u"[%d/%d]" % (counter, len_loc_list))

            if relative_file == '-': continue

            full_name = loc_list[relative_file]['full_name']
            try:
                sr = os.stat_result(os.stat(deunicodise(full_name)))
            except OSError as e:
                if e.errno == errno.ENOENT:
                    # file was removed async to us getting the list
                    continue
                else:
                    raise
            loc_list[relative_file].update({
                'size': sr.st_size,
                'mtime': sr.st_mtime,
                'dev': sr.st_dev,
                'inode': sr.st_ino,
                'uid': sr.st_uid,
                'gid': sr.st_gid,
                'sr': sr  # save it all, may need it in preserve_attrs_list
                ## TODO: Possibly more to save here...
            })
            total_size += sr.st_size
            if 'md5' in cfg.sync_checks:
                md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                if md5 is None:
                    try:
                        md5 = loc_list.get_md5(
                            relative_file)  # this does the file I/O
                    except IOError:
                        continue
                    cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size,
                              md5)
                loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino,
                                         md5, sr.st_size)
        return total_size

    def _get_filelist_local(loc_list, local_uri, cache):
        info(u"Compiling list of local files...")

        if local_uri.basename() == "-":
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except Exception:
                uid = 0
                gid = 0
            loc_list["-"] = {
                'full_name': '-',
                'size': -1,
                'mtime': -1,
                'uid': uid,
                'gid': gid,
                'dev': 0,
                'inode': 0,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = local_uri.basename()
            local_path = local_uri.path()
            if is_src and len(cfg.files_from):
                filelist = _get_filelist_from_file(cfg, local_path)
                single_file = False
            else:
                if cfg.follow_symlinks:
                    filelist = _fswalk_follow_symlinks(local_path)
                else:
                    filelist = _fswalk_no_symlinks(local_path)
                single_file = False
        else:
            local_base = ""
            local_path = local_uri.dirname()
            filelist = [(local_path, [], [local_uri.basename()])]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(deunicodise(full_name)):
                    if os.path.exists(deunicodise(full_name)):
                        warning(u"Skipping over non regular file: %s" %
                                full_name)
                    continue
                if os.path.islink(deunicodise(full_name)):
                    if not cfg.follow_symlinks:
                        warning(u"Skipping over symbolic link: %s" % full_name)
                        continue
                relative_file = os.path.join(rel_root, f)
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                loc_list[relative_file] = {
                    'full_name': full_name,
                }

        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        # if getting the file list from files_from, it is going to be
        # a subset of the actual tree.  We should not purge content
        # outside of that subset as we don't know if it's valid or
        # not.  Leave it to a non-files_from run to purge.
        if cfg.cache_file and len(cfg.files_from) == 0:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'],
                                       local_list[i]['inode'],
                                       local_list[i]['mtime'],
                                       local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case=False)
    single_file = False

    if type(args) not in (list, tuple, set):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if uri.type != 'file':
            raise ParameterError(
                "Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" %
                                 arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## single_file is True if and only if the user specified a single
    ## local URI and that URI refers to a FILE. It is False if the URI
    ## referred to a DIR, even when that dir contained only one FILE;
    ## that is not a case of single_file == True.
    if len(local_list) > 1:
        single_file = False

    local_list, exclude_list = filter_exclude_include(local_list)
    total_size = _fetch_local_list_info(local_list)
    _maintain_cache(cache, local_list)
    return local_list, single_file, exclude_list, total_size
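
A usage sketch for this final variant (hypothetical directory name; assumes a configured s3cmd runtime, so Config() carries the user's sync and filter settings):

local_list, single_file, exclude_list, total_size = fetch_local_list(
    "backup/photos", is_src=True, recursive=True)
info(u"%d files selected (%d bytes), %d excluded by filters"
     % (len(local_list), total_size, len(exclude_list)))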