Example 1
    def __init__(self, entry: DirEntry):

        # `entry` comes from os.scandir() called with a bytes path, so both
        # name and path are bytes and need decoding.
        self.name = entry.name.decode()
        self.path = entry.path.decode()
        self.rel_path = relpath(self.path, models_dir)

        self.is_dir = entry.is_dir()

        # save created_time and modified_time as date objects to provide
        # better date comparison
        self.created_time = datetime.fromtimestamp(entry.stat().st_ctime)
        self.modified_time = datetime.fromtimestamp(entry.stat().st_mtime)

        self.size = self._human_readable_size(self._get_size(entry.path))
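The `.decode()` calls above only work because the `DirEntry` objects come from `os.scandir()` called with a `bytes` path; in that case `name` and `path` are returned as `bytes`. A minimal driver sketch illustrating this (the `models_dir` value is a placeholder, not taken from the original project):

import os

models_dir = b"/path/to/models"  # bytes on purpose, so entry.name/path are bytes
with os.scandir(models_dir) as it:
    for entry in it:
        print(entry.name.decode(), entry.is_dir())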
Example 2
    def _insert_sorted(self, item: os.DirEntry, sort_by: SortBy) -> None:
        """Insert every scanned item into the local `_items` list on-the-fly by the given `sort_by` parameter.

        :param item: DirEntry object from `_iter_items()` async iteration
                within the async parallel scanning.
        :type item: posix.DirEntry
        :param sort_by: SortBy enum attribute
        :type sort_by: SortBy
        :rtype: None
        """
        attrs = self._get_attributes(item)

        # It is an empty folder, grab folder timestamps
        if attrs["atime"] == 0 and attrs["mtime"] == 0 and attrs["ctime"] == 0:
            stat = item.stat(follow_symlinks=False)
            attrs["atime"] = int(stat.st_atime)
            attrs["mtime"] = int(stat.st_mtime)
            attrs["ctime"] = int(stat.st_ctime)

        summary = {
            "name": os.path.relpath(item.path, self._root),
            "size": attrs["size"],
            "depth": attrs["depth"],
            "num_of_files": attrs["num_of_files"],
            "atime": attrs["atime"],
            "mtime": attrs["mtime"],
            "ctime": attrs["ctime"],
        }

        index = self._find_index(summary, sort_by)
        self._total_size += summary["size"]
        self._items_len += 1
        self._items.insert(index, summary)
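`_find_index` is not shown in this example; a standalone sketch of the same "insert already sorted" idea using only the standard library, assuming Python 3.10+ for the `key=` argument of `bisect.insort` (the plain string `sort_key` stands in for the project's `SortBy` enum):

import bisect

items: list[dict] = []

def insert_sorted(summary: dict, sort_key: str) -> None:
    # Keep `items` ordered by the chosen key as summaries arrive.
    bisect.insort(items, summary, key=lambda d: d[sort_key])

insert_sorted({"name": "a", "size": 3}, "size")
insert_sorted({"name": "b", "size": 1}, "size")  # lands before "a"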
Example 3
    def should_copy_file_DirEntry(self, src: os.DirEntry, dst: Path):
        retVal = True
        if not self.top_destination_does_not_exist:
            try:
                dst_stats = dst.stat()
                src_stats = src.stat()
                if src_stats.st_ino == 0:  # on Windows os.DirEntry.stat() sets st_ino to zero, so os.stat() must be called
                    # see https://docs.python.org/3.6/library/os.html#os.DirEntry
                    src_stats = os.stat(src.path, follow_symlinks=False)
                if src_stats.st_ino == dst_stats.st_ino:
                    retVal = False
                    log.debug(
                        f"{self.progress_msg()} skip copy file, same inode '{src.path}' to '{dst}'"
                    )
                elif src_stats.st_size == dst_stats.st_size and src_stats.st_mtime == dst_stats.st_mtime:
                    retVal = False
                    log.debug(
                        f"{self.progress_msg()} skip copy file, same time and size '{src.path}' to '{dst}'"
                    )
                if retVal:  # destination exists and file should be copied, so make sure it's writable
                    with Chmod(dst, "a+rw",
                               own_progress_count=0) as mod_changer:
                        mod_changer()
                    if self.should_no_flags_file(dst):
                        with ChFlags(dst,
                                     "nohidden",
                                     "nosystem",
                                     "unlocked",
                                     ignore_all_errors=True,
                                     own_progress_count=0) as flags_changer:
                            flags_changer()
            except Exception:  # most likely dst.stat() failed because dst does not exist
                retVal = True
        return retVal
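The Windows behaviour mentioned in the comment (DirEntry.stat() reporting st_ino as zero) can be isolated into a small helper; a sketch of that fallback pattern, not part of the original class:

import os

def stat_with_inode(entry: os.DirEntry) -> os.stat_result:
    # On Windows, DirEntry.stat() reports st_ino, st_dev and st_nlink as 0;
    # fall back to os.stat() when a real inode number is needed.
    st = entry.stat(follow_symlinks=False)
    if st.st_ino == 0:
        st = os.stat(entry.path, follow_symlinks=False)
    return st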
Example 4
    def _dirEntryToFileInfo(dirEntry: os.DirEntry, path: str, realpath: str):
        try:
            linkname = os.readlink(realpath) if dirEntry.is_symlink() else ""
        except OSError:
            linkname = ""

        return FolderMountSource._statsToFileInfo(
            dirEntry.stat(follow_symlinks=False), linkname, path)
Example 5
    def from_dir_entry(self,
                       dir_entry: os.DirEntry,
                       file_hash: str | None = None,
                       dont_hash: bool = False) -> None:
        if not isinstance(dir_entry, os.DirEntry):
            raise ValueError(
                'Unsupported data type {type}'.format(type=type(dir_entry)))

        self.hash = file_hash
        self.name = dir_entry.name
        self.path = dir_entry.path
        self.size = dir_entry.stat().st_size
        self.ctime = self._strip_millis(dir_entry.stat().st_ctime)
        self.mtime = self._strip_millis(dir_entry.stat().st_mtime)
        self.inode = dir_entry.stat().st_ino

        if not dont_hash:
            self.calculate_hash()
Example 6
def print_dir_entry(entry: DirEntry,
                    *,
                    path: bool = False,
                    path_field_size=70):
    stat = entry.stat()
    kind = 'DIR' if entry.is_dir() else 'FILE'
    data = entry.path if path else entry.name
    print(
        f"{data:<{path_field_size}s} {kind:<4s} {str(stat.st_size) if entry.is_file() else '':>8s}"
    )
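Since this helper is self-contained, it can be driven directly from os.scandir(); a small usage sketch (the "." directory is just a placeholder):

import os

with os.scandir(".") as it:
    for entry in sorted(it, key=lambda e: e.name):
        print_dir_entry(entry, path=True)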
Example 7
def get_file_info(entry: DirEntry):
    # os.stat_result indices: 6 = st_size, 7 = st_atime, 8 = st_mtime
    stat = entry.stat()
    return {
        'size': stat[6],
        'lastAccessed':
        datetime.fromtimestamp(stat[7]).strftime("%A, %B %d, %Y %I:%M:%S"),
        'lastModified':
        datetime.fromtimestamp(stat[8]).strftime("%A, %B %d, %Y %I:%M:%S"),
    }
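The tuple indices above map to named attributes of os.stat_result (index 6 is st_size, 7 is st_atime, 8 is st_mtime); an equivalent sketch using the named fields, which reads more clearly:

from datetime import datetime
from os import DirEntry

def get_file_info_named(entry: DirEntry):
    # Same data as above, but via named stat_result attributes.
    st = entry.stat()
    fmt = "%A, %B %d, %Y %I:%M:%S"
    return {
        'size': st.st_size,
        'lastAccessed': datetime.fromtimestamp(st.st_atime).strftime(fmt),
        'lastModified': datetime.fromtimestamp(st.st_mtime).strftime(fmt),
    }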
Example 8
    def from_dir_entry(cls, dir: "File", entry: os.DirEntry) -> "File":
        try:
            st = entry.stat()
        except FileNotFoundError:
            log.warning("%s: cannot stat() file: broken symlink?",
                        os.path.join(dir.abspath, entry.name))
            st = None

        return cls(relpath=os.path.join(dir.relpath, entry.name),
                   abspath=os.path.join(dir.abspath, entry.name),
                   stat=st)
Example 9
    def _get_attributes(self, item: os.DirEntry) -> dict:
        """Parse entire item and subdirectories.

        Returns:
        * Total size in bytes
        * Maximum folder depth of item
        * Total number of files this item contains
        * Access timestamp
        * Modification timestamp
        * Change timestamp

        in the same order as tuple.

        :param item: DirEntry object
        :type item: posix.DirEntry
        :return: Dictionary of {size, depth, num_of_files, atime, mtime, ctime}
        :rtype: dict

        """
        # it's a file or symlink, size is already on item stat
        if not item.is_dir(follow_symlinks=False):
            stat = item.stat(follow_symlinks=False)
            return {
                "size": stat.st_size,
                "depth": self._get_depth(item.path) - self._level,
                "num_of_files": 1,
                "atime": int(stat.st_atime),
                "mtime": int(stat.st_mtime),
                "ctime": int(stat.st_ctime),
            }

        # It is a folder, recursive size check
        else:
            total_size = num_of_files = depth = 0
            atime = mtime = ctime = 0
            # TODO: try/except catch PermissionError
            with os.scandir(item.path) as directory:
                for i in directory:
                    attrs = self._get_attributes(i)
                    total_size += attrs["size"]
                    num_of_files += attrs["num_of_files"]
                    atime = max(atime, attrs["atime"])
                    mtime = max(mtime, attrs["mtime"])
                    ctime = max(ctime, attrs["ctime"])
                    depth = max(depth, attrs["depth"])

            return {
                "size": total_size,
                "depth": depth,
                "num_of_files": num_of_files,
                "atime": atime,
                "mtime": mtime,
                "ctime": ctime,
            }
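For comparison, the same totals (size and file count) can be gathered without manual recursion by walking the subtree with os.walk; a rough standalone sketch, not part of the original class:

import os

def subtree_size_and_count(path: str) -> tuple:
    # Sum file sizes and count files below `path`, skipping symlinks,
    # roughly mirroring the recursive branch above.
    total_size = num_of_files = 0
    for root, _dirs, files in os.walk(path):
        for name in files:
            full = os.path.join(root, name)
            if not os.path.islink(full):
                total_size += os.path.getsize(full)
                num_of_files += 1
    return total_size, num_of_files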
Example 10
def os_dir_entry_to_directory_list_entry(
        virtual_path: str, dir_entry: os.DirEntry) -> DirectoryListEntry:
    """Convert an `os.DirEntry` instance to a `DirectoryListEntry`."""
    s: os.stat_result = dir_entry.stat()

    return DirectoryListEntry(
        dir_entry.name.decode("utf8"),
        utf8_path_join(virtual_path, dir_entry.name),
        DirectoryEntryType.DIRECTORY
        if dir_entry.is_dir() else DirectoryEntryType.FILE,
        DiskSource(),
        datetime.datetime.fromtimestamp(s.st_mtime),
    )
Example 11
    def __init__(self, dir_entry: os.DirEntry):
        self.file_path = dir_entry.path
        self.file_name = dir_entry.name
        self.is_dir = self.file_name in [ARTICLE_FILENAME, SERIES_FILENAME]
        self.dir_path = os.path.dirname(self.file_path) \
            if self.is_dir else None
        self.dir_name = self.dir_path.rsplit(os.path.sep, 1)[1] \
            if self.is_dir else None
        self.last_updated = timestamp_to_datetime(dir_entry.stat().st_mtime,
                                                  tzlocal.get_localzone())
        with open(self.file_path) as f:
            data = frontmatter.load(f)
        self.frontmatter = data.metadata
        self.markdown = data.content
Example 12
    def copy_file_to_file_DirEntry(self,
                                   src: os.DirEntry,
                                   dst: Path,
                                   follow_symlinks=True):
        """ copy the file src to the file dst. dst should either be an existing file
            or not exists at all - i.e. dst cannot be a folder. The parent folder of dst
            is assumed to exist.
            src is assumed to be of type os.DirEntry
        """
        self.last_src, self.last_dst = os.fspath(src), os.fspath(dst)
        self.doing = f"""copy file '{self.last_src}' to '{self.last_dst}'"""

        if self.should_copy_file_DirEntry(src, dst):
            try:
                if not self.should_hard_link_file_DirEntry(src):
                    log.debug(
                        f"copy file '{self.last_src}' to '{self.last_dst}'")
                    if not self.dry_run:
                        _fast_copy_file(src, dst)
                        shutil.copystat(src,
                                        dst,
                                        follow_symlinks=follow_symlinks)
                else:  # try to create hard link
                    try:
                        self.dry_run or os.link(src, dst)
                        log.debug(
                            f"hard link file '{self.last_src}' to '{self.last_dst}'"
                        )
                        self.statistics['hard_links'] += 1
                    except OSError as ose:
                        self.hard_links_failed = True
                        log.debug(
                            f"copy file '{self.last_src}' to '{self.last_dst}'"
                        )

                        if not self.dry_run:
                            _fast_copy_file(src, dst)
                            shutil.copystat(src,
                                            dst,
                                            follow_symlinks=follow_symlinks)
                if self.copy_owner and self.has_chown:
                    src_st = src.stat()  # !
                    os.chown(dst, src_st[stat.ST_UID], src_st[stat.ST_GID])
            except Exception as ex:
                self.who_locks_file_error_dict(_fast_copy_file, self.last_dst)
                raise
        else:
            self.statistics['skipped_files'] += 1
        return dst
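`_fast_copy_file` is project-specific and not shown here; a conservative stand-in (an assumption, not the project's actual implementation) could be as simple as:

import shutil

def _fast_copy_file(src, dst):
    # os.DirEntry is os.PathLike, so shutil accepts it directly; the real
    # helper presumably does something faster than a plain byte copy.
    shutil.copyfile(src, dst)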
Example 13
    def digestEntry(self, entry: os.DirEntry):
        _, ext = os.path.splitext(entry.name)
        if ext not in self.extDicts:
            self.extDicts[ext] = {"num": 0, "bytes": 0, "maxbytes": 0, "maxname": ""}
        exd = self.extDicts[ext]
        exd["num"] += 1
        esize = entry.stat().st_size
        exd["bytes"] += esize
        if esize > exd["maxbytes"]:
            exd["maxbytes"] = esize
            exd["maxname"] = entry.path
        if esize > 10e6:  # bigger than 10 MB (decimal)
            self.bigFileList.append(entry)
            emb = "%.3f" % round(esize / 1e6, 3)
            lgg.info(f"    big file: {emb} mb - {entry.path}")
Example 14
    def validate_file(self, dir_entry: os.DirEntry) -> bool:
        """Validates given DirEntry. Returns False if entry should be completely ignored,
        or True if we want to keep it for further processing.

        Ignore all zero length files. There are usually there for a purpose like .dummy etc,
        so there can be tons of it with the same name even, so by default, ignore them completely.
        Also ignore all symlinks."""

        from .log import Log

        if dir_entry.is_symlink():
            Log.vv('{name}: This is a symbolic link. Skipping.'.format(
                name=dir_entry.name))
            return False

        # NOTE: symlinks (including broken ones) were already rejected above,
        # so calling is_file() on the DirEntry here is safe (os.path.isfile()
        # would work as well).
        if not dir_entry.is_file():
            Log.vv('{name}: This is not a file. Skipping.'.format(
                name=dir_entry.name))
            return False

        item_size = dir_entry.stat().st_size

        if item_size == 0:
            Log.vv('{name}: File is 0 bytes long. Skipping.'.format(
                name=dir_entry.name))
            return False

        if self.min_size > 0 and item_size < self.min_size:
            Log.vv('{name}: File is smaller than min size ({size}). Skipping.'.
                   format(name=dir_entry.name, size=item_size))
            return False

        if 0 < self.max_size < item_size:
            Log.vv('{name}: File is bigger than max size ({size}). Skipping.'.
                   format(name=dir_entry.name, size=item_size))
            return False

        for list_item in self._file_name_blacklist:
            match = re.match(list_item, dir_entry.name)
            if match is not None:
                Log.vv('File "{name}" blacklisted by "{re}" rule. Skipping.'.
                       format(name=dir_entry.name, re=list_item))
                return False

        return True
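A possible way to drive this filter over a directory scan (the `scanner` object holding min_size, max_size and the blacklist is assumed, not shown in the example):

import os

def collect_valid_files(scanner, path: str) -> list:
    # Keep only entries that pass validate_file(); symlinks, directories and
    # too-small/too-large files are rejected inside the method itself.
    with os.scandir(path) as it:
        return [entry for entry in it if scanner.validate_file(entry)]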
Example 15
    def __init__(self, dir_entry: os.DirEntry,
                 category_manager: CategoryManager = injectable,
                 tag_manager: TagManager = injectable):
        self.category_manager = category_manager
        self.tag_manager = tag_manager

        self.file_path = dir_entry.path
        self.file_name = dir_entry.name
        self.is_dir = self.file_name in [Config.BLOG_ARTICLE_FILENAME,
                                         Config.BLOG_SERIES_FILENAME]
        self.dir_path = os.path.dirname(self.file_path) \
            if self.is_dir else None
        self.dir_name = self.dir_path.rsplit(os.path.sep, 1)[1] \
            if self.is_dir else None
        self.last_updated = timestamp_to_datetime(dir_entry.stat().st_mtime,
                                                  tzlocal.get_localzone())
        with open(self.file_path) as f:
            data = frontmatter.load(f)
        self.frontmatter = data.metadata
        self.markdown = data.content
Example 16
    def add(self, entry: os.DirEntry, local_zip_path: typing.Optional[str] = None) -> None:
        stats = entry.stat(follow_symlinks=False)
        handle = self._get_handle()

        self._iteration_total_bytes += stats.st_size
        handle.write(entry.path, local_zip_path if local_zip_path else None)
Example 17
def create_time(f: os.DirEntry) -> float:
    return f.stat().st_ctime
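This is a typical sort key; for instance, to list entries oldest-first by creation/change time (a usage sketch, not from the original code):

import os

with os.scandir(".") as it:
    for entry in sorted(it, key=create_time):
        print(entry.name)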
Example 18
    def from_dir_entry(cls, dir: "File", entry: os.DirEntry) -> "File":
        return cls(relpath=os.path.join(dir.relpath, entry.name),
                   abspath=os.path.join(dir.abspath, entry.name),
                   stat=entry.stat())
Example 19
def _get_timestamp(entry: os.DirEntry) -> datetime:
    return datetime.fromtimestamp(entry.stat().st_mtime)
Example 20
def get_file_date(entry: os.DirEntry) -> datetime.date:
    return datetime.fromtimestamp(entry.stat().st_mtime).date()
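A short sketch of how such a helper might be used to bucket files by modification date (illustrative only):

import os
from collections import defaultdict

files_by_date = defaultdict(list)
with os.scandir(".") as it:
    for entry in it:
        if entry.is_file():
            files_by_date[get_file_date(entry)].append(entry.name)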
Example 21
def hardlink_identical_files(*, dir_entry: os.DirEntry,
                             args: argparse.Namespace) -> None:
    """hardlink identical files

    The purpose of this function is to hardlink files together if the files are
    the same.  To be considered the same they must be equal in the following
    criteria:
          * file size
          * file contents
          * file mode (default)
          * owner user id (default)
          * owner group id (default)
          * modified time (default)

    Also, files will only be hardlinked if they are on the same device.  This
    is because hardlink does not allow you to hardlink across file systems.

    The basic idea on how this is done is as follows:

        Walk the directory tree building up a list of the files.

     For each file, generate a simple hash based on the size and modified time.

     For any other files which share this hash make sure that they are not
     identical to this file.  If they are identical then hardlink the files.

     Add the file info to the list of files that have the same hash value.
     """

    for exclude in args.excludes:
        if re.search(exclude, dir_entry.path):
            return

    stat_info = dir_entry.stat(follow_symlinks=False)
    # Is it a regular file?
    if stat.S_ISREG(stat_info.st_mode):
        # Create the hash for the file.
        file_hash = hash_value(
            size=stat_info.st_size,
            time=stat_info.st_mtime,
            notimestamp=(args.notimestamp or args.content_only),
        )
        # Bump statistics count of regular files found.
        gStats.found_regular_file()
        if args.verbose >= 2:
            print(f"File: {dir_entry.path}")
        work_file_info = FileInfo(filename=dir_entry.path, stat_info=stat_info)
        if file_hash in file_hashes:
            # We have file(s) that have the same hash as our current file.
            # Let's go through the list of files with the same hash and see if
            # we are already hardlinked to any of them.
            for temp_file_info in file_hashes[file_hash]:
                if is_already_hardlinked(st1=stat_info,
                                         st2=temp_file_info.stat_info):
                    gStats.found_hardlink(
                        temp_file_info.filename,
                        dir_entry.path,
                        temp_file_info.stat_info,
                    )
                    break
            else:
                # We did not find this file as hardlinked to any other file
                # yet.  So now lets see if our file should be hardlinked to any
                # of the other files with the same hash.
                for temp_file_info in file_hashes[file_hash]:
                    if are_files_hardlinkable(
                            file_info_1=work_file_info,
                            # file_info_2=(temp_filename, temp_stat_info),
                            file_info_2=temp_file_info,
                            args=args,
                    ):
                        hardlink_files(
                            sourcefile=temp_file_info.filename,
                            destfile=dir_entry.path,
                            stat_info=temp_file_info.stat_info,
                            args=args,
                        )
                        break
                else:
                    # The file should NOT be hardlinked to any of the other
                    # files with the same hash.  So we will add it to the list
                    # of files.
                    file_hashes[file_hash].append(work_file_info)
        else:
            # There weren't any other files with the same hash value so we will
            # create a new entry and store our file.
            file_hashes[file_hash] = [work_file_info]
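`is_already_hardlinked` is not shown in this snippet; on POSIX systems the usual check is "same inode on the same device", so a plausible sketch (an assumption about this codebase, not its actual implementation) is:

import os

def is_already_hardlinked(*, st1: os.stat_result, st2: os.stat_result) -> bool:
    # Two paths refer to the same file exactly when they share inode and device.
    return st1.st_ino == st2.st_ino and st1.st_dev == st2.st_dev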