Esempio n. 1
0
    def write(self, data: Iterator[str]) -> None:
        """
        Write new file paths to the text file, one path per line

        The file at ``self.path`` is opened in write mode, so any previous
        content is replaced.

        Args:
            data (Iterator[str]): New file paths to write
        """
        clear_print(f"Writing new file paths to {self.path}")

        # Pin the encoding instead of relying on the locale default: file
        # paths may contain any Unicode character, and a narrow platform
        # code page would otherwise raise UnicodeEncodeError mid-write.
        with self.path.open("w", encoding="utf-8") as fp:
            fp.writelines(f"{new}\n" for new in data)
Esempio n. 2
0
def make_tree(base_path: Path) -> DirPath:
    """
    Build the directory tree rooted at a path and report its total size

    Args:
        base_path (pathlib.Path): Directory used as the root of the tree

    Returns:
        (DirPath): Root node created from ``base_path``
    """
    clear_print("Calculating total size...")
    tree = DirPath(base_path)

    # Exact byte count with thousands separators, then the shortened form
    size_exact = f"{tree.size:,}"
    size_short = byte_shorten(tree.size)
    clear_print(f"Total file size: {size_exact} ({size_short})")

    return tree
Esempio n. 3
0
    def write(self, duplications: list[Duplication],
              duplication_name: str) -> None:
        """
        Dump duplication groups to the JSON file at ``self.path``

        Each duplication is converted with ``to_json_dict()`` and the
        resulting list is written with a two-space indent.

        Args:
            duplications     (list[Duplication]): Duplication groups to serialize
            duplication_name (str)              : Label used in the progress message only
        """
        clear_print(f"Writing {duplication_name} JSON to {self.path}...")

        with self.path.open("w") as fp:
            payload = [entry.to_json_dict() for entry in duplications]
            json_dump(payload, fp, indent=2)
Esempio n. 4
0
    def write(self, file_stats: list[FileStat],
              removed_path_strs: list[str]) -> None:
        """
        Apply removals and new file stats to the database file

        Args:
            file_stats (list[FileStat]):
                File stats to insert; each is converted with ``to_db_row()``
            removed_path_strs (list[str]):
                Path strings whose rows are deleted
        """
        clear_print(f"Writing all file data DB to {self.path}...")

        # NOTE(review): ``con`` is presumably a sqlite3 connection, in which
        # case each ``with con:`` below is its own transaction - confirm
        # against ``_open_db``.
        with self._open_db() as con:
            # Deletions go first, in their own ``with con:`` block
            with con:
                delete_params = ((path, ) for path in removed_path_strs)
                con.executemany(Db._DELETE_ROW_CMD, delete_params)

            # Insertions follow in a second, separate block
            with con:
                insert_rows = (stat.to_db_row() for stat in file_stats)
                con.executemany(Db._INSERT_ROW_CMD, insert_rows)
Esempio n. 5
0
    def read(self) -> dict[str, FileStat]:
        """
        Load every stored row from the database file

        The table-creation command is executed before the rows are selected.

        Returns:
            (dict[str, FileStat]): File stats keyed by the first column of
                each row (the path string)
        """
        clear_print("Reading DB...")

        with self._open_db() as con:
            with con:
                con.execute(Db._CREATE_TABLE_CMD)

            rows: list[DatabaseRow] = con.execute(
                Db._SELECT_ROWS_CMD).fetchall()
            file_stats: dict[str, FileStat] = {
                row[0]: FileStat.from_db_row(row)
                for row in rows
            }

        clear_print(f"Read {len(file_stats)} entries from DB")
        return file_stats
Esempio n. 6
0
def walk_tree(
    root_dir: DirPath, existing_file_stats: dict[str, FileStat]
) -> tuple[list[Duplication], list[FileStat], list[str]]:
    """
    Process the directory tree and group paths sharing the same id stat

    Args:
        root_dir (DirPath):
            Root of the directory tree to process
        existing_file_stats (dict[str, FileStat]):
            Existing path string-file stat mapping. It is not modified;
            the tree works on a shallow copy

    Returns:
        (list[Duplication]): Sorted groups that contain more than one path
        (list[FileStat])   : Sorted file stats collected during this run
        (list[str])        : Sorted path strings to be removed from the
                             database (empty when processing was interrupted
                             or raised)
    """
    clear_print("Getting all file data...")
    progress = Progress(root_dir.size)
    eta = ETA(root_dir.size)
    # NOTE(review): process_dir presumably drops from this copy every path it
    # still finds, so what is left afterwards are stale records - confirm
    # against DirPath.process_dir.
    stale_stats = existing_file_stats.copy()
    fresh_stats: list[FileStat] = []

    try:
        root_dir.process_dir(stale_stats, progress, eta, fresh_stats)
    except KeyboardInterrupt:
        clear_print("KeyboardInterrupt detected; stopping...")
        # An interrupted run must not remove anything from the database
        stale_stats = {}
    except Exception:
        clear_print("\nException occurred!")
        print_exc()
        print()
        # Same safeguard as above: keep every database record on failure
        stale_stats = {}

    fresh_stats.sort()
    clear_print(
        f"Found {root_dir.length} files, of which {len(fresh_stats)} are new"
    )

    clear_print("Finding duplicates...")
    paths_by_id: defaultdict[IdStat, list[str]] = defaultdict(list)

    # Existing records first (skipping the ones flagged for removal) ...
    for path_str, file_stat in existing_file_stats.items():
        if path_str not in stale_stats:
            paths_by_id[file_stat.to_id_stat()].append(path_str)

    # ... then the stats collected during this run
    for file_stat in fresh_stats:
        paths_by_id[file_stat.to_id_stat()].append(file_stat.path)

    # Only id stats shared by at least two paths count as a duplication
    duplications: list[Duplication] = sorted(
        Duplication(*id_stat, paths)
        for id_stat, paths in paths_by_id.items()
        if len(paths) > 1
    )

    clear_print(f"Found {len(duplications)} groups of duplicates")
    return duplications, fresh_stats, sorted(stale_stats)