def write(self, data: Iterator[str]) -> None:
    """
    Write new file paths to the text file, one per line

    The file at ``self.path`` is opened in write mode, so any previous
    content is replaced.

    Args:
        data (Iterator[str]): New file paths
    """
    clear_print(f"Writing new file paths to {self.path}")
    with self.path.open("w") as fp:
        # Each entry is terminated by a newline, as print() would do
        fp.writelines(f"{entry}\n" for entry in data)
def make_tree(base_path: Path) -> DirPath:
    """
    Build the directory tree and report its total size

    Args:
        base_path (pathlib.Path): Base directory path

    Returns:
        (DirPath): Tree rooted at ``base_path``
    """
    clear_print("Calculating total size...")
    tree = DirPath(base_path)
    # Show the size both as an exact, comma-grouped number and shortened
    summary = f"Total file size: {tree.size:,} ({byte_shorten(tree.size)})"
    clear_print(summary)
    return tree
def write(self, duplications: list[Duplication], duplication_name: str) -> None:
    """
    Write duplication data to the JSON file

    The file at ``self.path`` is opened in write mode and receives a
    JSON array with one ``to_json_dict()`` result per duplication,
    indented by 2 spaces.

    Args:
        duplications (list[Duplication]): Duplication data to be written
        duplication_name (str)          : Duplication name. For printing only
    """
    clear_print(f"Writing {duplication_name} JSON to {self.path}...")
    with self.path.open("w") as fp:
        payload = [entry.to_json_dict() for entry in duplications]
        json_dump(payload, fp, indent=2)
def write(self, file_stats: list[FileStat], removed_path_strs: list[str]) -> None:
    """
    Write data to the database file

    Rows for removed paths are deleted first, then the rows of the given
    file stats are inserted. Each step runs in its own ``with con:``
    block (presumably one transaction each - confirm against what
    ``_open_db`` yields).

    Args:
        file_stats (list[FileStat])  : File stats to write to the db
        removed_path_strs (list[str]): Paths whose rows are deleted
    """
    clear_print(f"Writing all file data DB to {self.path}...")
    with self._open_db() as con:
        # Step 1: drop the rows of paths that are gone
        stale_keys = ((path_str,) for path_str in removed_path_strs)
        with con:
            con.executemany(Db._DELETE_ROW_CMD, stale_keys)

        # Step 2: store one row per file stat
        fresh_rows = (stat.to_db_row() for stat in file_stats)
        with con:
            con.executemany(Db._INSERT_ROW_CMD, fresh_rows)
def read(self) -> dict[str, FileStat]:
    """
    Read data from the database file

    The table-creation command is executed before the rows are selected.
    Each row is keyed by its first column.

    Returns:
        (dict[str, FileStat]): Existing path string-file stat mapping
    """
    clear_print("Reading DB...")
    with self._open_db() as con:
        with con:
            con.execute(Db._CREATE_TABLE_CMD)
        rows: list[DatabaseRow] = con.execute(Db._SELECT_ROWS_CMD).fetchall()

    # row[0] is used as the mapping key; a later row with the same key wins
    file_stats: dict[str, FileStat] = {
        row[0]: FileStat.from_db_row(row) for row in rows
    }
    clear_print(f"Read {len(file_stats)} entries from DB")
    return file_stats
def walk_tree(
    root_dir: DirPath, existing_file_stats: dict[str, FileStat]
) -> tuple[list[Duplication], list[FileStat], list[str]]:
    """
    Process the directory tree and collect duplication data

    If processing is interrupted (KeyboardInterrupt) or raises, the
    partial results are still used, but no record is reported for
    removal from the database.

    Args:
        root_dir (DirPath): Directory tree to process
        existing_file_stats (dict[str, FileStat]): Existing path
            string-file stat mapping (not modified; a copy is handed
            to the tree processing)

    Returns:
        (list[Duplication]): All duplications, sorted
        (list[FileStat])   : New file stats, sorted
        (list[str])        : Sorted path strings of the records to be
                             removed from the database
    """
    clear_print("Getting all file data...")
    progress = Progress(root_dir.size)
    eta = ETA(root_dir.size)
    # Entries still present here after processing are treated as stale
    # (presumably process_dir drops every path it encounters - confirm)
    stale_file_stats = existing_file_stats.copy()
    new_file_stats: list[FileStat] = []
    try:
        root_dir.process_dir(stale_file_stats, progress, eta, new_file_stats)
    except KeyboardInterrupt:
        clear_print("KeyboardInterrupt detected; stopping...")
        # Don't remove anything from the database if the procedure is
        # interrupted
        stale_file_stats = {}
    except Exception:
        clear_print("\nException occurred!")
        print_exc()
        print()
        # Same as above: an incomplete walk must not trigger removals
        stale_file_stats = {}
    new_file_stats.sort()
    clear_print(
        f"Found {root_dir.length} files, of which {len(new_file_stats)} are new"
    )

    clear_print("Finding duplicates...")
    # Group every known path by its identifying stat
    paths_by_id: defaultdict[IdStat, list[str]] = defaultdict(list)
    for path_str, known_stat in existing_file_stats.items():
        if path_str not in stale_file_stats:
            paths_by_id[known_stat.to_id_stat()].append(path_str)
    for new_stat in new_file_stats:
        paths_by_id[new_stat.to_id_stat()].append(new_stat.path)

    # Only groups holding more than one path are duplications
    duplications: list[Duplication] = sorted(
        Duplication(*id_stat, grouped_paths)
        for id_stat, grouped_paths in paths_by_id.items()
        if len(grouped_paths) > 1
    )
    clear_print(f"Found {len(duplications)} groups of duplicates")
    return duplications, new_file_stats, sorted(stale_file_stats)