def process(self):
    """Hash every queued file, recording results and errors."""
    progress = Progress('hashing')
    __LOG__.debug(
        'hashing %d files (%s)',
        len(self._queue), sizeof_fmt(self.bytes_to_hash)
    )
    progress.start(self, maxval=self.bytes_to_hash)
    for (file_path, file_size, dest) in self._queue:
        __LOG__.debug(
            'hashing %s (%s)...', quote(file_path), sizeof_fmt(file_size)
        )
        try:
            hash_values = hashfile(file_path)
        except Exception as err:
            __LOG__.exception('hashing of %s failed', quote(file_path))
            self.errors.append(err)
        else:
            dest.update(hash_values)
            self.processed.append(dest)
        # Count the file against the progress bar whether or not hashing
        # succeeded, so the bar still reaches its maximum value.
        self.bytes_processed += file_size
        progress.update(self, val=self.bytes_processed)
    progress.finish(self)
    __LOG__.debug('%d files hashed', len(self.processed))
    return self.processed
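
# `hashfile` is defined elsewhere in the project. A minimal sketch of what
# it might look like, assuming it streams the file once and returns a
# mapping of digest values that `dest.update()` can consume; the name
# `hashfile_sketch`, the digest set, and the chunk size are assumptions,
# not the project's actual implementation:
import hashlib


def hashfile_sketch(file_path, chunk_size=1024 * 1024):
    """Compute several digests of one file in a single streaming pass."""
    digests = {'md5': hashlib.md5(), 'sha1': hashlib.sha1()}
    with open(file_path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            for digest in digests.values():
                digest.update(chunk)
    return {name: digest.hexdigest() for name, digest in digests.items()}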
def duplicates(self, limit=-1):
    """Yield duplicate entries from the registry, reporting progress."""
    progress = Progress('find duplicates')
    index = 0
    # A non-positive limit means "no limit", so the progress bar gets no
    # maximum value in that case.
    progress.start(self, maxval=limit if limit > 0 else None)
    for i in self.file_registry.find_duplicates(limit=limit):
        yield i
        index += 1
        progress.update(self, val=index)
    progress.finish(self)
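
# A hedged usage sketch for `duplicates()`: because it is a generator that
# updates progress as it yields, callers can stream results without
# materializing the full list. The `scanner` instance and the shape of
# each yielded row are assumptions:
#
#     for dup in scanner.duplicates(limit=100):
#         print(dup)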
def _step0_scan_db(self):
    """Re-stat every file known to the DB; queue changed files for
    re-hashing and delete records whose files no longer exist."""
    progress = Progress('scanning db')
    __LOG__.debug('scanning db...')
    db_entries_count = self.file_registry.count()
    old_files_deleted = 0
    mtime_changed = 0
    size_changed = 0
    index = 0
    progress.start(self, maxval=db_entries_count)
    for db_file in self.file_registry.find_all():
        index += 1
        progress.update(self, val=index)
        abs_path = join(self.base_dir, db_file.path)
        try:
            changed = False
            stat = os.stat(abs_path)
            if db_file.mtime != int(stat.st_mtime):
                mtime_changed += 1
                changed = True
            if db_file.size != stat.st_size:
                size_changed += 1
                changed = True
            if changed:
                db_file.mtime = int(stat.st_mtime)
                db_file.size = stat.st_size
                self.hash_queue.append(abs_path, stat.st_size, db_file)
            # Remember every file that still exists, changed or not, so
            # the filesystem scan does not report it as new.
            self.visited_files.append(s(db_file.path))
        except OSError as err:
            if err.errno == errno.ENOENT:
                # The file is gone; remove its record from the registry.
                __LOG__.debug('deleting %s', quote(db_file.path))
                self.file_registry.delete(db_file)
                old_files_deleted += 1
            else:
                raise  # pragma: no cover
    progress.finish(self)
    __LOG__.debug('mtime of %d files changed', mtime_changed)
    __LOG__.debug('size of %d files changed', size_changed)
    __LOG__.debug('%d old files deleted', old_files_deleted)
    return len(self.visited_files)
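
# `s()` above (and `u()` in the next step) are not defined in this section
# and look like text/native-string compat helpers. A minimal sketch,
# assuming `s()` returns a native `str` so the `visited_files` membership
# test in the filesystem scan compares like with like; the name and
# encoding are assumptions:
def s_sketch(value, encoding='utf-8'):
    """Return `value` as a native str, decoding bytes if needed."""
    return value.decode(encoding) if isinstance(value, bytes) else value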
def _step1_scan_fs(self):
    """Walk the filesystem and queue files the DB has not seen yet."""
    progress = Progress('scanning fs')
    __LOG__.debug('scanning %s', quote(self.base_dir))
    new_files_found = 0
    progress.start(self)
    for entry in files_of_dir(self.base_dir, self.is_excluded):
        progress.update(self)
        rel_path = os.path.relpath(entry.path, self.base_dir)
        if rel_path not in self.visited_files:
            new_files_found += 1
            db_file = model.File(
                path=u(rel_path),
                mtime=int(entry.stats.st_mtime),
                size=entry.stats.st_size,
            )
            self.hash_queue.append(
                entry.path, entry.stats.st_size, db_file
            )
    progress.finish(self)
    __LOG__.debug('%d new files found', new_files_found)
    return new_files_found
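
# `files_of_dir` is defined elsewhere. A minimal sketch, assuming it walks
# `base_dir` recursively with os.scandir, skips anything the `is_excluded`
# predicate rejects, and yields objects exposing `.path` and `.stats`; the
# `Entry` wrapper and the predicate signature are assumptions:
from collections import namedtuple

Entry = namedtuple('Entry', ('path', 'stats'))


def files_of_dir_sketch(base_dir, is_excluded):
    """Yield an Entry for every non-excluded regular file under base_dir."""
    for item in os.scandir(base_dir):
        if is_excluded(item.path):
            continue
        if item.is_dir(follow_symlinks=False):
            for entry in files_of_dir_sketch(item.path, is_excluded):
                yield entry
        elif item.is_file(follow_symlinks=False):
            yield Entry(item.path, item.stat())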
def init(self):
    progress = Progress('init')
    __LOG__.debug('initializing...')
    progress.start(self, maxval=1)
    model.create_schema(self.db_session.get_bind())
    progress.finish(self)
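
# A hedged sketch of how these steps are assumed to fit together; the
# `Scanner` name, its constructor, and `scanner.hash_queue.process()`
# being the hashing entry point are assumptions, while the method names
# themselves come from this section:
#
#     scanner = Scanner(base_dir='/data', db_session=session)
#     scanner.init()                # create the schema
#     scanner._step0_scan_db()      # re-stat known files, queue changes
#     scanner._step1_scan_fs()      # queue files new to the database
#     scanner.hash_queue.process()  # hash everything queued
#     for dup in scanner.duplicates():
#         print(dup)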