def archive_package_iter(self) -> (FileEntryDTO, str, int):
    """
    :return:
        * File entry DTO
        * path to the archive file
        * number of the split package, or -1 if there is no source file split
    """
    ext = self.archiver.extension

    for file_package in self.file_bulker.file_package_iter():
        if len(file_package) == 1 and file_package[0].size > self.max_size:
            # split file
            file = file_package[0]

            # guess parts
            parts = int(ceil(file.size / self.max_size))
            i = 0
            with create_pg(total=parts, unit='part', leave=False,
                           desc='Compressing part') as t:
                for split_file in self.split_file(file.original_file):
                    t.set_postfix(file=file.relative_file)
                    t.unpause()
                    self.archiver.compress_file(split_file, file, self.temp_archive_file)
                    t.update(1)
                    yield ArchivePackage(file_package, ext, self.temp_archive_file, i)
                    i += 1
        else:
            # normal package
            self.archiver.compress_files(file_package, self.temp_archive_file)
            yield ArchivePackage(file_package, ext, self.temp_archive_file, -1)

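# A tiny standalone illustration of the part estimate used above: a file larger than
# max_size is split into ceil(size / max_size) parts numbered 0..parts-1, while an
# unsplit package carries the sentinel index -1. The sizes below are made up.
from math import ceil

example_max_size = 1024 * 1024 * 1024        # hypothetical 1 GiB per archive part
example_file_size = int(2.5 * 1024 ** 3)     # a hypothetical 2.5 GiB source file

example_parts = int(ceil(example_file_size / example_max_size))
print(example_parts)                          # 3
print(list(range(example_parts)))             # part numbers 0, 1, 2
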
def split_file(self, input_file, buffer=1024) -> str:
    """
    Splits the file into multiple parts which have 'roughly' the size of
    self.max_size. The smallest possible part size is determined by the buffer size.
    """
    file_size = os.stat(input_file).st_size

    with create_pg(total=file_size, leave=False, unit='B', unit_scale=True,
                   unit_divisor=1024, desc='Splitting file') as t:
        with open(input_file, 'rb') as src:
            while True:
                with tempfile.NamedTemporaryFile() as f:
                    with open(f.name, 'wb') as dest:
                        written = 0
                        while written < self.max_size:
                            data = src.read(buffer)
                            if data:
                                dest.write(data)
                                written += len(data)  # count actual bytes, not the buffer size
                                t.update(len(data))
                            else:
                                if written == 0:
                                    return  # file has ended on split size - don't yield
                                break
                    yield f.name

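# A minimal standalone sketch of the same splitting technique, without the class
# above (no self.max_size, no create_pg progress bar). The name split_into_parts
# and the 4 KiB part size are hypothetical and only illustrate how fixed-size
# buffered reads accumulate into part files.
import os
import tempfile


def split_into_parts(input_file: str, max_size: int, buffer: int = 1024):
    """Yield paths of temporary part files holding roughly max_size bytes each."""
    with open(input_file, 'rb') as src:
        while True:
            part = tempfile.NamedTemporaryFile(delete=False, suffix='.part')
            written = 0
            with part:
                while written < max_size:
                    data = src.read(buffer)
                    if not data:
                        break
                    part.write(data)
                    written += len(data)
            if written == 0:
                os.unlink(part.name)  # source ended exactly on a part boundary
                return
            yield part.name


if __name__ == '__main__':
    # usage sketch: split this very script into 4 KiB parts and report their sizes
    for part_path in split_into_parts(__file__, max_size=4096):
        print(part_path, os.path.getsize(part_path))
        os.unlink(part_path)
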
def store_archive(self, archive_package, disc_domain, archive_domain, pressure):
    super(BackupDirectoryStorageController, self).store_archive(
        archive_package, disc_domain, archive_domain, pressure)

    archive_name = _create_archive_name(self._parameters, disc_domain, archive_domain)

    assert os.path.exists(archive_package.archive_file)
    src_file = archive_package.archive_file
    final_archive_name = archive_name + "." + archive_package.final_file_extension

    with create_pg(total=-1, leave=False, unit='B', unit_scale=True,
                   unit_divisor=1024, desc='Copy archive to destination') as t:
        copy_with_progress(src_file, final_archive_name, t)
    pressure.unregister_pressure()

    temp_file = getattr(archive_package, "tempfile", None)
    if temp_file:
        temp_file.close()  # empty the temporary directory

    archive_domain.name = os.path.basename(final_archive_name)
    archive_domain.save()

    self._current_medium_size += self._get_size(final_archive_name)

def walk_directory(self, directory: str, calculate_sha=True):
    file_count = -1
    if self.absolute_progress:
        file_count = self.count_files(directory)

    # with tqdm(total=file_count, leave=False, unit='files') as t:
    with create_pg(total=file_count, leave=False, unit='files',
                   desc='Processing source files') as t:
        for subdir, file in self.file_generator(directory):
            f = os.path.join(subdir, file)

            e = FileEntryDTO()
            e.original_path = subdir
            e.original_filename = file
            e.size = os.stat(f).st_size
            e.modified_time = os.path.getmtime(f)
            e.relative_file = self._find_relative_file(directory, f)
            if calculate_sha:
                e.sha_sum = self.calculate_hash(f, e.size)

            # logger.debug("Found file <%s> (sha=%s, modified_time=%s, size=%s)"
            #              % (e.relative_file, e.sha_sum, e.modified_time, e.size)
            #              )

            t.update(1)
            yield e

def compress_files(self, input_files: [FileEntryDTO], output_archive):
    with tarfile.open(output_archive, self.open_spec) as tar:
        with create_pg(total=len(input_files), leave=False, unit='file',
                       desc='Compressing files') as t:
            for file in input_files:
                src_path = file.original_file
                bck_path = file.relative_file

                t.set_postfix(file=file.relative_file)
                tar.add(src_path, arcname=bck_path)
                t.update(1)

def count_files(self, directory: str) -> int:
    count = 0
    with create_pg(total=None, leave=False, unit='files', desc='Counting files') as t:
        for _, _, files in os.walk(directory):
            for _ in files:
                count += 1
                t.update(1)
    return count

def calculate_hash(filename: str, file_size: int = -1) -> str:
    sha256_hash = hashlib.sha256()
    with open(filename, "rb") as f:
        # Read and update the hash value in blocks of 4K
        block_size = 4096
        with create_pg(total=file_size, leave=False, unit='B', unit_scale=True,
                       unit_divisor=1024, desc='Calculate hash') as t:
            t.set_postfix(file=filename)
            for byte_block in iter(lambda: f.read(block_size), b""):
                sha256_hash.update(byte_block)
                t.update(len(byte_block))  # the final block may be shorter than block_size
    return sha256_hash.hexdigest()

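# A quick self-contained check of the block-wise hashing above: hashing a file in
# 4 KiB blocks yields the same digest as hashing its whole contents at once. This
# sketch deliberately avoids create_pg and the progress bar; sha256_blockwise is a
# hypothetical name used only for the comparison.
import hashlib


def sha256_blockwise(path: str, block_size: int = 4096) -> str:
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(block_size), b""):
            h.update(block)
    return h.hexdigest()


if __name__ == '__main__':
    with open(__file__, 'rb') as f:
        whole_digest = hashlib.sha256(f.read()).hexdigest()
    assert sha256_blockwise(__file__) == whole_digest
    print(whole_digest)
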
def compress_file_tar(open_spec, input_files: [str], output_archive):
    """
    Compresses files into tar files.

    - 'w:gz'  Open for gzip compressed writing.
    - 'w:bz2' Open for bzip2 compressed writing.
    - 'w:xz'  Open for lzma compressed writing.
    """
    with tarfile.open(output_archive, open_spec) as tar:
        with create_pg(total=len(input_files), leave=False, unit='file',
                       desc='Compressing files') as t:
            for file in input_files:
                t.set_postfix(file=file)
                tar.add(file)
                t.update(1)

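# A hedged usage sketch for the open_spec modes listed above, without create_pg:
# build a small gzip-compressed tar and list its members. The sample file and
# archive names are temporary stand-ins that only demonstrate the tarfile API.
import os
import tarfile
import tempfile

if __name__ == '__main__':
    workdir = tempfile.mkdtemp()
    sample = os.path.join(workdir, 'sample.txt')
    with open(sample, 'w') as fh:
        fh.write('hello backup')

    archive = os.path.join(workdir, 'sample.tar.gz')
    with tarfile.open(archive, 'w:gz') as tar:   # 'w:bz2' and 'w:xz' work the same way
        tar.add(sample, arcname=os.path.basename(sample))

    with tarfile.open(archive, 'r:gz') as tar:
        print(tar.getnames())                    # ['sample.txt']
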
def create_reader_from_backup(backup_root: BackupsEntry, backup_start):
    file_relative_file = dict()
    file_sha = dict()

    if backup_root:
        backups = list()
        for backup in backup_root.backups.order_by(BackupEntry.created.desc()):
            backups.append(backup)
            if backup.type == BackupType.FULL:
                break
        backups.reverse()
        # generate full file list

        # guess records to process
        total_records = 0
        for backup in backups:
            total_records += len(backup.all_files)

        with create_pg(total=total_records, leave=False, unit='records',
                       desc='Reading backup information') as t:
            for backup in backups:
                for backup_file_map in backup.all_files.select():
                    f = backup_file_map.file
                    if backup_file_map.state in (FileState.NEW, FileState.UPDATED):
                        info = BackupDatabaseReader.FileInfo(
                            backup_file_map.file, backup_file_map.state, backup)
                        file_relative_file[f.relative_file] = info
                    elif backup_file_map.state == FileState.DELETED:
                        del file_relative_file[f.relative_file]
                    t.update(1)

    return BackupDatabaseReader(file_relative_file, file_sha)

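# A small standalone sketch of the replay logic above: backups are applied in
# chronological order starting from the last FULL backup, so NEW/UPDATED entries
# overwrite the file map and DELETED entries remove them. The tuples below are
# hypothetical stand-ins for the per-backup file records.
changes = [
    # (backup, relative_file, state)
    ('full-2024-01-01', 'a.txt', 'NEW'),
    ('full-2024-01-01', 'b.txt', 'NEW'),
    ('incr-2024-01-02', 'a.txt', 'UPDATED'),
    ('incr-2024-01-03', 'b.txt', 'DELETED'),
]

current = {}
for backup, relative_file, state in changes:
    if state in ('NEW', 'UPDATED'):
        current[relative_file] = (backup, state)
    elif state == 'DELETED':
        del current[relative_file]

print(current)  # {'a.txt': ('incr-2024-01-02', 'UPDATED')}
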
def test_basic_workflow_simple(self):
    set_simple()

    with create_pg(desc='Description', total=10) as t:
        for i in range(10):
            t.update(1)