    def archive_package_iter(self) -> Iterator[ArchivePackage]:
        """
        Compresses the bulked file packages and yields one ArchivePackage per created archive.

        Each yielded package carries:
            * the file entry DTOs contained in the archive
            * the path to the (temporary) archive file
            * the number of the split part, or -1 if the source file was not split
        """
        ext = self.archiver.extension
        for file_package in self.file_bulker.file_package_iter():
            if len(file_package) == 1 and file_package[0].size > self.max_size:
                # split file
                file = file_package[0]

                # estimate how many parts the file will be split into
                parts = int(ceil(file.size / self.max_size))

                i = 0
                with create_pg(total=parts, unit='part', leave=False, desc='Compressing part') as t:
                    for split_file in self.split_file(file.original_file):
                        t.set_postfix(file=file.relative_file)
                        t.unpause()
                        self.archiver.compress_file(split_file, file, self.temp_archive_file)
                        t.update(1)
                        yield ArchivePackage(file_package, ext, self.temp_archive_file, i)
                        i += 1

            else:
                # normal package
                self.archiver.compress_files(file_package, self.temp_archive_file)
                yield ArchivePackage(file_package, ext, self.temp_archive_file, -1)
    def split_file(self, input_file, buffer=1024) -> Iterator[str]:
        """
        Splits the file into multiple parts of roughly self.max_size bytes each and yields the
        path of every part as a temporary file. The granularity of a part is determined by the
        buffer size.
        """
        file_size = os.stat(input_file).st_size
        with create_pg(total=file_size, leave=False, unit='B', unit_scale=True, unit_divisor=1024,
                       desc='Splitting file') as t:
            with open(input_file, 'rb') as src:
                while True:
                    # the temporary file only exists inside this with-block, so the caller must
                    # consume the yielded path before advancing the generator
                    with tempfile.NamedTemporaryFile() as f:
                        with open(f.name, 'wb') as dest:
                            written = 0
                            while written < self.max_size:
                                data = src.read(buffer)
                                if data:
                                    dest.write(data)
                                    written += len(data)
                                    t.update(len(data))
                                else:
                                    if written == 0:
                                        return  # source ended exactly on a part boundary - don't yield an empty part

                                    break

                        yield f.name
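
# split_file above yields each part's path while the NamedTemporaryFile is still open,
# so the consumer must process a part before advancing the generator (the file is
# deleted as soon as the next iteration starts). A minimal standalone sketch of that
# pattern - hypothetical names, not the project's actual classes - could look like this:

import os
import tempfile


def split_into_parts(input_file: str, max_size: int, buffer: int = 1024):
    """Yield paths of temporary files holding consecutive chunks of input_file."""
    with open(input_file, 'rb') as src:
        while True:
            with tempfile.NamedTemporaryFile() as part:
                written = 0
                while written < max_size:
                    data = src.read(min(buffer, max_size - written))
                    if not data:
                        break
                    part.write(data)
                    written += len(data)
                part.flush()
                if written == 0:
                    return  # source exhausted - no further parts
                # the part file is removed as soon as the generator resumes
                yield part.name


if __name__ == '__main__':
    with tempfile.NamedTemporaryFile(delete=False) as sample:
        sample.write(os.urandom(10_000))
    try:
        for part_path in split_into_parts(sample.name, max_size=4_096):
            print(part_path, os.path.getsize(part_path))
    finally:
        os.remove(sample.name)
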
Example #3
    def store_archive(self, archive_package, disc_domain, archive_domain,
                      pressure):
        super().store_archive(archive_package, disc_domain, archive_domain, pressure)
        archive_name = _create_archive_name(self._parameters, disc_domain,
                                            archive_domain)

        assert os.path.exists(archive_package.archive_file)

        src_file = archive_package.archive_file
        final_archive_name = archive_name + "." + archive_package.final_file_extension

        with create_pg(total=-1,
                       leave=False,
                       unit='B',
                       unit_scale=True,
                       unit_divisor=1024,
                       desc='Copy archive to destination') as t:
            copy_with_progress(src_file, final_archive_name, t)
            pressure.unregister_pressure()

        temp_file = getattr(archive_package, "tempfile", None)
        if temp_file:
            temp_file.close()  # closing the temporary file also removes it from the temporary directory

        archive_domain.name = os.path.basename(final_archive_name)
        archive_domain.save()

        self._current_medium_size += self._get_size(final_archive_name)
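
# copy_with_progress is used above but is not part of these snippets. Purely as an
# assumption, a chunked copy helper that feeds a tqdm-like progress bar (any object
# exposing update(n)) might look roughly like this - it is not the project's actual
# implementation:

def copy_with_progress(src_file: str, dest_file: str, t, buffer: int = 1024 * 1024):
    """Copy src_file to dest_file in chunks, reporting the written bytes via t.update()."""
    with open(src_file, 'rb') as src, open(dest_file, 'wb') as dest:
        while True:
            chunk = src.read(buffer)
            if not chunk:
                break
            dest.write(chunk)
            t.update(len(chunk))
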
Example #4
    def walk_directory(self, directory: str, calculate_sha=True):
        file_count = -1

        if self.absolute_progress:
            file_count = self.count_files(directory)

        with create_pg(total=file_count,
                       leave=False,
                       unit='files',
                       desc='Processing source files') as t:
            for subdir, file in self.file_generator(directory):
                f = os.path.join(subdir, file)

                e = FileEntryDTO()

                e.original_path = subdir
                e.original_filename = file

                e.size = os.stat(f).st_size
                e.modified_time = os.path.getmtime(f)
                e.relative_file = self._find_relative_file(directory, f)

                if calculate_sha:
                    e.sha_sum = self.calculate_hash(f, e.size)

                # logger.debug("Found file <%s> (sha=%s, modified_time=%s, size=%s)"
                #              % (e.relative_file, e.sha_sum, e.modified_time, e.size))
                t.update(1)

                yield e
    def compress_files(self, input_files: [FileEntryDTO], output_archive):
        with tarfile.open(output_archive, self.open_spec) as tar:
            with create_pg(total=len(input_files), leave=False, unit='file', desc='Compressing files') as t:
                for file in input_files:
                    src_path = file.original_file
                    bck_path = file.relative_file

                    t.set_postfix(file=file.relative_file)
                    tar.add(src_path, arcname=bck_path)
                    t.update(1)
Example #6
    def count_files(self, directory: str) -> int:
        count = 0

        with create_pg(total=None,
                       leave=False,
                       unit='files',
                       desc='Counting files') as t:
            for _, _, files in os.walk(directory):
                for _ in files:
                    count += 1

                    t.update(1)

        return count
Example #7
    @staticmethod
    def calculate_hash(filename: str, file_size: int = -1) -> str:
        sha256_hash = hashlib.sha256()
        with open(filename, "rb") as f:
            # Read and update hash string value in blocks of 4K
            block_size = 4096
            with create_pg(total=file_size,
                           leave=False,
                           unit='B',
                           unit_scale=True,
                           unit_divisor=1024,
                           desc='Calculate hash') as t:
                t.set_postfix(file=filename)

                for byte_block in iter(lambda: f.read(block_size), b""):
                    sha256_hash.update(byte_block)
                    t.update(len(byte_block))  # count the bytes actually read (the last block may be shorter)

            return sha256_hash.hexdigest()
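
# Hashing in blocks as above produces the same digest as hashing the whole file at
# once; a quick self-contained check without the progress bar (the standalone helper
# name is illustrative only):

import hashlib
import os
import tempfile


def sha256_chunked(path: str, block_size: int = 4096) -> str:
    sha = hashlib.sha256()
    with open(path, 'rb') as fh:
        for block in iter(lambda: fh.read(block_size), b''):
            sha.update(block)
    return sha.hexdigest()


if __name__ == '__main__':
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(os.urandom(10_000))
    try:
        with open(tmp.name, 'rb') as fh:
            whole = hashlib.sha256(fh.read()).hexdigest()
        assert sha256_chunked(tmp.name) == whole
        print('digests match:', whole)
    finally:
        os.remove(tmp.name)
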
def compress_file_tar(open_spec, input_files: [str], output_archive):
    """
    Compresses files into tar files.

    - 'w:gz'	Open for gzip compressed writing.
    - 'w:bz2'	Open for bzip2 compressed writing.
    - 'w:xz'	Open for lzma compressed writing.
    """

    with tarfile.open(output_archive, open_spec) as tar:
        with create_pg(total=len(input_files),
                       leave=False,
                       unit='file',
                       desc='Compressing files') as t:
            for file in input_files:
                t.set_postfix(file=file)
                tar.add(file)
                t.update(1)
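
# A short, self-contained illustration of the open_spec modes listed in the docstring
# above, using only the standard library (the progress bar is left out here because
# create_pg is not part of this snippet):

import os
import tarfile
import tempfile


def demo_tar_modes():
    # create two small sample files to archive
    sample_dir = tempfile.mkdtemp()
    sample_files = []
    for name in ('a.txt', 'b.txt'):
        path = os.path.join(sample_dir, name)
        with open(path, 'w') as fh:
            fh.write('sample content\n')
        sample_files.append(path)

    # write one archive per compression mode
    for open_spec, suffix in (('w:gz', '.tar.gz'), ('w:bz2', '.tar.bz2'), ('w:xz', '.tar.xz')):
        archive_path = os.path.join(sample_dir, 'backup' + suffix)
        with tarfile.open(archive_path, open_spec) as tar:
            for file in sample_files:
                tar.add(file, arcname=os.path.basename(file))
        print(open_spec, '->', archive_path, os.path.getsize(archive_path), 'bytes')


if __name__ == '__main__':
    demo_tar_modes()
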
Example #9
    @staticmethod
    def create_reader_from_backup(backup_root: BackupsEntry, backup_start):
        file_relative_file = dict()
        file_sha = dict()

        if backup_root:
            backups = list()
            for backup in backup_root.backups.order_by(
                    BackupEntry.created.desc()):
                backups.append(backup)
                if backup.type == BackupType.FULL:
                    break

            backups.reverse()
            # generate full file list

            # estimate the total number of records to process (for the progress bar)
            total_records = 0
            for backup in backups:
                total_records += len(backup.all_files)

            with create_pg(total=total_records,
                           leave=False,
                           unit='records',
                           desc='Reading backup information') as t:
                for backup in backups:
                    for backup_file_map in backup.all_files.select():
                        f = backup_file_map.file
                        if backup_file_map.state in (FileState.NEW,
                                                     FileState.UPDATED):
                            info = BackupDatabaseReader.FileInfo(
                                backup_file_map.file, backup_file_map.state,
                                backup)
                            file_relative_file[f.relative_file] = info
                        elif backup_file_map.state == FileState.DELETED:
                            del file_relative_file[f.relative_file]

                        t.update(1)

        return BackupDatabaseReader(file_relative_file, file_sha)
Example #10
    def test_basic_workflow_simple(self):
        set_simple()
        with create_pg(desc='Description', total=10) as t:
            for _ in range(10):
                t.update(1)
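
# create_pg and set_simple are used throughout these examples but their implementation
# is not included here. Purely as an assumption, a tqdm-based factory compatible with
# the calls above (total/unit/leave/desc keywords, set_postfix, unpause, update) might
# be sketched like this - it is not the project's actual implementation:

from tqdm import tqdm

_SIMPLE = False


def set_simple():
    """Switch to a plainer, less interactive progress output (hypothetical flag)."""
    global _SIMPLE
    _SIMPLE = True


def create_pg(**kwargs):
    """Return a tqdm progress bar configured with the given keyword arguments."""
    if _SIMPLE:
        kwargs.setdefault('ncols', 0)          # print stats only, no dynamic bar
        kwargs.setdefault('mininterval', 1.0)  # refresh less often
    return tqdm(**kwargs)
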