Example #1
0
    def save_file_path(self, binary, filename=None, compressed=False):
        """Store ``binary`` and return its file path record.

        The content is identified by a unique code computed from it. If a
        file path with that code is already stored, it is returned and
        nothing is written; otherwise the content is saved through
        ``save_blocks`` and a new ``FilePath`` plus one ``FileBlock``
        relation per block are inserted.

        :param binary: a ``StorageFile``, a string/bytes value, or a
            file-like object.
        :param filename: optional name passed to ``find_mimetype``. When
            omitted it is taken from ``binary.name`` if that attribute
            exists.
        :param compressed: value stored on the new ``FilePath``.
        :return: the existing query row when the code is already stored,
            otherwise the newly inserted ``FilePath`` instance.
        """
        if isinstance(binary, StorageFile):
            filename = filename or binary.name
            binary = binary.read()

        if isinstance(binary, (binary_type, string_types)):
            unique_code = string_unique_code(binary)
        else:
            unique_code = file_unique_code(binary)
            if not filename:
                # Not every file-like object exposes ``name`` (in-memory
                # streams do not). Leave the filename unset in that case,
                # exactly as it is for plain strings without a filename.
                stream_name = getattr(binary, 'name', None)
                if stream_name:
                    filename = basename(stream_name)

        # Hold a per-content lock while checking and inserting.
        # NOTE(review): presumably this serialises concurrent saves of the
        # same content -- confirm against the cache implementation.
        lock_key = 'storage save %s' % unique_code
        self.cache.lock(lock_key)
        try:
            file_path = (
                self.session
                .query(*FilePath.__table__.c.values())
                .filter(FilePath.code == unique_code)
                .first())

            # Already stored: reuse it
            if file_path:
                return file_path

            # Create a new file path
            filename = maybe_string(filename)
            mimetype = find_mimetype(filename=filename, header_or_file=binary)
            file_path = FilePath(
                code=unique_code,
                mimetype=maybe_unicode(mimetype),
                compressed=compressed)

            # Save blocks, collecting the relations and the total size
            block_relations = []
            total_size = 0
            for order, (block_id, block_size) in enumerate(self.save_blocks(binary)):
                block_relations.append(FileBlock(file_id_block=block_id, order=order))
                total_size += block_size

            # Add the file path to the DB; its id is needed by the relations
            file_path.size = total_size
            file_path.id = self.direct_insert(file_path).inserted_primary_key[0]

            # Relate blocks and file path
            for block_relation in block_relations:
                block_relation.file_id_path = file_path.id
                self.direct_insert(block_relation)

            return file_path
        finally:
            self.cache.unlock(lock_key)
Example #2
0
    def save_blocks(self, binary):
        """Split ``binary`` into blocks and store the ones not yet known.

        The content is cut into pieces of at most ``self.block_size`` and a
        unique code is computed for each piece. Blocks whose code is already
        present in ``BlockPath`` are reused; the others are written to a
        path obtained from ``create_file_path`` and inserted.

        :param binary: a string/bytes value, or a file-like object
            supporting ``seek`` and ``read``.
        :return: list of ``(block_id, block_size)`` tuples, one per block,
            in content order.
        :raises ValueError: if ``binary`` yields no blocks.
        """
        binary_is_string = isinstance(binary, (binary_type, string_types))
        if not binary_is_string:
            binary.seek(0)

        # First pass: compute the hash of every block
        max_block_size = self.block_size
        blocks = []
        offset = 0
        while True:
            if binary_is_string:
                # Slice by offset rather than re-slicing the remainder,
                # which would copy the rest of the string on every step
                block = binary[offset:offset + max_block_size]
                offset += max_block_size
            else:
                block = binary.read(max_block_size)
            if not block:
                break
            blocks.append(string_unique_code(block))
        if not blocks:
            raise ValueError('Empty file')

        unique_codes = set(blocks)
        locked_keys = {}
        response = []
        try:
            # Lock all blocks. Acquire in sorted order so that concurrent
            # saves sharing blocks always take the locks in the same
            # sequence, and register each lock only once it is held so the
            # ``finally`` below releases exactly what was acquired.
            for code in sorted(unique_codes):
                lock_key = 'storage block save %s' % code
                self.cache.lock(lock_key)
                locked_keys[code] = lock_key

            # Look for existing blocks; their locks are not needed anymore
            existing_blocks = {}
            for block in (
                    self.session
                    .query(BlockPath.id, BlockPath.size, BlockPath.code)
                    .filter(BlockPath.code.in_(unique_codes))
                    .all()):
                existing_blocks[block.code] = (block.id, block.size)
                self.cache.unlock(locked_keys.pop(block.code))

            # Add missing blocks
            for order, block_hash in enumerate(blocks):
                if block_hash in existing_blocks:
                    response.append(existing_blocks[block_hash])
                    continue

                # Second pass: fetch the content of this block again
                start_idx = order * max_block_size
                if binary_is_string:
                    block_binary = binary[start_idx:start_idx + max_block_size]
                else:
                    binary.seek(start_idx)
                    block_binary = binary.read(max_block_size)

                full_path, path = self.create_file_path()
                put_binary_on_file(full_path, block_binary, make_dir_recursively=True)

                # Insert directly and release the lock right away so other
                # savers waiting on this block are not held up.
                # The stored size is kept in its own variable: the maximum
                # block size is still needed to compute later offsets.
                stored_size = len(block_binary)
                block_response = self.direct_insert(
                    BlockPath(path=path, size=stored_size, code=block_hash))
                block_id = block_response.inserted_primary_key[0]

                response.append((block_id, stored_size))
                # Repeated occurrences of this block reuse the new row
                existing_blocks[block_hash] = (block_id, stored_size)
                self.cache.unlock(locked_keys.pop(block_hash))

        finally:
            # Release whatever is still held (only non-empty after an error)
            for lock_key in locked_keys.values():
                self.cache.unlock(lock_key)

        return response