def save_file_path(self, binary, filename=None, compressed=False):
    # Generate a unique code using SHA256
    if isinstance(binary, StorageFile):
        filename = filename or binary.name
        binary = binary.read()

    if isinstance(binary, (binary_type, string_types)):
        unique_code = string_unique_code(binary)
    else:
        unique_code = file_unique_code(binary)
        if not filename:
            filename = basename(binary.name)

    lock_key = 'storage save %s' % unique_code
    self.cache.lock(lock_key)
    try:
        file_path = (
            self.session
            .query(*FilePath.__table__.c.values())
            .filter(FilePath.code == unique_code)
            .first())

        # Save the file only if it does not exist yet
        if not file_path:
            # Create a new file path
            filename = maybe_string(filename)
            mimetype = find_mimetype(filename=filename, header_or_file=binary)
            file_path = FilePath(
                code=unique_code,
                mimetype=maybe_unicode(mimetype),
                compressed=compressed)

            to_add = []
            file_size = 0

            # Save blocks
            blocks = self.save_blocks(binary)
            for order, (block_id, block_size) in enumerate(blocks):
                to_add.append(FileBlock(file_id_block=block_id, order=order))
                file_size += block_size

            # Add file path to DB
            file_path.size = file_size
            file_path.id = self.direct_insert(file_path).inserted_primary_key[0]

            # Relate blocks and file path
            for block_relation in to_add:
                block_relation.file_id_path = file_path.id
                self.direct_insert(block_relation)

        return file_path
    finally:
        self.cache.unlock(lock_key)
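# A hedged usage sketch (not part of the original module): assuming `storage` is an
# instance of this manager with a configured session, cache and block folder, saving
# a file on disk might look like this; the `report.pdf` name is purely illustrative.
#
#     with open('report.pdf', 'rb') as source:
#         file_path = storage.save_file_path(source, filename='report.pdf')
#     print(file_path.size, file_path.mimetype)
#
# Calling it again with the same bytes returns the already stored FilePath row
# instead of writing new blocks, because the lookup is keyed on the SHA256 code.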
def save_blocks(self, binary):
    if isinstance(binary, (binary_type, string_types)):
        binary_is_string = True
    else:
        binary.seek(0)
        binary_is_string = False

    blocks = []
    block_size = self.block_size
    while_binary = binary
    while True:
        if binary_is_string:
            block = while_binary[:block_size]
            while_binary = while_binary[block_size:]
        else:
            block = while_binary.read(block_size)
        if not block:
            break
        # Create hash of the block
        blocks.append(string_unique_code(block))

    if not blocks:
        raise ValueError('Empty file')

    # Lock all blocks
    locked_keys = dict((k, 'storage block save %s' % k) for k in set(blocks))
    for lock_key in locked_keys.values():
        self.cache.lock(lock_key)

    response = []
    try:
        # Look for existing blocks
        existing_blocks = {}
        for block in (
                self.session
                .query(BlockPath.id, BlockPath.size, BlockPath.code)
                .filter(BlockPath.code.in_(set(blocks)))
                .all()):
            existing_blocks[block.code] = (block.id, block.size)
            self.cache.unlock(locked_keys.pop(block.code))

        # Add missing blocks
        for order, block_hash in enumerate(blocks):
            if block_hash in existing_blocks:
                response.append(existing_blocks[block_hash])
            else:
                if binary_is_string:
                    start_idx = order * block_size
                    block_binary = binary[start_idx:start_idx + block_size]
                else:
                    binary.seek(order * block_size)
                    block_binary = binary.read(block_size)

                full_path, path = self.create_file_path()
                put_binary_on_file(full_path, block_binary, make_dir_recursively=True)

                # Insert the block row right away so other writers waiting on
                # this block's lock can reuse it as soon as we release the lock
                new_block_size = len(block_binary)
                block_response = self.direct_insert(
                    BlockPath(path=path, size=new_block_size, code=block_hash))
                block_id = block_response.inserted_primary_key[0]

                response.append((block_id, new_block_size))
                existing_blocks[block_hash] = (block_id, new_block_size)
                self.cache.unlock(locked_keys.pop(block_hash))
    finally:
        for lock_key in locked_keys.values():
            self.cache.unlock(lock_key)

    return response
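# A hedged sketch (not part of the original module) of how a stored file could be
# reassembled from its ordered blocks. It assumes the block folder root is reachable
# via a `self.path` attribute joined (os.path.join) with `BlockPath.path`; the real
# class may expose a different helper for resolving block file paths.
#
#     def read_file_blocks(self, file_path_id):
#         block_paths = (
#             self.session
#             .query(BlockPath.path)
#             .join(FileBlock, FileBlock.file_id_block == BlockPath.id)
#             .filter(FileBlock.file_id_path == file_path_id)
#             .order_by(FileBlock.order)
#             .all())
#         for block in block_paths:
#             with open(join(self.path, block.path), 'rb') as opened:
#                 yield opened.read()
#
# The `order` column written by save_file_path keeps the blocks in the sequence the
# binary was split, so concatenating the yielded chunks rebuilds the original file.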