class Eraser(object):
    """A wrapper for pyeclib erasure coding driver (ECDriver)"""

    def __init__(self, ec_k, ec_m, ec_type="liberasurecode_rs_vand",
                 aes_enabled=True):
        """
        Eraser constructor

        A custom driver class is looked up first: module
        ``drivers.<ec_type lowercased>_driver``, class ``<Ec_type>Driver``
        (first letter upper-cased, rest lower-cased). When the module or the
        class cannot be loaded, pyeclib's ECDriver is used instead.

        Args:
            ec_k(int): Value forwarded to the driver as ``k``
            ec_m(int): Value forwarded to the driver as ``m``
            ec_type(str): Name of the erasure code, used both to locate a
                          custom driver and as ``ec_type`` for the driver
            aes_enabled(bool): When true, an AESDriver is created and applied
                               to payloads before encoding and after decoding
        Raises:
            Exception: Whatever ECDriver raises when it cannot be built for
                       ``ec_type`` either
        """
        self.ec_type = ec_type
        # The ``aes`` attribute only exists when encryption is enabled;
        # encode() and decode() test for its presence with hasattr().
        if aes_enabled:
            self.aes = AESDriver()
            logger.info("Eraser will use AES encryption")
        else:
            logger.info("Eraser will not use AES encryption")
        expected_module_name = "drivers." + ec_type.lower() + "_driver"
        expected_class_name = ec_type[0].upper() + ec_type[1:].lower() + "Driver"
        try:
            mod = __import__(expected_module_name,
                             fromlist=[expected_class_name])
            driver_class = getattr(mod, expected_class_name)
            self.driver = driver_class(k=ec_k, m=ec_m, ec_type=ec_type,
                                       hd=None)
        except (ImportError, AttributeError):
            logger.exception(
                "Driver %s could not be loaded as a custom driver", ec_type)
            try:
                self.driver = ECDriver(k=ec_k, m=ec_m, ec_type=ec_type)
            except Exception:
                logger.exception(
                    "Driver %s could not be loaded by pyeclib", ec_type)
                # Bare raise keeps the original traceback intact.
                raise

    def encode(self, data):
        """Encode a string of bytes in flattened string of byte strips"""
        payload = data
        if hasattr(self, 'aes'):
            # Only the first element returned by the AES driver is encoded.
            payload = self.aes.encode(data)[0]
        strips = self.driver.encode(payload)
        return strips

    def decode(self, strips):
        """Decode byte strips in a string of bytes"""
        payload = self.driver.decode(strips)
        if hasattr(self, 'aes'):
            # The AES driver is handed the payload wrapped in a list.
            return self.aes.decode([payload])
        return payload

    def reconstruct(self, available_payload_fragments, missing_indices):
        """
        Reconstruct missing fragments of data
        Args:
            available_payload_fragments(list(bytes)): Available fragments of data
            missing_indices(list(int)): List of the indices of the missing blocks
        Returns:
            list(bytes): A list of the reconstructed fragments
        """
        return self.driver.reconstruct(available_payload_fragments,
                                       missing_indices)

    def fragments_needed(self, missing_indices):
        """
        Return a list of the fragments needed to recover the missing ones
        Args:
            missing_indices(list(int)): The list of indices of the fragments to recover
        Returns:
            list(int): A list of the indices of the fragments required to recover the missing ones
        """
        return self.driver.fragments_needed(missing_indices)
class StepEntangler(object):
    """
    Basic implementation of STeP based entanglement
    """
    # Use the Group Separator from the ASCII table as the delimiter between the
    # entanglement header and the data itself
    # https://www.lammertbies.nl/comm/info/ascii-characters.html
    HEADER_DELIMITER = chr(29)
    # Number of leading bytes of a fragment that are handed to FragmentHeader
    # (and skipped when only the fragment payload is wanted)
    FRAGMENT_HEADER_SIZE = 80

    def __init__(self, source, s, t, p, ec_type="isa_l_rs_vand"):
        """
        StepEntangler constructor
        Args:
            source(Source): Block source implementing the get_random_blocks and
                            get_block primitives
            s(int): Number of source blocks or the number of chunks to make from
                    the original data
            t(int): Number of old blocks to entangle with
            p(int): Number of parity blocks to produce using Reed-Solomon
            ec_type(str): Erasure code type passed to ECDriver
        Raises:
            ValueError: If source lacks a callable get_block or
                        get_random_blocks, if s is lower than 1, if t is
                        lower than 0 or if p is lower than s
        """
        if not source or \
           not callable(getattr(source, "get_block", None)) or \
           not callable(getattr(source, "get_random_blocks", None)):
            raise ValueError(
                "source argument must implement a get_random_blocks and a get_block method"
            )
        if s <= 0:
            raise ValueError("s({:d}) must be greater or equal to 1".format(s))
        if t < 0:
            raise ValueError("t({:d}) must be greater or equal to 0".format(t))
        if p < s:
            raise ValueError(
                "p({:d}) must be greater or equal to s({:d})".format(p, s))
        self.s = s
        self.t = t
        self.p = p
        # Number of parity blocks produced beyond s; used as the bound on how
        # many missing blocks can be worked around
        self.e = self.p - self.s
        # The driver encodes the s data fragments together with the t pointers
        self.k = s + t
        self.source = source
        self.driver = ECDriver(k=self.k, m=self.p, ec_type=ec_type)

    @staticmethod
    def __get_original_size_from_strip(strip):
        """
        Returns the size of the original data located between the first and
        the second header delimiter of the strip
        Args:
            strip(bytes): The bytes containing the header, size and data
        Returns:
            int: The size of the original data
        """
        # Use this class' own delimiter so that parsing always matches what
        # encode() and reconstruct() write.
        delimiter = StepEntangler.HEADER_DELIMITER
        start = strip.find(delimiter) + len(delimiter)
        end = strip.find(delimiter, start)
        return int(strip[start:end])

    @staticmethod
    def __get_data_from_strip(strip):
        """
        Returns the data part of the bytes strip, i.e. everything that follows
        the second header delimiter
        Args:
            strip(bytes): The bytes containing the header and the data
        Returns:
            bytes: The data part of the strip
        """
        delimiter = StepEntangler.HEADER_DELIMITER
        first_pos = strip.find(delimiter) + len(delimiter)
        pos = strip.find(delimiter, first_pos) + len(delimiter)
        return strip[pos:]

    @staticmethod
    def compute_fragment_size(document_size, fragments):
        """
        Computes the fragment size that will be used to process a document of
        size `document_size`: the document size divided by the number of
        fragments, rounded up, then rounded up again to an even number.
        Args:
            document_size(int): Document size in bytes
            fragments(int): Number of fragments
        Returns:
            int: The required fragment size in bytes
        Raises:
            ValueError: if the document size argument is not an integer or
                        lower than 0, or if the fragments argument is not an
                        integer or lower than 1
        """
        if not isinstance(document_size, int) or document_size < 0:
            raise ValueError(
                "document_size argument must be an integer greater or equal to 0"
            )
        if not isinstance(fragments, int) or fragments <= 0:
            raise ValueError(
                "fragments argument must be an integer greater than 0")
        # Ceiling division on integers: exact for any size, no float rounding
        fragment_size = -(-document_size // fragments)
        if fragment_size % 2 == 1:
            fragment_size += 1
        return fragment_size

    def encode(self, data):
        """
        Encodes data using combining entanglemend and Reed-Solomon(n, k)
        where k = s + t and n = k + p.
        Args:
            data(bytes): The original data to encode
        Returns:
            list(bytes): The encoded bytes to store: the driver output from
                         index k onwards, each entry prefixed with the
                         entanglement header and the original data size
        """
        pointer_blocks = []
        if self.t > 0:
            pointer_blocks = self.source.get_random_blocks(self.t)
        block_header = serialize_entanglement_header(pointer_blocks)
        size = len(data)
        fragment_size = StepEntangler.compute_fragment_size(size, self.s)
        padded_size = fragment_size * self.s
        padded_data = pad(data, padded_size)
        # Drop the entanglement header and the leading fragment header of each
        # pointer block, then pad what is left to the fragment size
        pointer_blocks = [
            pad(self.__get_data_from_strip(block.data)[self.FRAGMENT_HEADER_SIZE:],
                fragment_size)
            for block in pointer_blocks
        ]
        encoded = self.entangle(padded_data, pointer_blocks)
        prefix = block_header + self.HEADER_DELIMITER + str(size) + \
            self.HEADER_DELIMITER
        # Only the fragments from index k onwards are kept
        return [prefix + parity_block for parity_block in encoded[self.k:]]

    def entangle(self, data, blocks):
        """
        Performs entanglement combining the data and the extra blocks using
        Reed-Solomon.
        Args:
            data(bytes): The original piece of data
            blocks(list(bytes)): The pointer blocks
        Returns:
            list(bytes): The fragments returned by the driver when encoding
                         the data followed by the concatenated pointer blocks
        """
        return self.driver.encode(data + "".join(blocks))

    def fetch_and_prep_pointer_blocks(self, pointers, fragment_size,
                                      original_data_size):
        """
        Fetches the pointer blocks and rewrites their liberasurecode header so
        that they can be reused for reconstruction or decoding
        Args:
            pointers(list(list)): A list of 2 elements lists namely the
                                  filename and the index of each pointer block
            fragment_size(int): Size of each fragment
            original_data_size(int): Size of the original piece of data
        Returns:
            list(bytes): The entries stored by the PointerHandler workers,
                         ordered by key
        Raises:
            ValueError: If the pointers argument is not of type list,
                        The fragment_size argument is not an int or is lower or
                        equal to 0,
                        The original_data_size argument is not an int or is
                        lower or equal to 0,
                        The original_data_size is lower than fragment_size
        """
        if not isinstance(pointers, list):
            raise ValueError("pointers argument must be of type list")
        if not isinstance(fragment_size, int) or fragment_size <= 0:
            raise ValueError(
                "fragment_size argument must be an integer greater than 0")
        if not isinstance(original_data_size, int) or original_data_size <= 0:
            raise ValueError(
                "original_data_size argument must be an integer greater than 0"
            )
        if original_data_size < fragment_size:
            raise ValueError(
                "original_data_size must be greater or equal to fragment_size")
        # Shared with every PointerHandler, which is handed this dict to
        # store its result
        pointer_collection = {}
        fetchers = []
        for pointer_index, coordinates in enumerate(pointers):
            path = coordinates[0]
            index = coordinates[1]
            # Pointer fragments are numbered right after the s data fragments
            fragment_index = self.s + pointer_index
            fetcher = PointerHandler(self.source, path, index, fragment_index,
                                     fragment_size, original_data_size,
                                     pointer_collection)
            fetcher.start()
            fetchers.append(fetcher)
        # Wait for every fetcher before reading the shared collection
        for fetcher in fetchers:
            fetcher.join()
        return [pointer_collection[key] for key in sorted(pointer_collection)]

    def decode(self, strips, path=None):
        """
        Decodes data using the entangled blocks and Reed-Solomon.
        Args:
            strips(list(bytes)): The encoded strips of data
            path(str): Passed to source.get_block to fetch extra parity
                       blocks when some pointer blocks could not be fetched
        Returns:
            bytes: The decoded data
        Raises:
            ECDriverError: if the number of fragments is too low for
                           Reed-Solomon decoding
        """
        logger = logging.getLogger("entangled_driver")
        header_size = self.FRAGMENT_HEADER_SIZE
        model_fragment_header = FragmentHeader(
            self.__get_data_from_strip(strips[0])[:header_size])
        fragment_size = model_fragment_header.metadata.size
        orig_data_size = model_fragment_header.metadata.orig_data_size
        modified_pointer_blocks = []

        original_data_size = self.__get_original_size_from_strip(strips[0])
        block_header_text = get_entanglement_header_from_strip(strips[0])
        strips = [self.__get_data_from_strip(strip) for strip in strips]
        if self.t > 0:
            block_header = parse_entanglement_header(block_header_text)
            modified_pointer_blocks = self.fetch_and_prep_pointer_blocks(
                block_header, fragment_size, orig_data_size)
            # Filter to see what pointers we were able to fetch from the proxy
            initial_length = len(modified_pointer_blocks)
            modified_pointer_blocks = [
                mpb for mpb in modified_pointer_blocks if mpb
            ]
            filtered_length = len(modified_pointer_blocks)
            if filtered_length != initial_length:
                logger.warning("Only found %d pointers out of %d",
                               filtered_length, initial_length)
                biggest_index = max(
                    FragmentHeader(strip[:header_size]).metadata.index
                    for strip in strips)
                missing = initial_length - filtered_length
                if missing > self.e:
                    message = "Configuration of Step (s={:d}, t={:d}, e={:d}, p={:d}) " + \
                              "does not allow for reconstruction with {:d} missing fragments"
                    raise ECDriverError(
                        message.format(self.s, self.t, self.e, self.p,
                                       missing))
                # One extra parity block per missing pointer, taken right
                # after the highest fragment index already available and
                # shifted by k before being requested from the source
                extra_parities_needed = [
                    index - self.k
                    for index in range(biggest_index + 1,
                                       biggest_index + 1 + missing)
                ]
                # %s rather than {:s}: path may be left at its None default
                logger.info("We need blocks %s from %s",
                            sorted(extra_parities_needed), path)
                for index in extra_parities_needed:
                    strips.append(
                        self.__get_data_from_strip(
                            self.source.get_block(path, index).data))
        decoded = self.disentangle(strips, modified_pointer_blocks)
        # Cut at the size recorded in the strip header
        return decoded[:original_data_size]

    def disentangle(self, parity_blocks, pointer_blocks):
        """
        Performs disentanglement in order to reconstruct and decode the
        original data.
        Args:
            parity_blocks(list(bytes)): The parity blocks produced from the
                                        original encoding
            pointer_blocks(list(bytes)): The blocks used in the original
                                         entanglement adjusted to the right size
        Returns:
            bytes: The result of decoding the pointer blocks followed by the
                   parity blocks with the driver
        """
        available_blocks = pointer_blocks + parity_blocks
        return self.driver.decode(available_blocks)

    def fragments_needed(self, missing_fragment_indexes):
        """
        Returns the list of fragments necessary for decoding/reconstruction of
        data
        Args:
            missing_fragment_indexes(list(int)): The list of missing fragments
        Returns:
            list(int): The first s indexes in [k, k + p) that are not listed
                       as missing (fewer if not enough are available)
        Raises:
            ECDriverError: If the number of missing indexes to work around is
                           greater than self.p - self.s
            ValueError: if one of the missing indexes is out of scope
                        (index < 0 || (self.s + self.t + self.p) <= index)
        """
        if self.e == 0:
            message = (
                "Configuration of Step (s={:d}, t={:d}, e={:d}, p={:d}) does not allow for reconstruction"
                .format(self.s, self.t, self.e, self.p))
            raise ECDriverError(message)
        if self.e < len(missing_fragment_indexes):
            message = (
                "Configuration of Step (s={:d}, t={:d}, e={:d}, p={:d}) does not allow for reconstruction of {:d} missing blocks"
                .format(self.s, self.t, self.e, self.p,
                        len(missing_fragment_indexes)))
            raise ECDriverError(message)
        upper_bound = self.s + self.t + self.p
        for index in missing_fragment_indexes:
            if index < 0 or upper_bound <= index:
                raise ValueError(
                    "Index {:d} is out of range(0 <= index < {:d})".format(
                        index, upper_bound))
        missing = frozenset(missing_fragment_indexes)
        required_indices = []
        for index in range(self.k, self.k + self.p):
            if index not in missing:
                required_indices.append(index)
                if len(required_indices) == self.s:
                    break
        return required_indices

    def reconstruct(self, available_fragment_payloads,
                    missing_fragment_indexes):
        """
        Reconstruct the missing fragements
        Args:
            available_fragment_payloads(list(bytes)): Avalaible fragments
            missing_fragment_indexes(list(int)): Indices of the missing
                fragments; each one is shifted by s + t before being handed
                to the driver
        Returns:
            list(bytes): The list of reconstructed blocks, each prefixed with
                         the entanglement header and the original data size
                         read from the first available fragment
        """
        first_payload = available_fragment_payloads[0]
        header_text = get_entanglement_header_from_strip(first_payload)
        list_of_pointer_blocks = parse_entanglement_header(header_text)

        parity_header = FragmentHeader(
            self.__get_data_from_strip(first_payload)[:self.FRAGMENT_HEADER_SIZE])
        data_size = self.__get_original_size_from_strip(first_payload)

        parity_blocks = [
            self.__get_data_from_strip(block)
            for block in available_fragment_payloads
        ]
        missing_fragment_indexes = [
            index + self.s + self.t for index in missing_fragment_indexes
        ]
        # Get pointer blocks
        modified_pointer_blocks = self.fetch_and_prep_pointer_blocks(
            list_of_pointer_blocks, parity_header.metadata.size,
            parity_header.metadata.orig_data_size)
        # Filter to remove responses for pointers that are missing
        modified_pointer_blocks = [
            mpb for mpb in modified_pointer_blocks if mpb
        ]
        assembled = modified_pointer_blocks + parity_blocks
        reconstructed = self.driver.reconstruct(assembled,
                                                missing_fragment_indexes)
        prefix = header_text + self.HEADER_DELIMITER + str(data_size) + \
            self.HEADER_DELIMITER
        return [prefix + block for block in reconstructed]

    def __repr__(self):
        return "StepEntangler(s={!s}, t={!s}, p={!s})".format(
            self.s, self.t, self.p)
if __name__ == "__main__":
    # Benchmark: encode SIZE random bytes once with pyeclib's ECDriver, then
    # time REQUESTS calls to reconstruct() for each number of missing blocks
    # and print every duration in milliseconds, one per line.
    # Usage: exactly one argument, the payload size in bytes. EC_K, EC_M and
    # EC_TYPE are read from the environment.
    if len(sys.argv) != 2:
        print_usage()
        sys.exit(0)

    SIZE = int(sys.argv[1])
    EC_K = int(os.environ.get("EC_K", 10))
    EC_M = int(os.environ.get("EC_M", 4))
    EC_TYPE = os.environ.get("EC_TYPE", "liberasurecode_rs_vand")

    DRIVER = ECDriver(k=EC_K, m=EC_M, ec_type=EC_TYPE)
    DATA = os.urandom(SIZE)
    STRIPS = DRIVER.encode(DATA)
    LENGTH = EC_K + EC_M
    SUPPORTED_DISTANCE = LENGTH - EC_K + 1

    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that do not provide time.perf_counter().
    TIMER = time.perf_counter if hasattr(time, "perf_counter") else time.clock

    # The loop below covers 0 .. SUPPORTED_DISTANCE - 1 missing blocks, so
    # that is the upper bound reported here.
    BANNER = [
        "About to reconstruct ", REQUESTS, " times a payload of size ", SIZE,
        " bytes (",
        (DRIVER.ec_type if hasattr(DRIVER, "ec_type") else EC_TYPE),
        ", k =", DRIVER.k, ", m =", DRIVER.m, ") from 0 to",
        SUPPORTED_DISTANCE - 1, "missing blocks"
    ]
    # Single-argument print(...) behaves the same on Python 2 and Python 3
    print(" ".join(str(item) for item in BANNER))

    random.seed(0)
    for missing_blocks in range(SUPPORTED_DISTANCE):
        # Explicit list so the driver gets a list on Python 3 as well
        missing_indices = list(range(missing_blocks))
        for _ in range(REQUESTS):
            start = TIMER()
            # The first `missing_blocks` strips are the ones treated as lost
            DRIVER.reconstruct(STRIPS[missing_blocks:], missing_indices)
            end = TIMER()
            elapsed_in_milliseconds = (end - start) * 1000
            print(elapsed_in_milliseconds)