Exemple #1
0
class Eraser(object):
    """
    Facade over an erasure coding driver, with an optional AES stage.

    A project-specific driver is looked up first (module
    ``drivers.<ec_type>_driver``, class ``<Ec_type>Driver``); when that lookup
    fails the pyeclib ``ECDriver`` is used instead. When AES is enabled the
    instance carries an ``aes`` attribute, and the mere presence of that
    attribute is what switches the extra stage on in `encode` and `decode`.
    """

    def __init__(self,
                 ec_k,
                 ec_m,
                 ec_type="liberasurecode_rs_vand",
                 aes_enabled=True):
        """
        Build the underlying driver.
        Args:
            ec_k: Passed to the driver as `k`
            ec_m: Passed to the driver as `m`
            ec_type(str): Name of the code; also used to derive the custom
                          driver module and class names
            aes_enabled(bool): Whether payloads go through an AESDriver
        Raises:
            Exception: Whatever ECDriver raises when neither the custom driver
                       nor pyeclib can provide `ec_type`
        """
        self.ec_type = ec_type
        if not aes_enabled:
            logger.info("Eraser will not use AES encryption")
        else:
            self.aes = AESDriver()
            logger.info("Eraser will use AES encryption")

        # e.g. "isa_l_rs_vand" -> drivers.isa_l_rs_vand_driver / Isa_l_rs_vandDriver
        module_name = "drivers." + ec_type.lower() + "_driver"
        class_name = ec_type[0].upper() + ec_type[1:].lower() + "Driver"
        try:
            module = __import__(module_name, fromlist=[class_name])
            custom_driver = getattr(module, class_name)
            self.driver = custom_driver(k=ec_k,
                                        m=ec_m,
                                        ec_type=ec_type,
                                        hd=None)
        except (ImportError, AttributeError):
            # No usable custom driver: log it and fall back to pyeclib.
            logger.exception("Driver " + ec_type +
                             " could not be loaded as a custom driver")
            try:
                self.driver = ECDriver(k=ec_k, m=ec_m, ec_type=ec_type)
            except Exception as error:
                logger.exception("Driver " + ec_type +
                                 " could not be loaded by pyeclib")
                raise error

    def encode(self, data):
        """
        Turn a payload into strips.
        Args:
            data(bytes): The payload to encode
        Returns:
            The strips returned by the driver's encode method
        """
        if hasattr(self, 'aes'):
            # Only the first element of the AES driver's result is encoded.
            data = self.aes.encode(data)[0]
        return self.driver.encode(data)

    def decode(self, strips):
        """
        Turn strips back into a payload.
        Args:
            strips: The strips to hand to the driver's decode method
        Returns:
            The decoded payload, passed through the AES driver when one is set
        """
        decoded = self.driver.decode(strips)
        if not hasattr(self, 'aes'):
            return decoded
        # The AES driver is given the payload wrapped in a one-element list.
        return self.aes.decode([decoded])

    def reconstruct(self, available_payload_fragments, missing_indices):
        """
        Rebuild missing fragments by delegating to the driver.
        Args:
            available_payload_fragments(list(bytes)): Fragments still available
            missing_indices(list(int)): Indices of the fragments to rebuild
        Returns:
            list(bytes): The rebuilt fragments
        """
        driver = self.driver
        return driver.reconstruct(available_payload_fragments, missing_indices)

    def fragments_needed(self, missing_indices):
        """
        Ask the driver which fragments are required to rebuild the missing ones.
        Args:
            missing_indices(list(int)): Indices of the fragments to rebuild
        Returns:
            list(int): Indices of the fragments the driver requires
        """
        driver = self.driver
        return driver.fragments_needed(missing_indices)
class StepEntangler(object):
    """
    Basic implementation of STeP based entanglement.

    The data to store is padded and combined with `t` previously stored
    ("pointer") blocks, then run through an erasure coding driver configured
    with k = s + t and m = p. Only the fragments past index k (the parities)
    are returned for storage, each one laid out as:

        <entanglement header> DELIMITER <original size> DELIMITER <fragment>
    """
    # Use the Group Separator from the ASCII table as the delimiter between the
    # entanglement header and the data itself
    # https://www.lammertbies.nl/comm/info/ascii-characters.html
    HEADER_DELIMITER = chr(29)

    # Number of leading bytes of a fragment that are handed to FragmentHeader
    # for parsing, and that are stripped from pointer blocks before they are
    # entangled again.
    # NOTE(review): presumably the liberasurecode fragment header length --
    # confirm against the library version in use.
    FRAGMENT_HEADER_SIZE = 80

    def __init__(self, source, s, t, p, ec_type="isa_l_rs_vand"):
        """
        StepEntangler constructor
        Args:
            source(Source): Block source implementing the get_random_blocks and
                            get_block primitives
            s(int): Number of source blocks or the number of chunks to make from
                    the original data
            t(int): Number of old blocks to entangle with
            p(int): Number of parity blocks to produce using Reed-Solomon
            ec_type(str): Name of the code handed to ECDriver
        Raises:
            ValueError: If source lacks a callable get_block or
                        get_random_blocks, if s < 1, if t < 0 or if p < s
        """
        if not source or \
           not callable(getattr(source, "get_block", None)) or \
           not callable(getattr(source, "get_random_blocks", None)):
            raise ValueError(
                "source argument must implement a get_random_blocks and a get_block method"
            )
        if s <= 0:
            raise ValueError("s({:d}) must be greater or equal to 1".format(s))
        if t < 0:
            raise ValueError("t({:d}) must be greater or equal to 0".format(t))
        if p < s:
            raise ValueError(
                "p({:d}) must be greater or equal to s({:d})".format(p, s))
        self.s = s
        self.t = t
        self.p = p
        # Number of parity blocks beyond the s strictly required ones; used as
        # the bound on how many missing blocks can be tolerated.
        self.e = self.p - self.s
        self.k = s + t

        self.source = source
        self.driver = ECDriver(k=self.k, m=self.p, ec_type=ec_type)

    @staticmethod
    def __get_original_size_from_strip(strip):
        """
        Returns the size of the original data, stored between the first and
        the second delimiter of the strip
        Args:
            strip(bytes): The bytes containing the header, size and data
        Returns:
            int: The size of the original data
        """
        # Parse with the same delimiter that encode()/reconstruct() write.
        delimiter = StepEntangler.HEADER_DELIMITER
        start = strip.find(delimiter) + len(delimiter)
        end = strip.find(delimiter, start)
        return int(strip[start:end])

    @staticmethod
    def __get_data_from_strip(strip):
        """
        Returns the data part of the bytes strip, i.e. everything after the
        second delimiter
        Args:
            strip(bytes): The bytes containing the header and the data
        Returns:
            bytes: The data part of the strip
        """
        # Parse with the same delimiter that encode()/reconstruct() write.
        # When fewer than two delimiters are present, find() yields -1 for the
        # second lookup and the whole strip is returned unchanged.
        delimiter = StepEntangler.HEADER_DELIMITER
        first_pos = strip.find(delimiter) + len(delimiter)
        pos = strip.find(delimiter, first_pos) + len(delimiter)
        return strip[pos:]

    @staticmethod
    def compute_fragment_size(document_size, fragments):
        """
        Computes the fragment size that will be used to process a document of size
        `document_size`.
        Args:
            document_size(int): Document size in bytes
            fragments(int): Number of fragments
        Returns:
            int: The required fragment size in bytes (always an even number)
        Raises:
            ValueError: if the document size argument is not an integer or lower than 0,
                        or if the fragments argument is not an integer greater than 0
        """
        if not isinstance(document_size, int) or document_size < 0:
            raise ValueError(
                "document_size argument must be an integer greater or equal to 0"
            )
        if not isinstance(fragments, int) or fragments <= 0:
            raise ValueError(
                "fragments argument must be an integer greater than 0")
        fragment_size = int(math.ceil(document_size / float(fragments)))
        # Round odd sizes up to the next even number.
        if (fragment_size % 2) == 1:
            fragment_size += 1
        return fragment_size

    def encode(self, data):
        """
        Encodes data combining entanglement and Reed-Solomon(n, k)
        where k = s + t and n = k + p.
        Args:
            data(bytes): The original data to encode
        Returns:
            list(bytes): The encoded bytes to store
        """
        pointer_blocks = []
        if self.t > 0:
            pointer_blocks = self.source.get_random_blocks(self.t)
        block_header = serialize_entanglement_header(pointer_blocks)
        size = len(data)
        fragment_size = StepEntangler.compute_fragment_size(size, self.s)
        padded_size = fragment_size * self.s
        padded_data = pad(data, padded_size)
        # Strip both the entanglement header and the fragment header from the
        # pointer blocks, then pad what is left to the fragment size.
        pointer_payloads = [
            pad(
                self.__get_data_from_strip(
                    block.data)[self.FRAGMENT_HEADER_SIZE:], fragment_size)
            for block in pointer_blocks
        ]
        encoded = self.entangle(padded_data, pointer_payloads)
        # The prefix is identical for every parity block: build it once.
        prefix = block_header + self.HEADER_DELIMITER + str(size) + \
            self.HEADER_DELIMITER
        return [prefix + parity_block for parity_block in encoded[self.k:]]

    def entangle(self, data, blocks):
        """
        Performs entanglement combining the data and the extra blocks using
        Reed-Solomon.
        Args:
            data(bytes): The original piece of data
            blocks(list(bytes)): The pointer blocks
        Returns:
            list(bytes): The fragments produced by the driver when encoding the
                         data followed by the concatenated pointer blocks
        """
        return self.driver.encode(data + "".join(blocks))

    def fetch_and_prep_pointer_blocks(self, pointers, fragment_size,
                                      original_data_size):
        """
        Fetches the pointer blocks and rewrites their liberasurecode header so
        that they can be reused for reconstruction or decoding
        Args:
            pointers(list(list)): A list of 2 elements lists namely the
                                  filename and the index of each pointer
                                  block
            fragment_size(int): Size of each fragment
            original_data_size(int): Size of the original piece of data
        Returns:
            list(bytes): A list of cleaned up liberasurecode fragments formatted
                         and padded to fit the code
        Raises:
            ValueError: If the pointers argument is not of type list,
                        The fragment_size argument is not an int or is lower or
                        equal to 0,
                        The original_data_size argument is not an int or is lower
                        or equal to 0,
                        The original_data_size argument is lower than
                        fragment_size
        """
        if not isinstance(pointers, list):
            raise ValueError("pointers argument must be of type list")
        if not isinstance(fragment_size, int) or fragment_size <= 0:
            raise ValueError(
                "fragment_size argument must be an integer greater than 0")
        if not isinstance(original_data_size, int) or original_data_size <= 0:
            raise ValueError(
                "original_data_size argument must be an integer greater than 0"
            )
        if original_data_size < fragment_size:
            raise ValueError(
                "original_data_size must be greater or equal to fragment_size")
        # Shared with every handler, which is expected to store its result in
        # it. NOTE(review): presumably keyed by fragment index -- confirm in
        # PointerHandler.
        pointer_collection = {}
        fetchers = []
        for pointer_index, coordinates in enumerate(pointers):
            path = coordinates[0]
            index = coordinates[1]
            # Pointer blocks take the fragment slots right after the s data
            # fragments.
            fragment_index = self.s + pointer_index
            fetcher = PointerHandler(self.source, path, index, fragment_index,
                                     fragment_size, original_data_size,
                                     pointer_collection)
            fetcher.start()
            fetchers.append(fetcher)
        # Wait for every handler before reading the collected results.
        for fetcher in fetchers:
            fetcher.join()
        return [pointer_collection[key] for key in sorted(pointer_collection)]

    def decode(self, strips, path=None):
        """
        Decodes data using the entangled blocks and Reed-Solomon.
        Args:
            strips(list(bytes)): The encoded strips of data
            path: Identifier handed to source.get_block when extra parity
                  blocks must be fetched to make up for missing pointer blocks
        Returns:
            bytes: The decoded data
        Raises:
            ECDriverError: if the number of fragments is too low for Reed-Solomon
                           decoding
        """
        logger = logging.getLogger("entangled_driver")
        model_fragment_header = FragmentHeader(
            self.__get_data_from_strip(strips[0])[:self.FRAGMENT_HEADER_SIZE])
        fragment_size = model_fragment_header.metadata.size
        # Size recorded in the fragment header; only used to prepare the
        # pointer blocks.
        orig_data_size = model_fragment_header.metadata.orig_data_size
        modified_pointer_blocks = []

        # Size recorded in the entanglement prefix; used to trim the output.
        original_data_size = self.__get_original_size_from_strip(strips[0])
        block_header_text = get_entanglement_header_from_strip(strips[0])
        strips = [self.__get_data_from_strip(strip) for strip in strips]
        if self.t > 0:
            block_header = parse_entanglement_header(block_header_text)
            modified_pointer_blocks = self.fetch_and_prep_pointer_blocks(
                block_header, fragment_size, orig_data_size)
            # Filter to see what pointers we were able to fetch from the proxy
            initial_length = len(modified_pointer_blocks)
            modified_pointer_blocks = [
                mpb for mpb in modified_pointer_blocks if mpb
            ]
            filtered_length = len(modified_pointer_blocks)
            if filtered_length != initial_length:
                logger.warning("Only found {:d} pointers out of {:d}".format(
                    filtered_length, initial_length))
                biggest_index = max([
                    FragmentHeader(
                        strip[:self.FRAGMENT_HEADER_SIZE]).metadata.index
                    for strip in strips
                ])
                missing = initial_length - filtered_length
                if missing > self.e:
                    message = "Configuration of Step (s={:d}, t={:d}, e={:d}, p={:d}) " + \
                              "does not allow for reconstruction with {:d} missing fragments"
                    raise ECDriverError(
                        message.format(self.s, self.t, self.e, self.p,
                                       missing))
                # One extra parity per missing pointer, taken right after the
                # highest fragment index already at hand and expressed
                # relative to the first parity (hence the "- self.k").
                extra_parities_needed = [
                    index - self.k
                    for index in range(biggest_index + 1,
                                       biggest_index + 1 + missing)
                ]
                # "{}" rather than "{:s}": path defaults to None.
                logger.info("We need blocks {} from {}".format(
                    sorted(extra_parities_needed), path))
                for index in extra_parities_needed:
                    strips.append(
                        self.__get_data_from_strip(
                            self.source.get_block(path, index).data))
        decoded = self.disentangle(strips, modified_pointer_blocks)
        return decoded[:original_data_size]

    def disentangle(self, parity_blocks, pointer_blocks):
        """
        Performs disentanglement in order to reconstruct and decode the original
        data.
        Args:
            parity_blocks(list(bytes)): The parity blocks produced from the original encoding
            pointer_blocks(list(bytes)): The blocks used in the original entanglement adjusted to the right size
        Returns:
            bytes: The data as it was originally mixed with the pointer blocks before encoding
        """
        available_blocks = pointer_blocks + parity_blocks
        return self.driver.decode(available_blocks)

    def fragments_needed(self, missing_fragment_indexes):
        """
        Returns the list of fragments necessary for decoding/reconstruction of data
        Args:
            missing_fragment_indexes(list(int)): The list of missing fragments
        Returns:
            list(int): The list of fragments required for decoding/reconstruction
        Raises:
            ECDriverError: If the number of missing indexes to work around is
                           greater than self.p - self.s
            ValueError: if one of the missing indexes is out of scope
                           (index < 0 || (self.s + self.t + self.p) <= index)
        """
        if self.e == 0:
            message = (
                "Configuration of Step (s={:d}, t={:d}, e={:d}, p={:d}) does not allow for reconstruction"
                .format(self.s, self.t, self.e, self.p))
            raise ECDriverError(message)
        if self.e < len(missing_fragment_indexes):
            message = (
                "Configuration of Step (s={:d}, t={:d}, e={:d}, p={:d}) does not allow for reconstruction of {:d} missing blocks"
                .format(self.s, self.t, self.e, self.p,
                        len(missing_fragment_indexes)))
            raise ECDriverError(message)
        total_fragments = self.s + self.t + self.p
        for index in missing_fragment_indexes:
            if index < 0 or total_fragments <= index:
                raise ValueError(
                    "Index {:d} is out of range(0 <= index < {:d})".format(
                        index, total_fragments))
        # Pick the first s parity indexes (k .. k + p - 1) that are not missing.
        required_indices = []
        for index in range(self.k, self.k + self.p):
            if index not in missing_fragment_indexes:
                required_indices.append(index)
                if len(required_indices) == self.s:
                    break
        return required_indices

    def reconstruct(self, available_fragment_payloads,
                    missing_fragment_indexes):
        """
        Reconstruct the missing fragments
        Args:
            available_fragment_payloads(list(bytes)): Available fragments
            missing_fragment_indexes(list(int)): Indices of the missing
                                                 fragments; each one is shifted
                                                 by s + t before being handed
                                                 to the driver
        Returns:
            list(bytes): The list of reconstructed blocks
        """
        header_text = get_entanglement_header_from_strip(
            available_fragment_payloads[0])
        list_of_pointer_blocks = parse_entanglement_header(header_text)

        parity_header = FragmentHeader(
            self.__get_data_from_strip(
                available_fragment_payloads[0])[:self.FRAGMENT_HEADER_SIZE])
        data_size = self.__get_original_size_from_strip(
            available_fragment_payloads[0])

        parity_blocks = [
            self.__get_data_from_strip(block)
            for block in available_fragment_payloads
        ]
        missing_fragment_indexes = [
            index + self.s + self.t for index in missing_fragment_indexes
        ]
        # Get pointer blocks
        modified_pointer_blocks = self.fetch_and_prep_pointer_blocks(
            list_of_pointer_blocks, parity_header.metadata.size,
            parity_header.metadata.orig_data_size)
        # Filter to remove responses for pointers that are missing
        modified_pointer_blocks = [
            mpb for mpb in modified_pointer_blocks if mpb
        ]
        assembled = modified_pointer_blocks + parity_blocks
        reconstructed = self.driver.reconstruct(assembled,
                                                missing_fragment_indexes)

        # Re-attach the same prefix that encode() puts in front of each block.
        prefix = header_text + self.HEADER_DELIMITER + str(data_size) + \
            self.HEADER_DELIMITER
        return [prefix + block for block in reconstructed]

    def __repr__(self):
        return "StepEntangler(s={}, t={}, p={})".format(
            self.s, self.t, self.p)
# Benchmark script: times DRIVER.reconstruct for an increasing number of
# missing fragments and prints each measurement in milliseconds.
# NOTE(review): print_usage and REQUESTS are used below but are not defined in
# the visible part of this file -- they must come from elsewhere.
if __name__ == "__main__":
    # Exactly one command line argument is expected: the payload size in bytes.
    if len(sys.argv) != 2:
        print_usage()
        sys.exit(0)
    SIZE = int(sys.argv[1])
    # Code parameters come from the environment, with defaults.
    EC_K = int(os.environ.get("EC_K", 10))
    EC_M = int(os.environ.get("EC_M", 4))
    EC_TYPE = os.environ.get("EC_TYPE", "liberasurecode_rs_vand")

    DRIVER = ECDriver(k=EC_K, m=EC_M, ec_type=EC_TYPE)

    # Random payload of SIZE bytes, encoded once and reused for every run.
    DATA = os.urandom(SIZE)
    STRIPS = DRIVER.encode(DATA)
    LENGTH = EC_K + EC_M
    # Equals EC_M + 1.
    SUPPORTED_DISTANCE = LENGTH - EC_K + 1
    # NOTE(review): the banner announces "from 0 to SUPPORTED_DISTANCE" but the
    # loop below stops at SUPPORTED_DISTANCE - 1 missing blocks.
    print "About to reconstruct ", REQUESTS, " times a payload of size ", SIZE, " bytes (", \
    (DRIVER.ec_type if hasattr(DRIVER, "ec_type") else EC_TYPE), ", k =", DRIVER.k, \
    ", m =", DRIVER.m, ") from 0 to", SUPPORTED_DISTANCE, "missing blocks"

    # NOTE(review): nothing in the visible code below draws from `random`.
    random.seed(0)

    for missing_blocks in range(SUPPORTED_DISTANCE):
        for i in range(REQUESTS):
            # The first `missing_blocks` fragments are treated as lost: their
            # indices are requested and they are sliced off the input.
            missing_indices = range(missing_blocks)
            # Only the reconstruct call (including the slice) is timed.
            # NOTE(review): time.clock no longer exists as of Python 3.8.
            start = time.clock()
            DRIVER.reconstruct(STRIPS[missing_blocks:], missing_indices)
            end = time.clock()
            elapsed_in_milliseconds = (end - start) * 1000
            print elapsed_in_milliseconds
# Reconstruction benchmark: encodes one random payload, then measures how long
# DRIVER.reconstruct takes as more and more leading fragments are withheld.
# One elapsed time (in milliseconds) is printed per call.
# NOTE(review): print_usage and REQUESTS are referenced here without a visible
# definition in this file -- verify where they are provided.
if __name__ == "__main__":
    # The single expected argument is the payload size in bytes.
    if len(sys.argv) != 2:
        print_usage()
        sys.exit(0)
    SIZE = int(sys.argv[1])
    # EC_K, EC_M and EC_TYPE can be overridden through environment variables.
    EC_K = int(os.environ.get("EC_K", 10))
    EC_M = int(os.environ.get("EC_M", 4))
    EC_TYPE = os.environ.get("EC_TYPE", "liberasurecode_rs_vand")

    DRIVER = ECDriver(k=EC_K, m=EC_M, ec_type=EC_TYPE)

    # The payload is generated and encoded a single time, outside the timing.
    DATA = os.urandom(SIZE)
    STRIPS = DRIVER.encode(DATA)
    LENGTH = EC_K + EC_M
    # Simplifies to EC_M + 1, so the outer loop covers 0..EC_M missing blocks.
    SUPPORTED_DISTANCE = LENGTH - EC_K + 1
    # NOTE(review): the text printed here gives SUPPORTED_DISTANCE as the upper
    # bound, one more than the largest value the loop actually reaches.
    print "About to reconstruct ", REQUESTS, " times a payload of size ", SIZE, " bytes (", \
    (DRIVER.ec_type if hasattr(DRIVER, "ec_type") else EC_TYPE), ", k =", DRIVER.k, \
    ", m =", DRIVER.m, ") from 0 to", SUPPORTED_DISTANCE, "missing blocks"

    # NOTE(review): the seed is set but `random` is not used in the code shown.
    random.seed(0)

    for missing_blocks in range(SUPPORTED_DISTANCE):
        for i in range(REQUESTS):
            # Fragments 0 .. missing_blocks - 1 are the ones to rebuild; the
            # driver only receives the fragments from `missing_blocks` onwards.
            missing_indices = range(missing_blocks)
            # The measured interval spans the slice and the reconstruct call.
            # NOTE(review): time.clock is unavailable from Python 3.8 onwards.
            start = time.clock()
            DRIVER.reconstruct(STRIPS[missing_blocks:], missing_indices)
            end = time.clock()
            elapsed_in_milliseconds = (end - start) * 1000
            print elapsed_in_milliseconds