    def encode_decode(self, k):

        print "\nTesting encoding and then decoding with k = %s" % k

        md5 = hashlib.md5()

        with FileChunker(k, SYMBOLSIZE, DEFAULT_FILE) as chunker:

            chunk = chunker.chunk()
            while chunk:
                padding = chunk.padding

                symbols = [(i, chunk[i]) for i in xrange(k)]
                encoder = Encoder(k, symbols)
                symbols = []

                # Produce 2k encoding symbols to get a mix of source
                # and parity symbols
                for i in xrange(k * 2):
                    symbols.append(encoder.next())

                encoder = None
                decoder = Decoder(k)
                for tup in symbols:
                    decoder.append(tup)

                decoder.decode()
                decoded = bytearray()
                for i in xrange(k):
                    esi, s = decoder.next()
                    decoded += s.tostring()
                decoder = None

                if padding:
                    print "Removing", padding, "bytes of padding"
                    decoded = decoded[:-padding]

                md5.update(decoded)

                # Continue on to the next chunk
                chunk = chunker.chunk()

        print "Original digest:", self.original_digest
        print "Decoded digest:", md5.hexdigest()
        return self.original_digest == md5.hexdigest()
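
The final digest comparison assumes self.original_digest already holds the MD5 of DEFAULT_FILE, computed before the round trip. Below is a minimal sketch of how that digest could be prepared; the helper name file_md5 and the buffer size are assumptions, not part of the example above.

import hashlib

def file_md5(path, bufsize=65536):
    # Hash the file in fixed-size chunks so large files stay memory friendly
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        data = f.read(bufsize)
        while data:
            md5.update(data)
            data = f.read(bufsize)
    return md5.hexdigest()

# e.g. in the test fixture:
#     self.original_digest = file_md5(DEFAULT_FILE)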
Example #3
    def decode(self):
        """
        Orchestrates the reading of file shares into encoded symbols
        and decoding of the encoded symbols
        """
        self.stats['start_time'] = time.time()

        self.verify_input_dir()
        self.verify_output_file()

        # The outer loop iterates over block directories inside input_dir.
        # Each block directory contains the shares for that block.
        # Blocks start at 0 and increment by 1; if block n doesn't exist,
        # assume we have reached the end of the file.
        block = 0
        blockdir = os.path.join(self.input_dir, str(block))
        while os.path.exists(blockdir):

            # Attempt to read metadata for this block
            self.start_timer()
            k, padding = self.read_block_meta_data(blockdir)
            self.add_time(self.stop_timer(), 'io_time')

            # Read each share file in the block directory (excluding the
            # metadata file).  Each share is one encoding symbol.

            decoder = Decoder(k)
            read_symbols = 0

            for _file in os.listdir(blockdir):

                # Skip entries that are not regular files
                if not os.path.isfile(os.path.join(blockdir, _file)):
                    continue

                try:
                    # A share file is named with the integer id of the symbol
                    # it holds.  int(_file) raises ValueError otherwise and the
                    # file is skipped below.
                    esi = int(_file)

                    # Open the share file in binary mode
                    self.start_timer()

                    symbol = numpy.fromfile(os.path.join(blockdir, _file),
                                            dtype='uint64')
                    self.add_time(self.stop_timer(), 'io_time')

                    # Add the symbol to the decoder.
                    # A symbol is an (integer, numpy array) tuple
                    can_decode = decoder.append((esi, symbol))
                    read_symbols += 1
                    if can_decode:
                        break
                except Exception:
                    # Not a valid share file; skip it
                    continue

            # We need at least k encoded symbols (ideally more);
            # with fewer than k the block cannot be recovered.
            if read_symbols < k:
                self.exit(
                    "There were not sufficient symbols to recover block %s" %
                    block)

            if not can_decode:
                self.exit(
                    "A decoding schedule was not possible with the symbols provided."
                )

            # Instruct decoder to calculate intermediate symbols from
            # known encoding symbols
            self.start_timer()
            decoder.decode()
            self.add_time(self.stop_timer(), 'decoding_time')

            # Stream the source symbols out to the target file.
            # The first k source symbols == the first k encoding symbols
            target = open(self.output_file, 'ab')
            for i in xrange(k):

                self.start_timer()
                s = decoder.next()[1]
                self.add_time(self.stop_timer(), 'decoding_time')

                self.start_timer()
                s.tofile(target)
                self.add_time(self.stop_timer(), 'io_time')
            target.close()

            # Padding should only be on the last block but we check anyway
            # @TODO - Ensure file size is accurate before truncating
            if padding:
                self.start_timer()
                size = os.path.getsize(self.output_file) - padding
                target = io.open(self.output_file, 'a+b')
                target.truncate(size)
                target.close()
                self.add_time(self.stop_timer(), 'io_time')

            # Increment block number by 1
            block += 1
            blockdir = os.path.join(self.input_dir, str(block))
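
decode() above assumes an on-disk layout of <input_dir>/<block>/<esi>, where each share file stores one encoding symbol as raw uint64 data, next to whatever metadata read_block_meta_data() consumes. Below is a minimal sketch of the writing side that would produce such shares; write_share is a hypothetical helper, and the per-block metadata file is omitted because its format is not specified here.

import os

import numpy

def write_share(input_dir, block, esi, symbol):
    # One file per encoding symbol, named by its integer symbol id (ESI),
    # written as raw uint64 so numpy.fromfile(..., dtype='uint64') reads it back
    blockdir = os.path.join(input_dir, str(block))
    if not os.path.exists(blockdir):
        os.makedirs(blockdir)
    numpy.asarray(symbol, dtype='uint64').tofile(os.path.join(blockdir, str(esi)))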
Example #4
    def decode(self):
        """
        Orchestrates the reading of file shares into encoded symbols
        and decoding of the encoded symbols
        """
        self.stats['start_time'] = time.time()

        self.verify_input_dir()
        self.verify_output_file()

        # The outer loop iterates over block directories inside input_dir.
        # Each block directory contains the shares for that block.
        # Blocks start at 0 and increment by 1; if block n doesn't exist,
        # assume we have reached the end of the file.
        block = 0
        blockdir = os.path.join(self.input_dir, str(block))
        while os.path.exists(blockdir):

            # Attempt to read metadata for this block
            self.start_timer()
            k, padding = self.read_block_meta_data(blockdir)
            self.add_time(self.stop_timer(), 'io_time')

            # Read each share file in the block directory (excluding the
            # metadata file).  Each share is one encoding symbol.

            decoder = Decoder(k)
            read_symbols = 0

            for _file in os.listdir(blockdir):
                # Skip entries that are not regular files
                if not os.path.isfile(os.path.join(blockdir, _file)):
                    continue

                try:
                    # Open the share file in binary mode
                    self.start_timer()

                    symbol = numpy.fromfile(os.path.join(blockdir, _file),
                                            dtype='uint64')
                    self.add_time(self.stop_timer(), 'io_time')

                    # Add the symbol to the decoder.
                    # A symbol is an (integer, numpy array) tuple
                    can_decode = decoder.append((int(_file), symbol))
                    read_symbols += 1
                    if can_decode:
                        break
                except Exception:
                    # Not a valid share file; skip it
                    continue

            # We need at least k encoded symbols (ideally more);
            # with fewer than k the block cannot be recovered.
            if read_symbols < k:
                self.exit("There were not sufficient symbols"
                          " to recover block %s" % block)

            if not can_decode:
                self.exit("A decoding schedule was not possible "
                          "with the symbols provided.")

            # Instruct decoder to calculate intermediate symbols from
            # known encoding symbols
            self.start_timer()
            decoder.decode()
            self.add_time(self.stop_timer(), 'decoding_time')

            # Stream the source symbols out to the target file.
            # The first k source symbols == the first k encoding symbols
            target = open(self.output_file, 'ab')
            for i in xrange(k):

                self.start_timer()
                s = decoder.next()[1]
                self.add_time(self.stop_timer(), 'decoding_time')

                self.start_timer()
                s.tofile(target)
                self.add_time(self.stop_timer(), 'io_time')
            target.close()

            # Padding should only be on the last block but we check anyway
            # @TODO - Ensure file size is accurate before truncating
            if padding:
                self.start_timer()
                size = os.path.getsize(self.output_file) - padding
                target = io.open(self.output_file, 'a+b')
                target.truncate(size)
                target.close()
                self.add_time(self.stop_timer(), 'io_time')

            # Increment block number by 1
            block += 1
            blockdir = os.path.join(self.input_dir, str(block))

        self.stats['blocks_decoded'] = block
        self.stats['end_time'] = time.time()
        self.stats['elapsed_time'] = \
            self.stats['end_time'] - self.stats['start_time']
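
The per-phase timings accumulated into self.stats rely on start_timer(), stop_timer(), and add_time(), which are not shown in these examples. Below is a minimal sketch consistent with how they are called above (stop_timer() returns the elapsed seconds, add_time() accumulates them under a key); the class name TimedMixin is an assumption, and the real implementation may differ.

import time

class TimedMixin(object):
    def __init__(self):
        self.stats = {}
        self._timer_start = None

    def start_timer(self):
        # Remember when the current phase began
        self._timer_start = time.time()

    def stop_timer(self):
        # Return seconds elapsed since the matching start_timer() call
        return time.time() - self._timer_start

    def add_time(self, elapsed, key):
        # Accumulate elapsed time under a named bucket, e.g. 'io_time'
        self.stats[key] = self.stats.get(key, 0.0) + elapsed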