def encode_decode(self, k):
    """
    Round-trips DEFAULT_FILE through the encoder and decoder using k
    source symbols per chunk and compares digests.

    For every chunk produced by the FileChunker, the k source symbols are
    handed to an Encoder, 2k encoding symbols are drawn from it, and all
    of them are fed to a fresh Decoder. The first k symbols produced by
    the decoder are concatenated, stripped of the chunk's padding and
    folded into an MD5 digest.

    Returns True when that digest equals ``self.original_digest``.
    """
    # Single-argument print(...) is used throughout: it emits the same text
    # as the equivalent print statement and also parses on Python 3.
    print("\nTesting encoding and then decoding with k = %s" % k)

    md5 = hashlib.md5()
    with FileChunker(k, SYMBOLSIZE, DEFAULT_FILE) as chunker:
        while True:
            chunk = chunker.chunk()
            if not chunk:
                break

            padding = chunk.padding

            # Source symbols are (index, data) tuples
            source_symbols = [(esi, chunk[esi]) for esi in range(k)]
            encoder = Encoder(k, source_symbols)

            # Draw 2k encoding symbols from the encoder
            # (presumably a mix of source and parity symbols)
            encoded_symbols = [encoder.next() for _ in range(k * 2)]
            encoder = None

            decoder = Decoder(k)
            for encoded in encoded_symbols:
                decoder.append(encoded)
            decoder.decode()

            # Reassemble the chunk from the first k decoded symbols
            decoded = bytearray()
            for _ in range(k):
                _esi, symbol = decoder.next()
                decoded += symbol.tostring()
            decoder = None

            if padding:
                # Negative index so the slice drops the trailing pad bytes
                trim = 0 - padding
                print(" ".join(("Removing padding", str(trim), "bytes")))
                decoded = decoded[:trim]

            md5.update(decoded)

    decoded_digest = md5.hexdigest()
    print(" ".join(("Original digest:", str(self.original_digest))))
    print(" ".join(("Decoded digest:", str(decoded_digest))))
    return self.original_digest == decoded_digest
def decode(self):
    """
    Orchestrates the reading of file shares into encoded symbols and
    decoding of the encoded symbols

    Walks the numbered block directories under ``self.input_dir``
    (0, 1, 2, ... stopping at the first one that does not exist). For
    each block it reads the block metadata (k, padding), feeds share
    files to a ``Decoder`` until it reports that decoding is possible,
    appends the first k decoded symbols to ``self.output_file`` and then
    truncates the padding recorded for the block.

    Side effects: sets ``self.stats['start_time']`` and accumulates time
    through ``self.add_time`` under 'io_time' and 'decoding_time'.
    ``self.exit`` is called with a message when fewer than k shares could
    be read for a block or when no decoding schedule was possible.
    """
    self.stats['start_time'] = time.time()
    self.verify_input_dir()
    self.verify_output_file()

    # Outer loop will iterate over blocks in a directory.
    # Block directories contain shares per block.
    # Blocks start at 0 and increment by 1. If block n doesn't exist
    # then assume that is the end of the file
    block = 0
    blockdir = os.path.join(self.input_dir, str(block))
    while os.path.exists(blockdir):

        # Attempt to read metadata for this block
        self.start_timer()
        k, padding = self.read_block_meta_data(blockdir)
        self.add_time(self.stop_timer(), 'io_time')

        # For each file in the block directory (excluding meta) read each
        # share. Each will be an encoding symbol
        decoder = Decoder(k)
        read_symbols = 0
        # Bound up front so the check after the loop is well defined even
        # when no share could be appended
        can_decode = False
        for _file in os.listdir(blockdir):
            share_path = os.path.join(blockdir, _file)

            # Skip non files
            if not os.path.isfile(share_path):
                continue

            # A share is named by the integer id of the symbol it holds.
            # Anything else in the directory is not a share; ignore it.
            try:
                esi = int(_file)
            except ValueError:
                continue

            try:
                # Read the share file as binary uint64 data
                self.start_timer()
                symbol = numpy.fromfile(share_path, dtype='uint64')
                self.add_time(self.stop_timer(), 'io_time')

                # Add the symbol to the decoder.
                # A symbol is a (integer, numpy array) tuple
                can_decode = decoder.append((esi, symbol))
            except Exception:
                # Deliberate best effort: an unreadable or unusable share
                # is skipped so the remaining shares can still be tried
                continue

            read_symbols += 1
            if can_decode:
                break

        # Ideally we want more than k encoded symbols.
        # We will fail with less than k
        if read_symbols < k:
            self.exit("There were not sufficient symbols"
                      " to recover block %s" % block)

        if not can_decode:
            self.exit("A decoding schedule was not possible "
                      "with the symbols provided.")

        # Instruct decoder to calculate intermediate symbols from
        # known encoding symbols
        self.start_timer()
        decoder.decode()
        self.add_time(self.stop_timer(), 'decoding_time')

        # Stream source symbol output by encoding the first
        # k encoded symbols.
        # The first k source symbols == the first k encoding symbols
        # The with-block guarantees the handle is closed even if a
        # write fails part way through.
        with open(self.output_file, 'ab') as target:
            for _ in range(k):
                self.start_timer()
                source_symbol = decoder.next()[1]
                self.add_time(self.stop_timer(), 'decoding_time')

                self.start_timer()
                source_symbol.tofile(target)
                self.add_time(self.stop_timer(), 'io_time')

        # Padding should only be on the last block but we check anyway
        # @TODO - Ensure file size is accurate before truncating
        if padding:
            self.start_timer()
            size = os.path.getsize(self.output_file) - padding
            with io.open(self.output_file, 'a+b') as target:
                target.truncate(size)
            self.add_time(self.stop_timer(), 'io_time')

        # Increment block number by 1
        block += 1
        blockdir = os.path.join(self.input_dir, str(block))
def decode(self):
    """
    Orchestrates the reading of file shares into encoded symbols and
    decoding of the encoded symbols

    Walks the numbered block directories under ``self.input_dir``
    (0, 1, 2, ... stopping at the first one that does not exist). For
    each block it reads the block metadata (k, padding), feeds share
    files to a ``Decoder`` until it reports that decoding is possible,
    appends the first k decoded symbols to ``self.output_file`` and then
    truncates the padding recorded for the block.

    Side effects: records 'start_time', 'blocks_decoded', 'end_time' and
    'elapsed_time' in ``self.stats`` and accumulates time through
    ``self.add_time`` under 'io_time' and 'decoding_time'.
    ``self.exit`` is called with a message when fewer than k shares could
    be read for a block or when no decoding schedule was possible.
    """
    self.stats['start_time'] = time.time()
    self.verify_input_dir()
    self.verify_output_file()

    # Outer loop will iterate over blocks in a directory.
    # Block directories contain shares per block.
    # Blocks start at 0 and increment by 1. If block n doesn't exist
    # then assume that is the end of the file
    block = 0
    blockdir = os.path.join(self.input_dir, str(block))
    while os.path.exists(blockdir):

        # Attempt to read metadata for this block
        self.start_timer()
        k, padding = self.read_block_meta_data(blockdir)
        self.add_time(self.stop_timer(), 'io_time')

        # For each file in the block directory (excluding meta) read each
        # share. Each will be an encoding symbol
        decoder = Decoder(k)
        read_symbols = 0
        # Bound up front so the check after the loop is well defined even
        # when no share could be appended
        can_decode = False
        for _file in os.listdir(blockdir):
            share_path = os.path.join(blockdir, _file)

            # Skip non files
            if not os.path.isfile(share_path):
                continue

            # A share is named by the integer id of the symbol it holds.
            # Checking the name before reading keeps non-share files
            # (e.g. the metadata) from being read and charged to io_time.
            try:
                esi = int(_file)
            except ValueError:
                continue

            try:
                # Read the share file as binary uint64 data
                self.start_timer()
                symbol = numpy.fromfile(share_path, dtype='uint64')
                self.add_time(self.stop_timer(), 'io_time')

                # Add the symbol to the decoder.
                # A symbol is a (integer, numpy array) tuple
                can_decode = decoder.append((esi, symbol))
            except Exception:
                # Deliberate best effort: an unreadable or unusable share
                # is skipped so the remaining shares can still be tried
                continue

            read_symbols += 1
            if can_decode:
                break

        # Ideally we want more than k encoded symbols.
        # We will fail with less than k
        if read_symbols < k:
            self.exit("There were not sufficient symbols"
                      " to recover block %s" % block)

        if not can_decode:
            self.exit("A decoding schedule was not possible "
                      "with the symbols provided.")

        # Instruct decoder to calculate intermediate symbols from
        # known encoding symbols
        self.start_timer()
        decoder.decode()
        self.add_time(self.stop_timer(), 'decoding_time')

        # Stream source symbol output by encoding the first
        # k encoded symbols.
        # The first k source symbols == the first k encoding symbols
        # The with-block guarantees the handle is closed even if a
        # write fails part way through.
        with open(self.output_file, 'ab') as target:
            for _ in range(k):
                self.start_timer()
                source_symbol = decoder.next()[1]
                self.add_time(self.stop_timer(), 'decoding_time')

                self.start_timer()
                source_symbol.tofile(target)
                self.add_time(self.stop_timer(), 'io_time')

        # Padding should only be on the last block but we check anyway
        # @TODO - Ensure file size is accurate before truncating
        if padding:
            self.start_timer()
            size = os.path.getsize(self.output_file) - padding
            with io.open(self.output_file, 'a+b') as target:
                target.truncate(size)
            self.add_time(self.stop_timer(), 'io_time')

        # Increment block number by 1
        block += 1
        blockdir = os.path.join(self.input_dir, str(block))

    # block now equals the number of block directories processed
    self.stats['blocks_decoded'] = block
    self.stats['end_time'] = time.time()
    self.stats['elapsed_time'] = (
        self.stats['end_time'] - self.stats['start_time'])