async def patch_remote_blocks(remote_blocks, outstream, remote_instructions, check_hashes=False):
    """Write blocks received from the remote side into ``outstream``.

    Args:
        remote_blocks: Iterable of ``(first_offset, block)`` pairs, where
            ``first_offset`` is the key used to look the block up in
            ``remote_instructions`` and ``block`` is the data to write.
        outstream: Destination object exposing awaitable ``seek(offset)``
            and ``write(data)`` methods.
        remote_instructions: Mapping of ``first_offset`` to an indexable
            ``(weak_hash, strong_hash, offsets)`` entry; the block is
            written once at every offset listed in ``offsets``.
        check_hashes: When true, each block is verified against the weak
            and strong hash stored in its instruction before being written.

    Raises:
        KeyError: If a block's ``first_offset`` has no instruction.
        ValueError: If ``check_hashes`` is set and a block does not match
            its expected hashes.
    """
    for first_offset, block in remote_blocks:
        instruction = remote_instructions[first_offset]
        expected_weak = instruction[0]
        expected_strong = instruction[1]
        target_offsets = instruction[2]

        # Optionally check that this block's hashes match the expected ones.
        # The strong hash is only computed when the weak checksum agrees.
        if check_hashes and (
            common.adler32(block) != expected_weak
            or common.stronghash(block) != expected_strong
        ):
            raise ValueError(
                "hash mismatch for remote block at offset {!r}".format(first_offset)
            )

        # The same data may be needed at several places in the output.
        for offset in target_offsets:
            await outstream.seek(offset)
            await outstream.write(block)
async def get_instructions(datastream, remote_hashes, blocksize=_DEFAULT_BLOCKSIZE):
    """Scan an async local stream for blocks that the remote side also has.

    Args:
        datastream: Object exposing awaitable ``read(n)`` and ``tell()``.
        remote_hashes: Mapping ``weak -> {strong -> offsets}`` describing the
            remote blocks. It is handed to ``common.check_block`` on every
            window, which presumably removes matched entries in place
            (helper not visible here -- confirm).
        blocksize: Size of the comparison window in bytes.

    Returns:
        A ``(local_instructions, remote_instructions)`` tuple.
        ``local_instructions`` is the list passed to ``common.check_block``;
        ``remote_instructions`` maps the first offset of every entry still
        present in ``remote_hashes`` to ``(weak, strong, offsets)``.
    """
    local_instructions = []
    position = -blocksize  # the first refill moves this to 0
    matched = True  # forces a full window read on the first pass

    while True:
        if matched and datastream is not None:
            # After a match (or on the very first pass) refill the whole
            # window and compute its checksum from scratch; this is cheaper
            # than rolling through the block one byte at a time.
            window = bytearray(await datastream.read(blocksize))
            position += blocksize
            checksum = common.adler32(window)

        matched = common.check_block(
            window, checksum, remote_hashes, local_instructions, position
        )
        if matched:
            continue

        # The current window was not matched: slide it forward by one byte.
        if datastream:
            try:
                newbyte = ord(await datastream.read(1))
            except TypeError:
                # The stream is exhausted; from here on the window only
                # shrinks. "newbyte" must stay zero to keep the rolling
                # checksum correct, and nothing is appended to the window.
                newbyte = 0
                tailsize = await datastream.tell() % blocksize
                datastream = None
            else:
                window.append(newbyte)

        if datastream is None and len(window) <= tailsize:
            # A match past this point is extremely unlikely, so stop here.
            break

        # Drop the oldest byte and derive the new checksum cheaply from the
        # previous one instead of recomputing it.
        oldbyte = window.pop(0)
        position += 1
        checksum = common.adler32_roll(checksum, oldbyte, newbyte, blocksize)

    # Index what is left in remote_hashes by the first offset of each entry.
    remote_instructions = {}
    for weak, strongs in remote_hashes.items():
        for strong, offsets in strongs.items():
            remote_instructions[offsets[0]] = (weak, strong, offsets)

    return local_instructions, remote_instructions
def block_checksums(instream, blocksize=_DEFAULT_BLOCKSIZE):
    """Compute weak and strong hashes for every block of a stream.

    The stream is read in chunks of ``blocksize`` until ``read`` returns an
    empty result; the last chunk may be shorter than ``blocksize``.

    Args:
        instream: Object exposing a synchronous ``read(n)`` method.
        blocksize: Number of bytes requested per block.

    Returns:
        A ``(block_count, hashes)`` tuple. ``block_count`` is the integer
        number of blocks read and ``hashes`` maps
        ``weak -> {strong -> [offset, ...]}``, where each offset is the
        start position of a block with that pair of hashes.
    """
    hashes = {}
    offset = 0
    block = instream.read(blocksize)
    while block:
        weak = common.adler32(block)
        strong = common.stronghash(block)
        # setdefault only creates the missing level. Blocks that share a
        # weak checksum but differ in their strong hash must all be kept;
        # resetting hashes[weak] on a new strong hash would drop the
        # earlier ones.
        hashes.setdefault(weak, {}).setdefault(strong, []).append(offset)
        offset += blocksize
        block = instream.read(blocksize)
    # offset is always a multiple of blocksize, so floor division is exact
    # and yields an int count rather than a float.
    return offset // blocksize, hashes
def get_instructions(datastream, remote_hashes, blocksize=_DEFAULT_BLOCKSIZE):
    """Scan a local stream for blocks that the remote side also has.

    Args:
        datastream: Object exposing synchronous ``read(n)`` and ``tell()``.
        remote_hashes: Mapping ``weak -> {strong -> offsets}`` describing the
            remote blocks. It is modified in place: every entry matched by a
            local window is removed.
        blocksize: Size of the comparison window in bytes.

    Returns:
        A ``(local_instructions, remote_instructions)`` tuple.
        ``local_instructions`` is a list of ``(local_offset, remote_offsets)``
        pairs, one per matched window. ``remote_instructions`` maps the first
        offset of every entry left unmatched in ``remote_hashes`` to
        ``(weak, strong, offsets)``.
    """
    local_instructions = []
    position = -blocksize  # the first refill moves this to 0
    matched = True  # forces a full window read on the first pass

    while True:
        if matched and datastream is not None:
            # After a match (or on the very first pass) refill the whole
            # window and compute its checksum from scratch; this is cheaper
            # than rolling through the block one byte at a time.
            window = bytearray(datastream.read(blocksize))
            position += blocksize
            checksum = common.adler32(window)

        matched = False
        if checksum in remote_hashes:
            # The weak hash matched; confirm with the strong hash.
            strong = common.stronghash(window)
            candidates = remote_hashes[checksum]
            if strong in candidates:
                # Both hashes agree, so this local window equals a remote
                # block.
                remote_offset = candidates[strong]
                matched = True
                local_instructions.append((position, remote_offset))
                # The remote block is accounted for; forget it, and drop the
                # weak-hash bucket too once it becomes empty.
                del candidates[strong]
                if not candidates:
                    del remote_hashes[checksum]

        if matched:
            continue

        # The current window was not matched: slide it forward by one byte.
        if datastream:
            try:
                newbyte = ord(datastream.read(1))
            except TypeError:
                # The stream is exhausted; from here on the window only
                # shrinks. "newbyte" must stay zero to keep the rolling
                # checksum correct, and nothing is appended to the window.
                newbyte = 0
                tailsize = datastream.tell() % blocksize
                datastream = None
            else:
                window.append(newbyte)

        if datastream is None and len(window) <= tailsize:
            # A match past this point is extremely unlikely, so stop here.
            break

        # Drop the oldest byte and derive the new checksum cheaply from the
        # previous one instead of recomputing it.
        oldbyte = window.pop(0)
        position += 1
        checksum = common.adler32_roll(checksum, oldbyte, newbyte, blocksize)

    # Index what is left in remote_hashes by the first offset of each entry.
    remote_instructions = {}
    for weak, strongs in remote_hashes.items():
        for strong, offsets in strongs.items():
            remote_instructions[offsets[0]] = (weak, strong, offsets)

    return local_instructions, remote_instructions