async def patch_remote_blocks(remote_blocks, outstream, remote_instructions, check_hashes=False):
	"""Write blocks obtained from the remote side into ``outstream``.

	Args:
		remote_blocks: iterable of ``(first_offset, block)`` pairs.
		outstream: object exposing awaitable ``seek(offset)`` and
			``write(data)`` methods.
		remote_instructions: mapping from ``first_offset`` to a sequence
			``(weak, strong, offsets)``; ``block`` is written at every
			offset listed in ``offsets``.
		check_hashes: when true, verify each block against the weak and
			strong hash stored in its instruction before writing it.

	Raises:
		KeyError: ``remote_instructions`` has no entry for a block's offset.
		Exception: ``check_hashes`` is set and a block's hash does not
			match. The type is kept as plain ``Exception`` so existing
			callers keep working; the message says which hash differed.
	"""
	for first_offset, block in remote_blocks:
		instruction = remote_instructions[first_offset]

		if check_hashes:
			# Weak hash first: the strong hash is only computed when the
			# weak one already agrees.
			weak = common.adler32(block)
			if weak != instruction[0]:
				raise Exception(
					"block at offset {}: weak hash {!r} != expected {!r}".format(
						first_offset, weak, instruction[0]))
			strong = common.stronghash(block)
			if strong != instruction[1]:
				raise Exception(
					"block at offset {}: strong hash {!r} != expected {!r}".format(
						first_offset, strong, instruction[1]))

		# The same block may be needed at several places in the output.
		for offset in instruction[2]:
			await outstream.seek(offset)
			await outstream.write(block)
async def get_instructions(datastream, remote_hashes, blocksize=_DEFAULT_BLOCKSIZE):
	"""Scan a local stream for blocks that are already known remotely.

	A window of up to ``blocksize`` bytes is tested with
	``common.check_block``. After a match (and on the first pass) a whole
	new block is read; otherwise the window slides forward one byte and
	the weak checksum is updated with ``common.adler32_roll``.

	Args:
		datastream: object exposing awaitable ``read(n)`` and ``tell()``.
		remote_hashes: nested mapping ``weak -> strong -> offsets``. It is
			handed to ``common.check_block``, which presumably removes
			matched entries -- verify against that helper.
		blocksize: window size in bytes.

	Returns:
		``(local_instructions, remote_instructions)``.
		``local_instructions`` is the list passed to
		``common.check_block`` on every probe (presumably filled by it).
		``remote_instructions`` maps the first offset of every entry
		still in ``remote_hashes`` to ``(weak, strong, offsets)``.
	"""
	local_instructions = []
	position = -blocksize
	matched = True  # makes the first pass read a whole block

	while True:
		if matched and datastream is not None:
			# Recomputing the checksum over a fresh block is cheaper than
			# rolling through it one byte at a time.
			window = bytearray(await datastream.read(blocksize))
			position += blocksize
			weak_sum = common.adler32(window)

		matched = common.check_block(window, weak_sum, remote_hashes, local_instructions, position)
		if matched:
			continue

		# No match: advance the window by a single byte.
		if datastream:
			try:
				incoming = ord(await datastream.read(1))
			except TypeError:
				# ord() rejects the empty read at end of input. From here
				# on the window only shrinks and zero is rolled in.
				incoming = 0
				tailsize = await datastream.tell() % blocksize
				datastream = None
			else:
				window.append(incoming)

		if datastream is None and len(window) <= tailsize:
			# Further matches are very unlikely once the window is this
			# short, so stop scanning.
			break

		outgoing = window.pop(0)
		position += 1
		weak_sum = common.adler32_roll(weak_sum, outgoing, incoming, blocksize)

	# Index what is left in remote_hashes by the first offset of each entry.
	remote_instructions = {}
	for weak_key, by_strong in remote_hashes.items():
		for strong_key, offsets in by_strong.items():
			remote_instructions[offsets[0]] = (weak_key, strong_key, offsets)

	return local_instructions, remote_instructions
# Example #3
# 0
def block_checksums(instream, blocksize=_DEFAULT_BLOCKSIZE):
    """Hash ``instream`` block by block.

    The stream is read in chunks of ``blocksize`` until ``read`` returns
    an empty result. For each chunk the weak (``common.adler32``) and
    strong (``common.stronghash``) hashes are computed and the chunk's
    starting offset is recorded under them.

    Args:
        instream: object with a ``read(n)`` method.
        blocksize: number of bytes requested per block.

    Returns:
        ``(block_count, hashes)``. ``block_count`` is the integer number
        of blocks read; a short final block counts as one block.
        ``hashes`` is a nested dict ``weak -> strong -> [offset, ...]``.
    """
    hashes = {}
    offset = 0
    block = instream.read(blocksize)
    while block:
        weak = common.adler32(block)
        strong = common.stronghash(block)
        # setdefault keeps entries already stored under this weak checksum.
        # The previous code reset hashes[weak] to {} whenever the strong
        # hash was new, so a weak-checksum collision erased earlier blocks.
        hashes.setdefault(weak, {}).setdefault(strong, []).append(offset)

        offset += blocksize
        block = instream.read(blocksize)

    # offset is always a multiple of blocksize; floor division keeps the
    # count an int (true division yields a float on Python 3).
    return offset // blocksize, hashes
# Example #4
# 0
def get_instructions(datastream, remote_hashes, blocksize=_DEFAULT_BLOCKSIZE):
    """Scan a local stream for blocks that are already known remotely.

    A window of up to ``blocksize`` bytes is compared against
    ``remote_hashes``: first by weak checksum, then by strong hash. After
    a match (and on the first pass) a whole new block is read; otherwise
    the window slides forward one byte and the weak checksum is updated
    with ``common.adler32_roll``.

    Args:
        datastream: object with ``read(n)`` and ``tell()`` methods. If it
            is ``None`` nothing is scanned and every remote entry is
            returned as still needed.
        remote_hashes: nested mapping ``weak -> strong -> offsets``.
            It is MUTATED: every matched entry is removed.
        blocksize: window size in bytes.

    Returns:
        ``(local_instructions, remote_instructions)``.
        ``local_instructions`` is a list of ``(local_offset, offsets)``
        tuples, one per matched window, where ``offsets`` is the value
        that was stored under the matching hashes.
        ``remote_instructions`` maps the first offset of every unmatched
        entry to ``(weak, strong, offsets)``.
    """
    match = True
    local_offset = -blocksize
    local_instructions = []

    while True:
        if match:
            if datastream is None:
                # A match is pending but nothing is left to read, so the
                # scan is complete. Previously ``match`` was never reset
                # in this state and the loop spun forever; a ``None``
                # stream crashed on an unbound local.
                break
            # After a match, or on the first pass, read a whole block and
            # checksum it directly. That is cheaper than rolling through
            # it one byte at a time.
            window = bytearray(datastream.read(blocksize))
            local_offset += blocksize
            checksum = common.adler32(window)
            match = False

        if checksum in remote_hashes:
            # Weak checksum matched; confirm with the strong hash.
            strongs = remote_hashes[checksum]
            strong = common.stronghash(window)
            try:
                remote_offsets = strongs.pop(strong)
            except KeyError:
                # Weak collision only: the strong hash differs.
                pass
            else:
                match = True
                local_instructions.append((local_offset, remote_offsets))
                # pop() already dropped the matched entry; drop the weak
                # bucket as well once it is empty.
                if not strongs:
                    del remote_hashes[checksum]

        if match:
            continue

        # The current window was not matched: slide it by one byte.
        if datastream is not None:
            try:
                newbyte = ord(datastream.read(1))
            except TypeError:
                # ord() rejects the empty read at end of input. From here
                # on the window only shrinks, and zero is rolled in to
                # keep the checksum arithmetic consistent.
                newbyte = 0
                tailsize = datastream.tell() % blocksize
                datastream = None
            else:
                window.append(newbyte)

        if datastream is None and len(window) <= tailsize:
            # The likelihood that any blocks will match after this is
            # nearly nil, so stop.
            break

        # Drop the oldest byte and cheaply derive the new weak checksum
        # from the previous one.
        oldbyte = window.pop(0)
        local_offset += 1
        checksum = common.adler32_roll(checksum, oldbyte, newbyte,
                                       blocksize)

    # Index the entries that never matched by their first offset.
    remote_instructions = {
        offsets[0]: (weak, strong, offsets)
        for weak, strongs in remote_hashes.items()
        for strong, offsets in strongs.items()
    }

    return local_instructions, remote_instructions