def get_likely_xor_keysizes(ciphertext, number_of_keysizes=1):
    """Return the most likely key sizes for a repeating-key XOR ciphertext.

    For every candidate key size from 2 to 39 the ciphertext is split into
    key-size chunks, the Hamming distance between adjacent, non-overlapping
    chunk pairs is averaged, and that average is divided by the key size.
    Candidates are then ranked by this normalized distance, lowest first.

    This is a helper function for break_repeating_key_xor: it produces the
    list of key sizes that need to be tried.

    :param ciphertext: The ciphertext to find likely key sizes for
    :param number_of_keysizes: An optional variable to set the number of
        likely key sizes to return
    :return: A list of likely key sizes with the most likely key sizes at
        smaller indexes. The list is shorter than number_of_keysizes (and
        possibly empty) when the ciphertext is too short to form a chunk
        pair for enough candidate key sizes.
    """
    normalized_distances = []
    for keysize in range(2, 40):
        key_size_chunks = list(chunks(ciphertext, keysize))

        # If the number of blocks in the average distance calculation is too
        # low we may end up with the wrong keysize. Using up to 40 blocks
        # (20 pairs) seems to give good results. The "- 1" keeps i + 1 inside
        # the list when fewer than 40 chunks are available.
        max_blocks = min(len(key_size_chunks) - 1, 40)

        # NOTE(review): the final chunk may be shorter than keysize; how
        # get_hamming_distance treats unequal lengths is not visible here.
        pair_distances = [
            get_hamming_distance(key_size_chunks[i], key_size_chunks[i + 1])
            for i in range(0, max_blocks, 2)
        ]
        if not pair_distances:
            # Not enough data to compare even one pair for this key size.
            continue

        # Divide by the real number of compared pairs. Dividing by an
        # integer-halved count would scale odd and even pair counts
        # differently and would divide by zero for fewer than two pairs.
        avg_distance = float(sum(pair_distances)) / len(pair_distances)
        normalized_distances.append((keysize, avg_distance / keysize))

    # The most likely key sizes have the lowest normalized Hamming distance.
    normalized_distances.sort(key=lambda entry: entry[1])
    return [entry[0] for entry in normalized_distances[:number_of_keysizes]]
def test_get_hamming_distance():
    """Check get_hamming_distance on a known pair of equal-length strings."""
    first = 'this is a test'
    second = 'wokka wokka!!!'
    expected_distance = 37

    actual_distance = cc_util.get_hamming_distance(first, second)

    assert actual_distance == expected_distance