def test_hash_block_size():
    """Importing a hash whose block length mismatches the DB's block size is rejected.

    The DB is created with default settings; a DFXML entry with byte_run_len=1024
    must be counted under 'hashes_not_inserted_mismatched_hash_block_size'.
    """
    # create new DB
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1])
    # wrong hash block size
    H.write_temp_dfxml_hash(byte_run_len=1024)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_mismatched_hash_block_size'], 1)
def test_add():
    """The 'add' command copies the one hash in db1 into db2."""
    # one hash in db1 is added to db2
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash()
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["add", db1, db2])
    # db2 should now hold exactly one hash from one source
    sizes = H.parse_sizes(H.hashdb(["size", db2]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
def test_add_multiple():
    """The 'add_multiple' command merges db1 and db2 into db3.

    db1 and db2 each hold the same hash under a different repository name
    ("r1" vs "r2"), so db3 ends up with two hash entries and two sources.
    """
    # hash from db1 and db2 result in two hashes in db3
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["add_multiple", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 2)
    H.int_equals(sizes['source_store_size'], 2)
def test_add_repository():
    """The 'add_repository' command copies only hashes matching the given repository.

    db1 holds the same hash under repositories "r1" and "r2"; filtering on "r1"
    must leave db2 with exactly one hash/source, verified via DFXML export.
    """
    # hash with correct repository name is added
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["add_repository", db1, db2, "r1"])
    sizes = H.parse_sizes(H.hashdb(["size", db2]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
    # confirm the surviving entry is the "r1" one
    H.hashdb(["export", db2, xml1])
    H.dfxml_hash_equals(repository_name="r1")
def test_subtract():
    """The 'subtract' command removes db2's entries from db1, matching on source.

    db1 holds the hash under "r1" and "r2"; db2 holds it under "r1" only.
    db1 - db2 -> db3 must therefore leave only the "r2" entry.
    """
    # db1 - db2 -> db3 where source must match
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["subtract", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 1)
    # confirm the surviving entry is the "r2" one
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(repository_name="r2")
def test_basic_settings():
    """Creating a DB with explicit settings persists them and enforces alignment.

    Checks -p (block size), -m (max duplicates), -a (byte alignment),
    -t (hash truncation), and the Bloom-filter flags, then imports one entry
    at a valid byte-alignment boundary.
    """
    # remove existing DB
    shutil.rmtree(db1, True)
    # create new DB
    # NOTE(review): "-a 128" / "-t 7" carry an embedded space while other
    # options do not — presumably the CLI's option parser tolerates it; confirm.
    H.hashdb(["create", db1, "-p1024", "-m3", "-a 128", "-t 7",
              "--bloom=disabled", "--bloom_kM=4:14"])
    # validate settings parameters
    settings = H.parse_settings(db1)
    H.int_equals(settings['settings_version'], 2)
    H.int_equals(settings['byte_alignment'], 128)
    H.int_equals(settings['hash_truncation'], 7)
    H.int_equals(settings['hash_block_size'], 1024)
    H.int_equals(settings['maximum_hash_duplicates'], 3)
    H.bool_equals(settings['bloom_used'], False)
    H.int_equals(settings['bloom_k_hash_functions'], 4)
    H.int_equals(settings['bloom_M_hash_size'], 14)
    # byte alignment boundary
    H.write_temp_dfxml_hash(byte_run_len=1024)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
def setup():
    """Populate db1 with three entries for subsequent tests.

    Two entries share hashdigest "00" (one labeled "H"); a third uses "11"
    with label "L", each at a distinct 4096-aligned file offset.
    NOTE(review): a second 'setup' defined later in this file will shadow this
    one at import time if both are in the same module — confirm file layout.
    """
    # create DB with 3 entries
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1])
    H.rm_tempfile(xml1)
    H.write_temp_dfxml_hash(byte_run_hashdigest="00",
                            byte_run_file_offset=1*4096,
                            repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00",
                            byte_run_file_offset=2*4096,
                            repository_name="r1",
                            byte_run_hash_label="H")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="11",
                            byte_run_file_offset=3*4096,
                            byte_run_hash_label="L")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
def test_max_duplicates():
    """A DB created with -m2 rejects a third duplicate of the same hash.

    Only the final import's change counts are asserted; the first two imports
    are expected to succeed, leaving the hash store at size 2.
    """
    # create new DB with max 2
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-m2"])
    # add three entries where only two are allowed
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*1)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*2)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*3)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    # third import is rejected for exceeding the duplicate limit
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_exceeds_max_duplicates'], 1)
    sizes = H.parse_sizes(H.hashdb(["size", db1]))
    H.int_equals(sizes['hash_store_size'], 2)
def test_byte_alignment():
    """A DB created with -a2 accepts even file offsets and rejects odd ones."""
    # create new DB with byte alignment 2
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-a2"])
    # valid: offset 6 is a multiple of 2
    H.write_temp_dfxml_hash(byte_run_file_offset=6)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # invalid: offset 7 is not
    H.write_temp_dfxml_hash(byte_run_file_offset=7)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_invalid_byte_alignment'], 1)
    # valid: offset 8 is a multiple of 2
    H.write_temp_dfxml_hash(byte_run_file_offset=8)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
def test_intersect_hash():
    """The 'intersect_hash' command keeps only hashes present in both DBs.

    db1 holds hashes {…f1, …f2}; db2 holds {…f2, …f3}. The intersection db3
    must contain only …f2, verified via DFXML export.
    """
    # db1 with a,b and db2 with b,c intersect to db3 with just b
    # using different hash
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef3")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["intersect_hash", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
    # confirm the surviving entry is the shared hash
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(byte_run_hashdigest="00112233445566778899aabbccddeef2")
def test_intersect():
    """The 'intersect' command keeps only entries whose hash AND source match.

    The same hash is imported under repositories r1, r2 into db1 and r2, r3
    into db2; only the r2 entry is common, so db3 holds exactly that one.
    """
    # db1 with a,b and db2 with b,c intersect to db3 with just b
    # using same hash and different repository name
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r3")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["intersect", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
    # confirm the surviving entry is the "r2" one
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(repository_name="r2")
def setup():
    """Build fixtures: a DFXML export of hashes 00/11/22, then a DB with duplicates.

    First db1 is filled with hashes 00, 11, 22 and exported to xml1; db1 is
    then rebuilt with two entries of hash 00 (repositories r1, r2) and one of
    hash 11, providing duplicate hash and duplicate source entries.
    NOTE(review): this shadows the earlier 'setup' in this file if both are in
    the same module — confirm file layout.
    """
    # create DFXML with hash values 00, 11, 22
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1])
    H.rm_tempfile(xml1)
    H.write_temp_dfxml_hash(byte_run_hashdigest="00", byte_run_file_offset=1*4096)
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="11", byte_run_file_offset=2*4096)
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="22", byte_run_file_offset=3*4096)
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["export", db1, xml1])
    # create DB with two entries of hash 00 and one entry of hash 11
    # to provide two duplicate hash entries and two duplicate source entries
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00", repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00", repository_name="r2",
                            byte_run_hash_label="H")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="11", repository_name="r2",
                            byte_run_hash_label="L")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
def test_hash_truncation():
    """With -t3 truncation, hashes agreeing in their first 3 bytes collide.

    '00112233' vs '00112244' share the 3-byte prefix 001122 so the second is a
    duplicate; '00114433' differs in byte 3 and inserts. The scenario is run
    twice: once with the Bloom filter disabled, once with it enabled.
    """
    # create new DB with 3 byte hash truncation, no Bloom
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-t3", "--bloom", "disabled"])
    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112233')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # duplicate element
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112244')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_not_inserted_duplicate_element'], 1)
    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00114433')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # create new DB with 3 byte hash truncation, with Bloom
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-t3"])
    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112233')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # duplicate element
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112244')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_not_inserted_duplicate_element'], 1)
    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00114433')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)