def test_histogram():
    """The histogram command reports totals plus one line per duplicate count."""
    out = H.hashdb(["histogram", db1])
    H.str_equals(out[4], '{"total_hashes": 3, "total_distinct_hashes": 1}')
    H.str_equals(out[5], '{"duplicates":1, "distinct_hashes":1, "total":1}')
    H.str_equals(out[6], '{"duplicates":2, "distinct_hashes":1, "total":2}')
    # output ends with a blank line
    H.str_equals(out[7], '')
    H.int_equals(len(out), 8)
def test_size():
    """The size command reports hash-store and source-store sizes."""
    out = H.hashdb(["size", db1])
    H.str_equals(out[0], 'hash store size: 3')
    H.str_equals(out[1], 'source store size: 2')
    H.str_equals(out[2], '')
    H.int_equals(len(out), 3)
def test_hash_block_size():
    """Importing a hash whose block size mismatches the DB must be rejected."""
    # start from a fresh database
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1])
    # write a hash with the wrong hash block size, then attempt the import
    H.write_temp_dfxml_hash(byte_run_len=1024)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_mismatched_hash_block_size'], 1)
def test_scan_hash():
    """scan_hash reports the count for both present and absent hashes."""
    # a hash that is in the database
    out = H.hashdb(["scan_hash", db1, "00"])
    H.str_equals(out[0], '["00",{"count":2}]')
    H.int_equals(len(out), 2)
    # a hash that is not in the database
    out = H.hashdb(["scan_hash", db1, "33"])
    H.str_equals(out[0], '["33",{"count":0}]')
    H.int_equals(len(out), 2)
def test_add():
    """The single hash in db1 is copied into db2 by the add command."""
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash()
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["add", db1, db2])
    sizes = H.parse_sizes(H.hashdb(["size", db2]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
def test_add_multiple():
    """add_multiple merges one hash each from db1 and db2 into two in db3."""
    for db in (db1, db2, db3):
        shutil.rmtree(db, True)
    # db1 gets a hash from repository r1
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    # db2 gets the same hash from repository r2
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    # merge both into db3
    H.hashdb(["add_multiple", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 2)
    H.int_equals(sizes['source_store_size'], 2)
def test_add_repository():
    """add_repository copies only hashes whose repository name matches."""
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    # import the same hash under two repository names
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    # only the "r1" entry should be copied into db2
    H.hashdb(["add_repository", db1, db2, "r1"])
    sizes = H.parse_sizes(H.hashdb(["size", db2]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
    H.hashdb(["export", db2, xml1])
    H.dfxml_hash_equals(repository_name="r1")
def test_subtract():
    """subtract computes db1 - db2 -> db3, matching on source."""
    for db in (db1, db2, db3):
        shutil.rmtree(db, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    # "r1" goes into both databases and should be subtracted away
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    # "r2" is only in db1 and should survive
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["subtract", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(repository_name="r2")
def test_duplicates():
    """The duplicates command lists hashes matching an exact count."""
    # count 0: nothing matches
    out = H.hashdb(["duplicates", db1, "0"])
    H.str_equals(out[4], 'No hashes were found with this count.')
    H.int_equals(len(out), 6)
    # count 1: only hash "11"
    out = H.hashdb(["duplicates", db1, "1"])
    H.str_equals(out[3], '["11",{"count":1}]')
    H.int_equals(len(out), 6)
    # count 2: only hash "00"
    out = H.hashdb(["duplicates", db1, "2"])
    H.str_equals(out[3], '["00",{"count":2}]')
    H.int_equals(len(out), 6)
    # count 3: nothing matches
    out = H.hashdb(["duplicates", db1, "3"])
    H.str_equals(out[4], 'No hashes were found with this count.')
    H.int_equals(len(out), 6)
def test_intersect():
    """intersect of db1 {a,b} and db2 {b,c} yields db3 {b}.

    The same hash value is used throughout; entries differ only by
    repository name, so intersect must match on the full source.
    """
    for db in (db1, db2, db3):
        shutil.rmtree(db, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    # r1 -> db1 only
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    # r2 -> both db1 and db2 (the common element)
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    # r3 -> db2 only
    H.write_temp_dfxml_hash(repository_name="r3")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["intersect", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(repository_name="r2")
def test_intersect_hash():
    """intersect_hash of db1 {a,b} and db2 {b,c} yields db3 {b}.

    Entries differ by hash value, so the intersection matches on the
    hash digest alone.
    """
    for db in (db1, db2, db3):
        shutil.rmtree(db, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    # ...eef1 -> db1 only
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    # ...eef2 -> both db1 and db2 (the common element)
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    # ...eef3 -> db2 only
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef3")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["intersect_hash", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'], 1)
    H.int_equals(sizes['source_store_size'], 1)
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(byte_run_hashdigest="00112233445566778899aabbccddeef2")
def test_byte_alignment():
    """With byte alignment 2, only even file offsets may be imported."""
    # fresh database created with byte alignment 2
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-a2"])
    # offset 6 is aligned: accepted
    H.write_temp_dfxml_hash(byte_run_file_offset=6)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # offset 7 is misaligned: rejected
    H.write_temp_dfxml_hash(byte_run_file_offset=7)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_invalid_byte_alignment'], 1)
    # offset 8 is aligned: accepted
    H.write_temp_dfxml_hash(byte_run_file_offset=8)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
def test_scan_expanded_hash():
    """scan_expanded_hash honors the -m source-expansion threshold."""
    # default: sources are expanded
    out = H.hashdb(["scan_expanded_hash", db1, "11"])
    H.str_equals(out[3], '{"block_hashdigest":"11", "count":1, "source_list_id":654825492, "sources":[{"source_id":2,"file_offset":0,"label":"L","repository_name":"r2","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}')
    H.int_equals(len(out), 5)
    # -m0 suppresses the source list entirely
    out = H.hashdb(["scan_expanded_hash", "-m0", db1, "11"])
    H.str_equals(out[3], '{"block_hashdigest":"11", "count":1, "source_list_id":654825492}')
    H.int_equals(len(out), 5)
    # -m1 allows expansion for count 1
    out = H.hashdb(["scan_expanded_hash", "-m1", db1, "11"])
    H.str_equals(out[3], '{"block_hashdigest":"11", "count":1, "source_list_id":654825492, "sources":[{"source_id":2,"file_offset":0,"label":"L","repository_name":"r2","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}')
    H.int_equals(len(out), 5)
def test_hash_table():
    """hash_table lists a source's hashes, or an error for unknown IDs."""
    # source_id 0: not in the database
    out = H.hashdb(["hash_table", db1, "0"])
    H.str_equals(out[0], 'The requested source ID is not in the database.')
    H.int_equals(len(out), 2)
    # source_id 1: two entries for hash "00"
    out = H.hashdb(["hash_table", db1, "1"])
    H.str_equals(out[3], '# {"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(out[5], '4096	00	{"count":2}')
    H.str_equals(out[6], '8192	00	{"count":2}')
    H.int_equals(len(out), 8)
    # source_id 2: one entry for hash "11"
    out = H.hashdb(["hash_table", db1, "2"])
    H.str_equals(out[3], '# {"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(out[5], '12288	11	{"count":1}')
    H.int_equals(len(out), 7)
    # source_id 3: not in the database
    out = H.hashdb(["hash_table", db1, "3"])
    H.str_equals(out[0], 'The requested source ID is not in the database.')
    H.int_equals(len(out), 2)
def test_max_duplicates():
    """A DB created with -m2 rejects a third duplicate of the same hash."""
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-m2"])
    # import the same hash at three offsets; only two are allowed
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*1)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*2)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*3)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    # the third import is rejected for exceeding max duplicates
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_exceeds_max_duplicates'], 1)
    sizes = H.parse_sizes(H.hashdb(["size", db1]))
    H.int_equals(sizes['hash_store_size'], 2)
def test_explain_identified_blocks():
    """explain_identified_blocks reports hashes/sources, honoring -m."""
    # empty input file: nothing to report
    write_empty_identified_blocks()
    out = H.hashdb(["explain_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(out[3], '# hashes')
    H.str_equals(out[4], '# There are no hashes to report.')
    H.str_equals(out[5], '# sources')
    H.str_equals(out[6], '# There are no sources to report.')
    H.int_equals(len(out), 8)
    # full input, default threshold: both hashes and both sources reported
    write_full_identified_blocks()
    out = H.hashdb(["explain_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(out[3], '# hashes')
    H.str_equals(out[4], '["00",{"count":2},[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]]')
    H.str_equals(out[5], '["11",{"count":1},[{"source_id":2,"file_offset":12288,"label":"L"}]]')
    H.str_equals(out[6], '# sources')
    H.str_equals(out[7], '{"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(out[8], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.int_equals(len(out), 10)
    # -m0: nothing qualifies for reporting
    write_full_identified_blocks()
    out = H.hashdb(["explain_identified_blocks", "-m0", db1, "temp_identified_blocks"])
    H.str_equals(out[3], '# hashes')
    H.str_equals(out[4], '# There are no hashes to report.')
    H.str_equals(out[5], '# sources')
    H.str_equals(out[6], '# There are no sources to report.')
    H.int_equals(len(out), 8)
    # -m1: only the count-1 hash and its source
    write_full_identified_blocks()
    out = H.hashdb(["explain_identified_blocks", "-m1", db1, "temp_identified_blocks"])
    H.str_equals(out[3], '# hashes')
    H.str_equals(out[4], '["11",{"count":1},[{"source_id":2,"file_offset":12288,"label":"L"}]]')
    H.str_equals(out[5], '# sources')
    H.str_equals(out[6], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.int_equals(len(out), 8)
    # -m2: everything is reported again
    write_full_identified_blocks()
    out = H.hashdb(["explain_identified_blocks", "-m2", db1, "temp_identified_blocks"])
    H.str_equals(out[3], '# hashes')
    H.str_equals(out[4], '["00",{"count":2},[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]]')
    H.str_equals(out[5], '["11",{"count":1},[{"source_id":2,"file_offset":12288,"label":"L"}]]')
    H.str_equals(out[6], '# sources')
    H.str_equals(out[7], '{"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(out[8], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.int_equals(len(out), 10)
    # invalid hash value in the input produces an error line
    write_wrong_identified_blocks()
    out = H.hashdb(["explain_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(out[3][:5], 'Error')
    H.str_equals(out[4], '# hashes')
    H.str_equals(out[5], '# There are no hashes to report.')
    H.str_equals(out[6], '# sources')
    H.str_equals(out[7], '# There are no sources to report.')
    H.int_equals(len(out), 9)
def test_sources():
    """The sources command lists every source record in the database."""
    out = H.hashdb(["sources", db1])
    H.str_equals(out[0], '{"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(out[1], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(out[2], '')
    H.int_equals(len(out), 3)
def test_scan():
    """Scanning a DFXML file reports counts for matching hashes."""
    out = H.hashdb(["scan", db1, xml1])
    H.str_equals(out[4], '["00",{"count":2}]')
    H.str_equals(out[7], '["11",{"count":1}]')
    H.int_equals(len(out), 10)
def test_hash_truncation():
    """With -t3 truncation, hashes equal in their first 3 bytes collide.

    Exercised twice: with the Bloom filter disabled and enabled, since
    the duplicate-detection path differs.
    """
    # fresh DB: 3-byte truncation, Bloom filter disabled
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-t3", "--bloom", "disabled"])
    # first entry is accepted
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112233')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # same first 3 bytes: treated as a duplicate element
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112244')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_not_inserted_duplicate_element'], 1)
    # differing third byte: accepted
    H.write_temp_dfxml_hash(byte_run_hashdigest='00114433')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # fresh DB: 3-byte truncation, Bloom filter enabled (default)
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-t3"])
    # first entry is accepted
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112233')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
    # same first 3 bytes: treated as a duplicate element
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112244')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_not_inserted_duplicate_element'], 1)
    # differing third byte: accepted
    H.write_temp_dfxml_hash(byte_run_hashdigest='00114433')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
def test_expand_identified_blocks():
    """expand_identified_blocks expands source lists, honoring -m.

    Fix: removed a leftover debug ``print(*lines, sep='\\n')`` in the
    -m2 section that printed STALE data (the previous section's lines,
    before reassignment) to stdout on every run.
    """
    # empty input file: header only
    write_empty_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", db1, "temp_identified_blocks"])
    H.int_equals(len(lines), 4)
    # full input, default threshold: sources expanded
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492, "sources":[{"source_id":2,"file_offset":12288,"label":"L","repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}]')
    H.int_equals(len(lines), 7)
    # -m0: source lists suppressed
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", "-m0", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492}]')
    H.int_equals(len(lines), 7)
    # -m1: sources still expanded
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", "-m1", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492, "sources":[{"source_id":2,"file_offset":12288,"label":"L","repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}]')
    H.int_equals(len(lines), 7)
    # -m2: sources still expanded
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", "-m2", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492, "sources":[{"source_id":2,"file_offset":12288,"label":"L","repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}]')
    H.int_equals(len(lines), 7)
    # invalid hash value in the input produces an error line
    write_wrong_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(lines[3][:5], 'Error')
    H.int_equals(len(lines), 5)
def test_basic_settings():
    """Settings given at create time are persisted and enforced."""
    # remove any existing DB, then create one with explicit settings
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-p1024", "-m3", "-a 128", "-t 7", "--bloom=disabled", "--bloom_kM=4:14"])
    # every setting must round-trip through the settings store
    settings = H.parse_settings(db1)
    H.int_equals(settings['settings_version'], 2)
    H.int_equals(settings['byte_alignment'], 128)
    H.int_equals(settings['hash_truncation'], 7)
    H.int_equals(settings['hash_block_size'], 1024)
    H.int_equals(settings['maximum_hash_duplicates'], 3)
    H.bool_equals(settings['bloom_used'], False)
    H.int_equals(settings['bloom_k_hash_functions'], 4)
    H.int_equals(settings['bloom_M_hash_size'], 14)
    # an import at the byte-alignment boundary succeeds
    H.write_temp_dfxml_hash(byte_run_len=1024)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)