def test_add_multiple_source_offsets():
    """add_multiple: merge two DBs into a third (source_offsets schema).

    Renamed from ``test_add_multiple``: a later definition of that name in
    this module (the source_sub_counts variant) shadowed this one, so it was
    never collected or run by pytest.  The unique name restores coverage.
    """
    json_db1 = [
        '{"file_hash":"11","filesize":1,"file_type":"ft1","zero_count":15,"nonprobative_count":111,"name_pairs":["rn1","fn1"]}',
        '{"block_hash":"11111111","k_entropy":101,"block_label":"bl1","source_offsets":["11",1,[1024]]}']
    json_db2 = [
        '{"file_hash":"22","filesize":2,"file_type":"ft2","zero_count":16,"nonprobative_count":222,"name_pairs":["rn2","fn2"]}',
        '{"block_hash":"22222222","k_entropy":202,"block_label":"bl2","source_offsets":["22",1,[1024]]}']
    # expected export: header comments, then hash records, then source records
    json_db3 = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"11111111","k_entropy":101,"block_label":"bl1","source_offsets":["11",1,[1024]]}',
        '{"block_hash":"22222222","k_entropy":202,"block_label":"bl2","source_offsets":["22",1,[1024]]}',
        '{"file_hash":"11","filesize":1,"file_type":"ft1","zero_count":15,"nonprobative_count":111,"name_pairs":["rn1","fn1"]}',
        '{"file_hash":"22","filesize":2,"file_type":"ft2","zero_count":16,"nonprobative_count":222,"name_pairs":["rn2","fn2"]}']

    # create DBs
    H.make_hashdb("temp_1.hdb", json_db1)
    H.make_hashdb("temp_2.hdb", json_db2)
    H.rm_tempdir("temp_3.hdb")

    # add 1 and 2 into 3
    H.hashdb(["add_multiple", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])

    # check temp_3.hdb
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json_in3 = H.read_file("temp_3.json")
    H.lines_equals(json_in3, json_db3)
def test_import_tab3():
    """import_tab: whitelist mode (-w) marks blocks already in another DB.

    temp_1.hdb is first populated from temp_1.tab with -rr, then temp_2.tab
    is imported into temp_2.hdb with -w against temp_1.hdb; the block seen in
    both is expected to carry block_label "w" in the export.
    """
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0011223344556677 8899aabbccddeeff 1"])
    H.hashdb(["import_tab", "-rr", "temp_1.hdb", "temp_1.tab"])
    H.rm_tempdir("temp_2.hdb")
    H.rm_tempfile("temp_2.json")
    H.hashdb(["create", "temp_2.hdb"])
    H.make_tempfile("temp_2.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0011223344556677 8899aabbccddeeff 1",
        "0000000000000000 8899aabbccddeeff 1",
        "0011223344556677 8899aabbccddeeff 2",
        "0011223344556677 ffffffffffffffff 3",
        "1111111111111111 2222222222222222 9",
        "1111111111111111 2222222222222222 9"])
    H.hashdb(["import_tab", "-w", "temp_1.hdb", "temp_2.hdb", "temp_2.tab"])
    H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
    returned_answer = H.read_file("temp_2.json")
    # hash 8899... is the whitelisted block: block_label "w"
    expected_answer = ["# command: ", "# hashdb-Version: ",
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",2,[4096]]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"w","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_2.tab","temp_2.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_2.tab","temp_2.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_2.tab","temp_2.tab"]}']
    H.lines_equals(returned_answer, expected_answer)
def test_size_source_sub_counts():
    """size command: store counts for the source_sub_counts schema.

    Renamed from ``test_size``: a later definition of that name in this
    module (the source_offsets variant) shadowed this one, so it was never
    collected or run by pytest.  The unique name restores coverage.
    """
    # hash stores
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0011223344556677", "source_sub_counts":["0000000000000000", 1]}',
        '{"block_hash":"00112233556677", "source_sub_counts":["0000000000000000", 1]}'])
    expected_answer = [
        '{"hash_data_store":2, "hash_store":2, "source_data_store":1, "source_id_store":1, "source_name_store":0}',
        '']
    returned_answer = H.hashdb(["size", "temp_1.hdb"])
    H.lines_equals(expected_answer, returned_answer)

    # source stores, no name_pairs
    H.make_hashdb(
        "temp_1.hdb",
        ['{"file_hash":"0011223344556677","filesize":0,"name_pairs":[]}'])
    expected_answer = [
        '{"hash_data_store":0, "hash_store":0, "source_data_store":1, "source_id_store":1, "source_name_store":0}',
        '']
    returned_answer = H.hashdb(["size", "temp_1.hdb"])
    H.lines_equals(expected_answer, returned_answer)

    # source stores, name_pairs
    H.make_hashdb("temp_1.hdb", [
        '{"file_hash":"0011223344556677","filesize":0,"name_pairs":["r1","f1","r2","f2"]}'])
    expected_answer = [
        '{"hash_data_store":0, "hash_store":0, "source_data_store":1, "source_id_store":1, "source_name_store":2}',
        '']
    returned_answer = H.hashdb(["size", "temp_1.hdb"])
    H.lines_equals(expected_answer, returned_answer)
def test_add_multiple():
    """add_multiple: merge two DBs into a third (source_sub_counts schema)."""
    json_db1 = [
        '{"file_hash":"11","filesize":1,"file_type":"ft1","zero_count":15,"nonprobative_count":111,"name_pairs":["rn1","fn1"]}',
        '{"block_hash":"11111111","k_entropy":101,"block_label":"bl1","source_sub_counts":["11",1]}']
    json_db2 = [
        '{"file_hash":"22","filesize":2,"file_type":"ft2","zero_count":16,"nonprobative_count":222,"name_pairs":["rn2","fn2"]}',
        '{"block_hash":"22222222","k_entropy":202,"block_label":"bl2","source_sub_counts":["22",1]}']
    # expected export: header comments, then hash records, then source records
    json3_db3 = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"11111111","k_entropy":101,"block_label":"bl1","source_sub_counts":["11",1]}',
        '{"block_hash":"22222222","k_entropy":202,"block_label":"bl2","source_sub_counts":["22",1]}',
        '{"file_hash":"11","filesize":1,"file_type":"ft1","zero_count":15,"nonprobative_count":111,"name_pairs":["rn1","fn1"]}',
        '{"file_hash":"22","filesize":2,"file_type":"ft2","zero_count":16,"nonprobative_count":222,"name_pairs":["rn2","fn2"]}']
    # create DBs
    H.make_hashdb("temp_1.hdb", json_db1)
    H.make_hashdb("temp_2.hdb", json_db2)
    H.rm_tempdir("temp_3.hdb")
    # add 1 and 2 into 3
    H.hashdb(["add_multiple", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])
    # check temp_3.hdb
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json_in3 = H.read_file("temp_3.json")
    H.lines_equals(json_in3, json3_db3)
def test_scan_list_source_sub_counts():
    """scan_list: hash-list scan output for the source_sub_counts schema.

    Renamed from ``test_scan_list``: a later definition of that name in this
    module (the source_offsets variant) shadowed this one, so it was never
    collected or run by pytest.  The unique name restores coverage.
    """
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # test values: not present, valid, valid repeat, valid, valid, not valid
    hash_file = ["# command: ", "# hashdb-Version: ",
                 "# marker 1",
                 "fp1 0000000000000000",
                 "# marker 2",
                 "fp1 2222222222222222",
                 "# marker 3",
                 "fp2 2222222222222222",
                 "fp3 8899aabbccddeeff",
                 "fp4 ffffffffffffffff",
                 "# marker4",
                 "fp4 invalid_hash_value",
                 "# marker5"]
    H.make_tempfile("temp_1.txt", hash_file)
    returned_answer = H.hashdb(["scan_list", "temp_1.hdb", "temp_1.txt"])
    # comment lines echo through; the repeat scan of 2222... is abbreviated
    expected_answer = [
        '# command: ',
        '# hashdb-Version: ',
        '# command: ',
        '# hashdb-Version: ',
        '# marker 1',
        '# marker 2',
        'fp1 2222222222222222 {"block_hash":"2222222222222222","k_entropy":7,"block_label":"bl1","count":1,"source_list_id":1303964917,"sources":[{"file_hash":"1111111111111111","filesize":5,"file_type":"ftc","zero_count":60,"nonprobative_count":6,"name_pairs":["r3","f3"]}],"source_sub_counts":["1111111111111111",1]}',
        '# marker 3',
        'fp2 2222222222222222 {"block_hash":"2222222222222222"}',
        'fp3 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","k_entropy":8,"block_label":"bl2","count":3,"source_list_id":36745675,"sources":[{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":40,"nonprobative_count":4,"name_pairs":["r2","f2"]},{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        'fp4 ffffffffffffffff {"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[],"source_sub_counts":["0011223344556677",1]}',
        '# marker4',
        '# marker5',
        '# scan_list completed.',
        '']
    H.lines_equals(returned_answer, expected_answer)
def test_size():
    """size command: store counts for the source_offsets schema."""
    # hash stores: two hash records sharing one source
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0011223344556677", "source_offsets":["0000000000000000", 1, [0]]}',
        '{"block_hash":"00112233556677", "source_offsets":["0000000000000000", 2... ',
        ])
def test_ingest():
    """ingest: hash a generated media image into a fresh DB, check sizes."""
    media = "temp_1_media"
    db = "temp_1.hdb"
    H.make_temp_media(media)
    H.rm_tempdir(db)
    H.hashdb(["create", db])
    H.hashdb(["ingest", db, media])
    # all five stores should report four entries
    size_report = H.hashdb(["size", db])
    H.lines_equals(size_report, [
        '{"hash_data_store":4, "hash_store":4, "source_data_store":4, "source_id_store":4, "source_name_store":4}',
        ''])
def test_media():
    """read_media/read_media_size: raw, zip and gzip reads at byte offsets.

    NOTE(review): this definition is an exact duplicate of a later
    ``test_media`` in this module, which shadows it — this copy never runs.
    One of the two should be removed.
    """
    # create media to read
    H.make_temp_media("temp_1_media")
    # read embedded filename in zip header
    returned_answer = H.hashdb(["read_media", "temp_1_media", "630", "13"])
    H.lines_equals(returned_answer, ["temp_0_file_1"])
    # read zip 1
    returned_answer = H.hashdb(["read_media", "temp_1_media", "600-zip-0", "50"])
    H.lines_equals(returned_answer, ["temp_0_file_1 content"])
    # read zip 2
    returned_answer = H.hashdb(["read_media", "temp_1_media", "666-zip-0", "50"])
    H.lines_equals(returned_answer, ["temp_0_file_2 content"])
    # read gzip
    returned_answer = H.hashdb(["read_media", "temp_1_media", "872-gzip-0", "50"])
    H.lines_equals(returned_answer, ["gzip content"])
    # read partial zip 1
    returned_answer = H.hashdb(["read_media", "temp_1_media", "600-zip-3", "10"])
    H.lines_equals(returned_answer, ["p_0_file_1"])
    # read partial gzip
    returned_answer = H.hashdb(["read_media", "temp_1_media", "872-gzip-3", "5"])
    H.lines_equals(returned_answer, ["p con"])
    # read out of range
    returned_answer = H.hashdb(["read_media", "temp_1_media", "1000000000", "50"])
    H.lines_equals(returned_answer, [""])
    # read out of range zip 1
    returned_answer = H.hashdb(["read_media", "temp_1_media", "600-zip-100", "50"])
    H.lines_equals(returned_answer, [""])
    # read out of range gzip
    returned_answer = H.hashdb(["read_media", "temp_1_media", "872-gzip-100", "50"])
    H.lines_equals(returned_answer, [""])
    # read media size
    returned_answer = H.hashdb(["read_media_size", "temp_1_media"])
    H.lines_equals(returned_answer, ["917", ""])
def test_basic_settings_v4():
    """create -b4: settings file shows settings_version 4 and block_size 4.

    Renamed from ``test_basic_settings``: a later definition of that name in
    this module (with different create flags) shadowed this one, so it was
    never collected or run by pytest.  The unique name restores coverage.
    """
    # remove existing DB
    h.rm_tempdir("temp_1.hdb")
    # create new DB
    h.hashdb(["create", "-b4", "temp_1.hdb"])
    # validate settings parameters
    lines = h.read_file(settings1)
    h.lines_equals(lines, [
        '{"settings_version":4, "block_size":4}'])
def test_media():
    """read_media/read_media_size: raw, zip and gzip reads at byte offsets."""
    # create media to read
    H.make_temp_media("temp_1_media")
    # read embedded filename in zip header
    returned_answer = H.hashdb(["read_media", "temp_1_media", "630", "13"])
    H.lines_equals(returned_answer, ["temp_0_file_1"])
    # read zip 1
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "600-zip-0", "50"])
    H.lines_equals(returned_answer, ["temp_0_file_1 content"])
    # read zip 2
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "666-zip-0", "50"])
    H.lines_equals(returned_answer, ["temp_0_file_2 content"])
    # read gzip
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "872-gzip-0", "50"])
    H.lines_equals(returned_answer, ["gzip content"])
    # read partial zip 1
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "600-zip-3", "10"])
    H.lines_equals(returned_answer, ["p_0_file_1"])
    # read partial gzip
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "872-gzip-3", "5"])
    H.lines_equals(returned_answer, ["p con"])
    # read out of range
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "1000000000", "50"])
    H.lines_equals(returned_answer, [""])
    # read out of range zip 1
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "600-zip-100", "50"])
    H.lines_equals(returned_answer, [""])
    # read out of range gzip
    returned_answer = H.hashdb(
        ["read_media", "temp_1_media", "872-gzip-100", "50"])
    H.lines_equals(returned_answer, [""])
    # read media size
    returned_answer = H.hashdb(["read_media_size", "temp_1_media"])
    H.lines_equals(returned_answer, ["917", ""])
def test_add_repository_source_offsets():
    """add_repository: copy only one repository's data (source_offsets schema).

    Renamed from ``test_add_repository``: a later definition of that name in
    this module (the source_sub_counts variant) shadowed this one, so it was
    never collected or run by pytest.  The unique name restores coverage.
    """
    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)
    H.rm_tempdir("temp_2.hdb")
    # add to new temp_2.hdb
    H.hashdb(["add_repository", "temp_1.hdb", "temp_2.hdb", "repository1"])
    # temp_2.hdb should only have hashes and sources with repository1
    H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
    json2 = H.read_file("temp_2.json")
    H.lines_equals(json2, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",1,[4096]]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}'])
    # add to new temp_2.hdb
    H.rm_tempdir("temp_2.hdb")
    H.hashdb(["add_repository", "temp_1.hdb", "temp_2.hdb", "repository2"])
    # temp_2.hdb should only have hashes and sources with repository2
    H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
    json2 = H.read_file("temp_2.json")
    H.lines_equals(json2, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",1,[4096]]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository2","second_temp_1.tab"]}'])
def test_subtract_hash_source_sub_counts():
    """subtract_hash in both directions (source_sub_counts schema).

    Renamed from ``test_subtract_hash``: a later definition of that name in
    this module (the source_offsets variant) shadowed this one, so it was
    never collected or run by pytest.  The unique name restores coverage.
    """
    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_set_db1)
    H.make_hashdb("temp_2.hdb", json_set_db2)
    H.rm_tempdir("temp_3.hdb")
    # db1 - db2 hash
    H.hashdb(["subtract_hash", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json3 = H.read_file("temp_3.json")
    H.lines_equals(json3, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"1111111111111111","k_entropy":1,"block_label":"bl1","source_sub_counts":["11",1]}',
        '{"file_hash":"11","filesize":1,"file_type":"A","zero_count":11,"nonprobative_count":1,"name_pairs":["r1","f1"]}'])
    # db2 - db1 hash
    H.rm_tempdir("temp_3.hdb")
    H.hashdb(["subtract_hash", "temp_2.hdb", "temp_1.hdb", "temp_3.hdb"])
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json3 = H.read_file("temp_3.json")
    H.lines_equals(json3, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"3333333333333333","k_entropy":3,"block_label":"bl3","source_sub_counts":["33",1]}',
        '{"file_hash":"33","filesize":3,"file_type":"C","zero_count":13,"nonprobative_count":3,"name_pairs":["r2","f2"]}'])
def test_import_tab4_source_sub_counts():
    """import_tab -rr twice: repeated imports merge (source_sub_counts schema).

    Renamed from ``test_import_tab4``: a later definition of that name in
    this module (the source_offsets variant) shadowed this one, so it was
    never collected or run by pytest.  The unique name restores coverage.
    """
    H.rm_tempdir("temp_1.hdb")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0000000000000000 8888888888888888 1",
        "0000000000000000 8888888888888888 2"])
    H.hashdb(["import_tab", "-rr", "temp_1.hdb", "temp_1.tab"])
    # second file repeats the first two rows and adds new ones
    H.make_tempfile("temp_2.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0000000000000000 8888888888888888 1",
        "0000000000000000 8888888888888888 2",
        "0000000000000000 8888888888888888 3",
        "1111111111111111 8888888888888888 1",
        "1111111111111111 8888888888888888 2"])
    H.hashdb(["import_tab", "-rr", "temp_1.hdb", "temp_2.tab"])
    H.hashdb(["export", "temp_1.hdb", "temp_1.json"])
    returned_answer = H.read_file("temp_1.json")
    expected_answer = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"8888888888888888","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",2,"1111111111111111",2]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_2.tab"]}']
    H.lines_equals(returned_answer, expected_answer)
def test_add_repository():
    """add_repository: copy only one repository's data (source_sub_counts)."""
    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)
    H.rm_tempdir("temp_2.hdb")
    # add to new temp_2.hdb
    H.hashdb(["add_repository", "temp_1.hdb", "temp_2.hdb", "repository1"])
    # temp_2.hdb should only have hashes and sources with repository1
    H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
    json2 = H.read_file("temp_2.json")
    H.lines_equals(json2, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",1]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}'])
    # add to new temp_2.hdb
    H.rm_tempdir("temp_2.hdb")
    H.hashdb(["add_repository", "temp_1.hdb", "temp_2.hdb", "repository2"])
    # temp_2.hdb should only have hashes and sources with repository2
    H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
    json2 = H.read_file("temp_2.json")
    H.lines_equals(json2, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",1]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository2","second_temp_1.tab"]}'])
def test_subtract_hash():
    """subtract_hash in both directions (source_offsets schema)."""
    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_set_db1)
    H.make_hashdb("temp_2.hdb", json_set_db2)
    H.rm_tempdir("temp_3.hdb")
    # db1 - db2 hash
    H.hashdb(["subtract_hash", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json3 = H.read_file("temp_3.json")
    H.lines_equals(json3, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"1111111111111111","k_entropy":1,"block_label":"bl1","source_offsets":["11",1,[4096]]}',
        '{"file_hash":"11","filesize":1,"file_type":"A","zero_count":11,"nonprobative_count":1,"name_pairs":["r1","f1"]}'])
    # db2 - db1 hash
    H.rm_tempdir("temp_3.hdb")
    H.hashdb(["subtract_hash", "temp_2.hdb", "temp_1.hdb", "temp_3.hdb"])
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json3 = H.read_file("temp_3.json")
    H.lines_equals(json3, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"3333333333333333","k_entropy":3,"block_label":"bl3","source_offsets":["33",1,[4096]]}',
        '{"file_hash":"33","filesize":3,"file_type":"C","zero_count":13,"nonprobative_count":3,"name_pairs":["r2","f2"]}'])
def test_import_tab4():
    """import_tab -rr twice: repeated imports merge (source_offsets schema)."""
    H.rm_tempdir("temp_1.hdb")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0000000000000000 8888888888888888 1",
        "0000000000000000 8888888888888888 2"])
    H.hashdb(["import_tab", "-rr", "temp_1.hdb", "temp_1.tab"])
    # second file repeats the first two rows and adds new ones
    H.make_tempfile("temp_2.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0000000000000000 8888888888888888 1",
        "0000000000000000 8888888888888888 2",
        "0000000000000000 8888888888888888 3",
        "1111111111111111 8888888888888888 1",
        "1111111111111111 8888888888888888 2"])
    H.hashdb(["import_tab", "-rr", "temp_1.hdb", "temp_2.tab"])
    H.hashdb(["export", "temp_1.hdb", "temp_1.json"])
    returned_answer = H.read_file("temp_1.json")
    expected_answer = [
        "# command: ", "# hashdb-Version: ",
        '{"block_hash":"8888888888888888","k_entropy":0,"block_label":"","source_offsets":["0000000000000000",2,[0,512],"1111111111111111",2,[0,512]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_2.tab"]}']
    H.lines_equals(returned_answer, expected_answer)
def test_basic_settings():
    """create with -b/-a/-m/-t flags: settings file reflects each value."""
    # remove existing DB
    h.rm_tempdir("temp_1.hdb")
    # create new DB
    h.hashdb(["create", "-b4", "-a2", "-m500:20", "-t30:10", "temp_1.hdb"])
    # validate settings parameters
    lines = h.read_file(settings1)
    h.lines_equals(
        lines,
        [
            '{"settings_version":3, "byte_alignment":2, "block_size":4, "max_count":500, "max_sub_count":20, "hash_prefix_bits":30, "hash_suffix_bytes":10}'
        ],
    )
def test_intersect_source_sub_counts():
    """intersect: only hashes common to both DBs (source_sub_counts schema).

    Renamed from ``test_intersect``: a later definition of that name in this
    module (the source_offsets variant) shadowed this one, so it was never
    collected or run by pytest.  The unique name restores coverage.
    """
    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_set_db1)
    H.make_hashdb("temp_2.hdb", json_set_db2)
    H.rm_tempdir("temp_3.hdb")
    # intersect
    H.hashdb(["intersect", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json3 = H.read_file("temp_3.json")
    H.lines_equals(json3, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":2,"block_label":"bl2","source_sub_counts":["22",2]}',
        '{"file_hash":"22","filesize":2,"file_type":"B","zero_count":12,"nonprobative_count":2,"name_pairs":["r1","f1","r2","f2"]}'])
def test_intersect():
    """intersect: only hashes common to both DBs (source_offsets schema)."""
    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_set_db1)
    H.make_hashdb("temp_2.hdb", json_set_db2)
    H.rm_tempdir("temp_3.hdb")
    # intersect
    H.hashdb(["intersect", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    json3 = H.read_file("temp_3.json")
    H.lines_equals(json3, [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":2,"block_label":"bl2","source_offsets":["22",2,[0,512]]}',
        '{"file_hash":"22","filesize":2,"file_type":"B","zero_count":12,"nonprobative_count":2,"name_pairs":["r1","f1","r2","f2"]}'])
def test_json_commands():
    """-j output-mode flag is accepted by each scan/report command."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])
    # scan_list: already done
    # scan_hash
    returned_answer = H.hashdb(["scan_hash", "-j", "c", "temp_1.hdb",
                                "8899aabbccddeeff"])
    H.lines_equals(returned_answer, [
        '{"block_hash":"8899aabbccddeeff","count":1}',
        ''])
    # scan_media: skip
    # duplicates
    returned_answer = H.hashdb(["duplicates", "-j", "c", "temp_1.hdb", "1"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        '8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        '# Processing 1 of 1 completed.',
        ''])
    # hash_table
    returned_answer = H.hashdb(["hash_table", "-j", "c", "temp_1.hdb",
                                "0011223344556677"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        '8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        '# Processing 1 of 1 completed.',
        ''])
    # scan_random: nothing returned but accepts -j
    returned_answer = H.hashdb(["scan_random", "-j", "c", "temp_1.hdb", "1"])
    H.lines_equals(returned_answer, [
        '# Processing 1 of 1 completed.',
        ''])
    # scan_same: nothing returned but accepts -j
    returned_answer = H.hashdb(["scan_same", "-j", "c", "temp_1.hdb", "1"])
    H.lines_equals(returned_answer, [
        'Match not found, hash 80000000000000000000000000000000:',
        '# Processing 1 of 1 completed.',
        ''])
def test_json_modes():
    """scan_list -j modes: e (expanded), o (optimized), c (count), a (approx)."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])
    # make hash file
    hash_file = [
        "fp1 8899aabbccddeeff",
        "fp2 8899aabbccddeeff"]
    H.make_tempfile("temp_1.txt", hash_file)
    # expanded
    returned_answer = H.hashdb(["scan_list", "-j", "e", "temp_1.hdb",
                                "temp_1.txt"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}',
        'fp2 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}',
        '# scan_list completed.',
        ''])
    # expanded optimized: the repeat hash is abbreviated
    returned_answer = H.hashdb(["scan_list", "-j", "o", "temp_1.hdb",
                                "temp_1.txt"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}',
        'fp2 8899aabbccddeeff {"block_hash":"8899aabbccddeeff"}',
        '# scan_list completed.',
        ''])
    # count only
    returned_answer = H.hashdb(["scan_list", "-j", "c", "temp_1.hdb",
                                "temp_1.txt"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        'fp2 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        '# scan_list completed.',
        ''])
    # approximate count
    returned_answer = H.hashdb(["scan_list", "-j", "a", "temp_1.hdb",
                                "temp_1.txt"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","approximate_count":1}',
        'fp2 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","approximate_count":1}',
        '# scan_list completed.',
        ''])
def test_duplicates_source_offsets():
    """duplicates: filter by count 0..3 (source_offsets schema).

    Renamed from ``test_duplicates``: a later definition of that name in this
    module (the source_sub_counts variant) shadowed this one, so it was never
    collected or run by pytest.  The unique name restores coverage.
    """
    # hash 0... doesn't go in at all.
    # hash 1... has one source with one pair.
    # hash 2... has one source with two pairs.
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_offsets":[]}',
        '{"block_hash":"1111111111111111", "source_offsets":["0000000000000000", 1, [0]]}',
        '{"block_hash":"2222222222222222", "source_offsets":["0000000000000000", 2, [0,512]]}'])
    # zero
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "0"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'No hashes were found with this count.',
        '# Processing 2 of 2 completed.',
        ''])
    # one
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "1"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        '1111111111111111 {"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_offsets":["0000000000000000",1,[0]]}',
        '# Processing 2 of 2 completed.',
        ''])
    # two
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "2"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        '2222222222222222 {"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_offsets":["0000000000000000",2,[0,512]]}',
        '# Processing 2 of 2 completed.',
        ''])
    # three
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "3"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'No hashes were found with this count.',
        '# Processing 2 of 2 completed.',
        ''])
def test_sources():
    """sources command: source listing with empty and populated name_pairs."""
    cases = [
        # (imported source record, expected listing line)
        ('{"file_hash":"0011223344556677","filesize":0,"name_pairs":[]}',
         '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}'),
        ('{"file_hash":"0011223344556677","filesize":0,"name_pairs":["r1","f1","r2","f2"]}',
         '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r1","f1","r2","f2"]}'),
    ]
    for source_json, expected_line in cases:
        H.make_hashdb("temp_1.hdb", [source_json])
        listing = H.hashdb(["sources", "temp_1.hdb"])
        H.lines_equals([expected_line, ''], listing)
def test_scan_list():
    """scan_list: hash-list scan output for the source_offsets schema."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])
    # test values: not present, valid, valid repeat, valid, valid, not valid
    hash_file = ["# command: ","# hashdb-Version: ", \
                 "# marker 1", \
                 "fp1 0000000000000000", \
                 "# marker 2", \
                 "fp1 2222222222222222", \
                 "# marker 3", \
                 "fp2 2222222222222222", \
                 "fp3 8899aabbccddeeff", \
                 "fp4 ffffffffffffffff", \
                 "# marker4", \
                 "fp4 invalid_hash_value", \
                 "# marker5"]
    H.make_tempfile("temp_1.txt", hash_file)
    returned_answer = H.hashdb(["scan_list", "temp_1.hdb", "temp_1.txt"])
    # comment lines echo through; the repeat scan of 2222... is abbreviated
    expected_answer = [
        '# command: ',
        '# hashdb-Version: ',
        '# command: ',
        '# hashdb-Version: ',
        '# marker 1',
        '# marker 2',
        'fp1 2222222222222222 {"block_hash":"2222222222222222","k_entropy":7,"block_label":"bl1","count":1,"source_list_id":1303964917,"sources":[{"file_hash":"1111111111111111","filesize":5,"file_type":"ftc","zero_count":60,"nonprobative_count":6,"name_pairs":["r3","f3"]}],"source_offsets":["1111111111111111",1,[4096]]}',
        '# marker 3',
        'fp2 2222222222222222 {"block_hash":"2222222222222222"}',
        'fp3 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","k_entropy":8,"block_label":"bl2","count":3,"source_list_id":36745675,"sources":[{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":40,"nonprobative_count":4,"name_pairs":["r2","f2"]},{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        'fp4 ffffffffffffffff {"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[],"source_offsets":["0011223344556677",1,[1024]]}',
        '# marker4',
        '# marker5',
        '# scan_list completed.',
        '']
    H.lines_equals(returned_answer, expected_answer)
def test_duplicates():
    """duplicates: filter by count 0..3 (source_sub_counts schema)."""
    # hash 0... doesn't go in at all.
    # hash 1... has one source with one pair.
    # hash 2... has one source with two pairs.
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_sub_counts":[]}',
        '{"block_hash":"1111111111111111", "source_sub_counts":["0000000000000000", 1]}',
        '{"block_hash":"2222222222222222", "source_sub_counts":["0000000000000000", 2]}'])
    # zero
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "0"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'No hashes were found with this count.',
        '# Processing 2 of 2 completed.',
        ''])
    # one
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "1"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        '1111111111111111 {"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_sub_counts":["0000000000000000",1]}',
        '# Processing 2 of 2 completed.',
        ''])
    # two
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "2"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        '2222222222222222 {"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_sub_counts":["0000000000000000",2]}',
        '# Processing 2 of 2 completed.',
        ''])
    # three
    returned_answer = H.hashdb(["duplicates", "temp_1.hdb", "3"])
    H.lines_equals(returned_answer, [
        '# command: ',
        '# hashdb-Version: ',
        'No hashes were found with this count.',
        '# Processing 2 of 2 completed.',
        ''])
def test_export_json_hash_partition_range():
    """Export with -p 00:80 keeps only records in the hash partition range."""
    # NOTE(review): a later definition in this file reuses this function name,
    # so this variant (source_offsets records) is shadowed and never runs under
    # pytest — confirm which variant is intended.
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.rm_tempfile("temp_2.json")

    db_input = [
        '{"block_hash":"2222222222222222","k_entropy":1,"block_label":"bl1","source_offsets":["1111111111111111",2,[4096]]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":2,"block_label":"bl2","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":3,"block_label":"bl3","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":4,"nonprobative_count":5,"name_pairs":["r2","f2"]}',
        '{"file_hash":"0011223344556677","filesize":6,"file_type":"fta","zero_count":7,"nonprobative_count":8,"name_pairs":["r1","f1"]}',
        '{"file_hash":"1111111111111111","filesize":9,"file_type":"ftc","zero_count":10,"nonprobative_count":11,"name_pairs":["r3","f3"]}',
    ]
    expected = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"2222222222222222","k_entropy":1,"block_label":"bl1","source_offsets":["1111111111111111",2,[4096]]}',
        '{"file_hash":"1111111111111111","filesize":9,"file_type":"ftc","zero_count":10,"nonprobative_count":11,"name_pairs":["r3","f3"]}',
    ]

    H.make_tempfile("temp_1.json", db_input)
    H.hashdb(["create", "temp_1.hdb"])
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])
    H.hashdb(["export", "-p", "00:80", "temp_1.hdb", "temp_2.json"])
    actual = H.read_file("temp_2.json")
    H.lines_equals(actual, expected)
def test_import_tab1():
    """Import a tab-delimited file; duplicate rows collapse into sub-counts."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0011223344556677	8899aabbccddeeff	1",
        "0000000000000000	8899aabbccddeeff	1",
        "0011223344556677	8899aabbccddeeff	2",
        "0011223344556677	ffffffffffffffff	3",
        "1111111111111111	2222222222222222	9",
        "1111111111111111	2222222222222222	9",
    ])
    H.hashdb(["import_tab", "temp_1.hdb", "temp_1.tab"])
    H.hashdb(["export", "temp_1.hdb", "temp_1.json"])

    actual = H.read_file("temp_1.json")
    expected = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",2]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
    ]
    H.lines_equals(actual, expected)
def test_json_modes():
    """scan_list -j modes: expanded, expanded-optimized, count, approximate count."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # make hash file: the same hash scanned twice
    H.make_tempfile("temp_1.txt", ["fp1 8899aabbccddeeff", "fp2 8899aabbccddeeff"])

    full_line = '{"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}'

    # expanded: both occurrences fully expanded
    actual = H.hashdb(["scan_list", "-j", "e", "temp_1.hdb", "temp_1.txt"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff ' + full_line,
        'fp2 8899aabbccddeeff ' + full_line,
        '# scan_list completed.',
        '',
    ])

    # expanded optimized: the repeat is abbreviated
    actual = H.hashdb(["scan_list", "-j", "o", "temp_1.hdb", "temp_1.txt"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff ' + full_line,
        'fp2 8899aabbccddeeff {"block_hash":"8899aabbccddeeff"}',
        '# scan_list completed.',
        '',
    ])

    # count only
    actual = H.hashdb(["scan_list", "-j", "c", "temp_1.hdb", "temp_1.txt"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        'fp2 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        '# scan_list completed.',
        '',
    ])

    # approximate count
    actual = H.hashdb(["scan_list", "-j", "a", "temp_1.hdb", "temp_1.txt"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        'fp1 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","approximate_count":1}',
        'fp2 8899aabbccddeeff {"block_hash":"8899aabbccddeeff","approximate_count":1}',
        '# scan_list completed.',
        '',
    ])
def test_export_json_hash_partition_range():
    """Export with -p 00:80 keeps only records in the hash partition range."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.rm_tempfile("temp_2.json")

    db_input = [
        '{"block_hash":"2222222222222222","k_entropy":1,"block_label":"bl1","source_sub_counts":["1111111111111111",2]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":2,"block_label":"bl2","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":3,"block_label":"bl3","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":4,"nonprobative_count":5,"name_pairs":["r2","f2"]}',
        '{"file_hash":"0011223344556677","filesize":6,"file_type":"fta","zero_count":7,"nonprobative_count":8,"name_pairs":["r1","f1"]}',
        '{"file_hash":"1111111111111111","filesize":9,"file_type":"ftc","zero_count":10,"nonprobative_count":11,"name_pairs":["r3","f3"]}',
    ]
    expected = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"2222222222222222","k_entropy":1,"block_label":"bl1","source_sub_counts":["1111111111111111",2]}',
        '{"file_hash":"1111111111111111","filesize":9,"file_type":"ftc","zero_count":10,"nonprobative_count":11,"name_pairs":["r3","f3"]}',
    ]

    H.make_tempfile("temp_1.json", db_input)
    H.hashdb(["create", "temp_1.hdb"])
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])
    H.hashdb(["export", "-p", "00:80", "temp_1.hdb", "temp_2.json"])
    actual = H.read_file("temp_2.json")
    H.lines_equals(actual, expected)
def test_hash_table():
    """hash_table lookups by source file hash: no match, then two matches."""
    # NOTE(review): a later definition in this file reuses this function name,
    # so this variant (source_offsets records) is shadowed and never runs under
    # pytest — confirm which variant is intended.
    # note that the first hash doesn't go in at all, next goes in once,
    # last goes in twice.
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_offsets":[]}',
        '{"block_hash":"1111111111111111", "source_offsets":["0000000000000000", 1, [0]]}',
        '{"block_hash":"2222222222222222", "source_offsets":["0000000000000000", 2, [0,512]]}',
    ])

    # no match
    actual = H.hashdb(["hash_table", "temp_1.hdb", "0011223344556677"])
    H.lines_equals(actual, ['There is no source with this file hash', ''])

    # two matches
    actual = H.hashdb(["hash_table", "temp_1.hdb", "0000000000000000"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        '1111111111111111 {"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_offsets":["0000000000000000",1,[0]]}',
        '2222222222222222 {"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[],"source_offsets":["0000000000000000",2,[0,512]]}',
        '# Processing 2 of 2 completed.',
        '',
    ])
def test_json_commands():
    """Every command that takes -j accepts it: scan_hash, duplicates, hash_table,
    scan_random, scan_same."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # scan_list: already done

    # scan_hash
    actual = H.hashdb(["scan_hash", "-j", "c", "temp_1.hdb", "8899aabbccddeeff"])
    H.lines_equals(actual, ['{"block_hash":"8899aabbccddeeff","count":1}', ''])

    # scan_media: skip

    # duplicates
    actual = H.hashdb(["duplicates", "-j", "c", "temp_1.hdb", "1"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        '8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        '# Processing 1 of 1 completed.',
        '',
    ])

    # hash_table
    actual = H.hashdb(["hash_table", "-j", "c", "temp_1.hdb", "0011223344556677"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        '8899aabbccddeeff {"block_hash":"8899aabbccddeeff","count":1}',
        '# Processing 1 of 1 completed.',
        '',
    ])

    # scan_random: nothing returned but accepts -j
    actual = H.hashdb(["scan_random", "-j", "c", "temp_1.hdb", "1"])
    H.lines_equals(actual, ['# Processing 1 of 1 completed.', ''])

    # scan_same: nothing returned but accepts -j
    actual = H.hashdb(["scan_same", "-j", "c", "temp_1.hdb", "1"])
    H.lines_equals(actual, [
        'Match not found, hash 80000000000000000000000000000000:',
        '# Processing 1 of 1 completed.',
        '',
    ])
def test_hash_table():
    """hash_table lookups by source file hash: no match, then two matches."""
    # note that the first hash doesn't go in at all, next goes in once,
    # last goes in twice.
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_sub_counts":[]}',
        '{"block_hash":"1111111111111111", "source_sub_counts":["0000000000000000", 1]}',
        '{"block_hash":"2222222222222222", "source_sub_counts":["0000000000000000", 2]}',
    ])

    # no match
    actual = H.hashdb(["hash_table", "temp_1.hdb", "0011223344556677"])
    H.lines_equals(actual, ['There is no source with this file hash', ''])

    # two matches
    actual = H.hashdb(["hash_table", "temp_1.hdb", "0000000000000000"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        '1111111111111111 {"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_sub_counts":["0000000000000000",1]}',
        '2222222222222222 {"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[],"source_sub_counts":["0000000000000000",2]}',
        '# Processing 2 of 2 completed.',
        '',
    ])
def test_sources():
    """The sources command lists source records with zero or more name pairs.

    Fix: H.lines_equals was called as (expected, returned) here, reversed
    relative to every other test in this file, which mislabels actual vs
    expected in failure output; normalized to (returned, expected).
    """
    # source stores, no name_pairs
    H.make_hashdb(
        "temp_1.hdb",
        ['{"file_hash":"0011223344556677","filesize":0,"name_pairs":[]}'])
    expected_answer = [
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}',
        '',
    ]
    returned_answer = H.hashdb(["sources", "temp_1.hdb"])
    H.lines_equals(returned_answer, expected_answer)

    # source stores, two name_pairs
    H.make_hashdb("temp_1.hdb", [
        '{"file_hash":"0011223344556677","filesize":0,"name_pairs":["r1","f1","r2","f2"]}'
    ])
    expected_answer = [
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r1","f1","r2","f2"]}',
        '',
    ]
    returned_answer = H.hashdb(["sources", "temp_1.hdb"])
    H.lines_equals(returned_answer, expected_answer)
def test_histogram():
    """histogram over a DB with one distinct hash and one duplicated hash."""
    # NOTE(review): a later definition in this file reuses this function name,
    # so this variant (source_sub_counts records) is shadowed and never runs
    # under pytest — confirm which variant is intended.
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_sub_counts":[]}',
        '{"block_hash":"1111111111111111", "source_sub_counts":["0000000000000000", 1]}',
        '{"block_hash":"2222222222222222", "source_sub_counts":["0000000000000000", 2]}',
    ])
    actual = H.hashdb(["histogram", "temp_1.hdb"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        '{"total_hashes": 3, "total_distinct_hashes": 1}',
        '{"duplicates":1, "distinct_hashes":1, "total":1}',
        '{"duplicates":2, "distinct_hashes":1, "total":2}',
        '# Processing 2 of 2 completed.',
        '',
    ])
def test_histogram():
    """histogram over a DB with one distinct hash and one duplicated hash."""
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_offsets":[]}',
        '{"block_hash":"1111111111111111", "source_offsets":["0000000000000000", 1, [0]]}',
        '{"block_hash":"2222222222222222", "source_offsets":["0000000000000000", 2, [0,512]]}',
    ])
    actual = H.hashdb(["histogram", "temp_1.hdb"])
    H.lines_equals(actual, [
        '# command: ',
        '# hashdb-Version: ',
        '{"total_hashes": 3, "total_distinct_hashes": 1}',
        '{"duplicates":1, "distinct_hashes":1, "total":1}',
        '{"duplicates":2, "distinct_hashes":1, "total":2}',
        '# Processing 2 of 2 completed.',
        '',
    ])
def test_add():
    """add into a new DB, then add again into the existing DB; content is stable."""
    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)
    H.rm_tempdir("temp_2.hdb")

    # adding twice: first into a fresh temp_2.hdb, then into the existing one;
    # after each add the export must still equal json_out1
    for _ in range(2):
        H.hashdb(["add", "temp_1.hdb", "temp_2.hdb"])
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported = H.read_file("temp_2.json")
        H.lines_equals(exported, json_out1)
def test_import_tab2():
    """import_tab with -rr sets the repository name on the source record."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.tab", [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0011223344556677	8899aabbccddeeff	1",
    ])
    H.hashdb(["import_tab", "-rr", "temp_1.hdb", "temp_1.tab"])
    H.hashdb(["export", "temp_1.hdb", "temp_1.json"])

    actual = H.read_file("temp_1.json")
    expected = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_1.tab"]}',
    ]
    H.lines_equals(actual, expected)
def test_scan_hash():
    """scan_hash for a present hash and for an absent hash."""
    # NOTE(review): a later definition in this file reuses this function name,
    # so this variant (source_offsets output) is shadowed and never runs under
    # pytest — confirm which variant is intended.
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # test individual hash, hash present
    actual = H.hashdb(["scan_hash", "temp_1.hdb", "ffffffffffffffff"])
    H.lines_equals(actual, [
        '{"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_offsets":["0011223344556677",1,[1024]]}',
        '',
    ])

    # test individual hash, hash not present
    actual = H.hashdb(["scan_hash", "temp_1.hdb", "0000000000000000"])
    H.lines_equals(actual, ["Hash not found for '0000000000000000'", ''])
def test_scan_hash():
    """scan_hash for a present hash and for an absent hash."""
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # test individual hash, hash present
    actual = H.hashdb(["scan_hash", "temp_1.hdb", "ffffffffffffffff"])
    H.lines_equals(actual, [
        '{"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}',
        '',
    ])

    # test individual hash, hash not present
    actual = H.hashdb(["scan_hash", "temp_1.hdb", "0000000000000000"])
    H.lines_equals(actual, ["Hash not found for '0000000000000000'", ''])
def test_random():
    """add_random inserts 100 hashes under one source; scan_random completes."""
    H.rm_tempdir("temp_1.hdb")
    H.hashdb(["create", "temp_1.hdb"])

    report = H.hashdb(["add_random", "temp_1.hdb", "100"])
    H.lines_equals(report, [
        '# Processing 100 of 100 completed.',
        '# hashdb changes:',
        '#     hash_data_inserted: 100',
        '#     hash_inserted: 100',
        '#     source_data_inserted: 1',
        '#     source_data_same: 1',
        '#     source_id_inserted: 1',
        '#     source_id_already_present: 101',
        '#     source_name_inserted: 1',
        '',
    ])

    H.hashdb(["export", "temp_1.hdb", "temp_1.json"])
    report = H.hashdb(["scan_random", "temp_1.hdb", "100"])
    H.lines_equals(report, ['# Processing 100 of 100 completed.', ''])
def test_add_range():
    """add_range with several count ranges selects the expected hash records."""
    # NOTE(review): a later definition in this file reuses this function name,
    # so this variant (source_sub_counts records) is shadowed and never runs
    # under pytest — confirm which variant is intended.
    colon_one = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",1]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab","repository2","second_temp_1.tab"]}',
    ]
    two_colon_two = ['# command: ', '# hashdb-Version: ']
    two_colon = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
    ]

    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)

    # each (range, expected-export) pair runs against a fresh temp_2.hdb
    cases = [
        (":1", colon_one),
        ("0:1", colon_one),
        ("1:1", colon_one),
        ("2:", two_colon),
        ("2:2", two_colon_two),
        ("3:3", two_colon),
    ]
    for count_range, expected in cases:
        H.rm_tempdir("temp_2.hdb")
        H.hashdb(["add_range", "temp_1.hdb", "temp_2.hdb", count_range])
        # temp_2.hdb should match
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported = H.read_file("temp_2.json")
        H.lines_equals(exported, expected)
def test_add_range():
    """add_range with several count ranges selects the expected hash records."""
    colon_one = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",1,[4096]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab","repository2","second_temp_1.tab"]}',
    ]
    two_colon_two = ['# command: ', '# hashdb-Version: ']
    two_colon = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
    ]

    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)

    # each (range, expected-export) pair runs against a fresh temp_2.hdb
    cases = [
        (":1", colon_one),
        ("0:1", colon_one),
        ("1:1", colon_one),
        ("2:", two_colon),
        ("2:2", two_colon_two),
        ("3:3", two_colon),
    ]
    for count_range, expected in cases:
        H.rm_tempdir("temp_2.hdb")
        H.hashdb(["add_range", "temp_1.hdb", "temp_2.hdb", count_range])
        # temp_2.hdb should match
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported = H.read_file("temp_2.json")
        H.lines_equals(exported, expected)