Ejemplo n.º 1
0
def test_import_tab1():
    """Import a tab file with repeated rows and verify the exported JSON.

    The input lists block hash 8899aabbccddeeff under two file hashes and
    repeats its final row verbatim; the export of the resulting database is
    compared against the expected block and source records.
    """
    db_dir = "temp_1.hdb"
    tab_path = "temp_1.tab"
    json_path = "temp_1.json"

    # Fields are tab-separated, written here with explicit \t escapes.
    tab_rows = [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0011223344556677\t8899aabbccddeeff\t1",
        "0000000000000000\t8899aabbccddeeff\t1",
        "0011223344556677\t8899aabbccddeeff\t2",
        "0011223344556677\tffffffffffffffff\t3",
        "1111111111111111\t2222222222222222\t9",
        "1111111111111111\t2222222222222222\t9",
    ]
    wanted = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",2,[4096]]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
    ]

    # Remove leftovers, then build the database from the tab file and export.
    H.rm_tempdir(db_dir)
    H.rm_tempfile(json_path)
    H.hashdb(["create", db_dir])
    H.make_tempfile(tab_path, tab_rows)
    H.hashdb(["import_tab", db_dir, tab_path])
    H.hashdb(["export", db_dir, json_path])

    H.lines_equals(H.read_file(json_path), wanted)
Ejemplo n.º 2
0
def test_export_json_hash_partition_range():
    """Export with ``-p 00:80`` and check that only the expected records appear.

    Three block hashes and three sources are imported; the expected export
    holds just block hash 2222222222222222 and its source record.
    """
    db_dir = "temp_1.hdb"
    import_path = "temp_1.json"
    export_path = "temp_2.json"

    H.rm_tempdir(db_dir)
    H.rm_tempfile(import_path)
    H.rm_tempfile(export_path)

    imported_lines = [
        '{"block_hash":"2222222222222222","k_entropy":1,"block_label":"bl1","source_offsets":["1111111111111111",2,[4096]]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":2,"block_label":"bl2","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":3,"block_label":"bl3","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":4,"nonprobative_count":5,"name_pairs":["r2","f2"]}',
        '{"file_hash":"0011223344556677","filesize":6,"file_type":"fta","zero_count":7,"nonprobative_count":8,"name_pairs":["r1","f1"]}',
        '{"file_hash":"1111111111111111","filesize":9,"file_type":"ftc","zero_count":10,"nonprobative_count":11,"name_pairs":["r3","f3"]}',
    ]
    wanted = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"2222222222222222","k_entropy":1,"block_label":"bl1","source_offsets":["1111111111111111",2,[4096]]}',
        '{"file_hash":"1111111111111111","filesize":9,"file_type":"ftc","zero_count":10,"nonprobative_count":11,"name_pairs":["r3","f3"]}',
    ]

    # Populate the database from JSON, then export the restricted range.
    H.make_tempfile(import_path, imported_lines)
    H.hashdb(["create", db_dir])
    H.hashdb(["import", db_dir, import_path])
    H.hashdb(["export", "-p", "00:80", db_dir, export_path])

    H.lines_equals(H.read_file(export_path), wanted)
Ejemplo n.º 3
0
def test_add_repository():
    """Check ``add_repository`` output for repository1 and then repository2.

    temp_1.hdb is built once from ``json_out1``; for each repository name a
    fresh temp_2.hdb is produced, exported, and compared with the records
    expected for that repository.
    """
    banner = ['# command: ', '# hashdb-Version: ']
    phases = [
        # temp_2.hdb should only have hashes and sources with repository1
        ("repository1", banner + [
            '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",1]}',
            '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
            '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
            '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
            '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
            '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        ]),
        # temp_2.hdb should only have hashes and sources with repository2
        ("repository2", banner + [
            '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",1]}',
            '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository2","second_temp_1.tab"]}',
        ]),
    ]

    # create the source hashdb
    H.make_hashdb("temp_1.hdb", json_out1)

    for repository, wanted in phases:
        # each phase adds into a brand-new temp_2.hdb
        H.rm_tempdir("temp_2.hdb")
        H.hashdb(["add_repository", "temp_1.hdb", "temp_2.hdb", repository])
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported = H.read_file("temp_2.json")
        H.lines_equals(exported, wanted)
Ejemplo n.º 4
0
def test_import_tab4():
    """Import two overlapping tab files with ``-rr`` and verify the export.

    The second tab file repeats the rows of the first, adds one more row for
    the same file hash, and adds two rows for a second file hash.
    """
    db_dir = "temp_1.hdb"
    header = "# <file hash> <tab> <block hash> <tab> <index>"
    # (tab file name, rows) pairs, imported in this order.
    tab_inputs = [
        ("temp_1.tab", [
            header,
            "0000000000000000\t8888888888888888\t1",
            "0000000000000000\t8888888888888888\t2",
        ]),
        ("temp_2.tab", [
            header,
            "0000000000000000\t8888888888888888\t1",
            "0000000000000000\t8888888888888888\t2",
            "0000000000000000\t8888888888888888\t3",
            "1111111111111111\t8888888888888888\t1",
            "1111111111111111\t8888888888888888\t2",
        ]),
    ]
    wanted = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"8888888888888888","k_entropy":0,"block_label":"","source_offsets":["0000000000000000",2,[0,512],"1111111111111111",2,[0,512]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_2.tab"]}',
    ]

    H.rm_tempdir(db_dir)
    H.hashdb(["create", db_dir])
    for tab_name, rows in tab_inputs:
        H.make_tempfile(tab_name, rows)
        H.hashdb(["import_tab", "-rr", db_dir, tab_name])

    H.hashdb(["export", db_dir, "temp_1.json"])

    H.lines_equals(H.read_file("temp_1.json"), wanted)
Ejemplo n.º 5
0
def test_scan_list():
    """Scan a list of hashes interleaved with comment markers.

    The database is loaded from ``json_data``; the output of ``scan_list`` is
    compared line by line with the expected answer.
    """
    db_dir = "temp_1.hdb"

    H.rm_tempdir(db_dir)
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", db_dir])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", db_dir, "temp_1.json"])

    # test values: not present, valid, valid repeat, valid, valid, not valid
    scan_input = [
        "# command: ",
        "# hashdb-Version: ",
        "# marker 1",
        "fp1\t0000000000000000",
        "# marker 2",
        "fp1\t2222222222222222",
        "# marker 3",
        "fp2\t2222222222222222",
        "fp3\t8899aabbccddeeff",
        "fp4\tffffffffffffffff",
        "# marker4",
        "fp4\tinvalid_hash_value",
        "# marker5",
    ]
    H.make_tempfile("temp_1.txt", scan_input)

    scan_output = H.hashdb(["scan_list", db_dir, "temp_1.txt"])
    wanted = [
        '# command: ',
        '# hashdb-Version: ',
        '# command: ',
        '# hashdb-Version: ',
        '# marker 1',
        '# marker 2',
        'fp1\t2222222222222222\t{"block_hash":"2222222222222222","k_entropy":7,"block_label":"bl1","count":1,"source_list_id":1303964917,"sources":[{"file_hash":"1111111111111111","filesize":5,"file_type":"ftc","zero_count":60,"nonprobative_count":6,"name_pairs":["r3","f3"]}],"source_sub_counts":["1111111111111111",1]}',
        '# marker 3',
        'fp2\t2222222222222222\t{"block_hash":"2222222222222222"}',
        'fp3\t8899aabbccddeeff\t{"block_hash":"8899aabbccddeeff","k_entropy":8,"block_label":"bl2","count":3,"source_list_id":36745675,"sources":[{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":40,"nonprobative_count":4,"name_pairs":["r2","f2"]},{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        'fp4\tffffffffffffffff\t{"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[],"source_sub_counts":["0011223344556677",1]}',
        '# marker4',
        '# marker5',
        '# scan_list completed.',
        '',
    ]

    H.lines_equals(scan_output, wanted)
Ejemplo n.º 6
0
def test_add_multiple():
    """Merge two single-hash databases with ``add_multiple`` and check the export."""
    # One source record and one block hash per input database.
    first_db_lines = [
        '{"file_hash":"11","filesize":1,"file_type":"ft1","zero_count":15,"nonprobative_count":111,"name_pairs":["rn1","fn1"]}',
        '{"block_hash":"11111111","k_entropy":101,"block_label":"bl1","source_sub_counts":["11",1]}',
    ]
    second_db_lines = [
        '{"file_hash":"22","filesize":2,"file_type":"ft2","zero_count":16,"nonprobative_count":222,"name_pairs":["rn2","fn2"]}',
        '{"block_hash":"22222222","k_entropy":202,"block_label":"bl2","source_sub_counts":["22",1]}',
    ]
    # Expected export of the merged database.
    merged_lines = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"11111111","k_entropy":101,"block_label":"bl1","source_sub_counts":["11",1]}',
        '{"block_hash":"22222222","k_entropy":202,"block_label":"bl2","source_sub_counts":["22",1]}',
        '{"file_hash":"11","filesize":1,"file_type":"ft1","zero_count":15,"nonprobative_count":111,"name_pairs":["rn1","fn1"]}',
        '{"file_hash":"22","filesize":2,"file_type":"ft2","zero_count":16,"nonprobative_count":222,"name_pairs":["rn2","fn2"]}',
    ]

    # build the two inputs and make sure the output DB does not exist yet
    H.make_hashdb("temp_1.hdb", first_db_lines)
    H.make_hashdb("temp_2.hdb", second_db_lines)
    H.rm_tempdir("temp_3.hdb")

    # merge 1 and 2 into 3
    H.hashdb(["add_multiple", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])

    # export temp_3.hdb and compare
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    H.lines_equals(H.read_file("temp_3.json"), merged_lines)
Ejemplo n.º 7
0
def test_subtract_hash():
    """Run ``subtract_hash`` in both directions and check each export.

    The two inputs come from ``json_set_db1`` and ``json_set_db2``; the result
    database temp_3.hdb is removed before each subtraction.
    """
    banner = ['# command: ', '# hashdb-Version: ']
    # (minuend DB, subtrahend DB, expected export of the difference)
    directions = [
        # db1 - db2 hash
        ("temp_1.hdb", "temp_2.hdb", banner + [
            '{"block_hash":"1111111111111111","k_entropy":1,"block_label":"bl1","source_sub_counts":["11",1]}',
            '{"file_hash":"11","filesize":1,"file_type":"A","zero_count":11,"nonprobative_count":1,"name_pairs":["r1","f1"]}',
        ]),
        # db2 - db1 hash
        ("temp_2.hdb", "temp_1.hdb", banner + [
            '{"block_hash":"3333333333333333","k_entropy":3,"block_label":"bl3","source_sub_counts":["33",1]}',
            '{"file_hash":"33","filesize":3,"file_type":"C","zero_count":13,"nonprobative_count":3,"name_pairs":["r2","f2"]}',
        ]),
    ]

    # create the input hashdbs
    H.make_hashdb("temp_1.hdb", json_set_db1)
    H.make_hashdb("temp_2.hdb", json_set_db2)

    for left_db, right_db, wanted in directions:
        H.rm_tempdir("temp_3.hdb")
        H.hashdb(["subtract_hash", left_db, right_db, "temp_3.hdb"])
        H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
        exported = H.read_file("temp_3.json")
        H.lines_equals(exported, wanted)
Ejemplo n.º 8
0
def test_add_multiple():
    """Combine temp_1.hdb and temp_2.hdb into temp_3.hdb and verify the export."""
    source_1 = '{"file_hash":"11","filesize":1,"file_type":"ft1","zero_count":15,"nonprobative_count":111,"name_pairs":["rn1","fn1"]}'
    block_1 = '{"block_hash":"11111111","k_entropy":101,"block_label":"bl1","source_offsets":["11",1,[1024]]}'
    source_2 = '{"file_hash":"22","filesize":2,"file_type":"ft2","zero_count":16,"nonprobative_count":222,"name_pairs":["rn2","fn2"]}'
    block_2 = '{"block_hash":"22222222","k_entropy":202,"block_label":"bl2","source_offsets":["22",1,[1024]]}'

    # The expected export lists both block records first, then both sources.
    wanted = [
        '# command: ',
        '# hashdb-Version: ',
        block_1,
        block_2,
        source_1,
        source_2,
    ]

    # create the input DBs; the output DB must start out absent
    H.make_hashdb("temp_1.hdb", [source_1, block_1])
    H.make_hashdb("temp_2.hdb", [source_2, block_2])
    H.rm_tempdir("temp_3.hdb")

    # add 1 and 2 into 3
    H.hashdb(["add_multiple", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])

    # check temp_3.hdb
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
    exported = H.read_file("temp_3.json")
    H.lines_equals(exported, wanted)
Ejemplo n.º 9
0
def test_subtract_hash():
    """Check ``subtract_hash`` for db1 - db2 and then for db2 - db1."""

    def subtract_and_check(minuend, subtrahend, wanted):
        # Rebuild temp_3.hdb as minuend - subtrahend and compare its export.
        H.rm_tempdir("temp_3.hdb")
        H.hashdb(["subtract_hash", minuend, subtrahend, "temp_3.hdb"])
        H.hashdb(["export", "temp_3.hdb", "temp_3.json"])
        H.lines_equals(H.read_file("temp_3.json"), wanted)

    # create the two input hashdbs
    H.make_hashdb("temp_1.hdb", json_set_db1)
    H.make_hashdb("temp_2.hdb", json_set_db2)

    # db1 - db2 hash
    subtract_and_check("temp_1.hdb", "temp_2.hdb", [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"1111111111111111","k_entropy":1,"block_label":"bl1","source_offsets":["11",1,[4096]]}',
        '{"file_hash":"11","filesize":1,"file_type":"A","zero_count":11,"nonprobative_count":1,"name_pairs":["r1","f1"]}',
    ])

    # db2 - db1 hash
    subtract_and_check("temp_2.hdb", "temp_1.hdb", [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"3333333333333333","k_entropy":3,"block_label":"bl3","source_offsets":["33",1,[4096]]}',
        '{"file_hash":"33","filesize":3,"file_type":"C","zero_count":13,"nonprobative_count":3,"name_pairs":["r2","f2"]}',
    ])
Ejemplo n.º 10
0
def test_add_repository():
    """Verify ``add_repository`` for two repository names in turn."""

    def add_and_check(repository, wanted):
        # Copy one repository from temp_1.hdb into a fresh temp_2.hdb and
        # compare the export of temp_2.hdb with the expected lines.
        H.rm_tempdir("temp_2.hdb")
        H.hashdb(["add_repository", "temp_1.hdb", "temp_2.hdb", repository])
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        H.lines_equals(H.read_file("temp_2.json"), wanted)

    # create the source hashdb
    H.make_hashdb("temp_1.hdb", json_out1)

    # temp_2.hdb should only have hashes and sources with repository1
    add_and_check("repository1", [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",1,[4096]]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
    ])

    # temp_2.hdb should only have hashes and sources with repository2
    add_and_check("repository2", [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",1,[4096]]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository2","second_temp_1.tab"]}',
    ])
Ejemplo n.º 11
0
def test_import_tab4():
    """Import two tab files with ``-rr`` into one database and check the export."""
    db_dir = "temp_1.hdb"
    column_header = "# <file hash> <tab> <block hash> <tab> <index>"

    first_rows = [
        column_header,
        "0000000000000000\t8888888888888888\t1",
        "0000000000000000\t8888888888888888\t2",
    ]
    # Repeats the first file's rows, then adds a third index and a second file hash.
    second_rows = [
        column_header,
        "0000000000000000\t8888888888888888\t1",
        "0000000000000000\t8888888888888888\t2",
        "0000000000000000\t8888888888888888\t3",
        "1111111111111111\t8888888888888888\t1",
        "1111111111111111\t8888888888888888\t2",
    ]
    wanted = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"8888888888888888","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",2,"1111111111111111",2]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_2.tab"]}',
    ]

    H.rm_tempdir(db_dir)
    H.hashdb(["create", db_dir])

    H.make_tempfile("temp_1.tab", first_rows)
    H.hashdb(["import_tab", "-rr", db_dir, "temp_1.tab"])

    H.make_tempfile("temp_2.tab", second_rows)
    H.hashdb(["import_tab", "-rr", db_dir, "temp_2.tab"])

    H.hashdb(["export", db_dir, "temp_1.json"])
    exported = H.read_file("temp_1.json")
    H.lines_equals(exported, wanted)
Ejemplo n.º 12
0
def test_import_tab1():
    """Load a tab file containing a duplicated row and compare the export."""
    database = "temp_1.hdb"
    export_file = "temp_1.json"

    # clean up anything left behind, then create an empty database
    H.rm_tempdir(database)
    H.rm_tempfile(export_file)
    H.hashdb(["create", database])

    # Tab-separated rows; the last row appears twice on purpose of the input.
    rows = [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0011223344556677\t8899aabbccddeeff\t1",
        "0000000000000000\t8899aabbccddeeff\t1",
        "0011223344556677\t8899aabbccddeeff\t2",
        "0011223344556677\tffffffffffffffff\t3",
        "1111111111111111\t2222222222222222\t9",
        "1111111111111111\t2222222222222222\t9",
    ]
    H.make_tempfile("temp_1.tab", rows)
    H.hashdb(["import_tab", database, "temp_1.tab"])
    H.hashdb(["export", database, export_file])

    exported = H.read_file(export_file)
    wanted = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",2]}',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["temp_1.tab","temp_1.tab"]}',
    ]
    H.lines_equals(exported, wanted)
Ejemplo n.º 13
0
def test_export_json_hash_partition_range():
    """Check that ``export -p 00:80`` emits only the expected block and source."""
    # The single block record (and its source) expected in the export.
    kept_block = '{"block_hash":"2222222222222222","k_entropy":1,"block_label":"bl1","source_sub_counts":["1111111111111111",2]}'
    kept_source = '{"file_hash":"1111111111111111","filesize":9,"file_type":"ftc","zero_count":10,"nonprobative_count":11,"name_pairs":["r3","f3"]}'

    imported = [
        kept_block,
        '{"block_hash":"8899aabbccddeeff","k_entropy":2,"block_label":"bl2","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":3,"block_label":"bl3","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":4,"nonprobative_count":5,"name_pairs":["r2","f2"]}',
        '{"file_hash":"0011223344556677","filesize":6,"file_type":"fta","zero_count":7,"nonprobative_count":8,"name_pairs":["r1","f1"]}',
        kept_source,
    ]
    wanted = ["# command: ", "# hashdb-Version: ", kept_block, kept_source]

    # remove leftovers from earlier runs
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.rm_tempfile("temp_2.json")

    # import everything, then export just the requested range
    H.make_tempfile("temp_1.json", imported)
    H.hashdb(["create", "temp_1.hdb"])
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])
    H.hashdb(["export", "-p", "00:80", "temp_1.hdb", "temp_2.json"])

    exported = H.read_file("temp_2.json")
    H.lines_equals(exported, wanted)
Ejemplo n.º 14
0
def test_ingest():
    """Ingest the media made by ``H.make_temp_media`` and check the ``size`` output."""
    media_name = "temp_1_media"
    db_dir = "temp_1.hdb"
    wanted = [
        '{"hash_data_store":4, "hash_store":4, "source_data_store":4, "source_id_store":4, "source_name_store":4}',
        '',
    ]

    H.make_temp_media(media_name)
    H.rm_tempdir(db_dir)
    H.hashdb(["create", db_dir])
    H.hashdb(["ingest", db_dir, media_name])

    size_output = H.hashdb(["size", db_dir])
    H.lines_equals(size_output, wanted)
Ejemplo n.º 15
0
def test_ingest():
    """Create a database, ingest temp_1_media, and compare the reported store sizes."""
    # prepare the media and an empty database
    H.make_temp_media("temp_1_media")
    H.rm_tempdir("temp_1.hdb")
    H.hashdb(["create", "temp_1.hdb"])

    # ingest, then ask the database for its sizes
    H.hashdb(["ingest", "temp_1.hdb", "temp_1_media"])
    reported = H.hashdb(["size", "temp_1.hdb"])

    expected_sizes = '{"hash_data_store":4, "hash_store":4, "source_data_store":4, "source_id_store":4, "source_name_store":4}'
    H.lines_equals(reported, [expected_sizes, ''])
Ejemplo n.º 16
0
def test_json_commands():
    """Run each command with ``-j c`` and compare its output lines.

    The database is loaded from ``json_data``. Per the original notes,
    scan_list is already covered elsewhere and scan_media is skipped.
    """
    db_dir = "temp_1.hdb"

    H.rm_tempdir(db_dir)
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", db_dir])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", db_dir, "temp_1.json"])

    banner = ['# command: ', '# hashdb-Version: ']
    completed = ['# Processing 1 of 1 completed.', '']
    count_record = '{"block_hash":"8899aabbccddeeff","count":1}'
    keyed_record = '8899aabbccddeeff\t' + count_record

    # (command-line arguments, expected output lines), run in this order
    cases = [
        # scan_hash
        (["scan_hash", "-j", "c", db_dir, "8899aabbccddeeff"],
         [count_record, '']),
        # duplicates
        (["duplicates", "-j", "c", db_dir, "1"],
         banner + [keyed_record] + completed),
        # hash_table
        (["hash_table", "-j", "c", db_dir, "0011223344556677"],
         banner + [keyed_record] + completed),
        # scan_random: nothing returned but accepts -j
        (["scan_random", "-j", "c", db_dir, "1"],
         list(completed)),
        # scan_same: nothing returned but accepts -j
        (["scan_same", "-j", "c", db_dir, "1"],
         ['Match not found, hash 80000000000000000000000000000000:'] + completed),
    ]

    for arguments, wanted in cases:
        H.lines_equals(H.hashdb(arguments), wanted)
Ejemplo n.º 17
0
def test_basic_settings():
    """Create a database with ``-b4`` and check the contents of ``settings1``."""
    db_dir = "temp_1.hdb"
    wanted = ['{"settings_version":4, "block_size":4}']

    # start without an existing DB, then create a new one
    h.rm_tempdir(db_dir)
    h.hashdb(["create", "-b4", db_dir])

    # the settings file must hold exactly the expected line
    h.lines_equals(h.read_file(settings1), wanted)
Ejemplo n.º 18
0
def test_intersect():
    """Intersect temp_1.hdb with temp_2.hdb and verify the exported result."""
    inputs = [("temp_1.hdb", json_set_db1), ("temp_2.hdb", json_set_db2)]
    wanted = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":2,"block_label":"bl2","source_sub_counts":["22",2]}',
        '{"file_hash":"22","filesize":2,"file_type":"B","zero_count":12,"nonprobative_count":2,"name_pairs":["r1","f1","r2","f2"]}',
    ]

    # create the input hashdbs; the output DB must not exist beforehand
    for db_dir, db_lines in inputs:
        H.make_hashdb(db_dir, db_lines)
    H.rm_tempdir("temp_3.hdb")

    # intersect and export
    H.hashdb(["intersect", "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"])
    H.hashdb(["export", "temp_3.hdb", "temp_3.json"])

    H.lines_equals(H.read_file("temp_3.json"), wanted)
Ejemplo n.º 19
0
def test_basic_settings():
    """Create a database with explicit options and check the settings file."""
    create_command = [
        "create", "-b4", "-a2", "-m500:20", "-t30:10", "temp_1.hdb",
    ]
    wanted_settings = '{"settings_version":3, "byte_alignment":2, "block_size":4, "max_count":500, "max_sub_count":20, "hash_prefix_bits":30, "hash_suffix_bytes":10}'

    # drop any existing DB before creating the new one
    h.rm_tempdir("temp_1.hdb")
    h.hashdb(create_command)

    # the file named by settings1 should contain the single expected line
    settings_lines = h.read_file(settings1)
    h.lines_equals(settings_lines, [wanted_settings])
Ejemplo n.º 20
0
def test_histogram():
    """Build a three-hash database and compare the ``histogram`` output."""
    db_lines = [
        '{"block_hash":"0000000000000000", "source_offsets":[]}',
        '{"block_hash":"1111111111111111", "source_offsets":["0000000000000000", 1, [0]]}',
        '{"block_hash":"2222222222222222", "source_offsets":["0000000000000000", 2, [0,512]]}',
    ]
    wanted = [
        '# command: ',
        '# hashdb-Version: ',
        '{"total_hashes": 3, "total_distinct_hashes": 1}',
        '{"duplicates":1, "distinct_hashes":1, "total":1}',
        '{"duplicates":2, "distinct_hashes":1, "total":2}',
        '# Processing 2 of 2 completed.',
        '',
    ]

    H.make_hashdb("temp_1.hdb", db_lines)

    histogram_output = H.hashdb(["histogram", "temp_1.hdb"])
    H.lines_equals(histogram_output, wanted)
Ejemplo n.º 21
0
def test_histogram():
    """Run ``histogram`` on a database with sub-counts 0, 1 and 2 and check the output."""
    # One block hash with no sources, one with sub-count 1, one with sub-count 2.
    H.make_hashdb(
        "temp_1.hdb",
        [
            '{"block_hash":"0000000000000000", "source_sub_counts":[]}',
            '{"block_hash":"1111111111111111", "source_sub_counts":["0000000000000000", 1]}',
            '{"block_hash":"2222222222222222", "source_sub_counts":["0000000000000000", 2]}',
        ],
    )

    banner = ['# command: ', '# hashdb-Version: ']
    body = [
        '{"total_hashes": 3, "total_distinct_hashes": 1}',
        '{"duplicates":1, "distinct_hashes":1, "total":1}',
        '{"duplicates":2, "distinct_hashes":1, "total":2}',
    ]
    trailer = ['# Processing 2 of 2 completed.', '']

    reported = H.hashdb(["histogram", "temp_1.hdb"])
    H.lines_equals(reported, banner + body + trailer)
Ejemplo n.º 22
0
def test_intersect():
    """Check the export of the intersection of two hash databases."""
    left_db, right_db, result_db = "temp_1.hdb", "temp_2.hdb", "temp_3.hdb"
    result_json = "temp_3.json"

    # create the input hashdbs and clear the output location
    H.make_hashdb(left_db, json_set_db1)
    H.make_hashdb(right_db, json_set_db2)
    H.rm_tempdir(result_db)

    # intersect
    H.hashdb(["intersect", left_db, right_db, result_db])
    H.hashdb(["export", result_db, result_json])

    wanted = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":2,"block_label":"bl2","source_offsets":["22",2,[0,512]]}',
        '{"file_hash":"22","filesize":2,"file_type":"B","zero_count":12,"nonprobative_count":2,"name_pairs":["r1","f1","r2","f2"]}',
    ]
    H.lines_equals(H.read_file(result_json), wanted)
Ejemplo n.º 23
0
def test_import_tab2():
    """Import a one-row tab file with ``-rr`` and verify the exported JSON."""
    db_dir = "temp_1.hdb"
    json_path = "temp_1.json"
    tab_rows = [
        "# <file hash> <tab> <block hash> <tab> <index>",
        "0011223344556677\t8899aabbccddeeff\t1",
    ]
    wanted = [
        "# command: ",
        "# hashdb-Version: ",
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r","temp_1.tab"]}',
    ]

    # clean up, create the database, import the tab file, export
    H.rm_tempdir(db_dir)
    H.rm_tempfile(json_path)
    H.hashdb(["create", db_dir])
    H.make_tempfile("temp_1.tab", tab_rows)
    H.hashdb(["import_tab", "-rr", db_dir, "temp_1.tab"])
    H.hashdb(["export", db_dir, json_path])

    H.lines_equals(H.read_file(json_path), wanted)
Ejemplo n.º 24
0
def test_scan_hash():
    """Scan for one hash expected to be found and one expected to be missing."""
    db_dir = "temp_1.hdb"

    H.rm_tempdir(db_dir)
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", db_dir])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", db_dir, "temp_1.json"])

    # (hash to scan for, expected output lines)
    lookups = [
        # test individual hash, hash present
        ("ffffffffffffffff", [
            '{"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_offsets":["0011223344556677",1,[1024]]}',
            '',
        ]),
        # test individual hash, hash not present
        ("0000000000000000", [
            "Hash not found for '0000000000000000'",
            '',
        ]),
    ]
    for block_hash, wanted in lookups:
        H.lines_equals(H.hashdb(["scan_hash", db_dir, block_hash]), wanted)
Ejemplo n.º 25
0
def test_json_modes():
    """Run ``scan_list`` with each ``-j`` mode (e, o, c, a) and check the output.

    The same block hash is scanned twice, under the labels fp1 and fp2.
    """
    db_dir = "temp_1.hdb"

    H.rm_tempdir(db_dir)
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", db_dir])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", db_dir, "temp_1.json"])

    # make hash file
    H.make_tempfile("temp_1.txt", [
        "fp1\t8899aabbccddeeff",
        "fp2\t8899aabbccddeeff",
    ])

    fp1 = 'fp1\t8899aabbccddeeff\t'
    fp2 = 'fp2\t8899aabbccddeeff\t'
    full_record = '{"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}'
    hash_only = '{"block_hash":"8899aabbccddeeff"}'
    exact_count = '{"block_hash":"8899aabbccddeeff","count":1}'
    approx_count = '{"block_hash":"8899aabbccddeeff","approximate_count":1}'

    # (mode letter, expected record lines), run in this order
    modes = [
        # expanded
        ("e", [fp1 + full_record, fp2 + full_record]),
        # expanded optimized
        ("o", [fp1 + full_record, fp2 + hash_only]),
        # count only
        ("c", [fp1 + exact_count, fp2 + exact_count]),
        # approximate count
        ("a", [fp1 + approx_count, fp2 + approx_count]),
    ]

    for mode, records in modes:
        scan_output = H.hashdb(["scan_list", "-j", mode, db_dir, "temp_1.txt"])
        wanted = (
            ['# command: ', '# hashdb-Version: ']
            + records
            + ['# scan_list completed.', '']
        )
        H.lines_equals(scan_output, wanted)
Ejemplo n.º 26
0
def test_scan_hash():
    """Check ``scan_hash`` output for a hash expected to match and one expected not to."""
    database = "temp_1.hdb"
    import_file = "temp_1.json"

    # build the database from json_data
    H.rm_tempdir(database)
    H.rm_tempfile(import_file)
    H.hashdb(["create", database])
    H.make_tempfile(import_file, json_data)
    H.hashdb(["import", database, import_file])

    # test individual hash, hash present
    found_output = H.hashdb(["scan_hash", database, "ffffffffffffffff"])
    found_record = '{"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}'
    H.lines_equals(found_output, [found_record, ''])

    # test individual hash, hash not present
    missing_output = H.hashdb(["scan_hash", database, "0000000000000000"])
    H.lines_equals(missing_output, ["Hash not found for '0000000000000000'", ''])
Ejemplo n.º 27
0
def test_json_commands():
    """Run each command that accepts ``-j c`` and compare its output lines."""
    # start from a clean database populated from json_data
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # scan_list: already done

    # scan_hash
    output = H.hashdb(
        ["scan_hash", "-j", "c", "temp_1.hdb", "8899aabbccddeeff"])
    expected = ['{"block_hash":"8899aabbccddeeff","count":1}', '']
    H.lines_equals(output, expected)

    # scan_media: skip

    # duplicates
    output = H.hashdb(["duplicates", "-j", "c", "temp_1.hdb", "1"])
    expected = [
        '# command: ',
        '# hashdb-Version: ',
        '8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","count":1}',
        '# Processing 1 of 1 completed.',
        '',
    ]
    H.lines_equals(output, expected)

    # hash_table
    output = H.hashdb(
        ["hash_table", "-j", "c", "temp_1.hdb", "0011223344556677"])
    expected = [
        '# command: ',
        '# hashdb-Version: ',
        '8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","count":1}',
        '# Processing 1 of 1 completed.',
        '',
    ]
    H.lines_equals(output, expected)

    # scan_random: nothing returned but accepts -j
    output = H.hashdb(["scan_random", "-j", "c", "temp_1.hdb", "1"])
    expected = ['# Processing 1 of 1 completed.', '']
    H.lines_equals(output, expected)

    # scan_same: nothing returned but accepts -j
    output = H.hashdb(["scan_same", "-j", "c", "temp_1.hdb", "1"])
    expected = [
        'Match not found, hash 80000000000000000000000000000000:',
        '# Processing 1 of 1 completed.',
        '',
    ]
    H.lines_equals(output, expected)
Ejemplo n.º 28
0
def test_sources():
    """Compare the ``sources`` command output for two single-source databases."""
    # each entry: (line used to build the database, source line expected back)
    scenarios = [
        # source stores, no name_pairs
        (
            '{"file_hash":"0011223344556677","filesize":0,"name_pairs":[]}',
            '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}',
        ),
        # source stores, two name_pairs
        (
            '{"file_hash":"0011223344556677","filesize":0,"name_pairs":["r1","f1","r2","f2"]}',
            '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r1","f1","r2","f2"]}',
        ),
    ]

    for source_line, reported_line in scenarios:
        H.make_hashdb("temp_1.hdb", [source_line])
        expected_answer = [reported_line, '']
        returned_answer = H.hashdb(["sources", "temp_1.hdb"])
        H.lines_equals(expected_answer, returned_answer)
Ejemplo n.º 29
0
def test_add():
    """Add temp_1.hdb into temp_2.hdb twice; the export must equal json_out1 each time."""
    # create new hashdb and make sure the destination does not exist yet
    H.make_hashdb("temp_1.hdb", json_out1)
    H.rm_tempdir("temp_2.hdb")

    # first pass adds to a new temp_2.hdb, second pass adds to the existing one
    for _ in range(2):
        H.hashdb(["add", "temp_1.hdb", "temp_2.hdb"])

        # temp_2.hdb should match
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported_lines = H.read_file("temp_2.json")
        H.lines_equals(exported_lines, json_out1)
Ejemplo n.º 30
0
def test_add():
    """Check that ``add`` into a new and then an existing database exports json_out1."""

    def add_and_verify():
        # run the add, export the destination, and compare with the source data
        H.hashdb(["add", "temp_1.hdb", "temp_2.hdb"])
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported = H.read_file("temp_2.json")
        H.lines_equals(exported, json_out1)

    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)
    H.rm_tempdir("temp_2.hdb")

    # add to new temp_2.hdb; temp_2.hdb should match
    add_and_verify()

    # add to existing temp_2.hdb; temp_2.hdb should still match
    add_and_verify()
Ejemplo n.º 31
0
def test_scan_list():
    """Scan a list file mixing comments, known, repeated, unknown and invalid hashes."""
    # start from a clean database populated from json_data
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # test values: not present, valid, valid repeat, valid, valid, not valid
    scan_input = [
        "# command: ",
        "# hashdb-Version: ",
        "# marker 1",
        "fp1	0000000000000000",
        "# marker 2",
        "fp1	2222222222222222",
        "# marker 3",
        "fp2	2222222222222222",
        "fp3	8899aabbccddeeff",
        "fp4	ffffffffffffffff",
        "# marker4",
        "fp4	invalid_hash_value",
        "# marker5",
    ]
    H.make_tempfile("temp_1.txt", scan_input)

    scan_output = H.hashdb(["scan_list", "temp_1.hdb", "temp_1.txt"])
    wanted_output = [
        '# command: ',
        '# hashdb-Version: ',
        '# command: ',
        '# hashdb-Version: ',
        '# marker 1',
        '# marker 2',
        'fp1	2222222222222222	{"block_hash":"2222222222222222","k_entropy":7,"block_label":"bl1","count":1,"source_list_id":1303964917,"sources":[{"file_hash":"1111111111111111","filesize":5,"file_type":"ftc","zero_count":60,"nonprobative_count":6,"name_pairs":["r3","f3"]}],"source_offsets":["1111111111111111",1,[4096]]}',
        '# marker 3',
        'fp2	2222222222222222	{"block_hash":"2222222222222222"}',
        'fp3	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","k_entropy":8,"block_label":"bl2","count":3,"source_list_id":36745675,"sources":[{"file_hash":"0000000000000000","filesize":3,"file_type":"ftb","zero_count":40,"nonprobative_count":4,"name_pairs":["r2","f2"]},{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        'fp4	ffffffffffffffff	{"block_hash":"ffffffffffffffff","k_entropy":9,"block_label":"bl3","count":1,"source_list_id":2343118327,"sources":[],"source_offsets":["0011223344556677",1,[1024]]}',
        '# marker4',
        '# marker5',
        '# scan_list completed.',
        '',
    ]

    H.lines_equals(scan_output, wanted_output)
Ejemplo n.º 32
0
def test_size():
    """Compare the ``size`` command output for three different database contents."""
    # each entry: (lines used to build the database, size line expected back)
    scenarios = [
        # hash stores
        (
            [
                '{"block_hash":"0011223344556677", "source_sub_counts":["0000000000000000", 1]}',
                '{"block_hash":"00112233556677", "source_sub_counts":["0000000000000000", 1]}',
            ],
            '{"hash_data_store":2, "hash_store":2, "source_data_store":1, "source_id_store":1, "source_name_store":0}',
        ),
        # source stores, no name_pairs
        (
            ['{"file_hash":"0011223344556677","filesize":0,"name_pairs":[]}'],
            '{"hash_data_store":0, "hash_store":0, "source_data_store":1, "source_id_store":1, "source_name_store":0}',
        ),
        # source stores, name_pairs
        (
            ['{"file_hash":"0011223344556677","filesize":0,"name_pairs":["r1","f1","r2","f2"]}'],
            '{"hash_data_store":0, "hash_store":0, "source_data_store":1, "source_id_store":1, "source_name_store":2}',
        ),
    ]

    for db_lines, size_line in scenarios:
        H.make_hashdb("temp_1.hdb", db_lines)
        expected_answer = [size_line, '']
        returned_answer = H.hashdb(["size", "temp_1.hdb"])
        H.lines_equals(expected_answer, returned_answer)
Ejemplo n.º 33
0
def test_json_modes():
    """Run scan_list under each ``-j`` mode (e, o, c, a) and compare the output."""
    # start from a clean database populated from json_data
    H.rm_tempdir("temp_1.hdb")
    H.rm_tempfile("temp_1.json")
    H.hashdb(["create", "temp_1.hdb"])
    H.make_tempfile("temp_1.json", json_data)
    H.hashdb(["import", "temp_1.hdb", "temp_1.json"])

    # make hash file: the same hash listed under two labels
    H.make_tempfile("temp_1.txt",
                    ["fp1	8899aabbccddeeff", "fp2	8899aabbccddeeff"])

    # lines that surround the per-hash results in every mode
    preamble = ['# command: ', '# hashdb-Version: ']
    trailer = ['# scan_list completed.', '']

    # each entry: (mode letter, result lines expected between preamble and trailer)
    modes = [
        # expanded
        ("e", [
            'fp1	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}',
            'fp2	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}',
        ]),
        # expanded optimized
        ("o", [
            'fp1	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","k_entropy":8000,"block_label":"bl2","count":1,"source_list_id":2343118327,"sources":[{"file_hash":"0011223344556677","filesize":1,"file_type":"fta","zero_count":20,"nonprobative_count":2,"name_pairs":["r1","f1"]}],"source_sub_counts":["0011223344556677",1]}',
            'fp2	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff"}',
        ]),
        # count only
        ("c", [
            'fp1	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","count":1}',
            'fp2	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","count":1}',
        ]),
        # approximate count
        ("a", [
            'fp1	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","approximate_count":1}',
            'fp2	8899aabbccddeeff	{"block_hash":"8899aabbccddeeff","approximate_count":1}',
        ]),
    ]

    for mode, result_lines in modes:
        output = H.hashdb(
            ["scan_list", "-j", mode, "temp_1.hdb", "temp_1.txt"])
        H.lines_equals(output, preamble + result_lines + trailer)
Ejemplo n.º 34
0
def test_size():
    """Check the ``size`` command output after rebuilding temp_1.hdb three ways."""

    def check_size(db_lines, size_line):
        # rebuild the database from db_lines and compare the reported sizes
        H.make_hashdb("temp_1.hdb", db_lines)
        wanted = [size_line, '']
        reported = H.hashdb(["size", "temp_1.hdb"])
        H.lines_equals(wanted, reported)

    # hash stores
    check_size(
        [
            '{"block_hash":"0011223344556677", "source_offsets":["0000000000000000", 1, [0]]}',
            '{"block_hash":"00112233556677", "source_offsets":["0000000000000000", 1, [512]]}',
        ],
        '{"hash_data_store":2, "hash_store":1, "source_data_store":1, "source_id_store":1, "source_name_store":0}',
    )

    # source stores, no name_pairs
    check_size(
        ['{"file_hash":"0011223344556677","filesize":0,"name_pairs":[]}'],
        '{"hash_data_store":0, "hash_store":0, "source_data_store":1, "source_id_store":1, "source_name_store":0}',
    )

    # source stores, name_pairs
    check_size(
        ['{"file_hash":"0011223344556677","filesize":0,"name_pairs":["r1","f1","r2","f2"]}'],
        '{"hash_data_store":0, "hash_store":0, "source_data_store":1, "source_id_store":1, "source_name_store":2}',
    )
Ejemplo n.º 35
0
def test_random():
    """Run add_random and scan_random with a count of 100 and compare the reports."""
    H.rm_tempdir("temp_1.hdb")
    H.hashdb(["create", "temp_1.hdb"])

    # add_random reports progress followed by the change counters
    add_report = H.hashdb(["add_random", "temp_1.hdb", "100"])
    expected_add_report = [
        '# Processing 100 of 100 completed.',
        '# hashdb changes:',
        '#     hash_data_inserted: 100',
        '#     hash_inserted: 100',
        '#     source_data_inserted: 1',
        '#     source_data_same: 1',
        '#     source_id_inserted: 1',
        '#     source_id_already_present: 101',
        '#     source_name_inserted: 1',
        '',
    ]
    H.lines_equals(add_report, expected_add_report)

    # export result is not inspected here; only the command is run
    H.hashdb(["export", "temp_1.hdb", "temp_1.json"])

    # scan_random reports progress only
    scan_report = H.hashdb(["scan_random", "temp_1.hdb", "100"])
    expected_scan_report = [
        '# Processing 100 of 100 completed.',
        '',
    ]
    H.lines_equals(scan_report, expected_scan_report)
Ejemplo n.º 36
0
def test_hash_table():
    """Query hash_table for an unknown file hash and for one with two block hashes."""
    # note that the first hash doesn't go in at all, next goes in once, last goes in twice.
    db_lines = [
        '{"block_hash":"0000000000000000", "source_offsets":[]}',
        '{"block_hash":"1111111111111111", "source_offsets":["0000000000000000", 1, [0]]}',
        '{"block_hash":"2222222222222222", "source_offsets":["0000000000000000", 2, [0,512]]}',
    ]
    H.make_hashdb("temp_1.hdb", db_lines)

    # no match
    no_match_output = H.hashdb(["hash_table", "temp_1.hdb", "0011223344556677"])
    no_match_expected = [
        'There is no source with this file hash',
        '',
    ]
    H.lines_equals(no_match_output, no_match_expected)

    # two matches
    match_output = H.hashdb(["hash_table", "temp_1.hdb", "0000000000000000"])
    match_expected = [
        '# command: ',
        '# hashdb-Version: ',
        '1111111111111111	{"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_offsets":["0000000000000000",1,[0]]}',
        '2222222222222222	{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[],"source_offsets":["0000000000000000",2,[0,512]]}',
        '# Processing 2 of 2 completed.',
        '',
    ]
    H.lines_equals(match_output, match_expected)
Ejemplo n.º 37
0
def test_hash_table():
    """Check hash_table output for a file hash with no source and one with two hashes."""
    # note that the first hash doesn't go in at all, next goes in once, last goes in twice.
    H.make_hashdb(
        "temp_1.hdb",
        [
            '{"block_hash":"0000000000000000", "source_sub_counts":[]}',
            '{"block_hash":"1111111111111111", "source_sub_counts":["0000000000000000", 1]}',
            '{"block_hash":"2222222222222222", "source_sub_counts":["0000000000000000", 2]}',
        ],
    )

    # no match
    lookup = ["hash_table", "temp_1.hdb", "0011223344556677"]
    output = H.hashdb(lookup)
    H.lines_equals(output, [
        'There is no source with this file hash',
        '',
    ])

    # two matches
    lookup = ["hash_table", "temp_1.hdb", "0000000000000000"]
    output = H.hashdb(lookup)
    H.lines_equals(output, [
        '# command: ',
        '# hashdb-Version: ',
        '1111111111111111	{"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_sub_counts":["0000000000000000",1]}',
        '2222222222222222	{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[],"source_sub_counts":["0000000000000000",2]}',
        '# Processing 2 of 2 completed.',
        '',
    ])
Ejemplo n.º 38
0
def test_sources():
    """Check the ``sources`` command output with and without name pairs."""

    def check_sources(source_line, reported_line):
        # rebuild the database from one source line and compare the listing
        H.make_hashdb("temp_1.hdb", [source_line])
        wanted = [reported_line, '']
        listed = H.hashdb(["sources", "temp_1.hdb"])
        H.lines_equals(wanted, listed)

    # source stores, no name_pairs
    check_sources(
        '{"file_hash":"0011223344556677","filesize":0,"name_pairs":[]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}',
    )

    # source stores, two name_pairs
    check_sources(
        '{"file_hash":"0011223344556677","filesize":0,"name_pairs":["r1","f1","r2","f2"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["r1","f1","r2","f2"]}',
    )
Ejemplo n.º 39
0
def test_duplicates():
    """Run ``duplicates`` for counts 0 through 3 and compare each report."""
    # hash 0... doesn't go in at all.
    # hash 1... has one source with one pair.
    # hash 2... has one source with two pairs.
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_offsets":[]}',
        '{"block_hash":"1111111111111111", "source_offsets":["0000000000000000", 1, [0]]}',
        '{"block_hash":"2222222222222222", "source_offsets":["0000000000000000", 2, [0,512]]}',
    ])

    # lines that surround the result in every report
    preamble = ['# command: ', '# hashdb-Version: ']
    trailer = ['# Processing 2 of 2 completed.', '']

    # each entry: (requested count, line expected between preamble and trailer)
    scenarios = [
        # zero
        ("0", 'No hashes were found with this count.'),
        # one
        ("1", '1111111111111111	{"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_offsets":["0000000000000000",1,[0]]}'),
        # two
        ("2", '2222222222222222	{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_offsets":["0000000000000000",2,[0,512]]}'),
        # three
        ("3", 'No hashes were found with this count.'),
    ]

    for count, result_line in scenarios:
        report = H.hashdb(["duplicates", "temp_1.hdb", count])
        H.lines_equals(report, preamble + [result_line] + trailer)
Ejemplo n.º 40
0
def test_duplicates():
    """Check the ``duplicates`` report for counts "0", "1", "2" and "3"."""

    def check_count(count, result_line):
        # every report wraps a single result line in the same header and footer
        report = H.hashdb(["duplicates", "temp_1.hdb", count])
        H.lines_equals(report, [
            '# command: ',
            '# hashdb-Version: ',
            result_line,
            '# Processing 2 of 2 completed.',
            '',
        ])

    # hash 0... doesn't go in at all.
    # hash 1... has one source with one pair.
    # hash 2... has one source with two pairs.
    H.make_hashdb("temp_1.hdb", [
        '{"block_hash":"0000000000000000", "source_sub_counts":[]}',
        '{"block_hash":"1111111111111111", "source_sub_counts":["0000000000000000", 1]}',
        '{"block_hash":"2222222222222222", "source_sub_counts":["0000000000000000", 2]}',
    ])

    # zero
    check_count("0", 'No hashes were found with this count.')

    # one
    check_count(
        "1",
        '1111111111111111	{"block_hash":"1111111111111111","k_entropy":0,"block_label":"","count":1,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_sub_counts":["0000000000000000",1]}',
    )

    # two
    check_count(
        "2",
        '2222222222222222	{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","count":2,"source_list_id":1696784233,"sources":[{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":[]}],"source_sub_counts":["0000000000000000",2]}',
    )

    # three
    check_count("3", 'No hashes were found with this count.')
Ejemplo n.º 41
0
def test_media():
    """Read several offsets from a temporary media image and check each result."""
    # create media to read
    H.make_temp_media("temp_1_media")

    # each entry: (offset argument, count argument, expected output lines)
    reads = [
        # read embedded filename in zip header
        ("630", "13", ["temp_0_file_1"]),
        # read zip 1
        ("600-zip-0", "50", ["temp_0_file_1 content"]),
        # read zip 2
        ("666-zip-0", "50", ["temp_0_file_2 content"]),
        # read gzip
        ("872-gzip-0", "50", ["gzip content"]),
        # read partial zip 1
        ("600-zip-3", "10", ["p_0_file_1"]),
        # read partial gzip
        ("872-gzip-3", "5", ["p con"]),
        # read out of range
        ("1000000000", "50", [""]),
        # read out of range zip 1
        ("600-zip-100", "50", [""]),
        # read out of range gzip
        ("872-gzip-100", "50", [""]),
    ]

    for offset, count, expected in reads:
        output = H.hashdb(["read_media", "temp_1_media", offset, count])
        H.lines_equals(output, expected)

    # read media size
    size_output = H.hashdb(["read_media_size", "temp_1_media"])
    H.lines_equals(size_output, ["917", ""])
Ejemplo n.º 42
0
def test_add_range():
    """Run add_range with several range arguments and compare each export."""
    colon_one = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_sub_counts":["1111111111111111",1]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_sub_counts":["0011223344556677",1]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab","repository2","second_temp_1.tab"]}',
    ]
    two_colon_two = [
        '# command: ',
        '# hashdb-Version: ',
    ]
    two_colon = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_sub_counts":["0000000000000000",1,"0011223344556677",2]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
    ]

    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)

    # each entry: (range argument, export expected from the new temp_2.hdb)
    scenarios = [
        (":1", colon_one),
        ("0:1", colon_one),
        ("1:1", colon_one),
        ("2:", two_colon),
        ("2:2", two_colon_two),
        ("3:3", two_colon),
    ]

    for range_arg, expected_export in scenarios:
        # add_range to a new temp_2.hdb
        H.rm_tempdir("temp_2.hdb")
        H.hashdb(["add_range", "temp_1.hdb", "temp_2.hdb", range_arg])

        # temp_2.hdb should match
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported = H.read_file("temp_2.json")
        H.lines_equals(exported, expected_export)
Ejemplo n.º 43
0
def test_media():
    """Check read_media at plain, zip and gzip offsets, then read_media_size."""

    def check_read(offset, count, expected):
        # read ``count`` at ``offset`` from the temporary media and compare
        output = H.hashdb(["read_media", "temp_1_media", offset, count])
        H.lines_equals(output, expected)

    # create media to read
    H.make_temp_media("temp_1_media")

    # read embedded filename in zip header
    check_read("630", "13", ["temp_0_file_1"])

    # read zip 1
    check_read("600-zip-0", "50", ["temp_0_file_1 content"])

    # read zip 2
    check_read("666-zip-0", "50", ["temp_0_file_2 content"])

    # read gzip
    check_read("872-gzip-0", "50", ["gzip content"])

    # read partial zip 1
    check_read("600-zip-3", "10", ["p_0_file_1"])

    # read partial gzip
    check_read("872-gzip-3", "5", ["p con"])

    # read out of range
    check_read("1000000000", "50", [""])

    # read out of range zip 1
    check_read("600-zip-100", "50", [""])

    # read out of range gzip
    check_read("872-gzip-100", "50", [""])

    # read media size
    media_size = H.hashdb(["read_media_size", "temp_1_media"])
    H.lines_equals(media_size, ["917", ""])
Ejemplo n.º 44
0
def test_add_range():
    """Check the export of temp_2.hdb after add_range with six range arguments."""

    def check_range(range_arg, expected_export):
        # add_range into a new temp_2.hdb, then compare its export
        H.rm_tempdir("temp_2.hdb")
        H.hashdb(["add_range", "temp_1.hdb", "temp_2.hdb", range_arg])
        H.hashdb(["export", "temp_2.hdb", "temp_2.json"])
        exported = H.read_file("temp_2.json")
        H.lines_equals(exported, expected_export)

    colon_one = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"2222222222222222","k_entropy":0,"block_label":"","source_offsets":["1111111111111111",1,[4096]]}',
        '{"block_hash":"ffffffffffffffff","k_entropy":0,"block_label":"","source_offsets":["0011223344556677",1,[1024]]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"1111111111111111","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab","repository2","second_temp_1.tab"]}',
    ]
    two_colon_two = [
        '# command: ',
        '# hashdb-Version: ',
    ]
    two_colon = [
        '# command: ',
        '# hashdb-Version: ',
        '{"block_hash":"8899aabbccddeeff","k_entropy":0,"block_label":"","source_offsets":["0000000000000000",1,[0],"0011223344556677",2,[0,512]]}',
        '{"file_hash":"0000000000000000","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
        '{"file_hash":"0011223344556677","filesize":0,"file_type":"","zero_count":0,"nonprobative_count":0,"name_pairs":["repository1","temp_1.tab"]}',
    ]

    # create new hashdb
    H.make_hashdb("temp_1.hdb", json_out1)

    # add_range to new temp_2.hdb using ":1"
    check_range(":1", colon_one)

    # add_range to new temp_2.hdb using "0:1"
    check_range("0:1", colon_one)

    # add_range to new temp_2.hdb using "1:1"
    check_range("1:1", colon_one)

    # add_range to new temp_2.hdb using "2:"
    check_range("2:", two_colon)

    # add_range to new temp_2.hdb using "2:2"
    check_range("2:2", two_colon_two)

    # add_range to new temp_2.hdb using "3:3"
    check_range("3:3", two_colon)