Example 1
def test_histogram():
    lines = H.hashdb(["histogram", db1])
    H.str_equals(lines[4], '{"total_hashes": 3, "total_distinct_hashes": 1}')
    H.str_equals(lines[5], '{"duplicates":1, "distinct_hashes":1, "total":1}')
    H.str_equals(lines[6], '{"duplicates":2, "distinct_hashes":1, "total":2}')
    H.str_equals(lines[7], '')
    H.int_equals(len(lines), 8)
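These examples rely on a small helper module H. A minimal sketch of what they assume is shown below; the executable name, the return convention, and the assertion helpers are guesses based on how the tests use them, not part of the original suite.

# Minimal sketch of the assumed helper module H: hashdb() shells out to the
# hashdb command-line tool and returns stdout split into lines (including a
# trailing empty line), and the *_equals() helpers are plain assertions.
import subprocess

def hashdb(args):
    # run the hashdb tool with the given arguments and capture its output
    result = subprocess.run(["hashdb"] + args, capture_output=True, text=True)
    return result.stdout.split('\n')

def str_equals(actual, expected):
    assert actual == expected, "%r != %r" % (actual, expected)

def int_equals(actual, expected):
    assert actual == expected, "%r != %r" % (actual, expected)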
Example 2
def test_size():
    lines = H.hashdb(["size", db1])
    #print(*lines, sep='\n')
    H.str_equals(lines[0], 'hash store size: 3')
    H.str_equals(lines[1], 'source store size: 2')
    H.str_equals(lines[2], '')
    H.int_equals(len(lines), 3)
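Later examples check sizes through H.parse_sizes. A plausible sketch, assuming it simply maps the 'hash store size: N' lines shown above onto the dictionary keys the tests use:

# Assumed implementation of H.parse_sizes: convert the output of the `size`
# command (see test_size above) into a dict, e.g.
# 'hash store size: 3' -> {'hash_store_size': 3}.
def parse_sizes(lines):
    sizes = {}
    for line in lines:
        name, sep, value = line.partition(':')
        if sep and value.strip().isdigit():
            sizes[name.strip().replace(' ', '_')] = int(value)
    return sizes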
Example 3
def test_hash_block_size():
    # create new DB
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1])

    # wrong hash block size
    H.write_temp_dfxml_hash(byte_run_len=1024)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_mismatched_hash_block_size'], 1)
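H.parse_changes is used the same way for import reports. The sketch below assumes the import command prints each counter as a 'name: value' line, which is not shown in these examples; only the key names are taken from the tests.

# Assumed implementation of H.parse_changes: collect 'name: value' counters
# from an import report into a dict such as {'hashes_inserted': 1}. The
# report format itself is an assumption.
def parse_changes(lines):
    changes = {}
    for line in lines:
        name, sep, value = line.partition(':')
        if sep and value.strip().isdigit():
            changes[name.strip().replace(' ', '_')] = int(value)
    return changes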
Example 4
def test_scan_hash():
    # hash present
    lines = H.hashdb(["scan_hash", db1, "00"])
    H.str_equals(lines[0], '["00",{"count":2}]')
    H.int_equals(len(lines), 2)

    # hash not present
    lines = H.hashdb(["scan_hash", db1, "33"])
    H.str_equals(lines[0], '["33",{"count":0}]')
    H.int_equals(len(lines), 2)
Example 5
def test_add():
    # one hash in db1 is added to db2
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash()
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["add", db1, db2])
    sizes = H.parse_sizes(H.hashdb(["size", db2]))
    H.int_equals(sizes['hash_store_size'],1)
    H.int_equals(sizes['source_store_size'],1)
Example 6
def test_add_multiple():
    # hashes from db1 and db2 result in two hashes in db3
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["add_multiple", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'],2)
    H.int_equals(sizes['source_store_size'],2)
Example 7
def test_add_repository():
    # hash with correct repository name is added
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["add_repository", db1, db2, "r1"])
    sizes = H.parse_sizes(H.hashdb(["size", db2]))
    H.int_equals(sizes['hash_store_size'],1)
    H.int_equals(sizes['source_store_size'],1)
    H.hashdb(["export", db2, xml1])
    H.dfxml_hash_equals(repository_name="r1")
Example 8
def test_subtract():
    # db1 - db2 -> db3 where source must match
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["subtract", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'],1)
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(repository_name="r2")
Example 9
def test_duplicates():
    lines = H.hashdb(["duplicates", db1, "0"])
    H.str_equals(lines[4], 'No hashes were found with this count.')
    H.int_equals(len(lines), 6)
    lines = H.hashdb(["duplicates", db1, "1"])
    H.str_equals(lines[3], '["11",{"count":1}]')
    H.int_equals(len(lines), 6)
    lines = H.hashdb(["duplicates", db1, "2"])
    H.str_equals(lines[3], '["00",{"count":2}]')
    H.int_equals(len(lines), 6)
    lines = H.hashdb(["duplicates", db1, "3"])
    H.str_equals(lines[4], 'No hashes were found with this count.')
    H.int_equals(len(lines), 6)
Example 10
def test_intersect():
    # db1 with a,b and db2 with b,c intersect to db3 with just b
    # using same hash and different repository name
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(repository_name="r1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(repository_name="r3")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["intersect", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'],1)
    H.int_equals(sizes['source_store_size'],1)
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(repository_name="r2")
Example 11
def test_intersect_hash():
    # db1 with a,b and db2 with b,c intersect to db3 with just b
    # using different hashes
    shutil.rmtree(db1, True)
    shutil.rmtree(db2, True)
    shutil.rmtree(db3, True)
    H.rm_tempfile(xml1)
    H.hashdb(["create", db1])
    H.hashdb(["create", db2])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef1")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef2")
    H.hashdb(["import", db1, "temp_dfxml_hash"])
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.write_temp_dfxml_hash(byte_run_hashdigest="00112233445566778899aabbccddeef3")
    H.hashdb(["import", db2, "temp_dfxml_hash"])
    H.hashdb(["intersect_hash", db1, db2, db3])
    sizes = H.parse_sizes(H.hashdb(["size", db3]))
    H.int_equals(sizes['hash_store_size'],1)
    H.int_equals(sizes['source_store_size'],1)
    H.hashdb(["export", db3, xml1])
    H.dfxml_hash_equals(byte_run_hashdigest="00112233445566778899aabbccddeef2")
Example 12
def test_byte_alignment():
    # create new DB with byte alignment 2
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-a2"])

    # valid
    H.write_temp_dfxml_hash(byte_run_file_offset=6)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)

    # invalid
    H.write_temp_dfxml_hash(byte_run_file_offset=7)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_invalid_byte_alignment'], 1)
    
    # valid
    H.write_temp_dfxml_hash(byte_run_file_offset=8)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
Example 13
def test_scan_expanded_hash():
    # all
    lines = H.hashdb(["scan_expanded_hash", db1, "11"])
    H.str_equals(lines[3], '{"block_hashdigest":"11", "count":1, "source_list_id":654825492, "sources":[{"source_id":2,"file_offset":0,"label":"L","repository_name":"r2","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}')
    H.int_equals(len(lines), 5)

    # -m0
    lines = H.hashdb(["scan_expanded_hash", "-m0", db1, "11"])
    H.str_equals(lines[3], '{"block_hashdigest":"11", "count":1, "source_list_id":654825492}')
    H.int_equals(len(lines), 5)

    # -m1
    lines = H.hashdb(["scan_expanded_hash", "-m1", db1, "11"])
    H.str_equals(lines[3], '{"block_hashdigest":"11", "count":1, "source_list_id":654825492, "sources":[{"source_id":2,"file_offset":0,"label":"L","repository_name":"r2","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}')
    H.int_equals(len(lines), 5)
Example 14
def test_hash_table():
    # source_id 0
    lines = H.hashdb(["hash_table", db1, "0"])
    H.str_equals(lines[0], 'The requested source ID is not in the database.')
    H.int_equals(len(lines), 2)

    # source_id 1
    lines = H.hashdb(["hash_table", db1, "1"])
    H.str_equals(lines[3], '# {"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(lines[5], '4096	00	{"count":2}')
    H.str_equals(lines[6], '8192	00	{"count":2}')
    H.int_equals(len(lines), 8)

    # source_id 2
    lines = H.hashdb(["hash_table", db1, "2"])
    H.str_equals(lines[3], '# {"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(lines[5], '12288	11	{"count":1}')
    H.int_equals(len(lines), 7)

    # source_id 3
    lines = H.hashdb(["hash_table", db1, "3"])
    H.str_equals(lines[0], 'The requested source ID is not in the database.')
    H.int_equals(len(lines), 2)
Example 15
def test_max_duplicates():
    # create new DB with max 2
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-m2"])

    # add three entries where only two are allowed
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*1)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*2)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.write_temp_dfxml_hash(byte_run_file_offset=4096*3)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 0)
    H.int_equals(changes['hashes_not_inserted_exceeds_max_duplicates'], 1)
    sizes = H.parse_sizes(H.hashdb(["size", db1]))
    H.int_equals(sizes['hash_store_size'], 2)
Example 16
def test_explain_identified_blocks():
    # test empty file
    write_empty_identified_blocks()
    lines = H.hashdb(["explain_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '# hashes')
    H.str_equals(lines[4], '# There are no hashes to report.')
    H.str_equals(lines[5], '# sources')
    H.str_equals(lines[6], '# There are no sources to report.')
    H.int_equals(len(lines), 8)

    # test all
    write_full_identified_blocks()
    lines = H.hashdb(["explain_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '# hashes')
    H.str_equals(lines[4], '["00",{"count":2},[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]]')
    H.str_equals(lines[5], '["11",{"count":1},[{"source_id":2,"file_offset":12288,"label":"L"}]]')
    H.str_equals(lines[6], '# sources')
    H.str_equals(lines[7], '{"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(lines[8], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.int_equals(len(lines), 10)

    # test all with -m0
    write_full_identified_blocks()
    lines = H.hashdb(["explain_identified_blocks", "-m0", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '# hashes')
    H.str_equals(lines[4], '# There are no hashes to report.')
    H.str_equals(lines[5], '# sources')
    H.str_equals(lines[6], '# There are no sources to report.')
    H.int_equals(len(lines), 8)

    # test all with -m1
    write_full_identified_blocks()
    lines = H.hashdb(["explain_identified_blocks", "-m1", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '# hashes')
    H.str_equals(lines[4], '["11",{"count":1},[{"source_id":2,"file_offset":12288,"label":"L"}]]')
    H.str_equals(lines[5], '# sources')
    H.str_equals(lines[6], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.int_equals(len(lines), 8)

    # test all with -m2
    write_full_identified_blocks()
    lines = H.hashdb(["explain_identified_blocks", "-m2", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '# hashes')
    H.str_equals(lines[4], '["00",{"count":2},[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]]')
    H.str_equals(lines[5], '["11",{"count":1},[{"source_id":2,"file_offset":12288,"label":"L"}]]')
    H.str_equals(lines[6], '# sources')
    H.str_equals(lines[7], '{"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(lines[8], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.int_equals(len(lines), 10)

    # test invalid hash value
    write_wrong_identified_blocks()
    lines = H.hashdb(["explain_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals((lines[3])[:5], 'Error')
    H.str_equals(lines[4], '# hashes')
    H.str_equals(lines[5], '# There are no hashes to report.')
    H.str_equals(lines[6], '# sources')
    H.str_equals(lines[7], '# There are no sources to report.')
    H.int_equals(len(lines), 9)
Example 17
def test_sources():
    lines = H.hashdb(["sources", db1])
    H.str_equals(lines[0], '{"source_id":1,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(lines[1], '{"source_id":2,"repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}')
    H.str_equals(lines[2], '')
    H.int_equals(len(lines), 3)
Example 18
def test_scan():
    lines = H.hashdb(["scan", db1, xml1])
    #print(*lines, sep='\n')
    H.str_equals(lines[4], '["00",{"count":2}]')
    H.str_equals(lines[7], '["11",{"count":1}]')
    H.int_equals(len(lines), 10)
Example 19
def test_hash_truncation():
    # create new DB with 3 byte hash truncation, no Bloom
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-t3", "--bloom", "disabled"])

    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112233')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)

    # duplicate element
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112244')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_not_inserted_duplicate_element'], 1)

    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00114433')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)

    # create new DB with 3 byte hash truncation, with Bloom
    shutil.rmtree(db1, True)
    H.hashdb(["create", db1, "-t3"])

    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112233')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)

    # duplicate element
    H.write_temp_dfxml_hash(byte_run_hashdigest='00112244')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_not_inserted_duplicate_element'], 1)

    # valid entry
    H.write_temp_dfxml_hash(byte_run_hashdigest='00114433')
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
Example 20
def test_expand_identified_blocks():
    # test empty file
    write_empty_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", db1, "temp_identified_blocks"])
    H.int_equals(len(lines), 4)

    # test all
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492, "sources":[{"source_id":2,"file_offset":12288,"label":"L","repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}]')
    H.int_equals(len(lines), 7)

    # test all with -m0
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", "-m0", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492}]')
    H.int_equals(len(lines), 7)

    # test all with -m1
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", "-m1", db1, "temp_identified_blocks"])
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492, "sources":[{"source_id":2,"file_offset":12288,"label":"L","repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}]')
    H.int_equals(len(lines), 7)

    # test all with -m2
    write_full_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", "-m2", db1, "temp_identified_blocks"])
    #print(*lines, sep='\n')
    H.str_equals(lines[3], '4096	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096,"repository_name":"r1","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[4], '8192	00	[{"count":2},{"source_list_id":2844319735, "sources":[{"source_id":1,"file_offset":4096},{"source_id":1,"file_offset":8192,"label":"H"}]}]')
    H.str_equals(lines[5], '12288	11	[{"count":1},{"source_list_id":654825492, "sources":[{"source_id":2,"file_offset":12288,"label":"L","repository_name":"repositoryname","filename":"file1","file_hashdigest":"ff112233445566778899aabbccddeeff"}]}]')
    H.int_equals(len(lines), 7)

    # test invalid hash value
    write_wrong_identified_blocks()
    lines = H.hashdb(["expand_identified_blocks", db1, "temp_identified_blocks"])
    H.str_equals((lines[3])[:5], 'Error')
    H.int_equals(len(lines), 5)
Example 21
def test_basic_settings():
    # remove existing DB
    shutil.rmtree(db1, True)

    # create new DB
    H.hashdb(["create", db1, "-p1024", "-m3", "-a 128", "-t 7", "--bloom=disabled", "--bloom_kM=4:14"])

    # validate settings parameters
    settings = H.parse_settings(db1)
    H.int_equals(settings['settings_version'], 2)
    H.int_equals(settings['byte_alignment'], 128)
    H.int_equals(settings['hash_truncation'], 7)
    H.int_equals(settings['hash_block_size'], 1024)
    H.int_equals(settings['maximum_hash_duplicates'], 3)
    H.bool_equals(settings['bloom_used'], False)
    H.int_equals(settings['bloom_k_hash_functions'], 4)
    H.int_equals(settings['bloom_M_hash_size'], 14)

    # byte alignment boundary
    H.write_temp_dfxml_hash(byte_run_len=1024)
    changes = H.parse_changes(H.hashdb(["import", db1, "temp_dfxml_hash"]))
    H.int_equals(changes['hashes_inserted'], 1)
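The assertions above also rely on H.parse_settings. A rough sketch follows, assuming the database directory contains an XML settings file (named settings.xml here) whose element names match the keys being checked; both the file name and the element names are assumptions rather than a documented layout.

# Assumed implementation of H.parse_settings: read an XML settings file from
# the database directory and map each element to an int or bool. File name
# and element names are assumptions based on the keys asserted above.
import os
import xml.etree.ElementTree as ET

def parse_settings(db_dir):
    settings = {}
    root = ET.parse(os.path.join(db_dir, "settings.xml")).getroot()
    for element in root.iter():
        text = (element.text or "").strip()
        if text.isdigit():
            settings[element.tag] = int(text)
        elif text in ("true", "false"):
            settings[element.tag] = (text == "true")
    return settings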