def test_hash_tree_walk():
    tree = HashTree(8)
    tree.add(0xF)
    tree.add(0xA)
    assert len(tree) == 2
    # iteration yields the stored hashes in ascending order
    assert list(tree) == [0xA, 0xF]

def test_read_database():
    databasef = Path(DATABASE_FILE)
    with open(databasef, "rb") as database:
        tree = HashTree.read_from_file(database, config.hash_length)
    # every hash written by test_write_database must be found again;
    # the lines keep their trailing newline, so HashTree is assumed to
    # normalize hex-string input
    with open(HASH_FILE, "r") as hashesf:
        for h in hashesf:
            assert h in tree

def test_write_database(data_folder):
    Path("tests/data/").mkdir(exist_ok=True)
    tree = HashTree(config.hash_length)
    if not data_folder:
        # read previously collected hashes from the hash file
        with open(HASH_FILE, "r") as hashesf:
            hashes = hashesf.readlines()
        tree |= hashes
    else:
        # read hashes from the image sidecar files in the specified location
        with open(HASH_FILE, "w") as hashesf:
            for image in data_folder.glob("*.xmp"):
                meta = CuteMeta.from_file(image)
                try:
                    tree.add(meta.hash)
                except KeyError:
                    # HashTree.add apparently raises KeyError for duplicates;
                    # skip those and don't write them to the hash file
                    continue
                hashesf.write(str(meta.hash) + "\n")
    # serialize the tree to disk
    with open(DATABASE_FILE, "wb") as database:
        tree.write_to_file(database)

def test_tree_serialization():
    tree = HashTree(256)
    for _ in range(30):
        tree.add(random.getrandbits(256))
    # a serialize/deserialize round trip must reproduce the identical stream
    first = list(tree._serialize())
    tree2 = HashTree._deserialize(iter(first), tree.hash_length)
    second = list(tree2._serialize())
    assert first == second

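# The serialization tests above and the fixture tests go through private
# helpers or real files; a round trip through the public API and an in-memory
# buffer is a cheaper sanity check. This is a sketch that assumes write_to_file
# and read_from_file accept any binary file object (the tests only show them
# used with real files opened in binary mode).
import io

def test_tree_file_round_trip():
    tree = HashTree(256)
    for _ in range(30):
        tree.add(random.getrandbits(256))
    buffer = io.BytesIO()
    tree.write_to_file(buffer)
    buffer.seek(0)
    tree2 = HashTree.read_from_file(buffer, tree.hash_length)
    # iteration order is deterministic (see test_hash_tree_walk),
    # so the element lists must match exactly
    assert list(tree) == list(tree2)
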
def test_hash_tree_insert():
    tree = HashTree(256)
    tree.add(0xf9101c9eb59dace6cbcef38fa433a6338683c759c268c4ec51883155cb2a53f8)
    tree.add(0xed8a30cbb2d133170f36d32cd32c02dc93cbd903ccb68cb29b70db6ce728a6d1)
    assert 0xf9101c9eb59dace6cbcef38fa433a6338683c759c268c4ec51883155cb2a53f8 in tree
    assert 0xed8a30cbb2d133170f36d32cd32c02dc93cbd903ccb68cb29b70db6ce728a6d1 in tree
    assert 0xfefefefefefefefefefefefefefefefefefefefefefefefefefefefefefefefe not in tree
    assert 0x0000000000000000000000000000000000000000000000000000000000000000 not in tree

def test_find_hamming_distance_simple():
    tree = HashTree(4)
    all_values = {0b1111, 0b1110, 0b1011, 0b0010, 0b0001, 0b0000}
    tree |= all_values
    # distance 0 matches nothing: the query hash itself is excluded
    assert find_all_hamming_distance(tree, 0b1111, 0) == set()
    assert find_all_hamming_distance(tree, 0b1111, 1) == {0b1110, 0b1011}
    # nothing sits at exactly distance 2 from 0b1111, so the result is unchanged
    assert find_all_hamming_distance(tree, 0b1111, 2) == {0b1110, 0b1011}
    assert find_all_hamming_distance(tree, 0b1111, 3) == {0b1110, 0b1011, 0b0010, 0b0001}
    assert find_all_hamming_distance(tree, 0b1111, 4) == all_values - {0b1111}
    assert find_all_hamming_distance(tree, 0b1011, 0) == set()
    assert find_all_hamming_distance(tree, 0b1011, 1) == {0b1111}
    assert find_all_hamming_distance(tree, 0b1011, 2) == {0b1111, 0b1110, 0b0010, 0b0001}
    assert find_all_hamming_distance(tree, 0b1011, 3) == all_values - {0b1011}
    assert find_all_hamming_distance(tree, 0b1011, 4) == all_values - {0b1011}

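# A brute-force cross-check of the tree search, sketched under the assumption
# (implied by the asserts above) that find_all_hamming_distance(tree, q, d)
# returns every stored hash whose Hamming distance to q lies in 1..d, i.e.
# excluding q itself. brute_force_hamming is a hypothetical helper for this
# test only, not part of the codebase.
def brute_force_hamming(values, query, distance):
    # v ^ query has a 1 bit exactly where the two hashes differ
    return {v for v in values if 0 < bin(v ^ query).count("1") <= distance}

def test_find_hamming_distance_brute_force():
    tree = HashTree(16)
    values = {random.getrandbits(16) for _ in range(100)}
    tree |= values
    for query in random.sample(sorted(values), 10):
        for distance in range(8):
            assert (find_all_hamming_distance(tree, query, distance)
                    == brute_force_hamming(values, query, distance))
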
def test_hash_tree_remove():
    tree = HashTree(256)
    rndints = []
    for _ in range(20):
        rndint = random.getrandbits(256)
        rndints.append(rndint)
        tree.add(rndint)
    # remove in reverse insertion order, verifying after each removal that
    # all remaining entries are still present
    for r in list(reversed(rndints)):
        rndints.pop()
        assert r in tree
        tree.remove(r)
        assert r not in tree
        for other in rndints:
            assert other in tree
    assert len(tree) == 0
    # an empty tree must have no children on its root node
    assert tree.root.left is None and tree.root.right is None

def test_find_hamming_distance():
    tree = HashTree(54)
    numbers = []

    def create_offset(n):
        # spread the four bits of n over four 6-bit blocks, so any two base
        # numbers differ in at least 6 of the low 24 bits and never fall
        # within the search distances (at most 5) used below
        num = 0b000000_000000_000000_000000
        for m in range(0, 6):
            num ^= (((n & 0b0001) >> 0) & 1) << m
        for m in range(6, 12):
            num ^= (((n & 0b0010) >> 1) & 1) << m
        for m in range(12, 18):
            num ^= (((n & 0b0100) >> 2) & 1) << m
        for m in range(18, 24):
            num ^= (((n & 0b1000) >> 3) & 1) << m
        return num

    for i in range(16):
        num = random.getrandbits(30) << 24 | create_offset(i)
        tree.add(num)
        number = (num, [])
        # derive variants at Hamming distance 1, 2, ... 5 by flipping one
        # additional distinct high bit per step
        for m in random.sample(range(24, 54), 5):
            num = num ^ (1 << m)
            number[1].append(num)
            tree.add(num)
        numbers.append(number)

    # dump the generated numbers for easier debugging on failure; the newline
    # variable works around f-strings not allowing backslashes before 3.12
    newline = "\n"
    fmt = "054b"
    print()
    print("\n\n".join(
        f"{n[0]:{fmt}} >>>\n{newline.join(f'{o:{fmt}}' for o in n[1])}"
        for n in numbers))

    for find, test in numbers:
        for distance in range(len(test)):
            # exactly the variants up to the requested distance must be found
            assert find_all_hamming_distance(tree, find, distance) == set(test[:distance])

def init_db():
    global __db, __hashes
    log.info("Scanning database")
    refresh_cache = False
    if not config.metadbf.exists() or not config.hashdbf.exists():
        config.metadbf.touch(exist_ok=True)
        refresh_cache = True

    log.info("Setting up database %r", str(config.metadbf))
    __db = connect_db()
    __db.executescript(f"""
        CREATE TABLE IF NOT EXISTS Metadata (
            uid          UUID PRIMARY KEY NOT NULL,
            last_updated timestamp NOT NULL DEFAULT(strftime('%Y-%m-%d %H:%M:%f', 'now')),
            hash         TEXT NOT NULL,
            caption      TEXT,
            author       TEXT,
            source       TEXT,
            group_id     UUID,
            date         timestamp NOT NULL DEFAULT(strftime('%Y-%m-%d %H:%M:%f', 'now')),
            rating       Rating,
            source_other PSet,
            source_via   PSet
        ) WITHOUT ROWID;

        CREATE TABLE IF NOT EXISTS Metadata_Keywords (
            uid     UUID NOT NULL,
            keyword TEXT NOT NULL CHECK (keyword REGEXP '{config.tag_regex}')
        );
        CREATE TABLE IF NOT EXISTS Metadata_Collections (
            uid        UUID NOT NULL,
            collection TEXT NOT NULL CHECK (collection REGEXP '{config.tag_regex}')
        );
    """)

    if refresh_cache:
        # full rebuild: hash every sidecar file in the image folder
        __hashes = HashTree(config.hash_length)
        log.info("Loading folder %r into database", str(config.image_folder))
        for xmpf in config.image_folder.glob("*.xmp"):
            if not xmpf.is_file():
                continue
            if xmpf.name.startswith("."):
                continue
            _load_file(xmpf, __db)
        __db.commit()
        with open(config.hashdbf, "wb") as hashdbfp, __lock:
            log.info("Writing hashes to file...")
            __hashes.write_to_file(hashdbfp)
    else:
        with open(config.hashdbf, "rb") as hashdbfp, __lock:
            log.info("Loading hashes from cache %r", str(config.hashdbf))
            __hashes = HashTree.read_from_file(hashdbfp, config.hash_length)

        # reconcile the cached database with the current folder contents
        log.info("Catching up with image folder")
        uuids_in_folder = set()
        for xmpf in config.image_folder.glob("*.xmp"):
            if not xmpf.is_file():
                continue
            if xmpf.name.startswith("."):
                continue
            try:
                uuids_in_folder.add(UUID(xmpf.stem))
            except ValueError:
                # file name is not a valid UUID
                continue
        uuids_in_database = set(
            d[0] for d in __db.execute("SELECT uid FROM Metadata").fetchall())
        for uid in uuids_in_folder - uuids_in_database:  # recently added
            _load_file(xmp_file_for_uid(uid), __db)
        for uid in uuids_in_database - uuids_in_folder:  # recently deleted
            _remove_image(uid, __db)
        for uid in uuids_in_database:
            try:
                _save_file(xmp_file_for_uid(uid), __db)
            except FileNotFoundError:
                pass  # was deleted earlier
        __db.commit()
    log.info("Done!")


def exit():
    log.info("Closing database connection")
    __db.commit()
    __db.close()
    with open(config.hashdbf, "wb") as hashdbfp, __lock:
        log.info("Writing hashes to file...")
        __hashes.write_to_file(hashdbfp)
    log.info("Done!")


atexit.register(exit)
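
# Note on the REGEXP checks above: SQLite parses the REGEXP operator but ships
# no regexp() function, so the CHECK constraints only work if connect_db()
# registers one on each connection. connect_db() is not shown here; the sketch
# below is an assumption of roughly what it has to do. The argument order is
# fixed by SQLite: `x REGEXP y` calls regexp(y, x), pattern first. Whether the
# project anchors the pattern (re.search vs. re.fullmatch) is also an
# assumption, as is the PARSE_DECLTYPES setup, which together with registered
# converters would explain the UUID/timestamp/Rating/PSet column types; those
# registrations are omitted here.
import re
import sqlite3

def connect_db_sketch():
    db = sqlite3.connect(config.metadbf, detect_types=sqlite3.PARSE_DECLTYPES)
    # invoked by SQLite for every evaluation of the REGEXP operator
    db.create_function(
        "REGEXP", 2,
        lambda pattern, value: value is not None
        and re.search(pattern, value) is not None)
    return db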