def test_hash_tree_walk():
    tree = HashTree(8)
    tree.add(0xF)
    tree.add(0xA)

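    # insertion order does not matter: iteration yields the stored hashes in
    # ascending order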
    assert len(tree) == 2
    assert list(tree) == [0xA, 0xF]
def test_read_database():
    databasef = Path(DATABASE_FILE)

    with open(databasef, "rb") as database:
        tree = HashTree.read_from_file(database, config.hash_length)

    with open(HASH_FILE, "r") as hashesf:
        for h in hashesf:
            # strip the trailing newline before the membership check
            assert h.strip() in tree
def test_write_database(data_folder):
    Path("tests/data/").mkdir(exist_ok=True)

    tree = HashTree(config.hash_length)

    if not data_folder:
        # reading hashes from the file
        with open(HASH_FILE, "r") as hashesf:
            hashes = hashesf.readlines()
            tree |= hashes

    else:
        # reading hashes from the image files in the specified location
        with open(HASH_FILE, "w") as hashesf:
            for image in data_folder.glob("*.xmp"):
                meta = CuteMeta.from_file(image)
                try:
                    tree.add(meta.hash)
                except KeyError:
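                    # presumably a duplicate hash; skip it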
                    continue
                hashesf.write(str(meta.hash) + "\n")

    # serialize tree
    with open(DATABASE_FILE, "wb") as database:
        tree.write_to_file(database)
def test_tree_serialization():
    tree = HashTree(256)
    for _ in range(0, 30):
        tree.add(random.getrandbits(256))

    first = list(tree._serialize())
    tree2 = HashTree._deserialize(iter(first), tree.hash_length)
    second = list(tree2._serialize())

    assert first == second
def test_hash_tree_insert():
    tree = HashTree(256)
    tree.add(
        0xf9101c9eb59dace6cbcef38fa433a6338683c759c268c4ec51883155cb2a53f8)
    tree.add(
        0xed8a30cbb2d133170f36d32cd32c02dc93cbd903ccb68cb29b70db6ce728a6d1)

    assert 0xf9101c9eb59dace6cbcef38fa433a6338683c759c268c4ec51883155cb2a53f8 in tree
    assert 0xed8a30cbb2d133170f36d32cd32c02dc93cbd903ccb68cb29b70db6ce728a6d1 in tree

    assert 0xfefefefefefefefefefefefefefefefefefefefefefefefefefefefefefefefe not in tree
    assert 0x0000000000000000000000000000000000000000000000000000000000000000 not in tree
def test_find_hamming_distance_simple():
    tree = HashTree(4)
    all_values = set([0b1111, 0b1110, 0b1011, 0b0010, 0b0001, 0b0000])
    tree |= all_values

    assert find_all_hamming_distance(tree, 0b1111, 0) == set()
    assert find_all_hamming_distance(tree, 0b1111, 1) == set([0b1110, 0b1011])
    assert find_all_hamming_distance(tree, 0b1111, 2) == set([0b1110, 0b1011])
    assert find_all_hamming_distance(tree, 0b1111, 3) == set(
        [0b1110, 0b1011, 0b0010, 0b0001])
    assert find_all_hamming_distance(tree, 0b1111,
                                     4) == all_values - set([0b1111])

    assert find_all_hamming_distance(tree, 0b1011, 0) == set()
    assert find_all_hamming_distance(tree, 0b1011, 1) == set([0b1111])
    assert find_all_hamming_distance(tree, 0b1011, 2) == set(
        [0b1111, 0b1110, 0b0010, 0b0001])
    assert find_all_hamming_distance(tree, 0b1011,
                                     3) == all_values - set([0b1011])
    assert find_all_hamming_distance(tree, 0b1011,
                                     4) == all_values - set([0b1011])
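
# Not part of the project: a brute-force reference sketch of the behaviour that
# test_find_hamming_distance_simple exercises. It assumes that
# find_all_hamming_distance(tree, query, d) returns every stored hash whose
# Hamming distance to `query` is at least 1 and at most d, so the query itself
# is never part of the result. The helper name is hypothetical.
def _find_all_hamming_distance_bruteforce(hashes, query, max_distance):
    return {h for h in hashes if 0 < bin(h ^ query).count("1") <= max_distance}
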
def test_hash_tree_remove():
    tree = HashTree(256)
    rndints = []
    for _ in range(0, 20):
        rndint = random.getrandbits(256)
        rndints.append(rndint)
        tree.add(rndint)

    for r in list(reversed(rndints)):
        rndints.pop()
        assert r in tree
        tree.remove(r)
        assert r not in tree

        for other in rndints:
            assert other in tree

    assert len(tree) == 0
    assert tree.root.left is None and tree.root.right is None
def test_find_hamming_distance():
    tree = HashTree(54)
    numbers = []

    def create_offset(n):
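        # Replicate each of the four low bits of n into its own 6-bit group, so
        # any two distinct values of n produce offsets at Hamming distance >= 6.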
        num = 0b000000_000000_000000_000000
        for m in range(0, 6):
            num ^= (((n & 0b0001) >> 0) & 1) << m
        for m in range(6, 12):
            num ^= (((n & 0b0010) >> 1) & 1) << m
        for m in range(12, 18):
            num ^= (((n & 0b0100) >> 2) & 1) << m
        for m in range(18, 24):
            num ^= (((n & 0b1000) >> 3) & 1) << m
        return num

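    # Each base number combines 30 random high bits with a unique low-24-bit
    # offset; flipping five distinct high bits one at a time yields neighbours
    # at Hamming distances 1 through 5 from that base.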
    for i in range(0, 16):
        num = random.getrandbits(30) << 24 | create_offset(i)
        tree.add(num)
        number = (num, [])
        for m in random.sample(range(24, 54), 5):
            num = num ^ (1 << m)
            number[1].append(num)
            tree.add(num)
        numbers.append(number)

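    # Debug dump of the generated numbers; the `newline` variable works around
    # the rule that f-string expressions may not contain backslashes
    # (prior to Python 3.12).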
    newline = "\n"
    fmt = "054b"
    print()
    print("\n\n".join(
        f"{n[0]:{fmt}} >>>\n{newline.join(f'{o:{fmt}}' for o in n[1])}"
        for n in numbers))

    for n in numbers:
        find = n[0]
        test = n[1]
        for distance in range(0, len(test)):
            assert find_all_hamming_distance(tree, find,
                                             distance) == set(test[:distance])
def init_db():
    global __db, __hashes

    log.info("Scanning database")

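    # Rebuild the hash cache from scratch if either the metadata database or
    # the hash cache file is missing.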
    refresh_cache = False
    if not config.metadbf.exists() or not config.hashdbf.exists():
        config.metadbf.touch(exist_ok=True)
        refresh_cache = True

    log.info("Setting up database %r", str(config.metadbf))

    __db = connect_db()
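    # UUID, Rating and PSet are custom column types, presumably registered as
    # sqlite3 adapters/converters elsewhere in the project.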
    __db.executescript(f"""
        CREATE TABLE IF not EXISTS Metadata (
            uid UUID PRIMARY KEY not null,
            last_updated timestamp not null DEFAULT(strftime('%Y-%m-%d %H:%M:%f', 'now')),
            hash TEXT not null,
            caption TEXT,
            author TEXT,
            source TEXT,
            group_id UUID,
            date timestamp not null DEFAULT(strftime('%Y-%m-%d %H:%M:%f', 'now')),
            rating Rating,
            source_other PSet,
            source_via PSet
        ) WITHOUT ROWID;

        CREATE TABLE IF not EXISTS Metadata_Keywords (
            uid UUID not null,
            keyword TEXT NOT NULL CHECK (keyword REGEXP '{config.tag_regex}')
        );

        CREATE TABLE IF not EXISTS Metadata_Collections (
            uid UUID not null,
            collection TEXT NOT NULL CHECK (collection REGEXP '{config.tag_regex}')
        );
    """)

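    # Either rebuild the hash tree by scanning every .xmp sidecar in the image
    # folder, or load the previously serialized tree from the hash cache file.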
    if refresh_cache:
        __hashes = HashTree(config.hash_length)

        log.info("Loading folder %r into database", str(config.image_folder))

        for xmpf in config.image_folder.glob("*.xmp"):
            if not xmpf.is_file(): continue
            if xmpf.name.startswith("."): continue
            _load_file(xmpf, __db)

        __db.commit()
        with open(config.hashdbf, "wb") as hashdbfp, __lock:
            log.info("Writing hashes to file...")
            __hashes.write_to_file(hashdbfp)

    else:
        with open(config.hashdbf, "rb") as hashdbfp, __lock:
            log.info("Loading hashes from cache %r", str(config.hashdbf))
            __hashes = HashTree.read_from_file(hashdbfp, config.hash_length)

    log.info("Catching up with image folder")
    uuids_in_folder = set()
    for xmpf in config.image_folder.glob("*.xmp"):
        if not xmpf.is_file(): continue
        if xmpf.name.startswith("."): continue

        try:
            uuid = UUID(xmpf.stem)
            uuids_in_folder.add(uuid)

        except ValueError:
            # the file name stem is not a valid UUID; skip it
            continue
    uuids_in_database = set(
        d[0] for d in __db.execute("select uid from Metadata").fetchall())

    for uid in uuids_in_folder - uuids_in_database:  # recently added
        _load_file(xmp_file_for_uid(uid), __db)
    for uid in uuids_in_database - uuids_in_folder:  # recently deleted
        _remove_image(uid, __db)

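    # Sync every record in the database with its sidecar file; sidecars that
    # no longer exist are skipped.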
    for uid in uuids_in_database:
        try:
            _save_file(xmp_file_for_uid(uid), __db)
        except FileNotFoundError:
            pass  # was deleted earlier

    __db.commit()

    log.info("Done!")

    def exit():
        log.info("Closing database connection")
        __db.commit()
        __db.close()

        with open(config.hashdbf, "wb") as hashdbfp, __lock:
            log.info("Writing hashes to file...")
            __hashes.write_to_file(hashdbfp)

        log.info("Done!")

    atexit.register(exit)