def test_write_database(data_folder):
    """Fill a ``HashTree`` with hashes and serialize it to ``DATABASE_FILE``.

    When ``data_folder`` is falsy the hashes are read back from ``HASH_FILE``.
    Otherwise every ``*.xmp`` file in ``data_folder`` is loaded, its hash is
    added to the tree, and each hash the tree accepted is written to
    ``HASH_FILE`` (one per line).
    """
    Path("tests/data/").mkdir(exist_ok=True)

    tree = HashTree(config.hash_length)

    if data_folder:
        # Collect the hashes from the metadata files and record them.
        with open(HASH_FILE, "w") as hash_out:
            for xmp_path in data_folder.glob("*.xmp"):
                meta = CuteMeta.from_file(xmp_path)
                try:
                    tree.add(meta.hash)
                except KeyError:
                    # tree.add refused this hash; leave it out of the file
                    pass
                else:
                    hash_out.write(str(meta.hash) + "\n")
    else:
        # Reuse the hash list stored in the hash file.
        with open(HASH_FILE, "r") as hash_in:
            stored_hashes = hash_in.readlines()
            tree |= stored_hashes

    # Serialize the finished tree.
    with open(DATABASE_FILE, "wb") as database:
        tree.write_to_file(database)
Beispiel #2
0
def iqdb(ARGS):
    """Query IQDB for ``ARGS.file`` and print the results.

    ``ARGS.file`` is handled in this order:

    * a URL is sent to IQDB directly;
    * a path with an existing ``.xmp`` sidecar is looked up by the sidecar's
      ``source`` URL;
    * any other path is opened and uploaded as a file.

    ``ARGS.saucenao`` is forwarded to the lookup unchanged.  An
    ``IQDBException`` is reported on stdout instead of being propagated.
    """
    start = time.time()
    try:
        if validators.url(ARGS.file):
            print("Fetching IQDB for url", ARGS.file, "...")
            results = _iqdb(url=ARGS.file, saucenao=ARGS.saucenao)
        else:
            xmpf = Path(ARGS.file).with_suffix(".xmp")
            if xmpf.exists():
                cute_meta = CuteMeta.from_file(xmpf)
                print("Fetching IQDB for url", cute_meta.source, "...")
                results = _iqdb(url=cute_meta.source, saucenao=ARGS.saucenao)
            else:
                print("Fetching IQDB for file", ARGS.file, "...")
                # Close the upload handle as soon as the lookup returns
                # instead of leaking it until garbage collection.
                with open(ARGS.file, "rb") as image_fp:
                    results = _iqdb(file=image_fp, saucenao=ARGS.saucenao)

        end = time.time()
        print(f"Found {len(results)} results in {end - start:.2f} seconds.")
        for index, result in enumerate(results):
            print(
                f"{index+1:>3}: {result.similarity:.0%} {result.size[0]:>4}x{result.size[1]:<4} [{result.rating:^8}] {result.url}"
            )
    except IQDBException as e:
        print("IQDB Error:", str(e))
Beispiel #3
0
def get_meta(uid: UUID, db: sqlite3.Connection = None):
    """Build a ``CuteMeta`` for ``uid`` from the rows stored in the database.

    Every column of the matching ``Metadata`` row is copied onto the object as
    an attribute of the same name.  ``keywords`` and ``collections`` are
    filled from their own tables and are ``None`` when no rows exist.

    Returns the populated ``CuteMeta``.
    """
    meta = CuteMeta(uid=uid, filename=xmp_file_for_uid(uid))
    uid_key = str(uid.hex)
    params = (uid_key, )

    row = db.execute("select * from Metadata where uid is ?",
                     params).fetchone()
    for column, value in zip(row.keys(), row):
        setattr(meta, column, value)

    keyword_rows = db.execute(
        "select keyword from Metadata_Keywords where uid is ?",
        params).fetchall()
    # an empty result becomes None rather than an empty set
    meta.keywords = {entry[0] for entry in keyword_rows} or None

    collection_rows = db.execute(
        "select collection from Metadata_Collections where uid is ?",
        params).fetchall()
    meta.collections = {entry[0] for entry in collection_rows} or None

    return meta
Beispiel #4
0
def args(parser):
    """Register the ``set``, ``add``, ``remove`` and ``delete`` subcommands.

    Each subcommand takes a ``tag`` (restricted to ``CuteMeta.tag_names()``)
    and one or more ``file`` arguments; all but ``delete`` also take one or
    more ``value`` arguments, which are unicode-unescaped.  The chosen
    subcommand name is stored under ``subcommand``.

    Returns the same ``parser`` that was passed in.
    """

    def unescaped_string(arg_str):
        return codecs.decode(str(arg_str), "unicode_escape")

    tag_subcommand = parser.add_subparsers(dest="subcommand")

    def register(name, description, tag_help, value_help=None):
        # Positional order matters: tag, then (optionally) value, then file.
        sub = tag_subcommand.add_parser(name, help=description)
        sub.add_argument("tag", help=tag_help, choices=CuteMeta.tag_names())
        if value_help is not None:
            sub.add_argument("value",
                             nargs="+",
                             type=unescaped_string,
                             help=value_help)
        sub.add_argument("file", nargs="+").completer = UUIDFileCompleter

    register("set", "Modify a file's tags", "Tag to set",
             "Values to set. Multiple values for list or set")
    register("add", "Adds an additional value to a tag", "Tag to add to",
             "Values to add")
    register("remove", "Removes a value from a tag", "Tag to remove from",
             "Values to remove")
    register("delete", "Deletes a tag completely", "Tag to remove from")

    return parser
Beispiel #5
0
def main(ARGS):
    """Print the metadata of every entry in ``ARGS.file``.

    An entry is either a path to an existing file (its ``.xmp`` sidecar is
    read) or a uid that is looked up in the database.  If the first entry is
    ``"-"`` and stdin is not a terminal, the entries are read from stdin, one
    per line.

    With ``ARGS.json`` the output is a JSON array; entries that cannot be
    resolved are skipped.  Without it, the first unresolvable entry prints
    the tab-completion candidates for that entry and ends the command.
    ``ARGS.tag`` is forwarded to the metadata formatter.
    """
    import sys

    from uuid import UUID
    from pathlib import Path

    from cutespam import db
    from cutespam.xmpmeta import CuteMeta

    if ARGS.file and ARGS.file[0] == "-" and not sys.stdin.isatty():
        ARGS.file = sys.stdin.read().splitlines()

    if ARGS.json:
        print("[")
    # Tracks whether an array element was already printed, so the separator
    # is emitted *before* each further element.  Deciding by list index
    # left a dangling "," whenever a trailing entry was skipped.
    emitted_json = False
    for file in ARGS.file:
        fp = Path(file)
        if fp.exists() and fp.is_file():
            cute_meta = CuteMeta.from_file(fp.with_suffix(".xmp"))
        else:
            try:
                uid = UUID(file)
                cute_meta = CuteMeta.from_db(uid)
            except Exception:
                # Not a bare ``except``: Ctrl-C and SystemExit must still
                # terminate the command.
                if ARGS.json:
                    continue
                print("\n".join(
                    str(uid) for uid in db.get_tab_complete_uids(file)))
                return

        if ARGS.json:
            if emitted_json:
                print(",")
            print(cute_meta.to_json(ARGS.tag))
            emitted_json = True
        else:
            print(cute_meta.to_string(ARGS.tag))
    if ARGS.json:
        print("]")
Beispiel #6
0
def _load_file(xmpf: Path, db: sqlite3.Connection):
    """Insert the metadata stored in the XMP file ``xmpf`` into the database.

    Files without a uid or without a hash are ignored.  The hash is added to
    the shared hash collection under the module lock; a ``KeyError`` from
    that step is only logged as a possible duplicate and the row is inserted
    anyway.  Keywords and collections are written to their own tables.

    The stored ``last_updated`` is the file's value, unless keyword
    generation changed the metadata, in which case the current UTC time is
    used.
    """
    meta: CuteMeta = CuteMeta.from_file(xmpf)
    timestamp = meta.last_updated

    if not meta.uid or not meta.hash:
        return

    log.info("Loading %r", str(xmpf))

    # Sync data
    if meta.generate_keywords():
        log.info("Updated autogenerated keywords")
        timestamp = datetime.utcnow()  # make sure we set the correct timestamp

    with __lock:
        try:
            __hashes.add(meta.hash)
        except KeyError:
            # ``warn`` is a deprecated alias; ``warning`` is the documented
            # logging method.
            log.warning("Possible duplicate %r", str(xmpf))

    db.execute(
        """
        INSERT INTO Metadata (
            last_updated, uid, hash, caption, author, source, group_id, rating, source_other, source_via
        ) VALUES (
            ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
        )""", (timestamp, meta.uid, meta.hash, meta.caption, meta.author,
               meta.source, meta.group_id, meta.rating, meta.source_other,
               meta.source_via))
    # The date column is only written when the file actually carries a date.
    if meta.date:
        db.execute(
            """
            UPDATE Metadata SET date = ? WHERE uid is ?
        """, (meta.date, meta.uid))

    if meta.keywords:
        db.executemany(
            """
            INSERT INTO Metadata_Keywords VALUES (
                ?, ?
            ) 	
        """, [(meta.uid, keyword) for keyword in meta.keywords])
    if meta.collections:
        db.executemany(
            """
            INSERT INTO Metadata_Collections VALUES (
                ?, ?
            ) 	
        """, [(meta.uid, collection) for collection in meta.collections])
Beispiel #7
0
def _save_file(xmpf: Path, db: sqlite3.Connection):
    """Copy the metadata of ``xmpf`` into the database if the file is newer.

    The uid is taken from the file name stem.  The file's ``last_updated`` is
    compared with the value stored for that uid; only when the file is
    strictly newer is the metadata saved and the transaction committed.  The
    whole operation runs under the module lock.
    """
    with __lock:
        meta = CuteMeta.from_file(xmpf)
        file_stamp = meta.last_updated
        uid = UUID(xmpf.stem)
        stored_row = db.execute(
            """
            select last_updated from Metadata where uid is ?
        """, (uid, )).fetchone()
        db_stamp = stored_row[0]

        # Nothing to do unless the file is strictly newer than the database.
        if not (file_stamp > db_stamp):
            return

        log.info("Reading from file %r", str(xmpf))
        log.debug("file: %s database: %s", file_stamp, db_stamp)
        _save_meta(meta, file_stamp, db)
        db.commit()
Beispiel #8
0
def _save_meta(meta: CuteMeta, timestamp: datetime, db: sqlite3.Connection):
    """Overwrite the database rows belonging to ``meta`` with its values.

    The keyword and collection rows for the uid are deleted and re-inserted,
    then the ``Metadata`` row is updated.  ``timestamp`` is stored as
    ``last_updated`` unless keyword generation changed the metadata, in which
    case the current UTC time is stored instead.  Nothing is committed here.
    """
    # Sync data
    if meta.generate_keywords():
        log.info("Updated autogenerated keywords")
        timestamp = datetime.utcnow()  # make sure we set the correct timestamp

    clear_keywords = """
        DELETE FROM Metadata_Keywords WHERE uid is ?
    """
    clear_collections = """
        DELETE FROM Metadata_Collections WHERE uid is ?
    """
    insert_keyword = """
            INSERT INTO Metadata_Keywords VALUES (
                ?, ?
            ) 	
        """
    insert_collection = """
            INSERT INTO Metadata_Collections VALUES (
                ?, ?
            ) 	
        """
    update_row = """
        UPDATE Metadata SET
            last_updated = ?,
            hash = ?,
            caption = ?,
            author = ?,
            source = ?,
            group_id = ?,
            rating = ?,
            source_other = ?,
            source_via = ?
        WHERE
            uid is ?
    """

    # Remove the old multi-valued rows before writing the current ones.
    db.execute(clear_keywords, (meta.uid, ))
    db.execute(clear_collections, (meta.uid, ))

    if meta.keywords:
        keyword_rows = [(meta.uid, word) for word in meta.keywords]
        db.executemany(insert_keyword, keyword_rows)
    if meta.collections:
        collection_rows = [(meta.uid, name) for name in meta.collections]
        db.executemany(insert_collection, collection_rows)

    row_values = (timestamp, meta.hash, meta.caption, meta.author,
                  meta.source, meta.group_id, meta.rating, meta.source_other,
                  meta.source_via, meta.uid)
    db.execute(update_row, row_values)
Beispiel #9
0
def download(data: dict):
    """Fetch the image named by ``data["img"]`` and file it with its metadata.

    The cached image is hashed, a new ``CuteMeta`` is written next to it as an
    ``.xmp`` file (filled from ``data`` and with generated keywords), and both
    files are moved into ``config.image_folder`` under the metadata's uid.
    """
    image_path = get_cached_file(data["img"])
    xmp_path = image_path.with_suffix(".xmp")

    meta: CuteMeta = CuteMeta(filename=xmp_path)
    meta.last_updated = datetime.utcnow()
    meta.source = data["img"]
    meta.hash = hash_img(image_path)
    meta.date = datetime.utcnow()

    read_meta_from_dict(meta, data)

    meta.generate_keywords()
    meta.write()

    # Image first, then its sidecar; both are renamed to "<uid><suffix>".
    for origin, suffix in ((image_path, image_path.suffix), (xmp_path, ".xmp")):
        target = config.image_folder / (str(meta.uid) + suffix)
        shutil.move(str(origin), str(target))
Beispiel #10
0
def listen_for_db_changes():
    """Poll the database forever and write newer rows back to their XMP files.

    Every 10 seconds all ``Metadata`` rows changed since the previous poll are
    fetched.  For each row whose ``last_updated`` is newer than the value in
    its XMP file, the row's columns, keywords and collections are copied onto
    the file's metadata and the file is rewritten.  Each file is handled
    under the module lock.  This function never returns.
    """
    last_updated = datetime.utcfromtimestamp(
        os.path.getmtime(config.metadbf.resolve()))
    # We need our own connection since this is on a different thread
    db = connect_db()
    while True:
        time.sleep(10)

        # Take the cut-off for the next poll before querying.  Taking it
        # afterwards would skip rows modified while the query was running.
        # A row seen twice is harmless: the timestamp comparison below
        # rejects it the second time.
        poll_started = datetime.utcnow()
        modified = db.execute(
            """
            select * from Metadata where last_updated > ?
        """, (last_updated, )).fetchall()
        last_updated = poll_started

        for data in modified:
            with __lock:
                filename = xmp_file_for_uid(data["uid"])
                meta = CuteMeta.from_file(filename)

                f_last_updated = meta.last_updated
                db_last_updated = data["last_updated"]
                if db_last_updated > f_last_updated:
                    log.info("Writing to file %r", str(filename))
                    log.debug("file: %s database: %s", f_last_updated,
                              db_last_updated)

                    for name, v in zip(data.keys(), data):
                        setattr(meta, name, v)

                    # fetchall(), not fetchmany(): without a size argument
                    # fetchmany() returns only ``arraysize`` rows (1 by
                    # default), which dropped all but one keyword and
                    # collection when the file was rewritten.
                    keywords = db.execute(
                        """
                        select keyword from Metadata_Keywords where uid = ?
                    """, (data["uid"], )).fetchall()
                    collections = db.execute(
                        """
                        select collection from Metadata_Collections where uid = ?
                    """, (data["uid"], )).fetchall()

                    meta.keywords = set(k[0] for k in keywords)
                    meta.collections = set(c[0] for c in collections)
                    meta.last_updated = db_last_updated  # Make sure that the entry in the database stays the same as the file
                    meta.write()
Beispiel #11
0
    def download_img(source):
        """Download the image described by ``source`` and write its metadata.

        ``source`` is a dict with the image URL under ``"img"``, a metadata
        dict under ``"meta"`` (which must contain ``"uid"``) and optional
        ``"src"`` / ``"via"`` entries.  The image is streamed to a ``.tmp``
        file in ``ARGS.out_folder`` and renamed to ``<uid><ext>`` when
        complete; an XMP file with the hash and metadata is written next to
        it.

        Only ``image/jpeg`` and ``image/png`` responses no larger than
        ``ARGS.max_filesize`` MB are accepted.  Failures are logged, never
        raised.
        """
        img = source.get("img", None)
        meta = source.get("meta", {})
        uid = meta.get("uid", None)
        if not img:
            log("No image supplied for\n" + str(source))
            return
        if not uid:
            log("No uid for\n" + str(source))
            # Without a uid the target file name cannot be built; stop here
            # instead of falling through to a second, misleading error.
            return

        try:
            url = urlparse(img)
            filename = os.path.basename(url.path)
            _, ext = os.path.splitext(filename)
            tmpfile = Path(ARGS.out_folder) / (filename + ".tmp")
            imgfile = Path(ARGS.out_folder) / (str(UUID(uid)) + ext)

            response = make_request(img, "GET")
            header = response.info()
            cnt_type = header["Content-Type"]
            if cnt_type not in ("image/jpeg", "image/png"):
                log("Unknown content type", cnt_type, "for", img)
                return
            size = int(header["Content-Length"])
            size_mb = size / 1_000_000
            if size_mb > ARGS.max_filesize:
                log("%s is too big! You specified a maximum size of %d MB, file is %.2f MB"
                    % (img, ARGS.max_filesize, size_mb))
                return

            # The chunk count derived from Content-Length only drives the
            # progress bar; an empty read ends the download early.
            total_chunks = math.ceil(size / config.download_chunk_size)
            with response as stream:
                with open(tmpfile, "wb") as outf:
                    log("Starting download of", img)
                    for _ in atpbar.atpbar(range(total_chunks), name=img):
                        chunk = stream.read(config.download_chunk_size)
                        if not chunk:
                            break
                        outf.write(chunk)

            # Only a fully written download gets the final name.
            os.rename(tmpfile, imgfile)

            xmpfile = imgfile.with_suffix(".xmp")
            cute_meta = CuteMeta.from_file(xmpfile)
            cute_meta.clear()  # Delete all unwanted tags

            log("Generating image hash for file", imgfile)
            cute_meta.hash = hash_img(imgfile)
            log("Hashed", imgfile, "as", cute_meta.hash, "(phash, 16)")

            cute_meta.read_from_dict(meta, ignore_missing_keys=True)
            cute_meta.add_characters(*meta.get("character", []))
            cute_meta.source = img
            cute_meta.source_other = source.get("src", [])
            cute_meta.source_via = source.get("via", [])
            cute_meta.date = datetime.utcnow()
            cute_meta.write()

        except urllib.error.HTTPError as e:
            status = e.code
        except urllib.error.URLError:
            status = 400
        except ssl.CertificateError:
            status = 495
        except Exception as e:
            log("An exception occured while fetching url %s: %s" %
                (img, str(e)))
            # 0 marks "already reported" so the summary line below is skipped
            status = 0
        else:
            status = 200

        if status and status != 200:
            log("%s: %s" % (status, img))
Beispiel #12
0
def test_metadata():
    """Write a fully populated ``CuteMeta`` and compare it with two re-reads.

    The metadata written to ``TEST_OUT`` must equal what is read back from
    ``TEST_OUT`` and what is read from the reference file ``TEST_XMP``.
    """
    stamp = datetime.strptime("2017-05-29T00:00:59.412Z",
                              "%Y-%m-%dT%H:%M:%S.%fZ")
    ident = UUID("04a10461-a60b-4dc3-8d91-4a91b311f004")
    # one set object shared by both source fields
    urls = {"http://example.com", "http://example.de"}

    cm = CuteMeta(filename=TEST_OUT)
    cm.rating = Rating("q")
    cm.date = stamp
    cm.last_updated = stamp
    cm.hash = "b59f95ffc2e2f440ff006fb01a95c547ee163ee045c0c030ba3b570f4ecc28b5"
    cm.group_id = ident
    cm.uid = ident
    cm.source = "http://example.com/example_image.jpg"
    cm.author = "test_author"
    cm.source_other = urls
    cm.source_via = urls
    cm.keywords = {"test_keyword", "test_keyword_2"}
    cm.collections = {
        "test_collection", "test_collection2", "test_collection3"
    }
    cm.caption = "Test Caption"
    cm.write()

    reread = CuteMeta(filename=TEST_OUT)
    reread.read()

    reference = CuteMeta(filename=TEST_XMP)
    reference.read()

    written_dict = cm.as_dict()
    reread_dict = reread.as_dict()
    assert written_dict == reread_dict
    assert reread_dict == reference.as_dict()
    def html_output(duplicates):
        """Render groups of duplicate images as an HTML comparison page.

        ``duplicates`` is an iterable of groups, each an iterable of uids.
        Every group becomes one table with a column per image, showing the
        image, a link to the file, its resolution/format/size and its
        metadata.

        Returns the complete HTML document as a string.
        """
        from html import escape

        t_html = """
        <html>
            <head>
                <style>
                    table {{
                        table-layout: fixed;
                        border-bottom: 1px solid black;
                        width: 100%;
                        padding: 5px;
                    }}
                    img {{
                        max-width: 100%;
                        max-height: 500px;
                    }}
                    code {{
                        display: block;
                        white-space: pre-wrap;
                        word-wrap: break-word;
                    }}
                </style>
            </head>
            <body>
            {tables}
            </body>
        </html>
        """
        t_table = """
            <table>
                <tr>{images}</tr>
                <tr>{links}</tr>
                <tr>{dimensions}</tr>
                <tr>{tags}</tr>
            </table>
        """

        tables = []
        for duplicate in duplicates:
            images = []
            links = []
            dimensions = []
            tags = []

            for uid in duplicate:
                d = picture_file_for_uid(uid)
                fsize = os.path.getsize(d.resolve()) / 1_000_000
                with Image.open(d) as img_data:
                    width, height = img_data.size
                    fformat = img_data.format

                meta = CuteMeta.from_db(uid)
                # Paths and metadata are arbitrary text: escape them so that
                # "<", "&" or quotes cannot break the markup, and quote the
                # href so paths containing spaces stay a single attribute.
                path = escape(str(d.resolve().absolute()))
                images.append(f"<td><img src='{path}'/></td>")
                links.append(
                    f"<td><a href='{path}'><code>{path}</code></a></td>")
                dimensions.append(
                    f"<td><code>Resolution: {width}x{height}\nFormat: {fformat}\nSize: {fsize:.2f} MB</code></td>"
                )
                tags.append(f"<td><code>{escape(meta.to_string())}</code></td>")

            tables.append(
                t_table.format(images="".join(images),
                               links="".join(links),
                               dimensions="".join(dimensions),
                               tags="".join(tags)))

        return t_html.format(tables="".join(tables))
Beispiel #14
0
def main(ARGS):
    """Import the images in ``ARGS.file`` into the image folder.

    If the first entry is ``"-"`` and stdin is not a terminal, the file list
    is read from stdin.  ``.xmp`` entries are skipped.  For every other file:

    1. the image is hashed and similar images already known are listed;
       depending on ``ARGS.add_duplicate`` / ``ARGS.skip_duplicate`` the file
       is added, skipped, or the user is asked;
    2. IQDB is queried for a source; with no result,
       ``ARGS.add_no_iqdb`` / ``ARGS.skip_no_iqdb`` decide in the same way;
    3. if the found source has a higher resolution it replaces the local
       file;
    4. the metadata is written and image plus ``.xmp`` are moved
       (``ARGS.move``) or copied into ``config.image_folder`` under a fresh
       uid.
    """
    import shutil
    import sys
    from PIL import Image
    from uuid import uuid4
    from datetime import datetime
    from pathlib import Path

    from cutespam import yn_choice
    from cutespam.api import read_meta_from_dict, get_cached_file
    from cutespam.iqdb import iqdb, upscale
    from cutespam.hash import hash_img
    from cutespam.xmpmeta import CuteMeta
    from cutespam.config import config
    from cutespam.db import find_similar_images_hash, picture_file_for_uid

    if ARGS.file and ARGS.file[0] == "-" and not sys.stdin.isatty():
        ARGS.file = sys.stdin.read().splitlines()

    for file in ARGS.file:
        file = Path(file)
        if file.suffix == ".xmp": continue
        xmpfile = file.with_suffix(".xmp")

        meta = CuteMeta(filename=xmpfile)
        meta.hash = hash_img(file)

        similar = find_similar_images_hash(meta.hash, 0.9)
        if similar:
            print()
            print("Found potential duplicates:")
            for s in similar:
                print(
                    f"{s[0]:.1%}: {picture_file_for_uid(s[1]).resolve().as_uri() if ARGS.uri else s[1]}"
                )
            if not ARGS.add_duplicate and (ARGS.skip_duplicate
                                           or not yn_choice("Proceed?")):
                continue

        with Image.open(file) as imgf:
            width, height = imgf.size
            resolution = width * height

        print(f"Resolution: {width}x{height}")
        print("Fetching iqdb results")
        with open(file, "rb") as fp:
            result = iqdb(file=fp, threshold=0.9)

        source, data, service, r_resolution = upscale(result, resolution)
        if not source:
            if not ARGS.add_no_iqdb and (
                    ARGS.skip_no_iqdb or
                    not yn_choice("No relevant images found. Add anyways?")):
                continue
        else:
            print("Found image on", service)

        # Only swap in the remote image when a source was actually found.
        # NOTE(review): what upscale() returns for r_resolution without a
        # match is not visible here; the guard avoids comparing or fetching
        # in that case.
        if source and r_resolution > resolution:
            file = get_cached_file(source)

        meta.uid = uuid4()
        meta.source = source
        meta.date = datetime.utcnow()

        if data:
            read_meta_from_dict(meta, data)

        meta.generate_keywords()

        print("Metadata:")
        print(meta)

        meta.last_updated = datetime.utcnow()
        meta.write()

        print("Done")
        f = shutil.move if ARGS.move else shutil.copy
        f(str(file), str(config.image_folder / (str(meta.uid) + file.suffix)))
        f(str(xmpfile), str(config.image_folder / (str(meta.uid) + ".xmp")))
Beispiel #15
0
def main(ARGS):
    """Apply the tag subcommand in ``ARGS.subcommand`` to every file.

    Each entry of ``ARGS.file`` is an existing file (its ``.xmp`` sidecar is
    used) or a uid looked up in the database.  If the first entry is ``"-"``
    and stdin is not a terminal, the entries are read from stdin.

    Subcommands, all acting on ``ARGS.tag``:

    * ``set``    - replace the value with ``ARGS.value`` converted to the
      tag's type; overwriting a non-empty list/set asks for confirmation
      once per invocation;
    * ``delete`` - set the tag to ``None``;
    * ``add``    - add ``ARGS.value`` to a list/set tag;
    * ``remove`` - remove ``ARGS.value`` from a list/set tag.

    The metadata is written after each file.  An invalid value or a declined
    confirmation ends the command; files handled before that stay written.

    Raises:
        argparse.ArgumentTypeError: an entry is neither a file nor a uuid.
    """
    import argparse
    import sys
    from uuid import UUID
    from datetime import datetime
    from pathlib import Path

    from cutespam import yn_choice

    if ARGS.file and ARGS.file[0] == "-" and not sys.stdin.isatty():
        ARGS.file = sys.stdin.read().splitlines()

    # Kept outside the loop so the overwrite confirmation is asked only once
    # per invocation rather than once per file.
    asked_confirm_multiple = False

    for file in ARGS.file:
        fp = Path(file)
        if fp.exists() and fp.is_file():
            cute_meta = CuteMeta.from_file(fp.with_suffix(".xmp"))
        else:
            try:
                uid = UUID(file)
            except (ValueError, AttributeError, TypeError) as err:
                raise argparse.ArgumentTypeError(
                    "Not a valid file or uuid") from err
            cute_meta = CuteMeta.from_db(uid)

        tag = ARGS.tag

        if ARGS.subcommand == "set":
            tpe = getattr(CuteMeta, tag).type
            val = ARGS.value
            is_collection = issubclass(tpe, (list, set))
            # Unwrap a single value only for scalar tags.  For list/set tags
            # the value must stay a list, otherwise set("abc") would split
            # the string into single characters.
            if len(val) == 1 and not is_collection:
                val = val[0]
            if issubclass(tpe, datetime):
                try:
                    val = datetime.strptime(val, "%Y-%m-%d %H:%M:%S")
                except (ValueError, TypeError):
                    # TypeError: several values were given for a date tag
                    print("Invalid date format, use YYYY-MM-DD HH:MM:SS")
                    return
            else:
                # Make sure that we can convert into it!
                val = tpe(val)

            curr_v = getattr(cute_meta, tag)
            if curr_v and is_collection and not asked_confirm_multiple:
                if not yn_choice(
                        "You are about to overwrite multiple values, proceed?"
                ):
                    return
                asked_confirm_multiple = True

            setattr(cute_meta, tag, val)
        elif ARGS.subcommand == "delete":
            setattr(cute_meta, tag, None)
        elif ARGS.subcommand == "add":
            tpe = getattr(CuteMeta, tag).type
            if not issubclass(tpe, (list, set)):
                print("Can only add from list or set")
                return
            v = getattr(cute_meta, tag) or tpe()
            if tpe == list: v += ARGS.value
            else: v |= set(ARGS.value)

            setattr(cute_meta, tag, v)
        elif ARGS.subcommand == "remove":
            tpe = getattr(CuteMeta, tag).type
            if not issubclass(tpe, (list, set)):
                print("Can only remove to list or set")
                return
            v = getattr(cute_meta, tag) or set()
            v = set(v)
            v -= set(ARGS.value)
            setattr(cute_meta, tag, tpe(v))

        cute_meta.write()