コード例 #1
0
def post_all_deleted_rs():
    """Post delete stats for recordsets deleted in the DB but not in stats.

    Searches the stats index for "digest" documents whose ``deleted`` term
    is true (the request asks for at most 10000 hits) and collects their
    ``recordset_id`` values. Every row of ``uuids`` that is a deleted
    recordset and whose id was not among those hits is handed to
    ``post_delete_stats``. Progress and totals are written with ``print``.
    """
    search_body = {
        "query": {"filtered": {"filter": {"term": {"deleted": True}}}},
        "size": 10000,
    }

    response = stats.search(doc_type="digest", body=search_body)
    already_deleted = {
        hit["_source"]["recordset_id"] for hit in response["hits"]["hits"]
    }

    print("{} recordsets already marked as deleted in stats.".format(
        len(already_deleted)))

    posted = 0
    with apidbpool.cursor() as cursor:
        cursor.execute("SELECT id FROM uuids WHERE type='recordset' and deleted=true")

        for row in cursor:
            recordset_id = row["id"]
            # Skip anything the stats index already reports as deleted.
            if recordset_id in already_deleted:
                continue
            posted += 1
            print("Deleting {}.".format(recordset_id))
            post_delete_stats(recordset_id)

    print("{} recordsets deleted from stats.".format(posted))
コード例 #2
0
ファイル: updatedb.py プロジェクト: roncanepa/idb-backend
def write_urls_to_db(to_insert, to_update):
    """Insert new media URLs and reset existing ones, then log the counts.

    Args:
        to_insert: Mapping of url -> indexable value whose items 0 and 1 are
            written to the ``type`` and ``mime`` columns of ``media``.
        to_update: Parameter sequences handed straight to ``executemany``
            for the UPDATE; the statement binds them in the order
            (type, mime, url). Presumably an iterable of 3-tuples --
            verify against the caller.

    Both statements run on one autocommit cursor. The UPDATE also clears
    ``last_status`` and ``last_check`` for the matched rows.
    """
    with apidbpool.cursor(autocommit=True) as db_cur:
        # Lazily reshape {url: (type, mime, ...)} into (url, type, mime) rows.
        new_rows = ((url, info[0], info[1]) for url, info in to_insert.items())
        db_cur.executemany(
            "INSERT INTO media (url,type,mime) VALUES (%s,%s,%s)",
            new_rows)
        n_inserted = db_cur.rowcount

        db_cur.executemany(
            "UPDATE media SET type=%s, mime=%s, last_status=NULL, last_check=NULL WHERE url=%s",
            to_update)
        n_updated = db_cur.rowcount

    logger.info("Inserted : %8d, Updated : %8d", n_inserted, n_updated)
コード例 #3
0
def update_db_status(items):
    """Record each item's status in ``media`` and yield the item back.

    This is a generator: for every element of ``items`` it updates
    ``last_status`` and ``last_check`` (set to ``now()``) on the ``media``
    row matching the item's ``url``, then yields the same item. The total
    number of affected rows is logged once the input is exhausted.

    Args:
        items: Iterable of objects exposing ``url`` and ``status_code``.
            ``status_code`` may be a plain value or an object with a
            ``value`` attribute; in the latter case ``value`` is stored.

    Yields:
        Each input item, unchanged, after its row has been updated.
    """
    updated_rows = 0
    with apidbpool.cursor(autocommit=True) as db_cur:
        for item in items:
            code = item.status_code
            # Unwrap objects that carry the real code in ``.value``;
            # anything without that attribute is used as-is.
            status = getattr(code, "value", code)
            db_cur.execute(
                "UPDATE media SET last_status=%s, last_check=now() WHERE url=%s",
                (status, item.url))
            updated_rows += db_cur.rowcount
            yield item
    logger.info("Finished updating %d records", updated_rows)
コード例 #4
0
ファイル: updatedb.py プロジェクト: roncanepa/idb-backend
def get_objects_from_ceph():
    """Add Ceph keys that are missing from the ``objects`` table.

    Loads every ``etag`` already present in ``objects``, then lists the
    ``idigbio-datasets-prod`` and ``idigbio-images-prod`` buckets. For each
    key whose name is not yet known, the first bytes of the object are
    fetched (``Range: bytes=0-100``), a MIME type is detected from them with
    ``magic``, and a row (bucket, etag=key name, detected_mime) is inserted
    unless a row with that etag already exists.

    ``conn.commit()`` is called each time the inserted-row total reaches a
    new multiple of 10000 and again after each bucket is finished. Failures
    on an individual key are logged and the scan moves on to the next key.
    """
    import magic
    existing_objects = {r[0] for r in apidbpool.fetchiter(
        "SELECT etag FROM objects", cursor_factory=cursor)}

    logger.info("Found %d objects", len(existing_objects))

    s = IDigBioStorage()
    buckets = ["datasets", "images"]
    count = 0      # keys examined across all buckets
    rowcount = 0   # rows actually inserted
    lrc = 0        # rowcount value at the last periodic commit
    # NOTE(review): ``cur`` is obtained from the pool rather than from
    # ``conn``; if the pool hands out a different connection for it, the
    # ``conn.commit()`` calls below do not commit these inserts -- confirm.
    with apidbpool.connection() as conn:
        with apidbpool.cursor() as cur:
            for b_k in buckets:
                b = s.get_bucket("idigbio-" + b_k + "-prod")
                for k in b.list():
                    if k.name not in existing_objects:
                        try:
                            # Only the leading bytes are needed for MIME
                            # sniffing, so avoid downloading whole objects.
                            ks = k.get_contents_as_string(
                                headers={'Range': 'bytes=0-100'})
                            detected_mime = magic.from_buffer(ks, mime=True)
                            cur.execute(
                                """INSERT INTO objects (bucket,etag,detected_mime)
                                   SELECT %(bucket)s,%(etag)s,%(dm)s
                                   WHERE NOT EXISTS(
                                      SELECT 1 FROM objects WHERE etag=%(etag)s)""",
                                {
                                    "bucket": b_k,
                                    "etag": k.name,
                                    "dm": detected_mime
                                })
                            existing_objects.add(k.name)
                            rowcount += cur.rowcount
                        except Exception:
                            # Best-effort per key: log and keep scanning.
                            # ``Exception`` (not a bare ``except``) so that
                            # KeyboardInterrupt/SystemExit still stop the job.
                            # NOTE(review): a failed INSERT may leave the
                            # transaction aborted until a rollback -- confirm
                            # how the pool's cursor handles this.
                            logger.exception(
                                "Ceph Error; bucket:%s keyname:%s", b_k,
                                k.name)
                    count += 1

                    # Commit once per new multiple of 10000 inserted rows;
                    # ``lrc`` prevents re-committing while rowcount is flat.
                    if rowcount != lrc and rowcount % 10000 == 0:
                        logger.info("Count: %8d,  rowcount: %8d", count,
                                    rowcount)

                        conn.commit()
                        lrc = rowcount
                conn.commit()
                logger.info("Count: %8d,  rowcount: %8d  (Finished %s)", count,
                            rowcount, b_k)