Example #1
0
def taper_off_orphans(conn):

    now = time.time()

    plans = config.items("plan")
    idx = 0
    del_count = 0
    for name, plan in plans:
        plan_info = planparser.parse_plan(name, plan, config, idx)
        idx += 1

        # everything that has no more data-points
        taper_until_ts = datetime.datetime.fromtimestamp(now - plan_info["keep_data"])
        delete_until_ts = None

        print "Taper off orphans ", name, "(has", conn.files.find({"uploadDate": {"$lte": taper_until_ts},
                                                                  "tag": plan_info["tag"]}).count(), "candidates)"

        for file in conn.files.find({"uploadDate": {"$lte": taper_until_ts},
                                     "tag": plan_info["tag"]},
                                    sort = [("uploadDate", pymongo.DESCENDING)]):

            if not delete_until_ts:
                delete_until_ts = file["uploadDate"] - datetime.timedelta(seconds = plan_info["keep_orphaned_screenshots"])
            elif delete_until_ts < file["uploadDate"]:
                conn.fs.delete(file["_id"])
                del_count += 1
                if del_count % 1024 == 0:
                    print "deleted", del_count, "files."
            else:
                delete_until_ts = None
Example #2
0
def global_cleanup(conn):

    now = time.time()

    plans = config.items("plan")
    idx = 0
    for name, plan in plans:
        plan_info = planparser.parse_plan(name, plan, config, idx)
        idx += 1
        print "Cleaning", name, "..."

        # remove data
        keep_until_ts = datetime.datetime.fromtimestamp(now - plan_info["keep_data"])
        keep_until = keep_until_ts.strftime("%Y-%m-%d %H:%M:%S")

        doc_count = conn.results.find({"tag": name, "timestamp": {"$lte": keep_until}}).count()
        doc_count += conn.mh_results.find({"tag": name, "timestamp": {"$lte": keep_until}}).count()

        conn.results.remove({"tag": name, "timestamp": {"$lte": keep_until}})
        conn.mh_results.remove({"tag": name, "timestamp": {"$lte": keep_until}})

        # remove screenshots
        keep_until_ts = datetime.datetime.fromtimestamp(now - plan_info["keep_screenshots"])

        for file in conn.files.find({"uploadDate": {"$lte": keep_until_ts},
                                     "tag": plan_info["tag"]}):
            conn.fs.delete(file["_id"])


        print doc_count, "results deleted."
Example #3
0
def global_cleanup(conn):

    now = time.time()

    plans = config.items("plan")
    idx = 0
    for name, plan in plans:
        plan_info = planparser.parse_plan(name, plan, config, idx)
        idx += 1
        print "Cleaning", name, "..."

        # remove data
        keep_until_ts = datetime.datetime.fromtimestamp(now -
                                                        plan_info["keep_data"])
        keep_until = keep_until_ts.strftime("%Y-%m-%d %H:%M:%S")

        doc_count = conn.results.find({
            "tag": name,
            "timestamp": {
                "$lte": keep_until
            }
        }).count()
        doc_count += conn.mh_results.find({
            "tag": name,
            "timestamp": {
                "$lte": keep_until
            }
        }).count()

        conn.results.remove({"tag": name, "timestamp": {"$lte": keep_until}})
        conn.mh_results.remove({
            "tag": name,
            "timestamp": {
                "$lte": keep_until
            }
        })

        # remove screenshots
        keep_until_ts = datetime.datetime.fromtimestamp(
            now - plan_info["keep_screenshots"])

        for file in conn.files.find({
                "uploadDate": {
                    "$lte": keep_until_ts
                },
                "tag": plan_info["tag"]
        }):
            conn.fs.delete(file["_id"])

        print doc_count, "results deleted."
Example #4
0
def taper_off_orphans(conn):

    now = time.time()

    plans = config.items("plan")
    idx = 0
    del_count = 0
    for name, plan in plans:
        plan_info = planparser.parse_plan(name, plan, config, idx)
        idx += 1

        # everything that has no more data-points
        taper_until_ts = datetime.datetime.fromtimestamp(
            now - plan_info["keep_data"])
        delete_until_ts = None

        print "Taper off orphans ", name, "(has", conn.files.find({
            "uploadDate": {
                "$lte": taper_until_ts
            },
            "tag":
            plan_info["tag"]
        }).count(), "candidates)"

        for file in conn.files.find(
            {
                "uploadDate": {
                    "$lte": taper_until_ts
                },
                "tag": plan_info["tag"]
            },
                sort=[("uploadDate", pymongo.DESCENDING)]):

            if not delete_until_ts:
                delete_until_ts = file["uploadDate"] - datetime.timedelta(
                    seconds=plan_info["keep_orphaned_screenshots"])
            elif delete_until_ts < file["uploadDate"]:
                conn.fs.delete(file["_id"])
                del_count += 1
                if del_count % 1024 == 0:
                    print "deleted", del_count, "files."
            else:
                delete_until_ts = None
Example #5
0
    work_queues = {}
    plans = config.items("plan")
    for name, plan in plans:
        work_queues[name] = []
        thread.start_new_thread(worker_thread, (name, work_queues[name],))

    while True:

        # one minute scheduler...
        ts = time.time()

        urls_to_handle = set()

        idx = 0
        for name, plan in plans:
            plan_info = planparser.parse_plan(name, plan, config, idx)
            idx += 1
            plan_info["tag"] = name
            work_queue = work_queues[name]
            if is_plan_scheduled(name, plan_info, ts):
                for url in plan_info["urls"]:
                    if url not in urls_to_handle:
                        urls_to_handle.add(url)
                        already_enqueued = False
                        for work in work_queue:
                            if work["url"] == url:
                                already_enqueued = True
                        if not already_enqueued:
                            work_queue.append({"url": url, "plan_info": plan_info})

        if config.get("config", "keep_running") == "False":
Example #6
0
        work_queues[name] = []
        thread.start_new_thread(worker_thread, (
            name,
            work_queues[name],
        ))

    while True:

        # one minute scheduler...
        ts = time.time()

        urls_to_handle = set()

        idx = 0
        for name, plan in plans:
            plan_info = planparser.parse_plan(name, plan, config, idx)
            idx += 1
            plan_info["tag"] = name
            work_queue = work_queues[name]
            if is_plan_scheduled(name, plan_info, ts):
                for url in plan_info["urls"]:
                    if url not in urls_to_handle:
                        urls_to_handle.add(url)
                        already_enqueued = False
                        for work in work_queue:
                            if work["url"] == url:
                                already_enqueued = True
                        if not already_enqueued:
                            work_queue.append({
                                "url": url,
                                "plan_info": plan_info