Example #1
0
def check_if_scraping_done(record):
    """Search the ops table for the finished form of a scrape record

    record -- Scrape record to look for. It is modified in place: 'running'
              is set to False and 'complete' to True before the search, so
              the caller sees the updated flags as well.

    Returns True whenever the search gives back anything other than None.
    """

    #Flag the record as finished. The assignment order is kept on purpose,
    # since the record is passed on as the 'content' to match against
    record['running'] = False
    record['complete'] = True

    query = {'content': record, 'isa': 'scrape'}
    found = idc.search_ops_table(query)

    #NOTE(review): if search_ops_table hands back a cursor-like object even
    # when nothing matches, this is always True -- confirm against idc
    if found is None:
        return False
    return True
Example #2
0
def get_live_scrapes_older_than(min_hours_old=DEFAULT_MAX_TIME,
                                db_id=None,
                                coll_id=None):
    """Get all scrapes that are not marked complete and are at least min_hours_old

    Generally we expect scrapes to take only a few hours so an entire DB scrape should
    not take more than 4-6 hours at worst.

    min_hours_old -- Find only records older than this many hours

    Optional:
    db_id -- Find only records relating to this db id
    coll_id -- Find only records relating to this coll id

    Returns a list of the matching records. This is a best-effort lookup: if
    building or running the query fails, an empty list is returned instead.
    """

    try:
        start = datetime.datetime.now() - datetime.timedelta(
            hours=min_hours_old)
        #Currently this is enough to identify scrape records
        rec_test = {'time': {"$lt": start}, 'complete': False}
        #Only narrow the search when an id was actually supplied
        if db_id:
            rec_test['db'] = db_id
        if coll_id:
            rec_test['coll'] = coll_id
        curs = idc.search_ops_table(rec_test)
        return list(curs)
    except Exception:
        #Deliberately broad, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare except would
        return []
Example #3
0
def get_progress(uid):
    """Get progress of scrape with uid

    uid -- String form of the scrape's UUID (parsed with uuid.UUID)

    Returns a dict with keys:
    n_colls -- Number of scrape records found for this uid
    curr_coll -- Number of those records marked complete
    progress_in_current -- Value from get_progress_from_db for the running
                           record (0 if that call fails). When nothing is
                           running: 100 if every record is complete, else 0
    """

    #NOTE what follows _is_ vulnerable to races but
    # this will only affect the progress meter, and should be rare
    query = {'isa':'scrape', 'content':{'uid':uuid.UUID(uid)}}
    #Assume all ops records with correct uid and containing 'running' are relevant
    #Fetch the results once, so the count and the loop below see the same
    # records and counting does not rely on the result offering count()
    scrapes = list(idc.search_ops_table(query))
    n_scrapes = len(scrapes)

    curr_coll = 0
    curr_item = None
    for item in scrapes:
        content = item['content']
        if content['complete']:
            curr_coll = curr_coll + 1
        if content['running']:
            #If several records claim to be running, the last one wins
            curr_item = item

    if curr_item:
        try:
            prog_in_curr = get_progress_from_db(uid, curr_item['db'], curr_item['coll'])
        except Exception:
            #Web front or user can't do anything about errors here. If process
            # is failing, will become evident later
            prog_in_curr = 0
    else:
        #Nothing running. If not yet started, prog=0. If done prog=100.
        prog_in_curr = 100 if curr_coll == n_scrapes else 0

    return {'n_colls':n_scrapes, 'curr_coll':curr_coll, 'progress_in_current':prog_in_curr}
Example #4
0
def get_latest_report():
    """Get the report record with the latest scan_date

    Returns the record with its 'content' entry moved to the key 'report'.
    If no report can be obtained (lookup fails, no reports exist, or the
    record lacks 'scan_date'/'content'), returns {'scan_date': 'NaN'}.
    """

    try:
        reports = idc.search_ops_table({'isa': 'report'})
        #Sort ascending by scan date; the newest report is then the last one
        sorted_reports = sorted(reports, key=lambda s: s['scan_date'])
        rept = sorted_reports[-1]
        rept['report'] = rept.pop('content')
        return rept
    except Exception:
        #Everything that can fail now sits inside the try, so this fallback
        # is actually reachable (e.g. IndexError when there are no reports)
        return {'scan_date': 'NaN'}
Example #5
0
def get_completed_scrapes(n_days=7):
    """Get successfully completed scrapes from the last n days

    n_days -- Find only records newer than this many days (default 7)

    Returns a list of the matching records. This is a best-effort lookup: if
    building or running the query fails, an empty list is returned instead.
    """

    try:
        start = datetime.datetime.now() - datetime.timedelta(days=n_days)
        #Currently this is enough to identify scrape records
        rec_test = {'time': {"$gt": start}, 'complete': True}
        curs = idc.search_ops_table(rec_test)
        return list(curs)
    except Exception:
        #Deliberately broad, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare except would
        return []
Example #6
0
def get_lockout_state():
    """Get global lockout status

    Returns the 'lockout' value of the most recent record that has one
    (records are ordered by descending '_id'). Returns False if the lookup
    fails or if there is no such record.
    """
    rec_find = {'lockout':{"$exists":True}}
    try:
        #Get latest lockout record
        res = idc.search_ops_table(rec_find).sort('_id', -1).limit(1)
        #Indexing is inside the try as well: with no lockout records at all
        # there is no first element, and that must mean "not locked"
        return res[0]['lockout']
    except Exception:
        return False
Example #7
0
def collate_orphans_by_uid(uid):
    """Fetch all orphans with given uid and return summary

    uid -- String form of the scrape's UUID (parsed with uuid.UUID)

    Returns a dict of the form {db_name: {coll_name: split_orphans(record)}}
    with one inner entry per orphans record found for this uid. db_name is
    the empty string if no database name could be determined.
    """

    scrape_uid = uuid.UUID(uid)

    #All orphans records for this uid
    record = {'uid':scrape_uid, 'orphans':{"$exists":True}}
    records = idc.search_ops_table(record)
    orph_data = {}
    db_name = ''
    try:
        db_name = idc.get_db_name(records[0]['db'])['name']
    except Exception:
        #No usable orphans record: try any record carrying this uid instead
        record = {'uid':scrape_uid}
        tmp_records = idc.search_ops_table(record)
        try:
            #Take the first record, exactly as the primary lookup above does
            db_name = idc.get_db_name(tmp_records[0]['db'])['name']
        except Exception:
            #Still nothing usable; report under the empty name
            db_name = ''

    orph_data[db_name] = {}
    for entry in records:
        coll = idc.get_coll_name(entry['coll'])['name']
        orph_data[db_name][coll] = split_orphans(entry)

    return orph_data
Example #8
0
def collate_orphans():
    """Fetch all orphans and return summary

    Returns a dict of the form {db_name: {coll_name: split_orphans(record)}}
    built from every ops record that has an 'orphans' entry.
    """

    #All orphans records
    record = {'orphans':{"$exists":True}}
    records = idc.search_ops_table(record)
    orph_data = {}

    for entry in records:
        db_name = idc.get_db_name(entry['db'])['name']
        coll = idc.get_coll_name(entry['coll'])['name']
        #Create the per-db dict only the first time a db is seen, so that
        # collections already recorded for that db are not thrown away
        orph_data.setdefault(db_name, {})[coll] = split_orphans(entry)

    return orph_data