Ejemplo n.º 1
0
def generate_report():
    """Build a JSON-style report on the inventory database and store it.

    The report is assembled from the connection check, the fields tables,
    the latest changes, the gone collections and the scrapes, then inserted
    into the 'ops' collection as a record tagged 'isa': 'report' together
    with the current time as 'scan_date'.
    Can take a while to run.

    Returns False when the database connection cannot be set up; otherwise
    nothing is returned explicitly.
    """
    try:
        connected = inv.setup_internal_client(editor=True, remote=False)
        assert connected == True
        client = inv.int_client
        inventory = client[inv.get_inv_db_name()]
    except Exception as err:
        inv.log_dest.error("Error getting Db connection "+ str(err))
        return False

    # Connection / editor status is gathered before the collection listing
    connection_info = report_connection(client, inventory)
    collections = list(inventory['collection_ids'].find())

    # Dict displays evaluate their entries top to bottom, so the helper
    # calls happen in the order written here
    report = {
        'connection': connection_info,
        'fields': report_fields_tables(inventory, collections),
        'latest': report_latest_changes(collections),
        'gone': report_gone(collections),
        'scrapes': report_scrapes(inventory),
    }

    inventory['ops'].insert_one({
        'isa': 'report',
        'scan_date': datetime.datetime.now(),
        'report': report,
    })
Ejemplo n.º 2
0
def collate_orphans_by_uid(uid):
    """Summarise every orphan record that belongs to the scrape `uid`.

    uid -- string form of the scrape uuid

    Returns a dict with a single key, the database name (or '' when it could
    not be resolved), mapping to a dict of collection name -> split orphans.
    Returns False when the database connection cannot be set up.
    """
    try:
        connected = inv.setup_internal_client(editor=True)
        assert connected == True
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as err:
        inv.log_dest.error("Error getting Db connection "+ str(err))
        return False

    scrape_uid = uuid.UUID(uid)
    # Every ops record for this uid that carries orphan data
    orphan_records = inv_db['ops'].find({'uid': scrape_uid, 'orphans': {"$exists": True}})

    db_name = ''
    try:
        first_db = orphan_records[0]['db']
        db_name = idc.get_db_name(inv_db, first_db)['name']
    except:
        # No usable orphan record: fall back to any ops record with this uid
        fallback = inv_db['ops'].find_one({'uid': scrape_uid})
        try:
            db_name = idc.get_db_name(inv_db, fallback['db'])['name']
        except:
            pass

    by_collection = {}
    for entry in orphan_records:
        coll_name = idc.get_coll_name(inv_db, entry['coll'])['name']
        by_collection[coll_name] = split_orphans(entry)

    return {db_name: by_collection}
Ejemplo n.º 3
0
def get_progress(uid):
    """Get progress of the scrape with the given uid.

    uid -- string form of the scrape uuid

    Returns a dict with keys
        'n_colls' -- number of ops records for this uid that have 'running'
        'curr_coll' -- how many of those records are marked complete
        'progress_in_current' -- progress within the record that is running;
            0 if that lookup fails, and when nothing is running 100 if every
            record is complete, else 0
    Returns False when the database connection cannot be set up.
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False

    #NOTE what follows _is_ vulnerable to races but
    # this will only affect the progress meter, and should be rare
    #Assume all ops records with correct uid and containing 'running' are relevant
    #The cursor is read once into a list: the count and the loop below then
    # see the same records, and the removed Cursor.count() is not needed
    scrapes = list(inv_db['ops'].find({'uid':uuid.UUID(uid), 'running':{"$exists":True}}))
    n_scrapes = len(scrapes)
    curr_coll = 0
    curr_item = None
    for item in scrapes:
        if item['complete']:
            curr_coll += 1
        if item['running']:
            curr_item = item

    if curr_item:
        try:
            prog_in_curr = get_progress_from_db(inv_db, uid, curr_item['db'], curr_item['coll'])
        except Exception:
            #Web front or user can't do anything about errors here. If process
            # is failing, will become evident later
            prog_in_curr = 0
    else:
        #Nothing running. If not yet started, prog=0. If done prog=100.
        prog_in_curr = 100 if curr_coll == n_scrapes else 0

    return {'n_colls':n_scrapes, 'curr_coll':curr_coll, 'progress_in_current':prog_in_curr}
Ejemplo n.º 4
0
def remove_gone_collections():
    """Remove the collections that are marked as gone.

    The removal itself is delegated to remove_all_gone.
    Returns True after that call, or False when the database connection
    cannot be set up.
    """
    try:
        connected = inv.setup_internal_client(editor=True)
        assert connected == True
        inventory = inv.int_client[inv.get_inv_db_name()]
    except Exception as err:
        inv.log_dest.error("Error getting Db connection "+ str(err))
        return False
    else:
        # Runs outside the try body, so errors raised here still propagate
        remove_all_gone(inventory)
        return True
Ejemplo n.º 5
0
def get_latest_report():
    """Return the most recent report record stored in the 'ops' collection.

    Report records are those with 'isa': 'report' and a 'scan_date' field;
    the one with the greatest 'scan_date' is returned.

    Returns False when the database connection cannot be set up, and None
    when no report record exists.
    """

    try:
        got_client = inv.setup_internal_client(editor=True, remote=False)
        assert(got_client == True)
        idb = inv.int_client
        inv_db = idb[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False

    reports = inv_db['ops'].find({'isa':'report', 'scan_date':{'$exists':True}})
    sorted_reports = sorted(reports, key=lambda s : s['scan_date'])
    if not sorted_reports:
        #No report has been generated yet; indexing [-1] would raise IndexError
        return None
    return sorted_reports[-1]
Ejemplo n.º 6
0
def set_lockout_state(state):
    """Record a new lockout state.

    A fresh {'lockout': state} record is inserted into the 'ops' collection
    on every call; existing records are not modified or toggled.

    state -- must compare equal to True or False; anything else is logged as
        a failure and nothing is stored

    Returns True when the database connection cannot be set up; otherwise
    nothing is returned, whether or not the insert succeeded.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return True
    try:
        #Explicit check rather than assert, so validation survives python -O
        if not (state == True or state == False):
            raise ValueError('lockout state must be True or False')
        inv_db['ops'].insert_one({'lockout':state})
    except Exception:
        inv.log_dest.error('Failed to set lockout state')
Ejemplo n.º 7
0
def null_old_scrapes(time=DEFAULT_MAX_TIME):
    """Mark old scrapes that are incomplete AND not running as 'complete'.

    time -- age threshold handed to get_live_scrapes_older_than

    Returns a dict {'err': bool, 'found': int}. 'err' is True only when the
    database connection cannot be set up; 'found' is the length of the list
    returned by check_scrapes_running.
    """
    try:
        connected = inv.setup_internal_client(editor=True)
        assert connected == True
        inventory = inv.int_client[inv.get_inv_db_name()]
    except Exception as err:
        inv.log_dest.error("Error getting Db connection "+ str(err))
        return {'err':True, 'found':0}

    # Old live scrapes, then filtered by the running check, then nulled
    candidates = get_live_scrapes_older_than(inventory, time)
    to_null = check_scrapes_running(inventory, candidates)
    null_scrapes_by_list(inventory, to_null)

    found = len(to_null)
    return {'err':False, 'found':found}
Ejemplo n.º 8
0
def upload_scraped_data(structure_data, uid):
    """Entry point used by the scraper tool to upload its results.

    structure_data -- the json data
    uid -- string uuid from scraper process start call

    Returns False when the database connection cannot be set up; otherwise
    the data is passed to upload_scraped_inventory and None is returned.
    """
    try:
        connected = inv.setup_internal_client(editor=True)
        assert connected == True
        inventory = inv.int_client[inv.get_inv_db_name()]
    except Exception as err:
        inv.log_dest.error("Error getting Db connection " + str(err))
        return False

    message = 'In upload with ' + str(uid)
    inv.log_dest.warning(message)
    upload_scraped_inventory(inventory, structure_data, uid)
    return None
Ejemplo n.º 9
0
def upload_scraped_data(structure_data, uid):
    """Upload scraped data; main entry point for the scraper tool.

    structure_data -- the json data
    uid -- string uuid from scraper process start call

    Returns False if no database connection could be obtained. On the normal
    path upload_scraped_inventory is called and nothing is returned.
    """
    try:
        client_ok = inv.setup_internal_client(editor=True)
        assert client_ok == True
        target_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as conn_err:
        inv.log_dest.error("Error getting Db connection "+ str(conn_err))
        return False
    else:
        uid_text = str(uid)
        inv.log_dest.warning('In upload with '+uid_text)
        upload_scraped_inventory(target_db, structure_data, uid)
Ejemplo n.º 10
0
def update_scrape_progress(db, coll, uid, complete=None, running=None):
    """Update the progress record of a scrape, addressed by names and uid.

    db, coll -- database and collection names, resolved to ids via idc
    uid -- scrape uid, handed through to update_scrape_progress_helper
    complete, running -- forwarded unchanged as keyword arguments

    Returns False when the connection or the update fails (the error is
    logged); returns nothing on success.
    """
    try:
        connected = inv.setup_internal_client(editor=True)
        assert connected == True
        inventory = inv.int_client[inv.get_inv_db_name()]
    except Exception as err:
        inv.log_dest.error("Error getting Db connection "+ str(err))
        return False

    try:
        # Resolve names to ids first, then hand everything to the helper
        db_key = idc.get_db_id(inventory, db)['id']
        coll_key = idc.get_coll_id(inventory, db_key, coll)['id']
        progress = {'complete': complete, 'running': running}
        update_scrape_progress_helper(inventory, db_key, coll_key, uid, **progress)
    except Exception as err:
        inv.log_dest.error("Error updating progress "+ str(err))
        return False
Ejemplo n.º 11
0
def mark_all_gone(main_db):
    """Give every removed collection the 'gone' status.

    main_db -- connection object indexed by the inventory db name

    Each collection in the inventory listing whose name is absent from the
    result of get_db_lists() for its database is updated to the 'gone'
    status code, unless element 3 of its listing entry already equals 'gone'.
    """
    inv_db = main_db[inv.get_inv_db_name()]
    db_listing = iv.gen_retrieve_db_listing(inv_db)
    live_colls = get_db_lists()
    gone_code = ih.status_to_code('gone')

    for db_entry in db_listing:
        db_name = db_entry[0]
        coll_listing = iv.gen_retrieve_db_listing(inv_db, db_name)
        db_id = idc.get_db_id(inv_db, db_name)
        for coll_entry in coll_listing:
            coll_name = coll_entry[0]
            if coll_name in live_colls[db_name]:
                # Still present, nothing to mark
                continue
            # Only mark if isn't already
            if coll_entry[3] != 'gone':
                coll_id = idc.get_coll_id(inv_db, db_id['id'], coll_name)
                idc.update_coll(inv_db, coll_id['id'], status=gone_code)
                inv.log_dest.info(str(db_entry) +'.'+str(coll_entry) +' is now gone')
Ejemplo n.º 12
0
def get_lockout_state():
    """Get lockout status.

    The newest record (highest '_id') in the 'ops' collection that has a
    'lockout' field decides the result.

    Returns that record's 'lockout' value, False when no such record exists,
    and True when the database cannot be reached or queried, so that a
    failure is reported as locked.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert (got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return True

    try:
        rec_find = {'lockout': {"$exists": True}}
        cursor = inv_db['ops'].find(rec_find).sort('_id', -1).limit(1)
        # Read the cursor inside the try block so that query errors are
        # handled here rather than at the first access afterwards
        latest = list(cursor)
    except Exception as e:
        inv.log_dest.error("Error reading lockout state " + str(e))
        return True

    if not latest:
        # No lockout record has ever been written
        return False
    return latest[0]['lockout']
Ejemplo n.º 13
0
def get_lockout_state():
    """Get global lockout status.

    The newest record (highest '_id') in the 'ops' collection that has a
    'lockout' field decides the result.

    Returns that record's 'lockout' value, False when no such record exists,
    and True when the database cannot be reached or queried, so that a
    failure is reported as locked.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return True

    try:
        rec_find = {'lockout':{"$exists":True}}
        #Get latest lockout record; the cursor is read inside the try block
        # so that query errors are handled here
        latest = list(inv_db['ops'].find(rec_find).sort('_id', -1).limit(1))
    except Exception as e:
        inv.log_dest.error("Error reading lockout state "+ str(e))
        return True

    if not latest:
        #No lockout record has ever been written
        return False
    return latest[0]['lockout']
Ejemplo n.º 14
0
def check_scrapes_on(spec):
    """Check for scrapes in progress or queued on a database/collection.

    spec -- dict with keys 'db' and 'coll'. If spec['coll'] is truthy the
        check is restricted to that collection; otherwise only the db id is
        passed on, to check all collections in it.

    Returns the result of check_if_scraping(...) or
    check_if_scraping_queued(...). Returns False when the connection cannot
    be set up or when any lookup fails; the failure is logged.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert (got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return False
    try:
        db_id = idc.get_db_id(inv_db, spec['db'])
        spec_ids = {'db': db_id['id']}
        if spec['coll']:
            coll_id = idc.get_coll_id(inv_db, db_id['id'], spec['coll'])
            spec_ids['coll'] = coll_id['id']
        return (check_if_scraping(inv_db, spec_ids)
                or check_if_scraping_queued(inv_db, spec_ids))
    except Exception as e:
        # Still reported as "no scrapes", but no longer silently
        inv.log_dest.error("Error checking scrapes " + str(e))
        return False
Ejemplo n.º 15
0
def register_scrape(db, coll, uid):
    """Create a suitable inventory entry for the scrape.

    db -- database name
    coll -- collection name; if falsy, a scrape record is registered for
        every collection returned by idc.get_all_colls for the database
    uid -- scrape uid passed to check_and_insert_scrape_record

    Returns a dict {'err': bool, 'inprog': bool}. Each flag is True if any
    of the individual check_and_insert_scrape_record results reported it.
    {'err': True, 'inprog': False} is returned when the connection cannot be
    set up or any step raises.
    """

    # %-formatting so that a coll of None is logged instead of raising
    inv.log_dest.warning('%s %s %s' % (db, coll, uid))
    try:
        got_client = inv.setup_internal_client(editor=True)
        assert (got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return {'err': True, 'inprog': False}
    try:
        db_id = idc.get_db_id(inv_db, db)
        inv.log_dest.warning(str(db_id))
        db_id = db_id['id']
        inprog = False
        had_err = False
        if not coll:
            all_colls = idc.get_all_colls(inv_db, db_id)
            for coll_rec in all_colls:
                coll_id = coll_rec['_id']
                tmp = check_and_insert_scrape_record(inv_db, db_id, coll_id,
                                                     uid)
                # Accumulate with "or": both flags start as False, so
                # combining with "and" could never report anything
                inprog = inprog or bool(tmp['inprog'])
                had_err = had_err or bool(tmp['err'])
        else:
            coll_id = idc.get_coll_id(inv_db, db_id, coll)
            inv.log_dest.warning(str(coll_id))
            coll_id = coll_id['id']
            tmp = check_and_insert_scrape_record(inv_db, db_id, coll_id, uid)
            inprog = bool(tmp['inprog'])
            had_err = bool(tmp['err'])

    except Exception as e:
        #Either failed to connect etc, or are already scraping
        inv.log_dest.warning('Error resistering scrape ' + str(e))
        return {'err': True, 'inprog': False}

    return {'err': had_err, 'inprog': inprog}
Ejemplo n.º 16
0
def update_scrape_progress(db, coll, uid, complete=None, running=None):
    """Set scrape progress for the scrape identified by db/coll names and uid.

    db, coll -- database and collection names; looked up through idc to get
        their ids
    uid -- scrape uid, passed on to update_scrape_progress_helper
    complete, running -- passed on as keyword arguments, unchanged

    Returns False if the connection or the update fails (after logging the
    error). Nothing is returned when the update goes through.
    """
    try:
        client_ok = inv.setup_internal_client(editor=True)
        assert client_ok == True
        target_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as conn_err:
        inv.log_dest.error("Error getting Db connection " + str(conn_err))
        return False

    try:
        db_record = idc.get_db_id(target_db, db)
        db_ident = db_record['id']
        coll_record = idc.get_coll_id(target_db, db_ident, coll)
        coll_ident = coll_record['id']
        update_scrape_progress_helper(
            target_db, db_ident, coll_ident, uid,
            complete=complete, running=running)
    except Exception as update_err:
        inv.log_dest.error("Error updating progress " + str(update_err))
        return False
Ejemplo n.º 17
0
def null_all_scrapes(db, coll):
    """Flag every scrape record on db.coll as complete and not running.

    db, coll -- database and collection names, resolved to ids via idc

    Returns False when the connection or the update fails (the error is
    logged); returns nothing on success.
    """
    try:
        connected = inv.setup_internal_client(editor=True)
        assert connected == True
        inventory = inv.int_client[inv.get_inv_db_name()]
    except Exception as err:
        inv.log_dest.error("Error getting Db connection "+ str(err))
        return False

    try:
        db_id = idc.get_db_id(inventory, db)
        coll_id = idc.get_coll_id(inventory, db_id['id'], coll)
        # Every ops record for this db/coll pair is matched
        selector = {'db': db_id['id'], 'coll': coll_id['id']}
        finished = {'complete': True, 'running': False}
        inventory['ops'].update_many(selector, {"$set": finished})
    except Exception as err:
        inv.log_dest.error("Error updating progress "+ str(err))
        return False
Ejemplo n.º 18
0
def collate_orphans():
    """Fetch all orphans and return summary.

    Returns a dict of database name -> {collection name -> split orphans},
    built from every 'ops' record that has an 'orphans' field. If several
    records refer to the same database and collection, the last one read
    wins. Returns False when the database connection cannot be set up.
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False
    #All orphans records
    record = {'orphans':{"$exists":True}}
    records = inv_db['ops'].find(record)
    orph_data = {}

    for entry in records:
        db_name = idc.get_db_name(inv_db, entry['db'])['name']
        coll = idc.get_coll_name(inv_db, entry['coll'])['name']
        #setdefault keeps collections already gathered for this database;
        # assigning a fresh dict here would drop them
        orph_data.setdefault(db_name, {})[coll] = split_orphans(entry)

    return orph_data
Ejemplo n.º 19
0
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- should be a fully qualified difference, containing db, collection
        names and then a list of changes, each being a dict containing the
        item, the field and the new content. Item corresponds to an entry in
        an object, field to the piece of information this specifies (for
        example, type, description, example)
        e.g. {"db":"curve_automorphisms","collection":"passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
        If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow
        rollback of the change

    Nothing is returned on any path. Connection problems and update failures
    are written to inv.log_dest. The UpdateFailed raised by the inner handler
    is raised inside the outer try block, so if UpdateFailed derives from
    Exception it is caught and logged there rather than reaching the caller.
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        assert(got_client == True)
        db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return

    try:
        if diff['collection'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["db"]+'.'+diff["collection"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        _id = idc.get_db_id(db, diff["db"])
        #Stays None for the whole call when storeRollback is False
        rollback = None
        try:
            #Changes are applied one at a time; the first failure aborts the rest
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    #Rollback is captured before the item name is trimmed and the update is made
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change)
                    change["item"] = change["item"][2:-2] #Trim 2 characters from each end of special field names. TODO this should be done better somehow
                    updated = idc.update_coll_data(db, _id['id'], diff["collection"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change)
                    #Only nice_name and status are accepted; anything else is flagged as an error
                    if(change["field"] not in ['nice_name', 'status']):
                        updated = {'err':True}
                    else:
                        if(diff["collection"]):
                            #Exactly one of nice_name / status is set, the other is passed as None
                            if(change['field']) == 'nice_name':
                                new_nice = change['content']
                                new_stat = None
                            else:
                                new_nice = None
                                new_stat = ih.status_to_code(change['content'])
                            c_id = idc.get_coll_id(db, _id['id'], diff['collection'])
                            updated = idc.update_coll(db, c_id['id'], nice_name=new_nice, status=new_stat)
                        else:
                            #Is database nice_name
                            #NOTE(review): a 'status' change with no collection also lands here and is written as nice_name -- confirm this is intended
                            updated = idc.update_db(db, _id['id'], nice_name=change["content"])
                else:
                    _c_id = idc.get_coll_id(db, _id['id'], diff["collection"])
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change, coll_id = _c_id['id'])
                    succeeded = False
                    #if it looks like a record, try treating as one
                    #If this fails try it as a field
                    if ih.is_probable_record_hash(change['item']):
                        updated = idc.update_record_description(db, _c_id['id'], {'hash':change["item"], change["field"]:change["content"]})
                        if updated['err'] == False:
                            succeeded = True;
                    if not succeeded:
                        updated = idc.update_field(db, _c_id['id'], change["item"], change["field"], change["content"], type="human")

                #The rollback is stored only after the update reported success
                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                else:
                    if storeRollback:
                        store_rollback(db, rollback)

        except Exception as e:
            #NOTE(review): 'change' is unbound if the failure happens before the first loop iteration (e.g. diff has no "diffs" key)
            inv.log_dest.error("Error applying diff "+ str(change)+' '+str(e))
            raise UpdateFailed(str(e))

    except Exception as e:
        inv.log_dest.error("Error updating fields "+ str(e))
Ejemplo n.º 20
0
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- should be a fully qualified difference, containing db, collection
        names and then a list of changes, each being a dict containing the
        item, the field and the new content. Item corresponds to an entry in
        an object, field to the piece of information this specifies (for
        example, type, description, example)
        e.g. {"db":"curve_automorphisms","collection":"passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
        If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow
        rollback of the change

    Nothing is returned on any path. Connection problems and update failures
    are written to inv.log_dest. The UpdateFailed raised by the inner handler
    is raised inside the outer try block, so if UpdateFailed derives from
    Exception it is caught and logged there rather than reaching the caller.
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        assert (got_client == True)
        db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return

    try:
        if diff['collection'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["db"] + '.' +
                              diff["collection"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        _id = idc.get_db_id(db, diff["db"])
        #Stays None for the whole call when storeRollback is False
        rollback = None
        try:
            #Changes are applied one at a time; the first failure aborts the rest
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    #Rollback is captured before the item name is trimmed and the update is made
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"],
                                                    diff["collection"], change)
                    change["item"] = change["item"][
                        2:
                        -2]  #Trim 2 characters from each end of special field names. TODO this should be done better somehow
                    updated = idc.update_coll_data(db, _id['id'],
                                                   diff["collection"],
                                                   change["item"],
                                                   change["field"],
                                                   change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"],
                                                    diff["collection"], change)
                    #Only nice_name and status are accepted; anything else is flagged as an error
                    if (change["field"] not in ['nice_name', 'status']):
                        updated = {'err': True}
                    else:
                        if (diff["collection"]):
                            #Exactly one of nice_name / status is set, the other is passed as None
                            if (change['field']) == 'nice_name':
                                new_nice = change['content']
                                new_stat = None
                            else:
                                new_nice = None
                                new_stat = ih.status_to_code(change['content'])
                            c_id = idc.get_coll_id(db, _id['id'],
                                                   diff['collection'])
                            updated = idc.update_coll(db,
                                                      c_id['id'],
                                                      nice_name=new_nice,
                                                      status=new_stat)
                        else:
                            #Is database nice_name
                            #NOTE(review): a 'status' change with no collection also lands here and is written as nice_name -- confirm this is intended
                            updated = idc.update_db(
                                db, _id['id'], nice_name=change["content"])
                else:
                    _c_id = idc.get_coll_id(db, _id['id'], diff["collection"])
                    if storeRollback:
                        rollback = capture_rollback(db,
                                                    _id['id'],
                                                    diff["db"],
                                                    diff["collection"],
                                                    change,
                                                    coll_id=_c_id['id'])
                    succeeded = False
                    #if it looks like a record, try treating as one
                    #If this fails try it as a field
                    if ih.is_probable_record_hash(change['item']):
                        updated = idc.update_record_description(
                            db, _c_id['id'], {
                                'hash': change["item"],
                                change["field"]: change["content"]
                            })
                        if updated['err'] == False:
                            succeeded = True
                    if not succeeded:
                        updated = idc.update_field(db,
                                                   _c_id['id'],
                                                   change["item"],
                                                   change["field"],
                                                   change["content"],
                                                   type="human")

                #The rollback is stored only after the update reported success
                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                else:
                    if storeRollback:
                        store_rollback(db, rollback)

        except Exception as e:
            #NOTE(review): 'change' is unbound if the failure happens before the first loop iteration (e.g. diff has no "diffs" key)
            inv.log_dest.error("Error applying diff " + str(change) + ' ' +
                               str(e))
            raise UpdateFailed(str(e))

    except Exception as e:
        inv.log_dest.error("Error updating fields " + str(e))