Ejemplo n.º 1
0
def gen_retrieve_db_listing(db_name=None):
    """Retrieve listing for all or given database.

    db_name -- If absent, get listing of all dbs, if present, get listing of collections in named db

    Returns a list of tuples sorted case-insensitively by name, or None on failure.

    NB connection must have been setup and checked!
    """

    table_name = 'inv_dbs'
    coll_name = 'inv_tables'
    try:
        table = db[table_name]
        if db_name is None:
            query = {}
            records = list(table.search(query, {'_id': 1, 'name' : 1, 'nice_name':1}))
            records = [(rec['name'], rec['nice_name'], idc.count_colls(rec['_id'])) for rec in records]
        else:
            _id = idc.get_db_id(db_name)['id']
            table = db[coll_name]
            query = {'db_id':_id}
            records = list(table.search(query, {'_id': 1, 'name' : 1, 'nice_name':1, 'status':1}))
            records = [(rec['name'], rec['nice_name'], 0, ih.code_to_status(rec['status']), False) for rec in records]
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit still
        # propagate; any lookup failure signals the caller via None.
        records = None
    if records is not None:
        return sorted(records, key=lambda s: s[0].lower())
    else:
        return records
Ejemplo n.º 2
0
def register_scrape(db, table, uid):
    """Create a suitable inventory entry for the scrape.

    db -- database name; table -- table name (falsy means all tables in db)
    uid -- UID string identifying the scrape

    Returns {'err': bool, 'inprog': bool}.
    """

    inv.log_dest.warning(db+' '+table+' '+str(uid))
    try:
        db_id = idc.get_db_id(db)
        inv.log_dest.warning(str(db_id))
        inprog = False
        had_err = False
        if not table:
            all_tables = idc.get_all_tables(db_id)
            for table in all_tables:
                table_id = table['_id']
                tmp = check_and_insert_scrape_record(db_id, table_id, uid)
                # Accumulate with 'or': the flag stays set once any record
                # reports it. The original 'and' with a False seed could
                # never become True, so errors were silently dropped.
                inprog = tmp['inprog'] or inprog
                had_err = tmp['err'] or had_err
        else:
            table_id = idc.get_table_id(table)
            inv.log_dest.warning(str(table_id))
            tmp = check_and_insert_scrape_record(db_id, table_id, uid)
            inprog = tmp['inprog'] or inprog
            had_err = tmp['err'] or had_err

    except Exception as e:
        #Either failed to connect etc, or are already scraping
        inv.log_dest.warning('Error registering scrape '+str(e))
        return {'err':True, 'inprog':False}

    return {'err':had_err, 'inprog':inprog}
Ejemplo n.º 3
0
def upload_scraped_inventory(db, structure_dat, uid):
    """Upload a json structure document and store any orphans.

    db -- LMFDB connection to inventory database
    structure_dat -- JSON document containing all db/collections to upload
    uid -- UID string for uploading process
    """

    inv.log_dest.info(
        "_____________________________________________________________________________________________"
    )
    n_dbs = len(structure_dat.keys())

    # enumerate replaces the manual progress counter
    for progress_tracker, db_name in enumerate(structure_dat, start=1):
        inv.log_dest.info("Uploading " + db_name + " (" +
                          str(progress_tracker) + " of " + str(n_dbs) + ')')
        invc.set_db(db, db_name, db_name)

        for coll_name in structure_dat[db_name]:
            inv.log_dest.info("    Uploading collection " + coll_name)
            orphans = upload_collection_structure(db, db_name, coll_name,
                                                  structure_dat, fresh=False)
            if orphans:
                db_rec = invc.get_db_id(db, db_name)
                coll_rec = invc.get_coll_id(db, db_rec['id'], coll_name)
                ild.store_orphans(db, db_rec['id'], coll_rec['id'], uid,
                                  orphans)
Ejemplo n.º 4
0
def register_scrape(db, coll, uid):
    """Create a suitable inventory entry for the scrape.

    db -- database name; coll -- collection name (falsy means all collections)
    uid -- UID string identifying the scrape

    Returns {'err': bool, 'inprog': bool}.
    """

    try:
        db_id = idc.get_db_id(db)
        db_id = db_id['id']
        inprog = False
        had_err = False
        if not coll:
            all_colls = idc.get_all_colls(db_id)
            for coll in all_colls:
                coll_id = coll['_id']
                tmp = check_and_insert_scrape_record(db_id, coll_id, uid)
                # Accumulate with 'or': flag stays set once any record reports
                # it (the original 'and' with a False seed was always False).
                inprog = tmp['inprog'] or inprog
                had_err = tmp['err'] or had_err
        else:
            coll_id = idc.get_coll_id(db_id, coll)
            coll_id = coll_id['id']
            tmp = check_and_insert_scrape_record(db_id, coll_id, uid)
            inprog = tmp['inprog'] or inprog
            had_err = tmp['err'] or had_err

    except Exception:
        # Narrowed from a bare 'except:'.
        #Either failed to connect etc, or are already scraping
        return {'err': True, 'inprog': False}

    return {'err': had_err, 'inprog': inprog}
Ejemplo n.º 5
0
def retrieve_db_listing(db_name=None):
    """Retrieve listing for all or given database.

    db_name -- If absent, get listing of all dbs, if present, get listing of tables in named db

    Returns a list of tuples sorted case-insensitively by name; re-raises on error.
    """
    try:
        if db_name is None:
            # Count tables per database from the 'dbname_...' prefix of each table name
            records = list(db.inv_dbs.search({}, ['name', 'nice_name']))
            counts = defaultdict(int)
            for tablename in db.tablenames:
                dbname = tablename.split('_')[0]
                counts[dbname] += 1
            records = [(rec['name'], rec['nice_name'], counts[rec['name']]) for rec in records]
        else:
            db_id = idc.get_db_id(db_name)
            records = list(db.inv_tables.search({'db_id': db_id},
                                                ['_id', 'name', 'nice_name', 'status']))
            records = [(rec['name'], rec['nice_name'],
                        comma(db[rec['name']].count()),
                        ih.code_to_status(rec['status']), check_locked(rec['_id'])) for rec in records]
        return sorted(records, key=lambda s: s[0].lower())
    except Exception as e:
        inv.log_dest.error("Something went wrong retrieving db info "+str(e))
        # Removed the unreachable 'return None' that followed this re-raise
        raise
Ejemplo n.º 6
0
def upload_scraped_inventory(structure_dat, uid):
    """Upload a json structure document and store any orphans.

    structure_dat -- JSON document containing all db/collections to upload
    uid -- UID string for uploading process

    Returns the number of databases processed.
    """

    for db_name in structure_dat:
        invc.set_db(db_name, db_name)
        for coll_name in structure_dat[db_name]:
            orphans = upload_collection_structure(db_name, coll_name,
                                                  structure_dat, fresh=False)
            if orphans:
                db_rec = invc.get_db_id(db_name)
                coll_rec = invc.get_coll_id(db_rec['id'], coll_name)
                ild.store_orphans(db_rec['id'], coll_rec['id'], uid, orphans)
    return len(structure_dat)
Ejemplo n.º 7
0
def retrieve_db_listing(db_name=None):
    """Retrieve listing for all or given database.

    db_name -- If absent, get listing of all dbs, if present, get listing of tables in named db

    Returns a list of tuples sorted case-insensitively by name; re-raises on error.
    """
    try:
        if db_name is None:
            # Count tables per database from the 'dbname_...' prefix of each table name
            records = list(db.inv_dbs.search({}, ['name', 'nice_name']))
            counts = defaultdict(int)
            for tablename in db.tablenames:
                dbname = tablename.split('_')[0]
                counts[dbname] += 1
            records = [(rec['name'], rec['nice_name'], counts[rec['name']]) for rec in records]
        else:
            db_id = idc.get_db_id(db_name)
            records = list(db.inv_tables.search({'db_id': db_id},
                                                ['_id', 'name', 'nice_name', 'status']))
            records = [(rec['name'], rec['nice_name'],
                        comma(db[rec['name']].count()),
                        ih.code_to_status(rec['status']), check_locked(rec['_id'])) for rec in records]
        return sorted(records, key=lambda s: s[0].lower())
    except Exception as e:
        inv.log_dest.error("Something went wrong retrieving db info "+str(e))
        # Removed the unreachable 'return None' that followed this re-raise
        raise
Ejemplo n.º 8
0
def check_scrapes_on(spec=None):
    """If table given, check for scrapes in progress or
    queued on it. If only db, check all tables in it. If spec is None, check everything"""
    ids = {}
    if spec is not None:
        db_name = spec.get('db')
        table_name = spec.get('table')
        # Resolve names to ids only for the keys the caller supplied
        if db_name:
            ids['db'] = idc.get_db_id(db_name)
        if table_name:
            ids['table'] = idc.get_table_id(table_name)
    return check_if_scraping(ids) or check_if_scraping_queued(ids)
Ejemplo n.º 9
0
def is_valid_db_collection(db_name, collection_name):
    """Check if db and collection name (if not None) exist.

    Returns True when the db (and, if given, the collection) exists; False otherwise.
    """
    try:
        db_id = idc.get_db_id(db_name)
        if not db_id['exist']:
            return False
        if collection_name:
            coll_id = idc.get_coll_id(db_id['id'], collection_name)
            if not coll_id['exist']:
                return False
    except Exception:
        # Narrowed from a bare 'except:': treat any lookup failure as
        # "invalid" without swallowing KeyboardInterrupt/SystemExit.
        return False
    return True
Ejemplo n.º 10
0
def update_scrape_progress(db_name, coll, uid, complete=None, running=None):
    """Update progress of scrape from db/coll names and uid.

    complete/running -- optional new flag values; None means leave unchanged.
    Returns False on failure, None on success (historical contract preserved).
    """

    try:
        db_id = idc.get_db_id(db_name)
        coll_id = idc.get_coll_id(db_id['id'], coll)
        update_scrape_progress_helper(db_id['id'],
                                      coll_id['id'],
                                      uid,
                                      complete=complete,
                                      running=running)
    except Exception:
        # Narrowed from a bare 'except:' so system-exit signals still propagate
        return False
Ejemplo n.º 11
0
def is_valid_db_table(db_name, table_name):
    """Check if db and table_name name (if not None) exist"""
    try:
        # Guard clauses: bail out as soon as either lookup comes back empty
        if idc.get_db_id(db_name) is None:
            return False
        if table_name and idc.get_table_id(table_name) is None:
            return False
    except Exception as e:
        inv.log_dest.error('Failed checking existence of '+db_name+' '+str(table_name)+' '+str(e))
        return False
    return True
Ejemplo n.º 12
0
def is_valid_db_table(db_name, table_name):
    """Check if db and table_name name (if not None) exist"""
    try:
        # The db must exist; the table is only checked when a name was given
        if idc.get_db_id(db_name) is None:
            return False
        return (not table_name) or (idc.get_table_id(table_name) is not None)
    except Exception as e:
        inv.log_dest.error('Failed checking existence of '+db_name+' '+str(table_name)+' '+str(e))
        return False
Ejemplo n.º 13
0
def check_locks(resp):
    """Check if request pertains to locked coll
    or editing is locked globally

    Raises EditLockError when editing is blocked; propagates lookup errors.
    """
    if get_lockout_state():
        raise EditLockError('Global Edit Lock')
    # Removed the 'except Exception as e: raise e' wrapper: it only
    # re-raised, which in Python 2 truncated the traceback. Letting the
    # exception propagate unchanged is behaviorally equivalent and clearer.
    db_name = resp['db']
    coll_name = resp['collection']
    db_id = idc.get_db_id(db_name)
    coll_id = idc.get_coll_id(db_id['id'], coll_name)
    if check_locked(coll_id['id']):
        raise EditLockError('Collection locked')
Ejemplo n.º 14
0
def update_scrape_progress(db_name, table_name, uid, complete=None, running=None):
    """Update progress of scrape from db/table names and uid """
    try:
        selector = {'db_id': idc.get_db_id(db_name),
                    'table_id': idc.get_table_id(table_name),
                    'uid': uid}
        # Only the flags the caller actually supplied get written
        changes = {}
        for key, value in (('complete', complete), ('running', running)):
            if value is not None:
                changes[key] = value
        if changes:
            db.inv_ops.update(selector, changes)
    except Exception as e:
        inv.log_dest.error("Error updating progress "+ str(e))
        return False
Ejemplo n.º 15
0
def check_scrapes_on(spec=None):
    """If collection given, check for scrapes in progress or
    queued on it. If only db, check all collections in it. If spec is None, check everything

    Returns the truthy scrape-check result, or False on any failure.
    """
    try:
        spec_ids = {}
        if spec is not None:
            db_id = idc.get_db_id(spec['db'])
            spec_ids = {'db': db_id['id']}
            if spec['coll']:
                coll_id = idc.get_coll_id(db_id['id'], spec['coll'])
                spec_ids['coll'] = coll_id['id']
        result = check_if_scraping(spec_ids) or check_if_scraping_queued(
            spec_ids)
        return result
    except Exception:
        # Narrowed from a bare 'except:': a failed lookup means "not scraping"
        return False
Ejemplo n.º 16
0
def get_nicename(db_name, collection_name):
    """Return the nice_name string for given db/coll pair.

    collection_name -- if falsy, the database's own nice_name is returned.
    Returns None when the name cannot be resolved.
    """

    try:
        if collection_name:
            db_id = idc.get_db_id(db_name)
            coll_rec = idc.get_coll(db_id['id'], collection_name)
            nice_name = coll_rec['data']['nice_name']
        else:
            db_rec = idc.get_db(db_name)
            nice_name = db_rec['data']['nice_name']
        return nice_name
    except Exception:
        # Narrowed from a bare 'except:'.
        #Can't return nice name so return None
        return None
Ejemplo n.º 17
0
def mark_all_gone():
    """Set status of all removed collections to gone"""

    dbs = iv.gen_retrieve_db_listing()
    all_colls = get_db_lists()
    gone_code = ih.status_to_code('gone')

    for db in dbs:
        colls = iv.gen_retrieve_db_listing(db[0])
        db_id = idc.get_db_id(db[0])
        live = all_colls[db[0]]
        for coll in colls:
            # Mark only collections that vanished and aren't already 'gone'
            if coll[0] not in live and coll[3] != 'gone':
                coll_id = idc.get_coll_id(db_id['id'], coll[0])
                idc.update_coll(coll_id['id'], status=gone_code)
Ejemplo n.º 18
0
def update_scrape_progress(db, coll, uid, complete=None, running=None):
    """Update progress of scrape from db/coll names and uid.

    complete/running -- optional new flag values; None means leave unchanged.
    Returns False on failure, None on success (historical contract preserved).
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        if not got_client:
            # 'assert' is stripped under 'python -O'; raise explicitly so the
            # failure is still caught and logged below.
            raise RuntimeError('Failed to get editor client')
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False

    try:
        db_id = idc.get_db_id(inv_db, db)
        coll_id = idc.get_coll_id(inv_db, db_id['id'], coll)
        update_scrape_progress_helper(inv_db, db_id['id'], coll_id['id'], uid, complete=complete, running=running)
    except Exception as e:
        inv.log_dest.error("Error updating progress "+ str(e))
        return False
Ejemplo n.º 19
0
def mark_all_gone(main_db):
    """Set status of all removed collections to gone"""

    inv_db = main_db[inv.get_inv_db_name()]
    dbs = iv.gen_retrieve_db_listing(inv_db)
    all_colls = get_db_lists()
    gone_code = ih.status_to_code('gone')

    for db in dbs:
        colls = iv.gen_retrieve_db_listing(inv_db, db[0])
        db_id = idc.get_db_id(inv_db, db[0])
        live = all_colls[db[0]]
        for coll in colls:
            # Mark only collections that vanished and aren't already 'gone'
            if coll[0] not in live and coll[3] != 'gone':
                coll_id = idc.get_coll_id(inv_db, db_id['id'], coll[0])
                idc.update_coll(inv_db, coll_id['id'], status=gone_code)
                inv.log_dest.info(str(db) +'.'+str(coll) +' is now gone')
Ejemplo n.º 20
0
def delete_by_collection(db_name, coll_name):
    """Remove collection entry and all its fields.

    Returns an error dict if the db/collection cannot be resolved; otherwise
    deletes the field data and (best-effort) the collection id record.
    """

    try:
        _db_id = invc.get_db_id(db_name)
        _c_id = invc.get_coll_id(_db_id['id'], coll_name)
    except Exception:
        # Narrowed from a bare 'except:': any lookup failure aborts the delete
        return {'err': True, 'id': 0, 'exist': False}

    #Remove fields entries matching _c_id
    delete_collection_data(_c_id['id'], tbl='auto')
    delete_collection_data(_c_id['id'], tbl='human')
    delete_collection_data(_c_id['id'], tbl='records')

    try:
        lmfdb_db[inv.ALL_STRUC.coll_ids[inv.STR_NAME]].delete(
            {'_id': _c_id['id']})
    except Exception:
        # Deliberate best-effort: ignore failure to remove the id record,
        # but no longer swallow KeyboardInterrupt/SystemExit.
        pass
Ejemplo n.º 21
0
def is_valid_db_collection(db_name, collection_name):
    """Check if db and collection name (if not None) exist.

    Raises ih.ConnectOrAuthFail if a client cannot be obtained; otherwise
    returns True/False for existence.
    """
    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        # Removed the unreachable 'return False' that followed this raise
        raise ih.ConnectOrAuthFail("")
    try:
        db_id = idc.get_db_id(db, db_name)
        if not db_id['exist']:
            return False
        if collection_name:
            coll_id = idc.get_coll_id(db, db_id['id'], collection_name)
            if not coll_id['exist']:
                return False
    except Exception as e:
        inv.log_dest.error('Failed checking existence of '+db_name+' '+collection_name+' '+str(e))
        return False
    return True
Ejemplo n.º 22
0
def update_scrape_progress(db_name,
                           table_name,
                           uid,
                           complete=None,
                           running=None):
    """Update progress of scrape from db/table names and uid """
    try:
        # Resolve the record selector up front
        selector = {'db_id': idc.get_db_id(db_name),
                    'table_id': idc.get_table_id(table_name),
                    'uid': uid}
        # Write only the flags the caller actually supplied
        updates = {key: val
                   for key, val in (('complete', complete),
                                    ('running', running))
                   if val is not None}
        if updates:
            db.inv_ops.update(selector, updates)
    except Exception as e:
        inv.log_dest.error("Error updating progress " + str(e))
        return False
Ejemplo n.º 23
0
def check_locks(resp):
    """Check if request pertains to locked coll
    or editing is locked globally

    Raises ih.ConnectOrAuthFail or EditLockError; re-raises lookup errors after logging.
    """
    inv.setup_internal_client()
    try:
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
    if get_lockout_state():
        raise EditLockError('Global Edit Lock')
    try:
        db_name = resp['db']
        coll_name = resp['collection']
        db_id = idc.get_db_id(db, db_name)
        coll_id = idc.get_coll_id(db, db_id['id'], coll_name)
        if check_locked(db, coll_id['id']):
            raise EditLockError('Collection locked')
    except Exception as e:
        inv.log_dest.error("Error in locking " + str(e))
        # Bare 'raise' re-raises with the original traceback intact
        # ('raise e' restarts the traceback at this line).
        raise
Ejemplo n.º 24
0
def is_valid_db_collection(db_name, collection_name):
    """Check if db and collection name (if not None) exist.

    Raises ih.ConnectOrAuthFail if a client cannot be obtained; otherwise
    returns True/False for existence.
    """
    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        # Removed the unreachable 'return False' that followed this raise
        raise ih.ConnectOrAuthFail("")
    try:
        db_id = idc.get_db_id(db, db_name)
        if not db_id['exist']:
            return False
        if collection_name:
            coll_id = idc.get_coll_id(db, db_id['id'], collection_name)
            if not coll_id['exist']:
                return False
    except Exception as e:
        inv.log_dest.error('Failed checking existence of ' + db_name + ' ' +
                           collection_name + ' ' + str(e))
        return False
    return True
Ejemplo n.º 25
0
def check_scrapes_on(spec):
    """If collection given, check for scrapes in progress or
    queued on it. If only db, check all collections in it

    Returns the truthy scrape-check result, or False on any failure.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        if not got_client:
            # 'assert' is stripped under 'python -O'; raise explicitly so
            # the failure is still caught and logged below.
            raise RuntimeError('Failed to get editor client')
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return False
    try:
        db_id = idc.get_db_id(inv_db, spec['db'])
        spec_ids = {'db': db_id['id']}
        if spec['coll']:
            coll_id = idc.get_coll_id(inv_db, db_id['id'], spec['coll'])
            spec_ids['coll'] = coll_id['id']
        result = check_if_scraping(
            inv_db, spec_ids) or check_if_scraping_queued(inv_db, spec_ids)
        return result
    except Exception:
        # Failed lookup means "not scraping"
        return False
Ejemplo n.º 26
0
def check_locks(resp):
    """Check if request pertains to locked coll
    or editing is locked globally

    Raises ih.ConnectOrAuthFail or EditLockError; re-raises lookup errors after logging.
    """
    inv.setup_internal_client()
    try:
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        raise ih.ConnectOrAuthFail("")
    if get_lockout_state():
        raise EditLockError('Global Edit Lock')
    try:
        db_name = resp['db']
        coll_name = resp['collection']
        db_id = idc.get_db_id(db, db_name)
        coll_id = idc.get_coll_id(db, db_id['id'], coll_name)
        if check_locked(db, coll_id['id']):
            raise EditLockError('Collection locked')
    except Exception as e:
        inv.log_dest.error("Error in locking "+str(e))
        # Bare 'raise' re-raises with the original traceback intact
        # ('raise e' restarts the traceback at this line).
        raise
Ejemplo n.º 27
0
def register_scrape(db, coll, uid):
    """Create a suitable inventory entry for the scrape.

    db -- database name; coll -- collection name (falsy means all collections)
    uid -- UID string identifying the scrape

    Returns {'err': bool, 'inprog': bool}.
    """

    inv.log_dest.warning(db + ' ' + coll + ' ' + str(uid))
    try:
        got_client = inv.setup_internal_client(editor=True)
        if not got_client:
            # 'assert' is stripped under 'python -O'; raise explicitly so
            # the failure is still caught and logged below.
            raise RuntimeError('Failed to get editor client')
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return {'err': True, 'inprog': False}
    try:
        db_id = idc.get_db_id(inv_db, db)
        inv.log_dest.warning(str(db_id))
        db_id = db_id['id']
        inprog = False
        had_err = False
        if not coll:
            all_colls = idc.get_all_colls(inv_db, db_id)
            for coll in all_colls:
                coll_id = coll['_id']
                tmp = check_and_insert_scrape_record(inv_db, db_id, coll_id,
                                                     uid)
                # Accumulate with 'or': flag stays set once any record reports
                # it (the original 'and' with a False seed was always False).
                inprog = tmp['inprog'] or inprog
                had_err = tmp['err'] or had_err
        else:
            coll_id = idc.get_coll_id(inv_db, db_id, coll)
            inv.log_dest.warning(str(coll_id))
            coll_id = coll_id['id']
            tmp = check_and_insert_scrape_record(inv_db, db_id, coll_id, uid)
            inprog = tmp['inprog'] or inprog
            had_err = tmp['err'] or had_err

    except Exception as e:
        #Either failed to connect etc, or are already scraping
        inv.log_dest.warning('Error registering scrape ' + str(e))
        return {'err': True, 'inprog': False}

    return {'err': had_err, 'inprog': inprog}
Ejemplo n.º 28
0
def upload_scraped_inventory(structure_dat, uid):
    """Upload a json structure document and store any orphans.

    structure_dat -- JSON document containing all db/tables to upload
    uid -- UID string for uploading process
    """

    inv.log_dest.info("_____________________________________________________________________________________________")
    n_dbs = len(structure_dat.keys())

    # enumerate replaces the manual progress counter
    for progress_tracker, db_name in enumerate(structure_dat, start=1):
        inv.log_dest.info("Uploading "+db_name+" ("+str(progress_tracker)+" of "+str(n_dbs)+')')
        invc.set_db(db_name, db_name)

        for table_name in structure_dat[db_name]:
            inv.log_dest.info("    Uploading table "+table_name)
            orphans = upload_table_structure(db_name, table_name, structure_dat, fresh=False)
            if orphans:
                ild.store_orphans(invc.get_db_id(db_name),
                                  invc.get_table_id(table_name),
                                  uid, orphans)
Ejemplo n.º 29
0
def null_all_scrapes(db, coll):
    """Update all scrapes on db.coll to be 'complete'.

    Returns False on failure, None on success (historical contract preserved).
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        if not got_client:
            # 'assert' is stripped under 'python -O'; raise explicitly so
            # the failure is still caught and logged below.
            raise RuntimeError('Failed to get editor client')
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False

    try:
        db_id = idc.get_db_id(inv_db, db)
        coll_id = idc.get_coll_id(inv_db, db_id['id'], coll)
        rec_find = {'db':db_id['id'], 'coll':coll_id['id']}
        # Force every matching scrape record to the finished state
        rec_set = {}
        rec_set['complete'] = True
        rec_set['running'] = False

        inv_db['ops'].update_many(rec_find, {"$set":rec_set})
    except Exception as e:
        inv.log_dest.error("Error updating progress "+ str(e))
        return False
Ejemplo n.º 30
0
def delete_by_collection(inv_db, db_name, coll_name):
    """Remove collection entry and all its fields.

    Raises TypeError if inv_db fails structure validation; returns an error
    dict if the db/collection cannot be resolved.
    """

    if not inv.validate_mongodb(inv_db):
        # Removed the unreachable 'return' that followed this raise
        raise TypeError("db does not match Inventory structure")

    try:
        _db_id = invc.get_db_id(inv_db, db_name)
        _c_id = invc.get_coll_id(inv_db, _db_id['id'], coll_name)
    except Exception as e:
        inv.log_dest.error("Error getting collection " + str(e))
        return {'err':True, 'id':0, 'exist':False}

    #Remove fields entries matching _c_id
    delete_collection_data(inv_db, _c_id['id'], tbl='auto')
    delete_collection_data(inv_db, _c_id['id'], tbl='human')
    delete_collection_data(inv_db, _c_id['id'], tbl='records')

    try:
        inv_db[inv.ALL_STRUC.coll_ids[inv.STR_NAME]].remove({'_id':_c_id['id']})
    except Exception as e:
        inv.log_dest.error("Error removing collection " + str(e))
Ejemplo n.º 31
0
def update_scrape_progress(db, coll, uid, complete=None, running=None):
    """Update progress of scrape from db/coll names and uid.

    complete/running -- optional new flag values; None means leave unchanged.
    Returns False on failure, None on success (historical contract preserved).
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        if not got_client:
            # 'assert' is stripped under 'python -O'; raise explicitly so
            # the failure is still caught and logged below.
            raise RuntimeError('Failed to get editor client')
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection " + str(e))
        return False

    try:
        db_id = idc.get_db_id(inv_db, db)
        coll_id = idc.get_coll_id(inv_db, db_id['id'], coll)
        update_scrape_progress_helper(inv_db,
                                      db_id['id'],
                                      coll_id['id'],
                                      uid,
                                      complete=complete,
                                      running=running)
    except Exception as e:
        inv.log_dest.error("Error updating progress " + str(e))
        return False
Ejemplo n.º 32
0
def get_nicename(db_name, collection_name):
    """Return the nice_name string for given db/coll pair.

    Raises ih.ConnectOrAuthFail if a client cannot be obtained; returns None
    when the name cannot be resolved.
    """

    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        # Removed the unreachable 'return None' that followed this raise
        raise ih.ConnectOrAuthFail("")
    try:
        if collection_name:
            db_id = idc.get_db_id(db, db_name)
            coll_rec = idc.get_coll(db, db_id['id'], collection_name)
            nice_name = coll_rec['data']['nice_name']
        else:
            db_rec = idc.get_db(db, db_name)
            nice_name = db_rec['data']['nice_name']
        return nice_name
    except Exception as e:
        inv.log_dest.error('Failed to get nice name for '+db_name+' '+collection_name+' '+str(e))
        #Can't return nice name so return None
        return None
Ejemplo n.º 33
0
def get_nicename(db_name, collection_name):
    """Return the nice_name string for given db/coll pair.

    Raises ih.ConnectOrAuthFail if a client cannot be obtained; returns None
    when the name cannot be resolved.
    """

    try:
        inv.setup_internal_client()
        db = inv.int_client[inv.ALL_STRUC.name]
    except Exception:
        # Removed the unreachable 'return None' that followed this raise
        raise ih.ConnectOrAuthFail("")
    try:
        if collection_name:
            db_id = idc.get_db_id(db, db_name)
            coll_rec = idc.get_coll(db, db_id['id'], collection_name)
            nice_name = coll_rec['data']['nice_name']
        else:
            db_rec = idc.get_db(db, db_name)
            # Removed leftover Python 2 'print db_rec' debug statement
            # (a SyntaxError on Python 3); sibling variants keep it commented.
            nice_name = db_rec['data']['nice_name']
        return nice_name
    except Exception as e:
        inv.log_dest.error('Failed to get nice name for ' + db_name + ' ' +
                           collection_name + ' ' + str(e))
        #Can't return nice name so return None
        return None
Ejemplo n.º 34
0
def delete_by_collection(inv_db, db_name, coll_name):
    """Remove collection entry and all its fields.

    Raises TypeError if inv_db fails structure validation; returns an error
    dict if the db/collection cannot be resolved.
    """

    if not inv.validate_mongodb(inv_db):
        # Removed the unreachable 'return' that followed this raise
        raise TypeError("db does not match Inventory structure")

    try:
        _db_id = invc.get_db_id(inv_db, db_name)
        _c_id = invc.get_coll_id(inv_db, _db_id['id'], coll_name)
    except Exception as e:
        inv.log_dest.error("Error getting collection " + str(e))
        return {'err': True, 'id': 0, 'exist': False}

    #Remove fields entries matching _c_id
    delete_collection_data(inv_db, _c_id['id'], tbl='auto')
    delete_collection_data(inv_db, _c_id['id'], tbl='human')
    delete_collection_data(inv_db, _c_id['id'], tbl='records')

    try:
        inv_db[inv.ALL_STRUC.coll_ids[inv.STR_NAME]].remove(
            {'_id': _c_id['id']})
    except Exception as e:
        inv.log_dest.error("Error removing collection " + str(e))
Ejemplo n.º 35
0
def upload_scraped_inventory(db, structure_dat, uid):
    """Upload a json structure document and store any orphans.

    db -- LMFDB connection to inventory database
    structure_dat -- JSON document containing all db/collections to upload
    uid -- UID string for uploading process
    """

    inv.log_dest.info("_____________________________________________________________________________________________")
    n_dbs = len(structure_dat.keys())

    # enumerate replaces the manual progress counter
    for progress_tracker, db_name in enumerate(structure_dat, start=1):
        inv.log_dest.info("Uploading " + db_name+" ("+str(progress_tracker)+" of "+str(n_dbs)+')')
        invc.set_db(db, db_name, db_name)

        for coll_name in structure_dat[db_name]:
            inv.log_dest.info("    Uploading collection "+coll_name)
            orphans = upload_collection_structure(db, db_name, coll_name, structure_dat, fresh=False)
            if orphans:
                db_rec = invc.get_db_id(db, db_name)
                coll_rec = invc.get_coll_id(db, db_rec['id'], coll_name)
                ild.store_orphans(db, db_rec['id'], coll_rec['id'], uid, orphans)
Ejemplo n.º 36
0
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- should be a fully qualified difference, containing db, table names and then a list of changes, each being a dict containing the item, the field and the new content. Item corresponds to an entry in an object, field to the piece of information this specifies (for example, type, description, example)
    e.g. {"db":"hgcwa","table":"hgcwa_passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change
    """

    try:
        if diff['table'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["table"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        db_id = idc.get_db_id(diff["db"])
        rollback = None
        try:
            for change in diff["diffs"]:
                # Three kinds of change, dispatched on the 'item' value:
                # special field, toplevel field, or ordinary table field.
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        # Capture state before mutating so the change can be undone
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    change["item"] = change["item"][2:-2] #Trim special fields. TODO this should be done better somehow
                    updated = idc.update_table_data(db_id, diff["table"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    #Only nice_name is currently an option
                    if(change["field"] not in ['nice_name', 'status']):
                        # Unsupported toplevel field: report as error below
                        updated = {'err':True}
                    else:
                        if(diff["table"]):
                            # Table-level change: exactly one of nice_name/status is set
                            if(change['field']) == 'nice_name':
                                new_nice = change['content']
                                new_stat = None
                            else:
                                new_nice = None
                                new_stat = ih.status_to_code(change['content'])
                            table_id = idc.get_table_id(diff['table'])
                            updated = idc.update_table(table_id, nice_name=new_nice, status=new_stat)
                        else:
                            #Is database nice_name
                            updated = idc.update_db(db_id, nice_name=change["content"])
                else:
                    # Ordinary per-item field change on the table
                    table_id = idc.get_table_id(diff["table"])
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change, table_id = table_id)
                    updated = idc.update_field(table_id, change["item"], change["field"], change["content"], type="human")

                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                else:
                    # Persist the rollback only after the change succeeded
                    if storeRollback:
                        store_rollback(rollback)

        except Exception as e:
            # 'change' refers to the diff entry that failed
            inv.log_dest.error("Error applying diff "+ str(change)+' '+str(e))
            raise UpdateFailed(str(e))

    except Exception as e:
        # NOTE(review): the outer handler logs and swallows, so callers get no
        # error signal — presumably intentional best-effort; confirm.
        inv.log_dest.error("Error updating fields "+ str(e))
Ejemplo n.º 37
0
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- fully qualified difference: db and collection names plus a list
    of changes, each a dict with the item, the field and the new content.
    Item corresponds to an entry in an object, field to the piece of
    information this specifies (for example, type, description, example).
    e.g. {"db":"curve_automorphisms","collection":"passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change
    """

    try:
        got_client = inv.setup_internal_client(editor=True)
        # Explicit raise instead of assert: asserts are stripped under
        # `python -O`, which would let a failed connection slip through.
        # A raise here is caught by the handler below exactly as the old
        # AssertionError was.
        if not got_client:
            raise RuntimeError("Failed to get editor client")
        db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return

    try:
        if diff['collection'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["db"]+'.'+diff["collection"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        _id = idc.get_db_id(db, diff["db"])
        rollback = None
        try:
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change)
                    change["item"] = change["item"][2:-2] #Trim special fields. TODO this should be done better somehow
                    updated = idc.update_coll_data(db, _id['id'], diff["collection"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change)
                    #Only nice_name and status are currently editable at top level
                    if change["field"] not in ('nice_name', 'status'):
                        updated = {'err': True}
                    elif diff["collection"]:
                        if change['field'] == 'nice_name':
                            new_nice = change['content']
                            new_stat = None
                        else:
                            new_nice = None
                            new_stat = ih.status_to_code(change['content'])
                        c_id = idc.get_coll_id(db, _id['id'], diff['collection'])
                        updated = idc.update_coll(db, c_id['id'], nice_name=new_nice, status=new_stat)
                    else:
                        #Is database nice_name
                        updated = idc.update_db(db, _id['id'], nice_name=change["content"])
                else:
                    _c_id = idc.get_coll_id(db, _id['id'], diff["collection"])
                    if storeRollback:
                        rollback = capture_rollback(db, _id['id'], diff["db"], diff["collection"], change, coll_id=_c_id['id'])
                    #If the item looks like a record hash, try treating it as a
                    #record first; fall back to treating it as a field on failure.
                    succeeded = False
                    if ih.is_probable_record_hash(change['item']):
                        updated = idc.update_record_description(db, _c_id['id'], {'hash': change["item"], change["field"]: change["content"]})
                        succeeded = not updated['err']
                    if not succeeded:
                        updated = idc.update_field(db, _c_id['id'], change["item"], change["field"], change["content"], type="human")

                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                if storeRollback:
                    store_rollback(db, rollback)

        except Exception as e:
            inv.log_dest.error("Error applying diff "+ str(change)+' '+str(e))
            raise UpdateFailed(str(e))

    except Exception as e:
        inv.log_dest.error("Error updating fields "+ str(e))
Ejemplo n.º 38
0
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- fully qualified difference: db and table names plus a list of
    changes, each a dict with the item, the field and the new content.
    Item corresponds to an entry in an object, field to the piece of
    information this specifies (for example, type, description, example).
    e.g. {"db":"hgcwa","table":"hgcwa_passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change
    """

    try:
        target = diff["table"] if diff['table'] is not None else diff["db"]
        inv.log_dest.info("Updating descriptions for " + target)
        dbid = idc.get_db_id(diff["db"])
        rollback = None
        try:
            for entry in diff["diffs"]:
                item = entry["item"]
                if ih.is_special_field(item):
                    if storeRollback:
                        rollback = capture_rollback(dbid, diff["db"], diff["table"], entry)
                    #Trim special fields. TODO this should be done better somehow
                    entry["item"] = item[2:-2]
                    updated = idc.update_table_data(dbid, diff["table"], entry["item"], entry["field"], entry["content"])
                elif ih.is_toplevel_field(item):
                    #item == "toplevel": field names the attribute, content the new value
                    if storeRollback:
                        rollback = capture_rollback(dbid, diff["db"], diff["table"], entry)
                    #Only nice_name is currently an option
                    if entry["field"] not in ('nice_name', 'status'):
                        updated = {'err': True}
                    elif diff["table"]:
                        nice = stat = None
                        if entry['field'] == 'nice_name':
                            nice = entry['content']
                        else:
                            stat = ih.status_to_code(entry['content'])
                        tid = idc.get_table_id(diff['table'])
                        updated = idc.update_table(tid, nice_name=nice, status=stat)
                    else:
                        #Is database nice_name
                        updated = idc.update_db(dbid, nice_name=entry["content"])
                else:
                    tid = idc.get_table_id(diff["table"])
                    if storeRollback:
                        rollback = capture_rollback(dbid, diff["db"], diff["table"], entry, table_id=tid)
                    updated = idc.update_field(tid, entry["item"], entry["field"], entry["content"], type="human")

                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                if storeRollback:
                    store_rollback(rollback)

        except Exception as e:
            inv.log_dest.error("Error applying diff " + str(entry) + ' ' + str(e))
            raise UpdateFailed(str(e))

    except Exception as e:
        inv.log_dest.error("Error updating fields " + str(e))
Ejemplo n.º 39
0
def update_fields(diff, storeRollback=True):
    """Update a record from a diff object.

    diff -- fully qualified difference: db and collection names plus a list
    of changes, each a dict with the item, the field and the new content.
    Item corresponds to an entry in an object, field to the piece of
    information this specifies (for example, type, description, example).
    e.g. {"db":"curve_automorphisms","collection":"passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow
    rollback of the change (NOTE: currently force-disabled below)
    """

    try:
        _id = idc.get_db_id(diff["db"])
        rollback = None

        # NOTE(review): this overrides the caller's storeRollback argument and
        # disables all rollback capture/storage -- looks like a debug leftover;
        # confirm intent before removing.
        storeRollback = False
        try:
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        rollback = capture_rollback(_id['id'], diff["db"], diff["collection"], change)
                    change["item"] = change["item"][2:-2] #Trim special fields. TODO this should be done better somehow
                    updated = idc.update_coll_data(_id['id'], diff["collection"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(_id['id'], diff["db"], diff["collection"], change)
                    #Only nice_name and status are currently editable at top level
                    #(debug print() calls removed from this branch)
                    if change["field"] not in ('nice_name', 'status'):
                        updated = {'err': True}
                    elif diff["collection"]:
                        if change['field'] == 'nice_name':
                            new_nice = change['content']
                            new_stat = None
                        else:
                            new_nice = None
                            new_stat = ih.status_to_code(change['content'])
                        c_id = idc.get_coll_id(_id['id'], diff['collection'])
                        updated = idc.update_coll(c_id['id'], nice_name=new_nice, status=new_stat)
                    else:
                        #Is database nice_name
                        updated = idc.update_db(_id['id'], nice_name=change["content"])
                else:
                    _c_id = idc.get_coll_id(_id['id'], diff["collection"])
                    if storeRollback:
                        rollback = capture_rollback(_id['id'], diff["db"], diff["collection"], change, coll_id=_c_id['id'])
                    #If the item looks like a record hash, try treating it as a
                    #record first; fall back to treating it as a field on failure.
                    succeeded = False
                    if ih.is_probable_record_hash(change['item']):
                        updated = idc.update_record_description(_c_id['id'], {'hash': change["item"], change["field"]: change["content"]})
                        succeeded = not updated['err']
                    if not succeeded:
                        updated = idc.update_field(_c_id['id'], change["item"], change["field"], change["content"], type="human")

                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                if storeRollback:
                    store_rollback(rollback)

        except Exception as e:
            raise UpdateFailed(str(e))

    except Exception as e:
        # NOTE(review): all errors (including UpdateFailed) are silently
        # swallowed here while the sibling implementations log them --
        # confirm this is intended.
        #inv.log_dest.error("Error updating fields "+ str(e))
        pass
Ejemplo n.º 40
0
def upload_table_structure(db_name, table_name, structure_dat, fresh=False):
    """Upload the structure description for a single table

    Any entered descriptions for keys which still exist are preserved.
    Removed or renamed keys will be returned for handling
    Table entry is created if it doesn't exist,
    in which case Notes and Info are filled with dummies
    db_name -- Name of database (must exist)
    table_name -- Name of table to upload
    structure_dat -- lmfdb db structure as json object
    """

    #Dummy per-table info, containing the basic fields we want included
    dummy_info = {field: None for field in inv.info_editable_fields}

    try:
        table_entry = structure_dat[table_name]
        db_entry = invc.get_db_id(db_name)
        if db_entry is None:
            #All dbs should have been added from the struc: if not is error
            inv.log_dest.error("ERROR: No inventory DB entry "+ db_name)
            inv.log_dest.error("Cannot add descriptions")
            return []

        table_id = invc.get_table_id(table_name)
        if table_id is None:
            #Table doesn't exist, create it
            table_id = invc.set_table(db_entry, table_name, table_name, None, dummy_info, 0)
        else:
            #Delete existing auto-table entries (no table => no entries)
            delete_table_data(table_id, tbl='auto')
        try:
            # NOTE(review): table_entry above is read from structure_dat[table_name]
            # but the scrape date is read from structure_dat[db_name][table_name];
            # one of the two is probably using the wrong layout -- confirm.
            scrape_date = datetime.datetime.strptime(structure_dat[db_name][table_name]['scrape_date'], '%Y-%m-%d %H:%M:%S.%f')
        except Exception as e:
            inv.log_dest.info("Scrape date parsing failed "+str(e))
            scrape_date = datetime.datetime.min
        invc.set_table_scrape_date(table_id, scrape_date)

    except Exception as e:
        inv.log_dest.error("Failed to refresh table (db, table or scrape data) "+str(e))

    try:
        for field in table_entry['fields']:
            inv.log_dest.info("            Processing "+field)
            invc.set_field(table_id, field, table_entry['fields'][field])
        for record in table_entry['records']:
            inv.log_dest.info("            Processing record "+str(record))
            invc.set_record(table_id, table_entry['records'][record])
        #Cleanup any records which no longer exist.
        #Bug fix: the original passed an undefined name `db` as first argument
        #(guaranteed NameError, silently absorbed by the except below); the
        #table-id based API used throughout this function takes no connection.
        invc.cleanup_records(table_id, table_entry['records'])

        inv.log_dest.info("            Processing indices")
        #FIXME -- same fix as above: undefined `db` argument removed
        upload_indices(table_id, table_entry['indices'])

    except Exception as e:
        inv.log_dest.error("Failed to refresh table entries "+str(e))

    orphaned_keys = []
    if not fresh:
        try:
            #Trim any human table keys which are now redundant
            orphaned_keys = invc.trim_human_table(db_entry, table_id)
        except Exception as e:
            inv.log_dest.error("Failed trimming table "+str(e))

    #Ensure everything mandatory is present in human table
    try:
        invc.complete_human_table(db_entry, table_id)
    except Exception as e:
        inv.log_dest.error("Failed padding table "+str(e))

    return orphaned_keys
Ejemplo n.º 41
0
def upload_collection_structure(db_name,
                                coll_name,
                                structure_dat,
                                fresh=False):
    """Upload the structure description for a single collection

    Any entered descriptions for keys which still exist are preserved.
    Removed or renamed keys will be returned for handling.
    Collection entry is created if it doesn't exist,
    in which case Notes and Info are filled with dummies.
    db_name -- Name of database this collection is in (MUST exist)
    coll_name -- Name of collection to upload
    structure_dat -- lmfdb db structure as json object
    """

    #Dummy per-collection info, containing the basic fields we want included
    dummy_info = {field: None for field in inv.info_editable_fields}
    try:
        coll_entry = structure_dat[db_name][coll_name]
        db_entry = invc.get_db_id(db_name)
        if not db_entry['exist']:
            #All dbs should have been added from the struc: if not is error.
            #Return [] (not None) so callers always get a list of orphans.
            return []
        #Inventory data migration includes db name in collection name for some reason
        #Work around until we can fix the data
        full_coll_name = db_name + '_' + coll_name

        _c_id = invc.get_coll_id(db_entry['id'], full_coll_name)
        if not _c_id['exist']:
            #Collection doesn't exist, create it
            _c_id = invc.set_coll(db_entry['id'], full_coll_name,
                                  full_coll_name, {'description': None},
                                  dummy_info, 0)
        else:
            #Delete existing auto-table entries (no collection => no entries)
            delete_collection_data(_c_id['id'], tbl='auto')
        try:
            scrape_date = datetime.datetime.strptime(
                structure_dat[db_name][coll_name]['scrape_date'],
                '%Y-%m-%d %H:%M:%S.%f')
        except Exception as e:
            #Unparseable/missing date: fall back to the epoch-like minimum
            inv.log_dest.info("Scrape date parsing failed "+str(e))
            scrape_date = datetime.datetime.min

        invc.set_coll_scrape_date(_c_id['id'], scrape_date)

    except Exception as e:
        #Best effort: log and continue rather than abort the whole upload.
        #(Was a bare `except: pass`, which also swallowed SystemExit etc.)
        inv.log_dest.error("Failed to refresh collection (db, coll or scrape data) "+str(e))

    try:
        for field in coll_entry['fields']:
            invc.set_field(_c_id['id'], field, coll_entry['fields'][field])
            #Add any keys needed to human_table
            invc.create_field(_c_id['id'], field, 'human')

    except Exception as e:
        inv.log_dest.error("Failed to refresh collection entries "+str(e))

    orphaned_keys = []
    if not fresh:
        try:
            #Trim any human table keys which are now redundant
            orphaned_keys = invc.trim_human_table(db_entry['id'], _c_id['id'])
        except Exception as e:
            inv.log_dest.error("Failed trimming collection "+str(e))

    return orphaned_keys