Ejemplo n.º 1
0
def register_scrape(db, table, uid):
    """Create inventory scrape records for one table, or for every table of a db.

    db -- Name of the database being scraped
    table -- Name of the table being scraped. If falsy (None or ''), a record
        is registered for every table returned by idc.get_all_tables
    uid -- Identifier of the scrape, passed through to each record

    Returns a dict {'err': ..., 'inprog': ...}. Each flag is the 'or' of the
    matching flag returned by check_and_insert_scrape_record over all tables
    handled. If anything raises, the problem is logged and
    {'err': True, 'inprog': False} is returned.
    """

    # str(table) because a missing table name is a supported input (see the
    # 'not table' branch); bare concatenation would raise outside the try
    inv.log_dest.warning(db+' '+str(table)+' '+str(uid))
    try:
        db_id = idc.get_db_id(db)
        inv.log_dest.warning(str(db_id))
        inprog = False
        had_err = False
        if not table:
            # Distinct loop name so the 'table' parameter is not overwritten
            for table_rec in idc.get_all_tables(db_id):
                table_id = table_rec['_id']
                tmp = check_and_insert_scrape_record(db_id, table_id, uid)
                # Flags start False, so they must be merged with 'or';
                # with 'and' they could never become True
                inprog = tmp['inprog'] or inprog
                had_err = tmp['err'] or had_err
        else:
            table_id = idc.get_table_id(table)
            inv.log_dest.warning(str(table_id))
            tmp = check_and_insert_scrape_record(db_id, table_id, uid)
            inprog = tmp['inprog'] or inprog
            had_err = tmp['err'] or had_err

    except Exception as e:
        #Either failed to connect etc, or are already scraping
        inv.log_dest.warning('Error resistering scrape '+str(e))
        return {'err':True, 'inprog':False}

    return {'err':had_err, 'inprog':inprog}
Ejemplo n.º 2
0
def check_scrapes_on(spec=None):
    """Check for scrapes that are in progress or queued.

    spec -- None to check everything. Otherwise a dict which may hold a 'db'
        name and/or a 'table' name; each name that is present and truthy is
        converted to its id and used to narrow the check.

    Returns the result of check_if_scraping if that is truthy, otherwise the
    result of check_if_scraping_queued.
    """
    target_ids = {}
    if spec is not None:
        resolvers = (('db', idc.get_db_id), ('table', idc.get_table_id))
        for key, resolve in resolvers:
            name = spec.get(key)
            if name:
                target_ids[key] = resolve(name)
    scraping = check_if_scraping(target_ids)
    return scraping or check_if_scraping_queued(target_ids)
Ejemplo n.º 3
0
def null_all_scrapes(table_name):
    """Flag every scrape record of a table as complete and not running.

    table_name -- Name of the table whose scrape records are updated

    Returns False if the id lookup or the update raised (the error is
    logged). On success nothing is returned explicitly, i.e. None.
    """

    try:
        selector = {'table_id':idc.get_table_id(table_name)}
        new_state = {'complete':True, 'running':False}
        db.inv_ops.update(selector, new_state)
    except Exception as err:
        inv.log_dest.error("Error updating progress "+ str(err))
        return False
Ejemplo n.º 4
0
def is_valid_db_table(db_name, table_name):
    """Tell whether a database, and optionally a table, have inventory ids.

    db_name -- Name of the database to look up
    table_name -- Name of the table to look up; skipped when falsy

    Returns True when every requested lookup gives a non-None id. Returns
    False when a lookup gives None, or when a lookup raises (logged).
    """
    try:
        if idc.get_db_id(db_name) is None:
            return False
        # The table is only looked up when a name was actually supplied
        if table_name and idc.get_table_id(table_name) is None:
            return False
    except Exception as err:
        inv.log_dest.error('Failed checking existence of '+db_name+' '+str(table_name)+' '+str(err))
        return False
    return True
Ejemplo n.º 5
0
def is_valid_db_table(db_name, table_name):
    """Check that db_name, and table_name when given, resolve to an id.

    db_name -- Database name passed to idc.get_db_id
    table_name -- Table name passed to idc.get_table_id; ignored if falsy

    Returns a bool: False if either id comes back as None or if a lookup
    raises (in which case the failure is logged), True otherwise.
    """
    try:
        db_known = idc.get_db_id(db_name) is not None
        if not db_known:
            return False
        if table_name:
            table_known = idc.get_table_id(table_name) is not None
            if not table_known:
                return False
    except Exception as err:
        inv.log_dest.error('Failed checking existence of '+db_name+' '+str(table_name)+' '+str(err))
        return False
    return True
Ejemplo n.º 6
0
def update_scrape_progress(db_name, table_name, uid, complete=None, running=None):
    """Set the 'complete' and/or 'running' flags on one scrape record.

    db_name, table_name -- Names converted to ids to locate the record
    uid -- Identifier of the scrape, also used to locate the record
    complete, running -- New flag values; a flag left as None is not touched

    No update is issued when both flags are None. Returns False if anything
    raises (the error is logged); otherwise returns None.
    """
    try:
        selector = {
            'db_id':idc.get_db_id(db_name),
            'table_id':idc.get_table_id(table_name),
            'uid':uid,
        }
        changes = {}
        for flag, value in (('complete', complete), ('running', running)):
            if value is not None:
                changes[flag] = value
        if changes:
            db.inv_ops.update(selector, changes)
    except Exception as err:
        inv.log_dest.error("Error updating progress "+ str(err))
        return False
Ejemplo n.º 7
0
def check_locks(resp):
    """Refuse a request when editing is locked globally or for its table.

    resp -- Request dict; only resp['table'] is read here

    Raises EditLockError if get_lockout_state() is truthy or if the table is
    locked. Any exception raised during the table check (including the
    EditLockError for a locked table) is logged and then re-raised.
    """
    if get_lockout_state():
        raise EditLockError('Global Edit Lock')
    try:
        table_id = idc.get_table_id(resp['table'])
        table_is_locked = check_locked(table_id)
        if table_is_locked:
            raise EditLockError('Table locked')
    except Exception as err:
        inv.log_dest.error("Error in locking "+str(err))
        raise err
Ejemplo n.º 8
0
def check_locks(resp):
    """Raise EditLockError if the request targets locked data.

    resp -- Request dict carrying the table name under 'table'

    The global lockout is tested first. The table lock is then tested inside
    a try block, so every failure there (missing key, failed id lookup, or
    the lock itself) is logged before being re-raised to the caller.
    """
    globally_locked = get_lockout_state()
    if globally_locked:
        raise EditLockError('Global Edit Lock')
    try:
        name = resp['table']
        if check_locked(idc.get_table_id(name)):
            raise EditLockError('Table locked')
    except Exception as err:
        inv.log_dest.error("Error in locking "+str(err))
        raise err
Ejemplo n.º 9
0
def delete_by_table(db_name, table_name):
    """Delete a table's inventory entry together with its stored data.

    db_name -- Not used by this function
    table_name -- Name of the table to remove

    Returns {'err':True, 'id':0, 'exist':False} if the table id lookup
    raises. Otherwise returns None; a failure while deleting the table entry
    itself is only logged.
    """

    try:
        table_id = invc.get_table_id(table_name)
    except Exception as err:
        inv.log_dest.error("Error getting table " + str(err))
        return {'err':True, 'id':0, 'exist':False}

    # Clear the per-table data first, in the same order as before
    for store in ('auto', 'human', 'records'):
        delete_table_data(table_id, tbl=store)

    try:
        table_store = db[inv.ALL_STRUC.table_ids[inv.STR_NAME]]
        table_store.delete({'_id':table_id})
    except Exception as err:
        inv.log_dest.error("Error removing table " + str(err))
Ejemplo n.º 10
0
def update_gone_list():
    """Mark inventory tables that are absent from get_db_lists() as gone.

    For every database in the inventory listing, each listed table whose name
    is missing from that database's entry in get_db_lists() has its status
    set to the code for 'gone', unless element 3 of its listing row already
    equals 'gone'.
    """

    inventory_dbs = iv.retrieve_db_listing()
    live_tables = get_db_lists()

    gone_code = ih.status_to_code('gone')
    for db_rec in inventory_dbs:
        db_name = db_rec[0]
        for row in iv.retrieve_db_listing(db_name):
            table_name = row[0]
            if table_name in live_tables[db_name]:
                continue
            # Skip rows that are already flagged
            if row[3] != 'gone':
                table_id = idc.get_table_id(table_name)
                idc.update_table(table_id, status=gone_code)
                inv.log_dest.info(str(table_name) + ' is now gone')
Ejemplo n.º 11
0
def update_gone_list():
    """Set the status of tables no longer reported by get_db_lists() to gone.

    Walks the inventory listing database by database. A table is updated when
    its name is not in get_db_lists()[db_name] and element 3 of its listing
    row is not already 'gone'. Each update is logged.
    """

    listed_dbs = iv.retrieve_db_listing()
    present = get_db_lists()

    gone_code = ih.status_to_code('gone')
    for db_rec in listed_dbs:
        db_name = db_rec[0]
        listed_tables = iv.retrieve_db_listing(db_name)
        for entry in listed_tables:
            table_name = entry[0]
            missing = table_name not in present[db_name]
            # Only touch tables that are not yet marked
            if not missing or not entry[3] != 'gone':
                continue
            table_id = idc.get_table_id(table_name)
            idc.update_table(table_id, status=gone_code)
            inv.log_dest.info(str(table_name) +' is now gone')
Ejemplo n.º 12
0
def update_scrape_progress(db_name,
                           table_name,
                           uid,
                           complete=None,
                           running=None):
    """Update the progress flags of the scrape record for db/table/uid.

    db_name, table_name -- Names that are converted to ids for the lookup
    uid -- Identifier of the scrape
    complete, running -- Values to store; None means leave that flag alone

    The update is skipped entirely when there is nothing to store. Returns
    False after logging if an exception occurs, otherwise None.
    """
    try:
        db_id = idc.get_db_id(db_name)
        table_id = idc.get_table_id(table_name)
        requested = (('complete', complete), ('running', running))
        new_values = dict(
            (flag, value) for flag, value in requested if value is not None)
        if new_values:
            record_key = {'db_id': db_id, 'table_id': table_id, 'uid': uid}
            db.inv_ops.update(record_key, new_values)
    except Exception as err:
        inv.log_dest.error("Error updating progress " + str(err))
        return False
Ejemplo n.º 13
0
def upload_scraped_inventory(structure_dat, uid):
    """Upload every db/table of a structure document and store any orphans.

        structure_dat -- Document keyed by db name, then by table name
        uid -- UID string for the uploading process

    Each database is registered with invc.set_db, then each of its tables is
    uploaded with upload_table_structure (fresh=False). When that call
    returns a non-empty collection of orphaned keys, they are stored through
    ild.store_orphans.
    """

    inv.log_dest.info("_____________________________________________________________________________________________")
    n_dbs = len(structure_dat.keys())

    for position, db_name in enumerate(structure_dat, 1):
        inv.log_dest.info("Uploading "+db_name+" ("+str(position)+" of "+str(n_dbs)+')')
        invc.set_db(db_name, db_name)

        for table_name in structure_dat[db_name]:
            inv.log_dest.info("    Uploading table "+table_name)
            orphaned_keys = upload_table_structure(db_name, table_name, structure_dat, fresh=False)
            if len(orphaned_keys) == 0:
                continue
            # Ids are only needed when there is something to store
            db_id = invc.get_db_id(db_name)
            table_id = invc.get_table_id(table_name)
            ild.store_orphans(db_id, table_id, uid, orphaned_keys)
Ejemplo n.º 14
0
def update_fields(diff, storeRollback=True):
    """Update inventory entries from a diff object.

    diff -- should be a fully qualified difference, containing db, table names and then a list of changes, each being a dict containing the item, the field and the new content. Item corresponds to an entry in an object, field to the piece of information this specifies (for example, type, description, example)
    e.g. {"db":"hgcwa","table":"hgcwa_passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change

    Each change is dispatched on its item:
      special field  -- item is trimmed by two characters at each end and
                        applied with idc.update_table_data
      toplevel field -- only the fields 'nice_name' and 'status' are
                        accepted; applied to the table if diff['table'] is
                        truthy, otherwise to the database
      anything else  -- applied with idc.update_field (type="human")

    Nothing is returned and no exception reaches the caller. The first
    failing change stops processing of the remaining ones; it is logged and
    wrapped in UpdateFailed, which the outer handler catches and logs.
    The special-field branch rewrites change["item"] in place, so the
    caller's diff object is modified.
    """

    try:
        if diff['table'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["table"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        db_id = idc.get_db_id(diff["db"])
        rollback = None
        # Bound up front so the handler below can always report it; without
        # this, a failure reading diff["diffs"] left 'change' unbound and the
        # handler itself raised, hiding the real error
        change = None
        try:
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    change["item"] = change["item"][2:-2] #Trim special fields. TODO this should be done better somehow
                    updated = idc.update_table_data(db_id, diff["table"], change["item"], change["field"], change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change)
                    #Only nice_name and status are accepted
                    if change["field"] not in ['nice_name', 'status']:
                        updated = {'err':True}
                    elif diff["table"]:
                        if change['field'] == 'nice_name':
                            new_nice = change['content']
                            new_stat = None
                        else:
                            new_nice = None
                            new_stat = ih.status_to_code(change['content'])
                        table_id = idc.get_table_id(diff['table'])
                        updated = idc.update_table(table_id, nice_name=new_nice, status=new_stat)
                    else:
                        #Is database nice_name
                        # NOTE(review): a 'status' change with no table also
                        # lands here and is written as nice_name - confirm
                        updated = idc.update_db(db_id, nice_name=change["content"])
                else:
                    table_id = idc.get_table_id(diff["table"])
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"], diff["table"], change, table_id = table_id)
                    updated = idc.update_field(table_id, change["item"], change["field"], change["content"], type="human")

                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                if storeRollback:
                    store_rollback(rollback)

        except Exception as e:
            inv.log_dest.error("Error applying diff "+ str(change)+' '+str(e))
            raise UpdateFailed(str(e))

    except Exception as e:
        # Also catches the UpdateFailed raised just above
        inv.log_dest.error("Error updating fields "+ str(e))
Ejemplo n.º 15
0
def upload_table_structure(db_name, table_name, structure_dat, fresh=False):
    """Upload the structure description for a single table

    Any entered descriptions for keys which still exist are preserved.
    Removed or renamed keys will be returned for handling
    Table entry is created if it doesn't exist,
    in which case Notes and Info are filled with dummies
    db_name -- Name of database (must exist)
    table_name -- Name of table to upload
    structure_dat -- lmfdb db structure as json object
    fresh -- If true, the trimming of redundant human table keys is skipped
        and the returned list is empty

    Returns the value of invc.trim_human_table (the orphaned keys), or an
    empty list when trimming is skipped or fails, or when the database has
    no inventory entry. Every stage has its own try/except which only logs,
    so a failure in one stage does not stop the following stages from being
    attempted.
    """


    dummy_info = {} #Dummy per table info, containing basic fields we want included
    for field in inv.info_editable_fields:
        dummy_info[field] = None

    # Stage 1: resolve the db and table ids and record the scrape date
    try:
        # NOTE(review): indexed by table name only here, but the scrape date
        # below is read from structure_dat[db_name][table_name] - confirm
        # which layout structure_dat really has
        table_entry = structure_dat[table_name]
        db_entry = invc.get_db_id(db_name)
        if db_entry is None:
            #All dbs should have been added from the struc: if not is error
            inv.log_dest.error("ERROR: No inventory DB entry "+ db_name)
            inv.log_dest.error("Cannot add descriptions")
            return []

        table_id = invc.get_table_id(table_name)
        if table_id is None:
            #Table doesn't exist, create it
            table_id = invc.set_table(db_entry, table_name, table_name, None, dummy_info, 0)
        else:
            #Delete existing auto-table entries (no table => no entries)
           delete_table_data(table_id, tbl='auto')
        try:
            scrape_date = datetime.datetime.strptime(structure_dat[db_name][table_name]['scrape_date'], '%Y-%m-%d %H:%M:%S.%f')
        except Exception as e:
            # A missing or unparsable date is not fatal: fall back to the
            # smallest representable datetime
            inv.log_dest.info("Scrape date parsing failed "+str(e))
            scrape_date = datetime.datetime.min
        invc.set_table_scrape_date(table_id, scrape_date)

    except Exception as e:
        inv.log_dest.error("Failed to refresh table (db, table or scrape data) "+str(e))

    # NOTE(review): if stage 1 failed before table_entry, db_entry or table_id
    # were assigned, the stages below hit an unbound local; that error is
    # caught and logged by their own handlers rather than propagated

    # Stage 2: store fields, records and indices of the table
    try:
        for field in table_entry['fields']:
            inv.log_dest.info("            Processing "+field)
            invc.set_field(table_id, field, table_entry['fields'][field])
        for record in table_entry['records']:
            inv.log_dest.info("            Processing record "+str(record))
            invc.set_record(table_id, table_entry['records'][record])
        #Cleanup any records which no longer exist
        # NOTE(review): 'db' is neither a parameter nor a local of this
        # function; presumably a module-level handle - confirm
        invc.cleanup_records(db, table_id, table_entry['records'])

        inv.log_dest.info("            Processing indices")
        #FIXME
        upload_indices(db, table_id, table_entry['indices'])

    except Exception as e:
        inv.log_dest.error("Failed to refresh table entries "+str(e))

    # Stage 3: collect keys of the human table which are no longer needed
    orphaned_keys = []
    if not fresh:
        try:
            #Trim any human table keys which are now redundant
            orphaned_keys = invc.trim_human_table(db_entry, table_id)
        except Exception as e:
            inv.log_dest.error("Failed trimming table "+str(e))

    #Ensure everything mandatory is present in human table
    try:
        invc.complete_human_table(db_entry, table_id)
    except Exception as e:
        inv.log_dest.error("Failed padding table "+str(e))

    return orphaned_keys
Ejemplo n.º 16
0
def update_fields(diff, storeRollback=True):
    """Update inventory entries from a diff object.

    diff -- should be a fully qualified difference, containing db, table names and then a list of changes, each being a dict containing the item, the field and the new content. Item corresponds to an entry in an object, field to the piece of information this specifies (for example, type, description, example)
    e.g. {"db":"hgcwa","table":"hgcwa_passports","diffs":[{"item":"total_label","field":"type","content":"string"}]}
    If this is a record entry, then the 'item' field will be a record hash.
    storeRollback -- determine whether to store the undiff and diff to allow rollback of the change

    Each change is dispatched on its item:
      special field  -- item is trimmed by two characters at each end and
                        applied with idc.update_table_data
      toplevel field -- only the fields 'nice_name' and 'status' are
                        accepted; applied to the table if diff['table'] is
                        truthy, otherwise to the database
      anything else  -- applied with idc.update_field (type="human")

    Nothing is returned and no exception reaches the caller. The first
    failing change stops processing of the remaining ones; it is logged and
    wrapped in UpdateFailed, which the outer handler catches and logs.
    The special-field branch rewrites change["item"] in place, so the
    caller's diff object is modified.
    """

    try:
        if diff['table'] is not None:
            inv.log_dest.info("Updating descriptions for " + diff["table"])
        else:
            inv.log_dest.info("Updating descriptions for " + diff["db"])
        db_id = idc.get_db_id(diff["db"])
        rollback = None
        # Bound up front so the handler below can always report it; without
        # this, a failure reading diff["diffs"] left 'change' unbound and the
        # handler itself raised, hiding the real error
        change = None
        try:
            for change in diff["diffs"]:
                if ih.is_special_field(change["item"]):
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"],
                                                    diff["table"], change)
                    #Trim special fields. TODO this should be done better somehow
                    change["item"] = change["item"][2:-2]
                    updated = idc.update_table_data(db_id, diff["table"],
                                                    change["item"],
                                                    change["field"],
                                                    change["content"])
                elif ih.is_toplevel_field(change["item"]):
                    #Here we have item == "toplevel", field the relevant field, and change the new value
                    if storeRollback:
                        rollback = capture_rollback(db_id, diff["db"],
                                                    diff["table"], change)
                    #Only nice_name and status are accepted
                    if change["field"] not in ['nice_name', 'status']:
                        updated = {'err': True}
                    elif diff["table"]:
                        if change['field'] == 'nice_name':
                            new_nice = change['content']
                            new_stat = None
                        else:
                            new_nice = None
                            new_stat = ih.status_to_code(change['content'])
                        table_id = idc.get_table_id(diff['table'])
                        updated = idc.update_table(table_id,
                                                   nice_name=new_nice,
                                                   status=new_stat)
                    else:
                        #Is database nice_name
                        # NOTE(review): a 'status' change with no table also
                        # lands here and is written as nice_name - confirm
                        updated = idc.update_db(
                            db_id, nice_name=change["content"])
                else:
                    table_id = idc.get_table_id(diff["table"])
                    if storeRollback:
                        rollback = capture_rollback(db_id,
                                                    diff["db"],
                                                    diff["table"],
                                                    change,
                                                    table_id=table_id)
                    updated = idc.update_field(table_id,
                                               change["item"],
                                               change["field"],
                                               change["content"],
                                               type="human")

                if updated['err']:
                    raise KeyError("Cannot update, item not present")
                if storeRollback:
                    store_rollback(rollback)

        except Exception as e:
            inv.log_dest.error("Error applying diff " + str(change) + ' ' +
                               str(e))
            raise UpdateFailed(str(e))

    except Exception as e:
        # Also catches the UpdateFailed raised just above
        inv.log_dest.error("Error updating fields " + str(e))