def collate_orphans_by_uid(uid):
    """Fetch all orphan records for the given uid and return a summary.

    Args:
        uid: Textual UUID of the operation; converted with ``uuid.UUID``
            before the ``ops`` collection is queried.

    Returns:
        ``False`` if the inventory database connection could not be set up.
        Otherwise a dict ``{db_name: {coll_name: split_orphans(entry)}}``
        with one inner entry per ``ops`` record for ``uid`` that has an
        ``orphans`` field. ``db_name`` is ``''`` when it cannot be resolved.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        # Explicit check rather than ``assert`` so it is not stripped by -O.
        if got_client != True:  # noqa: E712 - keep the original strict comparison
            raise RuntimeError('setup_internal_client did not return True')
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False

    uid_value = uuid.UUID(uid)

    # All orphans records for this uid
    records = inv_db['ops'].find({'uid': uid_value, 'orphans': {"$exists": True}})
    orph_data = {}
    db_name = ''
    try:
        db_name = idc.get_db_name(inv_db, records[0]['db'])['name']
    except Exception:
        # No usable orphan record: fall back to any ops record for this uid.
        tmp_record = inv_db['ops'].find_one({'uid': uid_value})
        try:
            db_name = idc.get_db_name(inv_db, tmp_record['db'])['name']
        except Exception:
            # Best effort: keep db_name as '' but leave a trace in the log.
            inv.log_dest.warning('Could not resolve db name for uid '+str(uid))

    orph_data[db_name] = {}
    for entry in records:
        coll = idc.get_coll_name(inv_db, entry['coll'])['name']
        orph_data[db_name][coll] = split_orphans(entry)
    return orph_data
def store_orphans(db_id, coll_id, uid, orphan_document):
    """Store orphan info into the ops table, falling back to a JSON file.

    Args:
        db_id: Identifier stored under ``'db'`` and used for the name lookup.
        coll_id: Identifier stored under ``'coll'`` and used for the name lookup.
        uid: Textual UUID; converted with ``uuid.UUID``.
        orphan_document: Data stored under ``'orphans'``; it must be JSON
            serialisable for the file fallback to work.

    If building or adding the record raises, the cause is logged and
    ``orphan_document`` is written to ``Orph_<db name>_<coll name>.json``
    in the current working directory. Errors raised by the fallback itself
    (name lookups, file write) propagate to the caller.
    """
    try:
        record = {'db': db_id, 'coll': coll_id, 'uid': uuid.UUID(uid),
                  'orphans': orphan_document}
        idc.add_to_ops_table(record)
    except Exception as e:
        # Record why the store failed before falling back to a file.
        inv.log_dest.error('Store failed with '+str(e))
        db_name = idc.get_db_name(db_id)
        coll_name = idc.get_coll_name(coll_id)
        filename = 'Orph_'+db_name['name']+'_'+coll_name['name']+'.json'
        with open(filename, 'w') as out_file:
            out_file.write(json.dumps(orphan_document))
        inv.log_dest.error('Failed to store orphans, wrote to file '+filename)
def store_orphans(inv_db, db_id, coll_id, uid, orphan_document):
    """Insert an orphan record into the ``ops`` collection of ``inv_db``.

    The record holds ``db_id``, ``coll_id``, ``uuid.UUID(uid)`` and
    ``orphan_document``. If building or inserting it raises, the error is
    logged, ``orphan_document`` is dumped as JSON to
    ``Orph_<db name>_<coll name>.json`` and a second error line names
    that file.
    """
    try:
        ops_entry = {
            'db': db_id,
            'coll': coll_id,
            'uid': uuid.UUID(uid),
            'orphans': orphan_document,
        }
        inv_db['ops'].insert_one(ops_entry)
    except Exception as err:
        inv.log_dest.error('Store failed with '+str(err))
        db_info = idc.get_db_name(inv_db, db_id)
        coll_info = idc.get_coll_name(inv_db, coll_id)
        stem = 'Orph_'+db_info['name']+'_'+coll_info['name']
        filename = stem+'.json'
        with open(filename, 'w') as dump_target:
            dump_target.write(json.dumps(orphan_document))
        inv.log_dest.error('Failed to store orphans, wrote to file '+filename)
def collate_orphans():
    """Fetch all orphans and return a summary grouped by database and table.

    Returns:
        A dict ``{db_name: {table_name: split_orphans(entry)}}`` covering
        every record returned by ``db.inv_ops.search`` for records that
        have an ``orphans`` field.
    """
    # All orphans records
    record = {'orphans': {"$exists": True}}
    orph_data = {}
    for entry in db.inv_ops.search(record):
        db_name = idc.get_db_name(entry['db'])
        table = idc.get_table_name(entry['table'])
        # setdefault keeps the tables already collected for this database;
        # assigning a fresh {} per entry would discard them.
        orph_data.setdefault(db_name, {})[table] = split_orphans(entry)
    return orph_data
def collate_orphans_by_uid(uid):
    """Summarise the orphan records that belong to one uid.

    Returns a single-key dict ``{db_name: {table_name: split_orphans(entry)}}``.
    The database name comes from the first matching orphan record, or
    failing that from any ``inv_ops`` record with this uid; it stays ``''``
    when neither lookup succeeds.
    """
    # Every inv_ops record for this uid that carries orphans
    matches = db.inv_ops.search({'uid': uid, 'orphans': {"$exists": True}})

    owner = ''
    try:
        owner = idc.get_db_name(matches[0]['db'])
    except Exception:
        fallback = db.inv_ops.lucky({'uid': uid})
        try:
            owner = idc.get_db_name(fallback['db'])
        except Exception:
            pass

    per_table = {}
    for row in matches:
        table_name = idc.get_table_name(row['table'])
        per_table[table_name] = split_orphans(row)
    return {owner: per_table}
def check_scrapes_running(scrape_list):
    """Return the scrapes from ``scrape_list`` that are NOT running.

    Args:
        scrape_list: Iterable of items with ``'db'`` and ``'table'`` keys
            holding the ids passed to the name lookups.

    Returns:
        A new list with the items whose ``get_scrape_progress`` result is
        ``(-1, -1)``. Items whose lookup or progress query raises are
        logged and left out of the result.
    """
    new_list = []
    for item in scrape_list:
        try:
            db_name = idc.get_db_name(item['db'])['name']
            coll_name = idc.get_coll_name(item['table'])['name']
            prog = get_scrape_progress(db_name, coll_name)
        except Exception as e:
            # Still best-effort, but no longer silent: the names may not
            # have been resolved, so the item itself is logged.
            inv.log_dest.warning('Failed to get progress '+str(item)+' '+str(e))
        else:
            if prog == (-1, -1):
                new_list.append(item)
    return new_list
def check_scrapes_running(scrape_list):
    """Filter ``scrape_list`` down to the scrapes that are not running.

    A scrape is kept when ``get_scrape_progress`` returns ``(-1, -1)`` for
    its database and table names. A failing progress query is logged as a
    warning and the scrape is dropped. The name lookups themselves are
    not guarded.
    """
    idle = []
    not_running = (-1, -1)
    for scrape in scrape_list:
        db_name = idc.get_db_name(scrape['db'])
        table_name = idc.get_table_name(scrape['table'])
        try:
            if get_scrape_progress(db_name, table_name) == not_running:
                idle.append(scrape)
        except Exception as err:
            prefix = 'Failed to get progress '
            inv.log_dest.warning(prefix+db_name+' '+table_name+' '+str(err))
    return idle
def collate_orphans_by_uid(uid):
    """Fetch all orphan records for the given uid and return a summary.

    Args:
        uid: Textual UUID of the operation; converted with ``uuid.UUID``
            before the ops table is searched.

    Returns:
        A dict ``{db_name: {coll_name: split_orphans(entry)}}`` with one
        inner entry per ops record for ``uid`` that has an ``orphans``
        field. ``db_name`` is ``''`` when it cannot be resolved.
    """
    uid_value = uuid.UUID(uid)

    # All orphans records for this uid
    records = idc.search_ops_table({'uid': uid_value, 'orphans': {"$exists": True}})
    orph_data = {}
    db_name = ''
    try:
        db_name = idc.get_db_name(records[0]['db'])['name']
    except Exception:
        # No usable orphan record: retry with any ops record for this uid.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        tmp_record = idc.search_ops_table({'uid': uid_value})
        try:
            db_name = idc.get_db_name(tmp_record['db'])['name']
        except Exception:
            # Best effort: leave db_name as ''.
            pass

    orph_data[db_name] = {}
    for entry in records:
        coll = idc.get_coll_name(entry['coll'])['name']
        orph_data[db_name][coll] = split_orphans(entry)
    return orph_data
def collate_orphans():
    """Fetch all orphans and return a summary grouped by database and table.

    Returns:
        A dict ``{db_name: {table_name: split_orphans(entry)}}`` covering
        every record returned by ``db.inv_ops.search`` for records that
        have an ``orphans`` field.
    """
    # All orphans records
    record = {'orphans': {"$exists": True}}
    orph_data = {}
    for entry in db.inv_ops.search(record):
        db_name = idc.get_db_name(entry['db'])
        table = idc.get_table_name(entry['table'])
        # Reuse the per-database dict if it exists; resetting it for each
        # entry would drop tables collected from earlier records.
        orph_data.setdefault(db_name, {})[table] = split_orphans(entry)
    return orph_data
def collate_orphans_by_uid(uid):
    """Build the orphan summary for a single uid.

    The result is ``{db_name: {table_name: split_orphans(entry)}}`` with
    exactly one outer key. That key is resolved from the first orphan
    record, otherwise from any ``inv_ops`` record with this uid, and is
    ``''`` if both attempts fail.
    """
    # Orphan-bearing inv_ops records for this uid
    orphan_query = {'uid': uid, 'orphans': {"$exists": True}}
    found = db.inv_ops.search(orphan_query)

    resolved_name = ''
    try:
        resolved_name = idc.get_db_name(found[0]['db'])
    except Exception:
        any_record = db.inv_ops.lucky({'uid': uid})
        try:
            resolved_name = idc.get_db_name(any_record['db'])
        except Exception:
            pass

    tables = {}
    for item in found:
        name = idc.get_table_name(item['table'])
        tables[name] = split_orphans(item)
    return {resolved_name: tables}
def check_scrapes_running(inv_db, scrape_list):
    """Return the scrapes from ``scrape_list`` that are NOT running.

    Args:
        inv_db: Inventory database handle passed to the name lookups.
        scrape_list: Iterable of items with ``'db'`` and ``'coll'`` keys
            holding the ids passed to the name lookups.

    Returns:
        A new list with the items whose ``get_scrape_progress`` result is
        ``(-1, -1)``. Items whose lookup or progress query raises are
        logged as warnings and left out of the result.
    """
    new_list = []
    for item in scrape_list:
        # Reset per item so the handler never reads an unbound name (first
        # item) or a stale one left over from the previous iteration.
        db_name = coll_name = '<unknown>'
        try:
            db_name = idc.get_db_name(inv_db, item['db'])['name']
            coll_name = idc.get_coll_name(inv_db, item['coll'])['name']
            prog = get_scrape_progress(db_name, coll_name, getDBConnection())
            if prog == (-1, -1):
                new_list.append(item)
        except Exception as e:
            inv.log_dest.warning('Failed to get progress '+str(db_name)+' '+str(coll_name)+' '+str(e))
    return new_list
def collate_orphans():
    """Fetch all orphans and return a summary grouped by database and collection.

    Returns:
        A dict ``{db_name: {coll_name: split_orphans(entry)}}`` covering
        every record returned by ``idc.search_ops_table`` for records that
        have an ``orphans`` field.
    """
    # All orphans records
    record = {'orphans': {"$exists": True}}
    records = idc.search_ops_table(record)
    orph_data = {}
    for entry in records:
        db_name = idc.get_db_name(entry['db'])['name']
        coll = idc.get_coll_name(entry['coll'])['name']
        # setdefault keeps collections already gathered for this database;
        # a fresh {} per entry would overwrite them.
        orph_data.setdefault(db_name, {})[coll] = split_orphans(entry)
    return orph_data
def get_progress_from_db(uid, db_id, coll_id):
    """Query db to see the state of the current scrape.

    NOTE: this function assumes that when it is called there was a running
    process on db.coll, so if there no longer is, it must have finished.

    Args:
        uid: Accepted for interface compatibility; not used in this function.
        db_id: Id resolved to a database name via ``idc.get_db_name``.
        coll_id: Id resolved to a collection name via ``idc.get_coll_name``.

    Returns:
        ``(live_progress[0] * 99) / live_progress[1]`` for the pair returned
        by ``sf.get_scrape_progress``.

    Raises:
        Any exception from the lookups or the progress query propagates
        unchanged, as does ``ZeroDivisionError`` when ``live_progress[1]``
        is 0.
    """
    db_name = idc.get_db_name(db_id)['name']
    coll_name = idc.get_coll_name(coll_id)['name']
    # The former try/except only re-raised (after a dead ``percent = 0``),
    # so errors are simply left to propagate with their original traceback.
    live_progress = sf.get_scrape_progress(db_name, coll_name)
    # Cheat here: we'll cap running to 99% and the last 1% is left for upload time
    # If no running record found, this assumes it completed before
    # we managed to check it, hence 99%
    # NOTE(review): presumably that case is a (-1, -1) result, which gives 99 - confirm.
    return (live_progress[0] * 99) / live_progress[1]
def get_progress_from_db(inv_db, uid, db_id, coll_id):
    """Query db to see the state of the current scrape.

    NOTE: this function assumes that when it is called there was a running
    process on db.coll, so if there no longer is, it must have finished.

    Args:
        inv_db: Inventory database handle passed to the name lookups.
        uid: Accepted for interface compatibility; not used in this function.
        db_id: Id resolved to a database name via ``idc.get_db_name``.
        coll_id: Id resolved to a collection name via ``idc.get_coll_name``.

    Returns:
        ``(live_progress[0] * 99) / live_progress[1]`` for the pair returned
        by ``sf.get_scrape_progress``.

    Raises:
        Any exception from the progress query or the percentage computation
        is logged as a warning and re-raised. Exceptions from the name
        lookups propagate without being logged here.
    """
    db_name = idc.get_db_name(inv_db, db_id)['name']
    coll_name = idc.get_coll_name(inv_db, coll_id)['name']
    try:
        live_progress = sf.get_scrape_progress(db_name, coll_name, getDBConnection())
        # Cheat here: we'll cap running to 99% and the last 1% is left for upload time
        # If no running record found, this assumes it completed before
        # we managed to check it, hence 99%
        # NOTE(review): presumably that case is a (-1, -1) result, which gives 99 - confirm.
        percent = (live_progress[0] * 99) / live_progress[1]
    except Exception as e:
        inv.log_dest.warning(e)
        # Bare raise keeps the original traceback. The former ``percent = 0``
        # here was dead code because the handler always re-raises.
        raise
    return percent
def collate_orphans():
    """Fetch all orphans and return a summary grouped by database and collection.

    Returns:
        ``False`` if the inventory database connection could not be set up.
        Otherwise a dict ``{db_name: {coll_name: split_orphans(entry)}}``
        covering every ``ops`` record that has an ``orphans`` field.
    """
    try:
        got_client = inv.setup_internal_client(editor=True)
        # Explicit check rather than ``assert`` so it is not stripped by -O.
        if got_client != True:  # noqa: E712 - keep the original strict comparison
            raise RuntimeError('setup_internal_client did not return True')
        inv_db = inv.int_client[inv.get_inv_db_name()]
    except Exception as e:
        inv.log_dest.error("Error getting Db connection "+ str(e))
        return False

    # All orphans records
    record = {'orphans': {"$exists": True}}
    records = inv_db['ops'].find(record)
    orph_data = {}
    for entry in records:
        db_name = idc.get_db_name(inv_db, entry['db'])['name']
        coll = idc.get_coll_name(inv_db, entry['coll'])['name']
        # setdefault keeps collections already gathered for this database;
        # a fresh {} per entry would overwrite them.
        orph_data.setdefault(db_name, {})[coll] = split_orphans(entry)
    return orph_data