def start_worker_mon():
    """Starts the Monitoring worker"""
    worker_name = '{}-mon'.format(WORKER_PREFIX)
    worker = Worker(
        worker_name,
        'monitor',
        fn=mon_worker.run,
        kwargs={},
        log=log,
        kill_if_running=True,
        pid_folder=DBNET_FOLDER)
    worker.start()
    log('Monitor Loop PID is {}'.format(worker.pid))

    workers['mon'] = worker
    workers['mon'].put_child_q(dict(name=worker_name, pid=worker.pid))  # add to monitor

    store.sqlx('workers').replace_rec(
        hostname=worker.hostname,
        worker_name=worker.name,
        worker_type=worker.type,
        worker_pid=worker.pid,
        status='RUNNING',
        task_id=-1,
        task_function=worker.fn.__name__,
        task_start_date=now(),
        task_args=jdumps(worker.args),
        task_kwargs=jdumps(worker.kwargs),
        progress=None,
        queue_length=0,
        last_updated=epoch(),
    )

    return worker
def start_worker_webapp():
    """Starts the WebApp worker"""
    worker_name = '{}-webapp'.format(WORKER_PREFIX)
    worker = Worker(
        worker_name,
        'web-app',
        fn=webapp_worker.run,
        log=log,
        kill_if_running=True,
        args=(WEBAPP_HOST, WEBAPP_PORT),
        kwargs={'mon_worker': workers['mon']},
        pid_folder=DBNET_FOLDER)
    worker.start()

    workers['mon'].put_child_q(dict(name=worker_name, pid=worker.pid))  # add to monitor
    workers['webapp'] = worker

    store.sqlx('workers').replace_rec(
        hostname=worker.hostname,
        worker_name=worker.name,
        worker_type=worker.type,
        worker_pid=worker.pid,
        status='RUNNING',
        task_id=-1,
        task_function=worker.fn.__name__,
        task_start_date=now(),
        task_args=jdumps(worker.args),
        task_kwargs=jdumps(worker.kwargs),
        progress=None,
        queue_length=0,
        last_updated=epoch(),
    )

    return worker
def start_worker_db(db_name, start=False):
    """Create and start a database worker.

    Args:
        db_name: the name of the database
        start: whether to automatically start the worker or not

    Returns:
        The worker object.
    """
    db_prof = get_db_profile(db_name)
    db_workers_map[db_name] = db_workers_map.get(db_name, [])  # multiple workers for same database

    index = 0
    worker_name = '{}-{}-{}'.format(WORKER_PREFIX, db_name, index)
    while worker_name in workers:
        # in case worker name is already taken
        index += 1
        worker_name = '{}-{}-{}'.format(WORKER_PREFIX, db_name, index)

    worker = Worker(
        worker_name,
        'database-client',
        fn=db_worker.run,
        log=log,
        kill_if_running=True,
        args=(db_prof, conf_queue),
        kwargs={},
        pid_folder=DBNET_FOLDER)
    worker.status = 'IDLE'

    if start:
        worker.start()
        log('*Started worker {} with PID {}'.format(worker.name, worker.pid))
        workers['mon'].put_child_q(dict(name=worker_name, pid=worker.pid))  # add to monitor

    store.sqlx('workers').replace_rec(
        hostname=worker.hostname,
        worker_name=worker.name,
        worker_type=worker.type,
        worker_pid=worker.pid,
        queue_length=0,
        status='IDLE',
        last_updated=epoch(),
    )

    workers[worker_name] = worker
    db_workers_map[db_name].append(worker)

    return worker
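# Illustrative usage (a minimal sketch; 'PG1' is a made-up profile name):
# starting several workers for the same database yields incrementing name
# suffixes, e.g. '<WORKER_PREFIX>-PG1-0', '<WORKER_PREFIX>-PG1-1'.
#
#   wkr = start_worker_db('PG1', start=True)
#   workers[wkr.name]       # the same Worker object, keyed by its generated name
#   db_workers_map['PG1']   # list of all workers serving that database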
def update_meta(worker: Worker, data_dict):
    """Update the worker's metadata and send results to frontend.

    Args:
        worker: the respective worker
        data_dict: the request payload dictionary
    """
    database = data_dict['database']
    try:
        conn = get_conn(database)
        make_rec = lambda name, rec: store.sqlx(name).ntRec(**dict(
            db_name=database, last_updated=int(time.time()), **rec))

        # meta_tables
        table_data = [
            make_rec('meta_tables', row._asdict())
            for row in conn.get_all_tables()
        ]
        store.sqlx('meta_tables').replace(table_data)

        # meta_columns
        column_data = [
            make_rec('meta_columns', row._asdict())
            for row in conn.get_all_columns()
        ]
        store.sqlx('meta_columns').replace(column_data)

        data = dict(
            id=data_dict['id'],
            payload_type='meta-updated',
            completed=True,
            orig_req=data_dict,
            sid=data_dict['sid'],
        )
    except Exception as E:
        worker.log(E)
        err_msg = get_error_str(E)
        data = dict(
            id=data_dict['id'],
            payload_type='meta-updated',
            completed=False,
            error=err_msg,
            orig_req=data_dict,
            sid=data_dict['sid'],
        )
    finally:
        worker.put_parent_q(data)
def start_sql(sql, id, limit, options, sid):
    # Note: relies on `conn`, `database`, `data_dict`, `worker` and `worker_pid`
    # from the enclosing execute_sql scope (see execute_sql below).
    rows = fields = []
    get_fields = lambda r: r.__fields__ if hasattr(r, '__fields__') else r._fields
    s_t = epoch()
    cache_used = False
    limit = int(options['limit']) if 'limit' in options else limit

    try:

        def exec_sql(sql, limit_def=5000):
            log(
                '\n------------SQL-START------------\n{}\n------------SQL-END------------\n'
                .format(sql),
                color='blue')
            log('LIMIT: ' + str(limit), color='blue')
            cache_used = False

            if sql in worker_sql_cache:
                for fields, rows in list(worker_sql_cache[sql]['results']):
                    # if limit above limit_def, then refresh
                    if limit > limit_def:
                        break

                    # if limit is same and not a csv call, then refresh
                    if limit == worker_sql_cache[sql]['limit'] and 'csv' not in options:
                        break

                    # if ran more than 10 minutes ago, then refresh
                    if now_minus(minutes=10) > worker_sql_cache[sql]['timestamp']:
                        del worker_sql_cache[sql]
                        break

                    if len(fields) > 0:
                        cache_used = True  # must return data/fields
                        worker_sql_cache[sql]['limit'] = limit
                        log('+Cache Used')
                        yield fields, rows, cache_used

            if not cache_used:
                worker_sql_cache[sql] = dict(
                    timestamp=now(), results=[], limit=limit)
                rows = conn.query(
                    sql.replace('%', '%%'),
                    dtype='tuple',
                    limit=limit if limit > limit_def else limit_def)
                fields = conn._fields
                worker_sql_cache[sql]['results'].append((fields, rows))
                yield fields, rows, cache_used

        if 'meta' in options:
            # meta function, such as get_schemas
            meta_func = options['meta']
            rows = getattr(conn, meta_func)(**options['kwargs'])
            rows = [tuple(r) for r in rows]
            fields = conn._fields
        elif 'special' in options:
            pass
        else:
            for fields, rows, cache_used in exec_sql(sql):
                fields, rows = fields, rows
            rows = rows[:limit] if len(rows) > limit else rows

        if rows is None:
            rows = []

        if 'email_address' in options or 'csv' in options:
            file_name = '{}-{}-{}.csv'.format(database, options['name'],
                                              data_dict['id'])
            file_path = '{}/{}'.format(CSV_FOLDER, file_name)
            write_csv(file_path, fields, rows)

            # gzip files larger than 20 MB
            if os.path.getsize(file_path) > 20 * (1024**2):
                rc = os.system('gzip -f ' + file_path)
                file_name = file_name + '.gz' if rc == 0 else file_name
                file_path = '{}/{}'.format(CSV_FOLDER, file_name)

            url = 'http://{base_url}:{port}/csv/{name}'.format(
                base_url=socket.gethostname(),
                port=WEBAPP_PORT,
                name=file_name,
            )
            options['url'] = url

        if 'email_address' in options:
            subj = 'DbNet -- Result for Query {}'.format(data_dict['id'])
            body_text = 'URL: {url}\n\nROWS: {rows}\n\nSQL:\n{sql}'.format(
                url=url, rows=len(rows), sql=sql)
            to_address = options['email_address']
            email_template = os.getenv("SMTP_TEMPLATE")
            if 'exchange_server' == email_template:
                email_func = send_email_exchange
            elif 'outlook' == email_template:
                email_func = send_from_outlook
            elif 'gmail' == email_template:
                email_func = send_from_gmail
            else:
                raise Exception('Email method not implemented!')

            email_func(to_address, subj, body_text)

            if len(rows) > 100:
                rows = rows[:100]

        e_t = epoch()
        secs = e_t - s_t

        # Add query
        store.sqlx('queries').add(
            task_id=data_dict['id'],
            database=database,
            sql_text=sql,
            exec_date=s_t,
            duration_sec=secs,
            row_count=len(rows),
            limit_val=limit,
            cached=cache_used,
            sql_md5=hashlib.md5(sql.encode('utf-8')).hexdigest(),
            last_updated=epoch(),
        )

        if sql.strip():
            sql_fpath = '{}/{}.{}.sql'.format(SQL_FOLDER, database,
                                              data_dict['id'])
            sql_text = '-- Completed @ {} in {} seconds.\n\n{}'.format(
                now_str(), secs, sql)
            write_file(sql_fpath, sql_text)

        # time.sleep(0.5)
        data = dict(
            id=data_dict['id'],
            payload_type='query-data',
            database=database,
            rows=rows,
            headers=fields,
            start_ts=s_t,
            end_ts=e_t,
            execute_time=round(secs, 2),
            completed=True,
            cache_used=cache_used,
            options=options,
            pid=worker_pid,
            orig_req=data_dict,
            sid=sid,
        )

    except Exception as E:
        secs = epoch() - s_t
        err_msg_long = get_exception_message()
        err_msg = get_error_str(E)
        worker.log(E)

        data = dict(
            id=id,
            payload_type='query-data',
            database=database,
            rows=[],
            headers=[],
            execute_time=round(secs, 2),
            completed=False,
            error='ERROR:\n' + err_msg,
            options=options,
            pid=worker_pid,
            orig_req=data_dict,
            sid=sid)

    finally:
        # worker.pipe.send_to_parent(data)
        worker.put_parent_q(data)
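# Illustrative sketch (not part of the original module): the in-memory cache
# entry that start_sql/exec_sql maintain per SQL text looks roughly like:
#
#   worker_sql_cache['select * from t'] = dict(
#       timestamp=now(),            # when the result set was produced
#       results=[(fields, rows)],   # list of (headers, row tuples) pairs
#       limit=5000,                 # the limit used for the cached run
#   )
#
# An entry is bypassed or evicted when the requested limit exceeds limit_def
# or when the entry is more than 10 minutes old.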
def run(db_prof, conf_queue: Queue, worker: Worker):
    """Launch the database worker and await requests.

    Args:
        db_prof: the db profile
        conf_queue: a multiprocessing Queue
        worker: the respective worker.
    """
    global worker_name, worker_status

    log = worker.log
    worker_name = worker.name
    worker_status = 'IDLE'
    set_worker_idle()
    worker_db_prof = db_prof

    while True:
        try:
            time.sleep(0.05)  # brings down CPU loop usage
        except (KeyboardInterrupt, SystemExit):
            return

        # data_dict = worker.pipe.recv_from_parent(timeout=0)
        data_dict = worker.get_child_q()
        if data_dict:
            conf_data = {'payload_type': 'confirmation'}
            if data_dict['req_type'] in func_map:
                worker_queue.append(data_dict)
                sync_queue()
                conf_data['queued'] = True

                # Add task
                store.sqlx('tasks').add(
                    task_id=data_dict['id'],
                    function=func_map[data_dict['req_type']].__name__,
                    queue_date=now(),
                    start_date=None,
                    end_date=None,
                    args=jdumps([]),
                    kwargs=jdumps(data_dict),
                    error=None,
                    worker_name=worker_name,
                    worker_pid=worker_pid,
                    last_updated=epoch(),
                )
                log('+({}) Queued task: {}'.format(len(worker_queue), data_dict))

            # Send receipt confirmation?
            # with worker.lock:
            #     worker.pipe.send_to_parent(conf_data)

        if len(worker_queue) and worker_status == 'IDLE':
            data_dict = worker_queue.popleft()
            sync_queue()
            worker_status = 'BUSY'
            func = func_map[data_dict['req_type']]

            # Sync worker
            store.sqlx('workers').update_rec(
                hostname=worker.hostname,
                worker_name=worker.name,
                status=worker_status,
                task_id=data_dict['id'],
                task_function=func.__name__,
                task_start_date=now(),
                task_args=jdumps([]),
                task_kwargs=jdumps(data_dict),
                last_updated=epoch(),
            )

            # Sync task
            store.sqlx('tasks').update_rec(
                task_id=data_dict['id'],
                start_date=now(),
                last_updated=epoch(),
            )

            try:
                error_data = None
                func(worker, data_dict)
            except Exception as E:
                log(E)
                error_data = dict(
                    id=data_dict['id'],
                    sid=data_dict['sid'],
                    payload_type='task-error',
                    error=get_error_str(E),
                )
                # worker.pipe.send_to_parent(error_data)
                worker.put_parent_q(error_data)
            finally:
                # Sync worker
                worker_status = 'IDLE'
                set_worker_idle()

                # Sync task
                store.sqlx('tasks').update_rec(
                    task_id=data_dict['id'],
                    end_date=now(),
                    error=jdumps(error_data) if error_data else None,
                    last_updated=epoch(),
                )
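# Illustrative queued request (a sketch; field values are made up, and any
# field beyond id/sid/req_type/database is an assumption): run() drains dicts
# whose 'req_type' maps to a handler in func_map (e.g. execute_sql or
# update_meta) and passes the whole dict to that handler.
#
#   worker.put_child_q(dict(
#       id=7,
#       sid='socket-session-id',
#       req_type='submit-sql',
#       database='PG1',
#   ))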
WEBAPP_PORT = int(os.getenv('DBNET_WEBAPP_PORT', default=5566))
DBNET_FOLDER = os.getenv('DBNET_FOLDER', default=get_home_path() + '/dbnet')

SQL_FOLDER = DBNET_FOLDER + '/sql'
os.makedirs(SQL_FOLDER, exist_ok=True)

CSV_FOLDER = DBNET_FOLDER + '/csv'
os.makedirs(CSV_FOLDER, exist_ok=True)

sync_queue = lambda: store.worker_set(
    hostname=worker_hostname,
    worker_name=worker_name,
    queue_length=len(worker_queue))

set_worker_idle = lambda: store.sqlx('workers').update_rec(
    hostname=worker_hostname,
    worker_name=worker_name,
    status='IDLE',
    task_id=None,
    task_function=None,
    task_start_date=None,
    task_args=None,
    task_kwargs=None,
    last_updated=epoch())


def execute_sql(worker: Worker, data_dict):
    "Execute SQL operation"
    log = worker.log

    database = data_dict['database']
    sid = data_dict['sid']
    pid = worker_pid
    conn = get_conn(database)
def handle_web_worker_req(web_worker: Worker, data_dict):
    """Handler for a web worker request.

    Args:
        web_worker: the respective web worker
        data_dict: the request payload dictionary

    Returns:
        True if successful. False if no worker is available.
    """
    # print('data_dict: {}'.format(data_dict))
    # return

    data = struct(data_dict)
    response_data = {}
    response_data_for_missing = {
        'completed': False,
        'payload_type': 'client-response',
        'sid': data.sid,
        'error': Exception('Request "{}" not handled!'.format(data.req_type))
    }

    if data.req_type in ('submit-sql', ):
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False

        # send to worker queue
        db_worker.put_child_q(data_dict)
        response_data['worker_name'] = db_worker.name
        response_data['queued'] = True

    elif data.req_type == 'stop-worker':
        completed = stop_worker(data.worker_name)
        response_data = dict(completed=completed)

    elif data.req_type == 'add-worker':
        start_worker_db(data.database, start=True)
        response_data = dict(completed=True)

    elif data.req_type == 'set-state':
        store.state_set(data.key, data.value)
        response_data = dict(completed=True)

    elif data.req_type == 'set-database':
        store.sqlx('databases').replace_rec(**data.db_states)
        response_data = dict(completed=True)

    elif data.req_type == 'get-database':
        rec = store.sqlx('databases').select_one(fwa(db_name=data.db_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-databases':
        databases = get_databases()
        get_rec = lambda d: dict(type=d['type'])
        response_data = dict(
            completed=True,
            data={
                k: get_rec(databases[k])
                for k in sorted(databases)
                if k.lower() not in ('tests', 'drivers')
            })

    elif data.req_type == 'get-analysis-sql':
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False

        db_worker.put_child_q(data_dict)
        response_data['queued'] = True

    elif data.req_type == 'get-meta-tables':
        where = "lower(db_name)=lower('{}')".format(data.database)
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                data.filter_schema)
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                data.filter_table)

        rows = store.sqlx('meta_tables').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_tables').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'get-meta-columns':
        log(str(data))
        where = "lower(db_name)=lower('{}')".format(data.database)
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                data.filter_schema)
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                data.filter_table)
        if data.filter_column:
            where = where + ''' and lower(column_name) like lower('%{}%')'''.format(
                data.filter_column)

        rows = store.sqlx('meta_columns').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_columns').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'set-tab':
        store.sqlx('tabs').replace_rec(**data.tab_state)
        response_data = dict(completed=True)

    elif data.req_type == 'get-tab':
        rec = store.sqlx('tabs').select_one(
            fwa(db_name=data.db_name, tab_name=data.tab_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-tasks':
        rows = store.sqlx('tasks').query(
            where='1=1 order by end_date desc, start_date desc, queue_date desc',
            limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-queries':
        rows = store.sqlx('queries').query(
            where="""
                lower(sql_text) like '%{}%'
                and database = '{}'
                and sql_text <> ''
                order by exec_date desc
            """.format(data.filter.lower(), data.database),
            limit=int(data.limit))
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'search-queries':
        where = "sql_text like '%{}%' order by exec_date desc".format(
            data.query_filter)
        rows = store.sqlx('queries').query(where=where, limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-workers':
        make_rec = lambda wkr: dict(
            name=wkr.name,
            status=wkr.status,
            start_time=wkr.started,
            pid=wkr.pid,
        )
        workers_data = [make_rec(wkr) for wkr in workers.values()]
        response_data = dict(data=workers_data, completed=True)

    elif data.req_type == 'reset-db':
        for wkr_nm in list(workers):
            if wkr_nm in ('webapp', 'mon'):
                continue
            stop_worker(wkr_nm)
        store.create_tables(drop_first=True, ask=False)
        response_data = dict(completed=True)

    # In case the handler is missing. Also check for `completed`.
    if response_data:
        response_data['orig_req'] = data_dict
        response_data['payload_type'] = 'client-response'
        response_data['sid'] = data.sid
        response_data['completed'] = response_data.get('completed', False)
        res = '+Completed' if response_data['completed'] \
            else '+Queued' if 'queued' in response_data and response_data['queued'] \
            else '~Did not Complete'
        log('{} "{}" request "{}".'.format(res, data.req_type, data.id))
    else:
        response_data = response_data_for_missing

    # Respond to WebApp Worker
    send_to_webapp(response_data)

    return True
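# Illustrative dispatch (a sketch; the id and sid values are made up): the
# web app worker forwards each client request to handle_web_worker_req, which
# either answers directly from the store or queues the request on a database
# worker.
#
#   handle_web_worker_req(workers['webapp'], dict(
#       id=42,
#       sid='socket-session-id',
#       req_type='get-databases',
#   ))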