Example #1
0
def start_worker_mon():
    """Create, start and register the monitoring worker.

    The worker is stored under ``workers['mon']``, announced on its own
    child queue, and written to the ``workers`` store table with status
    ``RUNNING``.

    Returns:
      The started monitor worker object.
    """
    mon_name = '{}-mon'.format(WORKER_PREFIX)
    mon = Worker(
        mon_name,
        'monitor',
        fn=mon_worker.run,
        kwargs={},
        log=log,
        kill_if_running=True,
        pid_folder=DBNET_FOLDER,
    )
    mon.start()
    log('Monitor Loop PID is {}'.format(mon.pid))

    # Register the worker, then announce it on the monitor's child queue.
    workers['mon'] = mon
    announcement = {'name': mon_name, 'pid': mon.pid}
    workers['mon'].put_child_q(announcement)  # add to monitor

    # Persist the worker record.
    workers_table = store.sqlx('workers')
    workers_table.replace_rec(
        hostname=mon.hostname,
        worker_name=mon.name,
        worker_type=mon.type,
        worker_pid=mon.pid,
        status='RUNNING',
        task_id=-1,
        task_function=mon.fn.__name__,
        task_start_date=now(),
        task_args=jdumps(mon.args),
        task_kwargs=jdumps(mon.kwargs),
        progress=None,
        queue_length=0,
        last_updated=epoch(),
    )
    return mon
Example #2
0
def start_worker_webapp():
    """Create, start and register the web application worker.

    The worker receives ``(WEBAPP_HOST, WEBAPP_PORT)`` as positional
    arguments and the monitor worker as the ``mon_worker`` keyword argument.
    After starting, it is announced to the monitor, stored under
    ``workers['webapp']`` and written to the ``workers`` store table with
    status ``RUNNING``.

    Returns:
      The started web-app worker object.
    """
    webapp_name = '{}-webapp'.format(WORKER_PREFIX)
    monitor_kwargs = {'mon_worker': workers['mon']}

    webapp = Worker(
        webapp_name,
        'web-app',
        fn=webapp_worker.run,
        log=log,
        kill_if_running=True,
        args=(WEBAPP_HOST, WEBAPP_PORT),
        kwargs=monitor_kwargs,
        pid_folder=DBNET_FOLDER,
    )
    webapp.start()

    announcement = {'name': webapp_name, 'pid': webapp.pid}
    workers['mon'].put_child_q(announcement)  # add to monitor
    workers['webapp'] = webapp

    # Persist the worker record.
    workers_table = store.sqlx('workers')
    workers_table.replace_rec(
        hostname=webapp.hostname,
        worker_name=webapp.name,
        worker_type=webapp.type,
        worker_pid=webapp.pid,
        status='RUNNING',
        task_id=-1,
        task_function=webapp.fn.__name__,
        task_start_date=now(),
        task_args=jdumps(webapp.args),
        task_kwargs=jdumps(webapp.kwargs),
        progress=None,
        queue_length=0,
        last_updated=epoch(),
    )
    return webapp
Example #3
0
def start_worker_db(db_name, start=False):
    """Create (and optionally start) a database worker.

    Several workers may serve the same database; each gets the first
    ``<prefix>-<db_name>-<index>`` name not already present in ``workers``.
    The new worker is announced to the monitor worker, written to the
    ``workers`` store table with status ``IDLE``, and registered in both
    ``workers`` and ``db_workers_map``.

    Args:
      db_name: the name of the database
      start: Whether to automatically start the worker or not

    Returns:
      The worker object.
    """
    profile = get_db_profile(db_name)
    if db_name not in db_workers_map:
        db_workers_map[db_name] = []

    # Find the first unused worker name for this database.
    slot = 0
    while True:
        candidate = '{}-{}-{}'.format(WORKER_PREFIX, db_name, slot)
        if candidate not in workers:
            break
        slot += 1

    db_wkr = Worker(
        candidate,
        'database-client',
        fn=db_worker.run,
        log=log,
        kill_if_running=True,
        args=(profile, conf_queue),
        kwargs={},
        pid_folder=DBNET_FOLDER,
    )
    db_wkr.status = 'IDLE'

    if start:
        db_wkr.start()
        started_msg = '*Started worker {} with PID {}'.format(
            db_wkr.name, db_wkr.pid)
        log(started_msg)

    announcement = {'name': candidate, 'pid': db_wkr.pid}
    workers['mon'].put_child_q(announcement)  # add to monitor

    workers_table = store.sqlx('workers')
    workers_table.replace_rec(
        hostname=db_wkr.hostname,
        worker_name=db_wkr.name,
        worker_type=db_wkr.type,
        worker_pid=db_wkr.pid,
        queue_length=0,
        status='IDLE',
        last_updated=epoch(),
    )

    workers[candidate] = db_wkr
    db_workers_map[db_name].append(db_wkr)
    return db_wkr
Example #4
0
def update_meta(worker: Worker, data_dict):
    """Update the worker's metadata and send results to frontend.

    Reads all tables and columns from the database connection, replaces the
    ``meta_tables`` and ``meta_columns`` store tables with them, and puts a
    ``meta-updated`` payload on the worker's parent queue. ``completed`` is
    True on success; on failure it is False and ``error`` carries the
    message.

    Args:
      worker: the respective worker
      data_dict: the request payload dictionary; the ``database``, ``id`` and
        ``sid`` keys are read.
    """
    database = data_dict['database']

    def make_rec(name, rec):
        # Build a record of the store table's row type, stamped with the
        # database name and the current time.
        return store.sqlx(name).ntRec(
            db_name=database, last_updated=int(time.time()), **rec)

    try:
        conn = get_conn(database)

        # meta_tables
        table_data = [
            make_rec('meta_tables', row._asdict())
            for row in conn.get_all_tables()
        ]
        store.sqlx('meta_tables').replace(table_data)

        # meta_columns
        column_data = [
            make_rec('meta_columns', row._asdict())
            for row in conn.get_all_columns()
        ]
        store.sqlx('meta_columns').replace(column_data)

        data = dict(
            id=data_dict['id'],
            payload_type='meta-updated',
            completed=True,
            orig_req=data_dict,
            sid=data_dict['sid'],
        )

    except Exception as E:
        worker.log(E)
        err_msg = get_error_str(E)

        data = dict(
            id=data_dict['id'],
            payload_type='meta-updated',
            completed=False,
            error=err_msg,
            orig_req=data_dict,
            sid=data_dict['sid'],
        )

    # Sent after the try/except rather than from a ``finally`` clause: if the
    # handler above itself fails, ``data`` is unbound, and a ``finally`` would
    # replace the real error with an UnboundLocalError.
    worker.put_parent_q(data)
Example #5
0
    def start_sql(sql, id, limit, options, sid):
        """Run a SQL request (or metadata call) and send the result upstream.

        Uses names from the enclosing scope that are not defined in this
        block: ``conn``, ``database``, ``data_dict``, ``worker``, ``log``,
        ``worker_pid`` and ``worker_sql_cache``.

        Depending on ``options`` the rows come from a metadata method on the
        connection (``meta``), are left empty (``special``), or come from
        ``exec_sql`` (cached or fresh query). Optionally a CSV file is
        written and a notification email is sent. The query is recorded in
        the ``queries`` store table and a ``query-data`` payload is put on
        the worker's parent queue; on failure the payload has
        ``completed=False`` and an ``error`` message.

        Args:
          sql: the SQL text to execute.
          id: request id used in the error payload (the success payload uses
            ``data_dict['id']``).
          limit: default row limit; ``options['limit']`` overrides it.
          options: request options. Keys read here: ``limit``, ``meta`` and
            ``kwargs``, ``special``, ``csv``, ``email_address``, ``name``.
            ``options['url']`` is set when a CSV file is written.
          sid: session id echoed back in the payload.
        """
        import subprocess  # local import: only needed for the gzip step

        # Separate lists (the original aliased one list to both names).
        rows = []
        fields = []
        s_t = epoch()
        cache_used = False
        limit = int(options['limit']) if 'limit' in options else limit

        def exec_sql(sql, limit_def=5000):
            """Yield ``(fields, rows, cache_used)`` from cache or a new query.

            A cached result is bypassed (and the query re-run) when the limit
            exceeds ``limit_def``, when the limit equals the cached limit on
            a non-csv call, or when the cache entry is older than 10 minutes.
            """
            log('\n------------SQL-START------------\n{}\n------------SQL-END------------ \n'
                .format(sql),
                color='blue')
            log('LIMIT: ' + str(limit), color='blue')
            cache_used = False
            if sql in worker_sql_cache:
                for fields, rows in list(worker_sql_cache[sql]['results']):
                    # if limit above limit_def, then refresh
                    if limit > limit_def:
                        break

                    # if limit is same and not a csv call, then refresh
                    if (limit == worker_sql_cache[sql]['limit']
                            and 'csv' not in options):
                        break

                    # if ran more than 10 minutes ago, then refresh
                    if now_minus(
                            minutes=10) > worker_sql_cache[sql]['timestamp']:
                        del worker_sql_cache[sql]
                        break

                    if len(fields) > 0:
                        cache_used = True  # must return data/fields
                        worker_sql_cache[sql]['limit'] = limit
                        log('+Cache Used')

                    yield fields, rows, cache_used

            if not cache_used:
                worker_sql_cache[sql] = dict(timestamp=now(),
                                             results=[],
                                             limit=limit)
                # Always fetch at least ``limit_def`` rows so that later
                # requests with a different limit can be served from cache.
                rows = conn.query(
                    sql.replace('%', '%%'),
                    dtype='tuple',
                    limit=limit if limit > limit_def else limit_def)
                fields = conn._fields
                worker_sql_cache[sql]['results'].append((fields, rows))
                yield fields, rows, cache_used

        try:
            if 'meta' in options:
                # get_schemas or
                meta_func = options['meta']
                rows = getattr(conn, meta_func)(**options['kwargs'])
                rows = [tuple(r) for r in rows]
                fields = conn._fields

            elif 'special' in options:
                pass

            else:
                for fields, rows, cache_used in exec_sql(sql):
                    rows = rows[:limit] if len(rows) > limit else rows

            if rows is None:
                rows = []

            if 'email_address' in options or 'csv' in options:
                file_name = '{}-{}-{}.csv'.format(database, options['name'],
                                                  data_dict['id'])
                file_path = '{}/{}'.format(CSV_FOLDER, file_name)
                write_csv(file_path, fields, rows)
                if os.path.getsize(file_path) > 20 * (1024**2):
                    # Compress files above 20 MiB. The path contains
                    # request-supplied text, so pass it as an argument list
                    # instead of building a shell command string.
                    try:
                        rc = subprocess.run(['gzip', '-f',
                                             file_path]).returncode
                    except OSError:
                        rc = 1  # gzip unavailable: keep the plain csv
                    file_name = file_name + '.gz' if rc == 0 else file_name
                    file_path = '{}/{}'.format(CSV_FOLDER, file_name)

                url = 'http://{base_url}:{port}/csv/{name}'.format(
                    base_url=socket.gethostname(),
                    port=WEBAPP_PORT,
                    name=file_name,
                )
                options['url'] = url

            if 'email_address' in options:
                subj = 'DbNet -- Result for Query {}'.format(data_dict['id'])
                body_text = 'URL: {url}\n\nROWS: {rows}\n\nSQL:\n{sql}'.format(
                    url=url, rows=len(rows), sql=sql)
                to_address = options['email_address']
                email_template = os.getenv("SMTP_TEMPLATE")
                if 'exchange_server' == email_template:
                    email_func = send_email_exchange
                elif 'outlook' == email_template:
                    email_func = send_from_outlook
                elif 'gmail' == email_template:
                    email_func = send_from_gmail
                else:
                    raise Exception('Email method not implemented!')

                email_func(to_address, subj, body_text)

                # The full result is in the CSV; return only a preview.
                if len(rows) > 100:
                    rows = rows[:100]

            e_t = epoch()
            secs = e_t - s_t

            # Add query
            store.sqlx('queries').add(
                task_id=data_dict['id'],
                database=database,
                sql_text=sql,
                exec_date=s_t,
                duration_sec=secs,
                row_count=len(rows),
                limit_val=limit,
                cached=cache_used,
                sql_md5=hashlib.md5(sql.encode('utf-8')).hexdigest(),
                last_updated=epoch(),
            )

            if sql.strip():
                sql_fpath = '{}/{}.{}.sql'.format(SQL_FOLDER, database,
                                                  data_dict['id'])
                sql_text = '-- Completed @ {} in {} seconds.\n\n{}'.format(
                    now_str(), secs, sql)
                write_file(sql_fpath, sql_text)

            data = dict(
                id=data_dict['id'],
                payload_type='query-data',
                database=database,
                rows=rows,
                headers=fields,
                start_ts=s_t,
                end_ts=e_t,
                execute_time=round(secs, 2),
                completed=True,
                cache_used=cache_used,
                options=options,
                pid=worker_pid,
                orig_req=data_dict,
                sid=sid,
            )

        except Exception as E:
            secs = epoch() - s_t
            # NOTE(review): the long message is not used below; the call is
            # kept in case it has side effects - confirm and remove.
            err_msg_long = get_exception_message()
            err_msg = get_error_str(E)

            worker.log(E)
            data = dict(id=id,
                        payload_type='query-data',
                        database=database,
                        rows=[],
                        headers=[],
                        execute_time=round(secs, 2),
                        completed=False,
                        error='ERROR:\n' + err_msg,
                        options=options,
                        pid=worker_pid,
                        orig_req=data_dict,
                        sid=sid)

        # Sent after the try/except rather than from a ``finally`` clause: if
        # the handler above itself fails, ``data`` is unbound, and a
        # ``finally`` would replace the real error with an UnboundLocalError.
        worker.put_parent_q(data)
Example #6
0
def run(db_prof, conf_queue: Queue, worker: Worker):
    """Run the database worker loop: receive requests, queue and execute them.

    Each iteration sleeps briefly, reads at most one request from the
    worker's child queue, enqueues it when its ``req_type`` is in
    ``func_map``, and - when the worker is idle and the queue is not empty -
    executes the oldest queued task. Worker and task state are written to the
    store along the way. The loop ends when the sleep is interrupted by
    ``KeyboardInterrupt`` or ``SystemExit``.

    Args:
      db_prof: the db profile
      conf_queue: a multiprocessing Queue (not used in this function)
      worker: the respective worker.
    """
    global worker_name, worker_status

    log = worker.log
    worker_name = worker.name
    worker_status = 'IDLE'
    set_worker_idle()
    worker_db_prof = db_prof  # local only; not read again in this function

    while True:
        try:
            time.sleep(0.05)  # brings down CPU loop usage
        except (KeyboardInterrupt, SystemExit):
            return

        # --- Intake: pull one request (if any) and enqueue it ---
        request = worker.get_child_q()
        if request:
            # Receipt confirmation is built but currently not sent anywhere.
            conf_data = {'payload_type': 'confirmation'}
            req_type = request['req_type']
            if req_type in func_map:
                worker_queue.append(request)
                sync_queue()
                conf_data['queued'] = True

                # Add task
                tasks_table = store.sqlx('tasks')
                tasks_table.add(
                    task_id=request['id'],
                    function=func_map[request['req_type']].__name__,
                    queue_date=now(),
                    start_date=None,
                    end_date=None,
                    args=jdumps([]),
                    kwargs=jdumps(request),
                    error=None,
                    worker_name=worker_name,
                    worker_pid=worker_pid,
                    last_updated=epoch(),
                )

                queued_msg = '+({}) Queued task: {}'.format(
                    len(worker_queue), request)
                log(queued_msg)

        # --- Execution: only when something is queued and we are idle ---
        if len(worker_queue) == 0 or worker_status != 'IDLE':
            continue

        task = worker_queue.popleft()
        sync_queue()
        worker_status = 'BUSY'
        handler = func_map[task['req_type']]

        # Sync worker
        store.sqlx('workers').update_rec(
            hostname=worker.hostname,
            worker_name=worker.name,
            status=worker_status,
            task_id=task['id'],
            task_function=handler.__name__,
            task_start_date=now(),
            task_args=jdumps([]),
            task_kwargs=jdumps(task),
            last_updated=epoch(),
        )

        # Sync task
        store.sqlx('tasks').update_rec(
            task_id=task['id'],
            start_date=now(),
            last_updated=epoch(),
        )

        error_data = None
        try:
            handler(worker, task)
        except Exception as E:
            log(E)
            error_data = {
                'id': task['id'],
                'sid': task['sid'],
                'payload_type': 'task-error',
                'error': get_error_str(E),
            }
            worker.put_parent_q(error_data)
        finally:
            # Sync worker
            worker_status = 'IDLE'
            set_worker_idle()

            # Sync task
            task_error = jdumps(error_data) if error_data else None
            store.sqlx('tasks').update_rec(
                task_id=task['id'],
                end_date=now(),
                error=task_error,
                last_updated=epoch(),
            )
Example #7
0
# Web application port; overridable through the environment.
WEBAPP_PORT = int(os.getenv('DBNET_WEBAPP_PORT', default=5566))

# Base folder, with one sub-folder for saved SQL and one for CSV exports.
# Both sub-folders are created at import time if missing.
DBNET_FOLDER = os.getenv('DBNET_FOLDER', default=get_home_path() + '/dbnet')
SQL_FOLDER = '{}/sql'.format(DBNET_FOLDER)
os.makedirs(SQL_FOLDER, exist_ok=True)
CSV_FOLDER = '{}/csv'.format(DBNET_FOLDER)
os.makedirs(CSV_FOLDER, exist_ok=True)


def sync_queue():
    """Write this worker's current queue length to the store."""
    return store.worker_set(
        hostname=worker_hostname,
        worker_name=worker_name,
        queue_length=len(worker_queue),
    )


def set_worker_idle():
    """Mark this worker as IDLE in the store and clear its task fields."""
    workers_table = store.sqlx('workers')
    return workers_table.update_rec(
        hostname=worker_hostname,
        worker_name=worker_name,
        status='IDLE',
        task_id=None,
        task_function=None,
        task_start_date=None,
        task_args=None,
        task_kwargs=None,
        last_updated=epoch(),
    )


def execute_sql(worker: Worker, data_dict):
    """Execute SQL operation.

    Args:
      worker: the respective worker; its ``log`` method is bound locally.
      data_dict: the request payload dictionary; the ``database`` and ``sid``
        keys are read here.
    """
    log = worker.log

    # Request fields used by the rest of the operation.
    database = data_dict['database']
    sid = data_dict['sid']
    pid = worker_pid  # ``worker_pid`` is defined outside this function

    # Connection for the requested database.
    conn = get_conn(database)
Example #8
0
def handle_web_worker_req(web_worker: Worker, data_dict):
    """Handler for a web worker request.

    Dispatches on ``data_dict['req_type']``. Requests that need a database
    connection are forwarded to a database worker's child queue (response
    marked ``queued``); the others are answered directly from the store or
    from in-memory worker state. The response is sent to the web-app worker
    via ``send_to_webapp``. Unrecognised request types get a
    ``completed=False`` response carrying an error.

    Args:
      web_worker: the respective web worker (not used in this function)
      data_dict: the request payload dictionary

    Returns:
      True if the request was handled and a response sent. False if no
      database worker is available (no response is sent in that case).
    """
    data = struct(data_dict)
    response_data = {}
    # NOTE(review): ``error`` holds an Exception object, not a string;
    # confirm the web-app side can serialise it.
    response_data_for_missing = {
        'completed': False,
        'payload_type': 'client-response',
        'sid': data.sid,
        'error': Exception('Request "{}" not handled!'.format(data.req_type))
    }

    def sql_str(value):
        # Request values are embedded inside single-quoted SQL literals
        # below; double any single quote so the value cannot end the literal.
        return str(value).replace("'", "''")

    # Equality test: the original ``in ('submit-sql')`` was a substring test
    # against a plain string, so e.g. 'sql' or '' also matched.
    if data.req_type == 'submit-sql':
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False

        # send to worker queue
        db_worker.put_child_q(data_dict)
        response_data['worker_name'] = db_worker.name
        response_data['queued'] = True

    elif data.req_type == 'stop-worker':
        completed = stop_worker(data.worker_name)
        response_data = dict(completed=completed)

    elif data.req_type == 'add-worker':
        start_worker_db(data.database, start=True)
        response_data = dict(completed=True)

    elif data.req_type == 'set-state':
        store.state_set(data.key, data.value)
        response_data = dict(completed=True)

    elif data.req_type == 'set-database':
        store.sqlx('databases').replace_rec(**data.db_states)
        response_data = dict(completed=True)

    elif data.req_type == 'get-database':
        rec = store.sqlx('databases').select_one(fwa(db_name=data.db_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-databases':
        databases = get_databases()
        # Only the database type is exposed; 'tests' and 'drivers' entries
        # are skipped.
        response_data = dict(completed=True,
                             data={
                                 k: dict(type=databases[k]['type'])
                                 for k in sorted(databases)
                                 if k.lower() not in ('tests', 'drivers')
                             })

    elif data.req_type == 'get-analysis-sql':
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False
        db_worker.put_child_q(data_dict)
        response_data['queued'] = True

    elif data.req_type == 'get-meta-tables':
        where = "lower(db_name)=lower('{}')".format(sql_str(data.database))
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                sql_str(data.filter_schema))
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                sql_str(data.filter_table))
        rows = store.sqlx('meta_tables').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_tables').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            # Nothing stored yet: forward the request to a database worker.
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'get-meta-columns':
        log(str(data))
        where = "lower(db_name)=lower('{}')".format(sql_str(data.database))
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                sql_str(data.filter_schema))
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                sql_str(data.filter_table))
        if data.filter_column:
            where = where + ''' and lower(column_name) like lower('%{}%')'''.format(
                sql_str(data.filter_column))
        rows = store.sqlx('meta_columns').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_columns').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            # Nothing stored yet: forward the request to a database worker.
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'set-tab':
        store.sqlx('tabs').replace_rec(**data.tab_state)
        response_data = dict(completed=True)

    elif data.req_type == 'get-tab':
        rec = store.sqlx('tabs').select_one(
            fwa(db_name=data.db_name, tab_name=data.tab_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-tasks':
        rows = store.sqlx('tasks').query(
            where=
            '1=1 order by end_date desc, start_date desc, queue_date desc',
            limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-queries':
        rows = store.sqlx('queries').query(where="""
        lower(sql_text) like '%{}%'
        and database = '{}'
        and sql_text <> ''
        order by exec_date desc
      """.format(sql_str(data.filter.lower()), sql_str(data.database)),
                                           limit=int(data.limit))
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'search-queries':
        where = "sql_text like '%{}%' order by exec_date desc".format(
            sql_str(data.query_filter))
        rows = store.sqlx('queries').query(where=where, limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-workers':
        workers_data = [
            dict(
                name=wkr.name,
                status=wkr.status,
                start_time=wkr.started,
                pid=wkr.pid,
            ) for wkr in workers.values()
        ]
        response_data = dict(data=workers_data, completed=True)

    elif data.req_type == 'reset-db':
        # Stop every worker except the web-app and monitor workers, then
        # recreate the store tables.
        for wkr_nm in list(workers):
            if wkr_nm in ('webapp', 'mon'):
                continue
            stop_worker(wkr_nm)
        store.create_tables(drop_first=True, ask=False)
        response_data = dict(completed=True)

    # In case handle is missing. Also checked for completed
    if response_data:
        response_data['orig_req'] = data_dict
        response_data['payload_type'] = 'client-response'
        response_data['sid'] = data.sid
        response_data['completed'] = response_data.get('completed', False)
        if response_data['completed']:
            res = '+Completed'
        elif 'queued' in response_data and response_data['queued']:
            res = '+Queued'
        else:
            res = '~Did not Complete'
        log('{} "{}" request "{}".'.format(res, data.req_type, data.id))
    else:
        response_data = response_data_for_missing

    # Respond to WebApp Worker
    send_to_webapp(response_data)

    return True