def get_statement_execution_log(statement_execution_id): with DBSession() as session: statement_execution = logic.get_statement_execution_by_id( statement_execution_id, session=session) api_assert(statement_execution is not None, message="Invalid statement execution") verify_query_execution_permission( statement_execution.query_execution_id, session=session) log_path = statement_execution.log_path try: if log_path.startswith("stream"): logs = logic.get_statement_execution_stream_logs( statement_execution_id) return list(map(lambda log: log.log, logs)) else: with DBSession() as session: MAX_LOG_RETURN_LINES = 2000 result = "" statement_execution = logic.get_statement_execution_by_id( statement_execution_id, session=session) if statement_execution is not None and statement_execution.has_log: with GenericReader( statement_execution.log_path) as reader: result = reader.read_lines( number_of_lines=MAX_LOG_RETURN_LINES) if len(result) == MAX_LOG_RETURN_LINES: result += [ "---------------------------------------------------------------------------", f"We are truncating results since it reached limit of {MAX_LOG_RETURN_LINES} lines.", ] return result except FileDoesNotExist as e: abort(RESOURCE_NOT_FOUND_STATUS_CODE, str(e))
def write_csv_to_sheet( self, sheet, statement_execution_id: int, worksheet_title: str, start_cell: str, ): with DBSession() as session: max_rows = self._get_max_rows( statement_execution_id, start_cell=start_cell, session=session ) num_rows = self._get_statement_execution_num_rows( statement_execution_id, session=session ) num_cols = self._get_statement_execution_num_cols( statement_execution_id, session=session ) start_cell_coord = worksheet_coord_to_coord(start_cell) end_cell_coord = ( start_cell_coord[0] + num_cols - 1, start_cell_coord[1] + num_rows - 1, ) end_cell = coord_to_worksheet_coord(end_cell_coord[0], end_cell_coord[1]) with gspread_worksheet( sheet, worksheet_title, end_cell_coord[0], end_cell_coord[1] ) as worksheet: csv = self._get_statement_execution_result_iter( statement_execution_id, number_of_lines=max_rows, session=session ) self._update_worksheet(worksheet, start_cell, end_cell, csv)
def export_statement_execution_result(statement_execution_id, exporter_name, exporter_params=None): with DBSession() as session: statement_execution = logic.get_statement_execution_by_id( statement_execution_id, session=session) api_assert(statement_execution is not None, message="Invalid statement execution") verify_query_execution_permission( statement_execution.query_execution_id, session=session) exporter = get_exporter(exporter_name) api_assert(exporter is not None, f"Invalid export name {exporter_name}") if exporter_params: valid, reason = validate_form(exporter.export_form, exporter_params) api_assert(valid, "Invalid exporter params, reason: " + reason) task = export_query_execution_task.apply_async(args=[ exporter.exporter_name, statement_execution_id, current_user.id, exporter_params or {}, ], ) return task.task_id
def download_statement_execution_result(statement_execution_id): with DBSession() as session: statement_execution = logic.get_statement_execution_by_id( statement_execution_id, session=session) api_assert(statement_execution is not None, message="Invalid statement execution") verify_query_execution_permission( statement_execution.query_execution_id, session=session) download_file_name = f"result_{statement_execution.query_execution_id}_{statement_execution_id}.csv" reader = GenericReader(statement_execution.result_path) response = None if reader.has_download_url: # If the Reader can generate a download, # we let user download the file by redirection download_url = reader.get_download_url( custom_name=download_file_name) response = redirect(download_url) else: # We read the raw file and download it for the user reader.start() raw = reader.read_raw() response = Response(raw) response.headers["Content-Type"] = "text/csv" response.headers[ "Content-Disposition"] = f'attachment; filename="{download_file_name}"' return response
def update_datadoc_editor( id, write=None, read=None, originator=None, # Used for websocket to identify sender, optional ): with DBSession() as session: editor = logic.get_data_doc_editor_by_id(id, session=session) if editor: assert_can_write(editor.data_doc_id, session=session) editor = logic.update_data_doc_editor(id, read, write, session=session) if editor: editor_dict = editor.to_dict() socketio.emit( "data_doc_editor", ( originator, editor_dict["data_doc_id"], editor_dict["uid"], editor_dict, ), namespace="/datadoc", room=editor_dict["data_doc_id"], broadcast=True, ) return editor_dict
def update_datadoc_schedule(id, cron=None, enabled=None, kwargs=None): if kwargs is not None: kwargs_valid, kwargs_valid_reason = validate_datadoc_schedule_config( kwargs) api_assert(kwargs_valid, kwargs_valid_reason) if cron is not None: api_assert(validate_cron(cron), "Invalid cron expression") schedule_name = schedule_logic.get_data_doc_schedule_name(id) with DBSession() as session: assert_can_write(id, session=session) schedule = schedule_logic.get_task_schedule_by_name(schedule_name, session=session) api_assert(schedule, "Schedule does not exist") verify_data_doc_permission(id, session=session) updated_fields = {} if cron is not None: updated_fields["cron"] = cron if enabled is not None: updated_fields["enabled"] = enabled if kwargs is not None: updated_fields["kwargs"] = { **kwargs, "user_id": current_user.id, "doc_id": id, } return schedule_logic.update_task_schedule( schedule.id, session=session, **updated_fields, )
def create_data_doc_from_execution( environment_id, execution_id, engine_id, query_string, title=None, ): with DBSession() as session: verify_environment_permission([environment_id]) environment = Environment.get(id=environment_id, session=session) execution = get_query_execution_by_id(execution_id, session=session) uid = current_user.id api_assert(execution.uid == uid, "You can only create from your own executions.") return logic.create_data_doc_from_execution( environment_id=environment_id, owner_uid=uid, engine_id=engine_id, query_string=query_string, execution_id=execution_id, public=environment.shareable, archived=False, title=title, meta={}, session=session, )
def create_metastore_schedule( id, cron, ): with DBSession() as session: return logic.create_query_metastore_update_schedule( metastore_id=id, cron=cron, session=session )
def get_table_column_stats(column_id): """Get all table stats column by id""" with DBSession() as session: column = logic.get_column_by_id(column_id, session=session) verify_data_table_permission(column.table_id, session=session) return DataTableColumnStatistics.get_all(column_id=column_id, session=session)
def get_table_query_examples( table_id, environment_id, uid=None, engine_id=None, with_table_id=None, limit=10, offset=0, ): api_assert(limit < 100) with DBSession() as session: verify_environment_permission([environment_id]) verify_data_table_permission(table_id, session=session) engines = admin_logic.get_query_engines_by_environment(environment_id, session=session) engine_ids = [engine.id for engine in engines] api_assert(engine_id is None or engine_id in engine_ids, "Invalid engine id") query_logs = logic.get_table_query_examples( table_id, engine_ids, uid=uid, engine_id=engine_id, with_table_id=with_table_id, limit=limit, offset=offset, session=session, ) query_ids = [log.query_execution_id for log in query_logs] return query_ids
def get_column(column_id, with_table=False): with DBSession() as session: column = logic.get_column_by_id(column_id, session=session) verify_data_table_permission(column.table_id, session=session) column_dict = column.to_dict(with_table) return column_dict
def create_table_samples( table_id, environment_id, engine_id, partition=None, where=None, order_by=None, order_by_asc=True, limit=100, ): with DBSession() as session: api_assert(limit <= 100, "Too many rows requested") verify_environment_permission([environment_id]) verify_data_table_permission(table_id, session=session) verify_query_engine_environment_permission(engine_id, environment_id, session=session) task = run_sample_query.apply_async(args=[ table_id, engine_id, current_user.id, limit, partition, where, order_by, order_by_asc, ]) return task.task_id
def remove_table_ownership(table_id): """Remove a table ownership""" with DBSession() as session: verify_data_table_permission(table_id, session=session) return logic.delete_table_ownership(table_id=table_id, uid=current_user.id, session=session)
def get_all_table_ownerships_by_table_id(table_id): """Add all table ownerships""" with DBSession() as session: verify_data_table_permission(table_id, session=session) return logic.get_all_table_ownerships_by_table_id(table_id=table_id, session=session)
def create_query_execution_notification(query_id,): with DBSession() as session: verify_query_execution_permission(query_id, session=session) return logic.create_query_execution_notification( query_execution_id=query_id, uid=current_user.id, session=session )
def __init__(self, query_execution_id, celery_task, query, statement_ranges): self._query_execution_id = query_execution_id self._celery_task = celery_task self._task_id = celery_task.request.id self.statement_execution_ids = [] self._query = query self._statement_ranges = statement_ranges # logging variable self._has_log = False self._log_cache = "" # [statement_logs] self._meta_info = None # statement_urls self._percent_complete = 0 # percent_complete self._statement_progress = {} # Connect to mysql db with DBSession() as session: query_execution = qe_logic.update_query_execution( self._query_execution_id, task_id=self._task_id, session=session ).to_dict() # Emit a event from socketio socketio.emit( "query_received", query_execution, namespace=QUERY_EXECUTION_NAMESPACE, room=self._query_execution_id, )
def create_query_engine( name, language, executor, executor_params, description=None, status_checker=None, metastore_id=None, ): with DBSession() as session: query_engine = QueryEngine.create( { "name": name, "description": description, "language": language, "executor": executor, "executor_params": executor_params, "metastore_id": metastore_id, "status_checker": status_checker, }, session=session, ) query_engine_dict = query_engine.to_dict_admin() return query_engine_dict
def create_table_column_stats_by_name(metastore_name, data): """Batch add/update table column stats""" # TODO: verify user is a service account with DBSession() as session: metastore = admin_logic.get_query_metastore_by_name(metastore_name, session=session) api_assert(metastore, "Invalid metastore") verify_metastore_permission(metastore.id, session=session) with DataTableFinder(metastore.id) as t_finder: for d in data: column = t_finder.get_table_column_by_name( schema_name=d["schema_name"], table_name=d["table_name"], column_name=d["column_name"], session=session, ) if column is not None: for s in d["stats"]: logic.upsert_table_column_stat( column_id=column.id, key=s["key"], value=s["value"], uid=current_user.id, session=session, ) return
def get_users_in_environment( id, limit, offset, ): with DBSession() as session: return environment_logic.get_users_in_environment( id, offset, limit, session=session )
def get_metastore(): """Lazily initialize metastore_loader from DB. Use outer-scope variable to memoized initialization Raises: LatestPartitionException: If the metastore does not exist for engine_id, throw error Returns: BaseMetastoreLoader: metastore loader to fetch table/schema info """ nonlocal _metastore_loader if _metastore_loader is not None: return _metastore_loader with DBSession() as session: engine = admin_logic.get_query_engine_by_id(engine_id, session=session) metastore_id = engine.metastore_id if engine else None _metastore_loader = (metastore.get_metastore_loader( metastore_id, session=session) if metastore_id is not None else None) if _metastore_loader is None: raise LatestPartitionException( f"Unable to load metastore for engine id {engine_id}") return _metastore_loader
def create_datadoc_schedule( id, cron, kwargs, ): kwargs_valid, kwargs_valid_reason = validate_datadoc_schedule_config( kwargs) api_assert(kwargs_valid, kwargs_valid_reason) api_assert(validate_cron(cron), "Invalid cron expression") schedule_name = schedule_logic.get_data_doc_schedule_name(id) with DBSession() as session: assert_can_write(id, session=session) data_doc = logic.get_data_doc_by_id(id, session=session) verify_environment_permission([data_doc.environment_id]) return schedule_logic.create_task_schedule( schedule_name, "tasks.run_datadoc.run_datadoc", cron=cron, kwargs={ **kwargs, "user_id": current_user.id, "doc_id": id }, task_type="user", session=session, )
def oauth_callback(self): LOG.debug("Handling Oauth callback...") if request.args.get("error"): return f"<h1>Error: {request.args.get('error')}</h1>" code = request.args.get("code") try: access_token = self._fetch_access_token(code) username, email, fullname = self._get_user_profile(access_token) with DBSession() as session: flask_login.login_user( AuthUser( self.login_user(username, email, fullname, session=session) ) ) except AuthenticationError as e: LOG.error("Failed authenticate oauth user", e) abort_unauthorized() next_url = QuerybookSettings.PUBLIC_URL if "next" in flask_session: next_url = flask_session["next"] del flask_session["next"] return redirect(next_url)
def get_data_docs(environment_id, filter_mode=None, offset=0, limit=500, archived=False): with DBSession() as session: verify_environment_permission([environment_id]) docs = [] if filter_mode == "mine": docs = logic.get_data_doc_by_user( current_user.id, environment_id=environment_id, offset=offset, limit=limit, session=session, ) elif filter_mode == "favorite": docs = logic.get_user_favorite_data_docs( current_user.id, environment_id=environment_id, session=session) elif filter_mode == "recent": docs = logic.get_user_recent_data_docs( current_user.id, environment_id=environment_id, session=session) return docs
def run_query_task(self, query_execution_id): executor = None error_message = None query_execution_status = QueryExecutionStatus.INITIALIZED try: executor = create_executor_from_execution(query_execution_id, celery_task=self) run_executor_until_finish(self, executor) except SoftTimeLimitExceeded: # SoftTimeLimitExceeded # This exception happens when query has been running for more than # the limited time (default 2 days) error_message = format_error_message( 7408, "The execution has exceeded the maximum allowed time.") except QueryExecutorException as e: error_message = format_error_message(7403, str(e)) except Exception as e: error_message = format_error_message( 7406, "{}\n{}".format(e, traceback.format_exc())) finally: # When the finally block is reached, it is expected # that the executor should be in one of the end state with DBSession() as session: query_execution_status = get_query_execution_final_status( query_execution_id, executor, error_message, session=session) notifiy_on_execution_completion(query_execution_id, session=session) # Executor exists means the query actually executed # This prevents cases when query_execution got executed twice if executor and query_execution_status == QueryExecutionStatus.DONE: log_query_per_table_task.delay(query_execution_id) return query_execution_status.value if executor is not None else None
def search_query_execution(environment_id, filters={}, orderBy=None, limit=100, offset=0): verify_environment_permission([environment_id]) with DBSession() as session: if "user" in filters: api_assert( current_user.id == filters["user"], "You can only search your own queries", ) else: filters["user"] = current_user.id query_executions = logic.search_query_execution( environment_id=environment_id, filters=filters, orderBy=orderBy, limit=limit, offset=offset, session=session, ) result = [ query_execution.to_dict(with_statement=False) for query_execution in query_executions ] return result
def search_query_snippet(environment_id, engine_id=None, is_public=False, golden=None): search_by = current_user.id with DBSession() as session: # TODO: Check if engine_id is under environment_id verify_environment_permission([environment_id]) engine_ids = ( [engine_id] if engine_id is not None else list( map( lambda e: e.id, admin_logic.get_query_engines_by_environment(environment_id), ) ) ) query_snippets = logic.search_snippet( search_by=search_by, engine_ids=engine_ids, is_public=is_public, golden=golden, session=session, ) query_snippet_dicts = [ dict(id=snippet[0], title=snippet[1]) for snippet in query_snippets ] return query_snippet_dicts
def create_query_snippet( context=None, title=None, engine_id=None, description=None, is_public=None, golden=None, ): created_by = current_user.id api_assert(len(context) > 0, "No empty context") api_assert(len(title) > 0, "No empty title") api_assert(engine_id is not None, "Must specify engine") api_assert( not golden or current_user.is_admin, "Only data gurus can create golden snippet" ) with DBSession() as session: verify_query_engine_permission(engine_id, session=session) return logic.create_snippet( created_by, context=context, title=title, engine_id=engine_id, description=description, is_public=is_public, golden=golden, session=session, )
def on_cancel(self): utcnow = datetime.datetime.utcnow() if len(self.statement_execution_ids) > 0: statement_execution_id = self.statement_execution_ids[-1] upload_path, has_log = self._upload_log(statement_execution_id) qe_logic.update_statement_execution( statement_execution_id, status=StatementExecutionStatus.CANCEL, completed_at=utcnow, has_log=self._has_log, log_path=upload_path if has_log else None, ) with DBSession() as session: query_execution = qe_logic.update_query_execution( self._query_execution_id, status=QueryExecutionStatus.CANCEL, completed_at=utcnow, session=session, ).to_dict() socketio.emit( "query_cancel", query_execution, namespace=QUERY_EXECUTION_NAMESPACE, room=self._query_execution_id, )
def _create_tables(self, schema_tables): with DBSession() as session: for (schema_id, schema_name, table) in schema_tables: self._create_table_table(schema_id, schema_name, table, session=session)
def create_tag_item(table_id, tag): with DBSession() as session: verify_data_table_permission(table_id, session=session) return logic.create_tag_item(table_id=table_id, tag_name=tag, uid=current_user.id, session=session)