def create_table_samples(
    table_id,
    environment_id,
    engine_id,
    partition=None,
    where=None,
    order_by=None,
    order_by_asc=True,
    limit=100,
):
    """Queue an async sample query for a table and return the Celery task id.

    Permission checks (environment, table, engine-in-environment) run before
    the task is dispatched; at most 100 rows may be requested.
    """
    with DBSession() as session:
        api_assert(limit <= 100, "Too many rows requested")
        verify_environment_permission([environment_id])
        verify_data_table_permission(table_id, session=session)
        verify_query_engine_environment_permission(
            engine_id, environment_id, session=session
        )
        sample_task = run_sample_query.apply_async(
            args=[
                table_id,
                engine_id,
                current_user.id,
                limit,
                partition,
                where,
                order_by,
                order_by_asc,
            ]
        )
        return sample_task.task_id
def get_statement_execution_log(statement_execution_id):
    """Return the log lines for a statement execution.

    Logs come either from a stream store (log_path prefixed with "stream")
    or from a file read via GenericReader, truncated to 2000 lines with a
    truncation banner appended when the limit is hit.

    Raises 404 (via abort) when the backing log file does not exist.
    """
    with DBSession() as session:
        statement_execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session)
        api_assert(statement_execution is not None,
                   message="Invalid statement execution")
        verify_query_execution_permission(
            statement_execution.query_execution_id, session=session)
        log_path = statement_execution.log_path
        try:
            if log_path.startswith("stream"):
                # Stream-backed logs: fetch log rows and return their text
                logs = logic.get_statement_execution_stream_logs(
                    statement_execution_id)
                return list(map(lambda log: log.log, logs))
            else:
                # File-backed logs: re-fetch the execution in a fresh session
                # and read up to MAX_LOG_RETURN_LINES lines from the file.
                with DBSession() as session:
                    MAX_LOG_RETURN_LINES = 2000
                    result = ""
                    statement_execution = logic.get_statement_execution_by_id(
                        statement_execution_id, session=session)
                    if statement_execution is not None and statement_execution.has_log:
                        with GenericReader(
                                statement_execution.log_path) as reader:
                            result = reader.read_lines(
                                number_of_lines=MAX_LOG_RETURN_LINES)
                            # Reading exactly the cap implies truncation;
                            # append a banner so the caller knows.
                            if len(result) == MAX_LOG_RETURN_LINES:
                                result += [
                                    "---------------------------------------------------------------------------",
                                    f"We are truncating results since it reached limit of {MAX_LOG_RETURN_LINES} lines.",
                                ]
                    return result
        except FileDoesNotExist as e:
            abort(RESOURCE_NOT_FOUND_STATUS_CODE, str(e))
def verify_query_execution_owner(execution_id, session=None):
    """Assert that the current user owns the given query execution.

    Fails via api_assert when the execution does not exist (getattr falls
    back to None, which never matches a user id) or belongs to someone else.
    """
    execution = query_execution_logic.get_query_execution_by_id(
        execution_id, session=session
    )
    api_assert(
        current_user.id == getattr(execution, "uid", None),
        # Fixed typo: "preformed" -> "performed"
        "Action can only be performed by execution owner",
    )
def get_table_query_examples(
    table_id,
    environment_id,
    uid=None,
    engine_id=None,
    with_table_id=None,
    limit=10,
    offset=0,
):
    """Return query-execution ids of example queries that touched a table.

    Optional filters: querying user (uid), a specific engine, or co-occurring
    table (with_table_id). Pagination via limit/offset, capped below 100.
    """
    # Supply a message so the failure surfaces a meaningful API error
    # (was a bare api_assert with no message).
    api_assert(limit < 100, "Too many rows requested")
    with DBSession() as session:
        verify_environment_permission([environment_id])
        verify_data_table_permission(table_id, session=session)
        engines = admin_logic.get_query_engines_by_environment(
            environment_id, session=session
        )
        engine_ids = [engine.id for engine in engines]
        # A requested engine must belong to the environment
        api_assert(engine_id is None or engine_id in engine_ids, "Invalid engine id")
        query_logs = logic.get_table_query_examples(
            table_id,
            engine_ids,
            uid=uid,
            engine_id=engine_id,
            with_table_id=with_table_id,
            limit=limit,
            offset=offset,
            session=session,
        )
        return [log.query_execution_id for log in query_logs]
def download_statement_execution_result(statement_execution_id):
    """Download a statement execution's result as a plain-text attachment.

    When the result store can produce a download URL, the file is
    proxy-streamed; otherwise the raw result file is read and served.
    """
    with DBSession() as session:
        execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session
        )
        api_assert(execution is not None, message="Invalid statement execution")
        verify_query_execution_permission(
            execution.query_execution_id, session=session
        )

        reader = GenericReader(execution.result_path)
        if reader.has_download_url:
            # Proxy-stream from the store's download URL in 10 KB chunks
            proxied = requests.get(reader.get_download_url(), stream=True)
            response = Response(proxied.iter_content(chunk_size=10 * 1024))
        else:
            # No download URL available: read the raw file ourselves
            reader.start()
            response = Response(reader.read_raw())

        response.headers["Content-Type"] = "text/plain"
        response.headers[
            "Content-Disposition"] = 'attachment; filename="result_{}_{}.csv"'.format(
                execution.query_execution_id, statement_execution_id)
        return response
def create_datadoc_schedule(
    id,
    cron,
    kwargs,
):
    """Create the scheduled task that periodically runs a DataDoc.

    Validates the schedule config and cron expression, checks write access
    and environment permission, then registers the task schedule.
    """
    kwargs_valid, kwargs_valid_reason = validate_datadoc_schedule_config(kwargs)
    api_assert(kwargs_valid, kwargs_valid_reason)
    api_assert(validate_cron(cron), "Invalid cron expression")

    schedule_name = schedule_logic.get_data_doc_schedule_name(id)
    with DBSession() as session:
        assert_can_write(id, session=session)
        doc = logic.get_data_doc_by_id(id, session=session)
        verify_environment_permission([doc.environment_id])

        # The runner also needs the acting user and the doc id
        task_kwargs = dict(kwargs, user_id=current_user.id, doc_id=id)
        return schedule_logic.create_task_schedule(
            schedule_name,
            "tasks.run_datadoc.run_datadoc",
            cron=cron,
            kwargs=task_kwargs,
            task_type="user",
            session=session,
        )
def create_data_doc_from_execution(
    environment_id,
    execution_id,
    engine_id,
    query_string,
    title=None,
):
    """Create a DataDoc seeded from one of the caller's own query executions.

    The doc inherits the environment's shareable flag as its public setting
    and is owned by the current user.
    """
    with DBSession() as session:
        verify_environment_permission([environment_id])
        env = Environment.get(id=environment_id, session=session)
        execution = get_query_execution_by_id(execution_id, session=session)

        owner_uid = current_user.id
        # Only the execution's author may turn it into a doc
        api_assert(
            execution.uid == owner_uid,
            "You can only create from your own executions.",
        )

        return logic.create_data_doc_from_execution(
            environment_id=environment_id,
            owner_uid=owner_uid,
            engine_id=engine_id,
            query_string=query_string,
            execution_id=execution_id,
            public=env.shareable,
            archived=False,
            title=title,
            meta={},
            session=session,
        )
def create_table_column_stats_by_name(metastore_name, data):
    """Batch add/update table column stats, addressing columns by name.

    Each entry of ``data`` carries schema/table/column names plus a list of
    {key, value} stats. Columns that cannot be resolved are skipped.
    """
    # TODO: verify user is a service account
    with DBSession() as session:
        metastore = admin_logic.get_query_metastore_by_name(
            metastore_name, session=session
        )
        api_assert(metastore, "Invalid metastore")
        verify_metastore_permission(metastore.id, session=session)

        with DataTableFinder(metastore.id) as finder:
            for entry in data:
                column = finder.get_table_column_by_name(
                    schema_name=entry["schema_name"],
                    table_name=entry["table_name"],
                    column_name=entry["column_name"],
                    session=session,
                )
                if column is None:
                    # Unknown column: best-effort batch, just skip it
                    continue
                for stat in entry["stats"]:
                    logic.upsert_table_column_stat(
                        column_id=column.id,
                        key=stat["key"],
                        value=stat["value"],
                        uid=current_user.id,
                        session=session,
                    )
def get_task_run_records_by_name(id, offset=0, limit=10, hide_successful_jobs=False):
    """Return serialized run records for a scheduled task, paginated.

    Each record dict is augmented with its parent task's task_type.
    """
    api_assert(limit < 100, "You are requesting too much data")
    with DBSession() as session:
        task = logic.get_task_schedule_by_id(id=id, session=session)
        api_assert(task, "Invalid task id")
        records, _ = logic.get_task_run_record_run_by_name(
            name=task.name,
            offset=offset,
            limit=limit,
            hide_successful_jobs=hide_successful_jobs,
            session=session,
        )
        # Serialize each record, tagging it with its parent task's type
        return [
            dict(record.to_dict(), task_type=record.task.task_type)
            for record in records
        ]
def search_query_execution(environment_id,
                           filters=None,
                           orderBy=None,
                           limit=100,
                           offset=0):
    """Search query executions within an environment.

    Users may only search their own executions: the "user" filter is forced
    to the current user, or validated against it when explicitly supplied.

    Returns a list of execution dicts (without statement bodies).
    """
    verify_environment_permission([environment_id])
    # Bug fix: the original used a mutable default (`filters={}`) and then
    # mutated it, leaking the injected "user" filter across calls. Copy the
    # incoming dict so neither the default nor the caller's dict is mutated.
    filters = dict(filters or {})
    with DBSession() as session:
        if "user" in filters:
            api_assert(
                current_user.id == filters["user"],
                "You can only search your own queries",
            )
        else:
            filters["user"] = current_user.id
        query_executions = logic.search_query_execution(
            environment_id=environment_id,
            filters=filters,
            orderBy=orderBy,
            limit=limit,
            offset=offset,
            session=session,
        )
        return [
            query_execution.to_dict(with_statement=False)
            for query_execution in query_executions
        ]
def suggest_user(name, limit=10, offset=None):
    """Autocomplete users by name via the Elasticsearch completion suggester.

    Returns a list of {id, username, fullname} dicts; failures are logged
    and degrade to an empty list (best-effort endpoint).
    """
    api_assert(limit is None or limit <= 100, "Requesting too many users")
    query = {
        "suggest": {
            "suggest": {
                "text": (name or "").lower(),
                "completion": {
                    "field": "suggest",
                    "size": limit
                },
            }
        },
    }
    index_name = ES_CONFIG["users"]["index_name"]
    # Removed commented-out debug print; replaced the None-check-in-finally
    # dance with a plain default that the try block overwrites on success.
    result = {}
    try:
        result = get_hosted_es().search(index=index_name, body=query) or {}
    except Exception as e:
        # Best-effort: log and fall through with an empty result
        LOG.info(e)
    options = next(iter(result.get("suggest", {}).get("suggest", [])),
                   {}).get("options", [])
    return [{
        "id": option.get("_source", {}).get("id"),
        "username": option.get("_source", {}).get("username"),
        "fullname": option.get("_source", {}).get("fullname"),
    } for option in options]
def get_board_by_id(board_id):
    """Fetch a board the current user can read, as a dict.

    The dict embeds the board's docs, tables and items; 404 on a bad id.
    """
    with DBSession() as session:
        assert_can_read(board_id, session=session)
        board = Board.get(id=board_id, session=session)
        api_assert(board is not None, "Invalid board id", 404)
        verify_environment_permission([board.environment_id])
        embedded_fields = ["docs", "tables", "items"]
        return board.to_dict(extra_fields=embedded_fields)
def download_statement_execution_result(statement_execution_id):
    """Download a statement execution's result as a CSV attachment.

    Redirects to the store's download URL when one is available; otherwise
    reads the raw result file and serves it directly.
    """
    with DBSession() as session:
        execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session
        )
        api_assert(execution is not None, message="Invalid statement execution")
        verify_query_execution_permission(
            execution.query_execution_id, session=session
        )

        download_file_name = f"result_{execution.query_execution_id}_{statement_execution_id}.csv"
        reader = GenericReader(execution.result_path)
        if reader.has_download_url:
            # Let the client fetch straight from the store by redirection
            result_response = redirect(
                reader.get_download_url(custom_name=download_file_name)
            )
        else:
            # No download URL: read the raw file and serve it ourselves
            reader.start()
            result_response = Response(reader.read_raw())

        result_response.headers["Content-Type"] = "text/csv"
        result_response.headers[
            "Content-Disposition"] = f'attachment; filename="{download_file_name}"'
        return result_response
def get_schema(schema_id, include_metastore=False, include_table=False):
    """Return a schema as a dict, optionally embedding its metastore/tables."""
    with DBSession() as session:
        schema = logic.get_schema_by_id(schema_id, session=session)
        api_assert(schema, "Invalid schema")
        verify_metastore_permission(schema.metastore_id, session=session)
        return schema.to_dict(include_metastore, include_table)
def update_tag(tag_id, meta):
    """Update a tag's meta; admin-flagged tags can only be edited by admins.

    None-valued fields are skipped rather than overwritten.
    """
    tag = Tag.get(id=tag_id)
    admin_only = (tag.meta or {}).get("admin", False)
    if admin_only:
        api_assert(current_user.is_admin, "Tag can only be modified by admin")
    return Tag.update(id=tag_id, fields={"meta": meta}, skip_if_value_none=True)
def soft_delete_data_doc(id):
    """Archive (soft delete) a data doc; only its owner may do this."""
    with DBSession() as session:
        doc = logic.get_data_doc_by_id(id=id, session=session)
        api_assert(doc, "Invalid doc")
        verify_environment_permission([doc.environment_id])
        # Ownership check: soft delete is owner-only
        api_assert(
            current_user.id == doc.owner_uid,
            "You can only delete your own data doc",
        )
        logic.update_data_doc(id=id, archived=True, session=session)
def delete_board(board_id, **fields):
    """Delete a board after an edit-permission check.

    Favorite boards are protected and cannot be deleted.
    """
    with DBSession() as session:
        assert_can_edit(board_id, session=session)
        board = Board.get(id=board_id, session=session)
        # Idiom fix: `!=` instead of the `not ... ==` double negative
        api_assert(board.board_type != "favorite", "Cannot delete favorite")
        Board.delete(board.id, session=session)
def get_task_run_record_by_name(name, offset=0, limit=10, hide_successful_jobs=False):
    """Return paginated run records and total count for a task name.

    Args:
        name: task schedule name to look up.
        offset/limit: pagination window (limit capped below 1000).
        hide_successful_jobs: when True, ask the logic layer to filter out
            successful runs.
    """
    api_assert(limit < 1000, "You are requesting too much data")
    # Bug fix: hide_successful_jobs was accepted but never forwarded, so the
    # flag had no effect (cf. get_task_run_records_by_name which forwards it).
    records, count = logic.get_task_run_record_run_by_name(
        name=name,
        offset=offset,
        limit=limit,
        hide_successful_jobs=hide_successful_jobs,
    )
    return {"data": records, "count": count}
def update_schedule(id, **kwargs):
    """Update a task schedule, accepting only whitelisted fields.

    A new cron expression, when supplied, is validated before the update.
    """
    allowed_fields = {"cron", "args", "kwargs", "enabled", "options"}
    updates = {
        field: value for field, value in kwargs.items() if field in allowed_fields
    }
    if "cron" in updates:
        api_assert(validate_cron(updates["cron"]), "Invalid cron expression")
    return logic.update_task_schedule(id=id, **updates)
def run_data_doc(id):
    """Trigger an immediate run of a data doc's existing schedule."""
    schedule_name = schedule_logic.get_data_doc_schedule_name(id)
    with DBSession() as session:
        assert_can_write(id, session=session)
        verify_data_doc_permission(id, session=session)
        schedule = schedule_logic.get_task_schedule_by_name(
            schedule_name, session=session
        )
        # The doc must already have a schedule to run on demand
        api_assert(schedule, "Schedule does not exist")
        run_and_log_scheduled_task(schedule.id, session=session)
def assert_can_write(doc_id, session=None):
    """Raise 403 unless the current user can write the doc; 404 if it doesn't exist."""
    try:
        writable = user_can_write(doc_id, uid=current_user.id, session=session)
        api_assert(writable, "CANNOT_WRITE_DATADOC", 403)
    except DocDoesNotExist:
        api_assert(False, "DOC_DNE", 404)
def assert_can_read(board_id, session=None):
    """Raise 403 unless the current user can read the board; 404 if it doesn't exist."""
    try:
        readable = user_can_read(board_id, uid=current_user.id, session=session)
        api_assert(readable, "CANNOT_READ_BOARD", 403)
    except BoardDoesNotExist:
        api_assert(False, "BOARD_DNE", 404)
def get_table_query_examples_users(table_id, environment_id, limit=5):
    """Return the users who have example queries against a table.

    Returns a list of {"uid", "count"} dicts, at most ``limit`` (capped at 10).
    """
    # Supply a message so the failure surfaces a meaningful API error
    # (was a bare api_assert with no message).
    api_assert(limit <= 10, "Requesting too many users")
    verify_environment_permission([environment_id])
    verify_data_table_permission(table_id)
    engines = admin_logic.get_query_engines_by_environment(environment_id)
    engine_ids = [engine.id for engine in engines]
    users = logic.get_query_example_users(table_id, engine_ids, limit=limit)
    return [{"uid": r[0], "count": r[1]} for r in users]
def verify_query_engine_environment_permission(query_engine_id,
                                               environment_id,
                                               session=None):
    """Assert that a query engine is attached to an environment.

    Fails via api_assert with ACCESS_RESTRICTED_STATUS_CODE when no
    QueryEngineEnvironment row links the two ids.

    NOTE(review): `session` defaults to None but is used unconditionally —
    presumably a decorator or every caller supplies it; confirm before
    relying on the default.
    """
    api_assert(
        session.query(QueryEngineEnvironment).filter_by(
            query_engine_id=query_engine_id,
            environment_id=environment_id).first() is not None,
        message="Engine is not in Environment",
        status_code=ACCESS_RESTRICTED_STATUS_CODE,
    )
def delete_board_item(board_id, item_type, item_id):
    """Remove a data-doc or table item from a board the user can edit."""
    # Idiom fix: membership test instead of chained `or` equality checks
    api_assert(item_type in ("data_doc", "table"), "Invalid item type")
    with DBSession() as session:
        assert_can_edit(board_id, session=session)
        board = Board.get(id=board_id, session=session)
        logic.remove_item_from_board(board.id, item_id, item_type, session=session)
def get_query_engine_status(engine_id):
    """Run the engine's configured status checker and return its report.

    The checker class is resolved from the engine's feature params
    ("status_checker", defaulting to "NullChecker").
    """
    engine_checker = None
    # Security check: caller must have access to this engine
    with DBSession() as session:
        verify_query_engine_permission(engine_id, session=session)
        engine = admin_logic.get_query_engine_by_id(engine_id, session=session)
        checker_name = engine.get_feature_params().get(
            "status_checker", "NullChecker"
        )
        engine_checker = get_engine_checker_class(checker_name)
    api_assert(engine_checker is not None, "Invalid engine checker")
    return engine_checker.check(engine_id=engine_id, uid=current_user.id)
def get_table(table_id, with_schema=True, with_column=True, with_warnings=True):
    """Return a table as a dict after a schema-level permission check.

    The with_* flags control which related objects are embedded in the dict.
    """
    # Removed commented-out dead code and the redundant temp variable.
    with DBSession() as session:
        table = logic.get_table_by_id(table_id, session=session)
        api_assert(table, "Invalid table")
        verify_data_schema_permission(table.schema_id, session=session)
        return table.to_dict(with_schema, with_column, with_warnings)
def update_board_item_fields(board_item_id, **fields):
    """Update fields on a board item; requires edit access to its parent board."""
    with DBSession() as session:
        item = BoardItem.get(id=board_item_id, session=session)
        api_assert(item, "List item does not exist")
        assert_can_edit(item.parent_board_id, session=session)
        return logic.update_board_item(id=board_item_id, **fields, session=session)
def assert_is_owner(doc_id, session=None):
    """Raise 403 unless the current user owns the doc; 404 if it doesn't exist."""
    try:
        doc = session.query(DataDoc).filter(DataDoc.id == doc_id).first()
        if doc is None:
            # Normalize "missing" into the same 404 path as the exception
            raise DocDoesNotExist
        owns_doc = doc.owner_uid == current_user.id
        api_assert(owns_doc, "NOT_DATADOC_OWNER", 403)
    except DocDoesNotExist:
        api_assert(False, "DOC_DNE", 404)
def assert_is_owner(board_id, session=None):
    """Raise 403 unless the current user owns the board; 404 if it doesn't exist."""
    try:
        board = session.query(Board).filter(Board.id == board_id).first()
        if board is None:
            # Normalize "missing" into the same 404 path as the exception
            raise BoardDoesNotExist
        owns_board = board.owner_uid == current_user.id
        api_assert(owns_board, "NOT_BOARD_OWNER", 403)
    except BoardDoesNotExist:
        api_assert(False, "BOARD_DNE", 404)