def refresh_datasources(self, refreshAll=True):
    """endpoint that refreshes druid datasources metadata"""
    session = db.session()
    DruidCluster = ConnectorRegistry.sources['druid'].cluster_class
    for cluster in session.query(DruidCluster).all():
        cluster_name = cluster.cluster_name
        valid_cluster = True
        try:
            cluster.refresh_datasources(refreshAll=refreshAll)
        except Exception as e:
            # Flag this cluster as failed and keep processing the rest
            valid_cluster = False
            flash(
                "Error while processing cluster '{}'\n{}".format(
                    cluster_name, utils.error_msg_from_exception(e)),
                'danger')
            logging.exception(e)
        if valid_cluster:
            cluster.metadata_last_refreshed = datetime.now()
            flash(
                _('Refreshed metadata from cluster [{}]').format(
                    cluster.cluster_name),
                'info')
            session.commit()
    return redirect('/druiddatasourcemodelview/list/')
def wraps(self, *args, **kwargs):
    try:
        return f(self, *args, **kwargs)
    except SupersetSecurityException as e:
        logging.exception(e)
        return json_error_response(
            utils.error_msg_from_exception(e),
            status=e.status,
            stacktrace=traceback.format_exc(),
            link=e.link)
    except SupersetException as e:
        logging.exception(e)
        return json_error_response(
            utils.error_msg_from_exception(e),
            stacktrace=traceback.format_exc(),
            status=e.status)
    except Exception as e:
        logging.exception(e)
        return json_error_response(
            utils.error_msg_from_exception(e),
            stacktrace=traceback.format_exc())
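# --- Added sketch (not part of the original source) ---
# The `wraps` function above is the inner closure of an error-handling
# decorator: `f` is the wrapped view method, captured from the enclosing
# scope. A minimal sketch of how such a decorator is typically assembled
# and applied; the decorator name `api` and the `ExampleView.endpoint`
# method are illustrative assumptions, and `json_error_response`/`utils`
# are taken from the surrounding module as in the snippets above.
import functools
import logging
import traceback

def api(f):
    @functools.wraps(f)
    def wraps(self, *args, **kwargs):
        try:
            return f(self, *args, **kwargs)
        except Exception as e:  # narrower except clauses omitted for brevity
            logging.exception(e)
            return json_error_response(
                utils.error_msg_from_exception(e),
                stacktrace=traceback.format_exc())
    return wraps

class ExampleView:
    @api
    def endpoint(self):
        raise ValueError("boom")  # surfaced as a JSON error response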
def query(self, query_obj):
    df = None
    error_message = None
    qry = db.session.query(Annotation)
    qry = qry.filter(Annotation.layer_id == query_obj["filter"][0]["val"])
    if query_obj["from_dttm"]:
        qry = qry.filter(Annotation.start_dttm >= query_obj["from_dttm"])
    if query_obj["to_dttm"]:
        qry = qry.filter(Annotation.end_dttm <= query_obj["to_dttm"])
    status = utils.QueryStatus.SUCCESS
    try:
        df = pd.read_sql_query(qry.statement, db.engine)
    except Exception as e:
        status = utils.QueryStatus.FAILED
        logging.exception(e)
        error_message = utils.error_msg_from_exception(e)
    return QueryResult(
        status=status,
        df=df,
        duration=0,
        query="",
        error_message=error_message)
def refresh_datasources(self, refreshAll=True):
    """endpoint that refreshes druid datasources metadata"""
    session = db.session()
    DruidCluster = ConnectorRegistry.sources['druid'].cluster_class
    for cluster in session.query(DruidCluster).all():
        cluster_name = cluster.cluster_name
        try:
            cluster.refresh_datasources(refreshAll=refreshAll)
        except Exception as e:
            flash(
                "Error while processing cluster '{}'\n{}".format(
                    cluster_name, utils.error_msg_from_exception(e)),
                'danger')
            logging.exception(e)
            # Unlike the variant above, a single failing cluster aborts
            # the whole refresh and redirects back to the cluster list
            return redirect('/druidclustermodelview/list/')
        cluster.metadata_last_refreshed = datetime.now()
        flash(
            _('Refreshed metadata from cluster [{}]').format(
                cluster.cluster_name),
            'info')
        session.commit()
    return redirect('/druiddatasourcemodelview/list/')
def get_viz_annotation_data(
    annotation_layer: Dict[str, Any], force: bool
) -> Dict[str, Any]:
    chart = ChartDAO.find_by_id(annotation_layer["value"])
    if not chart:
        raise QueryObjectValidationError(_("The chart does not exist"))
    if not chart.datasource:
        raise QueryObjectValidationError(
            _("The chart datasource does not exist"))
    form_data = chart.form_data.copy()
    try:
        viz_obj = get_viz(
            datasource_type=chart.datasource.type,
            datasource_id=chart.datasource.id,
            form_data=form_data,
            force=force,
        )
        payload = viz_obj.get_payload()
        return payload["data"]
    except SupersetException as ex:
        raise QueryObjectValidationError(error_msg_from_exception(ex)) from ex
def refresh_datasources(self):
    """endpoint that refreshes elastic datasources metadata"""
    session = db.session()
    elastic_cluster = ConnectorRegistry.sources['elastic'].cluster_class
    for cluster in session.query(elastic_cluster).all():
        cluster_name = cluster.cluster_name
        try:
            cluster.refresh_datasources()
        except Exception as e:
            flash(
                'Error while processing cluster \'{}\'\n{}'.format(
                    cluster_name, error_msg_from_exception(e)),
                'danger')
            logging.exception(e)
            return redirect('/elasticclustermodelview/list/')
        cluster.metadata_last_refreshed = datetime.now()
        flash(
            'Refreshed metadata from cluster '
            '[' + cluster.cluster_name + ']',
            'info')
        session.commit()
    return redirect('/elasticdatasourcemodelview/list/')
def query(self, query_obj: QueryObjectDict) -> QueryResult:
    error_message = None
    qry = db.session.query(Annotation)
    qry = qry.filter(Annotation.layer_id == query_obj["filter"][0]["val"])
    if query_obj["from_dttm"]:
        qry = qry.filter(Annotation.start_dttm >= query_obj["from_dttm"])
    if query_obj["to_dttm"]:
        qry = qry.filter(Annotation.end_dttm <= query_obj["to_dttm"])
    status = utils.QueryStatus.SUCCESS
    try:
        df = pd.read_sql_query(qry.statement, db.engine)
    except Exception as ex:
        df = pd.DataFrame()
        status = utils.QueryStatus.FAILED
        logger.exception(ex)
        error_message = utils.error_msg_from_exception(ex)
    return QueryResult(
        status=status,
        df=df,
        duration=timedelta(0),
        query="",
        error_message=error_message,
    )
def query(self, query_obj):
    df = None
    error_message = None
    qry = db.session.query(Annotation)
    qry = qry.filter(Annotation.layer_id == query_obj['filter'][0]['val'])
    if query_obj['from_dttm']:
        qry = qry.filter(Annotation.start_dttm >= query_obj['from_dttm'])
    if query_obj['to_dttm']:
        qry = qry.filter(Annotation.end_dttm <= query_obj['to_dttm'])
    status = utils.QueryStatus.SUCCESS
    try:
        df = pd.read_sql_query(qry.statement, db.engine)
    except Exception as e:
        status = utils.QueryStatus.FAILED
        logging.exception(e)
        error_message = (
            utils.error_msg_from_exception(e))
    return QueryResult(
        status=status,
        df=df,
        duration=0,
        query='',
        error_message=error_message)
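# --- Added sketch (not part of the original source) ---
# The `query` variants above all expect the same `query_obj` shape: an
# annotation-layer id under `filter[0]["val"]` plus optional time bounds.
# A hedged usage sketch; the layer id, timestamps, and the
# `annotation_datasource` instance are illustrative assumptions inferred
# from the filters applied above.
from datetime import datetime

query_obj = {
    "filter": [{"val": 42}],            # Annotation.layer_id to select
    "from_dttm": datetime(2021, 1, 1),  # keep rows with start_dttm >= this
    "to_dttm": datetime(2021, 2, 1),    # keep rows with end_dttm <= this
}
result = annotation_datasource.query(query_obj)  # hypothetical instance
assert result.status in (utils.QueryStatus.SUCCESS, utils.QueryStatus.FAILED)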
def get_df_payload(  # pylint: disable=too-many-statements
    self, query_obj: QueryObject, **kwargs: Any
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.cache_key(query_obj, **kwargs)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except KeyError as ex:
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            invalid_columns = [
                col
                for col in query_obj.columns
                + query_obj.groupby
                + utils.get_column_names_from_metrics(query_obj.metrics)
                if col not in self.datasource.column_names
            ]
            if invalid_columns:
                raise QueryObjectValidationError(
                    _(
                        "Columns missing in datasource: %(invalid_columns)s",
                        invalid_columns=invalid_columns,
                    )
                )
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except QueryObjectValidationError as ex:
            error_message = str(ex)
            status = utils.QueryStatus.FAILED
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = str(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

        if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
            set_and_log_cache(
                cache_key,
                df,
                query,
                cached_dttm,
                self.cache_timeout,
                self.datasource.uid,
            )
    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_value is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
def get_df_payload(  # pylint: disable=too-many-statements,too-many-locals
    self,
    query_obj: QueryObject,
    force_cached: Optional[bool] = False,
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.query_cache_key(query_obj)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cache_value = None
    status = None
    query = ""
    annotation_data = {}
    error_message = None
    if cache_key and cache_manager.data_cache and not self.force:
        cache_value = cache_manager.data_cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                df = cache_value["df"]
                query = cache_value["query"]
                annotation_data = cache_value.get("annotation_data", {})
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except KeyError as ex:
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if force_cached and not is_loaded:
        logger.warning(
            "force_cached (QueryContext): value not found for key %s", cache_key
        )
        raise CacheLoadError("Error loading data from cache")

    if query_obj and not is_loaded:
        try:
            invalid_columns = [
                col
                for col in query_obj.columns
                + query_obj.groupby
                + utils.get_column_names_from_metrics(query_obj.metrics)
                if col not in self.datasource.column_names and col != DTTM_ALIAS
            ]
            if invalid_columns:
                raise QueryObjectValidationError(
                    _(
                        "Columns missing in datasource: %(invalid_columns)s",
                        invalid_columns=invalid_columns,
                    )
                )
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            annotation_data = self.get_annotation_data(query_obj)
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except QueryObjectValidationError as ex:
            error_message = str(ex)
            status = utils.QueryStatus.FAILED
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = str(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

        if is_loaded and cache_key and status != utils.QueryStatus.FAILED:
            set_and_log_cache(
                cache_manager.data_cache,
                cache_key,
                {"df": df, "query": query, "annotation_data": annotation_data},
                self.cache_timeout,
                self.datasource.uid,
            )
    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "annotation_data": annotation_data,
        "error": error_message,
        "is_cached": cache_value is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
    self, query_obj: QueryObject, **kwargs: Any
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.cache_key(query_obj, **kwargs)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = "{}".format(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

        if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
            try:
                cache_value = dict(dttm=cached_dttm, df=df, query=query)
                cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
                logger.info(
                    "Caching %d chars at key %s", len(cache_binary), cache_key
                )
                stats_logger.incr("set_cache_key")
                cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
            except Exception as ex:  # pylint: disable=broad-except
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logger.warning("Could not cache key %s", cache_key)
                logger.exception(ex)
                cache.delete(cache_key)
    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_key is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
def get_df_payload(self, query_obj, **kwargs):
    """Handles caching around the df payload retrieval"""
    cache_key = query_obj.cache_key(
        datasource=self.datasource.uid, **kwargs) if query_obj else None
    logging.info('Cache key: {}'.format(cache_key))
    is_loaded = False
    stacktrace = None
    df = None
    cached_dttm = datetime.utcnow().isoformat().split('.')[0]
    cache_value = None
    status = None
    query = ''
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr('loaded_from_cache')
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value['df']
                query = cache_value['query']
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
            except Exception as e:
                logging.exception(e)
                logging.error('Error reading cache: ' +
                              utils.error_msg_from_exception(e))
            logging.info('Serving from cache')

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result['status']
            query = query_result['query']
            error_message = query_result['error_message']
            df = query_result['df']
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr('loaded_from_source')
                is_loaded = True
        except Exception as e:
            logging.exception(e)
            if not error_message:
                error_message = '{}'.format(e)
            status = utils.QueryStatus.FAILED
            stacktrace = traceback.format_exc()

        if (
                is_loaded and
                cache_key and
                cache and
                status != utils.QueryStatus.FAILED):
            try:
                cache_value = dict(
                    dttm=cached_dttm,
                    df=df if df is not None else None,
                    query=query,
                )
                cache_value = pkl.dumps(
                    cache_value, protocol=pkl.HIGHEST_PROTOCOL)
                logging.info('Caching {} chars at key {}'.format(
                    len(cache_value), cache_key))
                stats_logger.incr('set_cache_key')
                cache.set(
                    cache_key,
                    cache_value,
                    timeout=self.cache_timeout)
            except Exception as e:
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logging.warning('Could not cache key {}'.format(cache_key))
                logging.exception(e)
                cache.delete(cache_key)

    return {
        'cache_key': cache_key,
        'cached_dttm': cache_value['dttm'] if cache_value is not None else None,
        'cache_timeout': self.cache_timeout,
        'df': df,
        'error': error_message,
        'is_cached': cache_key is not None,
        'query': query,
        'status': status,
        'stacktrace': stacktrace,
        'rowcount': len(df.index) if df is not None else 0,
    }
def get_df_payload(self, query_obj: QueryObject, **kwargs):
    """Handles caching around the df payload retrieval"""
    extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
    cache_key = (
        query_obj.cache_key(
            datasource=self.datasource.uid,
            extra_cache_keys=extra_cache_keys,
            **kwargs
        )
        if query_obj
        else None
    )
    logging.info("Cache key: {}".format(cache_key))
    is_loaded = False
    stacktrace = None
    df = None
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loaded_from_cache")
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
            except Exception as e:
                logging.exception(e)
                logging.error(
                    "Error reading cache: " + utils.error_msg_from_exception(e)
                )
            logging.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                is_loaded = True
        except Exception as e:
            logging.exception(e)
            if not error_message:
                error_message = "{}".format(e)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

        if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
            try:
                cache_value = dict(
                    dttm=cached_dttm, df=df if df is not None else None, query=query
                )
                cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
                logging.info(
                    "Caching {} chars at key {}".format(len(cache_binary), cache_key)
                )
                stats_logger.incr("set_cache_key")
                cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
            except Exception as e:
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logging.warning("Could not cache key {}".format(cache_key))
                logging.exception(e)
                cache.delete(cache_key)

    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_key is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index) if df is not None else 0,
    }
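# --- Added sketch (not part of the original source) ---
# The variants above that call `pkl.dumps`/`pkl.loads` store the cached
# payload as a pickled dict keyed by the query's cache key. A minimal,
# self-contained round-trip of that scheme, assuming only the standard
# library; the plain dict `store` stands in for the Flask-Caching backend
# used by the real code.
import pickle as pkl
from datetime import datetime

store = {}  # stand-in for the cache backend

value = {"dttm": datetime.utcnow().isoformat(), "df": None, "query": "SELECT 1"}
store["some_cache_key"] = pkl.dumps(value, protocol=pkl.HIGHEST_PROTOCOL)

restored = pkl.loads(store["some_cache_key"])
assert restored["query"] == "SELECT 1"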
def table_metadata(
    self, database: Database, table_name: str, schema_name: str
) -> FlaskResponse:
    """Table schema info
    ---
    get:
      description: Get database table metadata
      parameters:
      - in: path
        schema:
          type: integer
        name: pk
        description: The database id
      - in: path
        schema:
          type: string
        name: table_name
        description: Table name
      - in: path
        schema:
          type: string
        name: schema
        description: Table schema
      responses:
        200:
          description: Table schema info
          content:
            text/plain:
              schema:
                type: object
                properties:
                  columns:
                    type: array
                    description: Table columns info
                    items:
                      type: object
                      properties:
                        keys:
                          type: array
                          items:
                            type: string
                        longType:
                          type: string
                        name:
                          type: string
                        type:
                          type: string
                  foreignKeys:
                    type: array
                    description: Table list of foreign keys
                    items:
                      type: object
                      properties:
                        column_names:
                          type: array
                          items:
                            type: string
                        name:
                          type: string
                        options:
                          type: object
                        referred_columns:
                          type: array
                          items:
                            type: string
                        referred_schema:
                          type: string
                        referred_table:
                          type: string
                        type:
                          type: string
                  indexes:
                    type: array
                    description: Table list of indexes
                    items:
                      type: object
                      properties:
                        column_names:
                          type: array
                          items:
                            type: string
                        name:
                          type: string
                        options:
                          type: object
                        referred_columns:
                          type: array
                          items:
                            type: string
                        referred_schema:
                          type: string
                        referred_table:
                          type: string
                        type:
                          type: string
                  primaryKey:
                    type: object
                    properties:
                      column_names:
                        type: array
                        items:
                          type: string
                      name:
                        type: string
                      type:
                        type: string
        400:
          $ref: '#/components/responses/400'
        401:
          $ref: '#/components/responses/401'
        404:
          $ref: '#/components/responses/404'
        422:
          $ref: '#/components/responses/422'
        500:
          $ref: '#/components/responses/500'
    """
    self.incr_stats("init", self.table_metadata.__name__)
    try:
        table_info: Dict = get_table_metadata(database, table_name, schema_name)
    except SQLAlchemyError as ex:
        self.incr_stats("error", self.table_metadata.__name__)
        return self.response_422(error_msg_from_exception(ex))
    self.incr_stats("success", self.table_metadata.__name__)
    return self.response(200, **table_info)
def extract_error_message(cls, e):
    """Extract error message for queries"""
    return utils.error_msg_from_exception(e)
def on_security_exception(self: Any, ex: Exception) -> Response:
    return self.response(403, **{"message": utils.error_msg_from_exception(ex)})
def table_metadata(
    self, pk: int, table_name: str, schema_name: str
):  # pylint: disable=invalid-name
    """Table schema info
    ---
    get:
      description: Get database table metadata
      parameters:
      - in: path
        schema:
          type: integer
        name: pk
        description: The database id
      - in: path
        schema:
          type: string
        name: table_name
        description: Table name
      - in: path
        schema:
          type: string
        name: schema
        description: Table schema
      responses:
        200:
          description: Table schema info
          content:
            text/plain:
              schema:
                type: object
                properties:
                  columns:
                    type: array
                    description: Table columns info
                    items:
                      type: object
                      properties:
                        keys:
                          type: array
                          items:
                            type: string
                        longType:
                          type: string
                        name:
                          type: string
                        type:
                          type: string
                  foreignKeys:
                    type: array
                    description: Table list of foreign keys
                    items:
                      type: object
                      properties:
                        column_names:
                          type: array
                          items:
                            type: string
                        name:
                          type: string
                        options:
                          type: object
                        referred_columns:
                          type: array
                          items:
                            type: string
                        referred_schema:
                          type: string
                        referred_table:
                          type: string
                        type:
                          type: string
                  indexes:
                    type: array
                    description: Table list of indexes
                    items:
                      type: object
                      properties:
                        column_names:
                          type: array
                          items:
                            type: string
                        name:
                          type: string
                        options:
                          type: object
                        referred_columns:
                          type: array
                          items:
                            type: string
                        referred_schema:
                          type: string
                        referred_table:
                          type: string
                        type:
                          type: string
                  primaryKey:
                    type: object
                    properties:
                      column_names:
                        type: array
                        items:
                          type: string
                      name:
                        type: string
                      type:
                        type: string
        400:
          $ref: '#/components/responses/400'
        401:
          $ref: '#/components/responses/401'
        404:
          $ref: '#/components/responses/404'
        422:
          $ref: '#/components/responses/422'
        500:
          $ref: '#/components/responses/500'
    """
    table_name_parsed = parse_js_uri_path_item(table_name)
    schema_parsed = parse_js_uri_path_item(schema_name, eval_undefined=True)
    # schemas can be None but not tables
    if not table_name_parsed:
        return self.response_422(
            message=_("Could not parse table name or schema"))
    database: Database = self.datamodel.get(pk, self._base_filters)
    if not database:
        return self.response_404()
    try:
        table_info: Dict = get_table_metadata(
            database, table_name_parsed, schema_parsed)
    except SQLAlchemyError as e:
        return self.response_422(error_msg_from_exception(e))
    return self.response(200, **table_info)
def _extract_error_message(cls, ex: Exception) -> str:
    """Extract error message for queries"""
    return utils.error_msg_from_exception(ex)
def _extract_error_message(cls, e: Exception) -> Optional[str]:
    """Extract error message for queries"""
    return utils.error_msg_from_exception(e)
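# --- Added sketch (not part of the original source) ---
# Every snippet above funnels exceptions through
# `utils.error_msg_from_exception`. A plausible minimal implementation of
# such a helper, assuming it prefers a driver-provided `message` attribute
# and falls back to `str(ex)`; this is a hedged sketch for illustration,
# not necessarily the library's actual code.
def error_msg_from_exception(ex: Exception) -> str:
    msg = getattr(ex, "message", None)  # some DB drivers attach .message
    if isinstance(msg, str) and msg:
        return msg
    return str(ex)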