def log_snowflake_table(
    table_name: str,
    connection_string: Union[str, SnowflakeConnection],
    database: str,
    schema: str,
    key: Optional[str] = None,
    with_preview: Optional[bool] = None,
    with_schema: Optional[bool] = None,
    raise_on_error: bool = False,
):
    """Track a Snowflake table's metadata (preview / schema / size) via log_data.

    :param table_name: table name
    :param connection_string: either a connection string or an actual connection
    :param database: database the table lives in
    :param schema: schema the table lives in
    :param key: logging key; defaults to the table name
    :param with_preview: whether to include a data preview
    :param with_schema: whether to include the table schema (also drives size logging)
    :param raise_on_error: re-raise tracking errors instead of muting them
    :return: None
    """
    # Tracking is a no-op unless the dbnd-snowflake plugin is installed.
    if not is_plugin_enabled("dbnd-snowflake", module_import="dbnd_snowflake"):
        return

    from dbnd_snowflake import snowflake_values

    timer = log_duration("log_snowflake_table__time_seconds", source="system")
    with timer, SnowflakeController(connection_string) as snowflake_ctrl:
        preview_rows = SnowflakeConfig().table_preview_rows
        table_value = snowflake_values.SnowflakeTable(
            snowflake_ctrl,
            database,
            schema,
            table_name,
            preview_rows,
        )
        log_data(
            key or table_name,
            table_value,
            with_preview=with_preview,
            with_schema=with_schema,
            with_size=with_schema,
            with_histograms=False,
            raise_on_error=raise_on_error,
        )
def log_snowflake_resource_usage(
    query_text, database, user, connection_string, session_id=None
):
    """
    Get and log cpu time, run time, disk read, and processed rows.

    connection or connection_string is required. Supports only psycopg2 connections.
    """
    try:
        with log_duration("log_snowflake_resource_usage__time_seconds", "system"):
            _log_snowflake_resource_usage(
                query_text, database, user, connection_string, session_id
            )
    except Exception:
        # Best-effort tracking: never let a tracking failure break the user's
        # task — log (with the password censored) and continue.
        conn_without_pass = _censor_password(connection_string)
        logger.exception(
            # BUG FIX: message previously said "log_redshift_resource_usage"
            # (copy-paste from the redshift module); this is the snowflake path.
            "Failed to log_snowflake_resource_usage (query_text=%s, connection_string=%s)",
            query_text,
            conn_without_pass,
        )
def log_snowflake_table(
    table_name: str,
    connection_string: str,
    database: str,
    schema: str,
    key: Optional[str] = None,
    with_preview: Optional[bool] = None,
    with_schema: Optional[bool] = None,
    raise_on_error: bool = False,
):
    """Track a Snowflake table's metadata (preview / schema / size) via log_data.

    :param table_name: table name
    :param connection_string: Snowflake connection string; parsed for
        account/user/password credentials
    :param database: database the table lives in
    :param schema: schema the table lives in
    :param key: logging key; defaults to the table name
    :param with_preview: whether to include a data preview
    :param with_schema: whether to include the table schema
    :param raise_on_error: re-raise tracking errors instead of muting them
    :return: None
    """
    # Tracking is a no-op unless the dbnd-snowflake plugin is installed.
    if not is_plugin_enabled("dbnd-snowflake", module_import="dbnd_snowflake"):
        return

    from dbnd_snowflake import snowflake_values

    with log_duration("log_snowflake_table__time_seconds", source="system"):
        # Extract credentials from the connection string.
        conn_params = snowflake_values.conn_str_to_conn_params(connection_string)
        account = conn_params["account"]
        user = conn_params["user"]
        password = conn_params["password"]

        config = SnowflakeConfig()
        snowflake_table = snowflake_values.SnowflakeTable(
            account,
            user,
            password,
            database,
            schema,
            table_name,
            config.table_preview_rows,
        )
        log_data(
            key or table_name,
            snowflake_table,
            with_preview=with_preview,
            with_schema=with_schema,
            # NOTE(review): size logging is deliberately tied to with_schema
            # (same coupling as the sibling variant) — confirm this is intended.
            with_size=with_schema,
            with_histograms=False,
            raise_on_error=raise_on_error,
        )
def log_snowflake_resource_usage(
    database: str,
    connection_string: str,
    query_ids: List[str],
    session_id: Optional[int] = None,
    key: str = "snowflake_query",
    history_window: float = 15,
    query_history_result_limit: Optional[int] = None,
    retries: int = 3,
    retry_pause: float = 0,
    raise_on_error: bool = False,
) -> None:
    """
    Search for a query previously executed by Snowflake in its QUERY_HISTORY and
    log cpu time, run time, disk read, and other resources.

    Query's metadata can appear in QUERY_HISTORY with a lag up to 45 minutes.

    :param database: Name of the database query was issued to.
    :param connection_string: Snowflake connection string to use.
    :param query_ids: Supply a list of `query_id` generated by Snowflake for search in QUERY_HISTORY.
    :param session_id: Supply `session_id` generated by Snowflake for more efficient search in QUERY_HISTORY.
    :param key: Override it if you call this function twice or more within the same task/Airflow Operator
    :param history_window: How deep to search into QUERY_HISTORY. Set in minutes
    :param query_history_result_limit: Passed through directly to QUERY_HISTORY search function as `RESULT_LIMIT` param
    :param retries: How much times to search in QUERY_HISTORY. Each time search is widened by increasing `RESULT_LIMIT` param.
    :param raise_on_error: By default all exceptions are muted so your task success status is not affected by errors in tracking. Set to true to re-raise all exceptions.
    :param retry_pause: Set number of seconds to pause before next retry.
    """
    snowflake_config = SnowflakeConfig()
    if query_history_result_limit is None:
        query_history_result_limit = snowflake_config.query_history_result_limit

    # BUG FIX: `not all(...)` alone never fires for an empty list, because
    # all([]) is True — an empty query_ids slipped through and silently logged
    # nothing. Reject an empty list explicitly, matching the error message.
    if not query_ids or not all(query_id for query_id in query_ids):
        error_msg = f"query_ids cannot be empty. You supplied: {query_ids}"
        if raise_on_error:
            raise SnowflakeError(error_msg)
        else:
            logger.error(error_msg)
        return

    metrics_to_log = {}
    # XXX: Do we actually need log_duration?
    with log_duration("log_snowflake_resource_usage__time_seconds", "system"):
        for i, query_id in enumerate(query_ids):
            # Suffix the key only when tracking several queries at once.
            query_key = f"{key}.{i}" if len(query_ids) > 1 else key
            metrics_to_log.update(
                _get_snowflake_resource_usage(
                    database,
                    connection_string,
                    query_id,
                    session_id,
                    query_key,
                    history_window,
                    query_history_result_limit,
                    retries,
                    retry_pause,
                    raise_on_error,
                    snowflake_config,
                )
            )
    log_metrics(metrics_to_log, source="user")