Code Example #1
class MySQLStatementMetrics(DBMAsyncJob):
    """
    MySQLStatementMetrics collects database metrics per normalized MySQL statement
    """
    def __init__(self, check, config, connection_args):
        # type: (MySql, MySQLConfig, Dict[str, Any]) -> None
        collection_interval = float(
            config.statement_metrics_config.get('collection_interval', 10))
        if collection_interval <= 0:
            collection_interval = 10
        super(MySQLStatementMetrics, self).__init__(
            check,
            rate_limit=1 / float(collection_interval),
            run_sync=is_affirmative(
                config.statement_metrics_config.get('run_sync', False)),
            enabled=is_affirmative(
                config.statement_metrics_config.get('enabled', True)),
            expected_db_exceptions=(pymysql.err.DatabaseError, ),
            min_collection_interval=config.min_collection_interval,
            dbms="mysql",
            job_name="statement-metrics",
            shutdown_callback=self._close_db_conn,
        )
        self._metric_collection_interval = collection_interval
        self._connection_args = connection_args
        self._db = None
        self._config = config
        self.log = get_check_logger()
        self._state = StatementMetrics()
        self._obfuscate_options = to_native_string(
            json.dumps(self._config.obfuscator_options))
        # full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
        self._full_statement_text_cache = TTLCache(
            maxsize=self._config.full_statement_text_cache_max_size,
            ttl=60 * 60 /
            self._config.full_statement_text_samples_per_hour_per_query,
        )  # type: TTLCache

    def _get_db_connection(self):
        """
        lazy reconnect db
        pymysql connections are not thread safe so we can't reuse the same connection from the main check
        :return:
        """
        if not self._db:
            self._db = pymysql.connect(**self._connection_args)
        return self._db

    def _close_db_conn(self):
        if self._db:
            try:
                self._db.close()
            except Exception:
                self._log.debug("Failed to close db connection", exc_info=1)
            finally:
                self._db = None

    def run_job(self):
        self.collect_per_statement_metrics()

    def collect_per_statement_metrics(self):
        # Detect a database misconfiguration by checking if the performance schema is enabled since mysql
        # just returns no rows without errors if the performance schema is disabled
        if not self._check.performance_schema_enabled:
            self._check.record_warning(
                DatabaseConfigurationError.performance_schema_not_enabled,
                warning_with_tags(
                    'Unable to collect statement metrics because the performance schema is disabled. '
                    'See https://docs.datadoghq.com/database_monitoring/setup_mysql/'
                    'troubleshooting#%s for more details',
                    DatabaseConfigurationError.performance_schema_not_enabled.value,
                    code=DatabaseConfigurationError.performance_schema_not_enabled.value,
                    host=self._check.resolved_hostname,
                ),
            )
            return

        rows = self._collect_per_statement_metrics()
        if not rows:
            return

        for event in self._rows_to_fqt_events(rows):
            self._check.database_monitoring_query_sample(
                json.dumps(event, default=default_json_event_encoding))

        # truncate query text to the maximum length supported by metrics tags
        for row in rows:
            row['digest_text'] = row['digest_text'][0:200] if row['digest_text'] is not None else None

        payload = {
            'host': self._check.resolved_hostname,
            'timestamp': time.time() * 1000,
            'mysql_version': self._check.version.version + '+' + self._check.version.build,
            'mysql_flavor': self._check.version.flavor,
            "ddagenthostname": self._check.agent_hostname,
            'ddagentversion': datadog_agent.get_version(),
            'min_collection_interval': self._metric_collection_interval,
            'tags': self._tags,
            'cloud_metadata': self._config.cloud_metadata,
            'mysql_rows': rows,
        }
        self._check.database_monitoring_query_metrics(
            json.dumps(payload, default=default_json_event_encoding))

    def _collect_per_statement_metrics(self):
        # type: () -> List[PyMysqlRow]
        monotonic_rows = self._query_summary_per_statement()
        monotonic_rows = self._normalize_queries(monotonic_rows)
        rows = self._state.compute_derivative_rows(monotonic_rows,
                                                   METRICS_COLUMNS,
                                                   key=_row_key)
        return rows

    def _query_summary_per_statement(self):
        # type: () -> List[PyMysqlRow]
        """
        Collects per-statement metrics from performance schema. Because the statement sums are
        cumulative, the results of the previous run are stored and subtracted from the current
        values to get the counts for the elapsed period. This is similar to monotonic_count, but
        several fields must be further processed from the delta values.
        """

        sql_statement_summary = """\
            SELECT `schema_name`,
                   `digest`,
                   `digest_text`,
                   `count_star`,
                   `sum_timer_wait`,
                   `sum_lock_time`,
                   `sum_errors`,
                   `sum_rows_affected`,
                   `sum_rows_sent`,
                   `sum_rows_examined`,
                   `sum_select_scan`,
                   `sum_select_full_join`,
                   `sum_no_index_used`,
                   `sum_no_good_index_used`
            FROM performance_schema.events_statements_summary_by_digest
            WHERE `digest_text` NOT LIKE 'EXPLAIN %' OR `digest_text` IS NULL
            ORDER BY `count_star` DESC
            LIMIT 10000"""

        with closing(self._get_db_connection().cursor(
                pymysql.cursors.DictCursor)) as cursor:
            cursor.execute(sql_statement_summary)

            rows = cursor.fetchall() or []  # type: ignore

        return rows

    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                statement = obfuscate_sql_with_metadata(
                    row['digest_text'], self._obfuscate_options)
                obfuscated_statement = statement['query'] if row['digest_text'] is not None else None
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['digest_text'], e)
                continue

            normalized_row['digest_text'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(
                obfuscated_statement)
            metadata = statement['metadata']
            normalized_row['dd_tables'] = metadata.get('tables', None)
            normalized_row['dd_commands'] = metadata.get('commands', None)
            normalized_rows.append(normalized_row)

        return normalized_rows

    def _rows_to_fqt_events(self, rows):
        for row in rows:
            query_cache_key = _row_key(row)
            if query_cache_key in self._full_statement_text_cache:
                continue
            self._full_statement_text_cache[query_cache_key] = True
            row_tags = self._tags + ["schema:{}".format(row['schema_name'])] if row['schema_name'] else self._tags
            yield {
                "timestamp": time.time() * 1000,
                "host": self._check.resolved_hostname,
                "ddagentversion": datadog_agent.get_version(),
                "ddsource": "mysql",
                "ddtags": ",".join(row_tags),
                "dbm_type": "fqt",
                "db": {
                    "instance": row['schema_name'],
                    "query_signature": row['query_signature'],
                    "statement": row['digest_text'],
                    "metadata": {
                        "tables": row['dd_tables'],
                        "commands": row['dd_commands'],
                    },
                },
                "mysql": {
                    "schema": row["schema_name"]
                },
            }
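
The TTLCache created in __init__ and consulted in _rows_to_fqt_events is what keeps full query text (FQT) events from being re-sent on every run: one cache entry per query_signature, expiring after ttl seconds. A minimal sketch of that pattern using cachetools directly (the cache size and per-hour rate below are illustrative placeholders, not necessarily this check's defaults):

from cachetools import TTLCache

# Illustrative values; the real check reads these from its configuration.
FQT_CACHE_MAX_SIZE = 10000
FQT_SAMPLES_PER_HOUR_PER_QUERY = 1

# One entry per query_signature; an entry expires after `ttl` seconds, so each
# signature can pass the "not in cache" check at most once per window.
fqt_cache = TTLCache(maxsize=FQT_CACHE_MAX_SIZE, ttl=60 * 60 / FQT_SAMPLES_PER_HOUR_PER_QUERY)


def should_emit_fqt_event(query_signature):
    # Mirrors the check in _rows_to_fqt_events: skip signatures seen recently,
    # otherwise record the signature and let one event through.
    if query_signature in fqt_cache:
        return False
    fqt_cache[query_signature] = True
    return True
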
Code Example #2
class PostgresStatementMetrics(object):
    """Collects telemetry for SQL statements"""

    def __init__(self, check, config):
        self._check = check
        self._config = config
        self._db_hostname = None
        self._log = get_check_logger()
        self._state = StatementMetrics()

    def _execute_query(self, cursor, query, params=()):
        try:
            self._log.debug("Running query [%s] %s", query, params)
            cursor.execute(query, params)
            return cursor.fetchall()
        except (psycopg2.ProgrammingError, psycopg2.errors.QueryCanceled) as e:
            self._log.warning('Statement-level metrics are unavailable: %s', e)
            return []

    def _get_pg_stat_statements_columns(self, db):
        """
        Load the list of the columns available under the `pg_stat_statements` table. This must be queried because
        version is not a reliable way to determine the available columns on `pg_stat_statements`. The database can
        be upgraded without upgrading extensions, even when the extension is included by default.
        """
        # Querying over '*' with limit 0 allows fetching only the column names from the cursor without data
        query = STATEMENTS_QUERY.format(
            cols='*', pg_stat_statements_view=self._config.pg_stat_statements_view, limit=0, filters=""
        )
        cursor = db.cursor()
        self._execute_query(cursor, query, params=(self._config.dbname,))
        colnames = [desc[0] for desc in cursor.description] if cursor.description else None
        return colnames

    def _db_hostname_cached(self):
        if self._db_hostname:
            return self._db_hostname
        self._db_hostname = resolve_db_host(self._config.host)
        return self._db_hostname

    def collect_per_statement_metrics(self, db, tags):
        try:
            rows = self._collect_metrics_rows(db)
            if not rows:
                return
            payload = {
                'host': self._db_hostname_cached(),
                'timestamp': time.time() * 1000,
                'min_collection_interval': self._config.min_collection_interval,
                'tags': tags,
                'postgres_rows': rows,
            }
            self._check.database_monitoring_query_metrics(json.dumps(payload, default=default_json_event_encoding))
        except Exception:
            db.rollback()
            self._log.exception('Unable to collect statement metrics due to an error')
            return []

    def _load_pg_stat_statements(self, db):
        available_columns = set(self._get_pg_stat_statements_columns(db))
        missing_columns = PG_STAT_STATEMENTS_REQUIRED_COLUMNS - available_columns
        if len(missing_columns) > 0:
            self._log.warning(
                'Unable to collect statement metrics because required fields are unavailable: %s',
                ', '.join(list(missing_columns)),
            )
            return []

        query_columns = sorted(list(PG_STAT_ALL_DESIRED_COLUMNS & available_columns))
        params = ()
        filters = ""
        if self._config.dbstrict:
            filters = "AND pg_database.datname = %s"
            params = (self._config.dbname,)
        return self._execute_query(
            db.cursor(cursor_factory=psycopg2.extras.DictCursor),
            STATEMENTS_QUERY.format(
                cols=', '.join(query_columns),
                pg_stat_statements_view=self._config.pg_stat_statements_view,
                filters=filters,
                limit=DEFAULT_STATEMENTS_LIMIT,
            ),
            params=params,
        )

    def _collect_metrics_rows(self, db):
        rows = self._load_pg_stat_statements(db)

        def row_keyfunc(row):
            return (row['query_signature'], row['datname'], row['rolname'])

        rows = self._normalize_queries(rows)
        rows = self._state.compute_derivative_rows(rows, PG_STAT_STATEMENTS_METRICS_COLUMNS, key=row_keyfunc)
        self._check.gauge('dd.postgres.queries.query_rows_raw', len(rows))

        return rows

    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
            except Exception as e:
                # obfuscation errors are relatively common so only log them during debugging
                self._log.debug("Failed to obfuscate query '%s': %s", row['query'], e)
                continue

            normalized_row['query'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows
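
_get_pg_stat_statements_columns relies on the fact that a LIMIT 0 query returns no rows but still populates cursor.description, so the available columns of pg_stat_statements can be discovered without transferring data. A standalone sketch of that trick with psycopg2 (the helper name and connection settings are illustrative, not part of the check):

import psycopg2


def get_view_columns(conn, view_name):
    # LIMIT 0 fetches no rows but still fills in cursor.description, whose
    # first element per column is the column name. `view_name` must be a
    # trusted identifier (e.g. 'pg_stat_statements'), as it is formatted
    # directly into the query, just like pg_stat_statements_view above.
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM {} LIMIT 0".format(view_name))
        return [desc[0] for desc in cursor.description] if cursor.description else []


# Usage sketch (hypothetical connection parameters):
# conn = psycopg2.connect(host="localhost", dbname="datadog_test", user="datadog")
# print(get_view_columns(conn, "pg_stat_statements"))
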
Code Example #3
class SqlserverStatementMetrics(DBMAsyncJob):
    """Collects query metrics and plans"""

    def __init__(self, check):
        self.check = check
        self.log = check.log
        collection_interval = float(
            check.statement_metrics_config.get('collection_interval', DEFAULT_COLLECTION_INTERVAL)
        )
        if collection_interval <= 0:
            collection_interval = DEFAULT_COLLECTION_INTERVAL
        self.collection_interval = collection_interval
        super(SqlserverStatementMetrics, self).__init__(
            check,
            run_sync=is_affirmative(check.statement_metrics_config.get('run_sync', False)),
            enabled=is_affirmative(check.statement_metrics_config.get('enabled', True)),
            expected_db_exceptions=(),
            min_collection_interval=check.min_collection_interval,
            dbms="sqlserver",
            rate_limit=1 / float(collection_interval),
            job_name="query-metrics",
            shutdown_callback=self._close_db_conn,
        )
        self.disable_secondary_tags = is_affirmative(
            check.statement_metrics_config.get('disable_secondary_tags', False)
        )
        self.dm_exec_query_stats_row_limit = int(
            check.statement_metrics_config.get('dm_exec_query_stats_row_limit', 10000)
        )
        self.enforce_collection_interval_deadline = is_affirmative(
            check.statement_metrics_config.get('enforce_collection_interval_deadline', True)
        )
        self._state = StatementMetrics()
        self._init_caches()
        self._conn_key_prefix = "dbm-"
        self._statement_metrics_query = None
        self._last_stats_query_time = None

    def _init_caches(self):
        # full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
        self._full_statement_text_cache = TTLCache(
            maxsize=self.check.instance.get('full_statement_text_cache_max_size', 10000),
            ttl=60 * 60 / self.check.instance.get('full_statement_text_samples_per_hour_per_query', 1),
        )

        # seen_plans_ratelimiter: limit the ingestion rate per unique plan;
        # we only really need each plan once per hour
        self._seen_plans_ratelimiter = RateLimitingTTLCache(
            # assuming ~100 bytes per entry (query & plan signature, key hash, 4 pointers (ordered dict), expiry time)
            # total size: 10k * 100 = 1 Mb
            maxsize=int(self.check.instance.get('seen_samples_cache_maxsize', 10000)),
            ttl=60 * 60 / int(self.check.instance.get('samples_per_hour_per_query', 4)),
        )

    def _close_db_conn(self):
        pass

    def _get_available_query_metrics_columns(self, cursor, all_expected_columns):
        cursor.execute("select top 0 * from sys.dm_exec_query_stats")
        all_columns = set([i[0] for i in cursor.description])
        available_columns = [c for c in all_expected_columns if c in all_columns]
        missing_columns = set(all_expected_columns) - set(available_columns)
        if missing_columns:
            self.log.debug(
                "missing the following expected query metrics columns from dm_exec_query_stats: %s", missing_columns
            )
        self.log.debug("found available sys.dm_exec_query_stats columns: %s", available_columns)
        return available_columns

    def _get_statement_metrics_query_cached(self, cursor):
        if self._statement_metrics_query:
            return self._statement_metrics_query
        available_columns = self._get_available_query_metrics_columns(cursor, SQL_SERVER_QUERY_METRICS_COLUMNS)

        statements_query = (
            STATEMENT_METRICS_QUERY_NO_AGGREGATES if self.disable_secondary_tags else STATEMENT_METRICS_QUERY
        )
        self._statement_metrics_query = statements_query.format(
            query_metrics_columns=', '.join(available_columns),
            query_metrics_column_sums=', '.join(['sum({}) as {}'.format(c, c) for c in available_columns]),
            collection_interval=int(math.ceil(self.collection_interval) * 2),
            limit=self.dm_exec_query_stats_row_limit,
        )
        return self._statement_metrics_query

    @tracked_method(agent_check_getter=agent_check_getter, track_result_length=True)
    def _load_raw_query_metrics_rows(self, cursor):
        self.log.debug("collecting sql server statement metrics")
        statement_metrics_query = self._get_statement_metrics_query_cached(cursor)
        now = time.time()
        query_interval = self.collection_interval
        if self._last_stats_query_time:
            query_interval = now - self._last_stats_query_time
        self._last_stats_query_time = now
        params = (math.ceil(query_interval),)
        self.log.debug("Running query [%s] %s", statement_metrics_query, params)
        cursor.execute(statement_metrics_query, params)
        columns = [i[0] for i in cursor.description]
        # construct row dicts manually as there's no DictCursor for pyodbc
        rows = [dict(zip(columns, row)) for row in cursor.fetchall()]
        self.log.debug("loaded sql server statement metrics len(rows)=%s", len(rows))
        return rows

    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            try:
                statement = obfuscate_sql_with_metadata(row['text'], self.check.obfuscator_options)
            except Exception as e:
                # obfuscation errors are relatively common so only log them during debugging
                self.log.debug("Failed to obfuscate query: %s", e)
                self.check.count(
                    "dd.sqlserver.statements.error",
                    1,
                    **self.check.debug_stats_kwargs(tags=["error:obfuscate-query-{}".format(type(e))])
                )
                continue
            obfuscated_statement = statement['query']
            row['text'] = obfuscated_statement
            row['query_signature'] = compute_sql_signature(obfuscated_statement)
            row['query_hash'] = _hash_to_hex(row['query_hash'])
            row['query_plan_hash'] = _hash_to_hex(row['query_plan_hash'])
            row['plan_handle'] = _hash_to_hex(row['plan_handle'])
            metadata = statement['metadata']
            row['dd_tables'] = metadata.get('tables', None)
            row['dd_commands'] = metadata.get('commands', None)
            row['dd_comments'] = metadata.get('comments', None)
            normalized_rows.append(row)
        return normalized_rows

    def _collect_metrics_rows(self, cursor):
        rows = self._load_raw_query_metrics_rows(cursor)
        rows = self._normalize_queries(rows)
        if not rows:
            return []
        metric_columns = [c for c in rows[0].keys() if c.startswith("total_") or c == 'execution_count']
        rows = self._state.compute_derivative_rows(rows, metric_columns, key=_row_key)
        return rows

    @staticmethod
    def _to_metrics_payload_row(row):
        row = {k: v for k, v in row.items()}
        if 'dd_comments' in row:
            del row['dd_comments']
        # truncate query text to the maximum length supported by metrics tags
        row['text'] = row['text'][0:200]
        return row

    def _to_metrics_payload(self, rows):
        return {
            'host': self.check.resolved_hostname,
            'timestamp': time.time() * 1000,
            'min_collection_interval': self.collection_interval,
            'tags': self.check.tags,
            'cloud_metadata': self.check.cloud_metadata,
            'sqlserver_rows': [self._to_metrics_payload_row(r) for r in rows],
            'sqlserver_version': self.check.static_info_cache.get(STATIC_INFO_VERSION, ""),
            'ddagentversion': datadog_agent.get_version(),
            'ddagenthostname': self._check.agent_hostname,
        }

    @tracked_method(agent_check_getter=agent_check_getter)
    def collect_statement_metrics_and_plans(self):
        """
        Collects statement metrics and plans.
        :return:
        """
        plans_submitted = 0
        deadline = time.time() + self.collection_interval

        # re-use the check's conn module, but set extra_key=dbm- to ensure we get our own
        # raw connection. adodbapi and pyodbc modules are thread safe, but connections are not.
        with self.check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
            with self.check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
                rows = self._collect_metrics_rows(cursor)
                if not rows:
                    return
                for event in self._rows_to_fqt_events(rows):
                    self.check.database_monitoring_query_sample(json.dumps(event, default=default_json_event_encoding))
                payload = self._to_metrics_payload(rows)
                self.check.database_monitoring_query_metrics(json.dumps(payload, default=default_json_event_encoding))
                for event in self._collect_plans(rows, cursor, deadline):
                    self.check.database_monitoring_query_sample(json.dumps(event, default=default_json_event_encoding))
                    plans_submitted += 1

        self.check.count(
            "dd.sqlserver.statements.plans_submitted.count", plans_submitted, **self.check.debug_stats_kwargs()
        )
        self.check.gauge(
            "dd.sqlserver.statements.seen_plans_cache.len",
            len(self._seen_plans_ratelimiter),
            **self.check.debug_stats_kwargs()
        )
        self.check.gauge(
            "dd.sqlserver.statements.fqt_cache.len",
            len(self._full_statement_text_cache),
            **self.check.debug_stats_kwargs()
        )

    def _rows_to_fqt_events(self, rows):
        for row in rows:
            query_cache_key = _row_key(row)
            if query_cache_key in self._full_statement_text_cache:
                continue
            self._full_statement_text_cache[query_cache_key] = True
            tags = list(self.check.tags)
            if 'database_name' in row:
                tags += ["db:{}".format(row['database_name'])]
            yield {
                "timestamp": time.time() * 1000,
                "host": self.check.resolved_hostname,
                "ddagentversion": datadog_agent.get_version(),
                "ddsource": "sqlserver",
                "ddtags": ",".join(tags),
                "dbm_type": "fqt",
                "db": {
                    "instance": row.get('database_name', None),
                    "query_signature": row['query_signature'],
                    "statement": row['text'],
                    "metadata": {
                        "tables": row['dd_tables'],
                        "commands": row['dd_commands'],
                    },
                },
                'sqlserver': {
                    'query_hash': row['query_hash'],
                    'query_plan_hash': row['query_plan_hash'],
                },
            }

    def run_job(self):
        self.collect_statement_metrics_and_plans()

    @tracked_method(agent_check_getter=agent_check_getter)
    def _load_plan(self, plan_handle, cursor):
        self.log.debug("collecting plan. plan_handle=%s", plan_handle)
        self.log.debug("Running query [%s] %s", PLAN_LOOKUP_QUERY, (plan_handle,))
        cursor.execute(PLAN_LOOKUP_QUERY, ("0x" + plan_handle,))
        result = cursor.fetchall()
        if not result or not result[0]:
            self.log.debug("failed to loan plan, it must have just been expired out of the plan cache")
            return None, None
        return result[0]

    @tracked_method(agent_check_getter=agent_check_getter)
    def _collect_plans(self, rows, cursor, deadline):
        for row in rows:
            if self.enforce_collection_interval_deadline and time.time() > deadline:
                self.log.debug("ending plan collection early because check deadline has been exceeded")
                self.check.count("dd.sqlserver.statements.deadline_exceeded", 1, **self.check.debug_stats_kwargs())
                return
            plan_key = (row['query_signature'], row['query_hash'], row['query_plan_hash'])
            if self._seen_plans_ratelimiter.acquire(plan_key):
                raw_plan, is_plan_encrypted = self._load_plan(row['plan_handle'], cursor)
                obfuscated_plan, collection_errors = None, None

                try:
                    if raw_plan:
                        obfuscated_plan = obfuscate_xml_plan(raw_plan, self.check.obfuscator_options)
                except Exception as e:
                    self.log.debug(
                        (
                            "failed to obfuscate XML Plan query_signature=%s query_hash=%s "
                            "query_plan_hash=%s plan_handle=%s: %s"
                        ),
                        row['query_signature'],
                        row['query_hash'],
                        row['query_plan_hash'],
                        row['plan_handle'],
                        e,
                    )
                    collection_errors = [{'code': "obfuscate_xml_plan_error", 'message': str(e)}]
                    self.check.count(
                        "dd.sqlserver.statements.error",
                        1,
                        **self.check.debug_stats_kwargs(tags=["error:obfuscate-xml-plan-{}".format(type(e))])
                    )
                tags = list(self.check.tags)
                if 'database_name' in row:
                    tags += ["db:{}".format(row['database_name'])]
                yield {
                    "host": self.check.resolved_hostname,
                    "ddagentversion": datadog_agent.get_version(),
                    "ddsource": "sqlserver",
                    "ddtags": ",".join(tags),
                    "timestamp": time.time() * 1000,
                    "dbm_type": "plan",
                    "db": {
                        "instance": row.get("database_name", None),
                        "plan": {
                            "definition": obfuscated_plan,
                            "signature": row['query_plan_hash'],
                            "collection_errors": collection_errors,
                        },
                        "query_signature": row['query_signature'],
                        "statement": row['text'],
                        "metadata": {
                            "tables": row['dd_tables'],
                            "commands": row['dd_commands'],
                            "comments": row['dd_comments'],
                        },
                    },
                    'sqlserver': {
                        "is_plan_encrypted": is_plan_encrypted,
                        "is_statement_encrypted": row['is_encrypted'],
                        'query_hash': row['query_hash'],
                        'query_plan_hash': row['query_plan_hash'],
                        'plan_handle': row['plan_handle'],
                        'execution_count': row.get('execution_count', None),
                        'total_elapsed_time': row.get('total_elapsed_time', None),
                    },
                }
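
Plan events in _collect_plans are throttled through RateLimitingTTLCache.acquire(plan_key), which this snippet configures but does not define. As a rough illustration of the acquire-once-per-TTL-window semantics the code relies on, here is a hypothetical minimal equivalent built on cachetools.TTLCache (not the actual datadog-checks-base class):

from cachetools import TTLCache


class SimpleRateLimitingTTLCache(TTLCache):
    # Hypothetical stand-in: acquire(key) returns True only the first time a
    # key is seen within the TTL window, and only while there is room.
    def acquire(self, key):
        if key in self:
            return False
        if len(self) >= self.maxsize:
            return False
        self[key] = True
        return True


# With samples_per_hour_per_query=4, each (query_signature, query_hash,
# query_plan_hash) key is admitted at most once every 15 minutes.
seen_plans = SimpleRateLimitingTTLCache(maxsize=10000, ttl=60 * 60 / 4)
plan_key = ("<query_signature>", "<query_hash>", "<query_plan_hash>")  # illustrative key
if seen_plans.acquire(plan_key):
    pass  # load, obfuscate and submit the plan, as _collect_plans does above
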
Code Example #4
class PostgresStatementMetrics(object):
    """Collects telemetry for SQL statements"""
    def __init__(self, config):
        self.config = config
        self.log = get_check_logger()
        self._state = StatementMetrics()

    def _execute_query(self, cursor, query, params=()):
        try:
            cursor.execute(query, params)
            return cursor.fetchall()
        except (psycopg2.ProgrammingError, psycopg2.errors.QueryCanceled) as e:
            self.log.warning('Statement-level metrics are unavailable: %s', e)
            return []

    def _get_pg_stat_statements_columns(self, db):
        """
        Load the list of the columns available under the `pg_stat_statements` table. This must be queried because
        version is not a reliable way to determine the available columns on `pg_stat_statements`. The database can
        be upgraded without upgrading extensions, even when the extension is included by default.
        """
        # Querying over '*' with limit 0 allows fetching only the column names from the cursor without data
        query = STATEMENTS_QUERY.format(
            cols='*',
            pg_stat_statements_view=self.config.pg_stat_statements_view,
            limit=0,
        )
        cursor = db.cursor()
        self._execute_query(cursor, query, params=(self.config.dbname, ))
        colnames = [desc[0] for desc in cursor.description]
        return colnames

    def collect_per_statement_metrics(self, db):
        try:
            return self._collect_per_statement_metrics(db)
        except Exception:
            db.rollback()
            self.log.exception(
                'Unable to collect statement metrics due to an error')
            return []

    def _collect_per_statement_metrics(self, db):
        metrics = []

        available_columns = self._get_pg_stat_statements_columns(db)
        missing_columns = PG_STAT_STATEMENTS_REQUIRED_COLUMNS - set(
            available_columns)
        if len(missing_columns) > 0:
            self.log.warning(
                'Unable to collect statement metrics because required fields are unavailable: %s',
                ', '.join(list(missing_columns)),
            )
            return metrics

        desired_columns = (list(PG_STAT_STATEMENTS_METRIC_COLUMNS.keys()) +
                           list(PG_STAT_STATEMENTS_OPTIONAL_COLUMNS) +
                           list(PG_STAT_STATEMENTS_TAG_COLUMNS.keys()))
        query_columns = list(
            set(desired_columns) & set(available_columns)
            | set(PG_STAT_STATEMENTS_TAG_COLUMNS.keys()))
        rows = self._execute_query(
            db.cursor(cursor_factory=psycopg2.extras.DictCursor),
            STATEMENTS_QUERY.format(
                cols=', '.join(query_columns),
                pg_stat_statements_view=self.config.pg_stat_statements_view,
                limit=DEFAULT_STATEMENTS_LIMIT,
            ),
            params=(self.config.dbname, ),
        )
        if not rows:
            return metrics

        def row_keyfunc(row):
            # old versions of pg_stat_statements don't have a query ID so fall back to the query string itself
            queryid = row['queryid'] if 'queryid' in row else row['query']
            return (queryid, row['datname'], row['rolname'])

        rows = self._state.compute_derivative_rows(
            rows, PG_STAT_STATEMENTS_METRIC_COLUMNS.keys(), key=row_keyfunc)
        rows = apply_row_limits(rows,
                                DEFAULT_STATEMENT_METRIC_LIMITS,
                                tiebreaker_metric='calls',
                                tiebreaker_reverse=True,
                                key=row_keyfunc)

        for row in rows:
            try:
                normalized_query = datadog_agent.obfuscate_sql(row['query'])
                if not normalized_query:
                    self.log.warning(
                        "Obfuscation of query '%s' resulted in empty query",
                        row['query'])
                    continue
            except Exception as e:
                # If query obfuscation fails, it is acceptable to log the raw query here because the
                # pg_stat_statements table contains no parameters in the raw queries.
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['query'], e)
                continue

            query_signature = compute_sql_signature(normalized_query)

            # All "Deep Database Monitoring" statement-level metrics are tagged with a `query_signature`
            # which uniquely identifies the normalized query family. Where possible, this hash should
            # match the hash of APM "resources" (https://docs.datadoghq.com/tracing/visualization/resource/)
            # when the resource is a SQL query. Postgres' query normalization in the `pg_stat_statements` table
            # preserves most of the original query, so we tag the `resource_hash` with the same value as the
            # `query_signature`. The `resource_hash` tag should match the *actual* APM resource hash most of
            # the time, but not always. So this is a best-effort approach to link these metrics to APM metrics.
            tags = [
                'query_signature:' + query_signature,
                'resource_hash:' + query_signature
            ]

            for column, tag_name in PG_STAT_STATEMENTS_TAG_COLUMNS.items():
                if column not in row:
                    continue
                value = row[column]
                if column == 'query':
                    value = normalize_query_tag(normalized_query)
                tags.append('{tag_name}:{value}'.format(tag_name=tag_name,
                                                        value=value))

            for column, metric_name in PG_STAT_STATEMENTS_METRIC_COLUMNS.items():
                if column not in row:
                    continue
                value = row[column]
                if column == 'total_time':
                    # All "Deep Database Monitoring" timing metrics are in nanoseconds
                    # Postgres tracks pg_stat* timing stats in milliseconds
                    value = milliseconds_to_nanoseconds(value)
                metrics.append((metric_name, value, tags))

        return metrics
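
This version caps the number of reported rows with apply_row_limits, whose implementation is not shown here. The sketch below illustrates the general idea under the assumption that DEFAULT_STATEMENT_METRIC_LIMITS maps each metric to a (top_k, bottom_k) pair; the function name and that shape are assumptions, not details confirmed by the snippet.

def limit_rows_per_metric(rows, metric_limits, tiebreaker_metric, tiebreaker_reverse, key):
    # Keep, for every limited metric, the rows with the `top_k` highest and
    # `bottom_k` lowest values; the tiebreaker metric makes the ordering stable.
    sign = -1 if tiebreaker_reverse else 1
    kept = {}
    for metric, (top_k, bottom_k) in metric_limits.items():
        ordered = sorted(rows, key=lambda r: (r.get(metric, 0), sign * r.get(tiebreaker_metric, 0)))
        chosen = ordered[:bottom_k] + (ordered[-top_k:] if top_k else [])
        for row in chosen:
            kept[key(row)] = row  # dedupe rows selected by several metrics
    return list(kept.values())


# Usage sketch with a hypothetical limits dict:
# limits = {'calls': (200, 0), 'total_time': (200, 0)}
# rows = limit_rows_per_metric(rows, limits, tiebreaker_metric='calls',
#                              tiebreaker_reverse=True, key=row_keyfunc)
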
Code Example #5
    def test_compute_derivative_rows_boundary_cases(self, fn_args):
        sm = StatementMetrics()
        sm.compute_derivative_rows(*fn_args)
        sm.compute_derivative_rows(*fn_args)
Code Example #6
    def test_compute_derivative_rows_happy_path(self):
        sm = StatementMetrics()

        rows1 = [
            {
                'count': 13,
                'time': 2005,
                'errors': 1,
                'query': 'COMMIT',
                'db': 'puppies',
                'user': '******'
            },
            {
                'count': 25,
                'time': 105,
                'errors': 0,
                'query': 'ROLLBACK',
                'db': 'puppies',
                'user': '******'
            },
            {
                'count': 1,
                'time': 10005,
                'errors': 0,
                'query': 'select * from kennel',
                'db': 'puppies',
                'user': '******'
            },
            {
                'count': 99991882777665555,
                'time': 10005,
                'errors': 0,
                'query': 'update kennel set breed="dalmatian" where id = ?',
                'db': 'puppies',
                'user': '******',
            },
        ]

        def key(row):
            return (row['query'], row['db'], row['user'])

        metrics = ['count', 'time']

        assert [] == sm.compute_derivative_rows(rows1, metrics, key=key)
        # No changes should produce no rows
        assert [] == sm.compute_derivative_rows(rows1, metrics, key=key)

        rows2 = [
            {
                'count': 1,
                'time': 1,
                'errors': 1,
                'query': 'SELECT CURRENT_TIME',
                'db': 'puppies',
                'user': '******'
            },
            add_to_dict(rows1[0], {
                'count': 0,
                'time': 0,
                'errors': 15
            }),
            add_to_dict(rows1[1], {
                'count': 1,
                'time': 15,
                'errors': 0
            }),
            add_to_dict(rows1[2], {
                'count': 20,
                'time': 900,
                'errors': 0
            }),
            add_to_dict(rows1[3], {
                'count': 7,
                'time': 0.5,
                'errors': 0
            }),
        ]
        expected = [
            # First row only incremented 'errors' which is not a tracked metric, so it is omitted from the output
            {
                'count': 1,
                'time': 15,
                'errors': 0,
                'query': 'ROLLBACK',
                'db': 'puppies',
                'user': '******'
            },
            {
                'count': 20,
                'time': 900,
                'errors': 0,
                'query': 'select * from kennel',
                'db': 'puppies',
                'user': '******'
            },
            {
                'count': 7,
                'time': 0.5,
                'errors': 0,
                'query': 'update kennel set breed="dalmatian" where id = ?',
                'db': 'puppies',
                'user': '******',
            },
        ]
        assert expected == sm.compute_derivative_rows(rows2, metrics, key=key)
        # No changes should produce no rows
        assert [] == sm.compute_derivative_rows(rows2, metrics, key=key)
Code Example #7
class MySQLStatementMetrics(object):
    """
    MySQLStatementMetrics collects database metrics per normalized MySQL statement
    """
    def __init__(self, config):
        # type: (MySQLConfig) -> None
        self.config = config
        self.log = get_check_logger()
        self._state = StatementMetrics()

    def collect_per_statement_metrics(self, db):
        # type: (pymysql.connections.Connection) -> List[Metric]
        try:
            return self._collect_per_statement_metrics(db)
        except Exception:
            self.log.exception(
                'Unable to collect statement metrics due to an error')
            return []

    def _collect_per_statement_metrics(self, db):
        # type: (pymysql.connections.Connection) -> List[Metric]
        metrics = []

        def keyfunc(row):
            return (row['schema'], row['digest'])

        monotonic_rows = self._query_summary_per_statement(db)
        monotonic_rows = self._merge_duplicate_rows(monotonic_rows,
                                                    key=keyfunc)
        rows = self._state.compute_derivative_rows(monotonic_rows,
                                                   STATEMENT_METRICS.keys(),
                                                   key=keyfunc)
        rows = apply_row_limits(
            rows,
            DEFAULT_STATEMENT_METRIC_LIMITS,
            tiebreaker_metric='count',
            tiebreaker_reverse=True,
            key=keyfunc,
        )

        for row in rows:
            tags = []
            tags.append('digest:' + row['digest'])
            if row['schema'] is not None:
                tags.append('schema:' + row['schema'])

            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['query'])
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['query'], e)
                continue
            tags.append('query_signature:' +
                        compute_sql_signature(obfuscated_statement))
            tags.append('query:' +
                        normalize_query_tag(obfuscated_statement).strip())

            for col, name in STATEMENT_METRICS.items():
                value = row[col]
                metrics.append((name, value, tags))

        return metrics

    @staticmethod
    def _merge_duplicate_rows(rows, key):
        # type: (List[PyMysqlRow], RowKeyFunction) -> List[PyMysqlRow]
        """
        Merges the metrics from duplicate rows because the (schema, digest) identifier may not be
        unique, see: https://bugs.mysql.com/bug.php?id=79533
        """
        merged = {}  # type: Dict[RowKey, PyMysqlRow]

        for row in rows:
            k = key(row)
            if k in merged:
                for m in STATEMENT_METRICS:
                    merged[k][m] += row[m]
            else:
                merged[k] = copy.copy(row)

        return list(merged.values())

    def _query_summary_per_statement(self, db):
        # type: (pymysql.connections.Connection) -> List[PyMysqlRow]
        """
        Collects per-statement metrics from performance schema. Because the statement sums are
        cumulative, the results of the previous run are stored and subtracted from the current
        values to get the counts for the elapsed period. This is similar to monotonic_count, but
        several fields must be further processed from the delta values.
        """

        sql_statement_summary = """\
            SELECT `schema_name` as `schema`,
                `digest` as `digest`,
                `digest_text` as `query`,
                `count_star` as `count`,
                `sum_timer_wait` / 1000 as `time`,
                `sum_lock_time` / 1000 as `lock_time`,
                `sum_errors` as `errors`,
                `sum_rows_affected` as `rows_affected`,
                `sum_rows_sent` as `rows_sent`,
                `sum_rows_examined` as `rows_examined`,
                `sum_select_scan` as `select_scan`,
                `sum_select_full_join` as `select_full_join`,
                `sum_no_index_used` as `no_index_used`,
                `sum_no_good_index_used` as `no_good_index_used`
            FROM performance_schema.events_statements_summary_by_digest
            WHERE `digest_text` NOT LIKE 'EXPLAIN %'
            ORDER BY `count_star` DESC
            LIMIT 10000"""

        rows = []  # type: List[PyMysqlRow]

        try:
            with closing(db.cursor(pymysql.cursors.DictCursor)) as cursor:
                cursor.execute(sql_statement_summary)

                rows = cursor.fetchall() or []  # type: ignore
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.log.warning(
                "Statement summary metrics are unavailable at this time: %s",
                e)

        return rows
Code Example #8
    def test_compute_derivative_rows_stats_reset(self):
        sm = StatementMetrics()

        def key(row):
            return (row['query'], row['db'], row['user'])

        metrics = ['count', 'time']

        rows1 = [
            {
                'count': 13,
                'time': 2005,
                'errors': 1,
                'query': 'COMMIT',
                'db': 'puppies',
                'user': '******'
            },
            {
                'count': 25,
                'time': 105,
                'errors': 0,
                'query': 'ROLLBACK',
                'db': 'puppies',
                'user': '******'
            },
        ]
        rows2 = [
            add_to_dict(rows1[0], {
                'count': 0,
                'time': 1,
                'errors': 15
            }),
            add_to_dict(rows1[1], {
                'count': 1,
                'time': 15,
                'errors': 0
            }),
        ]
        # Simulate a stats reset by decreasing one of the metrics rather than increasing
        rows3 = [
            add_to_dict(rows2[1], {
                'count': 1,
                'time': 15,
                'errors': 0
            }),
            add_to_dict(rows2[0], {
                'count': -1,
                'time': 0,
                'errors': 15
            }),
        ]
        rows4 = [
            add_to_dict(rows3[1], {
                'count': 1,
                'time': 1,
                'errors': 0
            }),
            add_to_dict(rows3[0], {
                'count': 1,
                'time': 1,
                'errors': 1
            }),
        ]
        assert [] == sm.compute_derivative_rows(rows1, metrics, key=key)
        assert 2 == len(sm.compute_derivative_rows(rows2, metrics, key=key))
        assert [] == sm.compute_derivative_rows(rows3, metrics, key=key)
        assert 2 == len(sm.compute_derivative_rows(rows4, metrics, key=key))
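
Taken together, the happy-path test in Code Example #6 and the stats-reset test above pin down the contract of StatementMetrics.compute_derivative_rows: the first sighting of a key produces no output, unchanged rows are dropped, tracked columns are replaced by their per-interval deltas, and any negative delta is treated as a stats reset that discards the whole batch while re-baselining. Below is a minimal sketch consistent with those assertions (not the actual datadog-checks-base implementation; add_to_dict is assumed to return a copy of the row with the given increments added).

class MinimalStatementMetrics(object):
    # Stripped-down re-implementation matching the behaviour asserted in the
    # tests above; the real StatementMetrics class may differ in detail.
    def __init__(self):
        self._previous = {}  # key(row) -> last snapshot of that row

    def compute_derivative_rows(self, rows, metrics, key):
        metrics = set(metrics)
        new_cache = {key(row): row for row in rows}
        result = []
        for row in rows:
            prev = self._previous.get(key(row))
            if prev is None:
                continue  # first sighting: no baseline yet, emit nothing
            deltas = {m: row[m] - prev[m] for m in metrics if m in row}
            if any(v < 0 for v in deltas.values()):
                # A decreasing counter means the DB stats were reset or the
                # table truncated: drop the whole batch and re-baseline.
                self._previous = new_cache
                return []
            if all(v == 0 for v in deltas.values()):
                continue  # no activity for this query since the last run
            derived = dict(row)
            derived.update(deltas)  # tracked columns become per-interval deltas
            result.append(derived)
        self._previous = new_cache
        return result
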
Code Example #9
class MySQLStatementMetrics(object):
    """
    MySQLStatementMetrics collects database metrics per normalized MySQL statement
    """
    def __init__(self, check, config):
        # type: (MySql, MySQLConfig) -> None
        self._check = check
        self._config = config
        self._db_hostname = None
        self.log = get_check_logger()
        self._state = StatementMetrics()
        # full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
        self._full_statement_text_cache = TTLCache(
            maxsize=self._config.full_statement_text_cache_max_size,
            ttl=60 * 60 /
            self._config.full_statement_text_samples_per_hour_per_query,
        )  # type: TTLCache

    def _db_hostname_cached(self):
        if self._db_hostname:
            return self._db_hostname
        self._db_hostname = resolve_db_host(self._config.host)
        return self._db_hostname

    def collect_per_statement_metrics(self, db, tags):
        # type: (pymysql.connections.Connection, List[str]) -> None
        try:
            rows = self._collect_per_statement_metrics(db)
            if not rows:
                return

            for event in self._rows_to_fqt_events(rows, tags):
                self._check.database_monitoring_query_sample(
                    json.dumps(event, default=default_json_event_encoding))

            # truncate query text to the maximum length supported by metrics tags
            for row in rows:
                row['digest_text'] = row['digest_text'][0:200]

            payload = {
                'host': self._db_hostname_cached(),
                'timestamp': time.time() * 1000,
                'min_collection_interval': self._config.min_collection_interval,
                'tags': tags,
                'mysql_rows': rows,
            }
            self._check.database_monitoring_query_metrics(
                json.dumps(payload, default=default_json_event_encoding))
        except Exception:
            self.log.exception(
                'Unable to collect statement metrics due to an error')

    def _collect_per_statement_metrics(self, db):
        # type: (pymysql.connections.Connection) -> List[PyMysqlRow]
        monotonic_rows = self._query_summary_per_statement(db)
        monotonic_rows = self._normalize_queries(monotonic_rows)
        rows = self._state.compute_derivative_rows(monotonic_rows,
                                                   METRICS_COLUMNS,
                                                   key=_row_key)
        return rows

    def _query_summary_per_statement(self, db):
        # type: (pymysql.connections.Connection) -> List[PyMysqlRow]
        """
        Collects per-statement metrics from performance schema. Because the statement sums are
        cumulative, the results of the previous run are stored and subtracted from the current
        values to get the counts for the elapsed period. This is similar to monotonic_count, but
        several fields must be further processed from the delta values.
        """

        sql_statement_summary = """\
            SELECT `schema_name`,
                   `digest`,
                   `digest_text`,
                   `count_star`,
                   `sum_timer_wait`,
                   `sum_lock_time`,
                   `sum_errors`,
                   `sum_rows_affected`,
                   `sum_rows_sent`,
                   `sum_rows_examined`,
                   `sum_select_scan`,
                   `sum_select_full_join`,
                   `sum_no_index_used`,
                   `sum_no_good_index_used`
            FROM performance_schema.events_statements_summary_by_digest
            WHERE `digest_text` NOT LIKE 'EXPLAIN %'
            ORDER BY `count_star` DESC
            LIMIT 10000"""

        rows = []  # type: List[PyMysqlRow]

        try:
            with closing(db.cursor(pymysql.cursors.DictCursor)) as cursor:
                cursor.execute(sql_statement_summary)

                rows = cursor.fetchall() or []  # type: ignore
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.log.warning(
                "Statement summary metrics are unavailable at this time: %s",
                e)

        return rows

    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['digest_text'])
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['digest_text'], e)
                continue

            normalized_row['digest_text'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(
                obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows

    def _rows_to_fqt_events(self, rows, tags):
        for row in rows:
            query_cache_key = _row_key(row)
            if query_cache_key in self._full_statement_text_cache:
                continue
            self._full_statement_text_cache[query_cache_key] = True
            row_tags = tags + ["schema:{}".format(row['schema_name'])] if row['schema_name'] else tags
            yield {
                "timestamp": time.time() * 1000,
                "host": self._db_hostname_cached(),
                "ddsource": "mysql",
                "ddtags": ",".join(row_tags),
                "dbm_type": "fqt",
                "db": {
                    "instance": row['schema_name'],
                    "query_signature": row['query_signature'],
                    "statement": row['digest_text'],
                },
                "mysql": {
                    "schema": row["schema_name"]
                },
            }
Code Example #10
class MySQLStatementMetrics(object):
    """
    MySQLStatementMetrics collects database metrics per normalized MySQL statement
    """
    def __init__(self, check, config):
        # type: (MySql, MySQLConfig) -> None
        self._check = check
        self._config = config
        self._db_hostname = None
        self.log = get_check_logger()
        self._state = StatementMetrics()

    def _db_hostname_cached(self):
        if self._db_hostname:
            return self._db_hostname
        self._db_hostname = resolve_db_host(self._config.host)
        return self._db_hostname

    def collect_per_statement_metrics(self, db, tags):
        # type: (pymysql.connections.Connection, List[str]) -> None
        try:
            rows = self._collect_per_statement_metrics(db)
            if not rows:
                return
            payload = {
                'host': self._db_hostname_cached(),
                'timestamp': time.time() * 1000,
                'min_collection_interval': self._config.min_collection_interval,
                'tags': tags,
                'mysql_rows': rows,
            }
            self._check.database_monitoring_query_metrics(
                json.dumps(payload, default=default_json_event_encoding))
        except Exception:
            self.log.exception(
                'Unable to collect statement metrics due to an error')

    def _collect_per_statement_metrics(self, db):
        # type: (pymysql.connections.Connection) -> List[PyMysqlRow]
        metrics = []  # type: List[Metric]

        def keyfunc(row):
            return (row['schema_name'], row['query_signature'])

        monotonic_rows = self._query_summary_per_statement(db)
        monotonic_rows = self._normalize_queries(monotonic_rows)
        rows = self._state.compute_derivative_rows(monotonic_rows,
                                                   METRICS_COLUMNS,
                                                   key=keyfunc)
        metrics.append(('dd.mysql.queries.query_rows_raw', len(rows), []))

        return rows

    def _query_summary_per_statement(self, db):
        # type: (pymysql.connections.Connection) -> List[PyMysqlRow]
        """
        Collects per-statement metrics from performance schema. Because the statement sums are
        cumulative, the results of the previous run are stored and subtracted from the current
        values to get the counts for the elapsed period. This is similar to monotonic_count, but
        several fields must be further processed from the delta values.
        """

        sql_statement_summary = """\
            SELECT `schema_name`,
                   `digest`,
                   `digest_text`,
                   `count_star`,
                   `sum_timer_wait`,
                   `sum_lock_time`,
                   `sum_errors`,
                   `sum_rows_affected`,
                   `sum_rows_sent`,
                   `sum_rows_examined`,
                   `sum_select_scan`,
                   `sum_select_full_join`,
                   `sum_no_index_used`,
                   `sum_no_good_index_used`
            FROM performance_schema.events_statements_summary_by_digest
            WHERE `digest_text` NOT LIKE 'EXPLAIN %'
            ORDER BY `count_star` DESC
            LIMIT 10000"""

        rows = []  # type: List[PyMysqlRow]

        try:
            with closing(db.cursor(pymysql.cursors.DictCursor)) as cursor:
                cursor.execute(sql_statement_summary)

                rows = cursor.fetchall() or []  # type: ignore
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.log.warning(
                "Statement summary metrics are unavailable at this time: %s",
                e)

        return rows

    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['digest_text'])
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['digest_text'], e)
                continue

            normalized_row['digest_text'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(
                obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows
Code Example #11
class PostgresStatementMetrics(DBMAsyncJob):
    """Collects telemetry for SQL statements"""

    def __init__(self, check, config, shutdown_callback):
        collection_interval = float(
            config.statement_metrics_config.get('collection_interval', DEFAULT_COLLECTION_INTERVAL)
        )
        if collection_interval <= 0:
            collection_interval = DEFAULT_COLLECTION_INTERVAL
        super(PostgresStatementMetrics, self).__init__(
            check,
            run_sync=is_affirmative(config.statement_metrics_config.get('run_sync', False)),
            enabled=is_affirmative(config.statement_metrics_config.get('enabled', True)),
            expected_db_exceptions=(psycopg2.errors.DatabaseError,),
            min_collection_interval=config.min_collection_interval,
            dbms="postgres",
            rate_limit=1 / float(collection_interval),
            job_name="query-metrics",
            shutdown_callback=shutdown_callback,
        )
        self._metrics_collection_interval = collection_interval
        self._config = config
        self._state = StatementMetrics()
        self._stat_column_cache = []
        self._obfuscate_options = to_native_string(json.dumps(self._config.obfuscator_options))
        # full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
        self._full_statement_text_cache = TTLCache(
            maxsize=config.full_statement_text_cache_max_size,
            ttl=60 * 60 / config.full_statement_text_samples_per_hour_per_query,
        )

    def _execute_query(self, cursor, query, params=()):
        try:
            self._log.debug("Running query [%s] %s", query, params)
            cursor.execute(query, params)
            return cursor.fetchall()
        except (psycopg2.ProgrammingError, psycopg2.errors.QueryCanceled) as e:
            # A failed query may have been caused by stale column names in the cache. It's a rare edge case,
            # but clearing the cache ensures the correct columns are retrieved the next time the query runs.
            self._stat_column_cache = []
            raise e

    def _get_pg_stat_statements_columns(self):
        """
        Load the list of columns available in the `pg_stat_statements` view. The columns must be queried directly
        because the server version is not a reliable indicator of which columns `pg_stat_statements` exposes: the
        database can be upgraded without upgrading its extensions, even when the extension is included by default.
        """
        if self._stat_column_cache:
            return self._stat_column_cache

        # Selecting '*' with LIMIT 0 fetches only the column names from the cursor description, without any row data
        query = STATEMENTS_QUERY.format(
            cols='*', pg_stat_statements_view=self._config.pg_stat_statements_view, limit=0, filters=""
        )
        cursor = self._check._get_db(self._config.dbname).cursor()
        self._execute_query(cursor, query, params=(self._config.dbname,))
        col_names = [desc[0] for desc in cursor.description] if cursor.description else []
        self._stat_column_cache = col_names
        return col_names

    def run_job(self):
        self._tags_no_db = [t for t in self._tags if not t.startswith('db:')]
        self.collect_per_statement_metrics()

    def _payload_pg_version(self):
        version = self._check.version
        if not version:
            return ""
        return 'v{major}.{minor}.{patch}'.format(major=version.major, minor=version.minor, patch=version.patch)

    def collect_per_statement_metrics(self):
        # exclude the default "db" tag from statement metrics & FQT events because this data is collected from
        # all databases on the host. For metrics the "db" tag is added during ingestion based on which database
        # each query came from.
        try:
            rows = self._collect_metrics_rows()
            if not rows:
                return
            for event in self._rows_to_fqt_events(rows):
                self._check.database_monitoring_query_sample(json.dumps(event, default=default_json_event_encoding))
            # truncate query text to the maximum length supported by metrics tags
            for row in rows:
                row['query'] = row['query'][0:200]
            payload = {
                'host': self._check.resolved_hostname,
                'timestamp': time.time() * 1000,
                'min_collection_interval': self._metrics_collection_interval,
                'tags': self._tags_no_db,
                'cloud_metadata': self._config.cloud_metadata,
                'postgres_rows': rows,
                'postgres_version': self._payload_pg_version(),
                'ddagentversion': datadog_agent.get_version(),
                "ddagenthostname": self._check.agent_hostname,
            }
            self._check.database_monitoring_query_metrics(json.dumps(payload, default=default_json_event_encoding))
        except Exception:
            self._log.exception('Unable to collect statement metrics due to an error')
            return []

    def _load_pg_stat_statements(self):
        try:
            available_columns = set(self._get_pg_stat_statements_columns())
            missing_columns = PG_STAT_STATEMENTS_REQUIRED_COLUMNS - available_columns
            if len(missing_columns) > 0:
                self._check.warning(
                    warning_with_tags(
                        "Unable to collect statement metrics because required fields are unavailable: %s.",
                        ', '.join(sorted(list(missing_columns))),
                        host=self._check.resolved_hostname,
                        dbname=self._config.dbname,
                    ),
                )
                self._check.count(
                    "dd.postgres.statement_metrics.error",
                    1,
                    tags=self._tags
                    + [
                        "error:database-missing_pg_stat_statements_required_columns",
                    ]
                    + self._check._get_debug_tags(),
                    hostname=self._check.resolved_hostname,
                )
                return []

            query_columns = sorted(list(available_columns & PG_STAT_ALL_DESIRED_COLUMNS))
            params = ()
            filters = ""
            if self._config.dbstrict:
                filters = "AND pg_database.datname = %s"
                params = (self._config.dbname,)
            return self._execute_query(
                self._check._get_db(self._config.dbname).cursor(cursor_factory=psycopg2.extras.DictCursor),
                STATEMENTS_QUERY.format(
                    cols=', '.join(query_columns),
                    pg_stat_statements_view=self._config.pg_stat_statements_view,
                    filters=filters,
                    limit=DEFAULT_STATEMENTS_LIMIT,
                ),
                params=params,
            )
        except psycopg2.Error as e:
            error_tag = "error:database-{}".format(type(e).__name__)

            if (
                isinstance(e, psycopg2.errors.ObjectNotInPrerequisiteState)
            ) and 'pg_stat_statements must be loaded' in str(e.pgerror):
                error_tag = "error:database-{}-pg_stat_statements_not_loaded".format(type(e).__name__)
                self._check.record_warning(
                    DatabaseConfigurationError.pg_stat_statements_not_loaded,
                    warning_with_tags(
                        "Unable to collect statement metrics because pg_stat_statements "
                        "extension is not loaded in database '%s'. "
                        "See https://docs.datadoghq.com/database_monitoring/setup_postgres/"
                        "troubleshooting#%s for more details",
                        self._config.dbname,
                        DatabaseConfigurationError.pg_stat_statements_not_loaded.value,
                        host=self._check.resolved_hostname,
                        dbname=self._config.dbname,
                        code=DatabaseConfigurationError.pg_stat_statements_not_loaded.value,
                    ),
                )
            elif isinstance(e, psycopg2.errors.UndefinedTable) and 'pg_stat_statements' in str(e.pgerror):
                error_tag = "error:database-{}-pg_stat_statements_not_created".format(type(e).__name__)
                self._check.record_warning(
                    DatabaseConfigurationError.pg_stat_statements_not_created,
                    warning_with_tags(
                        "Unable to collect statement metrics because pg_stat_statements is not created "
                        "in database '%s'. See https://docs.datadoghq.com/database_monitoring/setup_postgres/"
                        "troubleshooting#%s for more details",
                        self._config.dbname,
                        DatabaseConfigurationError.pg_stat_statements_not_created.value,
                        host=self._check.resolved_hostname,
                        dbname=self._config.dbname,
                        code=DatabaseConfigurationError.pg_stat_statements_not_created.value,
                    ),
                )
            else:
                self._check.warning(
                    warning_with_tags(
                        "Unable to collect statement metrics because of an error running queries "
                        "in database '%s'. See https://docs.datadoghq.com/database_monitoring/troubleshooting for "
                        "help: %s",
                        self._config.dbname,
                        str(e),
                        host=self._check.resolved_hostname,
                        dbname=self._config.dbname,
                    ),
                )

            self._check.count(
                "dd.postgres.statement_metrics.error",
                1,
                tags=self._tags + [error_tag] + self._check._get_debug_tags(),
                hostname=self._check.resolved_hostname,
            )

            return []

    def _emit_pg_stat_statements_metrics(self):
        query = PG_STAT_STATEMENTS_COUNT_QUERY_LT_9_4 if self._check.version < V9_4 else PG_STAT_STATEMENTS_COUNT_QUERY
        try:
            rows = self._execute_query(
                self._check._get_db(self._config.dbname).cursor(cursor_factory=psycopg2.extras.DictCursor),
                query,
            )
            count = 0
            if rows:
                count = rows[0][0]
            self._check.count(
                "postgresql.pg_stat_statements.max",
                self._check.pg_settings.get("pg_stat_statements.max", 0),
                tags=self._tags,
                hostname=self._check.resolved_hostname,
            )
            self._check.count(
                "postgresql.pg_stat_statements.count",
                count,
                tags=self._tags,
                hostname=self._check.resolved_hostname,
            )
        except psycopg2.Error as e:
            self._log.warning("Failed to query for pg_stat_statements count: %s", e)

    def _collect_metrics_rows(self):
        rows = self._load_pg_stat_statements()
        if rows:
            self._emit_pg_stat_statements_metrics()

        rows = self._normalize_queries(rows)
        if not rows:
            return []

        available_columns = set(rows[0].keys())
        metric_columns = available_columns & PG_STAT_STATEMENTS_METRICS_COLUMNS
        rows = self._state.compute_derivative_rows(rows, metric_columns, key=_row_key)
        self._check.gauge(
            'dd.postgres.queries.query_rows_raw',
            len(rows),
            tags=self._tags + self._check._get_debug_tags(),
            hostname=self._check.resolved_hostname,
        )
        return rows

    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                statement = obfuscate_sql_with_metadata(row['query'], self._obfuscate_options)
            except Exception as e:
                # obfuscation errors are relatively common, so only log them at debug level
                self._log.debug("Failed to obfuscate query '%s': %s", row['query'], e)
                continue

            obfuscated_query = statement['query']
            normalized_row['query'] = obfuscated_query
            normalized_row['query_signature'] = compute_sql_signature(obfuscated_query)
            metadata = statement['metadata']
            normalized_row['dd_tables'] = metadata.get('tables', None)
            normalized_row['dd_commands'] = metadata.get('commands', None)
            normalized_rows.append(normalized_row)

        return normalized_rows

    def _rows_to_fqt_events(self, rows):
        for row in rows:
            query_cache_key = _row_key(row)
            if query_cache_key in self._full_statement_text_cache:
                continue
            self._full_statement_text_cache[query_cache_key] = True
            row_tags = self._tags_no_db + [
                "db:{}".format(row['datname']),
                "rolname:{}".format(row['rolname']),
            ]
            yield {
                "timestamp": time.time() * 1000,
                "host": self._check.resolved_hostname,
                "ddagentversion": datadog_agent.get_version(),
                "ddsource": "postgres",
                "ddtags": ",".join(row_tags),
                "dbm_type": "fqt",
                "db": {
                    "instance": row['datname'],
                    "query_signature": row['query_signature'],
                    "statement": row['query'],
                    "metadata": {
                        "tables": row['dd_tables'],
                        "commands": row['dd_commands'],
                    },
                },
                "postgres": {
                    "datname": row["datname"],
                    "rolname": row["rolname"],
                },
            }
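
The _get_pg_stat_statements_columns helper above leans on the fact that a LIMIT 0 query returns no rows while cursor.description still lists the columns of the SELECT. A hedged standalone illustration of that trick, assuming psycopg2 and a database where the pg_stat_statements extension is installed (the DSN is a placeholder):

import psycopg2

# Placeholder DSN; assumes pg_stat_statements is installed in this database.
conn = psycopg2.connect("dbname=postgres user=datadog host=localhost")
try:
    with conn.cursor() as cursor:
        # LIMIT 0 returns no rows, but the cursor description is still populated
        # with the columns of the SELECT list, so we learn which columns this
        # particular pg_stat_statements build exposes.
        cursor.execute("SELECT * FROM pg_stat_statements LIMIT 0")
        columns = [desc[0] for desc in cursor.description] if cursor.description else []
    print(columns)  # e.g. ['userid', 'dbid', 'queryid', 'query', 'calls', ...]
finally:
    conn.close()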
Code Example #12
    def test_compute_derivative_rows_with_duplicates(self):
        sm = StatementMetrics()

        def key(row):
            return (row['query_signature'], row['db'], row['user'])

        metrics = ['count', 'time']

        rows1 = [
            {
                'count': 13,
                'time': 2005,
                'errors': 1,
                'query': 'SELECT * FROM table1 where id = ANY(?)',
                'query_signature': 'sig1',
                'db': 'puppies',
                'user': '******',
            },
            {
                'count': 25,
                'time': 105,
                'errors': 0,
                'query': 'SELECT * FROM table1 where id = ANY(?, ?)',
                'query_signature': 'sig1',
                'db': 'puppies',
                'user': '******',
            },
        ]

        rows2 = [
            {
                'count': 14,
                'time': 2006,
                'errors': 32,
                'query': 'SELECT * FROM table1 where id = ANY(?)',
                'query_signature': 'sig1',
                'db': 'puppies',
                'user': '******',
            },
            {
                'count': 26,
                'time': 125,
                'errors': 1,
                'query': 'SELECT * FROM table1 where id = ANY(?, ?)',
                'query_signature': 'sig1',
                'db': 'puppies',
                'user': '******',
            },
        ]

        # Run a first check to initialize tracking
        sm.compute_derivative_rows(rows1, metrics, key=key)
        # Run the check again to compute the metrics
        metrics = sm.compute_derivative_rows(rows2, metrics, key=key)

        expected_merged_metrics = [{
            'count': 2,
            'time': 21,
            'errors': 32,
            'db': 'puppies',
            'query': 'SELECT * FROM table1 where id = ANY(?)',
            'query_signature': 'sig1',
            'user': '******',
        }]

        assert expected_merged_metrics == metrics
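
The expected values in this test follow directly from the delta-and-merge behaviour: both rows share the same key ('sig1', 'puppies', same user), so their per-row deltas are summed, giving count = (14 - 13) + (26 - 25) = 2 and time = (2006 - 2005) + (125 - 105) = 21. 'errors' is not in the tracked metric list, and judging from the expected output the non-metric fields ('errors': 32 and the query text) are carried over from the first of the duplicate rows rather than aggregated.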
Code Example #13
class PostgresStatementMetrics(object):
    """Collects telemetry for SQL statements"""
    def __init__(self, check, config):
        self._check = check
        self._config = config
        self._db_hostname = None
        self._log = get_check_logger()
        self._state = StatementMetrics()
        self._stat_column_cache = []
        # full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
        self._full_statement_text_cache = TTLCache(
            maxsize=self._config.full_statement_text_cache_max_size,
            ttl=60 * 60 / self._config.full_statement_text_samples_per_hour_per_query,
        )

    def _execute_query(self, cursor, query, params=()):
        try:
            self._log.debug("Running query [%s] %s", query, params)
            cursor.execute(query, params)
            return cursor.fetchall()
        except (psycopg2.ProgrammingError, psycopg2.errors.QueryCanceled) as e:
            # A failed query may have been caused by stale column names in the cache. It's a rare edge case,
            # but clearing the cache ensures the correct columns are retrieved the next time the query runs.
            self._stat_column_cache = []
            self._log.warning('Statement-level metrics are unavailable: %s', e)
            return []

    def _get_pg_stat_statements_columns(self, db):
        """
        Load the list of columns available in the `pg_stat_statements` view. The columns must be queried directly
        because the server version is not a reliable indicator of which columns `pg_stat_statements` exposes: the
        database can be upgraded without upgrading its extensions, even when the extension is included by default.
        """
        if self._stat_column_cache:
            return self._stat_column_cache

        # Selecting '*' with LIMIT 0 fetches only the column names from the cursor description, without any row data
        query = STATEMENTS_QUERY.format(
            cols='*',
            pg_stat_statements_view=self._config.pg_stat_statements_view,
            limit=0,
            filters="")
        cursor = db.cursor()
        self._execute_query(cursor, query, params=(self._config.dbname, ))
        col_names = [desc[0] for desc in cursor.description] if cursor.description else []
        self._stat_column_cache = col_names
        return col_names

    def _db_hostname_cached(self):
        if self._db_hostname:
            return self._db_hostname
        self._db_hostname = resolve_db_host(self._config.host)
        return self._db_hostname

    def collect_per_statement_metrics(self, db, db_version, tags):
        try:
            rows = self._collect_metrics_rows(db)
            if not rows:
                return
            for event in self._rows_to_fqt_events(rows, tags):
                self._check.database_monitoring_query_sample(
                    json.dumps(event, default=default_json_event_encoding))
            # truncate query text to the maximum length supported by metrics tags
            for row in rows:
                row['query'] = row['query'][0:200]
            payload = {
                'host': self._db_hostname_cached(),
                'timestamp': time.time() * 1000,
                'min_collection_interval': self._config.min_collection_interval,
                'tags': tags,
                'postgres_rows': rows,
                'postgres_version': 'v{major}.{minor}.{patch}'.format(
                    major=db_version.major, minor=db_version.minor, patch=db_version.patch
                ),
            }
            self._check.database_monitoring_query_metrics(
                json.dumps(payload, default=default_json_event_encoding))
        except Exception:
            db.rollback()
            self._log.exception(
                'Unable to collect statement metrics due to an error')
            return []

    def _load_pg_stat_statements(self, db):
        available_columns = set(self._get_pg_stat_statements_columns(db))
        missing_columns = PG_STAT_STATEMENTS_REQUIRED_COLUMNS - available_columns
        if len(missing_columns) > 0:
            self._log.warning(
                'Unable to collect statement metrics because required fields are unavailable: %s',
                ', '.join(list(missing_columns)),
            )
            return []

        query_columns = sorted(
            list(available_columns & PG_STAT_ALL_DESIRED_COLUMNS))
        params = ()
        filters = ""
        if self._config.dbstrict:
            filters = "AND pg_database.datname = %s"
            params = (self._config.dbname, )
        return self._execute_query(
            db.cursor(cursor_factory=psycopg2.extras.DictCursor),
            STATEMENTS_QUERY.format(
                cols=', '.join(query_columns),
                pg_stat_statements_view=self._config.pg_stat_statements_view,
                filters=filters,
                limit=DEFAULT_STATEMENTS_LIMIT,
            ),
            params=params,
        )

    def _collect_metrics_rows(self, db):
        rows = self._load_pg_stat_statements(db)

        rows = self._normalize_queries(rows)
        if not rows:
            return []

        available_columns = set(rows[0].keys())
        metric_columns = available_columns & PG_STAT_STATEMENTS_METRICS_COLUMNS
        rows = self._state.compute_derivative_rows(rows,
                                                   metric_columns,
                                                   key=_row_key)
        self._check.gauge('dd.postgres.queries.query_rows_raw', len(rows))
        return rows

    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['query'])
            except Exception as e:
                # obfuscation errors are relatively common, so only log them at debug level
                self._log.debug("Failed to obfuscate query '%s': %s",
                                row['query'], e)
                continue

            normalized_row['query'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(
                obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows

    def _rows_to_fqt_events(self, rows, tags):
        for row in rows:
            query_cache_key = _row_key(row)
            if query_cache_key in self._full_statement_text_cache:
                continue
            self._full_statement_text_cache[query_cache_key] = True
            row_tags = tags + [
                "db:{}".format(row['datname']),
                "rolname:{}".format(row['rolname']),
            ]
            yield {
                "timestamp": time.time() * 1000,
                "host": self._db_hostname_cached(),
                "ddsource": "postgres",
                "ddtags": ",".join(row_tags),
                "dbm_type": "fqt",
                "db": {
                    "instance": row['datname'],
                    "query_signature": row['query_signature'],
                    "statement": row['query'],
                },
                "postgres": {
                    "datname": row["datname"],
                    "rolname": row["rolname"],
                },
            }
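
The _full_statement_text_cache seen in both Postgres variants is the standard pattern for capping how often the full statement text (FQT) event is emitted: a cachetools.TTLCache whose TTL is 3600 / samples_per_hour, so an entry can be re-emitted only after the previous one expires. A small sketch of the idea, keyed here simply by query_signature for brevity (the real code uses a richer row key); emit_fqt_event is a hypothetical stand-in for database_monitoring_query_sample.

from cachetools import TTLCache

samples_per_hour_per_query = 1
fqt_cache = TTLCache(maxsize=10000, ttl=60 * 60 / samples_per_hour_per_query)

def maybe_emit_fqt(row, emit_fqt_event):
    # One event per cache key per TTL window; later duplicates are dropped
    # until the cached entry expires.
    cache_key = row['query_signature']
    if cache_key in fqt_cache:
        return False
    fqt_cache[cache_key] = True
    emit_fqt_event(row)
    return True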