Example #1
 def test_compute_sql_signature(self):
     """
     This is a simple test which only exists to validate the consistency of query hashes
     when changes are made to the hashing algorithm. Changes to the hash can have
     product impact since the backend expects consistency with the APM resource hash.
     """
     assert '11b755a835280e8e' == compute_sql_signature('select * from dogs')
     assert 'd2a193f97126ad67' == compute_sql_signature('update dogs set name = ? where id = ?')
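
All of the examples below assume compute_sql_signature deterministically maps a normalized query string to a short hex hash. As a rough illustration of the shape of such a function (a minimal sketch, not the library's actual implementation, so it will not reproduce the exact values asserted above), one could hash the text and keep 64 bits:

    import hashlib

    def compute_sql_signature_sketch(normalized_query):
        # Hypothetical stand-in: hash the already-normalized query text and keep
        # the first 8 bytes of the digest as a hex string. The real function may
        # use a different hash, so this does NOT reproduce the asserted values.
        if not normalized_query:
            return None
        return hashlib.md5(normalized_query.encode('utf-8')).hexdigest()[:16]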
Example #2
 def _normalize_queries(self, rows):
     normalized_rows = []
     for row in rows:
         try:
             statement = obfuscate_sql_with_metadata(row['text'], self.check.obfuscator_options)
         except Exception as e:
             # obfuscation errors are relatively common so only log them during debugging
             self.log.debug("Failed to obfuscate query: %s", e)
             self.check.count(
                 "dd.sqlserver.statements.error",
                 1,
                 **self.check.debug_stats_kwargs(tags=["error:obfuscate-query-{}".format(type(e))])
             )
             continue
         obfuscated_statement = statement['query']
         row['text'] = obfuscated_statement
         row['query_signature'] = compute_sql_signature(obfuscated_statement)
         row['query_hash'] = _hash_to_hex(row['query_hash'])
         row['query_plan_hash'] = _hash_to_hex(row['query_plan_hash'])
         row['plan_handle'] = _hash_to_hex(row['plan_handle'])
         metadata = statement['metadata']
         row['dd_tables'] = metadata.get('tables', None)
         row['dd_commands'] = metadata.get('commands', None)
         row['dd_comments'] = metadata.get('comments', None)
         normalized_rows.append(row)
     return normalized_rows
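
Example #2 also calls a _hash_to_hex helper that is not shown. On SQL Server, query_hash, query_plan_hash, and plan_handle come back from the DMVs as raw binary, so a minimal sketch of that helper (an assumption, not the check's verbatim code) just renders bytes as hex:

    import binascii

    def _hash_to_hex(hash_value):
        # Hypothetical sketch: render a raw binary hash as a hex string and pass
        # through values that are missing or already strings.
        if hash_value is None or isinstance(hash_value, str):
            return hash_value
        return binascii.hexlify(hash_value).decode('ascii')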
Example #3
    def _finalize_row(row, statement):
        # type: (Dict[str, Any], Dict[str, Any]) -> None
        obfuscated_statement = statement["query"]
        row["sql_text"] = obfuscated_statement
        row["query_signature"] = compute_sql_signature(obfuscated_statement)

        metadata = statement["metadata"]
        row["dd_commands"] = metadata.get("commands", None)
        row["dd_tables"] = metadata.get("tables", None)
        row["dd_comments"] = metadata.get("comments", None)
Example #4
    def _collect_per_statement_metrics(self, db):
        # type: (pymysql.connections.Connection) -> List[Metric]
        metrics = []  # type: List[Metric]

        def keyfunc(row):
            return (row['schema'], row['digest'])

        monotonic_rows = self._query_summary_per_statement(db)
        monotonic_rows = self._merge_duplicate_rows(monotonic_rows,
                                                    key=keyfunc)
        rows = self._state.compute_derivative_rows(monotonic_rows,
                                                   STATEMENT_METRICS.keys(),
                                                   key=keyfunc)
        metrics.append(('dd.mysql.queries.query_rows_raw', len(rows), []))

        rows = generate_synthetic_rows(rows)
        rows = apply_row_limits(
            rows,
            self.config.statement_metrics_limits
            or DEFAULT_STATEMENT_METRICS_LIMITS,
            tiebreaker_metric='count',
            tiebreaker_reverse=True,
            key=keyfunc,
        )
        metrics.append(('dd.mysql.queries.query_rows_limited', len(rows), []))

        for row in rows:
            tags = []
            tags.append('digest:' + row['digest'])
            if row['schema'] is not None:
                tags.append('schema:' + row['schema'])

            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['query'])
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['query'], e)
                continue
            tags.append('query_signature:' +
                        compute_sql_signature(obfuscated_statement))
            tags.append('query:' +
                        normalize_query_tag(obfuscated_statement).strip())

            for col, name in STATEMENT_METRICS.items():
                value = row[col]
                metrics.append((name, value, tags))

        return metrics
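
The counters read from the statement summary table in Example #4 are monotonically increasing totals, so _state.compute_derivative_rows must diff two consecutive snapshots to get per-interval values. A minimal sketch of that idea (the name, signature, and reset handling are assumptions, not the actual implementation):

    def compute_derivative_rows_sketch(previous_rows, current_rows, metric_columns, key):
        # Hypothetical sketch: subtract the previous snapshot's monotonic counters
        # from the current one, matching rows by key (e.g. (schema, digest)).
        prev_by_key = {key(row): row for row in previous_rows}
        derived = []
        for row in current_rows:
            prev = prev_by_key.get(key(row))
            if prev is None:
                continue  # first time we see this query; no baseline yet
            diffs = {col: row[col] - prev[col] for col in metric_columns}
            if any(v < 0 for v in diffs.values()):
                continue  # counters went backwards, e.g. stats were reset
            derived.append(dict(row, **diffs))
        return derived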
Example #5
    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
            except Exception as e:
                # obfuscation errors are relatively common so only log them during debugging
                self._log.debug("Failed to obfuscate query '%s': %s", row['query'], e)
                continue

            normalized_row['query'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows
Example #6
 def _obfuscate_and_sanitize_row(self, row):
     row = self._remove_null_vals(row)
     if 'text' not in row:
         return row
     try:
         statement = obfuscate_sql_with_metadata(row['text'], self.check.obfuscator_options)
         obfuscated_statement = statement['query']
         metadata = statement['metadata']
         row['dd_commands'] = metadata.get('commands', None)
         row['dd_tables'] = metadata.get('tables', None)
         row['dd_comments'] = metadata.get('comments', None)
         row['query_signature'] = compute_sql_signature(obfuscated_statement)
     except Exception as e:
         # obfuscation errors are relatively common so only log them during debugging
         self.log.debug("Failed to obfuscate query: %s", e)
         obfuscated_statement = "ERROR: failed to obfuscate"
     row = self._sanitize_row(row, obfuscated_statement)
     return row
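
Example #6 relies on two helpers that are not shown, _remove_null_vals and _sanitize_row. A plausible sketch of both, under the assumption that sanitizing means dropping NULL columns and replacing the raw text with the obfuscated statement:

    def _remove_null_vals(row):
        # Hypothetical sketch: drop columns whose value is NULL.
        return {key: val for key, val in row.items() if val is not None}

    def _sanitize_row(row, obfuscated_statement):
        # Hypothetical sketch: never keep the raw SQL text on the row; replace it
        # with the obfuscated statement before the row is submitted.
        sanitized = dict(row)
        sanitized['text'] = obfuscated_statement
        return sanitized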
Example #7
    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['digest_text'])
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['digest_text'], e)
                continue

            normalized_row['digest_text'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(
                obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows
Example #8
    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                statement = obfuscate_sql_with_metadata(row['query'], self._obfuscate_options)
            except Exception as e:
                # obfuscation errors are relatively common so only log them during debugging
                self._log.debug("Failed to obfuscate query '%s': %s", row['query'], e)
                continue

            obfuscated_query = statement['query']
            normalized_row['query'] = obfuscated_query
            normalized_row['query_signature'] = compute_sql_signature(obfuscated_query)
            metadata = statement['metadata']
            normalized_row['dd_tables'] = metadata.get('tables', None)
            normalized_row['dd_commands'] = metadata.get('commands', None)
            normalized_rows.append(normalized_row)

        return normalized_rows
Example #9
    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                statement = obfuscate_sql_with_metadata(
                    row['digest_text'], self._obfuscate_options)
                obfuscated_statement = (
                    statement['query'] if row['digest_text'] is not None else None)
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['digest_text'], e)
                continue

            normalized_row['digest_text'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(
                obfuscated_statement)
            metadata = statement['metadata']
            normalized_row['dd_tables'] = metadata.get('tables', None)
            normalized_row['dd_commands'] = metadata.get('commands', None)
            normalized_rows.append(normalized_row)

        return normalized_rows
Example #10
 def _normalize_row(self, row):
     normalized_row = dict(copy.copy(row))
     obfuscated_query = None
     try:
         statement = obfuscate_sql_with_metadata(row['query'],
                                                 self._obfuscate_options)
         obfuscated_query = statement['query']
         metadata = statement['metadata']
         normalized_row['query_signature'] = compute_sql_signature(
             obfuscated_query)
         normalized_row['dd_tables'] = metadata.get('tables', None)
         normalized_row['dd_commands'] = metadata.get('commands', None)
         normalized_row['dd_comments'] = metadata.get('comments', None)
     except Exception as e:
         self._log.debug("Failed to obfuscate statement: %s", e)
         self._check.count(
             "dd.postgres.statement_samples.error",
             1,
             tags=self._dbtags(row['datname'], "error:sql-obfuscate") +
             self._check._get_debug_tags(),
             hostname=self._check.resolved_hostname,
         )
     normalized_row['statement'] = obfuscated_query
     return normalized_row
Example #11
    def _collect_per_statement_metrics(self, db):
        metrics = []

        available_columns = self._get_pg_stat_statements_columns(db)
        missing_columns = PG_STAT_STATEMENTS_REQUIRED_COLUMNS - set(
            available_columns)
        if len(missing_columns) > 0:
            self.log.warning(
                'Unable to collect statement metrics because required fields are unavailable: %s',
                ', '.join(list(missing_columns)),
            )
            return metrics

        desired_columns = (list(PG_STAT_STATEMENTS_METRIC_COLUMNS.keys()) +
                           list(PG_STAT_STATEMENTS_OPTIONAL_COLUMNS) +
                           list(PG_STAT_STATEMENTS_TAG_COLUMNS.keys()))
        query_columns = list(
            set(desired_columns) & set(available_columns)
            | set(PG_STAT_STATEMENTS_TAG_COLUMNS.keys()))
        rows = self._execute_query(
            db.cursor(cursor_factory=psycopg2.extras.DictCursor),
            STATEMENTS_QUERY.format(
                cols=', '.join(query_columns),
                pg_stat_statements_view=self.config.pg_stat_statements_view,
                limit=DEFAULT_STATEMENTS_LIMIT,
            ),
            params=(self.config.dbname, ),
        )
        if not rows:
            return metrics

        def row_keyfunc(row):
            # old versions of pg_stat_statements don't have a query ID so fall back to the query string itself
            queryid = row['queryid'] if 'queryid' in row else row['query']
            return (queryid, row['datname'], row['rolname'])

        rows = self._state.compute_derivative_rows(
            rows, PG_STAT_STATEMENTS_METRIC_COLUMNS.keys(), key=row_keyfunc)
        rows = apply_row_limits(rows,
                                DEFAULT_STATEMENT_METRIC_LIMITS,
                                tiebreaker_metric='calls',
                                tiebreaker_reverse=True,
                                key=row_keyfunc)

        for row in rows:
            try:
                normalized_query = datadog_agent.obfuscate_sql(row['query'])
                if not normalized_query:
                    self.log.warning(
                        "Obfuscation of query '%s' resulted in empty query",
                        row['query'])
                    continue
            except Exception as e:
                # If query obfuscation fails, it is acceptable to log the raw query here because the
                # pg_stat_statements table contains no parameters in the raw queries.
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['query'], e)
                continue

            query_signature = compute_sql_signature(normalized_query)

            # All "Deep Database Monitoring" statement-level metrics are tagged with a `query_signature`
            # which uniquely identifies the normalized query family. Where possible, this hash should
            # match the hash of APM "resources" (https://docs.datadoghq.com/tracing/visualization/resource/)
            # when the resource is a SQL query. Postgres' query normalization in the `pg_stat_statements` table
            # preserves most of the original query, so we tag the `resource_hash` with the same value as the
            # `query_signature`. The `resource_hash` tag should match the *actual* APM resource hash most of
            # the time, but not always. So this is a best-effort approach to link these metrics to APM metrics.
            tags = [
                'query_signature:' + query_signature,
                'resource_hash:' + query_signature
            ]

            for column, tag_name in PG_STAT_STATEMENTS_TAG_COLUMNS.items():
                if column not in row:
                    continue
                value = row[column]
                if column == 'query':
                    value = normalize_query_tag(normalized_query)
                tags.append('{tag_name}:{value}'.format(tag_name=tag_name,
                                                        value=value))

            for column, metric_name in PG_STAT_STATEMENTS_METRIC_COLUMNS.items():
                if column not in row:
                    continue
                value = row[column]
                if column == 'total_time':
                    # All "Deep Database Monitoring" timing metrics are in nanoseconds
                    # Postgres tracks pg_stat* timing stats in milliseconds
                    value = milliseconds_to_nanoseconds(value)
                metrics.append((metric_name, value, tags))

        return metrics
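
Example #11 converts total_time with milliseconds_to_nanoseconds before submission. Assuming that helper is a plain unit conversion, it reduces to:

    def milliseconds_to_nanoseconds(value):
        # pg_stat_statements reports timings in milliseconds; the backend
        # expects nanoseconds (1 ms = 1,000,000 ns).
        return value * 1000000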
Example #12
    def _collect_plan_for_statement(self, row):
        # Plans have several important signatures to tag events with:
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the digest text to match query metrics
        try:
            statement = obfuscate_sql_with_metadata(row['sql_text'],
                                                    self._obfuscate_options)
            statement_digest_text = obfuscate_sql_with_metadata(
                row['digest_text'], self._obfuscate_options)
        except Exception:
            # do not log the raw sql_text to avoid leaking sensitive data into logs. digest_text is safe as parameters
            # are obfuscated by the database
            self._log.debug("Failed to obfuscate statement: %s",
                            row['digest_text'])
            self._check.count(
                "dd.mysql.query_samples.error",
                1,
                tags=self._tags + ["error:sql-obfuscate"] +
                self._check._get_debug_tags(),
                hostname=self._check.resolved_hostname,
            )
            return None

        obfuscated_statement = statement['query']
        obfuscated_digest_text = statement_digest_text['query']
        apm_resource_hash = compute_sql_signature(obfuscated_statement)
        query_signature = compute_sql_signature(obfuscated_digest_text)

        query_cache_key = (row['current_schema'], query_signature)
        if not self._explained_statements_ratelimiter.acquire(query_cache_key):
            return None

        with closing(self._get_db_connection().cursor()) as cursor:
            plan, error_states = self._explain_statement(
                cursor, row['sql_text'], row['current_schema'],
                obfuscated_statement, query_signature)

        collection_errors = []
        if error_states:
            for state in error_states:
                error_tag = "error:explain-{}-{}".format(
                    state.error_code, state.error_message)
                self._check.count(
                    "dd.mysql.query_samples.error",
                    1,
                    tags=self._tags + [error_tag] +
                    self._check._get_debug_tags(),
                    hostname=self._check.resolved_hostname,
                )
                collection_errors.append({
                    'strategy': state.strategy,
                    'code': state.error_code.value if state.error_code else None,
                    'message': state.error_message,
                })

        normalized_plan, obfuscated_plan, plan_signature = None, None, None
        if plan:
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)

        query_plan_cache_key = (query_cache_key, plan_signature)
        if self._seen_samples_ratelimiter.acquire(query_plan_cache_key):
            return {
                "timestamp": row["timer_end_time_s"] * 1000,
                "host": self._check.resolved_hostname,
                "ddagentversion": datadog_agent.get_version(),
                "ddsource": "mysql",
                "ddtags": self._tags_str,
                "duration": row['timer_wait_ns'],
                "network": {
                    "client": {
                        "ip": row.get('processlist_host', None),
                    }
                },
                "db": {
                    "instance": row['current_schema'],
                    "plan": {
                        "definition":
                        obfuscated_plan,
                        "signature":
                        plan_signature,
                        "collection_errors":
                        collection_errors if collection_errors else None,
                    },
                    "query_signature": query_signature,
                    "resource_hash": apm_resource_hash,
                    "statement": obfuscated_statement,
                    "metadata": {
                        "tables": statement['metadata'].get('tables', None),
                        "commands":
                        statement['metadata'].get('commands', None),
                        "comments":
                        statement['metadata'].get('comments', None),
                    },
                    "query_truncated":
                    get_truncation_state(row['sql_text']).value,
                },
                'mysql': {
                    k: v
                    for k, v in row.items()
                    if k not in EVENTS_STATEMENTS_SAMPLE_EXCLUDE_KEYS
                },
            }
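
Example #12 gates expensive EXPLAIN calls and repeated samples behind ratelimiter objects whose implementation is not shown. A minimal sketch of such a keyed limiter, assuming acquire(key) may succeed at most once per key within a TTL window:

    import time

    class KeyedTTLRateLimiterSketch(object):
        # Hypothetical sketch of _explained_statements_ratelimiter /
        # _seen_samples_ratelimiter: acquire(key) returns True at most once
        # per key every ttl_seconds.
        def __init__(self, ttl_seconds):
            self._ttl = ttl_seconds
            self._last_acquired = {}

        def acquire(self, key):
            now = time.time()
            last = self._last_acquired.get(key)
            if last is not None and now - last < self._ttl:
                return False
            self._last_acquired[key] = now
            return True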
Example #13
def test_statement_metrics(aggregator, integration_check, dbm_instance,
                           dbstrict, pg_stat_statements_view):
    dbm_instance['dbstrict'] = dbstrict
    dbm_instance['pg_stat_statements_view'] = pg_stat_statements_view
    # don't need samples for this test
    dbm_instance['statement_samples'] = {'enabled': False}
    connections = {}

    def _run_queries():
        for user, password, dbname, query, arg in SAMPLE_QUERIES:
            if dbname not in connections:
                connections[dbname] = psycopg2.connect(host=HOST,
                                                       dbname=dbname,
                                                       user=user,
                                                       password=password)
            connections[dbname].cursor().execute(query, (arg, ))

    check = integration_check(dbm_instance)
    check._connect()

    _run_queries()
    check.check(dbm_instance)
    _run_queries()
    check.check(dbm_instance)

    def _should_catch_query(dbname):
        # we can always catch the query if it originates in the same DB
        # when dbstrict=True we expect to only capture those queries for the initial database to which the
        # agent is connecting
        if POSTGRES_VERSION.split('.')[0] == "9" and pg_stat_statements_view == "pg_stat_statements":
            # cannot catch any queries from other users;
            # we can only see our own queries
            return False
        if dbstrict and dbname != dbm_instance['dbname']:
            return False
        return True

    events = aggregator.get_event_platform_events("dbm-metrics")
    assert len(events) == 1
    event = events[0]

    assert event['host'] == 'stubbed.hostname'
    assert event['timestamp'] > 0
    assert event['min_collection_interval'] == dbm_instance['min_collection_interval']
    assert set(event['tags']) == {
        'foo:bar', 'server:{}'.format(HOST), 'port:{}'.format(PORT),
        'db:datadog_test'
    }
    obfuscated_param = '?' if POSTGRES_VERSION.split('.')[0] == "9" else '$1'

    for username, _, dbname, query, _ in SAMPLE_QUERIES:
        expected_query = query % obfuscated_param
        query_signature = compute_sql_signature(expected_query)
        matching_rows = [
            r for r in event['postgres_rows']
            if r['query_signature'] == query_signature
        ]
        if not _should_catch_query(dbname):
            assert len(matching_rows) == 0
            continue
        assert len(matching_rows) == 1
        row = matching_rows[0]
        assert row['calls'] == 1
        assert row['datname'] == dbname
        assert row['rolname'] == username
        assert row['query'] == expected_query
        for col in PG_STAT_STATEMENTS_METRICS_COLUMNS:
            assert type(row[col]) in (float, int)

    for conn in connections.values():
        conn.close()
Example #14
    def _collect_plan_for_statement(self, row):
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self._log.debug("Failed to obfuscate statement: %s", e)
            self._check.count("dd.postgres.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
            return None

        # limit the rate of explains done to the database
        query_signature = compute_sql_signature(obfuscated_statement)
        if query_signature in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_signature] = True

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict = self._run_explain(row['query'], obfuscated_statement)
        plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            # if we're using the orjson implementation then json.dumps returns bytes
            plan = plan.decode('utf-8') if isinstance(plan, bytes) else plan
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

        statement_plan_sig = (query_signature, plan_signature)
        if statement_plan_sig not in self._seen_samples_cache:
            self._seen_samples_cache[statement_plan_sig] = True
            event = {
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "postgres",
                "ddtags": self._tags_str,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance": row.get('datname', None),
                    "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
                    "query_signature": query_signature,
                    "resource_hash": query_signature,
                    "application": row.get('application_name', None),
                    "user": row['usename'],
                    "statement": obfuscated_statement,
                },
                'postgres': {k: v for k, v in row.items() if k not in pg_stat_activity_sample_exclude_keys},
            }
            event['timestamp'] = time.time() * 1000
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (row['state_change'] - row['query_start']).total_seconds() * 1e9
                    # If the transaction is idle then we have a more specific "end time" than the current time at
                    # which we're collecting this event. According to the postgres docs, all of the timestamps in
                    # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
                    # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
                    # of the event else we risk the timestamp being significantly off and the event getting dropped
                    # during ingestion.
                    if row['state_change'].tzinfo:
                        event['timestamp'] = get_timestamp(row['state_change']) * 1000
            return event
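
Examples #14 and #15 derive plan_signature from the normalized JSON plan via compute_exec_plan_signature. A minimal sketch, assuming the signature is a hash over the plan JSON with key order made deterministic (not necessarily the library's exact algorithm):

    import hashlib
    import json

    def compute_exec_plan_signature_sketch(normalized_json_plan):
        # Hypothetical sketch: re-serialize with sorted keys so identical plan
        # trees always hash to the same value, then keep 64 bits as hex.
        if not normalized_json_plan:
            return None
        canonical = json.dumps(json.loads(normalized_json_plan), sort_keys=True)
        return hashlib.md5(canonical.encode('utf-8')).hexdigest()[:16]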
Example #15
    def _collect_plan_for_statement(self, row):
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self._log.debug("Failed to obfuscate statement: %s", e)
            self._check.count("dd.postgres.statement_samples.error",
                              1,
                              tags=self._tags + ["error:sql-obfuscate"])
            return None

        # limit the rate of explains done to the database
        query_signature = compute_sql_signature(obfuscated_statement)
        if query_signature in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_signature] = True

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict = self._run_explain(row['query'], obfuscated_statement)
        plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(
                plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

        statement_plan_sig = (query_signature, plan_signature)
        if statement_plan_sig not in self._seen_samples_cache:
            self._seen_samples_cache[statement_plan_sig] = True
            event = {
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "postgres",
                "ddtags": self._tags_str,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance": row.get('datname', None),
                    "plan": {
                        "definition": obfuscated_plan,
                        "cost": plan_cost,
                        "signature": plan_signature
                    },
                    "query_signature": query_signature,
                    "resource_hash": query_signature,
                    "application": row.get('application_name', None),
                    "user": row['usename'],
                    "statement": obfuscated_statement,
                },
                'postgres': {
                    k: v
                    for k, v in row.items()
                    if k not in pg_stat_activity_sample_exclude_keys
                },
            }
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (
                        row['state_change'] -
                        row['query_start']).total_seconds() * 1e9
                    event['timestamp'] = time.mktime(
                        row['state_change'].timetuple()) * 1000
            else:
                event['timestamp'] = time.time() * 1000
            return event
Example #16
    def _collect_plan_for_statement(self, row):
        # Plans have several important signatures to tag events with:
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the digest text to match query metrics

        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['sql_text'])
            obfuscated_digest_text = datadog_agent.obfuscate_sql(
                row['digest_text'])
        except Exception:
            # do not log the raw sql_text to avoid leaking sensitive data into logs. digest_text is safe as parameters
            # are obfuscated by the database
            self._log.debug("Failed to obfuscate statement: %s",
                            row['digest_text'])
            self._check.count("dd.mysql.statement_samples.error",
                              1,
                              tags=self._tags + ["error:sql-obfuscate"])
            return None

        apm_resource_hash = compute_sql_signature(obfuscated_statement)
        query_signature = compute_sql_signature(obfuscated_digest_text)

        query_cache_key = (row['current_schema'], query_signature)
        if query_cache_key in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_cache_key] = True

        plan = None
        with closing(self._get_db_connection().cursor()) as cursor:
            try:
                plan = self._explain_statement(cursor, row['sql_text'],
                                               row['current_schema'],
                                               obfuscated_statement)
            except Exception as e:
                self._check.count("dd.mysql.statement_samples.error",
                                  1,
                                  tags=self._tags +
                                  ["error:explain-{}".format(type(e))])
                self._log.exception("Failed to explain statement: %s",
                                    obfuscated_statement)

        normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None
        if plan:
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = self._parse_execution_plan_cost(plan)

        query_plan_cache_key = (query_cache_key, plan_signature)
        if query_plan_cache_key not in self._seen_samples_cache:
            self._seen_samples_cache[query_plan_cache_key] = True
            return {
                "timestamp": row["timer_end_time_s"] * 1000,
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "mysql",
                "ddtags": self._tags_str,
                "duration": row['timer_wait_ns'],
                "network": {
                    "client": {
                        "ip": row.get('processlist_host', None),
                    }
                },
                "db": {
                    "instance": row['current_schema'],
                    "plan": {
                        "definition": obfuscated_plan,
                        "cost": plan_cost,
                        "signature": plan_signature
                    },
                    "query_signature": query_signature,
                    "resource_hash": apm_resource_hash,
                    "statement": obfuscated_statement,
                },
                'mysql': {
                    k: v
                    for k, v in row.items()
                    if k not in EVENTS_STATEMENTS_SAMPLE_EXCLUDE_KEYS
                },
            }
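
Example #16 reads the plan cost via _parse_execution_plan_cost, which is not shown. Assuming the plan is MySQL's EXPLAIN FORMAT=JSON output, where the top-level query_block carries cost_info.query_cost as a string, a sketch could be:

    import json

    def _parse_execution_plan_cost(plan_json):
        # Hypothetical sketch: pull the estimated total cost out of MySQL's
        # JSON explain output, defaulting to 0.0 when it is absent.
        plan = json.loads(plan_json)
        cost = plan.get('query_block', {}).get('cost_info', {}).get('query_cost', 0.0)
        return float(cost or 0.0)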
Example #17
def test_statement_metrics(aggregator, dbm_instance, query, default_schema,
                           datadog_agent):
    mysql_check = MySql(common.CHECK_NAME, {}, instances=[dbm_instance])

    def run_query(q):
        with mysql_check._connect() as db:
            with closing(db.cursor()) as cursor:
                if default_schema:
                    cursor.execute("USE " + default_schema)
                cursor.execute(q)

    with mock.patch.object(datadog_agent, 'obfuscate_sql',
                           passthrough=True) as mock_agent:
        mock_agent.side_effect = _obfuscate_sql

        # Run a query
        run_query(query)
        mysql_check.check(dbm_instance)

        # Run the query and check a second time so statement metrics are computed from the previous run
        run_query(query)
        mysql_check.check(dbm_instance)

    events = aggregator.get_event_platform_events("dbm-metrics")
    assert len(events) == 1
    event = events[0]

    assert event['host'] == 'stubbed.hostname'
    assert event['timestamp'] > 0
    assert event['min_collection_interval'] == 15
    assert set(event['tags']) == set(
        tags.METRIC_TAGS +
        ['server:{}'.format(common.HOST), 'port:{}'.format(common.PORT)])

    query_signature = compute_sql_signature(query)
    matching_rows = [
        r for r in event['mysql_rows']
        if r['query_signature'] == query_signature
    ]
    assert len(matching_rows) == 1
    row = matching_rows[0]

    assert row['digest']
    assert row['schema_name'] == default_schema
    assert row['digest_text'].strip() == query.strip()[0:200]

    for col in statements.METRICS_COLUMNS:
        assert type(row[col]) in (float, int)

    events = aggregator.get_event_platform_events("dbm-samples")
    assert len(events) > 0
    fqt_events = [e for e in events if e.get('dbm_type') == 'fqt']
    assert len(fqt_events) > 0
    matching = [
        e for e in fqt_events if e['db']['query_signature'] == query_signature
    ]
    assert len(matching) == 1
    event = matching[0]
    assert event['db']['query_signature'] == query_signature
    assert event['db']['statement'] == query
    assert event['mysql']['schema'] == default_schema
    assert event['timestamp'] > 0
    assert event['host'] == 'stubbed.hostname'
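
The test in Example #17 patches datadog_agent.obfuscate_sql with an _obfuscate_sql helper that is not shown. A minimal test double (an assumption sufficient for the assertions above, not the agent's real obfuscator) could strip literals like this:

    import re

    def _obfuscate_sql(query, options=None):
        # Hypothetical test double: replace string and numeric literals with '?'
        # and collapse whitespace, roughly mimicking what the obfuscator does.
        query = re.sub(r"'[^']*'", '?', query)  # string literals
        query = re.sub(r'\b\d+\b', '?', query)  # numeric literals
        return re.sub(r'\s+', ' ', query).strip()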