Example #1
    def _collect_per_statement_metrics(self, db):
        # type: (pymysql.connections.Connection) -> List[Metric]
        metrics = []  # type: List[Metric]

        def keyfunc(row):
            return (row['schema'], row['digest'])

        monotonic_rows = self._query_summary_per_statement(db)
        monotonic_rows = self._merge_duplicate_rows(monotonic_rows,
                                                    key=keyfunc)
        rows = self._state.compute_derivative_rows(monotonic_rows,
                                                   STATEMENT_METRICS.keys(),
                                                   key=keyfunc)
        metrics.append(('dd.mysql.queries.query_rows_raw', len(rows), []))

        rows = generate_synthetic_rows(rows)
        rows = apply_row_limits(
            rows,
            self.config.statement_metrics_limits
            or DEFAULT_STATEMENT_METRICS_LIMITS,
            tiebreaker_metric='count',
            tiebreaker_reverse=True,
            key=keyfunc,
        )
        metrics.append(('dd.mysql.queries.query_rows_limited', len(rows), []))

        for row in rows:
            tags = []
            tags.append('digest:' + row['digest'])
            if row['schema'] is not None:
                tags.append('schema:' + row['schema'])

            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['query'])
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['query'], e)
                continue
            tags.append('query_signature:' +
                        compute_sql_signature(obfuscated_statement))
            tags.append('query:' +
                        normalize_query_tag(obfuscated_statement).strip())

            for col, name in STATEMENT_METRICS.items():
                value = row[col]
                metrics.append((name, value, tags))

        return metrics
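
`compute_derivative_rows` is not shown in this example; below is a minimal sketch of the idea it implements, assuming the state object keeps the previous run's counter values keyed by `keyfunc`. The names and the counter-reset handling are illustrative, not the check's actual implementation.

def compute_derivative_rows(rows, metrics, key, previous_statements):
    """Turn monotonically increasing counters into per-interval deltas."""
    result = []
    current = {}
    for row in rows:
        row_key = key(row)
        current[row_key] = row
        prev = previous_statements.get(row_key)
        if prev is None:
            # First time we see this statement: no baseline to diff against.
            continue
        diffed = dict(row, **{m: row[m] - prev[m] for m in metrics})
        # A counter reset (e.g. performance_schema truncation) yields negative
        # deltas; drop the row rather than report a bogus value.
        if all(diffed[m] >= 0 for m in metrics):
            result.append(diffed)
    previous_statements.clear()
    previous_statements.update(current)
    return result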
Example #2
    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
            except Exception as e:
                # obfuscation errors are relatively common so only log them during debugging
                self._log.debug("Failed to obfuscate query '%s': %s", row['query'], e)
                continue

            normalized_row['query'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows
Example #3
    def _normalize_queries(self, rows):
        normalized_rows = []
        for row in rows:
            normalized_row = dict(copy.copy(row))
            try:
                obfuscated_statement = datadog_agent.obfuscate_sql(
                    row['digest_text'])
            except Exception as e:
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['digest_text'], e)
                continue

            normalized_row['digest_text'] = obfuscated_statement
            normalized_row['query_signature'] = compute_sql_signature(
                obfuscated_statement)
            normalized_rows.append(normalized_row)

        return normalized_rows
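
Both variants of `_normalize_queries` lean on `compute_sql_signature`, which is imported from the shared utilities and not shown here. A stand-in with the same shape, a stable short hash of the obfuscated text, is enough to run these snippets; the real helper may use a different hash function.

import hashlib

def compute_sql_signature(obfuscated_statement):
    # Stable, short identifier for a normalized query family. Illustrative
    # only: the contract is simply that identical obfuscated text always
    # maps to the same signature.
    return hashlib.sha256(obfuscated_statement.encode('utf-8')).hexdigest()[:16]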
Example #4
def obfuscate_sql_with_metadata(query, options=None):
    if not query:
        return {'query': '', 'metadata': {}}

    statement = datadog_agent.obfuscate_sql(query, options)
    # The `obfuscate_sql` testing stub returns bytes, so we have to handle that here.
    # The actual `obfuscate_sql` method in the agent's Go code returns a JSON string.
    statement = to_native_string(statement.strip())

    # Older agents may not have the new metadata API which returns a JSON string, so we must support cases where
    # newer integrations are running on an older agent. We use this "shortcut" to determine if we've received
    # a JSON string to avoid throwing excessive exceptions. We found that orjson leaks memory when failing
    # to parse these strings which are not valid json. Note, this condition is only relevant for integrations
    # running on agent versions < 7.34
    if not statement.startswith('{'):
        return {'query': statement, 'metadata': {}}

    statement_with_metadata = json.loads(statement)
    metadata = statement_with_metadata.get('metadata', {})
    tables = metadata.pop('tables_csv', None)
    tables = [table.strip() for table in tables.split(',')
              if table != ''] if tables else None
    statement_with_metadata['metadata']['tables'] = tables
    return statement_with_metadata
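
The `tables_csv` handling at the end is easy to trace with a concrete value. Assuming the agent returned metadata such as {"tables_csv": "users, orders,"}:

metadata = {'tables_csv': 'users, orders,'}
tables = metadata.pop('tables_csv', None)
tables = [table.strip() for table in tables.split(',') if table != ''] if tables else None
print(tables)  # ['users', 'orders'] -- the empty trailing segment is filtered out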
Example #5
    def _collect_plan_for_statement(self, row):
        # Plans have several important signatures to tag events with:
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the digest text to match query metrics

        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['sql_text'])
            obfuscated_digest_text = datadog_agent.obfuscate_sql(
                row['digest_text'])
        except Exception:
            # do not log the raw sql_text to avoid leaking sensitive data into logs. digest_text is safe as parameters
            # are obfuscated by the database
            self._log.debug("Failed to obfuscate statement: %s",
                            row['digest_text'])
            self._check.count("dd.mysql.statement_samples.error",
                              1,
                              tags=self._tags + ["error:sql-obfuscate"])
            return None

        apm_resource_hash = compute_sql_signature(obfuscated_statement)
        query_signature = compute_sql_signature(obfuscated_digest_text)

        query_cache_key = (row['current_schema'], query_signature)
        if query_cache_key in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_cache_key] = True

        plan = None
        with closing(self._get_db_connection().cursor()) as cursor:
            try:
                plan = self._explain_statement(cursor, row['sql_text'],
                                               row['current_schema'],
                                               obfuscated_statement)
            except Exception as e:
                self._check.count("dd.mysql.statement_samples.error",
                                  1,
                                  tags=self._tags +
                                  ["error:explain-{}".format(type(e))])
                self._log.exception("Failed to explain statement: %s",
                                    obfuscated_statement)

        normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None
        if plan:
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(
                plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = self._parse_execution_plan_cost(plan)

        query_plan_cache_key = (query_cache_key, plan_signature)
        if query_plan_cache_key not in self._seen_samples_cache:
            self._seen_samples_cache[query_plan_cache_key] = True
            return {
                "timestamp": row["timer_end_time_s"] * 1000,
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "mysql",
                "ddtags": self._tags_str,
                "duration": row['timer_wait_ns'],
                "network": {
                    "client": {
                        "ip": row.get('processlist_host', None),
                    }
                },
                "db": {
                    "instance": row['current_schema'],
                    "plan": {
                        "definition": obfuscated_plan,
                        "cost": plan_cost,
                        "signature": plan_signature
                    },
                    "query_signature": query_signature,
                    "resource_hash": apm_resource_hash,
                    "statement": obfuscated_statement,
                },
                'mysql': {
                    k: v
                    for k, v in row.items()
                    if k not in EVENTS_STATEMENTS_SAMPLE_EXCLUDE_KEYS
                },
            }
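
The two caches consulted above (`_explained_statements_cache` and `_seen_samples_cache`) act as rate limiters: once a key is present, the statement is skipped until the entry ages out. A sketch of how they could be constructed with `cachetools.TTLCache`; the sizes and TTLs below are assumed values, not the check's actual configuration.

from cachetools import TTLCache

# Entries expire after `ttl` seconds, making the statement eligible again for
# an EXPLAIN (first cache) or for another sample submission (second cache).
_explained_statements_cache = TTLCache(maxsize=5000, ttl=60)
_seen_samples_cache = TTLCache(maxsize=10000, ttl=15 * 60)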
Example #6
    def _collect_plan_for_statement(self, row):
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self._log.debug("Failed to obfuscate statement: %s", e)
            self._check.count("dd.postgres.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
            return None

        # limit the rate of explains done to the database
        query_signature = compute_sql_signature(obfuscated_statement)
        if query_signature in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_signature] = True

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict = self._run_explain(row['query'], obfuscated_statement)
        plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            # if we're using the orjson implementation then json.dumps returns bytes
            plan = plan.decode('utf-8') if isinstance(plan, bytes) else plan
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

        statement_plan_sig = (query_signature, plan_signature)
        if statement_plan_sig not in self._seen_samples_cache:
            self._seen_samples_cache[statement_plan_sig] = True
            event = {
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "postgres",
                "ddtags": self._tags_str,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance": row.get('datname', None),
                    "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
                    "query_signature": query_signature,
                    "resource_hash": query_signature,
                    "application": row.get('application_name', None),
                    "user": row['usename'],
                    "statement": obfuscated_statement,
                },
                'postgres': {k: v for k, v in row.items() if k not in pg_stat_activity_sample_exclude_keys},
            }
            event['timestamp'] = time.time() * 1000
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (row['state_change'] - row['query_start']).total_seconds() * 1e9
                    # If the transaction is idle then we have a more specific "end time" than the current time at
                    # which we're collecting this event. According to the postgres docs, all of the timestamps in
                    # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
                    # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
                    # of the event else we risk the timestamp being significantly off and the event getting dropped
                    # during ingestion.
                    if row['state_change'].tzinfo:
                        event['timestamp'] = get_timestamp(row['state_change']) * 1000
            return event
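
The timestamp fallback for idle transactions reduces to a small decision: use `state_change` only when it is timezone-aware, otherwise keep the collection time. A sketch of that rule in isolation, using the standard library in place of the check's `get_timestamp` helper:

from datetime import datetime, timezone

def choose_event_timestamp(state_change):
    # Naive datetimes are rejected: without a zone the converted value could
    # be hours off, and a badly skewed timestamp risks the event being
    # dropped during ingestion.
    if state_change is not None and state_change.tzinfo:
        return state_change.timestamp() * 1000
    return datetime.now(timezone.utc).timestamp() * 1000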
Example #7
    def _collect_per_statement_metrics(self, db):
        metrics = []

        available_columns = self._get_pg_stat_statements_columns(db)
        missing_columns = PG_STAT_STATEMENTS_REQUIRED_COLUMNS - set(
            available_columns)
        if len(missing_columns) > 0:
            self.log.warning(
                'Unable to collect statement metrics because required fields are unavailable: %s',
                ', '.join(list(missing_columns)),
            )
            return metrics

        desired_columns = (list(PG_STAT_STATEMENTS_METRIC_COLUMNS.keys()) +
                           list(PG_STAT_STATEMENTS_OPTIONAL_COLUMNS) +
                           list(PG_STAT_STATEMENTS_TAG_COLUMNS.keys()))
        query_columns = list(
            set(desired_columns) & set(available_columns)
            | set(PG_STAT_STATEMENTS_TAG_COLUMNS.keys()))
        rows = self._execute_query(
            db.cursor(cursor_factory=psycopg2.extras.DictCursor),
            STATEMENTS_QUERY.format(
                cols=', '.join(query_columns),
                pg_stat_statements_view=self.config.pg_stat_statements_view,
                limit=DEFAULT_STATEMENTS_LIMIT,
            ),
            params=(self.config.dbname, ),
        )
        if not rows:
            return metrics

        def row_keyfunc(row):
            # old versions of pg_stat_statements don't have a query ID so fall back to the query string itself
            queryid = row['queryid'] if 'queryid' in row else row['query']
            return (queryid, row['datname'], row['rolname'])

        rows = self._state.compute_derivative_rows(
            rows, PG_STAT_STATEMENTS_METRIC_COLUMNS.keys(), key=row_keyfunc)
        rows = apply_row_limits(rows,
                                DEFAULT_STATEMENT_METRIC_LIMITS,
                                tiebreaker_metric='calls',
                                tiebreaker_reverse=True,
                                key=row_keyfunc)

        for row in rows:
            try:
                normalized_query = datadog_agent.obfuscate_sql(row['query'])
                if not normalized_query:
                    self.log.warning(
                        "Obfuscation of query '%s' resulted in empty query",
                        row['query'])
                    continue
            except Exception as e:
                # If query obfuscation fails, it is acceptable to log the raw query here because the
                # pg_stat_statements table contains no parameters in the raw queries.
                self.log.warning("Failed to obfuscate query '%s': %s",
                                 row['query'], e)
                continue

            query_signature = compute_sql_signature(normalized_query)

            # All "Deep Database Monitoring" statement-level metrics are tagged with a `query_signature`
            # which uniquely identifies the normalized query family. Where possible, this hash should
            # match the hash of APM "resources" (https://docs.datadoghq.com/tracing/visualization/resource/)
            # when the resource is a SQL query. Postgres' query normalization in the `pg_stat_statements` table
            # preserves most of the original query, so we tag the `resource_hash` with the same value as the
            # `query_signature`. The `resource_hash` tag should match the *actual* APM resource hash most of
            # the time, but not always. So this is a best-effort approach to link these metrics to APM metrics.
            tags = [
                'query_signature:' + query_signature,
                'resource_hash:' + query_signature
            ]

            for column, tag_name in PG_STAT_STATEMENTS_TAG_COLUMNS.items():
                if column not in row:
                    continue
                value = row[column]
                if column == 'query':
                    value = normalize_query_tag(normalized_query)
                tags.append('{tag_name}:{value}'.format(tag_name=tag_name,
                                                        value=value))

            for column, metric_name in PG_STAT_STATEMENTS_METRIC_COLUMNS.items():
                if column not in row:
                    continue
                value = row[column]
                if column == 'total_time':
                    # All "Deep Database Monitoring" timing metrics are in nanoseconds
                    # Postgres tracks pg_stat* timing stats in milliseconds
                    value = milliseconds_to_nanoseconds(value)
                metrics.append((metric_name, value, tags))

        return metrics
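
The column-selection set algebra near the top of this example is easier to read with concrete stand-in values (the column names below are illustrative, not the actual lists): optional columns are requested only when the installed pg_stat_statements exposes them, while tag columns are always requested.

desired_columns = ['calls', 'total_time', 'blk_read_time', 'query', 'datname', 'rolname']
available_columns = ['calls', 'total_time', 'query', 'datname', 'rolname', 'queryid']
tag_columns = ['query', 'datname', 'rolname']

# `&` binds tighter than `|`: intersect first, then force the tag columns in.
query_columns = list(set(desired_columns) & set(available_columns) | set(tag_columns))
# -> ['calls', 'total_time', 'query', 'datname', 'rolname'] in some order;
#    'blk_read_time' is dropped because the server does not expose it.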
Example #8
    def _collect_plan_for_statement(self, row):
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self._log.debug("Failed to obfuscate statement: %s", e)
            self._check.count("dd.postgres.statement_samples.error",
                              1,
                              tags=self._tags + ["error:sql-obfuscate"])
            return None

        # limit the rate of explains done to the database
        query_signature = compute_sql_signature(obfuscated_statement)
        if query_signature in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_signature] = True

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict = self._run_explain(row['query'], obfuscated_statement)
        plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(
                plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

        statement_plan_sig = (query_signature, plan_signature)
        if statement_plan_sig not in self._seen_samples_cache:
            self._seen_samples_cache[statement_plan_sig] = True
            event = {
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "postgres",
                "ddtags": self._tags_str,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance": row.get('datname', None),
                    "plan": {
                        "definition": obfuscated_plan,
                        "cost": plan_cost,
                        "signature": plan_signature
                    },
                    "query_signature": query_signature,
                    "resource_hash": query_signature,
                    "application": row.get('application_name', None),
                    "user": row['usename'],
                    "statement": obfuscated_statement,
                },
                'postgres': {
                    k: v
                    for k, v in row.items()
                    if k not in pg_stat_activity_sample_exclude_keys
                },
            }
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (
                        row['state_change'] -
                        row['query_start']).total_seconds() * 1e9
                    event['timestamp'] = time.mktime(
                        row['state_change'].timetuple()) * 1000
            else:
                event['timestamp'] = time.time() * 1000
            return event
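
The idle-transaction duration above is a plain timedelta converted to nanoseconds, matching the other "Deep Database Monitoring" timing fields. The arithmetic on its own:

from datetime import datetime, timedelta

query_start = datetime(2021, 3, 1, 12, 0, 0)
state_change = query_start + timedelta(milliseconds=250)
duration_ns = (state_change - query_start).total_seconds() * 1e9
print(duration_ns)  # 250000000.0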