Example #1
0
 def collect_per_statement_metrics(self):
     # exclude the default "db" tag from statement metrics & FQT events because this data is collected from
     # all databases on the host. For metrics the "db" tag is added during ingestion based on which database
     # each query came from.
     try:
         rows = self._collect_metrics_rows()
         if not rows:
             return
         for event in self._rows_to_fqt_events(rows):
             self._check.database_monitoring_query_sample(json.dumps(event, default=default_json_event_encoding))
         # truncate query text to the maximum length supported by metrics tags
         for row in rows:
             row['query'] = row['query'][0:200]
         payload = {
             'host': self._check.resolved_hostname,
             'timestamp': time.time() * 1000,
             'min_collection_interval': self._metrics_collection_interval,
             'tags': self._tags_no_db,
             'cloud_metadata': self._config.cloud_metadata,
             'postgres_rows': rows,
             'postgres_version': self._payload_pg_version(),
             'ddagentversion': datadog_agent.get_version(),
             "ddagenthostname": self._check.agent_hostname,
         }
         self._check.database_monitoring_query_metrics(json.dumps(payload, default=default_json_event_encoding))
     except Exception:
         self._log.exception('Unable to collect statement metrics due to an error')
         return []
Example #2
0
 def collect_per_statement_metrics(self, db, db_version, tags):
     try:
         rows = self._collect_metrics_rows(db)
         if not rows:
             return
         for event in self._rows_to_fqt_events(rows, tags):
             self._check.database_monitoring_query_sample(
                 json.dumps(event, default=default_json_event_encoding))
         # truncate query text to the maximum length supported by metrics tags
         for row in rows:
             row['query'] = row['query'][0:200]
         payload = {
             'host': self._db_hostname_cached(),
             'timestamp': time.time() * 1000,
             'min_collection_interval': self._config.min_collection_interval,
             'tags': tags,
             'postgres_rows': rows,
             'postgres_version': 'v{major}.{minor}.{patch}'.format(
                 major=db_version.major, minor=db_version.minor, patch=db_version.patch
             ),
         }
         self._check.database_monitoring_query_metrics(
             json.dumps(payload, default=default_json_event_encoding))
     except Exception:
         db.rollback()
         self._log.exception(
             'Unable to collect statement metrics due to an error')
         return []
Example #3
0
    def collect_per_statement_metrics(self, db, tags):
        # type: (pymysql.connections.Connection, List[str]) -> None
        try:
            rows = self._collect_per_statement_metrics(db)
            if not rows:
                return

            for event in self._rows_to_fqt_events(rows, tags):
                self._check.database_monitoring_query_sample(
                    json.dumps(event, default=default_json_event_encoding))

            # truncate query text to the maximum length supported by metrics tags
            for row in rows:
                row['digest_text'] = row['digest_text'][0:200]

            payload = {
                'host': self._db_hostname_cached(),
                'timestamp': time.time() * 1000,
                'min_collection_interval': self._config.min_collection_interval,
                'tags': tags,
                'mysql_rows': rows,
            }
            self._check.database_monitoring_query_metrics(
                json.dumps(payload, default=default_json_event_encoding))
        except Exception:
            self.log.exception(
                'Unable to collect statement metrics due to an error')
Example #4
0
    def collect_per_statement_metrics(self):
        # Detect a database misconfiguration by checking if the performance schema is enabled since mysql
        # just returns no rows without errors if the performance schema is disabled
        if not self._check.performance_schema_enabled:
            self._check.record_warning(
                DatabaseConfigurationError.performance_schema_not_enabled,
                warning_with_tags(
                    'Unable to collect statement metrics because the performance schema is disabled. '
                    'See https://docs.datadoghq.com/database_monitoring/setup_mysql/'
                    'troubleshooting#%s for more details',
                    DatabaseConfigurationError.performance_schema_not_enabled.value,
                    code=DatabaseConfigurationError.performance_schema_not_enabled.value,
                    host=self._check.resolved_hostname,
                ),
            )
            return

        rows = self._collect_per_statement_metrics()
        if not rows:
            return

        for event in self._rows_to_fqt_events(rows):
            self._check.database_monitoring_query_sample(
                json.dumps(event, default=default_json_event_encoding))

        # truncate query text to the maximum length supported by metrics tags
        for row in rows:
            if row['digest_text'] is not None:
                row['digest_text'] = row['digest_text'][0:200]

        payload = {
            'host': self._check.resolved_hostname,
            'timestamp': time.time() * 1000,
            'mysql_version': self._check.version.version + '+' + self._check.version.build,
            'mysql_flavor': self._check.version.flavor,
            "ddagenthostname": self._check.agent_hostname,
            'ddagentversion': datadog_agent.get_version(),
            'min_collection_interval': self._metric_collection_interval,
            'tags': self._tags,
            'cloud_metadata': self._config.cloud_metadata,
            'mysql_rows': rows,
        }
        self._check.database_monitoring_query_metrics(
            json.dumps(payload, default=default_json_event_encoding))
Example #5
0
    def _collect_statement_samples(self):
        start_time = time.time()
        pg_activity_cols = self._get_pg_stat_activity_cols_cached(
            PG_STAT_ACTIVITY_COLS)
        rows = self._get_new_pg_stat_activity(pg_activity_cols)
        rows = self._filter_and_normalize_statement_rows(rows)
        event_samples = self._collect_plans(rows)
        submitted_count = 0
        for e in event_samples:
            self._check.database_monitoring_query_sample(
                json.dumps(e, default=default_json_event_encoding))
            submitted_count += 1

        if self._report_activity_event():
            active_connections = self._get_active_connections()
            activity_event = self._create_activity_event(
                rows, active_connections)
            self._check.database_monitoring_query_activity(
                json.dumps(activity_event,
                           default=default_json_event_encoding))
            self._check.histogram("dd.postgres.collect_activity_snapshot.time",
                                  (time.time() - start_time) * 1000,
                                  tags=self._tags)
        elapsed_ms = (time.time() - start_time) * 1000
        self._check.histogram(
            "dd.postgres.collect_statement_samples.time",
            elapsed_ms,
            tags=self._tags + self._check._get_debug_tags(),
            hostname=self._check.resolved_hostname,
        )
        self._check.count(
            "dd.postgres.collect_statement_samples.events_submitted.count",
            submitted_count,
            tags=self._tags + self._check._get_debug_tags(),
            hostname=self._check.resolved_hostname,
        )
        self._check.gauge(
            "dd.postgres.collect_statement_samples.seen_samples_cache.len",
            len(self._seen_samples_ratelimiter),
            tags=self._tags + self._check._get_debug_tags(),
            hostname=self._check.resolved_hostname,
        )
        self._check.gauge(
            "dd.postgres.collect_statement_samples.explained_statements_cache.len",
            len(self._explained_statements_ratelimiter),
            tags=self._tags + self._check._get_debug_tags(),
            hostname=self._check.resolved_hostname,
        )
        self._check.gauge(
            "dd.postgres.collect_statement_samples.explain_errors_cache.len",
            len(self._explain_errors_cache),
            tags=self._tags + self._check._get_debug_tags(),
            hostname=self._check.resolved_hostname,
        )
Example #6
0
 def __init__(self, check, config, shutdown_callback):
     collection_interval = float(
         config.statement_metrics_config.get('collection_interval', DEFAULT_COLLECTION_INTERVAL)
     )
     if collection_interval <= 0:
         collection_interval = DEFAULT_COLLECTION_INTERVAL
     super(PostgresStatementMetrics, self).__init__(
         check,
         run_sync=is_affirmative(config.statement_metrics_config.get('run_sync', False)),
         enabled=is_affirmative(config.statement_metrics_config.get('enabled', True)),
         expected_db_exceptions=(psycopg2.errors.DatabaseError,),
         min_collection_interval=config.min_collection_interval,
         dbms="postgres",
         rate_limit=1 / float(collection_interval),
         job_name="query-metrics",
         shutdown_callback=shutdown_callback,
     )
     self._metrics_collection_interval = collection_interval
     self._config = config
     self._state = StatementMetrics()
     self._stat_column_cache = []
     self._obfuscate_options = to_native_string(json.dumps(self._config.obfuscator_options))
     # full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
     self._full_statement_text_cache = TTLCache(
         maxsize=config.full_statement_text_cache_max_size,
         ttl=60 * 60 / config.full_statement_text_samples_per_hour_per_query,
     )
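A note on the full_statement_text_cache above: with ttl = 3600 / full_statement_text_samples_per_hour_per_query, a query_signature expires from the TTLCache after its share of an hour, so cache membership acts as a per-signature rate limit on full query text (FQT) events. A minimal sketch of that pattern follows; the helper name and the sample limits are illustrative, not part of the check.

    from cachetools import TTLCache

    # Illustrative limits: at most 1 full-statement-text event per query_signature per hour,
    # tracking up to 10,000 signatures at a time.
    fqt_cache = TTLCache(maxsize=10000, ttl=60 * 60 / 1)

    def should_emit_fqt(query_signature):
        # Emit only if this signature has not been seen within the current TTL window.
        if query_signature in fqt_cache:
            return False
        fqt_cache[query_signature] = True
        return True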
Example #7
0
    def refresh_scrapers(self):
        # Create assume_role credentials if assume_role ARN is specified in config
        assume_role = self.config.assume_role
        try:
            if assume_role:
                self.log.info('Assume role %s found. Creating temporary credentials using role...', assume_role)
                sts = boto3.client('sts')
                response = sts.assume_role(
                    RoleArn=assume_role, RoleSessionName='dd-msk-check-session', DurationSeconds=3600
                )
                access_key_id = response['Credentials']['AccessKeyId']
                secret_access_key = response['Credentials']['SecretAccessKey']
                session_token = response['Credentials']['SessionToken']
                client = boto3.client(
                    'kafka',
                    aws_access_key_id=access_key_id,
                    aws_secret_access_key=secret_access_key,
                    aws_session_token=session_token,
                    config=self._boto_config,
                    region_name=self._region_name,
                )
            else:
                # Always create a new client to account for changes in auth
                client = boto3.client(
                    'kafka',
                    config=self._boto_config,
                    region_name=self._region_name,
                )
            response = client.list_nodes(ClusterArn=self.config.cluster_arn)
            self.log.debug('Received list_nodes response: %s', json.dumps(response))
        except Exception as e:
            self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, message=str(e), tags=self._service_check_tags)
            raise
        else:
            self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=self._service_check_tags)

        scrapers = {}

        for node_info in response['NodeInfoList']:
            broker_info = node_info['BrokerNodeInfo']
            broker_id_tag = f'broker_id:{broker_info["BrokerId"]}'

            for endpoint in broker_info['Endpoints']:
                for port, metrics in self._exporter_data:
                    if port:
                        url = f'{self._endpoint_prefix}://{endpoint}:{port}{self.config.prometheus_metrics_path}'
                        if url in self.scrapers:
                            scrapers[url] = self.scrapers[url]
                            continue

                        scraper = self.create_scraper(
                            {'openmetrics_endpoint': url, 'metrics': metrics, **self.instance}
                        )
                        scraper.static_tags += self._static_tags
                        scraper.set_dynamic_tags(broker_id_tag)
                        self.configure_additional_transformers(scraper.metric_transformer.transformer_data)

                        scrapers[url] = scraper

        self.scrapers = scrapers
Example #8
0
def query():
    connection = None

    for string in sys.stdin:
        if connection is None:
            connection_string = string.strip()
            try:
                connection = pyodbc.connect(connection_string)
            except Exception as e:
                print("{}".format(e), file=sys.stderr, flush=True)
                # Make the next query end immediately and fetch the error
                print('ENDOFQUERY', flush=True)
        else:
            query = string.strip()
            try:
                rows = []
                with closing(connection.execute(query)) as c:
                    rows = c.fetchall()

                for row in rows:
                    serialized = json.dumps([item if item is None else str(item) for item in row])
                    # json.dumps may return bytes if an orjson-based implementation is in use
                    if isinstance(serialized, bytes):
                        serialized = serialized.decode("utf-8")
                    print(serialized, flush=True)
            except Exception as e:
                print("{}".format(e), file=sys.stderr, flush=True)
            print('ENDOFQUERY', flush=True)
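The loop above implements a simple line protocol: the first stdin line is an ODBC connection string, every subsequent line is a query, each row is written to stdout as a JSON array, and the sentinel 'ENDOFQUERY' terminates a result set (it is also emitted after a connection error so the caller does not block). A hypothetical driver for that protocol might look like the following; the script name and connection string are assumptions.

    import subprocess
    import sys

    # Hypothetical script containing query(); adjust the path to the real location.
    proc = subprocess.Popen(
        [sys.executable, "odbc_query_runner.py"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        text=True,
        bufsize=1,
    )
    proc.stdin.write("DSN=my-dsn;UID=user;PWD=secret\n")  # first line: connection string
    proc.stdin.write("SELECT 1\n")                        # subsequent lines: queries
    proc.stdin.flush()

    rows = []
    for line in proc.stdout:
        line = line.strip()
        if line == "ENDOFQUERY":  # sentinel marking the end of a result set
            break
        rows.append(line)  # each row is a JSON-encoded list of column values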
Example #9
0
    def __init__(self, check, config, connection_args):
        self.collection_interval = float(
            config.activity_config.get("collection_interval", MySQLActivity.DEFAULT_COLLECTION_INTERVAL)
        )
        if self.collection_interval <= 0:
            self.collection_interval = MySQLActivity.DEFAULT_COLLECTION_INTERVAL
        super(MySQLActivity, self).__init__(
            check,
            run_sync=is_affirmative(config.activity_config.get("run_sync", False)),
            enabled=is_affirmative(config.activity_config.get("enabled", True)),
            expected_db_exceptions=(pymysql.err.OperationalError, pymysql.err.InternalError),
            min_collection_interval=config.min_collection_interval,
            dbms="mysql",
            rate_limit=1 / float(self.collection_interval),
            job_name="query-activity",
            shutdown_callback=self._close_db_conn,
        )
        self._check = check
        self._config = config
        self._log = check.log

        self._connection_args = connection_args
        self._db = None
        self._db_version = None
        self._obfuscator_options = to_native_string(json.dumps(self._config.obfuscator_options))
Example #10
0
 def submit_events(self, events):
     """
     Submit the statement sample events to the event intake
     :return: submitted_count, failed_count
     """
     submitted_count = 0
     failed_count = 0
     for chunk in _chunks(events, 100):
         for http, url in self._endpoints:
             try:
                 r = http.request(
                     'post',
                     url,
                     data=json.dumps(chunk, cls=EventEncoder),
                     timeout=5,
                     headers={'Content-Type': 'application/json'},
                 )
                 r.raise_for_status()
                 logger.debug("Submitted %s statement samples to %s",
                              len(chunk), url)
                 submitted_count += len(chunk)
             except requests.HTTPError as e:
                 logger.warning(
                     "Failed to submit statement samples to %s: %s", url, e)
                 failed_count += len(chunk)
             except Exception:
                 logger.exception(
                     "Failed to submit statement samples to %s", url)
                 failed_count += len(chunk)
     return submitted_count, failed_count
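submit_events relies on a _chunks helper that is not shown here; it splits the event list into batches of 100 before posting. A minimal sketch of such a helper, assuming it only needs to handle sequences:

    def _chunks(items, n):
        # Yield successive n-sized slices of a sequence.
        for i in range(0, len(items), n):
            yield items[i:i + n]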
Example #11
0
 def _collect_statement_samples(self):
     self._rate_limiter.sleep()
     start_time = time.time()
     rows = self._get_new_pg_stat_activity()
     rows = self._filter_valid_statement_rows(rows)
     events = self._explain_pg_stat_activity(rows)
     submitted_count = 0
     for e in events:
         self._check.database_monitoring_query_sample(json.dumps(e, default=default_json_event_encoding))
         submitted_count += 1
     elapsed_ms = (time.time() - start_time) * 1000
     self._check.histogram("dd.postgres.collect_statement_samples.time", elapsed_ms, tags=self._tags)
     self._check.count(
         "dd.postgres.collect_statement_samples.events_submitted.count", submitted_count, tags=self._tags
     )
     self._check.gauge(
         "dd.postgres.collect_statement_samples.seen_samples_cache.len",
         len(self._seen_samples_cache),
         tags=self._tags,
     )
     self._check.gauge(
         "dd.postgres.collect_statement_samples.explained_statements_cache.len",
         len(self._explained_statements_cache),
         tags=self._tags,
     )
Example #12
0
 def __init__(self, check, config, connection_args):
     # (MySql, MySQLConfig) -> None
     collection_interval = float(
         config.statement_metrics_config.get('collection_interval', 10))
     if collection_interval <= 0:
         collection_interval = 10
     super(MySQLStatementMetrics, self).__init__(
         check,
         rate_limit=1 / float(collection_interval),
         run_sync=is_affirmative(
             config.statement_metrics_config.get('run_sync', False)),
         enabled=is_affirmative(
             config.statement_metrics_config.get('enabled', True)),
         expected_db_exceptions=(pymysql.err.DatabaseError, ),
         min_collection_interval=config.min_collection_interval,
         dbms="mysql",
         job_name="statement-metrics",
         shutdown_callback=self._close_db_conn,
     )
     self._metric_collection_interval = collection_interval
     self._connection_args = connection_args
     self._db = None
     self._config = config
     self.log = get_check_logger()
     self._state = StatementMetrics()
     self._obfuscate_options = to_native_string(
         json.dumps(self._config.obfuscator_options))
     # full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
     self._full_statement_text_cache = TTLCache(
         maxsize=self._config.full_statement_text_cache_max_size,
         ttl=60 * 60 /
         self._config.full_statement_text_samples_per_hour_per_query,
     )  # type: TTLCache
Example #13
0
def compute_exec_plan_signature(normalized_json_plan):
    """
    Given an already normalized json string query execution plan, generate its 64-bit hex signature.
    TODO: try to push this logic into the agent go code to avoid the two extra json serialization steps here
    """
    if not normalized_json_plan:
        return None
    with_sorted_keys = json.dumps(json.loads(normalized_json_plan),
                                  **sort_keys_kwargs)
    return format(mmh3.hash64(with_sorted_keys, signed=False)[0], 'x')
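Because compute_exec_plan_signature re-serializes the plan with sorted keys (sort_keys_kwargs is defined elsewhere, presumably along the lines of {'sort_keys': True}), two plans that differ only in key order hash to the same signature. A small illustration of that property using the standard json module and mmh3:

    import json
    import mmh3

    def _plan_signature(plan_json):
        # Canonicalize key order before hashing so formatting does not affect the signature.
        canonical = json.dumps(json.loads(plan_json), sort_keys=True)
        return format(mmh3.hash64(canonical, signed=False)[0], 'x')

    a = '{"Plan": {"Node Type": "Seq Scan", "Total Cost": 1.0}}'
    b = '{"Plan": {"Total Cost": 1.0, "Node Type": "Seq Scan"}}'
    assert _plan_signature(a) == _plan_signature(b)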
Example #14
0
    def _collect_statement_samples(self):
        self._read_version_info()
        self._log.debug("collecting statement samples")
        events_statements_table, collection_interval = self._get_sample_collection_strategy()
        if not events_statements_table:
            return
        self._set_rate_limit(1.0 / collection_interval)

        start_time = time.time()

        rows = self._get_new_events_statements(
            events_statements_table, self._events_statements_row_limit)
        rows = self._filter_valid_statement_rows(rows)
        events = self._collect_plans_for_statements(rows)
        submitted_count = 0
        tags = (
            self._tags +
            ["events_statements_table:{}".format(events_statements_table)] +
            self._check._get_debug_tags())
        for e in events:
            self._check.database_monitoring_query_sample(
                json.dumps(e, default=default_json_event_encoding))
            submitted_count += 1
        self._check.histogram(
            "dd.mysql.collect_statement_samples.time",
            (time.time() - start_time) * 1000,
            tags=tags,
            hostname=self._check.resolved_hostname,
        )
        self._check.count(
            "dd.mysql.collect_statement_samples.events_submitted.count",
            submitted_count,
            tags=tags,
            hostname=self._check.resolved_hostname,
        )
        self._check.gauge(
            "dd.mysql.collect_statement_samples.seen_samples_cache.len",
            len(self._seen_samples_ratelimiter),
            tags=tags,
            hostname=self._check.resolved_hostname,
        )
        self._check.gauge(
            "dd.mysql.collect_statement_samples.explained_statements_cache.len",
            len(self._explained_statements_ratelimiter),
            tags=tags,
            hostname=self._check.resolved_hostname,
        )
        self._check.gauge(
            "dd.mysql.collect_statement_samples.collection_strategy_cache.len",
            len(self._collection_strategy_cache),
            tags=tags,
            hostname=self._check.resolved_hostname,
        )
Example #15
0
 def obfuscate_sql(self, query, options=None):
     # Full obfuscation implementation is in go code.
     if options:
         # Options are provided as a JSON string because the Go stub requires it, whereas
         # the Python stub does not (e.g. for testing).
         if json.loads(options).get('return_json_metadata', False):
             return json.dumps({'query': re.sub(r'\s+', ' ', query or '').strip(), 'metadata': {}})
     return re.sub(r'\s+', ' ', query or '').strip()
Example #16
0
 def _collect_activity(self):
     # type: () -> None
     with closing(self._get_db_connection().cursor(pymysql.cursors.DictCursor)) as cursor:
         connections = self._get_active_connections(cursor)
         rows = self._get_activity(cursor)
         rows = self._normalize_rows(rows)
         event = self._create_activity_event(rows, connections)
         payload = json.dumps(event, default=self._json_event_encoding)
         self._check.database_monitoring_query_activity(payload)
         self._check.histogram(
             "dd.mysql.activity.collect_activity.payload_size",
             len(payload),
             tags=self._tags + self._check._get_debug_tags(),
         )
Example #17
0
    def collect_statement_metrics_and_plans(self):
        """
        Collects statement metrics and plans.
        :return:
        """
        plans_submitted = 0
        deadline = time.time() + self.collection_interval

        # re-use the check's conn module, but set extra_key=dbm- to ensure we get our own
        # raw connection. adodbapi and pyodbc modules are thread safe, but connections are not.
        with self.check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
            with self.check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
                rows = self._collect_metrics_rows(cursor)
                if not rows:
                    return
                for event in self._rows_to_fqt_events(rows):
                    self.check.database_monitoring_query_sample(json.dumps(event, default=default_json_event_encoding))
                payload = self._to_metrics_payload(rows)
                self.check.database_monitoring_query_metrics(json.dumps(payload, default=default_json_event_encoding))
                for event in self._collect_plans(rows, cursor, deadline):
                    self.check.database_monitoring_query_sample(json.dumps(event, default=default_json_event_encoding))
                    plans_submitted += 1

        self.check.count(
            "dd.sqlserver.statements.plans_submitted.count", plans_submitted, **self.check.debug_stats_kwargs()
        )
        self.check.gauge(
            "dd.sqlserver.statements.seen_plans_cache.len",
            len(self._seen_plans_ratelimiter),
            **self.check.debug_stats_kwargs()
        )
        self.check.gauge(
            "dd.sqlserver.statements.fqt_cache.len",
            len(self._full_statement_text_cache),
            **self.check.debug_stats_kwargs()
        )
Example #18
0
 def collect_per_statement_metrics(self, db, tags):
     try:
         rows = self._collect_metrics_rows(db)
         if not rows:
             return
         payload = {
             'host': self._db_hostname_cached(),
             'timestamp': time.time() * 1000,
             'min_collection_interval': self._config.min_collection_interval,
             'tags': tags,
             'postgres_rows': rows,
         }
         self._check.database_monitoring_query_metrics(json.dumps(payload, default=default_json_event_encoding))
     except Exception:
         db.rollback()
         self._log.exception('Unable to collect statement metrics due to an error')
         return []
Example #19
0
    def _collect_statement_samples(self):
        self._log.debug("collecting statement samples")
        self._rate_limiter.sleep()
        events_statements_table, rate_limit = self._get_sample_collection_strategy()
        if not events_statements_table:
            return
        if self._rate_limiter.rate_limit_s != rate_limit:
            self._rate_limiter = ConstantRateLimiter(rate_limit)
        start_time = time.time()

        tags = self._tags + [
            "events_statements_table:{}".format(events_statements_table)
        ]
        rows = self._get_new_events_statements(
            events_statements_table, self._events_statements_row_limit)
        rows = self._filter_valid_statement_rows(rows)
        events = self._collect_plans_for_statements(rows)
        submitted_count = 0
        for e in events:
            self._check.database_monitoring_query_sample(
                json.dumps(e, default=default_json_event_encoding))
            submitted_count += 1
        self._check.histogram("dd.mysql.collect_statement_samples.time",
                              (time.time() - start_time) * 1000,
                              tags=tags)
        self._check.count(
            "dd.mysql.collect_statement_samples.events_submitted.count",
            submitted_count,
            tags=tags)
        self._check.gauge(
            "dd.mysql.collect_statement_samples.seen_samples_cache.len",
            len(self._seen_samples_cache),
            tags=tags)
        self._check.gauge(
            "dd.mysql.collect_statement_samples.explained_statements_cache.len",
            len(self._explained_statements_cache),
            tags=tags,
        )
        self._check.gauge(
            "dd.mysql.collect_statement_samples.collection_strategy_cache.len",
            len(self._collection_strategy_cache),
            tags=tags,
        )
Example #20
0
 def collect_per_statement_metrics(self, db, tags):
     # type: (pymysql.connections.Connection, List[str]) -> None
     try:
         rows = self._collect_per_statement_metrics(db)
         if not rows:
             return
         payload = {
             'host': self._db_hostname_cached(),
             'timestamp': time.time() * 1000,
             'min_collection_interval': self._config.min_collection_interval,
             'tags': tags,
             'mysql_rows': rows,
         }
         self._check.database_monitoring_query_metrics(
             json.dumps(payload, default=default_json_event_encoding))
     except Exception:
         self.log.exception(
             'Unable to collect statement metrics due to an error')
Example #21
0
    def collect_activity(self):
        """
        Collects all current activity for the SQLServer instance.
        :return:
        """

        # re-use the check's conn module, but set extra_key=dbm-activity- to ensure we get our own
        # raw connection. adodbapi and pyodbc modules are thread safe, but connections are not.
        with self.check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
            with self.check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
                connections = self._get_active_connections(cursor)
                request_cols = self._get_exec_requests_cols_cached(cursor, DM_EXEC_REQUESTS_COLS)
                rows = self._get_activity(cursor, request_cols)
                normalized_rows = self._normalize_queries_and_filter_rows(rows, MAX_PAYLOAD_BYTES)
                event = self._create_activity_event(normalized_rows, connections)
                payload = json.dumps(event, default=default_json_event_encoding)
                self._check.database_monitoring_query_activity(payload)

        self.check.histogram(
            "dd.sqlserver.activity.collect_activity.payload_size", len(payload), **self.check.debug_stats_kwargs()
        )
Example #22
0
    for i in range(5, 10):
        assert cache.acquire(i), "cache should be empty again so these keys should go in OK"


class TestDBExcepption(BaseException):
    pass


@pytest.mark.parametrize(
    "obfuscator_return_value,expected_value",
    [
        (
            json.dumps(
                {
                    'query': 'SELECT * FROM datadog',
                    'metadata': {'tables_csv': 'datadog,', 'commands': ['SELECT'], 'comments': None},
                }
            ),
            {
                'query': 'SELECT * FROM datadog',
                'metadata': {'commands': ['SELECT'], 'comments': None, 'tables': ['datadog']},
            },
        ),
        (
            # Whitespace test
            "  {\"query\":\"SELECT * FROM datadog\",\"metadata\":{\"tables_csv\":\"datadog\",\"commands\":[\"SELECT\"],"
            "\"comments\":null}}          ",
            {
                'query': 'SELECT * FROM datadog',
                'metadata': {'commands': ['SELECT'], 'comments': None, 'tables': ['datadog']},
            },
Example #23
0
 def _obfuscate_sql(sql_query, options=None):
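     # NOTE: `metadata` is assumed to be captured from the enclosing (test) scope; it is not defined in this excerpt.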
     return json.dumps({'query': sql_query, 'metadata': metadata})
Example #24
0
 def submit_events(self, events):
     events = list(events)
     self._payloads.append(json.dumps(events, default=default_encoding))
     return len(events), 0
Example #25
0
 def __init__(self, check, config, connection_args):
     collection_interval = float(
         config.statement_metrics_config.get('collection_interval', 1))
     if collection_interval <= 0:
         collection_interval = 1
     super(MySQLStatementSamples, self).__init__(
         check,
         rate_limit=1 / collection_interval,
         run_sync=is_affirmative(
             config.statement_samples_config.get('run_sync', False)),
         enabled=is_affirmative(
             config.statement_samples_config.get('enabled', True)),
         min_collection_interval=config.min_collection_interval,
         dbms="mysql",
         expected_db_exceptions=(pymysql.err.DatabaseError, ),
         job_name="statement-samples",
         shutdown_callback=self._close_db_conn,
     )
     self._config = config
     self._version_processed = False
     self._connection_args = connection_args
     # checkpoint at zero so we pull the whole history table on the first run
     self._checkpoint = 0
     self._last_check_run = 0
     self._db = None
     self._configured_collection_interval = self._config.statement_samples_config.get(
         'collection_interval', -1)
     self._events_statements_row_limit = self._config.statement_samples_config.get(
         'events_statements_row_limit', 5000)
     self._explain_procedure = self._config.statement_samples_config.get(
         'explain_procedure', 'explain_statement')
     self._fully_qualified_explain_procedure = self._config.statement_samples_config.get(
         'fully_qualified_explain_procedure', 'datadog.explain_statement')
     self._events_statements_temp_table = self._config.statement_samples_config.get(
         'events_statements_temp_table_name', 'datadog.temp_events')
     self._events_statements_enable_procedure = self._config.statement_samples_config.get(
         'events_statements_enable_procedure',
         'datadog.enable_events_statements_consumers')
     self._preferred_events_statements_tables = EVENTS_STATEMENTS_PREFERRED_TABLES
     self._has_window_functions = False
     events_statements_table = self._config.statement_samples_config.get(
         'events_statements_table', None)
     if events_statements_table:
         if events_statements_table in DEFAULT_EVENTS_STATEMENTS_COLLECTION_INTERVAL:
             self._log.debug(
                 "Configured preferred events_statements_table: %s",
                 events_statements_table)
             self._preferred_events_statements_tables = [
                 events_statements_table
             ]
         else:
             self._log.warning(
                 "Invalid events_statements_table: %s. Must be one of %s. Falling back to trying all tables.",
                 events_statements_table,
                 ', '.join(
                     DEFAULT_EVENTS_STATEMENTS_COLLECTION_INTERVAL.keys()),
             )
     self._explain_strategies = {
         'PROCEDURE': self._run_explain_procedure,
         'FQ_PROCEDURE': self._run_fully_qualified_explain_procedure,
         'STATEMENT': self._run_explain,
     }
     self._preferred_explain_strategies = [
         'PROCEDURE', 'FQ_PROCEDURE', 'STATEMENT'
     ]
     self._obfuscate_options = to_native_string(
         json.dumps(self._config.obfuscator_options))
     self._init_caches()
Example #26
0
    def _collect_plan_for_statement(self, row):
        # limit the rate of explains done to the database
        cache_key = (row['datname'], row['query_signature'])
        if not self._explained_statements_ratelimiter.acquire(cache_key):
            return None

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict, explain_err_code, err_msg = self._run_and_track_explain(
            row['datname'], row['query'], row['statement'],
            row['query_signature'])
        collection_errors = None
        if explain_err_code:
            collection_errors = [{
                'code': explain_err_code.value,
                'message': err_msg if err_msg else None
            }]

        plan, normalized_plan, obfuscated_plan, plan_signature = None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            # if we're using the orjson implementation then json.dumps returns bytes
            plan = plan.decode('utf-8') if isinstance(plan, bytes) else plan
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(
                plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)

        statement_plan_sig = (row['query_signature'], plan_signature)
        if self._seen_samples_ratelimiter.acquire(statement_plan_sig):
            event = {
                "host": self._check.resolved_hostname,
                "ddagentversion": datadog_agent.get_version(),
                "ddsource": "postgres",
                "ddtags": ",".join(self._dbtags(row['datname'])),
                "timestamp": time.time() * 1000,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance":
                    row.get('datname', None),
                    "plan": {
                        "definition": obfuscated_plan,
                        "signature": plan_signature,
                        "collection_errors": collection_errors,
                    },
                    "query_signature":
                    row['query_signature'],
                    "resource_hash":
                    row['query_signature'],
                    "application":
                    row.get('application_name', None),
                    "user":
                    row['usename'],
                    "statement":
                    row['statement'],
                    "metadata": {
                        "tables": row['dd_tables'],
                        "commands": row['dd_commands'],
                        "comments": row['dd_comments'],
                    },
                    "query_truncated":
                    self._get_truncation_state(
                        self._get_track_activity_query_size(),
                        row['query']).value,
                },
                'postgres': {
                    k: v
                    for k, v in row.items()
                    if k not in pg_stat_activity_sample_exclude_keys
                },
            }
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (
                        row['state_change'] -
                        row['query_start']).total_seconds() * 1e9
                    # If the transaction is idle then we have a more specific "end time" than the current time at
                    # which we're collecting this event. According to the postgres docs, all of the timestamps in
                    # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
                    # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
                    # of the event else we risk the timestamp being significantly off and the event getting dropped
                    # during ingestion.
                    if row['state_change'].tzinfo:
                        event['timestamp'] = get_timestamp(
                            row['state_change']) * 1000
            return event
        return None
Example #27
0
    def _collect_plan_for_statement(self, row):
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self._log.debug("Failed to obfuscate statement: %s", e)
            self._check.count("dd.postgres.statement_samples.error",
                              1,
                              tags=self._tags + ["error:sql-obfuscate"])
            return None

        # limit the rate of explains done to the database
        query_signature = compute_sql_signature(obfuscated_statement)
        if query_signature in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_signature] = True

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict = self._run_explain(row['query'], obfuscated_statement)
        plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(
                plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

        statement_plan_sig = (query_signature, plan_signature)
        if statement_plan_sig not in self._seen_samples_cache:
            self._seen_samples_cache[statement_plan_sig] = True
            event = {
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "postgres",
                "ddtags": self._tags_str,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance": row.get('datname', None),
                    "plan": {
                        "definition": obfuscated_plan,
                        "cost": plan_cost,
                        "signature": plan_signature
                    },
                    "query_signature": query_signature,
                    "resource_hash": query_signature,
                    "application": row.get('application_name', None),
                    "user": row['usename'],
                    "statement": obfuscated_statement,
                },
                'postgres': {
                    k: v
                    for k, v in row.items()
                    if k not in pg_stat_activity_sample_exclude_keys
                },
            }
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (
                        row['state_change'] -
                        row['query_start']).total_seconds() * 1e9
                    event['timestamp'] = time.mktime(
                        row['state_change'].timetuple()) * 1000
            else:
                event['timestamp'] = time.time() * 1000
            return event
Example #28
0
    def __init__(self, check, config, shutdown_callback):
        collection_interval = float(
            config.statement_samples_config.get('collection_interval',
                                                DEFAULT_COLLECTION_INTERVAL))
        if collection_interval <= 0:
            collection_interval = DEFAULT_COLLECTION_INTERVAL
        super(PostgresStatementSamples, self).__init__(
            check,
            rate_limit=1 / collection_interval,
            run_sync=is_affirmative(
                config.statement_samples_config.get('run_sync', False)),
            enabled=is_affirmative(
                config.statement_samples_config.get('enabled', True)),
            dbms="postgres",
            min_collection_interval=config.min_collection_interval,
            expected_db_exceptions=(psycopg2.errors.DatabaseError, ),
            job_name="query-samples",
            shutdown_callback=shutdown_callback,
        )
        self._check = check
        self._config = config
        self._tags_no_db = None
        self._activity_last_query_start = None
        # The value is loaded when connecting to the main database
        self._explain_function = config.statement_samples_config.get(
            'explain_function', 'datadog.explain_statement')
        self._obfuscate_options = to_native_string(
            json.dumps(self._config.obfuscator_options))

        self._collection_strategy_cache = TTLCache(
            maxsize=config.statement_samples_config.get(
                'collection_strategy_cache_maxsize', 1000),
            ttl=config.statement_samples_config.get(
                'collection_strategy_cache_ttl', 300),
        )

        self._explain_errors_cache = TTLCache(
            maxsize=config.statement_samples_config.get(
                'explain_errors_cache_maxsize', 5000),
            # only try to re-explain invalid statements once per day
            ttl=config.statement_samples_config.get('explain_errors_cache_ttl',
                                                    24 * 60 * 60),
        )

        # explained_statements_ratelimiter: limit how often we try to re-explain the same query
        self._explained_statements_ratelimiter = RateLimitingTTLCache(
            maxsize=int(
                config.statement_samples_config.get(
                    'explained_queries_cache_maxsize', 5000)),
            ttl=60 * 60 / int(
                config.statement_samples_config.get(
                    'explained_queries_per_hour_per_query', 60)),
        )

        # seen_samples_ratelimiter: limit the ingestion rate per (query_signature, plan_signature)
        self._seen_samples_ratelimiter = RateLimitingTTLCache(
            # assuming ~100 bytes per entry (query & plan signature, key hash, 4 pointers (ordered dict), expiry time)
            # total size: 10k * 100 = 1 Mb
            maxsize=int(
                config.statement_samples_config.get(
                    'seen_samples_cache_maxsize', 10000)),
            ttl=60 * 60 / int(
                config.statement_samples_config.get(
                    'samples_per_hour_per_query', 15)),
        )

        self._activity_coll_enabled = is_affirmative(
            self._config.statement_activity_config.get('enabled', True))
        # activity events cannot be reported more often than regular samples
        self._activity_coll_interval = max(
            self._config.statement_activity_config.get(
                'collection_interval', DEFAULT_ACTIVITY_COLLECTION_INTERVAL),
            collection_interval,
        )
        self._activity_max_rows = self._config.statement_activity_config.get(
            'payload_row_limit', 3500)
        # Keep track of last time we sent an activity event
        self._time_since_last_activity_event = 0
        self._pg_stat_activity_cols = None
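The seen_samples_ratelimiter and explained_statements_ratelimiter above are RateLimitingTTLCache instances: acquire() admits a given key at most once per TTL window, and maxsize bounds memory as estimated in the inline comment. A minimal sketch of that acquire pattern on top of cachetools.TTLCache follows; it illustrates the behaviour and is not the library's actual implementation.

    from cachetools import TTLCache

    class RateLimitingTTLCacheSketch(TTLCache):
        # Admit each key at most once per TTL window, up to maxsize distinct keys.
        def acquire(self, key):
            if key in self:
                return False  # already admitted within this window
            if len(self) >= self.maxsize:
                return False  # cache full; treat as rate-limited
            self[key] = True
            return True

    # Illustrative sizing: ~10k tracked (query_signature, plan_signature) pairs, 15 samples/hour each.
    samples_ratelimiter = RateLimitingTTLCacheSketch(maxsize=10000, ttl=60 * 60 / 15)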
Example #29
0
    def _collect_plan_for_statement(self, row):
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self._log.debug("Failed to obfuscate statement: %s", e)
            self._check.count("dd.postgres.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
            return None

        # limit the rate of explains done to the database
        query_signature = compute_sql_signature(obfuscated_statement)
        if query_signature in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_signature] = True

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict = self._run_explain(row['query'], obfuscated_statement)
        plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            # if we're using the orjson implementation then json.dumps returns bytes
            plan = plan.decode('utf-8') if isinstance(plan, bytes) else plan
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

        statement_plan_sig = (query_signature, plan_signature)
        if statement_plan_sig not in self._seen_samples_cache:
            self._seen_samples_cache[statement_plan_sig] = True
            event = {
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "postgres",
                "ddtags": self._tags_str,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance": row.get('datname', None),
                    "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
                    "query_signature": query_signature,
                    "resource_hash": query_signature,
                    "application": row.get('application_name', None),
                    "user": row['usename'],
                    "statement": obfuscated_statement,
                },
                'postgres': {k: v for k, v in row.items() if k not in pg_stat_activity_sample_exclude_keys},
            }
            event['timestamp'] = time.time() * 1000
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (row['state_change'] - row['query_start']).total_seconds() * 1e9
                    # If the transaction is idle then we have a more specific "end time" than the current time at
                    # which we're collecting this event. According to the postgres docs, all of the timestamps in
                    # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
                    # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
                    # of the event else we risk the timestamp being significantly off and the event getting dropped
                    # during ingestion.
                    if row['state_change'].tzinfo:
                        event['timestamp'] = get_timestamp(row['state_change']) * 1000
            return event
Example #30
0
    def __init__(self, name, init_config, instances):
        super(SQLServer, self).__init__(name, init_config, instances)

        self._resolved_hostname = None
        self._agent_hostname = None
        self.connection = None
        self.failed_connections = {}
        self.instance_metrics = []
        self.instance_per_type_metrics = defaultdict(set)
        self.do_check = True

        self.tags = self.instance.get("tags", [])
        self.reported_hostname = self.instance.get('reported_hostname')
        self.autodiscovery = is_affirmative(self.instance.get('database_autodiscovery'))
        self.autodiscovery_include = self.instance.get('autodiscovery_include', ['.*'])
        self.autodiscovery_exclude = self.instance.get('autodiscovery_exclude', [])
        self.autodiscovery_db_service_check = is_affirmative(self.instance.get('autodiscovery_db_service_check', True))
        self.min_collection_interval = self.instance.get('min_collection_interval', 15)
        self._compile_patterns()
        self.autodiscovery_interval = self.instance.get('autodiscovery_interval', DEFAULT_AUTODISCOVERY_INTERVAL)
        self.databases = set()
        self.ad_last_check = 0

        self.proc = self.instance.get('stored_procedure')
        self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
        self.custom_metrics = init_config.get('custom_metrics', [])

        # DBM
        self.dbm_enabled = self.instance.get('dbm', False)
        self.statement_metrics_config = self.instance.get('query_metrics', {}) or {}
        self.statement_metrics = SqlserverStatementMetrics(self)
        self.activity_config = self.instance.get('query_activity', {}) or {}
        self.activity = SqlserverActivity(self)
        self.cloud_metadata = {}
        aws = self.instance.get('aws', {})
        gcp = self.instance.get('gcp', {})
        azure = self.instance.get('azure', {})
        if aws:
            self.cloud_metadata.update({'aws': aws})
        if gcp:
            self.cloud_metadata.update({'gcp': gcp})
        if azure:
            self.cloud_metadata.update({'azure': azure})
        obfuscator_options_config = self.instance.get('obfuscator_options', {}) or {}
        self.obfuscator_options = to_native_string(
            json.dumps(
                {
                    # Valid values for this can be found at
                    # https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md#connection-level-attributes
                    'dbms': 'mssql',
                    'replace_digits': is_affirmative(
                        obfuscator_options_config.get(
                            'replace_digits',
                            obfuscator_options_config.get('quantize_sql_tables', False),
                        )
                    ),
                    'keep_sql_alias': is_affirmative(obfuscator_options_config.get('keep_sql_alias', True)),
                    'return_json_metadata': is_affirmative(obfuscator_options_config.get('collect_metadata', True)),
                    'table_names': is_affirmative(obfuscator_options_config.get('collect_tables', True)),
                    'collect_commands': is_affirmative(obfuscator_options_config.get('collect_commands', True)),
                    'collect_comments': is_affirmative(obfuscator_options_config.get('collect_comments', True)),
                }
            )
        )

        self.static_info_cache = TTLCache(
            maxsize=100,
            # cache these for a full day
            ttl=60 * 60 * 24,
        )

        # Query declarations
        check_queries = []
        if is_affirmative(self.instance.get('include_ao_metrics', False)):
            check_queries.extend(
                [
                    QUERY_AO_AVAILABILITY_GROUPS,
                    QUERY_AO_FAILOVER_CLUSTER,
                    QUERY_AO_FAILOVER_CLUSTER_MEMBER,
                ]
            )
        if is_affirmative(self.instance.get('include_fci_metrics', False)):
            check_queries.extend([QUERY_FAILOVER_CLUSTER_INSTANCE])
        self._check_queries = self._new_query_executor(check_queries)
        self.check_initializations.append(self._check_queries.compile_queries)

        self.server_state_queries = self._new_query_executor([QUERY_SERVER_STATIC_INFO])
        self.check_initializations.append(self.server_state_queries.compile_queries)

        # use QueryManager to process custom queries
        self._query_manager = QueryManager(
            self, self.execute_query_raw, tags=self.tags, hostname=self.resolved_hostname
        )

        self._dynamic_queries = None

        self.check_initializations.append(self.config_checks)
        self.check_initializations.append(self._query_manager.compile_queries)
        self.check_initializations.append(self.initialize_connection)