Example #1
    def collect_timestamp(self, event_payload, rendered_event, event_object):
        value, variant = rendered_event[win32evtlog.EvtSystemTimeCreated]
        if variant == win32evtlog.EvtVarTypeNull:
            event_payload['timestamp'] = get_timestamp()
            return

        event_payload['timestamp'] = get_timestamp(value)
Example #2
    def load_intermediate_certs(self, der_cert):
        # https://tools.ietf.org/html/rfc3280#section-4.2.2.1
        # https://tools.ietf.org/html/rfc5280#section-5.2.7
        try:
            cert = load_der_x509_certificate(der_cert, default_backend())
        except Exception as e:
            self.log.error(
                'Error while deserializing peer certificate to discover intermediate certificates: %s',
                e)
            return

        try:
            authority_information_access = cert.extensions.get_extension_for_oid(
                ExtensionOID.AUTHORITY_INFORMATION_ACCESS)
        except ExtensionNotFound:
            self.log.debug(
                'No Authority Information Access extension found, skipping discovery of intermediate certificates'
            )
            return

        for access_description in authority_information_access.value:
            if access_description.access_method != AuthorityInformationAccessOID.CA_ISSUERS:
                continue

            uri = access_description.access_location.value
            if (uri in self.agent_check._intermediate_cert_uri_cache
                    and get_timestamp() -
                    self.agent_check._intermediate_cert_uri_cache[uri] <
                    self._intermediate_cert_refresh_interval):
                continue

            # Assume HTTP for now
            try:
                response = self.agent_check.http.get(
                    uri)  # SKIP_HTTP_VALIDATION
                response.raise_for_status()
            except Exception as e:
                self.log.error(
                    'Error fetching intermediate certificate from `%s`: %s',
                    uri, e)
                continue
            else:
                access_time = get_timestamp()
                intermediate_cert = response.content

            cert_id = sha256(intermediate_cert).digest()
            if cert_id not in self.agent_check._intermediate_cert_id_cache:
                self.agent_check.get_tls_context().load_verify_locations(
                    cadata=intermediate_cert)
                self.agent_check._intermediate_cert_id_cache.add(cert_id)

            self.agent_check._intermediate_cert_uri_cache[uri] = access_time
            self.load_intermediate_certs(intermediate_cert)
Example #3
def test(aggregator, dd_run_check, mock_performance_objects):
    mock_performance_objects(
        {'Foo': (['instance1'], {
            'Bar': [get_timestamp() - 1.2]
        })})
    check = get_check({
        'metrics': {
            'Foo': {
                'name': 'foo',
                'tag_name': 'baz',
                'counters': [{
                    'Bar': {
                        'name': 'bar',
                        'type': 'time_elapsed'
                    }
                }],
            }
        }
    })
    dd_run_check(check)

    tags = ['baz:instance1']
    tags.extend(GLOBAL_TAGS)

    assert 'test.foo.bar' in aggregator._metrics
    assert len(aggregator._metrics) == 1
    assert len(aggregator._metrics['test.foo.bar']) == 1
    m = aggregator._metrics['test.foo.bar'][0]

    assert 1.2 < m.value < 2
    assert m.type == aggregator.GAUGE
    assert sorted(m.tags) == sorted(tags)
Example #4
    def collect_events(self, system_tags):
        self.log.debug("Starting events collection (query start time: %s).",
                       self.latest_event_query)
        last_event_time = None
        collect_events_start_time = get_timestamp()
        try:
            # Use latest event query as starting time
            event_query = EVENT_PATH.format(self.latest_event_query)
            raw_events, code = self._get_data(event_query)

            for event in raw_events:
                try:
                    tags = self._tags + system_tags
                    normalized_event = SilkEvent(event, tags)
                    event_payload = normalized_event.get_datadog_payload()
                    self.event(event_payload)
                except ValueError as e:
                    self.log.warning(str(e))
                    # Skip invalid events so a stale or undefined payload is not reused below
                    continue

                # If this is the first valid event or this event timestamp is newer, update last event time checked
                if last_event_time is None or event_payload.get("timestamp") > last_event_time:
                    last_event_time = event_payload.get("timestamp")

        except Exception as e:
            # Don't get stuck on a failure to fetch an event
            # Ignore them for next pass
            self.log.warning("Unable to fetch events: %s", str(e))

        # Update latest event query to last event time
        if last_event_time is not None:
            self.latest_event_query = int(last_event_time)
        else:
            # In case no events were collected
            self.latest_event_query = int(collect_events_start_time)
Example #5
        def wrapper(self, *args, **kwargs):
            if os.getenv('DD_DISABLE_TRACKED_METHOD') == "true":
                return function(self, *args, **kwargs)

            start_time = get_timestamp()

            try:
                check = agent_check_getter(self) if agent_check_getter else self
            except Exception:
                print("[{}] invalid tracked_method. failed to get check reference.".format(function.__name__))
                return function(self, *args, **kwargs)

            for attr in required_attrs:
                if not hasattr(check, attr):
                    print(
                        "[{}] invalid check reference. Missing required attribute {}.".format(function.__name__, attr)
                    )
                    return function(self, *args, **kwargs)

            check_name = check.name

            stats_kwargs = {}
            if hasattr(check, 'debug_stats_kwargs'):
                stats_kwargs = dict(check.debug_stats_kwargs())

            stats_kwargs['tags'] = stats_kwargs.get('tags', []) + ["operation:{}".format(function.__name__)]

            try:
                result = function(self, *args, **kwargs)

                elapsed_ms = (get_timestamp() - start_time) * 1000
                check.histogram("dd.{}.operation.time".format(check_name), elapsed_ms, **stats_kwargs)

                check.log.debug("[%s.%s] operation completed in %s ms", check_name, function.__name__, elapsed_ms)

                if track_result_length and result is not None:
                    check.log.debug("[%s.%s] received result length %s", check_name, function.__name__, len(result))
                    check.gauge("dd.{}.operation.result.length".format(check_name), len(result), **stats_kwargs)

                return result
            except Exception as e:
                check.log.exception("operation %s error", function.__name__)
                stats_kwargs['tags'] += ["error:{}".format(type(e))]
                check.count("dd.{}.operation.error".format(check_name), 1, **stats_kwargs)
                raise
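The wrapper above is the inner function of a decorator factory that is not shown in this excerpt; judging from the closure variables it references (agent_check_getter, required_attrs, track_result_length), a hedged usage sketch could look like the following, assuming the factory is exposed as tracked_method and that the decorated method and its _check attribute are hypothetical names:
    @tracked_method(agent_check_getter=lambda self: self._check, track_result_length=True)
    def collect_statement_samples(self):
        # timing, histogram/gauge submission and error counting are handled by the wrapper
        return self._collect_samples()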
Example #6
    def _flatten_json(cls, metric_base, val, tags):
        """
        Recursively flattens the nginx json object. Returns a list of (metric_name, value, tags, metric_type) tuples.
        """
        output = []
        if isinstance(val, dict):
            # Pull out the server as a tag instead of trying to read as a metric
            if 'server' in val and val['server']:
                server = 'server:%s' % val.pop('server')
                if tags is None:
                    tags = []
                tags = tags + [server]
            for key, val2 in iteritems(val):
                if key in TAGGED_KEYS:
                    metric_name = '%s.%s' % (metric_base, TAGGED_KEYS[key])
                    for tag_val, data in iteritems(val2):
                        tag = '%s:%s' % (TAGGED_KEYS[key], tag_val)
                        output.extend(
                            cls._flatten_json(metric_name, data, tags + [tag]))
                else:
                    metric_name = '%s.%s' % (metric_base, key)
                    output.extend(cls._flatten_json(metric_name, val2, tags))

        elif isinstance(val, list):
            for val2 in val:
                output.extend(cls._flatten_json(metric_base, val2, tags))

        elif isinstance(val, bool):
            output.append((metric_base, int(val), tags, 'gauge'))

        elif isinstance(val, (int, float, long)):
            output.append((metric_base, val, tags, 'gauge'))

        elif isinstance(val, (text_type, str)) and val[-1] == "Z":
            try:
                # In the new Plus API, timestamps are now formatted
                # strings, some include microseconds, some don't...
                timestamp = fromisoformat(val[:19])
            except ValueError:
                pass
            else:
                output.append((metric_base, int(get_timestamp(timestamp)),
                               tags, 'gauge'))
        return output
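A small illustration (my own sketch, not part of the source) of how the flattening above behaves, assuming the enclosing check class is named Nginx (hypothetical here) and that 'active' is not one of the TAGGED_KEYS:
# The 'server' field is pulled out as a tag, and the remaining numeric field
# becomes a flat (metric_name, value, tags, metric_type) tuple.
flattened = Nginx._flatten_json('nginx.upstream.peers', {'server': '10.0.0.1:80', 'active': 3}, None)
assert flattened == [('nginx.upstream.peers.active', 3, ['server:10.0.0.1:80'], 'gauge')]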
Example #7
def calculate_elapsed_time(datestamp,
                           timestamp,
                           qm_timezone,
                           current_time=None):
    """
    Calculate elapsed time in seconds from IBM MQ queue status date and timestamps
    Expected Timestamp format: %H.%M.%S, e.g. 18.45.20
    Expected Datestamp format: %Y-%m-%d, e.g. 2021-09-15
    https://www.ibm.com/docs/en/ibm-mq/9.2?topic=reference-display-qstatus-display-queue-status#q086260___3
    """
    if qm_timezone is not None:
        qm_tz = tz.gettz(qm_timezone)
        if qm_tz is None or type(qm_tz) == str:
            msg = ('Time zone `{}` is not recognized or may be deprecated. '
                   'Please specify a valid time zone in IANA/Olson format.'.
                   format(qm_timezone))
            raise ValueError(msg)
    else:
        qm_tz = tz.UTC

    if current_time is None:
        current_time = get_timestamp()
    """
    1. Construct a datetime object from the IBM MQ timestamp string format
    2. Set the QM time zone on the datetime object.
    3. Calculate the POSIX timestamp in seconds since EPOCH
    """
    if datestamp and timestamp:
        timestamp_str = sanitize_strings(datestamp) + ' ' + sanitize_strings(
            timestamp)
        timestamp_dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H.%M.%S')
        timestamp_tz = timestamp_dt.replace(tzinfo=qm_tz)
        timestamp_posix = (timestamp_tz - EPOCH).total_seconds()
    else:
        return None

    elapsed = round_value(current_time - timestamp_posix)

    return elapsed
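A minimal usage sketch (not from the source) of the helper above, with a fixed current_time so the result is deterministic; it assumes sanitize_strings leaves plain strings untouched and round_value rounds to a float. 2021-09-15 18:45:20 UTC corresponds to POSIX time 1631731520, so a current_time 60 seconds later yields 60.0:
# Hypothetical values: queue status recorded at 2021-09-15 18.45.20 UTC,
# observed 60 seconds later.
elapsed = calculate_elapsed_time('2021-09-15', '18.45.20', 'UTC', current_time=1631731580.0)
assert elapsed == 60.0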
Example #8
def test(aggregator, dd_run_check, mock_http_response):
    mock_http_response(
        """
        # HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection.
        # TYPE go_memstats_last_gc_time_seconds gauge
        go_memstats_last_gc_time_seconds{{foo="bar"}} {}
        """.format(
            get_timestamp() - 1.2
        )
    )
    check = get_check({'metrics': [{'go_memstats_last_gc_time_seconds': {'type': 'time_elapsed'}}]})
    dd_run_check(check)

    assert 'test.go_memstats_last_gc_time_seconds' in aggregator._metrics
    assert len(aggregator._metrics) == 1
    assert len(aggregator._metrics['test.go_memstats_last_gc_time_seconds']) == 1
    m = aggregator._metrics['test.go_memstats_last_gc_time_seconds'][0]

    assert 1.2 < m.value < 2
    assert m.type == aggregator.GAUGE
    assert set(m.tags) == {'endpoint:test', 'foo:bar'}
Example #9
    def __init__(self, name, init_config, instances):
        super(SilkCheck, self).__init__(name, init_config, instances)
        self.metrics_to_collect = dict(METRICS)

        server = self.instance.get("host_address")

        if server is None:
            raise ConfigurationError("host_address is a required parameter.")

        self.latest_event_query = int(get_timestamp())
        self.url = "{}/api/v2/".format(server)

        host = urlparse(server).netloc
        self._tags = self.instance.get("tags",
                                       []) + ["silk_host:{}".format(host)]

        if self.instance.get("enable_read_write_statistics", False):
            self.metrics_to_collect.update(dict(READ_WRITE_METRICS))

        if self.instance.get("enable_blocksize_statistics", False):
            self.metrics_to_collect.update(dict(BLOCKSIZE_METRICS))

        # System tags are collected from the /state endpoint
        self._system_tags = []
Example #10
    def _collect_plan_for_statement(self, row):
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self._log.debug("Failed to obfuscate statement: %s", e)
            self._check.count("dd.postgres.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
            return None

        # limit the rate of explains done to the database
        query_signature = compute_sql_signature(obfuscated_statement)
        if query_signature in self._explained_statements_cache:
            return None
        self._explained_statements_cache[query_signature] = True

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict = self._run_explain(row['query'], obfuscated_statement)
        plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            # if we're using the orjson implementation then json.dumps returns bytes
            plan = plan.decode('utf-8') if isinstance(plan, bytes) else plan
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)
            plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

        statement_plan_sig = (query_signature, plan_signature)
        if statement_plan_sig not in self._seen_samples_cache:
            self._seen_samples_cache[statement_plan_sig] = True
            event = {
                "host": self._db_hostname,
                "service": self._service,
                "ddsource": "postgres",
                "ddtags": self._tags_str,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance": row.get('datname', None),
                    "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
                    "query_signature": query_signature,
                    "resource_hash": query_signature,
                    "application": row.get('application_name', None),
                    "user": row['usename'],
                    "statement": obfuscated_statement,
                },
                'postgres': {k: v for k, v in row.items() if k not in pg_stat_activity_sample_exclude_keys},
            }
            event['timestamp'] = time.time() * 1000
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (row['state_change'] - row['query_start']).total_seconds() * 1e9
                    # If the transaction is idle then we have a more specific "end time" than the current time at
                    # which we're collecting this event. According to the postgres docs, all of the timestamps in
                    # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
                    # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
                    # of the event else we risk the timestamp being significantly off and the event getting dropped
                    # during ingestion.
                    if row['state_change'].tzinfo:
                        event['timestamp'] = get_timestamp(row['state_change']) * 1000
            return event
Example #11
    def check_leader_v1(self, submission_queue, dynamic_tags):
        url = self._api_url + '/sys/leader'
        leader_data = self.access_api(
            url, ignore_status_codes=SYS_LEADER_DEFAULT_CODES)
        errors = leader_data.get('errors')
        if errors:
            error_msg = ';'.join(errors)
            self.log.error(
                'Unable to fetch leader data from Vault. Reason: %s',
                error_msg)
            return

        is_leader = is_affirmative(leader_data.get('is_self'))
        dynamic_tags.append(f'is_leader:{str(is_leader).lower()}')

        submission_queue.append(
            lambda tags: self.gauge('is_leader', int(is_leader), tags=tags))

        current_leader = Leader(leader_data.get('leader_address'),
                                leader_data.get('leader_cluster_address'))
        has_leader = any(current_leader)  # At least one address is set

        if self.config.detect_leader and has_leader:
            if self._previous_leader is None:
                # First check run, let's set the previous leader variable.
                self._previous_leader = current_leader
                return
            if self._previous_leader == current_leader:
                # Leader hasn't changed
                return
            if not is_leader:
                # Leader has changed but the monitored vault node is not the leader. Because the agent monitors
                # each vault node in the cluster, let's use this condition to submit a single event.
                self.log.debug(
                    'Leader changed from %s to %s but not reporting an event as the current node is not the leader.',
                    self._previous_leader,
                    current_leader,
                )
                self._previous_leader = current_leader
                return

            if current_leader.leader_addr != self._previous_leader.leader_addr:
                # The main leader address has changed
                event_message = (
                    f'Leader address changed from `{self._previous_leader.leader_addr}` to '
                    f'`{current_leader.leader_addr}`.')
            else:
                # The leader_cluster_addr changed (this usually happens when the leader address points to a load balancer)
                event_message = (
                    f'Leader cluster address changed from `{self._previous_leader.leader_cluster_addr}` to '
                    f'`{current_leader.leader_cluster_addr}`.')

            self.log.debug('Leader changed from %s to %s, sending the event.',
                           self._previous_leader, current_leader)
            submission_queue.append(
                lambda tags: self.event({
                    'timestamp': get_timestamp(),
                    'event_type': self.EVENT_LEADER_CHANGE,
                    'msg_title': 'Leader change',
                    'msg_text': event_message,
                    'alert_type': 'info',
                    'host': self.hostname,
                    'tags': tags,
                }))
            # Update _previous_leader for the next run
            self._previous_leader = current_leader
Example #12
    def test_time_delta(self):
        now = datetime.now()
        expected = (now.replace(tzinfo=UTC) - EPOCH).total_seconds()

        assert get_timestamp(now) == expected
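A companion sketch (mine, not part of the source) for the timezone-aware case; it assumes get_timestamp accepts aware datetimes directly, as the Postgres samples above do with row['state_change']:
    def test_time_delta_aware(self):
        # An already-aware datetime needs no replace(tzinfo=UTC) step
        now = datetime.now(UTC)
        expected = (now - EPOCH).total_seconds()

        assert get_timestamp(now) == expected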
Example #13
    def test_default(self, mocker):
        time_time = mocker.patch('datadog_checks.base.utils.time.epoch_offset')
        get_timestamp()
        time_time.assert_called_once()
Example #14
    def test_type(self):
        assert isinstance(get_timestamp(), float)
Example #15
    def _collect_plan_for_statement(self, row):
        # limit the rate of explains done to the database
        cache_key = (row['datname'], row['query_signature'])
        if not self._explained_statements_ratelimiter.acquire(cache_key):
            return None

        # Plans have several important signatures to tag events with. Note that for postgres, the
        # query_signature and resource_hash will be the same value.
        # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
        # - `resource_hash` - hash computed off the raw sql text to match apm resources
        # - `query_signature` - hash computed from the raw sql text to match query metrics
        plan_dict, explain_err_code, err_msg = self._run_and_track_explain(
            row['datname'], row['query'], row['statement'],
            row['query_signature'])
        collection_errors = None
        if explain_err_code:
            collection_errors = [{
                'code': explain_err_code.value,
                'message': err_msg if err_msg else None
            }]

        plan, normalized_plan, obfuscated_plan, plan_signature = None, None, None, None
        if plan_dict:
            plan = json.dumps(plan_dict)
            # if we're using the orjson implementation then json.dumps returns bytes
            plan = plan.decode('utf-8') if isinstance(plan, bytes) else plan
            normalized_plan = datadog_agent.obfuscate_sql_exec_plan(
                plan, normalize=True)
            obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
            plan_signature = compute_exec_plan_signature(normalized_plan)

        statement_plan_sig = (row['query_signature'], plan_signature)
        if self._seen_samples_ratelimiter.acquire(statement_plan_sig):
            event = {
                "host": self._check.resolved_hostname,
                "ddagentversion": datadog_agent.get_version(),
                "ddsource": "postgres",
                "ddtags": ",".join(self._dbtags(row['datname'])),
                "timestamp": time.time() * 1000,
                "network": {
                    "client": {
                        "ip": row.get('client_addr', None),
                        "port": row.get('client_port', None),
                        "hostname": row.get('client_hostname', None),
                    }
                },
                "db": {
                    "instance":
                    row.get('datname', None),
                    "plan": {
                        "definition": obfuscated_plan,
                        "signature": plan_signature,
                        "collection_errors": collection_errors,
                    },
                    "query_signature":
                    row['query_signature'],
                    "resource_hash":
                    row['query_signature'],
                    "application":
                    row.get('application_name', None),
                    "user":
                    row['usename'],
                    "statement":
                    row['statement'],
                    "metadata": {
                        "tables": row['dd_tables'],
                        "commands": row['dd_commands'],
                        "comments": row['dd_comments'],
                    },
                    "query_truncated":
                    self._get_truncation_state(
                        self._get_track_activity_query_size(),
                        row['query']).value,
                },
                'postgres': {
                    k: v
                    for k, v in row.items()
                    if k not in pg_stat_activity_sample_exclude_keys
                },
            }
            if row['state'] in {'idle', 'idle in transaction'}:
                if row['state_change'] and row['query_start']:
                    event['duration'] = (
                        row['state_change'] -
                        row['query_start']).total_seconds() * 1e9
                    # If the transaction is idle then we have a more specific "end time" than the current time at
                    # which we're collecting this event. According to the postgres docs, all of the timestamps in
                    # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
                    # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
                    # of the event else we risk the timestamp being significantly off and the event getting dropped
                    # during ingestion.
                    if row['state_change'].tzinfo:
                        event['timestamp'] = get_timestamp(
                            row['state_change']) * 1000
            return event
        return None