def collect_timestamp(self, event_payload, rendered_event, event_object):
    """Set ``event_payload['timestamp']`` from the event's system creation time.

    The ``win32evtlog.EvtSystemTimeCreated`` entry of ``rendered_event`` is
    unpacked as a ``(value, variant)`` pair. When the variant is
    ``win32evtlog.EvtVarTypeNull`` the payload is stamped with
    ``get_timestamp()`` called without arguments; otherwise the rendered value
    is handed to ``get_timestamp``. ``event_object`` is not used here.
    """
    created_at, variant_type = rendered_event[win32evtlog.EvtSystemTimeCreated]

    if variant_type != win32evtlog.EvtVarTypeNull:
        event_payload['timestamp'] = get_timestamp(created_at)
    else:
        # No creation time was rendered for this event.
        event_payload['timestamp'] = get_timestamp()
def load_intermediate_certs(self, der_cert):
    """Discover, fetch and trust the issuer certificates referenced by ``der_cert``.

    The DER-encoded certificate is parsed and its Authority Information Access
    extension is scanned for CA Issuers entries. Each referenced URI is fetched
    through ``self.agent_check.http`` unless it was fetched less than
    ``self._intermediate_cert_refresh_interval`` ago. A fetched certificate
    whose SHA-256 digest is not yet cached is loaded into the check's TLS
    context, and the method then recurses on the fetched certificate.

    Parsing failures, a missing extension and fetch errors are logged and
    skipped; nothing is returned.
    """
    # https://tools.ietf.org/html/rfc3280#section-4.2.2.1
    # https://tools.ietf.org/html/rfc5280#section-5.2.7
    try:
        peer_cert = load_der_x509_certificate(der_cert, default_backend())
    except Exception as e:
        self.log.error('Error while deserializing peer certificate to discover intermediate certificates: %s', e)
        return

    try:
        aia_extension = peer_cert.extensions.get_extension_for_oid(ExtensionOID.AUTHORITY_INFORMATION_ACCESS)
    except ExtensionNotFound:
        self.log.debug(
            'No Authority Information Access extension found, skipping discovery of intermediate certificates'
        )
        return

    for description in aia_extension.value:
        if description.access_method != AuthorityInformationAccessOID.CA_ISSUERS:
            continue

        uri = description.access_location.value

        # Skip URIs that were fetched recently enough.
        if uri in self.agent_check._intermediate_cert_uri_cache:
            seconds_since_fetch = get_timestamp() - self.agent_check._intermediate_cert_uri_cache[uri]
            if seconds_since_fetch < self._intermediate_cert_refresh_interval:
                continue

        # Assume HTTP for now
        try:
            response = self.agent_check.http.get(uri)  # SKIP_HTTP_VALIDATION
            response.raise_for_status()
        except Exception as e:
            self.log.error('Error fetching intermediate certificate from `%s`: %s', uri, e)
            continue

        fetched_at = get_timestamp()
        issuer_der = response.content

        fingerprint = sha256(issuer_der).digest()
        if fingerprint not in self.agent_check._intermediate_cert_id_cache:
            self.agent_check.get_tls_context().load_verify_locations(cadata=issuer_der)
            self.agent_check._intermediate_cert_id_cache.add(fingerprint)
            self.agent_check._intermediate_cert_uri_cache[uri] = fetched_at

        # The fetched issuer may itself reference further issuers.
        self.load_intermediate_certs(issuer_der)
def test(aggregator, dd_run_check, mock_performance_objects):
    """Check the metric submitted for a ``time_elapsed`` counter.

    The mocked ``Bar`` counter holds a timestamp 1.2 seconds in the past; the
    single submitted ``test.foo.bar`` metric must be a gauge whose value lies
    strictly between 1.2 and 2 and which carries the instance and global tags.
    """
    raw_counter_value = get_timestamp() - 1.2
    mock_performance_objects({'Foo': (['instance1'], {'Bar': [raw_counter_value]})})

    bar_counter = {'Bar': {'name': 'bar', 'type': 'time_elapsed'}}
    foo_object = {'name': 'foo', 'tag_name': 'baz', 'counters': [bar_counter]}
    check = get_check({'metrics': {'Foo': foo_object}})
    dd_run_check(check)

    expected_tags = ['baz:instance1'] + list(GLOBAL_TAGS)

    metric_name = 'test.foo.bar'
    assert metric_name in aggregator._metrics
    assert len(aggregator._metrics) == 1
    assert len(aggregator._metrics[metric_name]) == 1

    submitted = aggregator._metrics[metric_name][0]
    assert 1.2 < submitted.value < 2
    assert submitted.type == aggregator.GAUGE
    assert sorted(submitted.tags) == sorted(expected_tags)
def collect_events(self, system_tags):
    """Fetch events newer than ``self.latest_event_query`` and submit them.

    Each raw event returned by ``self._get_data`` is wrapped in a ``SilkEvent``
    (tagged with ``self._tags + system_tags``), converted to a payload and sent
    with ``self.event``. The newest payload timestamp seen becomes the next
    ``self.latest_event_query``; if no timestamp was seen, the time at which
    this collection started is used instead.

    An event that raises ``ValueError`` is logged and skipped on its own. Any
    other exception stops the collection for this run and is logged as a
    warning; nothing propagates to the caller.
    """
    self.log.debug("Starting events collection (query start time: %s).", self.latest_event_query)
    last_event_time = None
    collect_events_start_time = get_timestamp()
    try:
        # Use latest event query as starting time
        event_query = EVENT_PATH.format(self.latest_event_query)
        raw_events, _code = self._get_data(event_query)

        for event in raw_events:
            try:
                tags = self._tags + system_tags
                normalized_event = SilkEvent(event, tags)
                event_payload = normalized_event.get_datadog_payload()
                self.event(event_payload)
            except ValueError as e:
                self.log.warning(str(e))
                # Skip the timestamp bookkeeping: `event_payload` is either unbound (first event)
                # or left over from the previous iteration, and must not be read here.
                continue

            if event_payload is None:
                continue

            event_time = event_payload.get("timestamp")
            if event_time is None:
                # A missing timestamp cannot be ordered against the previous one.
                continue

            # If this is the first valid event or this event timestamp is newer, update last event time checked
            if last_event_time is None or event_time > last_event_time:
                last_event_time = event_time
    except Exception as e:
        # Don't get stuck on a failure to fetch an event
        # Ignore them for next pass
        self.log.warning("Unable to fetch events: %s", str(e))

    # Update latest event query to last event time
    if last_event_time is not None:
        self.latest_event_query = int(last_event_time)
    else:
        # In case no events were collected
        self.latest_event_query = int(collect_events_start_time)
def wrapper(self, *args, **kwargs):
    """Run ``function`` and report its duration, result length and errors through the check.

    ``function``, ``agent_check_getter``, ``required_attrs`` and
    ``track_result_length`` are free variables not defined in this block
    (presumably supplied by an enclosing decorator scope).

    Tracking is bypassed, and ``function`` is simply called, when the
    ``DD_DISABLE_TRACKED_METHOD`` environment variable equals ``"true"``, when
    the check reference cannot be obtained, or when the check lacks one of the
    required attributes. Otherwise the elapsed time is sent as a histogram,
    optionally the result length as a gauge, and any exception is counted and
    re-raised.
    """
    if os.getenv('DD_DISABLE_TRACKED_METHOD') == "true":
        return function(self, *args, **kwargs)

    start_time = get_timestamp()
    operation = function.__name__

    try:
        check = self if not agent_check_getter else agent_check_getter(self)
    except Exception:
        print("[{}] invalid tracked_method. failed to get check reference.".format(operation))
        return function(self, *args, **kwargs)

    for attr in required_attrs:
        if hasattr(check, attr):
            continue
        print("[{}] invalid check reference. Missing required attribute {}.".format(operation, attr))
        return function(self, *args, **kwargs)

    check_name = check.name

    stats_kwargs = dict(check.debug_stats_kwargs()) if hasattr(check, 'debug_stats_kwargs') else {}
    # Build a new list so the tags returned by the check are not mutated.
    stats_kwargs['tags'] = stats_kwargs.get('tags', []) + ["operation:{}".format(operation)]

    try:
        result = function(self, *args, **kwargs)

        elapsed_ms = (get_timestamp() - start_time) * 1000
        check.histogram("dd.{}.operation.time".format(check_name), elapsed_ms, **stats_kwargs)
        check.log.debug("[%s.%s] operation completed in %s ms", check_name, operation, elapsed_ms)

        if track_result_length and result is not None:
            result_length = len(result)
            check.log.debug("[%s.%s] received result length %s", check_name, operation, result_length)
            check.gauge("dd.{}.operation.result.length".format(check_name), result_length, **stats_kwargs)

        return result
    except Exception as e:
        check.log.exception("operation %s error", operation)
        stats_kwargs['tags'] += ["error:{}".format(type(e))]
        check.count("dd.{}.operation.error".format(check_name), 1, **stats_kwargs)
        raise
def _flatten_json(cls, metric_base, val, tags):
    """
    Recursively flattens the nginx json object. Returns the following: [(metric_name, value, tags, metric_type)]

    ``metric_type`` is always ``'gauge'``. Handling per value type:

    - dict: a truthy ``server`` entry is removed from the dict and turned into a ``server:<value>`` tag;
      keys found in ``TAGGED_KEYS`` turn their sub-keys into tags, other keys extend the metric name.
    - list: every element is flattened under the same metric name.
    - bool / number: emitted as-is (booleans as 0/1).
    - string ending in ``Z``: its first 19 characters are parsed with ``fromisoformat`` and emitted as an
      integer timestamp; strings that do not parse are skipped.

    Anything else (including empty strings) produces no metric.

    NOTE: a dict ``val`` is mutated in place, since the ``server`` key is popped.
    """
    output = []
    if isinstance(val, dict):
        # Pull out the server as a tag instead of trying to read as a metric
        if 'server' in val and val['server']:
            server = 'server:%s' % val.pop('server')
            if tags is None:
                tags = []
            tags = tags + [server]
        for key, val2 in iteritems(val):
            if key in TAGGED_KEYS:
                metric_name = '%s.%s' % (metric_base, TAGGED_KEYS[key])
                # `tags` may still be None when no server tag was added above.
                base_tags = [] if tags is None else tags
                for tag_val, data in iteritems(val2):
                    tag = '%s:%s' % (TAGGED_KEYS[key], tag_val)
                    output.extend(cls._flatten_json(metric_name, data, base_tags + [tag]))
            else:
                metric_name = '%s.%s' % (metric_base, key)
                output.extend(cls._flatten_json(metric_name, val2, tags))

    elif isinstance(val, list):
        for val2 in val:
            output.extend(cls._flatten_json(metric_base, val2, tags))

    elif isinstance(val, bool):
        # Must be checked before the numeric branch: bool is a subclass of int.
        output.append((metric_base, int(val), tags, 'gauge'))

    elif isinstance(val, (int, float, long)):
        output.append((metric_base, val, tags, 'gauge'))

    # `endswith` rather than `val[-1]` so that an empty string does not raise IndexError.
    elif isinstance(val, (text_type, str)) and val.endswith("Z"):
        try:
            # In the new Plus API, timestamps are now formatted
            # strings, some include microseconds, some don't...
            timestamp = fromisoformat(val[:19])
        except ValueError:
            pass
        else:
            output.append((metric_base, int(get_timestamp(timestamp)), tags, 'gauge'))

    return output
def calculate_elapsed_time(datestamp, timestamp, qm_timezone, current_time=None):
    """
    Calculate elapsed time in seconds from IBM MQ queue status date and timestamps

    Expected Timestamp format: %H.%M.%S, e.g. 18.45.20
    Expected Datestamp format: %Y-%m-%d, e.g. 2021-09-15
    https://www.ibm.com/docs/en/ibm-mq/9.2?topic=reference-display-qstatus-display-queue-status#q086260___3

    :param datestamp: date string reported by the queue manager.
    :param timestamp: time string reported by the queue manager.
    :param qm_timezone: time zone name of the queue manager, or ``None`` to use UTC.
    :param current_time: reference time in seconds since EPOCH; defaults to ``get_timestamp()``.
    :return: ``round_value(current_time - <queue manager time as POSIX seconds>)``,
        or ``None`` when ``datestamp`` or ``timestamp`` is empty.
    :raises ValueError: if ``qm_timezone`` is given but cannot be resolved to a time zone.
    """
    if qm_timezone is not None:
        qm_tz = tz.gettz(qm_timezone)
        # A string result is treated the same as an unresolved zone.
        if qm_tz is None or isinstance(qm_tz, str):
            msg = (
                'Time zone `{}` is not recognized or may be deprecated. '
                'Please specify a valid time zone in IANA/Olson format.'.format(qm_timezone)
            )
            raise ValueError(msg)
    else:
        qm_tz = tz.UTC

    if not (datestamp and timestamp):
        return None

    if current_time is None:
        current_time = get_timestamp()

    # 1. Construct a datetime object from the IBM MQ timestamp string format
    # 2. Set the QM time zone on the datetime object.
    # 3. Calculate the POSIX timestamp in seconds since EPOCH
    timestamp_str = sanitize_strings(datestamp) + ' ' + sanitize_strings(timestamp)
    timestamp_dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H.%M.%S')
    timestamp_tz = timestamp_dt.replace(tzinfo=qm_tz)
    timestamp_posix = (timestamp_tz - EPOCH).total_seconds()

    return round_value(current_time - timestamp_posix)
def test(aggregator, dd_run_check, mock_http_response):
    """Check the metric submitted for a ``time_elapsed`` OpenMetrics gauge.

    The mocked endpoint exposes a last-GC timestamp 1.2 seconds in the past;
    the single submitted metric must be a gauge whose value lies strictly
    between 1.2 and 2, tagged with the endpoint and the sample's label.
    """
    last_gc_time = get_timestamp() - 1.2
    payload_template = """
        # HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection.
        # TYPE go_memstats_last_gc_time_seconds gauge
        go_memstats_last_gc_time_seconds{{foo="bar"}} {}
        """
    mock_http_response(payload_template.format(last_gc_time))

    metric_config = {'go_memstats_last_gc_time_seconds': {'type': 'time_elapsed'}}
    check = get_check({'metrics': [metric_config]})
    dd_run_check(check)

    metric_name = 'test.go_memstats_last_gc_time_seconds'
    assert metric_name in aggregator._metrics
    assert len(aggregator._metrics) == 1
    assert len(aggregator._metrics[metric_name]) == 1

    submitted = aggregator._metrics[metric_name][0]
    assert 1.2 < submitted.value < 2
    assert submitted.type == aggregator.GAUGE
    assert set(submitted.tags) == {'endpoint:test', 'foo:bar'}
def __init__(self, name, init_config, instances):
    """Initialise the check from its instance configuration.

    Reads ``host_address`` (required), ``tags`` and the two optional
    ``enable_*_statistics`` flags from ``self.instance``. Sets up the API base
    URL, the base tag list (including a ``silk_host`` tag taken from the
    address' network location), the metric set to collect and the starting
    point for event queries (the current time, as an integer).

    Raises ``ConfigurationError`` when ``host_address`` is missing.
    """
    super(SilkCheck, self).__init__(name, init_config, instances)
    self.metrics_to_collect = dict(METRICS)

    server = self.instance.get("host_address")
    if server is None:
        raise ConfigurationError("host_address is a required parameter.")

    self.latest_event_query = int(get_timestamp())
    self.url = "{}/api/v2/".format(server)

    host = urlparse(server).netloc
    instance_tags = self.instance.get("tags", [])
    self._tags = instance_tags + ["silk_host:{}".format(host)]

    # Optional metric sets, each enabled by its own instance flag.
    optional_metric_sets = (
        ("enable_read_write_statistics", READ_WRITE_METRICS),
        ("enable_blocksize_statistics", BLOCKSIZE_METRICS),
    )
    for option, extra_metrics in optional_metric_sets:
        if self.instance.get(option, False):
            self.metrics_to_collect.update(dict(extra_metrics))

    # System tags are collected from the /state/endpoint
    self._system_tags = []
def _collect_plan_for_statement(self, row):
    """Build a statement-sample event, including its explain plan, for one activity row.

    Returns ``None`` when the statement cannot be obfuscated, when its query
    signature is already in ``self._explained_statements_cache``, or when the
    (query signature, plan signature) pair is already in
    ``self._seen_samples_cache``. Otherwise both caches are updated and the
    event dict is returned.
    """
    try:
        obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
    except Exception as e:
        self._log.debug("Failed to obfuscate statement: %s", e)
        self._check.count("dd.postgres.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
        return None

    # limit the rate of explains done to the database
    query_signature = compute_sql_signature(obfuscated_statement)
    if query_signature in self._explained_statements_cache:
        return None
    self._explained_statements_cache[query_signature] = True

    # Plans have several important signatures to tag events with. Note that for postgres, the
    # query_signature and resource_hash will be the same value.
    # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
    # - `resource_hash` - hash computed off the raw sql text to match apm resources
    # - `query_signature` - hash computed from the raw sql text to match query metrics
    plan_dict = self._run_explain(row['query'], obfuscated_statement)
    obfuscated_plan = plan_signature = plan_cost = None
    if plan_dict:
        serialized_plan = json.dumps(plan_dict)
        # if we're using the orjson implementation then json.dumps returns bytes
        if isinstance(serialized_plan, bytes):
            serialized_plan = serialized_plan.decode('utf-8')
        normalized_plan = datadog_agent.obfuscate_sql_exec_plan(serialized_plan, normalize=True)
        obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(serialized_plan)
        plan_signature = compute_exec_plan_signature(normalized_plan)
        plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

    sample_key = (query_signature, plan_signature)
    if sample_key in self._seen_samples_cache:
        return None
    self._seen_samples_cache[sample_key] = True

    event = {
        "host": self._db_hostname,
        "service": self._service,
        "ddsource": "postgres",
        "ddtags": self._tags_str,
        "network": {
            "client": {
                "ip": row.get('client_addr', None),
                "port": row.get('client_port', None),
                "hostname": row.get('client_hostname', None),
            }
        },
        "db": {
            "instance": row.get('datname', None),
            "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
            "query_signature": query_signature,
            "resource_hash": query_signature,
            "application": row.get('application_name', None),
            "user": row['usename'],
            "statement": obfuscated_statement,
        },
        'postgres': {
            column: value for column, value in row.items() if column not in pg_stat_activity_sample_exclude_keys
        },
    }
    # Default to the collection time, in milliseconds.
    event['timestamp'] = time.time() * 1000

    if row['state'] in {'idle', 'idle in transaction'}:
        state_change = row['state_change']
        if state_change and row['query_start']:
            event['duration'] = (state_change - row['query_start']).total_seconds() * 1e9
            # If the transaction is idle then we have a more specific "end time" than the current time at
            # which we're collecting this event. According to the postgres docs, all of the timestamps in
            # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
            # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
            # of the event else we risk the timestamp being significantly off and the event getting dropped
            # during ingestion.
            if state_change.tzinfo:
                event['timestamp'] = get_timestamp(state_change) * 1000

    return event
def check_leader_v1(self, submission_queue, dynamic_tags):
    """Report leadership status and queue a leader-change event.

    Queries ``<api_url>/sys/leader`` and:

    - appends an ``is_leader:<true|false>`` tag to ``dynamic_tags``;
    - appends to ``submission_queue`` a callable submitting the ``is_leader`` gauge;
    - when ``self.config.detect_leader`` is set and a leader address is known, compares the
      current leader with ``self._previous_leader`` and, if it changed and this node is the
      leader, queues a callable submitting a leader-change event.

    Queued callables take the final tag list as their only argument. If the API response
    contains errors they are logged and nothing is appended.
    """
    url = self._api_url + '/sys/leader'
    leader_data = self.access_api(url, ignore_status_codes=SYS_LEADER_DEFAULT_CODES)

    errors = leader_data.get('errors')
    if errors:
        error_msg = ';'.join(errors)
        self.log.error('Unable to fetch leader data from Vault. Reason: %s', error_msg)
        return

    is_leader = is_affirmative(leader_data.get('is_self'))
    dynamic_tags.append(f'is_leader:{str(is_leader).lower()}')
    submission_queue.append(lambda tags: self.gauge('is_leader', int(is_leader), tags=tags))

    current_leader = Leader(leader_data.get('leader_address'), leader_data.get('leader_cluster_address'))
    has_leader = any(current_leader)  # At least one address is set

    if self.config.detect_leader and has_leader:
        if self._previous_leader is None:
            # First check run, let's set the previous leader variable.
            self._previous_leader = current_leader
            return
        if self._previous_leader == current_leader:
            # Leader hasn't changed
            return
        if not is_leader:
            # Leader has changed but the monitored vault node is not the leader. Because the agent monitors
            # each vault node in the cluster, let's use this condition to submit a single event.
            # Log before updating `_previous_leader`, otherwise both placeholders show the new leader.
            self.log.debug(
                'Leader changed from %s to %s but not reporting an event as the current node is not the leader.',
                self._previous_leader,
                current_leader,
            )
            self._previous_leader = current_leader
            return

        if current_leader.leader_addr != self._previous_leader.leader_addr:
            # The main leader address has changed
            event_message = (
                f'Leader address changed from `{self._previous_leader.leader_addr}` to '
                f'`{current_leader.leader_addr}`.'
            )
        else:
            # The leader_cluster_addr changed (usually happens when the leader address points to a load balancer)
            event_message = (
                f'Leader cluster address changed from `{self._previous_leader.leader_cluster_addr}` to '
                f'`{current_leader.leader_cluster_addr}`.'
            )

        self.log.debug('Leader changed from %s to %s, sending the event.', self._previous_leader, current_leader)

        submission_queue.append(
            lambda tags: self.event(
                {
                    'timestamp': get_timestamp(),
                    'event_type': self.EVENT_LEADER_CHANGE,
                    'msg_title': 'Leader change',
                    'msg_text': event_message,
                    'alert_type': 'info',
                    'host': self.hostname,
                    'tags': tags,
                }
            )
        )
        # Update _previous_leader for the next run
        self._previous_leader = current_leader
def test_time_delta(self):
    """``get_timestamp`` of a naive datetime equals its offset from ``EPOCH`` when stamped as UTC."""
    naive_now = datetime.now()
    aware_now = naive_now.replace(tzinfo=UTC)
    expected_seconds = (aware_now - EPOCH).total_seconds()

    assert get_timestamp(naive_now) == expected_seconds
def test_default(self, mocker):
    """Calling ``get_timestamp`` without an argument invokes ``epoch_offset`` exactly once."""
    epoch_offset_mock = mocker.patch('datadog_checks.base.utils.time.epoch_offset')

    get_timestamp()

    epoch_offset_mock.assert_called_once()
def test_type(self):
    """``get_timestamp`` called without arguments returns a ``float``."""
    result = get_timestamp()

    assert isinstance(result, float)
def _collect_plan_for_statement(self, row):
    """Build a statement-sample event, including its explain plan, for one activity row.

    Returns ``None`` when either rate limiter refuses the sample: the
    explained-statements limiter (keyed by database name and query signature)
    or the seen-samples limiter (keyed by query signature and plan signature).
    Otherwise the event dict is returned. An error code reported by the explain
    step is attached to the plan section as ``collection_errors``.
    """
    # limit the rate of explains done to the database
    explain_key = (row['datname'], row['query_signature'])
    if not self._explained_statements_ratelimiter.acquire(explain_key):
        return None

    # Plans have several important signatures to tag events with. Note that for postgres, the
    # query_signature and resource_hash will be the same value.
    # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
    # - `resource_hash` - hash computed off the raw sql text to match apm resources
    # - `query_signature` - hash computed from the raw sql text to match query metrics
    plan_dict, explain_err_code, err_msg = self._run_and_track_explain(
        row['datname'], row['query'], row['statement'], row['query_signature']
    )

    collection_errors = None
    if explain_err_code:
        collection_errors = [{'code': explain_err_code.value, 'message': err_msg or None}]

    obfuscated_plan = plan_signature = None
    if plan_dict:
        serialized_plan = json.dumps(plan_dict)
        # if we're using the orjson implementation then json.dumps returns bytes
        if isinstance(serialized_plan, bytes):
            serialized_plan = serialized_plan.decode('utf-8')
        normalized_plan = datadog_agent.obfuscate_sql_exec_plan(serialized_plan, normalize=True)
        obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(serialized_plan)
        plan_signature = compute_exec_plan_signature(normalized_plan)

    sample_key = (row['query_signature'], plan_signature)
    if not self._seen_samples_ratelimiter.acquire(sample_key):
        return None

    plan_section = {
        "definition": obfuscated_plan,
        "signature": plan_signature,
        "collection_errors": collection_errors,
    }
    event = {
        "host": self._check.resolved_hostname,
        "ddagentversion": datadog_agent.get_version(),
        "ddsource": "postgres",
        "ddtags": ",".join(self._dbtags(row['datname'])),
        # Default to the collection time, in milliseconds.
        "timestamp": time.time() * 1000,
        "network": {
            "client": {
                "ip": row.get('client_addr', None),
                "port": row.get('client_port', None),
                "hostname": row.get('client_hostname', None),
            }
        },
        "db": {
            "instance": row.get('datname', None),
            "plan": plan_section,
            "query_signature": row['query_signature'],
            "resource_hash": row['query_signature'],
            "application": row.get('application_name', None),
            "user": row['usename'],
            "statement": row['statement'],
            "metadata": {
                "tables": row['dd_tables'],
                "commands": row['dd_commands'],
                "comments": row['dd_comments'],
            },
            "query_truncated": self._get_truncation_state(
                self._get_track_activity_query_size(), row['query']
            ).value,
        },
        'postgres': {
            column: value for column, value in row.items() if column not in pg_stat_activity_sample_exclude_keys
        },
    }

    if row['state'] in {'idle', 'idle in transaction'}:
        state_change = row['state_change']
        if state_change and row['query_start']:
            event['duration'] = (state_change - row['query_start']).total_seconds() * 1e9
            # If the transaction is idle then we have a more specific "end time" than the current time at
            # which we're collecting this event. According to the postgres docs, all of the timestamps in
            # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
            # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
            # of the event else we risk the timestamp being significantly off and the event getting dropped
            # during ingestion.
            if state_change.tzinfo:
                event['timestamp'] = get_timestamp(state_change) * 1000

    return event