def forward_logs(logs):
    """Forward logs to Datadog"""
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Forwarding {len(logs)} logs")

    logs_to_forward = filter_logs(list(map(json.dumps, logs)))
    scrubber = DatadogScrubber(SCRUBBING_RULE_CONFIGS)
    if DD_USE_TCP:
        batcher = DatadogBatcher(256 * 1000, 256 * 1000, 1)
        cli = DatadogTCPClient(DD_URL, DD_PORT, DD_NO_SSL, DD_API_KEY, scrubber)
    else:
        batcher = DatadogBatcher(256 * 1000, 4 * 1000 * 1000, 400)
        cli = DatadogHTTPClient(
            DD_URL, DD_PORT, DD_NO_SSL, DD_SKIP_SSL_VALIDATION, DD_API_KEY, scrubber
        )

    with DatadogClient(cli) as client:
        for batch in batcher.batch(logs_to_forward):
            try:
                client.send(batch)
            except Exception:
                logger.exception(f"Exception while forwarding log batch {batch}")
            else:
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(f"Forwarded log batch: {json.dumps(batch)}")

    lambda_stats.distribution(
        "{}.logs_forwarded".format(DD_FORWARDER_TELEMETRY_NAMESPACE_PREFIX),
        len(logs_to_forward),
        tags=DD_FORWARDER_TELEMETRY_TAGS,
    )

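# For context: the HTTP path above batches up to 400 serialized logs per
# request with a 4 MB batch cap, while the TCP path sends one payload at a
# time with a 256 KB cap. Below is a minimal sketch of such a
# size/count-limited batcher, assuming DatadogBatcher's arguments are
# (max_item_size_bytes, max_batch_size_bytes, max_items_count) -- an
# inference from the two call sites above, not a confirmed signature.
class SimpleBatcher:
    def __init__(self, max_item_size_bytes, max_batch_size_bytes, max_items_count):
        self._max_item_size_bytes = max_item_size_bytes
        self._max_batch_size_bytes = max_batch_size_bytes
        self._max_items_count = max_items_count

    def batch(self, items):
        """Group serialized log strings into batches honoring all three limits."""
        batches, current, current_size = [], [], 0
        for item in items:
            item_size = len(item.encode("utf-8"))
            if item_size > self._max_item_size_bytes:
                continue  # an oversized item can never fit in any batch
            # close the current batch if adding this item would break a limit
            if current and (
                current_size + item_size > self._max_batch_size_bytes
                or len(current) >= self._max_items_count
            ):
                batches.append(current)
                current, current_size = [], 0
            current.append(item)
            current_size += item_size
        if current:
            batches.append(current)
        return batches
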
def forward_metrics(metrics):
    """
    Forward custom metrics submitted via logs to Datadog in a background thread
    using `lambda_stats` that is provided by the Datadog Python Lambda Layer.
    """
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Forwarding {len(metrics)} metrics")

    for metric in metrics:
        try:
            lambda_stats.distribution(
                metric["m"], metric["v"], timestamp=metric["e"], tags=metric["t"]
            )
        except Exception:
            logger.exception(f"Exception while forwarding metric {json.dumps(metric)}")
        else:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f"Forwarded metric: {json.dumps(metric)}")

    lambda_stats.distribution(
        "{}.metrics_forwarded".format(DD_FORWARDER_TELEMETRY_NAMESPACE_PREFIX),
        len(metrics),
        tags=DD_FORWARDER_TELEMETRY_TAGS,
    )

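# Hypothetical call illustrating the compact metric shape the function
# expects; the key meanings (m=name, v=value, e=epoch seconds, t=tags) are
# read off the lambda_stats.distribution() call above. Assumes
# forward_metrics() and an initialized lambda_stats are in scope; the metric
# name and tags are illustrative.
import time

sample_metrics = [
    {
        "m": "my_app.page_views",        # metric name
        "v": 1,                          # metric value
        "e": int(time.time()),           # epoch timestamp in seconds
        "t": ["env:prod", "page:home"],  # list of tags
    }
]
forward_metrics(sample_metrics)
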
def send_forwarder_internal_metrics(name, additional_tags=[]):
    """Send forwarder's internal metrics to DD"""
    lambda_stats.distribution(
        "{}.{}".format(DD_FORWARDER_TELEMETRY_NAMESPACE_PREFIX, name),
        1,
        tags=get_forwarder_telemetry_tags() + additional_tags,
    )

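# Hypothetical call site: emits "<namespace_prefix>.retries" with value 1.
# The metric name and tag here are illustrative, not from the forwarder.
send_forwarder_internal_metrics("retries", additional_tags=["reason:timeout"])
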
def build_tags_by_arn_cache():
    """Makes API calls to GetResources to get the live tags of the account's Lambda functions

    Returns an empty dict instead of fetching custom tags if the tag fetch env variable is not set to true

    Returns:
        tags_by_arn_cache (dict<str, str[]>): each Lambda's tags in a dict keyed by ARN
    """
    tags_by_arn_cache = {}
    get_resources_paginator = resource_tagging_client.get_paginator("get_resources")

    try:
        for page in get_resources_paginator.paginate(
            ResourceTypeFilters=[GET_RESOURCES_LAMBDA_FILTER], ResourcesPerPage=100
        ):
            lambda_stats.distribution(
                "{}.get_resources_api_calls".format(ENHANCED_METRICS_NAMESPACE_PREFIX),
                1,
            )
            page_tags_by_arn = parse_get_resources_response_for_tags_by_arn(page)
            tags_by_arn_cache.update(page_tags_by_arn)
    except ClientError:
        log.exception(
            "Encountered a ClientError when trying to fetch tags. You may need to give "
            "this Lambda's role the 'tag:GetResources' permission"
        )

    log.debug("Built this tags cache from GetResources API calls: %s", tags_by_arn_cache)

    return tags_by_arn_cache

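# The parse_get_resources_response_for_tags_by_arn helper used above is not
# shown here; the following is a plausible sketch based on the documented
# shape of a GetResources response page (ResourceTagMappingList entries with
# a ResourceARN and Key/Value tag pairs). The "key:value" tag formatting is
# an assumption; the real helper may also sanitize tag contents.
def parse_get_resources_response_for_tags_by_arn(page):
    """Map each ResourceARN in a GetResources page to a list of tag strings."""
    tags_by_arn = {}
    for resource in page.get("ResourceTagMappingList", []):
        arn = resource["ResourceARN"]
        tags_by_arn[arn] = [
            "{}:{}".format(tag["Key"], tag["Value"])
            for tag in resource.get("Tags", [])
        ]
    return tags_by_arn
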
def send_forwarder_internal_metrics(name, additional_tags=[]):
    """Send forwarder's internal metrics to DD"""
    prefix, tags = get_forwarder_telemetry_prefix_and_tags()
    lambda_stats.distribution(
        "{}.{}".format(prefix, name),
        1,
        tags=tags + additional_tags,
    )

def submit_to_dd(self):
    """Submit this metric to the Datadog API"""
    timestamp = self.timestamp
    if not timestamp:
        timestamp = time()

    logger.debug("Submitting metric {} {} {}".format(self.name, self.value, self.tags))
    lambda_stats.distribution(
        self.name, self.value, timestamp=timestamp, tags=self.tags
    )

def forward_metrics(metrics):
    """
    Forward custom metrics submitted via logs to Datadog in a background thread
    using `lambda_stats` that is provided by the Datadog Python Lambda Layer.
    """
    for metric in metrics:
        try:
            lambda_stats.distribution(
                metric["m"], metric["v"], timestamp=metric["e"], tags=metric["t"]
            )
        except Exception:
            log.exception("Exception while forwarding metric %s", metric)

def forward_metrics(metrics):
    """
    Forward custom metrics submitted via logs to Datadog in a background thread
    using `lambda_stats` that is provided by the Datadog Python Lambda Layer.
    """
    for metric in metrics:
        try:
            lambda_stats.distribution(
                metric["m"], metric["v"], timestamp=metric["e"], tags=metric["t"]
            )
        except Exception as e:
            print("Unexpected exception: {}, metric: {}".format(str(e), metric))

def forward_traces(trace_payloads):
    try:
        trace_connection.send_traces(trace_payloads)
    except Exception:
        logger.exception(f"Exception while forwarding traces {json.dumps(trace_payloads)}")
    else:
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"Forwarded traces: {json.dumps(trace_payloads)}")

    lambda_stats.distribution(
        "{}.traces_forwarded".format(DD_FORWARDER_TELEMETRY_NAMESPACE_PREFIX),
        len(trace_payloads),
        tags=DD_FORWARDER_TELEMETRY_TAGS,
    )

def forward_metrics(metrics):
    """
    Forward custom metrics submitted via logs to Datadog in a background thread
    using `lambda_stats` that is provided by the Datadog Python Lambda Layer.
    """
    for metric in metrics:
        try:
            lambda_stats.distribution(
                metric["m"], metric["v"], timestamp=metric["e"], tags=metric["t"]
            )
        except Exception:
            logger.exception(f"Exception while forwarding metric {json.dumps(metric)}")
        else:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f"Forwarded metric: {json.dumps(metric)}")

def normalize_events(events, metadata):
    normalized = []
    events_counter = 0

    for event in events:
        events_counter += 1
        if isinstance(event, dict):
            normalized.append(merge_dicts(event, metadata))
        elif isinstance(event, str):
            normalized.append(merge_dicts({"message": event}, metadata))
        else:
            # drop this log
            continue

    # Submit count of total events
    lambda_stats.distribution(
        "{}.incoming_events".format(DD_FORWARDER_TELEMETRY_NAMESPACE_PREFIX),
        events_counter,
        tags=DD_FORWARDER_TELEMETRY_TAGS,
    )

    return normalized

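# Hypothetical invocation showing the three input cases handled above: dicts
# are merged with the metadata, strings are wrapped in {"message": ...}, and
# anything else is dropped (while still being counted as incoming).
events = [
    {"message": "GET /health 200", "status": "info"},  # dict: merged as-is
    "plain text log line",                             # str: wrapped first
    42,                                                # other: dropped
]
metadata = {"ddsource": "my_source", "service": "my_service"}
normalized = normalize_events(events, metadata)
# normalized has two events; incoming_events telemetry still counts all three
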
def _process_rds_enhanced_monitoring_message(ts, message, account, region):
    instance_id = message["instanceID"]
    host_id = message["instanceResourceID"]
    tags = [
        "dbinstanceidentifier:%s" % instance_id,
        "aws_account:%s" % account,
        "engine:%s" % message["engine"],
    ]

    # metrics generation

    # uptime: "54 days, 1:53:04" to be converted into seconds
    uptime = 0
    uptime_msg = re.split(" days?, ", message["uptime"])  # edge case "1 day, 1:53:04"
    if len(uptime_msg) == 2:
        uptime += 24 * 3600 * int(uptime_msg[0])
    uptime_day = uptime_msg[-1].split(":")
    uptime += 3600 * int(uptime_day[0])
    uptime += 60 * int(uptime_day[1])
    uptime += int(uptime_day[2])
    lambda_stats.distribution(
        "aws.rds.uptime", uptime, timestamp=ts, tags=tags, host=host_id
    )

    lambda_stats.distribution(
        "aws.rds.virtual_cpus", message["numVCPUs"],
        timestamp=ts, tags=tags, host=host_id
    )

    if "loadAverageMinute" in message:
        lambda_stats.distribution(
            "aws.rds.load.1", message["loadAverageMinute"]["one"],
            timestamp=ts, tags=tags, host=host_id
        )
        lambda_stats.distribution(
            "aws.rds.load.5", message["loadAverageMinute"]["five"],
            timestamp=ts, tags=tags, host=host_id
        )
        lambda_stats.distribution(
            "aws.rds.load.15", message["loadAverageMinute"]["fifteen"],
            timestamp=ts, tags=tags, host=host_id
        )

    for namespace in ["cpuUtilization", "memory", "tasks", "swap"]:
        for key, value in message.get(namespace, {}).items():
            lambda_stats.distribution(
                "aws.rds.%s.%s" % (namespace.lower(), key), value,
                timestamp=ts, tags=tags, host=host_id
            )

    for network_stats in message.get("network", []):
        if "interface" in network_stats:
            network_tag = ["interface:%s" % network_stats.pop("interface")]
        else:
            network_tag = []
        for key, value in network_stats.items():
            lambda_stats.distribution(
                "aws.rds.network.%s" % key, value,
                timestamp=ts, tags=tags + network_tag, host=host_id
            )

    disk_stats = message.get("diskIO", [{}])[0]  # we never expect to have more than one disk
    for key, value in disk_stats.items():
        lambda_stats.distribution(
            "aws.rds.diskio.%s" % key, value, timestamp=ts, tags=tags, host=host_id
        )

    for fs_stats in message.get("fileSys", []):
        fs_tag = []
        for tag_key in ["name", "mountPoint"]:
            if tag_key in fs_stats:
                fs_tag.append("%s:%s" % (tag_key, fs_stats.pop(tag_key)))
        for key, value in fs_stats.items():
            lambda_stats.distribution(
                "aws.rds.filesystem.%s" % key, value,
                timestamp=ts, tags=tags + fs_tag, host=host_id
            )

    for process_stats in message.get("processList", []):
        process_tag = []
        for tag_key in ["name", "id"]:
            if tag_key in process_stats:
                process_tag.append("%s:%s" % (tag_key, process_stats.pop(tag_key)))
        for key, value in process_stats.items():
            lambda_stats.distribution(
                "aws.rds.process.%s" % key, value,
                timestamp=ts, tags=tags + process_tag, host=host_id
            )

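# Worked example of the uptime conversion above, for "54 days, 1:53:04":
import re

parts = re.split(" days?, ", "54 days, 1:53:04")  # -> ["54", "1:53:04"]
uptime = 24 * 3600 * int(parts[0])                # 54 days = 4,665,600 s
hours, minutes, seconds = parts[-1].split(":")
uptime += 3600 * int(hours) + 60 * int(minutes) + int(seconds)  # + 6,784 s
assert uptime == 4672384
# A value without a day component, e.g. "1:53:04", is left unsplit by the
# regex, so only the clock portion contributes.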