def _get_endpoint():
    legacy = _attr.from_env("DD_PROFILING_API_URL", "", str)()
    if legacy:
        deprecation.deprecation("DD_PROFILING_API_URL", "Use DD_SITE")
        return legacy
    site = _attr.from_env("DD_SITE", "datadoghq.com", str)()
    return ENDPOINT_TEMPLATE.format(site)
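# Context for the `_attr.from_env(...)` calls used throughout this module: the helper
# returns a zero-argument callable suitable for `attr.ib(factory=...)`, so the environment
# is read when the attribute is built rather than at import time. A minimal sketch of such
# a helper (an assumption for illustration, not the actual `ddtrace.profiling._attr` code):
import os


def from_env(name, default, type_):
    """Return a callable that reads `name` from the environment, with a default and a cast."""

    def _read():
        value = os.environ.get(name)
        if value is None:
            return default
        try:
            return type_(value)
        except (ValueError, TypeError):
            # Hypothetical fallback; the real helper may raise or log instead.
            return default

    return _read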
class MemoryCollector(collector.PeriodicCollector):
    """Memory allocation collector."""

    _DEFAULT_MAX_EVENTS = 32
    _DEFAULT_INTERVAL = 0.5

    # Arbitrary interval to empty the _memalloc event buffer
    _interval = attr.ib(default=_DEFAULT_INTERVAL, repr=False)

    # TODO make this dynamic based on the 1. interval and 2. the max number of events allowed in the Recorder
    _max_events = attr.ib(factory=_attr.from_env("_DD_PROFILING_MEMORY_EVENTS_BUFFER", _DEFAULT_MAX_EVENTS, int))
    max_nframe = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    heap_sample_size = attr.ib(factory=_attr.from_env("DD_PROFILING_HEAP_SAMPLE_SIZE", 0, int))
    ignore_profiler = attr.ib(factory=_attr.from_env("DD_PROFILING_IGNORE_PROFILER", True, formats.asbool))

    def start(self):
        """Start collecting memory profiles."""
        if _memalloc is None:
            raise RuntimeError("memalloc is unavailable")

        _memalloc.start(self.max_nframe, self._max_events, self.heap_sample_size)

        super(MemoryCollector, self).start()

    def stop(self):
        if _memalloc is not None:
            try:
                _memalloc.stop()
            except RuntimeError:
                pass
        super(MemoryCollector, self).stop()

    def collect(self):
        events, count, alloc_count = _memalloc.iter_events()
        capture_pct = 100 * count / alloc_count
        # TODO: The event timestamp is slightly off since it's going to be the time we copy the data from the
        # _memalloc buffer to our Recorder. This is fine for now, but we might want to store the nanoseconds
        # timestamp in C and then return it via iter_events.
        return (
            tuple(
                MemoryAllocSampleEvent(
                    thread_id=thread_id,
                    thread_name=_threading.get_thread_name(thread_id),
                    thread_native_id=_threading.get_thread_native_id(thread_id),
                    frames=stack,
                    nframes=nframes,
                    size=size,
                    capture_pct=capture_pct,
                    nevents=alloc_count,
                )
                for (stack, nframes, thread_id), size in events
                # TODO: this should be implemented in _memalloc directly so we have more space for samples
                # not coming from the profiler
                if not self.ignore_profiler or not any(frame[0].startswith(_MODULE_TOP_DIR) for frame in stack)
            ),
        )
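# Worked illustration of the `capture_pct` computed in MemoryCollector.collect():
# `_memalloc.iter_events()` reports both the sampled events kept in its buffer and the total
# number of allocations seen since the last collection, and the ratio is attached to every
# emitted sample. The numbers below are made up.
count = 32          # events that fit in the _memalloc buffer
alloc_count = 1280  # allocations observed since the last collection
capture_pct = 100 * count / alloc_count
assert capture_pct == 2.5  # each sample represents ~2.5% of allocations (Python 3 division)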
class CaptureSamplerCollector(Collector):
    capture_pct = attr.ib(factory=_attr.from_env("DD_PROFILING_CAPTURE_PCT", 5, float))
    _capture_sampler = attr.ib(default=attr.Factory(_create_capture_sampler, takes_self=True), init=False, repr=False)
class LockCollector(collector.CaptureSamplerCollector):
    """Record lock usage."""

    nframes = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    tracer = attr.ib(default=None)

    def start(self):
        """Start collecting `threading.Lock` usage."""
        super(LockCollector, self).start()
        self.patch()

    def stop(self):
        """Stop collecting `threading.Lock` usage."""
        self.unpatch()
        super(LockCollector, self).stop()

    def patch(self):
        """Patch the threading module for tracking lock allocation."""
        # We only patch the lock from the `threading` module.
        # Nobody should use locks from `_thread`; if they do so, then it's deliberate and we don't profile.
        self.original = threading.Lock

        def _allocate_lock(wrapped, instance, args, kwargs):
            lock = wrapped(*args, **kwargs)
            return _ProfiledLock(lock, self.recorder, self.tracer, self.nframes, self._capture_sampler)

        threading.Lock = FunctionWrapper(self.original, _allocate_lock)

    def unpatch(self):
        """Unpatch the threading module for tracking lock allocation."""
        threading.Lock = self.original
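# Usage sketch for LockCollector (illustrative; assumes the attrs-based Collector base class
# accepts the recorder as a keyword argument, and that _ProfiledLock pushes lock timing
# events into that recorder, subject to the capture sampler):
import threading

recorder = Recorder()  # Recorder from ddtrace.profiling.recorder
lock_collector = LockCollector(recorder=recorder)
lock_collector.start()
try:
    lock = threading.Lock()  # actually a wrapped _ProfiledLock once patch() has run
    with lock:
        pass  # the acquisition may be sampled and recorded
finally:
    lock_collector.stop()  # unpatch() restores the original threading.Lock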
class Scheduler(_periodic.PeriodicService):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    _interval = attr.ib(factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _last_export = attr.ib(init=False, default=None)

    def start(self):
        """Start the scheduler."""
        LOG.debug("Starting scheduler")
        super(Scheduler, self).start()
        self._last_export = compat.time_ns()
        LOG.debug("Scheduler started")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        if self.exporters:
            events = self.recorder.reset()
            start = self._last_export
            self._last_export = compat.time_ns()
            total_events = sum(len(v) for v in events.values())
            for exp in self.exporters:
                try:
                    exp.export(events, start, self._last_export)
                except exporter.ExportError as e:
                    LOG.error("Unable to export %d events: %s", total_events, _traceback.format_exception(e))
                except Exception:
                    LOG.exception("Error while exporting %d events", total_events)

    periodic = flush
    on_shutdown = flush
class UncaughtExceptionCollector(collector.Collector):
    """Record uncaught thrown exceptions."""

    max_nframes = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))

    def start(self):
        """Start collecting uncaught exceptions."""
        self.original_except_hook = sys.excepthook
        sys.excepthook = self.except_hook
        super(UncaughtExceptionCollector, self).start()

    def stop(self):
        """Stop collecting uncaught exceptions."""
        if hasattr(self, "original_except_hook"):
            sys.excepthook = self.original_except_hook
            del self.original_except_hook
        super(UncaughtExceptionCollector, self).stop()

    def except_hook(self, exctype, value, traceback):
        try:
            frames, nframes = _traceback.traceback_to_frames(traceback, self.max_nframes)
            thread_id, thread_name = threading._current_thread()
            self.recorder.push_event(
                UncaughtExceptionEvent(
                    frames=frames,
                    nframes=nframes,
                    thread_id=thread_id,
                    thread_name=thread_name,
                    exc_type=exctype,
                )
            )
        finally:
            return self.original_except_hook(exctype, value, traceback)
class Scheduler(object):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    interval = attr.ib(factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _periodic = attr.ib(init=False, default=None)
    _last_export = attr.ib(init=False, default=None)

    def __enter__(self):
        self.start()
        return self

    def start(self):
        """Start the scheduler."""
        self._periodic = _periodic.PeriodicThread(
            self.interval, self.flush, name="%s:%s" % (__name__, self.__class__.__name__)
        )
        LOG.debug("Starting scheduler")
        self._last_export = compat.time_ns()
        self._periodic.start()
        LOG.debug("Scheduler started")

    def __exit__(self, exc_type, exc_value, traceback):
        return self.stop()

    def stop(self, flush=True):
        """Stop the scheduler.

        :param flush: Whether to do a final flush.
        """
        LOG.debug("Stopping scheduler")
        if self._periodic:
            self._periodic.stop()
            self._periodic.join()
            self._periodic = None
        if flush:
            self.flush()
        LOG.debug("Scheduler stopped")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        events = self.recorder.reset()
        start = self._last_export
        self._last_export = compat.time_ns()
        total_events = sum(len(v) for v in events.values())
        for exp in self.exporters:
            try:
                exp.export(events, start, self._last_export)
            except exporter.ExportError as e:
                LOG.error("Unable to export %d events: %s", total_events, _traceback.format_exception(e))
            except Exception:
                LOG.exception("Error while exporting %d events", total_events)
class Scheduler(_periodic.PeriodicService):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    before_flush = attr.ib(default=None, eq=False)
    _interval = attr.ib(factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _configured_interval = attr.ib(init=False)
    _last_export = attr.ib(init=False, default=None, eq=False)

    def __attrs_post_init__(self):
        # Copy the value to use it later since we're going to adjust the real interval
        self._configured_interval = self.interval

    def start(self):
        """Start the scheduler."""
        LOG.debug("Starting scheduler")
        super(Scheduler, self).start()
        self._last_export = compat.time_ns()
        LOG.debug("Scheduler started")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        if self.before_flush is not None:
            try:
                self.before_flush()
            except Exception:
                LOG.error("Scheduler before_flush hook failed", exc_info=True)
        if self.exporters:
            events = self.recorder.reset()
            start = self._last_export
            self._last_export = compat.time_ns()
            for exp in self.exporters:
                try:
                    exp.export(events, start, self._last_export)
                except exporter.ExportError as e:
                    LOG.error("Unable to export profile: %s. Ignoring.", _traceback.format_exception(e))
                except Exception:
                    LOG.exception(
                        "Unexpected error while exporting events. "
                        "Please report this bug to https://github.com/DataDog/dd-trace-py/issues"
                    )

    def periodic(self):
        start_time = compat.monotonic()
        try:
            self.flush()
        finally:
            self.interval = max(0, self._configured_interval - (compat.monotonic() - start_time))
class Scheduler(_periodic.PeriodicService):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    _interval = attr.ib(factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _configured_interval = attr.ib(init=False)
    _last_export = attr.ib(init=False, default=None)

    def __attrs_post_init__(self):
        # Copy the value to use it later since we're going to adjust the real interval
        self._configured_interval = self.interval

    def start(self):
        """Start the scheduler."""
        LOG.debug("Starting scheduler")
        super(Scheduler, self).start()
        self._last_export = compat.time_ns()
        LOG.debug("Scheduler started")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        if self.exporters:
            events = self.recorder.reset()
            start = self._last_export
            self._last_export = compat.time_ns()
            total_events = sum(len(v) for v in events.values())
            for exp in self.exporters:
                try:
                    exp.export(events, start, self._last_export)
                except exporter.ExportError as e:
                    LOG.error("Unable to export %d events: %s", total_events, _traceback.format_exception(e))
                except Exception:
                    LOG.exception("Error while exporting %d events", total_events)

    def periodic(self):
        start_time = compat.monotonic()
        try:
            self.flush()
        finally:
            self.interval = max(0, self._configured_interval - (compat.monotonic() - start_time))

    on_shutdown = flush
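# Worked illustration of the interval adjustment in Scheduler.periodic(): the time spent
# flushing is subtracted from the configured upload interval so flush start times stay
# roughly DD_PROFILING_UPLOAD_INTERVAL seconds apart. Values below are made up.
configured_interval = 60.0  # default DD_PROFILING_UPLOAD_INTERVAL
flush_duration = 2.5        # hypothetical time spent inside flush()
next_interval = max(0, configured_interval - flush_duration)
assert next_interval == 57.5
# A flush that takes longer than the configured interval clamps the next wait to zero:
assert max(0, configured_interval - 75.0) == 0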
def _get_api_key():
    legacy = _attr.from_env("DD_PROFILING_API_KEY", "", str)()
    if legacy:
        deprecation.deprecation("DD_PROFILING_API_KEY", "Use DD_API_KEY")
        return legacy
    return _attr.from_env("DD_API_KEY", "", str)()
class MemoryCollector(collector.PeriodicCollector, collector.CaptureSamplerCollector):
    """Memory allocation collector."""

    # Arbitrary interval to use for enabling/disabling tracemalloc
    _interval = attr.ib(default=0.1, repr=False)

    nframes = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    ignore_profiler = attr.ib(factory=_attr.from_env("DD_PROFILING_IGNORE_PROFILER", True, formats.asbool))

    def __attrs_post_init__(self):
        if sys.version_info[:2] <= (3, 5):
            self._filter_profiler = self._filter_profiler_35

    @staticmethod
    def _filter_profiler(traces):
        return [trace for trace in traces if not any(frame[0].startswith(_MODULE_TOP_DIR) for frame in trace[2])]

    @staticmethod
    def _filter_profiler_35(traces):
        # Python <= 3.5 does not have support for domain
        return [trace for trace in traces if not any(frame[0].startswith(_MODULE_TOP_DIR) for frame in trace[1])]

    def start(self):
        """Start collecting memory profiles."""
        if tracemalloc is None:
            raise RuntimeError("tracemalloc is unavailable")
        super(MemoryCollector, self).start()

    def stop(self):
        if tracemalloc is not None:
            tracemalloc.stop()
        super(MemoryCollector, self).stop()

    def collect(self):
        try:
            snapshot = tracemalloc.take_snapshot()
        except RuntimeError:
            events = []
        else:
            tracemalloc.stop()
            if snapshot.traces and self.ignore_profiler:
                snapshot.traces._traces = self._filter_profiler(snapshot.traces._traces)
            if snapshot.traces:
                events = [MemorySampleEvent(snapshot=snapshot, sampling_pct=self.capture_pct)]
            else:
                events = []

        if self._capture_sampler.capture():
            tracemalloc.start(self.nframes)

        return [events]
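# The two _filter_profiler variants above differ only in where the traceback sits inside
# tracemalloc's raw trace tuples: on Python 3.6+ a trace is (domain, size, frames), on
# Python <= 3.5 it is (size, frames); each frame is a (filename, lineno) pair, which is why
# frame[0] is compared against _MODULE_TOP_DIR. Sketch of the filter against fabricated data:
_MODULE_TOP_DIR = "ddtrace"  # assumed value: the profiler's own package directory

traces_36 = [  # Python 3.6+ layout: (domain, size, frames)
    (0, 512, (("app/main.py", 10),)),
    (0, 128, (("ddtrace/profiling/collector/memory.py", 42),)),
]
filtered = [
    trace
    for trace in traces_36
    if not any(frame[0].startswith(_MODULE_TOP_DIR) for frame in trace[2])
]
assert len(filtered) == 1  # the profiler's own allocation is dropped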
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib(factory=_get_endpoint, type=str)
    api_key = attr.ib(factory=_get_api_key, type=str)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10, float), type=float)
    service_name = attr.ib(factory=_get_service_name)

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))
        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="%s"\r\n'
                b"\r\n"
                b"%s\r\n" % (boundary, field.encode(), value)
                for field, value in fields.items()
                if field != "chunk-data"
            )
            + b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="tags[]"\r\n'
                b"\r\n"
                b"%s:%s\r\n" % (boundary, tag.encode(), value)
                for tag, value in tags.items()
            )
            + b"--"
            + boundary
            + b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n"
            + fields["chunk-data"]
            + b"\r\n--%s--\r\n" % boundary
        )
        content_type = b"multipart/form-data; boundary=%s" % boundary
        return content_type, body

    @staticmethod
    def _get_tags(service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": RUNTIME_ID,
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        version = os.environ.get("DD_VERSION")
        if version:
            tags["version"] = version

        env = os.environ.get("DD_ENV")
        if env:
            tags["env"] = env

        user_tags = os.getenv("DD_PROFILING_TAGS")
        if user_tags:
            for tag in user_tags.split(","):
                try:
                    key, value = tag.split(":", 1)
                except ValueError:
                    LOG.error("Malformed tag in DD_PROFILING_TAGS: %s", tag)
                else:
                    if isinstance(value, six.text_type):
                        value = value.encode("utf-8")
                    tags[key] = value

        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if not self.endpoint:
            raise InvalidEndpoint("Endpoint is empty")
        common_headers = {
            "DD-API-KEY": self.api_key.encode(),
        }

        profile = super(PprofHTTPExporter, self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": RUNTIME_ID,
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        service_name = self.service_name or os.path.basename(profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(
            fields,
            tags=self._get_tags(service_name),
        )
        headers = common_headers.copy()
        headers["Content-Type"] = content_type

        # urllib uses `POST` if `data` is supplied (Python 2 version does not handle `method` kwarg)
        req = request.Request(self.endpoint, data=body, headers=headers)

        try:
            request.urlopen(req, timeout=self.timeout)
        except (error.HTTPError, error.URLError, http_client.HTTPException, socket.timeout) as e:
            raise UploadFailed(e)
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib(factory=_get_endpoint, type=str)
    api_key = attr.ib(factory=_get_api_key, type=str)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10, float), type=float)

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))
        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="%s"\r\n'
                b"\r\n"
                b"%s\r\n" % (boundary, field.encode(), value)
                for field, value in fields.items()
                if field != "chunk-data"
            )
            + b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="tags[]"\r\n'
                b"\r\n"
                b"%s:%s\r\n" % (boundary, tag.encode(), value)
                for tag, value in tags.items()
            )
            + b"--"
            + boundary
            + b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n"
            + fields["chunk-data"]
            + b"\r\n--%s--\r\n" % boundary
        )
        content_type = b"multipart/form-data; boundary=%s" % boundary
        return content_type, body

    @staticmethod
    def _get_tags(service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": RUNTIME_ID,
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        version = os.environ.get("DD_VERSION")
        if version:
            tags["version"] = version

        env = os.environ.get("DD_ENV")
        if env:
            tags["env"] = env

        user_tags = os.getenv("DD_PROFILING_TAGS")
        if user_tags:
            for tag in user_tags.split(","):
                try:
                    key, value = tag.split(":", 1)
                except ValueError:
                    LOG.error("Malformed tag in DD_PROFILING_TAGS: %s", tag)
                else:
                    if isinstance(value, six.text_type):
                        value = value.encode("utf-8")
                    tags[key] = value

        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if not self.endpoint:
            raise InvalidEndpoint("Endpoint is empty")
        parsed = urlparse.urlparse(self.endpoint)
        if parsed.scheme == "https":
            client_class = http_client.HTTPSConnection
        else:
            client_class = http_client.HTTPConnection
        if ":" in parsed.netloc:
            host, port = parsed.netloc.split(":", 1)
        else:
            host, port = parsed.netloc, None
        client = client_class(host, port, timeout=self.timeout)
        common_headers = {
            "DD-API-KEY": self.api_key.encode(),
        }

        exceptions = []

        profile = super(PprofHTTPExporter, self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": RUNTIME_ID,
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        for service_name_var in ("DD_SERVICE", "DD_SERVICE_NAME", "DATADOG_SERVICE_NAME"):
            service_name = os.environ.get(service_name_var)
            if service_name is not None:
                break
        else:
            service_name = os.path.basename(profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(
            fields,
            tags=self._get_tags(service_name),
        )
        headers = common_headers.copy()
        headers["Content-Type"] = content_type
        try:
            client.request("POST", parsed.path, body=body, headers=headers)
        except (OSError, IOError, http_client.CannotSendRequest) as e:
            exceptions.append(e)
        else:
            try:
                response = client.getresponse()
                content = response.read()  # have to read to not fail!
            except (OSError, IOError, http_client.BadStatusLine) as e:
                exceptions.append(e)
            else:
                if not 200 <= response.status < 400:
                    exceptions.append(RequestFailed(response, content))

        if exceptions:
            raise UploadFailed(exceptions)
class Recorder(object):
    """An object that records program activity."""

    events = attr.ib(init=False, repr=False)
    max_size = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_EVENTS", 49152, int))
    event_filters = attr.ib(factory=lambda: collections.defaultdict(list), repr=False)

    def __attrs_post_init__(self):
        self._reset_events()

    def add_event_filter(self, event_type, filter_fn):
        """Add an event filter function.

        A filter function must accept a list of events as argument and return a list of events that should be
        pushed into the recorder.

        :param event_type: A class of event.
        :param filter_fn: A filter function to append.
        """
        self.event_filters[event_type].append(filter_fn)

    def remove_event_filter(self, event_type, filter_fn):
        """Remove an event filter from the recorder.

        :param event_type: A class of event.
        :param filter_fn: The filter function to remove.
        """
        self.event_filters[event_type].remove(filter_fn)

    def push_event(self, event):
        """Push an event in the recorder.

        :param event: The `ddtrace.profiling.event.Event` to push.
        """
        return self.push_events([event])

    def push_events(self, events):
        """Push multiple events in the recorder.

        All the events MUST be of the same type.
        There is no sanity check as to whether all the events are from the same class, for performance reasons.

        :param events: The event list to push.
        """
        if events:
            event_type = events[0].__class__
            for filter_fn in self.event_filters[event_type]:
                events = filter_fn(events)
            q = self.events[event_type]
            q.extend(events)

    def _reset_events(self):
        self.events = collections.defaultdict(lambda: collections.deque(maxlen=self.max_size))

    def reset(self):
        """Reset the recorder.

        This is useful when e.g. exporting data. Once the event queue is retrieved, a new one can be created by
        calling the reset method, avoiding iterating on a mutating event list.

        :return: The list of events that has been removed.
        """
        events = self.events
        self._reset_events()
        return events
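# Usage sketch for the Recorder filter API (MyEvent is a hypothetical stand-in for a
# ddtrace.profiling.event.Event subclass; filters are registered per event class and see
# the full list being pushed):
import attr


@attr.s
class MyEvent(object):
    size = attr.ib(default=0)


def drop_small_allocations(events):
    # A filter returns the (possibly reduced) list that is actually stored.
    return [event for event in events if event.size >= 1024]


recorder = Recorder()
recorder.add_event_filter(MyEvent, drop_small_allocations)
recorder.push_events([MyEvent(size=4096), MyEvent(size=16)])

events_by_type = recorder.reset()  # hands back the per-type deques and starts a fresh buffer
assert len(events_by_type[MyEvent]) == 1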
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib(factory=_get_endpoint, type=str)
    api_key = attr.ib(factory=_get_api_key, type=str)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10, float), type=float)
    service_name = attr.ib(factory=_get_service_name)
    max_retry_delay = attr.ib(default=None)

    def __attrs_post_init__(self):
        if self.max_retry_delay is None:
            self.max_retry_delay = self.timeout * 3

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))
        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="%s"\r\n'
                b"\r\n"
                b"%s\r\n" % (boundary, field.encode(), value)
                for field, value in fields.items()
                if field != "chunk-data"
            )
            + b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="tags[]"\r\n'
                b"\r\n"
                b"%s:%s\r\n" % (boundary, tag.encode(), value)
                for tag, value in tags.items()
            )
            + b"--"
            + boundary
            + b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n"
            + fields["chunk-data"]
            + b"\r\n--%s--\r\n" % boundary
        )
        content_type = b"multipart/form-data; boundary=%s" % boundary
        return content_type, body

    @staticmethod
    def _get_tags(service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        version = os.environ.get("DD_VERSION")
        if version:
            tags["version"] = version.encode("utf-8")

        env = os.environ.get("DD_ENV")
        if env:
            tags["env"] = env.encode("utf-8")

        user_tags = parse_tags_str(os.environ.get("DD_TAGS", {}))
        user_tags.update(parse_tags_str(os.environ.get("DD_PROFILING_TAGS", {})))
        tags.update({k: six.ensure_binary(v) for k, v in user_tags.items()})

        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if not self.endpoint:
            raise InvalidEndpoint("Endpoint is empty")
        common_headers = {
            "DD-API-KEY": self.api_key.encode(),
        }

        profile = super(PprofHTTPExporter, self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        service_name = self.service_name or os.path.basename(profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(
            fields,
            tags=self._get_tags(service_name),
        )
        headers = common_headers.copy()
        headers["Content-Type"] = content_type

        # urllib uses `POST` if `data` is supplied (Python 2 version does not handle `method` kwarg)
        req = request.Request(self.endpoint, data=body, headers=headers)

        retry = tenacity.Retrying(
            # Retry after 1s, 2s, 4s, 8s with some randomness
            wait=tenacity.wait_random_exponential(multiplier=0.5),
            stop=tenacity.stop_after_delay(self.max_retry_delay),
            retry=tenacity.retry_if_exception_type(
                (error.HTTPError, error.URLError, http_client.HTTPException, OSError, IOError)
            ),
        )

        try:
            retry(request.urlopen, req, timeout=self.timeout)
        except tenacity.RetryError as e:
            raise UploadFailed(e.last_attempt.exception())
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib()
    api_key = attr.ib(default=None)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10, float), type=float)
    service = attr.ib(default=None)
    env = attr.ib(default=None)
    version = attr.ib(default=None)
    max_retry_delay = attr.ib(default=None)
    _container_info = attr.ib(factory=container.get_container_info, repr=False)
    _retry_upload = attr.ib(init=None, default=None)
    endpoint_path = attr.ib(default="/profiling/v1/input")

    def __attrs_post_init__(self):
        if self.max_retry_delay is None:
            self.max_retry_delay = self.timeout * 3
        self._retry_upload = tenacity.Retrying(
            # Retry after 1s, 2s, 4s, 8s with some randomness
            wait=tenacity.wait_random_exponential(multiplier=0.5),
            stop=tenacity.stop_after_delay(self.max_retry_delay),
            retry_error_cls=UploadFailed,
            retry=tenacity.retry_if_exception_type((http_client.HTTPException, OSError, IOError)),
        )

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))
        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="%s"\r\n'
                b"\r\n"
                b"%s\r\n" % (boundary, field.encode(), value)
                for field, value in fields.items()
                if field != "chunk-data"
            )
            + b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="tags[]"\r\n'
                b"\r\n"
                b"%s:%s\r\n" % (boundary, tag.encode(), value)
                for tag, value in tags.items()
            )
            + b"--"
            + boundary
            + b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n"
            + fields["chunk-data"]
            + b"\r\n--%s--\r\n" % boundary
        )
        content_type = b"multipart/form-data; boundary=%s" % boundary
        return content_type, body

    def _get_tags(self, service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        if self.version:
            tags["version"] = self.version.encode("utf-8")

        if self.env:
            tags["env"] = self.env.encode("utf-8")

        user_tags = parse_tags_str(os.environ.get("DD_TAGS", {}))
        user_tags.update(parse_tags_str(os.environ.get("DD_PROFILING_TAGS", {})))
        tags.update({k: six.ensure_binary(v) for k, v in user_tags.items()})

        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if self.api_key:
            headers = {
                "DD-API-KEY": self.api_key.encode(),
            }
        else:
            headers = {}

        if self._container_info and self._container_info.container_id:
            headers["Datadog-Container-Id"] = self._container_info.container_id

        profile = super(PprofHTTPExporter, self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        service = self.service or os.path.basename(profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(
            fields,
            tags=self._get_tags(service),
        )
        headers["Content-Type"] = content_type

        parsed = urlparse.urlparse(self.endpoint)
        if parsed.scheme == "https":
            client = http_client.HTTPSConnection(parsed.hostname, parsed.port, timeout=self.timeout)
        elif parsed.scheme == "http":
            client = http_client.HTTPConnection(parsed.hostname, parsed.port, timeout=self.timeout)
        elif parsed.scheme == "unix":
            client = uds.UDSHTTPConnection(parsed.path, False, parsed.hostname, parsed.port, timeout=self.timeout)
        else:
            raise ValueError("Unknown connection scheme %s" % parsed.scheme)

        self._upload(client, self.endpoint_path, body, headers)

    def _upload(self, client, path, body, headers):
        self._retry_upload(self._upload_once, client, path, body, headers)

    def _upload_once(self, client, path, body, headers):
        try:
            client.request("POST", path, body=body, headers=headers)
            response = client.getresponse()
            response.read()  # reading is mandatory
        finally:
            client.close()

        if 200 <= response.status < 300:
            return

        if 500 <= response.status < 600:
            raise tenacity.TryAgain

        if response.status == 400:
            raise exporter.ExportError("Server returned 400, check your API key")
        elif response.status == 404 and not self.api_key:
            raise exporter.ExportError(
                "Datadog Agent is not accepting profiles. "
                "Agent-based profiling deployments require Datadog Agent >= 7.20"
            )

        raise exporter.ExportError("HTTP Error %d" % response.status)
class MemoryCollector(collector.PeriodicCollector):
    """Memory allocation collector."""

    _DEFAULT_MAX_EVENTS = 32
    _DEFAULT_INTERVAL = 0.5
    _DEFAULT_HEAP_SAMPLE_SIZE = 0

    # Arbitrary interval to empty the _memalloc event buffer
    _interval = attr.ib(default=_DEFAULT_INTERVAL, repr=False)

    # TODO make this dynamic based on the 1. interval and 2. the max number of events allowed in the Recorder
    _max_events = attr.ib(factory=_attr.from_env("_DD_PROFILING_MEMORY_EVENTS_BUFFER", _DEFAULT_MAX_EVENTS, int))
    max_nframe = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    heap_sample_size = attr.ib(factory=_attr.from_env("DD_PROFILING_HEAP_SAMPLE_SIZE", _DEFAULT_HEAP_SAMPLE_SIZE, int))
    ignore_profiler = attr.ib(factory=_attr.from_env("DD_PROFILING_IGNORE_PROFILER", True, formats.asbool))

    def start(self):
        """Start collecting memory profiles."""
        if _memalloc is None:
            raise collector.CollectorUnavailable

        _memalloc.start(self.max_nframe, self._max_events, self.heap_sample_size)

        super(MemoryCollector, self).start()

    def stop(self):
        if _memalloc is not None:
            try:
                _memalloc.stop()
            except RuntimeError:
                pass
        super(MemoryCollector, self).stop()

    def _get_thread_id_ignore_set(self):
        # type: () -> typing.Set[int]
        # This method is not perfect and prone to race condition in theory, but very little in practice.
        # Anyhow it's not a big deal, it's a best effort feature.
        return {
            thread.ident
            for thread in threading.enumerate()
            if getattr(thread, "_ddtrace_profiling_ignore", False) and thread.ident is not None
        }

    def snapshot(self):
        thread_id_ignore_set = self._get_thread_id_ignore_set()
        return (
            tuple(
                MemoryHeapSampleEvent(
                    thread_id=thread_id,
                    thread_name=_threading.get_thread_name(thread_id),
                    thread_native_id=_threading.get_thread_native_id(thread_id),
                    frames=stack,
                    nframes=nframes,
                    size=size,
                    sample_size=self.heap_sample_size,
                )
                for (stack, nframes, thread_id), size in _memalloc.heap()
                if not self.ignore_profiler or thread_id not in thread_id_ignore_set
            ),
        )

    def collect(self):
        events, count, alloc_count = _memalloc.iter_events()
        capture_pct = 100 * count / alloc_count
        thread_id_ignore_set = self._get_thread_id_ignore_set()
        # TODO: The event timestamp is slightly off since it's going to be the time we copy the data from the
        # _memalloc buffer to our Recorder. This is fine for now, but we might want to store the nanoseconds
        # timestamp in C and then return it via iter_events.
        return (
            tuple(
                MemoryAllocSampleEvent(
                    thread_id=thread_id,
                    thread_name=_threading.get_thread_name(thread_id),
                    thread_native_id=_threading.get_thread_native_id(thread_id),
                    frames=stack,
                    nframes=nframes,
                    size=size,
                    capture_pct=capture_pct,
                    nevents=alloc_count,
                )
                for (stack, nframes, thread_id), size in events
                if not self.ignore_profiler or thread_id not in thread_id_ignore_set
            ),
        )