def _get_endpoint():
    legacy = _attr.from_env("DD_PROFILING_API_URL", "", str)()
    if legacy:
        deprecation.deprecation("DD_PROFILING_API_URL", "Use DD_SITE")
        return legacy
    site = _attr.from_env("DD_SITE", "datadoghq.com", str)()
    return ENDPOINT_TEMPLATE.format(site)
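
The helper prefers the deprecated DD_PROFILING_API_URL over DD_SITE. A minimal, self-contained sketch of that precedence, assuming an illustrative value for ENDPOINT_TEMPLATE (the real constant is defined elsewhere in the module):

import os

ENDPOINT_TEMPLATE = "https://intake.profile.{}/v1/input"  # assumed value, for illustration only

def get_endpoint(environ=None):
    environ = os.environ if environ is None else environ
    # The deprecated variable wins if it is set to a non-empty value.
    legacy = environ.get("DD_PROFILING_API_URL", "")
    if legacy:
        return legacy
    return ENDPOINT_TEMPLATE.format(environ.get("DD_SITE", "datadoghq.com"))

assert get_endpoint({}) == "https://intake.profile.datadoghq.com/v1/input"
assert get_endpoint({"DD_SITE": "datadoghq.eu"}) == "https://intake.profile.datadoghq.eu/v1/input"
assert get_endpoint({"DD_PROFILING_API_URL": "https://example.org"}) == "https://example.org"
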
Example #2
class MemoryCollector(collector.PeriodicCollector):
    """Memory allocation collector."""

    _DEFAULT_MAX_EVENTS = 32
    _DEFAULT_INTERVAL = 0.5

    # Arbitrary interval to empty the _memalloc event buffer
    _interval = attr.ib(default=_DEFAULT_INTERVAL, repr=False)

    # TODO: make this dynamic based on 1. the interval and 2. the max number of events allowed in the Recorder
    _max_events = attr.ib(factory=_attr.from_env("_DD_PROFILING_MEMORY_EVENTS_BUFFER", _DEFAULT_MAX_EVENTS, int))
    max_nframe = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    heap_sample_size = attr.ib(factory=_attr.from_env("DD_PROFILING_HEAP_SAMPLE_SIZE", 0, int))
    ignore_profiler = attr.ib(factory=_attr.from_env("DD_PROFILING_IGNORE_PROFILER", True, formats.asbool))

    def start(self):
        """Start collecting memory profiles."""
        if _memalloc is None:
            raise RuntimeError("memalloc is unavailable")
        _memalloc.start(self.max_nframe, self._max_events, self.heap_sample_size)
        super(MemoryCollector, self).start()

    def stop(self):
        if _memalloc is not None:
            try:
                _memalloc.stop()
            except RuntimeError:
                pass
            super(MemoryCollector, self).stop()

    def collect(self):
        events, count, alloc_count = _memalloc.iter_events()
        capture_pct = 100 * count / alloc_count
        # TODO: The event timestamp is slightly off since it's going to be the time we copy the data from the
        # _memalloc buffer to our Recorder. This is fine for now, but we might want to store the nanoseconds
        # timestamp in C and then return it via iter_events.
        return (
            tuple(
                MemoryAllocSampleEvent(
                    thread_id=thread_id,
                    thread_name=_threading.get_thread_name(thread_id),
                    thread_native_id=_threading.get_thread_native_id(thread_id),
                    frames=stack,
                    nframes=nframes,
                    size=size,
                    capture_pct=capture_pct,
                    nevents=alloc_count,
                )
                for (stack, nframes, thread_id), size in events
                # TODO: this should be implemented in _memalloc directly so we have more space for samples
                # not coming from the profiler
                if not self.ignore_profiler or not any(frame[0].startswith(_MODULE_TOP_DIR) for frame in stack)
            ),
        )
class CaptureSamplerCollector(Collector):
    capture_pct = attr.ib(
        factory=_attr.from_env("DD_PROFILING_CAPTURE_PCT", 5, float))
    _capture_sampler = attr.ib(default=attr.Factory(_create_capture_sampler,
                                                    takes_self=True),
                               init=False,
                               repr=False)
Example #4
class LockCollector(collector.CaptureSamplerCollector):
    """Record lock usage."""

    nframes = attr.ib(
        factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    tracer = attr.ib(default=None)

    def start(self):
        """Start collecting `threading.Lock` usage."""
        super(LockCollector, self).start()
        self.patch()

    def stop(self):
        """Stop collecting `threading.Lock` usage."""
        self.unpatch()
        super(LockCollector, self).stop()

    def patch(self):
        """Patch the threading module for tracking lock allocation."""
        # We only patch the lock from the `threading` module.
        # Nobody should use locks from `_thread`; if they do so, then it's deliberate and we don't profile.
        self.original = threading.Lock

        def _allocate_lock(wrapped, instance, args, kwargs):
            lock = wrapped(*args, **kwargs)
            return _ProfiledLock(lock, self.recorder, self.tracer,
                                 self.nframes, self._capture_sampler)

        threading.Lock = FunctionWrapper(self.original, _allocate_lock)

    def unpatch(self):
        """Unpatch the threading module for tracking lock allocation."""
        threading.Lock = self.original
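
LockCollector relies on wrapt-style wrapping: threading.Lock is replaced by a FunctionWrapper whose callback receives (wrapped, instance, args, kwargs), creates the real lock, and returns the profiled version. A minimal, self-contained sketch of the same patching idea, using the standalone wrapt package and a plain counter instead of _ProfiledLock:

import threading
import wrapt

original_lock = threading.Lock
allocated = []

def _allocate_lock(wrapped, instance, args, kwargs):
    lock = wrapped(*args, **kwargs)
    allocated.append(lock)  # stand-in for wrapping the lock in _ProfiledLock
    return lock

threading.Lock = wrapt.FunctionWrapper(original_lock, _allocate_lock)
try:
    with threading.Lock():
        pass
    assert len(allocated) == 1
finally:
    threading.Lock = original_lock  # always unpatch, mirroring unpatch() above
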
Example #5
class Scheduler(_periodic.PeriodicService):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    _interval = attr.ib(factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _last_export = attr.ib(init=False, default=None)

    def start(self):
        """Start the scheduler."""
        LOG.debug("Starting scheduler")
        super(Scheduler, self).start()
        self._last_export = compat.time_ns()
        LOG.debug("Scheduler started")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        if self.exporters:
            events = self.recorder.reset()
            start = self._last_export
            self._last_export = compat.time_ns()
            total_events = sum(len(v) for v in events.values())
            for exp in self.exporters:
                try:
                    exp.export(events, start, self._last_export)
                except exporter.ExportError as e:
                    LOG.error("Unable to export %d events: %s", total_events, _traceback.format_exception(e))
                except Exception:
                    LOG.exception("Error while exporting %d events", total_events)

    periodic = flush
    on_shutdown = flush
class UncaughtExceptionCollector(collector.Collector):
    """Record uncaught thrown exceptions."""

    max_nframes = attr.ib(
        factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))

    def start(self):
        """Start collecting uncaught exceptions."""
        self.original_except_hook = sys.excepthook
        sys.excepthook = self.except_hook
        super(UncaughtExceptionCollector, self).start()

    def stop(self):
        """Stop collecting uncaught exceptions."""
        if hasattr(self, "original_except_hook"):
            sys.excepthook = self.original_except_hook
            del self.original_except_hook
        super(UncaughtExceptionCollector, self).stop()

    def except_hook(self, exctype, value, traceback):
        try:
            frames, nframes = _traceback.traceback_to_frames(
                traceback, self.max_nframes)
            thread_id, thread_name = threading._current_thread()
            self.recorder.push_event(
                UncaughtExceptionEvent(frames=frames,
                                       nframes=nframes,
                                       thread_id=thread_id,
                                       thread_name=thread_name,
                                       exc_type=exctype))
        finally:
            return self.original_except_hook(exctype, value, traceback)
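
The collector chains onto whatever sys.excepthook was previously installed: it records the exception, then always delegates to the original hook so default error reporting is preserved. A minimal sketch of that chaining pattern (the recorded list stands in for the Recorder):

import sys

original_hook = sys.excepthook
recorded = []

def recording_hook(exctype, value, tb):
    recorded.append(exctype)  # record, like push_event() above
    return original_hook(exctype, value, tb)  # then defer to the previous hook

sys.excepthook = recording_hook
# ...later, to restore the original behaviour:
sys.excepthook = original_hook
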
Example #7
class Scheduler(object):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    interval = attr.ib(
        factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _periodic = attr.ib(init=False, default=None)
    _last_export = attr.ib(init=False, default=None)

    def __enter__(self):
        self.start()
        return self

    def start(self):
        """Start the scheduler."""
        self._periodic = _periodic.PeriodicThread(
            self.interval,
            self.flush,
            name="%s:%s" % (__name__, self.__class__.__name__))
        LOG.debug("Starting scheduler")
        self._last_export = compat.time_ns()
        self._periodic.start()
        LOG.debug("Scheduler started")

    def __exit__(self, exc_type, exc_value, traceback):
        return self.stop()

    def stop(self, flush=True):
        """Stop the scheduler.

        :param flush: Whether to do a final flush.
        """
        LOG.debug("Stopping scheduler")
        if self._periodic:
            self._periodic.stop()
            self._periodic.join()
            self._periodic = None
        if flush:
            self.flush()
        LOG.debug("Scheduler stopped")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        events = self.recorder.reset()
        start = self._last_export
        self._last_export = compat.time_ns()
        total_events = sum(len(v) for v in events.values())
        for exp in self.exporters:
            try:
                exp.export(events, start, self._last_export)
            except exporter.ExportError as e:
                LOG.error("Unable to export %d events: %s", total_events,
                          _traceback.format_exception(e))
            except Exception:
                LOG.exception("Error while exporting %d events", total_events)
Example #8
class Scheduler(_periodic.PeriodicService):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    before_flush = attr.ib(default=None, eq=False)
    _interval = attr.ib(
        factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _configured_interval = attr.ib(init=False)
    _last_export = attr.ib(init=False, default=None, eq=False)

    def __attrs_post_init__(self):
        # Copy the value to use it later since we're going to adjust the real interval
        self._configured_interval = self.interval

    def start(self):
        """Start the scheduler."""
        LOG.debug("Starting scheduler")
        super(Scheduler, self).start()
        self._last_export = compat.time_ns()
        LOG.debug("Scheduler started")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        if self.before_flush is not None:
            try:
                self.before_flush()
            except Exception:
                LOG.error("Scheduler before_flush hook failed", exc_info=True)
        if self.exporters:
            events = self.recorder.reset()
            start = self._last_export
            self._last_export = compat.time_ns()
            for exp in self.exporters:
                try:
                    exp.export(events, start, self._last_export)
                except exporter.ExportError as e:
                    LOG.error("Unable to export profile: %s. Ignoring.",
                              _traceback.format_exception(e))
                except Exception:
                    LOG.exception(
                        "Unexpected error while exporting events. "
                        "Please report this bug to https://github.com/DataDog/dd-trace-py/issues"
                    )

    def periodic(self):
        start_time = compat.monotonic()
        try:
            self.flush()
        finally:
            self.interval = max(
                0,
                self._configured_interval - (compat.monotonic() - start_time))
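
periodic() compensates for the time spent flushing: the next wait is the configured interval minus the flush duration, clamped at zero, so uploads keep a roughly fixed cadence even when an export is slow. A trivial illustration with made-up numbers:

configured_interval = 60.0   # seconds, the value saved in __attrs_post_init__
flush_duration = 2.5         # hypothetical time spent inside flush()
next_interval = max(0, configured_interval - flush_duration)
assert next_interval == 57.5

# A flush that overruns the interval simply schedules the next one immediately.
assert max(0, configured_interval - 75.0) == 0
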
Example #9
class Scheduler(_periodic.PeriodicService):
    """Schedule export of recorded data."""

    recorder = attr.ib()
    exporters = attr.ib()
    _interval = attr.ib(
        factory=_attr.from_env("DD_PROFILING_UPLOAD_INTERVAL", 60, float))
    _configured_interval = attr.ib(init=False)
    _last_export = attr.ib(init=False, default=None)

    def __attrs_post_init__(self):
        # Copy the value to use it later since we're going to adjust the real interval
        self._configured_interval = self.interval

    def start(self):
        """Start the scheduler."""
        LOG.debug("Starting scheduler")
        super(Scheduler, self).start()
        self._last_export = compat.time_ns()
        LOG.debug("Scheduler started")

    def flush(self):
        """Flush events from recorder to exporters."""
        LOG.debug("Flushing events")
        if self.exporters:
            events = self.recorder.reset()
            start = self._last_export
            self._last_export = compat.time_ns()
            total_events = sum(len(v) for v in events.values())
            for exp in self.exporters:
                try:
                    exp.export(events, start, self._last_export)
                except exporter.ExportError as e:
                    LOG.error("Unable to export %d events: %s", total_events,
                              _traceback.format_exception(e))
                except Exception:
                    LOG.exception("Error while exporting %d events",
                                  total_events)

    def periodic(self):
        start_time = compat.monotonic()
        try:
            self.flush()
        finally:
            self.interval = max(
                0,
                self._configured_interval - (compat.monotonic() - start_time))

    on_shutdown = flush
def _get_api_key():
    legacy = _attr.from_env("DD_PROFILING_API_KEY", "", str)()
    if legacy:
        deprecation.deprecation("DD_PROFILING_API_KEY", "Use DD_API_KEY")
        return legacy
    return _attr.from_env("DD_API_KEY", "", str)()
Example #11
class MemoryCollector(collector.PeriodicCollector,
                      collector.CaptureSamplerCollector):
    """Memory allocation collector."""

    # Arbitrary interval to use for enabling/disabling tracemalloc
    _interval = attr.ib(default=0.1, repr=False)

    nframes = attr.ib(
        factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    ignore_profiler = attr.ib(factory=_attr.from_env(
        "DD_PROFILING_IGNORE_PROFILER", True, formats.asbool))

    def __attrs_post_init__(self):
        if sys.version_info[:2] <= (3, 5):
            self._filter_profiler = self._filter_profiler_35

    @staticmethod
    def _filter_profiler(traces):
        return [
            trace for trace in traces
            if not any(frame[0].startswith(_MODULE_TOP_DIR)
                       for frame in trace[2])
        ]

    @staticmethod
    def _filter_profiler_35(traces):
        # Python <= 3.5 does not have support for domain
        return [
            trace for trace in traces
            if not any(frame[0].startswith(_MODULE_TOP_DIR)
                       for frame in trace[1])
        ]

    def start(self):
        """Start collecting memory profiles."""
        if tracemalloc is None:
            raise RuntimeError("tracemalloc is unavailable")
        super(MemoryCollector, self).start()

    def stop(self):
        if tracemalloc is not None:
            tracemalloc.stop()
            super(MemoryCollector, self).stop()

    def collect(self):
        try:
            snapshot = tracemalloc.take_snapshot()
        except RuntimeError:
            events = []
        else:
            tracemalloc.stop()

            if snapshot.traces and self.ignore_profiler:
                snapshot.traces._traces = self._filter_profiler(
                    snapshot.traces._traces)

            if snapshot.traces:
                events = [
                    MemorySampleEvent(snapshot=snapshot,
                                      sampling_pct=self.capture_pct)
                ]
            else:
                events = []

        if self._capture_sampler.capture():
            tracemalloc.start(self.nframes)

        return [events]
Example #12
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib(factory=_get_endpoint, type=str)
    api_key = attr.ib(factory=_get_api_key, type=str)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10,
                                             float),
                      type=float)
    service_name = attr.ib(factory=_get_service_name)

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))

        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(b"--%s\r\n"
                     b'Content-Disposition: form-data; name="%s"\r\n'
                     b"\r\n"
                     b"%s\r\n" % (boundary, field.encode(), value)
                     for field, value in fields.items()
                     if field != "chunk-data") +
            b"".join(b"--%s\r\n"
                     b'Content-Disposition: form-data; name="tags[]"\r\n'
                     b"\r\n"
                     b"%s:%s\r\n" % (boundary, tag.encode(), value)
                     for tag, value in tags.items()) + b"--" + boundary +
            b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n" +
            fields["chunk-data"] + b"\r\n--%s--\r\n" % boundary)

        content_type = b"multipart/form-data; boundary=%s" % boundary

        return content_type, body

    @staticmethod
    def _get_tags(service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": RUNTIME_ID,
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        version = os.environ.get("DD_VERSION")
        if version:
            tags["version"] = version

        env = os.environ.get("DD_ENV")
        if env:
            tags["env"] = env

        user_tags = os.getenv("DD_PROFILING_TAGS")
        if user_tags:
            for tag in user_tags.split(","):
                try:
                    key, value = tag.split(":", 1)
                except ValueError:
                    LOG.error("Malformed tag in DD_PROFILING_TAGS: %s", tag)
                else:
                    if isinstance(value, six.text_type):
                        value = value.encode("utf-8")
                    tags[key] = value
        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if not self.endpoint:
            raise InvalidEndpoint("Endpoint is empty")

        common_headers = {
            "DD-API-KEY": self.api_key.encode(),
        }

        profile = super(PprofHTTPExporter,
                        self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": RUNTIME_ID,
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        service_name = self.service_name or os.path.basename(
            profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(
            fields,
            tags=self._get_tags(service_name),
        )
        headers = common_headers.copy()
        headers["Content-Type"] = content_type

        # urllib uses `POST` if `data` is supplied (Python 2 version does not handle `method` kwarg)
        req = request.Request(self.endpoint, data=body, headers=headers)

        try:
            request.urlopen(req, timeout=self.timeout)
        except (error.HTTPError, error.URLError, http_client.HTTPException,
                socket.timeout) as e:
            raise UploadFailed(e)
Example #13
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib(factory=_get_endpoint, type=str)
    api_key = attr.ib(factory=_get_api_key, type=str)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10,
                                             float),
                      type=float)

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))

        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(b"--%s\r\n"
                     b'Content-Disposition: form-data; name="%s"\r\n'
                     b"\r\n"
                     b"%s\r\n" % (boundary, field.encode(), value)
                     for field, value in fields.items()
                     if field != "chunk-data") +
            b"".join(b"--%s\r\n"
                     b'Content-Disposition: form-data; name="tags[]"\r\n'
                     b"\r\n"
                     b"%s:%s\r\n" % (boundary, tag.encode(), value)
                     for tag, value in tags.items()) + b"--" + boundary +
            b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n" +
            fields["chunk-data"] + b"\r\n--%s--\r\n" % boundary)

        content_type = b"multipart/form-data; boundary=%s" % boundary

        return content_type, body

    @staticmethod
    def _get_tags(service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": RUNTIME_ID,
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        version = os.environ.get("DD_VERSION")
        if version:
            tags["version"] = version

        env = os.environ.get("DD_ENV")
        if env:
            tags["env"] = env

        user_tags = os.getenv("DD_PROFILING_TAGS")
        if user_tags:
            for tag in user_tags.split(","):
                try:
                    key, value = tag.split(":", 1)
                except ValueError:
                    LOG.error("Malformed tag in DD_PROFILING_TAGS: %s", tag)
                else:
                    if isinstance(value, six.text_type):
                        value = value.encode("utf-8")
                    tags[key] = value
        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if not self.endpoint:
            raise InvalidEndpoint("Endpoint is empty")
        parsed = urlparse.urlparse(self.endpoint)
        if parsed.scheme == "https":
            client_class = http_client.HTTPSConnection
        else:
            client_class = http_client.HTTPConnection
        if ":" in parsed.netloc:
            host, port = parsed.netloc.split(":", 1)
        else:
            host, port = parsed.netloc, None
        client = client_class(host, port, timeout=self.timeout)

        common_headers = {
            "DD-API-KEY": self.api_key.encode(),
        }

        exceptions = []
        profile = super(PprofHTTPExporter,
                        self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": RUNTIME_ID,
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        for service_name_var in ("DD_SERVICE", "DD_SERVICE_NAME",
                                 "DATADOG_SERVICE_NAME"):
            service_name = os.environ.get(service_name_var)
            if service_name is not None:
                break
        else:
            service_name = os.path.basename(
                profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(
            fields,
            tags=self._get_tags(service_name),
        )
        headers = common_headers.copy()
        headers["Content-Type"] = content_type
        try:
            client.request("POST", parsed.path, body=body, headers=headers)
        except (OSError, IOError, http_client.CannotSendRequest) as e:
            exceptions.append(e)
        else:
            try:
                response = client.getresponse()
                content = response.read()  # have to read to not fail!
            except (OSError, IOError, http_client.BadStatusLine) as e:
                exceptions.append(e)
            else:
                if not 200 <= response.status < 400:
                    exceptions.append(RequestFailed(response, content))

        if exceptions:
            raise UploadFailed(exceptions)
class Recorder(object):
    """An object that records program activity."""

    events = attr.ib(init=False, repr=False)
    max_size = attr.ib(
        factory=_attr.from_env("DD_PROFILING_MAX_EVENTS", 49152, int))
    event_filters = attr.ib(factory=lambda: collections.defaultdict(list),
                            repr=False)

    def __attrs_post_init__(self):
        self._reset_events()

    def add_event_filter(self, event_type, filter_fn):
        """Add an event filter function.

        A filter function must accept a list of events as argument and return the list of events that should be pushed
        into the recorder.

        :param event_type: A class of event.
        :param filter_fn: A filter function to append.

        """
        self.event_filters[event_type].append(filter_fn)

    def remove_event_filter(self, event_type, filter_fn):
        """Remove an event filter from the recorder.

        :param event_type: A class of event.
        :param filter_fn: The filter function to remove.
        """
        self.event_filters[event_type].remove(filter_fn)

    def push_event(self, event):
        """Push an event in the recorder.

        :param event: The `ddtrace.profiling.event.Event` to push.
        """
        return self.push_events([event])

    def push_events(self, events):
        """Push multiple events in the recorder.

        All the events MUST be of the same type.
        For performance reasons, there is no sanity check verifying that all the events are of the same class.

        :param events: The event list to push.
        """
        if events:
            event_type = events[0].__class__
            for filter_fn in self.event_filters[event_type]:
                events = filter_fn(events)
            q = self.events[event_type]
            q.extend(events)

    def _reset_events(self):
        self.events = collections.defaultdict(
            lambda: collections.deque(maxlen=self.max_size))

    def reset(self):
        """Reset the recorder.

        This is useful e.g. when exporting data: reset swaps in fresh event queues, so the returned ones can be
        iterated without being mutated concurrently.

        :return: The events that have been removed.
        """
        events = self.events
        self._reset_events()
        return events
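
The filter contract documented above is simple: each registered filter receives the list of events being pushed and returns the list to keep. A small, self-contained sketch of that contract and of the defaultdict/deque storage used by push_events; the dict-based events and the size threshold are hypothetical, for illustration only:

import collections

def drop_small_allocations(events):
    # A filter takes the incoming event list and returns the events to keep.
    return [e for e in events if e["size"] >= 1024]

event_filters = collections.defaultdict(list)
event_filters[dict].append(drop_small_allocations)

events = collections.defaultdict(lambda: collections.deque(maxlen=49152))

incoming = [{"size": 64}, {"size": 4096}]
for filter_fn in event_filters[dict]:
    incoming = filter_fn(incoming)
events[dict].extend(incoming)

assert list(events[dict]) == [{"size": 4096}]
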
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib(factory=_get_endpoint, type=str)
    api_key = attr.ib(factory=_get_api_key, type=str)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10, float), type=float)
    service_name = attr.ib(factory=_get_service_name)
    max_retry_delay = attr.ib(default=None)

    def __attrs_post_init__(self):
        if self.max_retry_delay is None:
            self.max_retry_delay = self.timeout * 3

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))

        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="%s"\r\n'
                b"\r\n"
                b"%s\r\n" % (boundary, field.encode(), value)
                for field, value in fields.items()
                if field != "chunk-data"
            )
            + b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="tags[]"\r\n'
                b"\r\n"
                b"%s:%s\r\n" % (boundary, tag.encode(), value)
                for tag, value in tags.items()
            )
            + b"--"
            + boundary
            + b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n"
            + fields["chunk-data"]
            + b"\r\n--%s--\r\n" % boundary
        )

        content_type = b"multipart/form-data; boundary=%s" % boundary

        return content_type, body

    @staticmethod
    def _get_tags(service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        version = os.environ.get("DD_VERSION")
        if version:
            tags["version"] = version.encode("utf-8")

        env = os.environ.get("DD_ENV")
        if env:
            tags["env"] = env.encode("utf-8")

        user_tags = parse_tags_str(os.environ.get("DD_TAGS", {}))
        user_tags.update(parse_tags_str(os.environ.get("DD_PROFILING_TAGS", {})))
        tags.update({k: six.ensure_binary(v) for k, v in user_tags.items()})
        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if not self.endpoint:
            raise InvalidEndpoint("Endpoint is empty")

        common_headers = {
            "DD-API-KEY": self.api_key.encode(),
        }

        profile = super(PprofHTTPExporter, self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        service_name = self.service_name or os.path.basename(profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(fields, tags=self._get_tags(service_name),)
        headers = common_headers.copy()
        headers["Content-Type"] = content_type

        # urllib uses `POST` if `data` is supplied (Python 2 version does not handle `method` kwarg)
        req = request.Request(self.endpoint, data=body, headers=headers)

        retry = tenacity.Retrying(
            # Retry after 1s, 2s, 4s, 8s with some randomness
            wait=tenacity.wait_random_exponential(multiplier=0.5),
            stop=tenacity.stop_after_delay(self.max_retry_delay),
            retry=tenacity.retry_if_exception_type(
                (error.HTTPError, error.URLError, http_client.HTTPException, OSError, IOError)
            ),
        )

        try:
            retry(request.urlopen, req, timeout=self.timeout)
        except tenacity.RetryError as e:
            raise UploadFailed(e.last_attempt.exception())
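
This variant wraps the upload in a tenacity retry policy: random exponential backoff, bounded by a total delay, retrying only on network-level errors. A minimal, self-contained sketch of the same policy applied to a hypothetical flaky callable:

import tenacity

attempts = {"count": 0}

def flaky_upload():
    attempts["count"] += 1
    if attempts["count"] < 3:
        raise OSError("transient network error")
    return "ok"

retry = tenacity.Retrying(
    wait=tenacity.wait_random_exponential(multiplier=0.5),
    stop=tenacity.stop_after_delay(30),
    retry=tenacity.retry_if_exception_type((OSError, IOError)),
)
assert retry(flaky_upload) == "ok"
assert attempts["count"] == 3
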
Example #16
class PprofHTTPExporter(pprof.PprofExporter):
    """PProf HTTP exporter."""

    endpoint = attr.ib()
    api_key = attr.ib(default=None)
    timeout = attr.ib(factory=_attr.from_env("DD_PROFILING_API_TIMEOUT", 10, float), type=float)
    service = attr.ib(default=None)
    env = attr.ib(default=None)
    version = attr.ib(default=None)
    max_retry_delay = attr.ib(default=None)
    _container_info = attr.ib(factory=container.get_container_info, repr=False)
    _retry_upload = attr.ib(init=False, default=None)
    endpoint_path = attr.ib(default="/profiling/v1/input")

    def __attrs_post_init__(self):
        if self.max_retry_delay is None:
            self.max_retry_delay = self.timeout * 3
        self._retry_upload = tenacity.Retrying(
            # Retry after 1s, 2s, 4s, 8s with some randomness
            wait=tenacity.wait_random_exponential(multiplier=0.5),
            stop=tenacity.stop_after_delay(self.max_retry_delay),
            retry_error_cls=UploadFailed,
            retry=tenacity.retry_if_exception_type((http_client.HTTPException, OSError, IOError)),
        )

    @staticmethod
    def _encode_multipart_formdata(fields, tags):
        boundary = binascii.hexlify(os.urandom(16))

        # The body that is generated is very sensitive and must perfectly match what the server expects.
        body = (
            b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="%s"\r\n'
                b"\r\n"
                b"%s\r\n" % (boundary, field.encode(), value)
                for field, value in fields.items()
                if field != "chunk-data"
            )
            + b"".join(
                b"--%s\r\n"
                b'Content-Disposition: form-data; name="tags[]"\r\n'
                b"\r\n"
                b"%s:%s\r\n" % (boundary, tag.encode(), value)
                for tag, value in tags.items()
            )
            + b"--"
            + boundary
            + b"\r\n"
            b'Content-Disposition: form-data; name="chunk-data"; filename="profile.pb.gz"\r\n'
            + b"Content-Type: application/octet-stream\r\n\r\n"
            + fields["chunk-data"]
            + b"\r\n--%s--\r\n" % boundary
        )

        content_type = b"multipart/form-data; boundary=%s" % boundary

        return content_type, body

    def _get_tags(self, service):
        tags = {
            "service": service.encode("utf-8"),
            "host": HOSTNAME.encode("utf-8"),
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "language": b"python",
            "runtime": PYTHON_IMPLEMENTATION,
            "runtime_version": PYTHON_VERSION,
            "profiler_version": ddtrace.__version__.encode("utf-8"),
        }

        if self.version:
            tags["version"] = self.version.encode("utf-8")

        if self.env:
            tags["env"] = self.env.encode("utf-8")

        user_tags = parse_tags_str(os.environ.get("DD_TAGS", {}))
        user_tags.update(parse_tags_str(os.environ.get("DD_PROFILING_TAGS", {})))
        tags.update({k: six.ensure_binary(v) for k, v in user_tags.items()})
        return tags

    def export(self, events, start_time_ns, end_time_ns):
        """Export events to an HTTP endpoint.

        :param events: The event dictionary from a `ddtrace.profiling.recorder.Recorder`.
        :param start_time_ns: The start time of recording.
        :param end_time_ns: The end time of recording.
        """
        if self.api_key:
            headers = {
                "DD-API-KEY": self.api_key.encode(),
            }
        else:
            headers = {}

        if self._container_info and self._container_info.container_id:
            headers["Datadog-Container-Id"] = self._container_info.container_id

        profile = super(PprofHTTPExporter, self).export(events, start_time_ns, end_time_ns)
        s = six.BytesIO()
        with gzip.GzipFile(fileobj=s, mode="wb") as gz:
            gz.write(profile.SerializeToString())
        fields = {
            "runtime-id": runtime.get_runtime_id().encode("ascii"),
            "recording-start": (
                datetime.datetime.utcfromtimestamp(start_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "recording-end": (
                datetime.datetime.utcfromtimestamp(end_time_ns / 1e9).replace(microsecond=0).isoformat() + "Z"
            ).encode(),
            "runtime": PYTHON_IMPLEMENTATION,
            "format": b"pprof",
            "type": b"cpu+alloc+exceptions",
            "chunk-data": s.getvalue(),
        }

        service = self.service or os.path.basename(profile.string_table[profile.mapping[0].filename])

        content_type, body = self._encode_multipart_formdata(
            fields,
            tags=self._get_tags(service),
        )
        headers["Content-Type"] = content_type

        parsed = urlparse.urlparse(self.endpoint)
        if parsed.scheme == "https":
            client = http_client.HTTPSConnection(parsed.hostname, parsed.port, timeout=self.timeout)
        elif parsed.scheme == "http":
            client = http_client.HTTPConnection(parsed.hostname, parsed.port, timeout=self.timeout)
        elif parsed.scheme == "unix":
            client = uds.UDSHTTPConnection(parsed.path, False, parsed.hostname, parsed.port, timeout=self.timeout)
        else:
            raise ValueError("Unknown connection scheme %s" % parsed.scheme)

        self._upload(client, self.endpoint_path, body, headers)

    def _upload(self, client, path, body, headers):
        self._retry_upload(self._upload_once, client, path, body, headers)

    def _upload_once(self, client, path, body, headers):
        try:
            client.request("POST", path, body=body, headers=headers)
            response = client.getresponse()
            response.read()  # reading is mandatory
        finally:
            client.close()

        if 200 <= response.status < 300:
            return

        if 500 <= response.status < 600:
            raise tenacity.TryAgain

        if response.status == 400:
            raise exporter.ExportError("Server returned 400, check your API key")
        elif response.status == 404 and not self.api_key:
            raise exporter.ExportError(
                "Datadog Agent is not accepting profiles. "
                "Agent-based profiling deployments require Datadog Agent >= 7.20"
            )

        raise exporter.ExportError("HTTP Error %d" % response.status)
Example #17
class MemoryCollector(collector.PeriodicCollector):
    """Memory allocation collector."""

    _DEFAULT_MAX_EVENTS = 32
    _DEFAULT_INTERVAL = 0.5
    _DEFAULT_HEAP_SAMPLE_SIZE = 0

    # Arbitrary interval to empty the _memalloc event buffer
    _interval = attr.ib(default=_DEFAULT_INTERVAL, repr=False)

    # TODO: make this dynamic based on 1. the interval and 2. the max number of events allowed in the Recorder
    _max_events = attr.ib(factory=_attr.from_env(
        "_DD_PROFILING_MEMORY_EVENTS_BUFFER", _DEFAULT_MAX_EVENTS, int))
    max_nframe = attr.ib(
        factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
    heap_sample_size = attr.ib(factory=_attr.from_env(
        "DD_PROFILING_HEAP_SAMPLE_SIZE", _DEFAULT_HEAP_SAMPLE_SIZE, int))
    ignore_profiler = attr.ib(factory=_attr.from_env(
        "DD_PROFILING_IGNORE_PROFILER", True, formats.asbool))

    def start(self):
        """Start collecting memory profiles."""
        if _memalloc is None:
            raise collector.CollectorUnavailable

        _memalloc.start(self.max_nframe, self._max_events,
                        self.heap_sample_size)
        super(MemoryCollector, self).start()

    def stop(self):
        if _memalloc is not None:
            try:
                _memalloc.stop()
            except RuntimeError:
                pass
            super(MemoryCollector, self).stop()

    def _get_thread_id_ignore_set(self):
        # type: () -> typing.Set[int]
        # This method is not perfect and is prone to race conditions in theory, but rarely in practice.
        # Anyhow it's not a big deal; this is a best-effort feature.
        return {
            thread.ident
            for thread in threading.enumerate()
            if getattr(thread, "_ddtrace_profiling_ignore", False)
            and thread.ident is not None
        }

    def snapshot(self):
        thread_id_ignore_set = self._get_thread_id_ignore_set()
        return (tuple(
            MemoryHeapSampleEvent(
                thread_id=thread_id,
                thread_name=_threading.get_thread_name(thread_id),
                thread_native_id=_threading.get_thread_native_id(thread_id),
                frames=stack,
                nframes=nframes,
                size=size,
                sample_size=self.heap_sample_size,
            ) for (stack, nframes, thread_id), size in _memalloc.heap()
            if not self.ignore_profiler
            or thread_id not in thread_id_ignore_set), )

    def collect(self):
        events, count, alloc_count = _memalloc.iter_events()
        capture_pct = 100 * count / alloc_count
        thread_id_ignore_set = self._get_thread_id_ignore_set()
        # TODO: The event timestamp is slightly off since it's going to be the time we copy the data from the
        # _memalloc buffer to our Recorder. This is fine for now, but we might want to store the nanoseconds
        # timestamp in C and then return it via iter_events.
        return (tuple(
            MemoryAllocSampleEvent(
                thread_id=thread_id,
                thread_name=_threading.get_thread_name(thread_id),
                thread_native_id=_threading.get_thread_native_id(thread_id),
                frames=stack,
                nframes=nframes,
                size=size,
                capture_pct=capture_pct,
                nevents=alloc_count,
            ) for (stack, nframes, thread_id), size in events
            if not self.ignore_profiler
            or thread_id not in thread_id_ignore_set), )
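
The ignore set above relies on a private marker attribute: any running thread that carries a truthy _ddtrace_profiling_ignore is excluded from the samples. A minimal, self-contained sketch of that mechanism:

import threading

def get_thread_id_ignore_set():
    return {
        thread.ident
        for thread in threading.enumerate()
        if getattr(thread, "_ddtrace_profiling_ignore", False) and thread.ident is not None
    }

stop = threading.Event()
worker = threading.Thread(target=stop.wait)
worker._ddtrace_profiling_ignore = True  # mark the thread before starting it
worker.start()
try:
    assert worker.ident in get_thread_id_ignore_set()
finally:
    stop.set()
    worker.join()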