コード例 #1
0
    def _truncate_span_descriptions(serialized_event, event, excess_bytes):
        # type: (Event, Event, int) -> None
        """
        Modifies serialized_event in-place trying to remove excess_bytes from
        span descriptions. The original event is used read-only to access the
        span timestamps (represented as RFC3339-formatted strings in
        serialized_event).

        It uses heuristics to prioritize preserving the description of spans
        that might be the most interesting ones in terms of understanding and
        optimizing performance: descriptions are truncated starting with the
        spans of shortest duration and, among equal durations, the longest
        descriptions. Truncation stops as soon as the accounted savings reach
        excess_bytes.

        This function reads and mutates the free names ``path`` and
        ``meta_stack``, which are defined outside of it; both are restored to
        their incoming length before returning.
        """
        # When truncating a description, preserve a small prefix.
        min_length = 10

        # Use one reference instant for every span that lacks a start or end
        # timestamp. Calling utcnow() inside the sort key would measure each
        # such span against a slightly different "now".
        now = datetime.utcnow()

        def shortest_duration_longest_description_first(args):
            # type: (Tuple[int, Span]) -> Tuple[timedelta, int]
            i, serialized_span = args
            # Timestamps are read from the original event; the docstring
            # above explains that the serialized copy only has strings.
            span = event["spans"][i]
            start = span.get("start_timestamp") or now
            end = span.get("timestamp") or now
            duration = end - start
            description = serialized_span.get("description") or ""
            # Negated length so that longer descriptions sort first.
            return (duration, -len(description))

        # Note: for simplicity we sort spans by exact duration and description
        # length. If ever needed, we could have a more involved heuristic, e.g.
        # replacing exact durations with "buckets" and/or looking at other span
        # properties.
        path.append("spans")
        for i, span in sorted(
            enumerate(serialized_event.get("spans") or []),
            key=shortest_duration_longest_description_first,
        ):
            description = span.get("description") or ""
            if len(description) <= min_length:
                # Nothing to gain from descriptions already at the minimum.
                continue
            # NOTE(review): len() counts characters, so this is only an
            # estimate of the bytes saved for non-ASCII text -- confirm that
            # is acceptable for callers.
            excess_bytes -= len(description) - min_length
            path.extend([i, "description"])
            # Note: the last time we call strip_string we could preserve a few
            # more bytes up to a total length of MAX_EVENT_BYTES. Since that's
            # not strictly required, we leave it out for now for simplicity.
            span["description"] = _flatten_annotated(
                strip_string(description, max_length=min_length)
            )
            # Drop the [i, "description"] segments pushed above and any meta
            # stack entries beyond the restored path.
            del path[-2:]
            del meta_stack[len(path) + 1 :]

            if excess_bytes <= 0:
                break
        # Undo the "spans" segment pushed before the loop.
        path.pop()
        del meta_stack[len(path) + 1 :]
コード例 #2
0
    def _serialize_node_impl(self, obj, max_depth, max_breadth):
        # type: (Any, Optional[int], Optional[int]) -> Any
        """
        Serialize ``obj`` for the position tracked by ``self.meta_node``.

        Mappings become dicts with text keys, non-string sequences become
        lists, and None, booleans and numbers are returned unchanged unless
        ``self.meta_node.should_repr_strings()`` is true. Datetimes are
        formatted as ``%Y-%m-%dT%H:%M:%SZ`` text, and everything else is
        turned into a stripped string.

        :param obj: the value to serialize.
        :param max_depth: absolute depth limit, or None for no limit. When
            both limits are None and the node is a databag, limits are
            derived from the node depth and the MAX_DATABAG_* constants.
        :param max_breadth: maximum number of children kept per container,
            or None for no limit.
        :returns: the serialized value, or None when the depth limit is hit
            outside of a databag.
        """
        if (
            max_depth is None
            and max_breadth is None
            and self.meta_node.is_databag()
        ):
            max_depth = self.meta_node._depth + MAX_DATABAG_DEPTH
            # NOTE(review): the breadth limit is offset by the node depth as
            # well, which looks unintended -- confirm before changing.
            max_breadth = self.meta_node._depth + MAX_DATABAG_BREADTH

        if max_depth is None:
            remaining_depth = None
        else:
            remaining_depth = max_depth - self.meta_node._depth

        obj = _flatten_annotated(obj, self.meta_node)

        if remaining_depth is not None and remaining_depth <= 0:
            # Depth limit reached: record a remark and stop descending.
            self.meta_node.annotate(rem=[["!limit", "x"]])
            if self.meta_node.is_databag():
                return _flatten_annotated(
                    strip_string(safe_repr(obj)), self.meta_node
                )
            return None

        if self.meta_node.is_databag():
            hints = {"memo": self.memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                # A failing processor must not abort serialization.
                with capture_internal_exceptions():
                    result = processor(obj, hints)
                    if result is not NotImplemented:
                        return _flatten_annotated(result, self.meta_node)

        if isinstance(obj, Mapping):
            # Create temporary list here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            items = []
            for i, (k, v) in enumerate(iteritems(obj)):
                if max_breadth is not None and i >= max_breadth:
                    # Record the original size of the container rather than
                    # the cap, so the annotation says how much was dropped.
                    self.meta_node.annotate(len=len(obj))
                    break

                items.append((k, v))

            rv_dict = {}  # type: Dict[Any, Any]
            for k, v in items:
                k = text_type(k)

                with self.enter(k):
                    v = self._serialize_node(
                        v, max_depth=max_depth, max_breadth=max_breadth
                    )
                    # Children that serialize to None are omitted.
                    if v is not None:
                        rv_dict[k] = v

            return rv_dict
        elif isinstance(obj, Sequence) and not isinstance(obj, string_types):
            rv_list = []  # type: List[Any]
            for i, v in enumerate(obj):
                if max_breadth is not None and i >= max_breadth:
                    # As for mappings: annotate with the original length.
                    self.meta_node.annotate(len=len(obj))
                    break

                with self.enter(i):
                    rv_list.append(
                        self._serialize_node(
                            v, max_depth=max_depth, max_breadth=max_breadth
                        )
                    )

            return rv_list

        if self.meta_node.should_repr_strings():
            obj = safe_repr(obj)
        else:
            if obj is None or isinstance(obj, (bool, number_types)):
                return obj

            if isinstance(obj, datetime):
                return text_type(obj.strftime("%Y-%m-%dT%H:%M:%SZ"))

            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        return _flatten_annotated(strip_string(obj), self.meta_node)
コード例 #3
0
    def _serialize_node_impl(
        obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
        """
        Serialize ``obj`` into plain dicts, lists, strings and scalars.

        ``is_databag`` and ``should_repr_strings`` are resolved through
        ``_is_databag()`` / ``_should_repr_strings()`` when passed as None.
        Inside a databag, missing depth and breadth limits default to
        MAX_DATABAG_DEPTH and MAX_DATABAG_BREADTH. Returns None when the
        depth limit is reached outside of a databag.
        """
        if should_repr_strings is None:
            should_repr_strings = _should_repr_strings()
        if is_databag is None:
            is_databag = _is_databag()

        if is_databag:
            if remaining_depth is None:
                remaining_depth = MAX_DATABAG_DEPTH
            if remaining_breadth is None:
                remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        # Depth budget exhausted: leave a remark and stop descending.
        if remaining_depth is not None and remaining_depth <= 0:
            _annotate(rem=[["!limit", "x"]])
            if not is_databag:
                return None
            return _flatten_annotated(strip_string(safe_repr(obj)))

        # Give registered repr processors the first shot at databag values.
        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                processed = processor(obj, hints)
                if processed is not NotImplemented:
                    return _flatten_annotated(processed)

        if obj is None or isinstance(obj, (bool, number_types)):
            if should_repr_strings:
                return safe_repr(obj)
            return obj

        if isinstance(obj, datetime):
            if should_repr_strings:
                return safe_repr(obj)
            return text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))

        if isinstance(obj, Mapping):
            # Work on a private copy so that code run during serialization
            # cannot mutate the mapping while it is being iterated.
            snapshot = dict(iteritems(obj))
            child_depth = None if remaining_depth is None else remaining_depth - 1

            serialized_items = {}
            kept = 0
            for key, value in iteritems(snapshot):
                if remaining_breadth is not None and kept >= remaining_breadth:
                    _annotate(len=len(snapshot))
                    break

                key_text = text_type(key)
                child = _serialize_node(
                    value,
                    segment=key_text,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=child_depth,
                    remaining_breadth=remaining_breadth,
                )
                # Entries serialized to None are skipped and do not count
                # against the breadth budget.
                if child is None:
                    continue
                serialized_items[key_text] = child
                kept += 1

            return serialized_items

        if not isinstance(obj, serializable_str_types) and isinstance(obj, Sequence):
            child_depth = None if remaining_depth is None else remaining_depth - 1

            serialized_elements = []
            for index, element in enumerate(obj):
                if remaining_breadth is not None and index >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                child = _serialize_node(
                    element,
                    segment=index,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=child_depth,
                    remaining_breadth=remaining_breadth,
                )
                serialized_elements.append(child)

            return serialized_elements

        # Everything else ends up as a (possibly truncated) string.
        if not should_repr_strings and isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")
        if should_repr_strings or not isinstance(obj, string_types):
            obj = safe_repr(obj)

        return _flatten_annotated(strip_string(obj))
コード例 #4
0
    def _serialize_node_impl(obj, is_databag, should_repr_strings,
                             remaining_depth, remaining_breadth):
        # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
        """
        Serialize ``obj`` into plain dicts, lists, strings and scalars.

        ``is_databag`` and ``should_repr_strings`` are resolved through
        ``_is_databag()`` / ``_should_repr_strings()`` when passed as None.
        Inside a databag, missing depth and breadth limits default to
        MAX_DATABAG_DEPTH and MAX_DATABAG_BREADTH. Infinite and NaN floats
        are returned as their ``safe_repr`` text. Returns None when the depth
        limit is reached outside of a databag.
        """
        if should_repr_strings is None:
            should_repr_strings = _should_repr_strings()
        if is_databag is None:
            is_databag = _is_databag()

        if is_databag:
            if remaining_depth is None:
                remaining_depth = MAX_DATABAG_DEPTH
            if remaining_breadth is None:
                remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        # Depth budget exhausted: leave a remark and stop descending.
        if remaining_depth is not None and remaining_depth <= 0:
            _annotate(rem=[["!limit", "x"]])
            if not is_databag:
                return None
            return _flatten_annotated(strip_string(safe_repr(obj)))

        # Give registered repr processors the first shot at databag values.
        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                processed = processor(obj, hints)
                if processed is not NotImplemented:
                    return _flatten_annotated(processed)

        if obj is None or isinstance(obj, (bool, number_types)):
            if should_repr_strings:
                return safe_repr(obj)
            # Non-finite floats are emitted as text instead of as numbers.
            if isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj)):
                return safe_repr(obj)
            return obj

        if isinstance(obj, datetime):
            if should_repr_strings:
                return safe_repr(obj)
            return text_type(format_timestamp(obj))

        if isinstance(obj, Mapping):
            # Work on a private copy so that code run during serialization
            # cannot mutate the mapping while it is being iterated.
            snapshot = dict(iteritems(obj))
            child_depth = None if remaining_depth is None else remaining_depth - 1

            serialized_items = {}  # type: Dict[str, Any]
            kept = 0
            for key, value in iteritems(snapshot):
                if remaining_breadth is not None and kept >= remaining_breadth:
                    _annotate(len=len(snapshot))
                    break

                key_text = text_type(key)
                # Unlike scalars returned above, entries are stored even when
                # they serialize to None.
                serialized_items[key_text] = _serialize_node(
                    value,
                    segment=key_text,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=child_depth,
                    remaining_breadth=remaining_breadth,
                )
                kept += 1

            return serialized_items

        if not isinstance(obj, serializable_str_types) and isinstance(
                obj, (Set, Sequence)):
            child_depth = None if remaining_depth is None else remaining_depth - 1

            serialized_elements = []
            for index, element in enumerate(obj):
                if remaining_breadth is not None and index >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                child = _serialize_node(
                    element,
                    segment=index,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=child_depth,
                    remaining_breadth=remaining_breadth,
                )
                serialized_elements.append(child)

            return serialized_elements

        # Everything else ends up as a string.
        if not should_repr_strings and isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")
        if should_repr_strings or not isinstance(obj, string_types):
            obj = safe_repr(obj)

        # Allow span descriptions to be longer than other strings.
        #
        # For database auto-instrumented spans, the description contains
        # potentially long SQL queries that are most useful when not truncated.
        # Because arbitrarily large events may be discarded by the server as a
        # protection mechanism, we dynamically limit the description length
        # later in _truncate_span_descriptions.
        is_span_description = (
            smart_transaction_trimming
            and len(path) == 3
            and path[0] == "spans"
            and path[-1] == "description"
        )
        if is_span_description:
            # Remember the untruncated length for the later trimming pass.
            span_description_bytes.append(len(obj))
            return obj

        return _flatten_annotated(strip_string(obj))
コード例 #5
0
 def x(template, params):
     """Run ``format_and_strip`` on ``template`` and ``params``.

     The ``strip_string`` callback handed to ``format_and_strip`` always
     truncates to ``max_length`` (a name defined outside this function).
     """

     def _strip_to_max_length(x, **_):
         # Extra keyword arguments from the caller are accepted and ignored.
         return strip_string(x, max_length=max_length)

     return format_and_strip(template, params, strip_string=_strip_to_max_length)
コード例 #6
0
    def _serialize_node_impl(
        obj, max_depth, max_breadth, is_databag, should_repr_strings
    ):
        # type: (Any, Optional[int], Optional[int], Optional[bool], Optional[bool]) -> Any
        """
        Serialize ``obj`` into plain dicts, lists, strings and scalars.

        The current position is taken from the free name ``path``. A falsy
        ``should_repr_strings`` or ``is_databag`` is re-derived from that
        position: frame ``vars`` are repr'd, and repr'd values as well as
        ``request.data``, breadcrumbs and ``extra`` count as databags.

        :param obj: the value to serialize.
        :param max_depth: absolute depth limit, or None for no limit. When
            both limits are None inside a databag, limits are derived from
            the current depth and the MAX_DATABAG_* constants.
        :param max_breadth: maximum number of children kept per container,
            or None for no limit.
        :param is_databag: whether an ancestor already marked this subtree
            as a databag.
        :param should_repr_strings: whether an ancestor already requested
            ``safe_repr`` output for this subtree.
        :returns: the serialized value, or None when the depth limit is hit
            outside of a databag.
        """
        if not should_repr_strings:
            should_repr_strings = (
                _startswith_path(
                    ("exception", "values", None, "stacktrace", "frames", None, "vars")
                )
                or _startswith_path(
                    ("threads", "values", None, "stacktrace", "frames", None, "vars")
                )
                or _startswith_path(("stacktrace", "frames", None, "vars"))
            )

        # Scalars and datetimes are handled before any limit bookkeeping.
        if obj is None or isinstance(obj, (bool, number_types)):
            return obj if not should_repr_strings else safe_repr(obj)

        if isinstance(obj, datetime):
            return (
                text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
                if not should_repr_strings
                else safe_repr(obj)
            )

        if not is_databag:
            is_databag = (
                should_repr_strings
                or _startswith_path(("request", "data"))
                or _startswith_path(("breadcrumbs", None))
                or _startswith_path(("extra",))
            )

        cur_depth = len(path)
        if max_depth is None and max_breadth is None and is_databag:
            max_depth = cur_depth + MAX_DATABAG_DEPTH
            # NOTE(review): the breadth limit is offset by the current depth
            # as well, which looks unintended -- confirm before changing.
            max_breadth = cur_depth + MAX_DATABAG_BREADTH

        if max_depth is None:
            remaining_depth = None
        else:
            remaining_depth = max_depth - cur_depth

        obj = _flatten_annotated(obj)

        if remaining_depth is not None and remaining_depth <= 0:
            # Depth limit reached: record a remark and stop descending.
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(strip_string(safe_repr(obj)))
            return None

        if global_repr_processors and is_databag:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        if isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            # Only annotate when entries are actually dropped, i.e. when the
            # mapping is strictly larger than the breadth limit.
            if max_breadth is not None and len(obj) > max_breadth:
                snapshot = dict(itertools.islice(iteritems(obj), None, max_breadth))
                _annotate(len=len(obj))
            elif type(obj) is dict:
                snapshot = dict(obj)
            else:
                snapshot = dict(iteritems(obj))

            # Build the result in a separate dict. Re-inserting stringified
            # keys into the dict being walked would clobber a not yet visited
            # entry whose key has the same text (e.g. 1 and "1") and then
            # serialize the already serialized value a second time.
            rv_dict = {}
            for k, v in iteritems(snapshot):
                str_k = text_type(k)
                v = _serialize_node(
                    v,
                    max_depth=max_depth,
                    max_breadth=max_breadth,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                )
                # Children that serialize to None are omitted.
                if v is not None:
                    rv_dict[str_k] = v

            return rv_dict
        elif not isinstance(obj, string_types) and isinstance(obj, Sequence):
            # Same rule as for mappings: annotate only on real truncation.
            if max_breadth is not None and len(obj) > max_breadth:
                rv_list = list(obj)[:max_breadth]
                _annotate(len=len(obj))
            else:
                rv_list = list(obj)

            # Replace each element of the private copy with its serialized
            # form.
            for i, item in enumerate(rv_list):
                rv_list[i] = _serialize_node(
                    item,
                    max_depth=max_depth,
                    max_breadth=max_breadth,
                    segment=i,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                )

            return rv_list

        if should_repr_strings:
            obj = safe_repr(obj)
        else:
            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        return _flatten_annotated(strip_string(obj))