def _truncate_span_descriptions(serialized_event, event, excess_bytes):
    # type: (Event, Event, int) -> None
    """
    Modifies serialized_event in-place trying to remove excess_bytes from
    span descriptions. The original event is used read-only to access the
    span timestamps (represented as RFC 3339-formatted strings in
    serialized_event).

    It uses heuristics to prioritize preserving the description of spans
    that might be the most interesting ones in terms of understanding and
    optimizing performance.
    """
    # When truncating a description, preserve a small prefix.
    min_length = 10

    def shortest_duration_longest_description_first(args):
        # type: (Tuple[int, Span]) -> Tuple[timedelta, int]
        # Sort key: spans with the shortest duration are truncated first;
        # among equal durations, the longest description goes first (note
        # the negated length) so it yields the most savings.
        i, serialized_span = args
        # Look up the *original* span (by index) for datetime timestamps;
        # the serialized copy only has string timestamps.
        span = event["spans"][i]
        # Missing timestamps fall back to "now", giving a zero (or very
        # short) duration so such spans are truncated early.
        now = datetime.utcnow()
        start = span.get("start_timestamp") or now
        end = span.get("timestamp") or now
        duration = end - start
        description = serialized_span.get("description") or ""
        return (duration, -len(description))

    # Note: for simplicity we sort spans by exact duration and description
    # length. If ever needed, we could have a more involved heuristic, e.g.
    # replacing exact durations with "buckets" and/or looking at other span
    # properties.
    # NOTE(review): `path` and `meta_stack` are free variables, presumably
    # the annotation bookkeeping of an enclosing serializer closure —
    # confirm against the surrounding file.
    path.append("spans")
    for i, span in sorted(
        enumerate(serialized_event.get("spans") or []),
        key=shortest_duration_longest_description_first,
    ):
        description = span.get("description") or ""
        if len(description) <= min_length:
            # Nothing to save here; already at or below the preserved prefix.
            continue
        # Approximate savings: actual stripped length may differ slightly
        # (strip_string may add an ellipsis marker).
        excess_bytes -= len(description) - min_length
        path.extend([i, "description"])
        # Note: the last time we call strip_string we could preserve a few
        # more bytes up to a total length of MAX_EVENT_BYTES. Since that's
        # not strictly required, we leave it out for now for simplicity.
        span["description"] = _flatten_annotated(
            strip_string(description, max_length=min_length)
        )
        # Restore `path` to ["spans"] and drop any meta entries recorded
        # below the current path depth.
        del path[-2:]
        del meta_stack[len(path) + 1 :]
        if excess_bytes <= 0:
            break
    path.pop()
    del meta_stack[len(path) + 1 :]
def _serialize_node_impl(self, obj, max_depth, max_breadth):
    # type: (Any, Optional[int], Optional[int]) -> Any
    """
    Recursively serialize ``obj`` into JSON-safe primitives, enforcing
    depth/breadth limits and recording trimming metadata on
    ``self.meta_node``.

    :param obj: the value to serialize (any type).
    :param max_depth: absolute depth (in tree levels) past which values are
        cut off; ``None`` means no limit has been fixed yet.
    :param max_breadth: maximum number of items kept per container;
        ``None`` means unlimited so far.
    :returns: a serialized primitive/dict/list, or ``None`` when the value
        is dropped at the depth limit outside of a databag.
    """
    # Databags (variable/extra-style payloads) get default depth/breadth
    # limits relative to the current depth, fixed on first entry.
    if max_depth is None and max_breadth is None and self.meta_node.is_databag():
        max_depth = self.meta_node._depth + MAX_DATABAG_DEPTH
        max_breadth = self.meta_node._depth + MAX_DATABAG_BREADTH

    if max_depth is None:
        remaining_depth = None
    else:
        remaining_depth = max_depth - self.meta_node._depth

    obj = _flatten_annotated(obj, self.meta_node)

    # Depth limit reached: record a "!limit" trim annotation. Inside a
    # databag the value is replaced by its (stripped) repr; elsewhere it is
    # dropped entirely.
    if remaining_depth is not None and remaining_depth <= 0:
        self.meta_node.annotate(rem=[["!limit", "x"]])
        if self.meta_node.is_databag():
            return _flatten_annotated(strip_string(safe_repr(obj)), self.meta_node)
        return None

    # Give registered repr processors a chance to serialize databag values
    # (e.g. framework-specific objects); first non-NotImplemented wins.
    if self.meta_node.is_databag():
        hints = {"memo": self.memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            with capture_internal_exceptions():
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result, self.meta_node)

    if isinstance(obj, Mapping):
        # Create temporary list here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        items = []
        for i, (k, v) in enumerate(iteritems(obj)):
            if max_breadth is not None and i >= max_breadth:
                # Breadth limit: note the number of items kept.
                self.meta_node.annotate(len=max_breadth)
                break
            items.append((k, v))

        rv_dict = {}  # type: Dict[Any, Any]
        for k, v in items:
            k = text_type(k)
            # self.enter(k) presumably pushes k onto the serialization path
            # for metadata purposes — confirm against the enclosing class.
            with self.enter(k):
                v = self._serialize_node(
                    v, max_depth=max_depth, max_breadth=max_breadth
                )
                # Values serialized to None are omitted from the result.
                if v is not None:
                    rv_dict[k] = v

        return rv_dict
    elif isinstance(obj, Sequence) and not isinstance(obj, string_types):
        rv_list = []  # type: List[Any]
        for i, v in enumerate(obj):
            if max_breadth is not None and i >= max_breadth:
                self.meta_node.annotate(len=max_breadth)
                break
            with self.enter(i):
                rv_list.append(
                    self._serialize_node(
                        v, max_depth=max_depth, max_breadth=max_breadth
                    )
                )

        return rv_list

    # Scalar handling: inside repr-string contexts everything becomes its
    # repr; otherwise primitives pass through and everything else is
    # coerced to a (possibly truncated) string.
    if self.meta_node.should_repr_strings():
        obj = safe_repr(obj)
    else:
        if obj is None or isinstance(obj, (bool, number_types)):
            return obj

        if isinstance(obj, datetime):
            # Note: second precision only (no microseconds), UTC "Z" suffix.
            return text_type(obj.strftime("%Y-%m-%dT%H:%M:%SZ"))

        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")

        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    return _flatten_annotated(strip_string(obj), self.meta_node)
def _serialize_node_impl(
    obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
):
    # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
    """
    Recursively serialize ``obj`` into JSON-safe primitives.

    :param obj: value to serialize.
    :param is_databag: whether we are inside a databag (variable/extra
        payload); ``None`` means "decide from context via _is_databag()".
    :param should_repr_strings: force repr() on all values (frame vars);
        ``None`` means "decide from context".
    :param remaining_depth: levels of nesting still allowed, or ``None``
        for unlimited. Databags default to MAX_DATABAG_DEPTH.
    :param remaining_breadth: items allowed per container, or ``None``
        for unlimited. Databags default to MAX_DATABAG_BREADTH.
    :returns: serialized value, or ``None`` when dropped at the depth
        limit outside a databag.
    """
    if should_repr_strings is None:
        should_repr_strings = _should_repr_strings()

    if is_databag is None:
        is_databag = _is_databag()

    # Databags get default trimming limits when none were given.
    if is_databag and remaining_depth is None:
        remaining_depth = MAX_DATABAG_DEPTH

    if is_databag and remaining_breadth is None:
        remaining_breadth = MAX_DATABAG_BREADTH

    obj = _flatten_annotated(obj)

    # Depth exhausted: annotate the trim; databag values degrade to a
    # stripped repr, anything else is dropped.
    if remaining_depth is not None and remaining_depth <= 0:
        _annotate(rem=[["!limit", "x"]])
        if is_databag:
            return _flatten_annotated(strip_string(safe_repr(obj)))
        return None

    # Let registered repr processors serialize databag values first; the
    # first one not returning NotImplemented wins.
    if is_databag and global_repr_processors:
        hints = {"memo": memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            result = processor(obj, hints)
            if result is not NotImplemented:
                return _flatten_annotated(result)

    if obj is None or isinstance(obj, (bool, number_types)):
        return obj if not should_repr_strings else safe_repr(obj)

    elif isinstance(obj, datetime):
        # Microsecond precision with a UTC "Z" suffix.
        return (
            text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
            if not should_repr_strings
            else safe_repr(obj)
        )

    elif isinstance(obj, Mapping):
        # Create temporary copy here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        obj = dict(iteritems(obj))

        rv_dict = {}
        # i counts *kept* entries (only incremented when a value survives
        # serialization), so dropped values don't consume breadth budget.
        i = 0

        for k, v in iteritems(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                # Breadth limit hit: record the original container length.
                _annotate(len=len(obj))
                break

            str_k = text_type(k)
            v = _serialize_node(
                v,
                segment=str_k,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
                remaining_depth=remaining_depth - 1
                if remaining_depth is not None
                else None,
                remaining_breadth=remaining_breadth,
            )
            if v is not None:
                rv_dict[str_k] = v
                i += 1

        return rv_dict

    elif not isinstance(obj, serializable_str_types) and isinstance(obj, Sequence):
        rv_list = []

        for i, v in enumerate(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                _annotate(len=len(obj))
                break

            rv_list.append(
                _serialize_node(
                    v,
                    segment=i,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1
                    if remaining_depth is not None
                    else None,
                    remaining_breadth=remaining_breadth,
                )
            )

        return rv_list

    # Fallback: coerce everything else to a (possibly truncated) string.
    if should_repr_strings:
        obj = safe_repr(obj)
    else:
        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")
        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    return _flatten_annotated(strip_string(obj))
def _serialize_node_impl(
    obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
):
    # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
    """
    Recursively serialize ``obj`` into JSON-safe primitives, applying
    depth/breadth trimming, repr processors, and a special carve-out that
    keeps span descriptions untruncated for later, smarter trimming.

    :param obj: value to serialize.
    :param is_databag: inside a databag payload; ``None`` = decide via
        ``_is_databag()``.
    :param should_repr_strings: force ``safe_repr`` on values (frame
        vars); ``None`` = decide via ``_should_repr_strings()``.
    :param remaining_depth: nesting levels still allowed (``None`` =
        unlimited; databags default to MAX_DATABAG_DEPTH).
    :param remaining_breadth: items allowed per container (``None`` =
        unlimited; databags default to MAX_DATABAG_BREADTH).
    :returns: the serialized value; ``None`` when dropped at the depth
        limit outside a databag.
    """
    if should_repr_strings is None:
        should_repr_strings = _should_repr_strings()

    if is_databag is None:
        is_databag = _is_databag()

    # Databags get default trimming limits when none were supplied.
    if is_databag and remaining_depth is None:
        remaining_depth = MAX_DATABAG_DEPTH

    if is_databag and remaining_breadth is None:
        remaining_breadth = MAX_DATABAG_BREADTH

    obj = _flatten_annotated(obj)

    # Depth exhausted: annotate the trim; databag values degrade to a
    # stripped repr, anything else is dropped.
    if remaining_depth is not None and remaining_depth <= 0:
        _annotate(rem=[["!limit", "x"]])
        if is_databag:
            return _flatten_annotated(strip_string(safe_repr(obj)))
        return None

    # Registered repr processors get first crack at databag values.
    if is_databag and global_repr_processors:
        hints = {"memo": memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            result = processor(obj, hints)
            if result is not NotImplemented:
                return _flatten_annotated(result)

    if obj is None or isinstance(obj, (bool, number_types)):
        # Infinity/NaN are not valid JSON, so they are repr'd even when
        # strings are normally passed through.
        if should_repr_strings or (
            isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))
        ):
            return safe_repr(obj)
        else:
            return obj

    elif isinstance(obj, datetime):
        return (
            text_type(format_timestamp(obj))
            if not should_repr_strings
            else safe_repr(obj)
        )

    elif isinstance(obj, Mapping):
        # Create temporary copy here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        obj = dict(iteritems(obj))
        rv_dict = {}  # type: Dict[str, Any]
        i = 0

        for k, v in iteritems(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                # Breadth limit hit: record the original container length.
                _annotate(len=len(obj))
                break

            str_k = text_type(k)
            v = _serialize_node(
                v,
                segment=str_k,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
                remaining_depth=remaining_depth - 1
                if remaining_depth is not None
                else None,
                remaining_breadth=remaining_breadth,
            )
            # Note: unlike older variants, None values are kept here.
            rv_dict[str_k] = v
            i += 1

        return rv_dict

    elif not isinstance(obj, serializable_str_types) and isinstance(
        obj, (Set, Sequence)
    ):
        rv_list = []

        for i, v in enumerate(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                _annotate(len=len(obj))
                break

            rv_list.append(
                _serialize_node(
                    v,
                    segment=i,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1
                    if remaining_depth is not None
                    else None,
                    remaining_breadth=remaining_breadth,
                )
            )

        return rv_list

    # Fallback: coerce everything else to a string.
    if should_repr_strings:
        obj = safe_repr(obj)
    else:
        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")
        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    # Allow span descriptions to be longer than other strings.
    #
    # For database auto-instrumented spans, the description contains
    # potentially long SQL queries that are most useful when not truncated.
    # Because arbitrarily large events may be discarded by the server as a
    # protection mechanism, we dynamically limit the description length
    # later in _truncate_span_descriptions.
    # NOTE(review): `path`, `smart_transaction_trimming` and
    # `span_description_bytes` are free variables from the enclosing
    # serializer closure — confirm against the surrounding file.
    if (
        smart_transaction_trimming
        and len(path) == 3
        and path[0] == "spans"
        and path[-1] == "description"
    ):
        span_description_bytes.append(len(obj))
        return obj
    return _flatten_annotated(strip_string(obj))
def x(template, params):
    """Format *template* with *params*, truncating each interpolated value
    to *max_length* (a free variable from the enclosing scope)."""

    def _bounded_strip(value, **_ignored):
        # Same contract as the original lambda: ignore extra keyword
        # arguments and cap the string at max_length.
        return strip_string(value, max_length=max_length)

    return format_and_strip(template, params, strip_string=_bounded_strip)
def _serialize_node_impl(
    obj, max_depth, max_breadth, is_databag, should_repr_strings
):
    # type: (Any, Optional[int], Optional[int], Optional[bool], Optional[bool]) -> Any
    """
    Recursively serialize ``obj`` into JSON-safe primitives, deciding
    databag/repr behavior from the current serialization path and
    applying absolute depth/breadth limits.

    :param obj: value to serialize.
    :param max_depth: absolute depth limit (in path levels); ``None``
        means not yet fixed.
    :param max_breadth: max items kept per container; ``None`` means
        unlimited so far.
    :param is_databag: whether we are in a databag context; falsy means
        "derive from the current path".
    :param should_repr_strings: force ``safe_repr`` on values; falsy
        means "derive from the current path" (stack-frame vars).
    :returns: serialized value, or ``None`` when dropped at the depth
        limit outside a databag.
    """
    # Frame-local variables (exception/thread/top-level stacktraces) are
    # always repr'd, since they can be arbitrary objects.
    if not should_repr_strings:
        should_repr_strings = (
            _startswith_path(
                ("exception", "values", None, "stacktrace", "frames", None, "vars")
            )
            or _startswith_path(
                ("threads", "values", None, "stacktrace", "frames", None, "vars")
            )
            or _startswith_path(("stacktrace", "frames", None, "vars"))
        )

    if obj is None or isinstance(obj, (bool, number_types)):
        return obj if not should_repr_strings else safe_repr(obj)

    if isinstance(obj, datetime):
        # Microsecond precision with a UTC "Z" suffix.
        return (
            text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
            if not should_repr_strings
            else safe_repr(obj)
        )

    # Databag contexts: request bodies, breadcrumbs, extras, and anything
    # already forced into repr mode.
    if not is_databag:
        is_databag = (
            should_repr_strings
            or _startswith_path(("request", "data"))
            or _startswith_path(("breadcrumbs", None))
            or _startswith_path(("extra",))
        )

    # Depth is measured against the (closure-provided) current path.
    cur_depth = len(path)
    if max_depth is None and max_breadth is None and is_databag:
        max_depth = cur_depth + MAX_DATABAG_DEPTH
        max_breadth = cur_depth + MAX_DATABAG_BREADTH

    if max_depth is None:
        remaining_depth = None
    else:
        remaining_depth = max_depth - cur_depth

    obj = _flatten_annotated(obj)

    # Depth exhausted: annotate the trim; databag values degrade to a
    # stripped repr, anything else is dropped.
    if remaining_depth is not None and remaining_depth <= 0:
        _annotate(rem=[["!limit", "x"]])
        if is_databag:
            return _flatten_annotated(strip_string(safe_repr(obj)))
        return None

    # Registered repr processors get first crack at databag values.
    if global_repr_processors and is_databag:
        hints = {"memo": memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            result = processor(obj, hints)
            if result is not NotImplemented:
                return _flatten_annotated(result)

    if isinstance(obj, Mapping):
        # Create temporary copy here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        if max_breadth is not None and len(obj) >= max_breadth:
            # Over the breadth limit: keep only the first max_breadth
            # items and record the original length.
            rv_dict = dict(itertools.islice(iteritems(obj), None, max_breadth))
            _annotate(len=len(obj))
        else:
            if type(obj) is dict:
                rv_dict = dict(obj)
            else:
                rv_dict = dict(iteritems(obj))

        # Rewrite the copy in place: pop each original key and reinsert
        # the serialized value under the stringified key.
        for k in list(rv_dict):
            str_k = text_type(k)
            v = _serialize_node(
                rv_dict.pop(k),
                max_depth=max_depth,
                max_breadth=max_breadth,
                segment=str_k,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
            )
            # Values serialized to None are omitted from the result.
            if v is not None:
                rv_dict[str_k] = v

        return rv_dict
    elif not isinstance(obj, string_types) and isinstance(obj, Sequence):
        if max_breadth is not None and len(obj) >= max_breadth:
            rv_list = list(obj)[:max_breadth]
            _annotate(len=len(obj))
        else:
            rv_list = list(obj)

        for i in range(len(rv_list)):
            rv_list[i] = _serialize_node(
                rv_list[i],
                max_depth=max_depth,
                max_breadth=max_breadth,
                segment=i,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
            )

        return rv_list

    # Fallback: coerce everything else to a (possibly truncated) string.
    if should_repr_strings:
        obj = safe_repr(obj)
    else:
        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")
        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    return _flatten_annotated(strip_string(obj))