def _send_event(
    self, event  # type: Event
):
    # type: (...) -> None
    """Gzip-compress ``event`` and pass it to ``self._send_request``.

    When ``self._check_disabled("error")`` is true nothing is sent; the drop
    is reported through ``self.on_dropped_event("self_rate_limits")``
    instead.

    :param event: The event to send. It is encoded with ``json_dumps`` and
        its ``type``, ``level`` and ``event_id`` entries are only read for
        the debug log line.
    """
    if self._check_disabled("error"):
        self.on_dropped_event("self_rate_limits")
        return None

    body = io.BytesIO()
    with gzip.GzipFile(fileobj=body, mode="w") as f:
        f.write(json_dumps(event))

    assert self.parsed_dsn is not None
    # Hand the values to the logger as arguments instead of pre-formatting
    # the message, so interpolation only happens if the record is emitted.
    logger.debug(
        "Sending event, type:%s level:%s event_id:%s project:%s host:%s",
        event.get("type") or "null",
        event.get("level") or "null",
        event.get("event_id") or "null",
        self.parsed_dsn.project_id,
        self.parsed_dsn.host,
    )
    self._send_request(
        body.getvalue(),
        headers={"Content-Type": "application/json", "Content-Encoding": "gzip"},
    )
    return None
def serialize_into(
    self, f  # type: Any
):
    # type: (...) -> None
    """Write the headers as one line to ``f``, then let each item write itself.

    :param f: Object with a ``write`` method that accepts bytes.
    """
    header_line = json_dumps(self.headers)
    f.write(header_line)
    f.write(b"\n")
    # Every item is responsible for its own framing.
    for entry in self.items:
        entry.serialize_into(f)
def get_bytes(self):
    # type: (...) -> bytes
    """Return the payload bytes, computing and caching them on first use.

    The result is stored in ``self.bytes``. Sources are tried in this order:
    the file at ``self.path``, then ``json_dumps(self.json)``, and finally
    an empty byte string when neither is set.
    """
    # Fast path: already materialised by an earlier call (or preset).
    if self.bytes is not None:
        return self.bytes

    if self.path is not None:
        with open(self.path, "rb") as source:
            self.bytes = source.read()
    elif self.json is not None:
        self.bytes = json_dumps(self.json)
    else:
        self.bytes = b""

    return self.bytes
def serialize_into(
    self, f  # type: Any
):
    # type: (...) -> None
    """Write a JSON header line followed by the payload line to ``f``.

    The headers are copied before the ``length`` entry (payload size as
    reported by ``len``) is added, so ``self.headers`` is not modified.

    :param f: Object with a ``write`` method that accepts bytes.
    """
    headers = dict(self.headers)
    # Named ``payload`` rather than ``bytes`` so the builtin is not shadowed.
    payload = self.get_bytes()
    headers["length"] = len(payload)
    f.write(json_dumps(headers))
    f.write(b"\n")
    f.write(payload)
    f.write(b"\n")
def serialize_into(
    self, f  # type: Any
):
    # type: (...) -> None
    """Write a JSON header line to ``f`` and let the payload write itself.

    ``self.payload._prepare_serialize()`` supplies the payload length, which
    is added to a copy of the headers, and a callable that writes the
    payload to ``f``.

    :param f: Object with a ``write`` method that accepts bytes.
    """
    header_fields = dict(self.headers)
    payload_length, write_payload = self.payload._prepare_serialize()
    header_fields.update(length=payload_length)

    f.write(json_dumps(header_fields))
    f.write(b"\n")
    write_payload(f)
    f.write(b"\n")
def serialize(event, smart_transaction_trimming=False, **kwargs):
    # type: (Event, bool, **Any) -> Event
    """
    Recursively convert ``event`` into a structure of plain values.

    Values that get trimmed along the way are annotated; if any annotation
    was recorded and the result is a dict, the annotations are attached under
    the ``"_meta"`` key.

    :param event: The event to serialize.
    :param smart_transaction_trimming: If true, strings at
        ``spans/<index>/description`` skip the usual ``strip_string`` call.
        They are only shortened afterwards, and only if the JSON-encoded
        result exceeds ``MAX_EVENT_BYTES``.
    :param kwargs: Forwarded unchanged to ``_serialize_node`` for the root
        object.
    :returns: The serialized event.

    ``disable_capture_event`` is set to ``True`` for the duration of the call
    and reset to ``False`` afterwards.
    """
    memo = Memo()
    # Segments (dict keys / list indices) leading to the node currently being
    # serialized. Shared and mutated by all nested helpers below.
    path = []  # type: List[Segment]
    # One meta node per level of `path` that has been annotated so far;
    # meta_stack[0] is the root and ends up as the event's "_meta".
    meta_stack = []  # type: List[Dict[str, Any]]
    # Lengths of span descriptions that were left untruncated.
    span_description_bytes = []  # type: List[int]

    def _annotate(**meta):
        # type: (**Any) -> None
        """
        Record ``meta`` for the node at the current ``path``, creating the
        chain of meta nodes down to it if it does not exist yet.
        """
        while len(meta_stack) <= len(path):
            try:
                segment = path[len(meta_stack) - 1]
                node = meta_stack[-1].setdefault(text_type(segment), {})
            except IndexError:
                # Nothing on the stack yet: start with a fresh root node.
                node = {}

            meta_stack.append(node)

        # The node's own metadata lives under the empty-string key, next to
        # the entries for its children.
        meta_stack[-1].setdefault("", {}).update(meta)

    def _should_repr_strings():
        # type: () -> Optional[bool]
        """
        By default non-serializable objects are going through
        safe_repr(). For certain places in the event (local vars) we
        want to repr() even things that are JSON-serializable to
        make their type more apparent. For example, it's useful to
        see the difference between a unicode-string and a bytestring
        when viewing a stacktrace.

        For container-types we still don't do anything different.
        Generally we just try to make the Sentry UI present exactly
        what a pretty-printed repr would look like.

        :returns: `True` if we are somewhere in frame variables, and `False` if
            we are in a position where we will never encounter frame variables
            when recursing (for example, we're in `event.extra`). `None` if we
            are not (yet) in frame variables, but might encounter them when
            recursing (e.g.  we're in `event.exception`)
        """
        try:
            p0 = path[0]
            if p0 == "stacktrace" and path[1] == "frames" and path[3] == "vars":
                return True

            if (p0 in ("threads", "exception") and path[1] == "values"
                    and path[3] == "stacktrace" and path[4] == "frames"
                    and path[6] == "vars"):
                return True
        except IndexError:
            # Path is still too short to decide.
            return None

        return False

    def _is_databag():
        # type: () -> Optional[bool]
        """
        A databag is any value that we need to trim.

        :returns: Works like `_should_repr_strings()`. `True` for "yes",
            `False` for "no", `None` for "maybe soon".
        """
        try:
            rv = _should_repr_strings()
            if rv in (True, None):
                return rv

            p0 = path[0]
            if p0 == "request" and path[1] == "data":
                return True

            if p0 == "breadcrumbs" and path[1] == "values":
                # Evaluated only for its possible IndexError: while the path
                # has no third segment yet, the answer is "maybe soon" (None).
                path[2]
                return True

            if p0 == "extra":
                return True

        except IndexError:
            return None

        return False

    def _serialize_node(
        obj,  # type: Any
        is_databag=None,  # type: Optional[bool]
        should_repr_strings=None,  # type: Optional[bool]
        segment=None,  # type: Optional[Segment]
        remaining_breadth=None,  # type: Optional[int]
        remaining_depth=None,  # type: Optional[int]
    ):
        # type: (...) -> Any
        """
        Serialize ``obj`` as the child ``segment`` of the current path.

        Maintains ``path``/``meta_stack`` around the call to
        ``_serialize_node_impl``, returns ``CYCLE_MARKER`` when ``memo``
        reports ``obj`` as already being serialized, and turns any exception
        into a placeholder string (databags) or ``None``.
        """
        if segment is not None:
            path.append(segment)

        try:
            with memo.memoize(obj) as result:
                if result:
                    return CYCLE_MARKER

                return _serialize_node_impl(
                    obj,
                    is_databag=is_databag,
                    should_repr_strings=should_repr_strings,
                    remaining_depth=remaining_depth,
                    remaining_breadth=remaining_breadth,
                )
        # NOTE(review): BaseException also covers KeyboardInterrupt and
        # SystemExit, which are swallowed here as well -- confirm intended.
        except BaseException:
            capture_internal_exception(sys.exc_info())

            if is_databag:
                return u"<failed to serialize, use init(debug=True) to see error logs>"

            return None
        finally:
            if segment is not None:
                path.pop()
                # Drop meta nodes that belonged to the subtree just left.
                del meta_stack[len(path) + 1:]

    def _flatten_annotated(obj):
        # type: (Any) -> Any
        """
        Unwrap an ``AnnotatedValue``: record its metadata for the current
        path and return the plain value. Other objects pass through as-is.
        """
        if isinstance(obj, AnnotatedValue):
            _annotate(**obj.metadata)
            obj = obj.value
        return obj

    def _serialize_node_impl(obj, is_databag, should_repr_strings,
                             remaining_depth, remaining_breadth):
        # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
        """
        Serialize ``obj`` according to its type, applying the depth/breadth
        limits and string trimming that apply at the current path.
        """
        # Undecided flags (None) are re-evaluated against the current path.
        if should_repr_strings is None:
            should_repr_strings = _should_repr_strings()

        if is_databag is None:
            is_databag = _is_databag()

        # Limits only start counting once we are inside a databag.
        if is_databag and remaining_depth is None:
            remaining_depth = MAX_DATABAG_DEPTH
        if is_databag and remaining_breadth is None:
            remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        if remaining_depth is not None and remaining_depth <= 0:
            # Depth budget used up: annotate, then emit a trimmed repr for
            # databags or drop the value otherwise.
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(strip_string(safe_repr(obj)))
            return None

        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            # The first processor that returns something other than
            # NotImplemented decides the result.
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        if obj is None or isinstance(obj, (bool, number_types)):
            # inf/nan floats are always emitted as their repr.
            if should_repr_strings or (isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))):
                return safe_repr(obj)
            else:
                return obj

        elif isinstance(obj, datetime):
            return (text_type(format_timestamp(obj)) if not should_repr_strings else safe_repr(obj))

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(iteritems(obj))

            rv_dict = {}  # type: Dict[str, Any]
            i = 0

            for k, v in iteritems(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Breadth budget used up: record the original size and
                    # skip the remaining entries.
                    _annotate(len=len(obj))
                    break

                str_k = text_type(k)
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1 if remaining_depth is not None else None,
                    remaining_breadth=remaining_breadth,
                )
                rv_dict[str_k] = v
                i += 1

            return rv_dict

        elif not isinstance(obj, serializable_str_types) and isinstance(
                obj, (Set, Sequence)):
            rv_list = []

            for i, v in enumerate(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Same breadth handling as for mappings above.
                    _annotate(len=len(obj))
                    break

                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        remaining_depth=remaining_depth - 1 if remaining_depth is not None else None,
                        remaining_breadth=remaining_breadth,
                    ))

            return rv_list

        # Everything else ends up as a string.
        if should_repr_strings:
            obj = safe_repr(obj)
        else:
            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        # Allow span descriptions to be longer than other strings.
        #
        # For database auto-instrumented spans, the description contains
        # potentially long SQL queries that are most useful when not truncated.
        # Because arbitrarily large events may be discarded by the server as a
        # protection mechanism, we dynamically limit the description length
        # later in _truncate_span_descriptions.
        if (smart_transaction_trimming and len(path) == 3
                and path[0] == "spans" and path[-1] == "description"):
            span_description_bytes.append(len(obj))
            return obj
        return _flatten_annotated(strip_string(obj))

    def _truncate_span_descriptions(serialized_event, event, excess_bytes):
        # type: (Event, Event, int) -> None
        """
        Modifies serialized_event in-place trying to remove excess_bytes from
        span descriptions. The original event is used read-only to access the
        span timestamps (represented as RFC 3339-formatted strings in
        serialized_event).

        It uses heuristics to prioritize preserving the description of spans
        that might be the most interesting ones in terms of understanding and
        optimizing performance.
        """
        # When truncating a description, preserve a small prefix.
        min_length = 10

        def shortest_duration_longest_description_first(args):
            # type: (Tuple[int, Span]) -> Tuple[timedelta, int]
            """
            Sort key: spans with shorter duration come first; among equal
            durations, longer descriptions come first.
            """
            i, serialized_span = args
            span = event["spans"][i]
            # NOTE(review): evaluated once per span, so the fallback for a
            # missing timestamp differs slightly from span to span.
            now = datetime.utcnow()
            start = span.get("start_timestamp") or now
            end = span.get("timestamp") or now
            duration = end - start
            description = serialized_span.get("description") or ""
            return (duration, -len(description))

        # Note: for simplicity we sort spans by exact duration and description
        # length. If ever needed, we could have a more involved heuristic, e.g.
        # replacing exact durations with "buckets" and/or looking at other span
        # properties.
        path.append("spans")
        for i, span in sorted(
            enumerate(serialized_event.get("spans") or []),
            key=shortest_duration_longest_description_first,
        ):
            description = span.get("description") or ""
            if len(description) <= min_length:
                continue
            excess_bytes -= len(description) - min_length
            # Point `path` at this description so the annotation produced by
            # the truncation below is recorded for the right node.
            path.extend([i, "description"])
            # Note: the last time we call strip_string we could preserve a few
            # more bytes up to a total length of MAX_EVENT_BYTES. Since that's
            # not strictly required, we leave it out for now for simplicity.
            span["description"] = _flatten_annotated(
                strip_string(description, max_length=min_length))
            del path[-2:]
            del meta_stack[len(path) + 1:]

            if excess_bytes <= 0:
                break
        path.pop()
        del meta_stack[len(path) + 1:]

    disable_capture_event.set(True)
    try:
        rv = _serialize_node(event, **kwargs)
        if meta_stack and isinstance(rv, dict):
            rv["_meta"] = meta_stack[0]

        sum_span_description_bytes = sum(span_description_bytes)
        if smart_transaction_trimming and sum_span_description_bytes > 0:
            span_count = len(event.get("spans") or [])
            # This is an upper bound of how many bytes all descriptions would
            # consume if the usual string truncation in _serialize_node_impl
            # would have taken place, not accounting for the metadata attached
            # as event["_meta"].
            descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH

            # If by not truncating descriptions we ended up with more bytes than
            # per the usual string truncation, check if the event is too large
            # and we need to truncate some descriptions.
            #
            # This is guarded with an if statement to avoid JSON-encoding the
            # event unnecessarily.
            if sum_span_description_bytes > descriptions_budget_bytes:
                original_bytes = len(json_dumps(rv))
                excess_bytes = original_bytes - MAX_EVENT_BYTES
                if excess_bytes > 0:
                    # Event is too large, will likely be discarded by the
                    # server. Trim it down before sending.
                    _truncate_span_descriptions(rv, event, excess_bytes)

                    # Span descriptions truncated, set or reset _meta.
                    #
                    # We run the same code earlier because we want to account
                    # for _meta when calculating original_bytes, the number of
                    # bytes in the JSON-encoded event.
                    if meta_stack and isinstance(rv, dict):
                        rv["_meta"] = meta_stack[0]
        return rv
    finally:
        disable_capture_event.set(False)
def test_too_large_event_truncated(sentry_init, capture_events):
    """An oversized transaction has some span descriptions shortened while
    the usual truncation of other fields keeps working."""
    sentry_init(
        traces_sample_rate=1,
        integrations=[SqlalchemyIntegration()],
        _experiments={"smart_transaction_trimming": True},
    )
    events = capture_events()

    oversized_message = "x" * (MAX_STRING_LENGTH + 10)

    with configure_scope() as scope:

        @scope.add_event_processor
        def inject_long_message(event, hint):
            event["message"] = oversized_message
            return event

    # Every span gets the same long statement as its description.
    statement = " UNION ".join("SELECT {}".format(n) for n in range(100))

    engine = create_engine("sqlite:///:memory:")
    with start_transaction(name="test"), engine.connect() as connection:
        for _ in range(2000):
            connection.execute(statement)

    (transaction_event,) = events
    spans = transaction_event["spans"]
    spans_meta = transaction_event["_meta"]["spans"]

    # The attached "_meta" is not part of the byte budget, so allow the
    # payload to end up slightly above MAX_EVENT_BYTES.
    size_limit = 1.2 * MAX_EVENT_BYTES
    assert len(json_dumps(transaction_event)) < size_limit

    # Only part of the recorded spans is kept.
    assert len(spans) == 1000

    # The amount of generated description data and the truncation itself are
    # deterministic, so the number of shortened descriptions is stable across
    # runs. Which spans are hit depends on the measured query durations and
    # therefore is not.
    assert len(spans_meta) == 537

    for index, span in enumerate(spans):
        description = span["description"]
        assert description.startswith("SELECT ")

        if str(index) not in spans_meta:
            # Untouched description: full original statement.
            assert len(description) == 1583
            assert description.endswith("SELECT 98 UNION SELECT 99")
        else:
            # Shortened description.
            assert len(description) == 10
            assert description.endswith("...")

    # Spot-check the metadata recorded for one shortened description.
    first_span_meta = next(iter(spans_meta.values()))
    assert first_span_meta == {
        "description": {"": {"len": 1583, "rem": [["!limit", "x", 7, 10]]}}
    }

    # Regular string truncation of other fields is unaffected ...
    assert len(transaction_event["message"]) == MAX_STRING_LENGTH

    # ... and its metadata is still present.
    assert transaction_event["_meta"]["message"] == {
        "": {"len": 522, "rem": [["!limit", "x", 509, 512]]}
    }