def sentry_patched_popen_init(self, *a, **kw):
    # type: (subprocess.Popen[Any], *Any, **Any) -> None
    """Monkeypatched ``subprocess.Popen.__init__``.

    Wraps the real constructor (``old_popen_init``) in a Sentry span and
    injects trace-propagation headers into the child's environment as
    ``SUBPROCESS_<HEADER>`` variables.  Falls through to the unpatched
    constructor when the stdlib integration is not active.
    """
    hub = Hub.current
    # Integration disabled: behave exactly like the original constructor.
    if hub.get_integration(StdlibIntegration) is None:
        return old_popen_init(self, *a, **kw)  # type: ignore

    # Convert from tuple to list to be able to set values.
    a = list(a)

    # Positional indexes 0, 9 and 10 correspond to Popen's ``args``,
    # ``cwd`` and ``env`` parameters (NOTE(review): matches the CPython
    # Popen signature — confirm if targeting a different runtime).
    args = _init_argument(a, kw, "args", 0) or []
    cwd = _init_argument(a, kw, "cwd", 9)

    # if args is not a list or tuple (and e.g. some iterator instead),
    # let's not use it at all. There are too many things that can go wrong
    # when trying to collect an iterator into a list and setting that list
    # into `a` again.
    #
    # Also invocations where `args` is not a sequence are not actually
    # legal. They just happen to work under CPython.
    description = None

    if isinstance(args, (list, tuple)) and len(args) < 100:
        with capture_internal_exceptions():
            description = " ".join(map(str, args))

    if description is None:
        description = safe_repr(args)

    env = None

    with hub.start_span(op="subprocess", description=description) as span:
        for k, v in hub.iter_trace_propagation_headers(span):
            # Materialize ``env`` lazily — only when there is at least one
            # header to propagate — defaulting to a copy of os.environ.
            if env is None:
                env = _init_argument(
                    a, kw, "env", 10, lambda x: dict(x or os.environ)
                )
            env["SUBPROCESS_" + k.upper().replace("-", "_")] = v

        if cwd:
            span.set_data("subprocess.cwd", cwd)

        # Run the real constructor inside the span so its duration is
        # captured; the pid only exists after it returns.
        rv = old_popen_init(self, *a, **kw)  # type: ignore

        span.set_tag("subprocess.pid", self.pid)
        return rv
def format_sql(sql, params):
    """Return ``(sql, rendered_params)`` for breadcrumb display.

    ``params`` may be a mapping (named placeholders) or a sequence.  For a
    mapping, ``sql % conv`` rewrites named placeholders to positional ones
    while ``conv`` records the referenced values in order.  Each parameter
    is rendered with ``safe_repr``; ``None`` is rendered as ``"NULL"``.
    """
    rv = []

    if isinstance(params, dict):
        # convert sql with named parameters to sql with unnamed parameters
        conv = _FormatConverter(params)
        if params:
            sql = sql % conv
            params = conv.params
        else:
            params = ()

    for param in params or ():
        if param is None:
            rv.append("NULL")
            # Fix: without this ``continue`` the loop fell through and
            # appended safe_repr(None) ("None") as well, producing two
            # rendered entries for a single parameter.
            continue
        rv.append(safe_repr(param))

    return sql, rv
def _format_sql_impl(sql, params):
    # type: (Any, Any) -> Tuple[str, List[str]]
    """Return ``(sql, rendered_params)`` for breadcrumb display.

    ``params`` may be a mapping (named placeholders) or a sequence.  For a
    mapping, ``sql % conv`` rewrites named placeholders to positional ones
    while ``conv`` records the referenced values in order.  Each parameter
    is rendered with ``safe_repr``; ``None`` is rendered as ``"NULL"``.
    """
    rv = []

    if isinstance(params, dict):
        # convert sql with named parameters to sql with unnamed parameters
        conv = _FormatConverter(params)
        if params:
            sql = sql % conv
            params = conv.params
        else:
            params = ()

    for param in params or ():
        if param is None:
            rv.append("NULL")
            # Fix: without this ``continue`` the loop fell through and
            # appended safe_repr(None) ("None") as well, producing two
            # rendered entries for a single parameter.
            continue
        rv.append(safe_repr(param))

    return sql, rv
def _serialize_node_impl(self, obj, max_depth, max_breadth):
    # type: (Any, Optional[int], Optional[int]) -> Any
    """Recursively serialize ``obj`` into JSON-safe values.

    Depth/breadth limits are expressed as absolute depths relative to
    ``self.meta_node._depth``; databag subtrees get default limits on
    first entry.  Trimming is recorded via ``self.meta_node.annotate``.
    """
    # Entering a databag with no explicit limits: derive absolute limits
    # from the current depth plus the databag defaults.
    if max_depth is None and max_breadth is None and self.meta_node.is_databag():
        max_depth = self.meta_node._depth + MAX_DATABAG_DEPTH
        max_breadth = self.meta_node._depth + MAX_DATABAG_BREADTH

    if max_depth is None:
        remaining_depth = None
    else:
        remaining_depth = max_depth - self.meta_node._depth

    obj = _flatten_annotated(obj, self.meta_node)

    # Depth budget exhausted: annotate the cut and either emit a repr
    # stub (databags) or drop the value entirely.
    if remaining_depth is not None and remaining_depth <= 0:
        self.meta_node.annotate(rem=[["!limit", "x"]])
        if self.meta_node.is_databag():
            return _flatten_annotated(strip_string(safe_repr(obj)), self.meta_node)
        return None

    # Give registered repr processors a chance to handle databag values.
    if self.meta_node.is_databag():
        hints = {"memo": self.memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            with capture_internal_exceptions():
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result, self.meta_node)

    if isinstance(obj, Mapping):
        # Create temporary list here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        items = []
        for i, (k, v) in enumerate(iteritems(obj)):
            if max_breadth is not None and i >= max_breadth:
                self.meta_node.annotate(len=max_breadth)
                break
            items.append((k, v))

        rv_dict = {}  # type: Dict[Any, Any]
        for k, v in items:
            k = text_type(k)
            with self.enter(k):
                v = self._serialize_node(
                    v, max_depth=max_depth, max_breadth=max_breadth
                )
            # None means the child was trimmed away — omit the key.
            if v is not None:
                rv_dict[k] = v
        return rv_dict

    elif isinstance(obj, Sequence) and not isinstance(obj, string_types):
        rv_list = []  # type: List[Any]
        for i, v in enumerate(obj):
            if max_breadth is not None and i >= max_breadth:
                self.meta_node.annotate(len=max_breadth)
                break
            with self.enter(i):
                rv_list.append(
                    self._serialize_node(
                        v, max_depth=max_depth, max_breadth=max_breadth
                    )
                )
        return rv_list

    if self.meta_node.should_repr_strings():
        obj = safe_repr(obj)
    else:
        # Primitives pass through; everything else is coerced to text.
        if obj is None or isinstance(obj, (bool, number_types)):
            return obj

        if isinstance(obj, datetime):
            return text_type(obj.strftime("%Y-%m-%dT%H:%M:%SZ"))

        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")

        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    return _flatten_annotated(strip_string(obj), self.meta_node)
def _serialize_node_impl(
    obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
):
    # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
    """Recursively serialize ``obj`` into JSON-safe values.

    ``remaining_depth``/``remaining_breadth`` count down per recursion
    level; databags get default budgets on first entry.  Trimming is
    recorded via ``_annotate``.
    """
    # Resolve unspecified flags from the current serialization path.
    if should_repr_strings is None:
        should_repr_strings = _should_repr_strings()

    if is_databag is None:
        is_databag = _is_databag()

    # Databags get default depth/breadth budgets when none were given.
    if is_databag and remaining_depth is None:
        remaining_depth = MAX_DATABAG_DEPTH
    if is_databag and remaining_breadth is None:
        remaining_breadth = MAX_DATABAG_BREADTH

    obj = _flatten_annotated(obj)

    # Depth budget exhausted: annotate the cut and either emit a repr
    # stub (databags) or drop the value entirely.
    if remaining_depth is not None and remaining_depth <= 0:
        _annotate(rem=[["!limit", "x"]])
        if is_databag:
            return _flatten_annotated(strip_string(safe_repr(obj)))
        return None

    # Give registered repr processors a chance to handle databag values.
    if is_databag and global_repr_processors:
        hints = {"memo": memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            result = processor(obj, hints)
            if result is not NotImplemented:
                return _flatten_annotated(result)

    if obj is None or isinstance(obj, (bool, number_types)):
        return obj if not should_repr_strings else safe_repr(obj)

    elif isinstance(obj, datetime):
        return (
            text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
            if not should_repr_strings
            else safe_repr(obj)
        )

    elif isinstance(obj, Mapping):
        # Create temporary copy here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        obj = dict(iteritems(obj))
        rv_dict = {}
        i = 0
        for k, v in iteritems(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                _annotate(len=len(obj))
                break
            str_k = text_type(k)
            v = _serialize_node(
                v,
                segment=str_k,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
                remaining_depth=remaining_depth - 1
                if remaining_depth is not None
                else None,
                remaining_breadth=remaining_breadth,
            )
            # None means the child was trimmed away — omit the key.
            if v is not None:
                rv_dict[str_k] = v
            i += 1
        return rv_dict

    elif not isinstance(obj, serializable_str_types) and isinstance(obj, Sequence):
        rv_list = []
        for i, v in enumerate(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                _annotate(len=len(obj))
                break
            rv_list.append(
                _serialize_node(
                    v,
                    segment=i,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1
                    if remaining_depth is not None
                    else None,
                    remaining_breadth=remaining_breadth,
                )
            )
        return rv_list

    if should_repr_strings:
        obj = safe_repr(obj)
    else:
        # Coerce remaining scalars to text (bytes decoded lossily).
        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")
        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    return _flatten_annotated(strip_string(obj))
def _serialize_node_impl(obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth):
    # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
    """Recursively serialize ``obj`` into JSON-safe values.

    ``remaining_depth``/``remaining_breadth`` count down per recursion
    level; databags get default budgets on first entry.  Also records
    span-description lengths for later transaction trimming.
    """
    # Resolve unspecified flags from the current serialization path.
    if should_repr_strings is None:
        should_repr_strings = _should_repr_strings()

    if is_databag is None:
        is_databag = _is_databag()

    # Databags get default depth/breadth budgets when none were given.
    if is_databag and remaining_depth is None:
        remaining_depth = MAX_DATABAG_DEPTH
    if is_databag and remaining_breadth is None:
        remaining_breadth = MAX_DATABAG_BREADTH

    obj = _flatten_annotated(obj)

    # Depth budget exhausted: annotate the cut and either emit a repr
    # stub (databags) or drop the value entirely.
    if remaining_depth is not None and remaining_depth <= 0:
        _annotate(rem=[["!limit", "x"]])
        if is_databag:
            return _flatten_annotated(strip_string(safe_repr(obj)))
        return None

    # Give registered repr processors a chance to handle databag values.
    if is_databag and global_repr_processors:
        hints = {"memo": memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            result = processor(obj, hints)
            if result is not NotImplemented:
                return _flatten_annotated(result)

    if obj is None or isinstance(obj, (bool, number_types)):
        # inf/nan are not valid JSON numbers, so they are repr'd even
        # when strings are otherwise passed through.
        if should_repr_strings or (
            isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))
        ):
            return safe_repr(obj)
        else:
            return obj

    elif isinstance(obj, datetime):
        return (
            text_type(format_timestamp(obj))
            if not should_repr_strings
            else safe_repr(obj)
        )

    elif isinstance(obj, Mapping):
        # Create temporary copy here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        obj = dict(iteritems(obj))
        rv_dict = {}  # type: Dict[str, Any]
        i = 0
        for k, v in iteritems(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                _annotate(len=len(obj))
                break
            str_k = text_type(k)
            v = _serialize_node(
                v,
                segment=str_k,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
                remaining_depth=remaining_depth - 1
                if remaining_depth is not None
                else None,
                remaining_breadth=remaining_breadth,
            )
            rv_dict[str_k] = v
            i += 1
        return rv_dict

    elif not isinstance(obj, serializable_str_types) and isinstance(
        obj, (Set, Sequence)
    ):
        rv_list = []
        for i, v in enumerate(obj):
            if remaining_breadth is not None and i >= remaining_breadth:
                _annotate(len=len(obj))
                break
            rv_list.append(
                _serialize_node(
                    v,
                    segment=i,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1
                    if remaining_depth is not None
                    else None,
                    remaining_breadth=remaining_breadth,
                )
            )
        return rv_list

    if should_repr_strings:
        obj = safe_repr(obj)
    else:
        # Coerce remaining scalars to text (bytes decoded lossily).
        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")
        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    # Allow span descriptions to be longer than other strings.
    #
    # For database auto-instrumented spans, the description contains
    # potentially long SQL queries that are most useful when not truncated.
    # Because arbitrarily large events may be discarded by the server as a
    # protection mechanism, we dynamically limit the description length
    # later in _truncate_span_descriptions.
    if (
        smart_transaction_trimming
        and len(path) == 3
        and path[0] == "spans"
        and path[-1] == "description"
    ):
        span_description_bytes.append(len(obj))
        return obj
    return _flatten_annotated(strip_string(obj))
def test_safe_repr_regressions():
    """Non-ASCII text must survive safe_repr un-mangled."""
    word = u"лошадь"
    assert word in safe_repr(word)
def test_safe_repr_never_broken_for_strings(x):
    """safe_repr must always produce text and never its failure marker."""
    rendered = safe_repr(x)
    assert isinstance(rendered, text_type)
    assert u"broken repr" not in rendered
def large_str_processor(value, hint):
    """Repr processor that truncates str/bytes values longer than 1024.

    Returns ``NotImplemented`` for anything it does not handle, so the
    serializer falls through to its default behavior.
    """
    if not isinstance(value, (bytes, str)):
        return NotImplemented
    if len(value) <= 1024:
        return NotImplemented
    return safe_repr(value[:1024]) + f"..., len={len(value)}"
def test_safe_repr_non_printable(prefix, character):
    """Check that non-printable characters are escaped"""
    value = prefix + character
    text_form = safe_repr(value)
    bytes_form = safe_repr(value.encode("utf-8"))
    assert character not in text_form
    assert character not in bytes_form
def _serialize_node_impl(
    obj, max_depth, max_breadth, is_databag, should_repr_strings
):
    # type: (Any, Optional[int], Optional[int], Optional[bool], Optional[bool]) -> Any
    """Recursively serialize ``obj`` into JSON-safe values.

    Depth/breadth limits are absolute path depths (relative to the
    module-level ``path``); databag subtrees derive default limits on
    first entry.  Trimming is recorded via ``_annotate``.
    """
    # Frame-local variables are repr'd rather than passed through.
    if not should_repr_strings:
        should_repr_strings = (
            _startswith_path(
                ("exception", "values", None, "stacktrace", "frames", None, "vars")
            )
            or _startswith_path(
                ("threads", "values", None, "stacktrace", "frames", None, "vars")
            )
            or _startswith_path(("stacktrace", "frames", None, "vars"))
        )

    if obj is None or isinstance(obj, (bool, number_types)):
        return obj if not should_repr_strings else safe_repr(obj)

    if isinstance(obj, datetime):
        return (
            text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
            if not should_repr_strings
            else safe_repr(obj)
        )

    # Request data, breadcrumbs and extras are treated as databags.
    if not is_databag:
        is_databag = (
            should_repr_strings
            or _startswith_path(("request", "data"))
            or _startswith_path(("breadcrumbs", None))
            or _startswith_path(("extra",))
        )

    cur_depth = len(path)

    # Entering a databag with no explicit limits: derive absolute limits
    # from the current depth plus the databag defaults.
    if max_depth is None and max_breadth is None and is_databag:
        max_depth = cur_depth + MAX_DATABAG_DEPTH
        max_breadth = cur_depth + MAX_DATABAG_BREADTH

    if max_depth is None:
        remaining_depth = None
    else:
        remaining_depth = max_depth - cur_depth

    obj = _flatten_annotated(obj)

    # Depth budget exhausted: annotate the cut and either emit a repr
    # stub (databags) or drop the value entirely.
    if remaining_depth is not None and remaining_depth <= 0:
        _annotate(rem=[["!limit", "x"]])
        if is_databag:
            return _flatten_annotated(strip_string(safe_repr(obj)))
        return None

    # Give registered repr processors a chance to handle databag values.
    if global_repr_processors and is_databag:
        hints = {"memo": memo, "remaining_depth": remaining_depth}
        for processor in global_repr_processors:
            result = processor(obj, hints)
            if result is not NotImplemented:
                return _flatten_annotated(result)

    if isinstance(obj, Mapping):
        # Create temporary copy here to avoid calling too much code that
        # might mutate our dictionary while we're still iterating over it.
        if max_breadth is not None and len(obj) >= max_breadth:
            rv_dict = dict(itertools.islice(iteritems(obj), None, max_breadth))
            _annotate(len=len(obj))
        else:
            if type(obj) is dict:
                rv_dict = dict(obj)
            else:
                rv_dict = dict(iteritems(obj))

        # Serialize in place; keys are re-inserted as text.
        for k in list(rv_dict):
            str_k = text_type(k)
            v = _serialize_node(
                rv_dict.pop(k),
                max_depth=max_depth,
                max_breadth=max_breadth,
                segment=str_k,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
            )
            # None means the child was trimmed away — omit the key.
            if v is not None:
                rv_dict[str_k] = v

        return rv_dict

    elif not isinstance(obj, string_types) and isinstance(obj, Sequence):
        if max_breadth is not None and len(obj) >= max_breadth:
            rv_list = list(obj)[:max_breadth]
            _annotate(len=len(obj))
        else:
            rv_list = list(obj)

        for i in range(len(rv_list)):
            rv_list[i] = _serialize_node(
                rv_list[i],
                max_depth=max_depth,
                max_breadth=max_breadth,
                segment=i,
                should_repr_strings=should_repr_strings,
                is_databag=is_databag,
            )

        return rv_list

    if should_repr_strings:
        obj = safe_repr(obj)
    else:
        # Coerce remaining scalars to text (bytes decoded lossily).
        if isinstance(obj, bytes):
            obj = obj.decode("utf-8", "replace")

        if not isinstance(obj, string_types):
            obj = safe_repr(obj)

    return _flatten_annotated(strip_string(obj))