Beispiel #1
0
    def sentry_patched_popen_init(self, *a, **kw):
        # type: (subprocess.Popen[Any], *Any, **Any) -> None
        """Replacement for ``Popen.__init__`` that records a "subprocess" span.

        When ``StdlibIntegration`` is not active on the current hub the call
        is forwarded untouched to ``old_popen_init``. Otherwise the original
        initializer runs inside a span whose description is derived from the
        command, trace propagation headers are added to the child environment
        as ``SUBPROCESS_*`` variables, and the child's pid is tagged on the
        span.
        """
        current_hub = Hub.current
        if current_hub.get_integration(StdlibIntegration) is None:
            return old_popen_init(self, *a, **kw)  # type: ignore

        # Positional arguments arrive as a tuple; a list allows values to be
        # set on it.
        a = list(a)

        command = _init_argument(a, kw, "args", 0) or []
        working_dir = _init_argument(a, kw, "cwd", 9)

        # Only real lists/tuples are joined. Anything else (e.g. an iterator)
        # is left alone: collecting it and storing it back into `a` has too
        # many failure modes, and such invocations are not actually legal --
        # they just happen to work under CPython.
        span_description = None
        if isinstance(command, (list, tuple)) and len(command) < 100:
            with capture_internal_exceptions():
                span_description = " ".join(str(part) for part in command)

        if span_description is None:
            span_description = safe_repr(command)

        child_env = None

        with current_hub.start_span(
            op="subprocess", description=span_description
        ) as span:
            for header, value in current_hub.iter_trace_propagation_headers(span):
                # The environment is only resolved once the first header
                # shows up.
                # NOTE(review): presumably _init_argument stores the converted
                # dict back into `a`/`kw` so the child sees it -- confirm.
                if child_env is None:
                    child_env = _init_argument(
                        a, kw, "env", 10, lambda current: dict(current or os.environ)
                    )
                env_key = "SUBPROCESS_" + header.upper().replace("-", "_")
                child_env[env_key] = value

            if working_dir:
                span.set_data("subprocess.cwd", working_dir)

            result = old_popen_init(self, *a, **kw)  # type: ignore

            span.set_tag("subprocess.pid", self.pid)
            return result
Beispiel #2
0
def format_sql(sql, params):
    """Return ``sql`` together with a list of stringified parameters.

    :param sql: The SQL statement. When ``params`` is a non-empty dict the
        statement is interpolated with a ``_FormatConverter`` so that named
        parameters become unnamed ones.
    :param params: ``None``, a dict of named parameters, or an iterable of
        positional parameters.
    :returns: A ``(sql, rendered_params)`` tuple where ``rendered_params``
        holds exactly one string per parameter: ``"NULL"`` for ``None`` and
        ``safe_repr(param)`` for everything else.
    """
    rv = []

    if isinstance(params, dict):
        # convert sql with named parameters to sql with unnamed parameters
        conv = _FormatConverter(params)
        if params:
            sql = sql % conv
            params = conv.params
        else:
            params = ()

    for param in params or ():
        # Emit exactly one entry per parameter. Previously a None parameter
        # produced "NULL" *and* its repr, shifting every later parameter.
        if param is None:
            rv.append("NULL")
        else:
            rv.append(safe_repr(param))

    return sql, rv
Beispiel #3
0
def _format_sql_impl(sql, params):
    # type: (Any, Any) -> Tuple[str, List[str]]
    """Return ``sql`` together with a list of stringified parameters.

    :param sql: The SQL statement. When ``params`` is a non-empty dict the
        statement is interpolated with a ``_FormatConverter`` so that named
        parameters become unnamed ones.
    :param params: ``None``, a dict of named parameters, or an iterable of
        positional parameters.
    :returns: A ``(sql, rendered_params)`` tuple where ``rendered_params``
        holds exactly one string per parameter: ``"NULL"`` for ``None`` and
        ``safe_repr(param)`` for everything else.
    """
    rv = []

    if isinstance(params, dict):
        # convert sql with named parameters to sql with unnamed parameters
        conv = _FormatConverter(params)
        if params:
            sql = sql % conv
            params = conv.params
        else:
            params = ()

    for param in params or ():
        # Emit exactly one entry per parameter. Previously a None parameter
        # produced "NULL" *and* its repr, shifting every later parameter.
        if param is None:
            rv.append("NULL")
        else:
            rv.append(safe_repr(param))

    return sql, rv
Beispiel #4
0
    def _serialize_node_impl(self, obj, max_depth, max_breadth):
        # type: (Any, Optional[int], Optional[int]) -> Any
        """Serialize ``obj`` for the current ``self.meta_node``.

        Mappings and non-string sequences are walked recursively through
        ``self._serialize_node``; every other value is returned as-is
        (``None``, bools, numbers), formatted (datetimes) or converted to a
        stripped string.

        :param obj: The value to serialize.
        :param max_depth: Absolute depth limit compared against
            ``self.meta_node._depth``, or ``None`` for no limit.
        :param max_breadth: Maximum number of items taken from a mapping or
            sequence, or ``None`` for no limit.
        :returns: The serialized value. ``None`` is returned when the depth
            limit is hit outside of a databag.
        """
        # Limits are only defaulted when the caller supplied neither of them
        # and the current node is a databag.
        # NOTE(review): the breadth default is also offset by the current
        # depth -- confirm that is intended.
        if max_depth is None and max_breadth is None and self.meta_node.is_databag(
        ):
            max_depth = self.meta_node._depth + MAX_DATABAG_DEPTH
            max_breadth = self.meta_node._depth + MAX_DATABAG_BREADTH

        if max_depth is None:
            remaining_depth = None
        else:
            remaining_depth = max_depth - self.meta_node._depth

        obj = _flatten_annotated(obj, self.meta_node)

        # Depth budget exhausted: annotate the node, then either fall back to
        # a stripped repr (databag) or drop the value entirely.
        if remaining_depth is not None and remaining_depth <= 0:
            self.meta_node.annotate(rem=[["!limit", "x"]])
            if self.meta_node.is_databag():
                return _flatten_annotated(strip_string(safe_repr(obj)),
                                          self.meta_node)
            return None

        # Inside databags, the first repr processor that returns something
        # other than NotImplemented decides the result.
        if self.meta_node.is_databag():
            hints = {"memo": self.memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                with capture_internal_exceptions():
                    result = processor(obj, hints)
                    if result is not NotImplemented:
                        return _flatten_annotated(result, self.meta_node)

        if isinstance(obj, Mapping):
            # Create temporary list here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            items = []
            for i, (k, v) in enumerate(iteritems(obj)):
                if max_breadth is not None and i >= max_breadth:
                    self.meta_node.annotate(len=max_breadth)
                    break

                items.append((k, v))

            rv_dict = {}  # type: Dict[Any, Any]
            for k, v in items:
                k = text_type(k)

                with self.enter(k):
                    v = self._serialize_node(v,
                                             max_depth=max_depth,
                                             max_breadth=max_breadth)
                    # Entries that serialize to None are left out.
                    if v is not None:
                        rv_dict[k] = v

            return rv_dict
        elif isinstance(obj, Sequence) and not isinstance(obj, string_types):
            rv_list = []  # type: List[Any]
            for i, v in enumerate(obj):
                if max_breadth is not None and i >= max_breadth:
                    self.meta_node.annotate(len=max_breadth)
                    break

                with self.enter(i):
                    # Unlike mappings, None results are kept in the list.
                    rv_list.append(
                        self._serialize_node(v,
                                             max_depth=max_depth,
                                             max_breadth=max_breadth))

            return rv_list

        # Leaf value: either repr everything, or pass primitives through and
        # stringify the rest.
        if self.meta_node.should_repr_strings():
            obj = safe_repr(obj)
        else:
            if obj is None or isinstance(obj, (bool, number_types)):
                return obj

            if isinstance(obj, datetime):
                return text_type(obj.strftime("%Y-%m-%dT%H:%M:%SZ"))

            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        return _flatten_annotated(strip_string(obj), self.meta_node)
Beispiel #5
0
    def _serialize_node_impl(
        obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
        """Serialize ``obj``, recursing into mappings and sequences.

        :param obj: The value to serialize.
        :param is_databag: Whether the value is in a databag; ``None`` means
            "ask ``_is_databag()``".
        :param should_repr_strings: Whether leaf values are rendered with
            ``safe_repr``; ``None`` means "ask ``_should_repr_strings()``".
        :param remaining_depth: Remaining nesting budget, or ``None`` for no
            limit. Databags default to ``MAX_DATABAG_DEPTH``.
        :param remaining_breadth: Maximum number of items per container, or
            ``None`` for no limit. Databags default to
            ``MAX_DATABAG_BREADTH``.
        :returns: The serialized value. ``None`` is returned when the depth
            budget is exhausted outside of a databag.
        """
        if should_repr_strings is None:
            should_repr_strings = _should_repr_strings()

        if is_databag is None:
            is_databag = _is_databag()

        if is_databag and remaining_depth is None:
            remaining_depth = MAX_DATABAG_DEPTH
        if is_databag and remaining_breadth is None:
            remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        # Depth budget exhausted: annotate, then either fall back to a
        # stripped repr (databag) or drop the value entirely.
        if remaining_depth is not None and remaining_depth <= 0:
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(strip_string(safe_repr(obj)))
            return None

        # Inside databags, the first repr processor that returns something
        # other than NotImplemented decides the result.
        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        if obj is None or isinstance(obj, (bool, number_types)):
            return obj if not should_repr_strings else safe_repr(obj)

        elif isinstance(obj, datetime):
            return (
                text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
                if not should_repr_strings
                else safe_repr(obj)
            )

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(iteritems(obj))

            rv_dict = {}
            i = 0

            for k, v in iteritems(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                str_k = text_type(k)
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1
                    if remaining_depth is not None
                    else None,
                    remaining_breadth=remaining_breadth,
                )
                # Entries that serialize to None are dropped and, because `i`
                # only advances here, do not count toward the breadth limit.
                if v is not None:
                    rv_dict[str_k] = v
                    i += 1

            return rv_dict

        elif not isinstance(obj, serializable_str_types) and isinstance(obj, Sequence):
            rv_list = []

            for i, v in enumerate(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                # Unlike mappings, None results are kept in the list.
                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        remaining_depth=remaining_depth - 1
                        if remaining_depth is not None
                        else None,
                        remaining_breadth=remaining_breadth,
                    )
                )

            return rv_list

        # Remaining leaf values end up as a stripped string.
        if should_repr_strings:
            obj = safe_repr(obj)
        else:
            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        return _flatten_annotated(strip_string(obj))
    def _serialize_node_impl(obj, is_databag, should_repr_strings,
                             remaining_depth, remaining_breadth):
        # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
        """Serialize ``obj``, recursing into mappings, sets and sequences.

        :param obj: The value to serialize.
        :param is_databag: Whether the value is in a databag; ``None`` means
            "ask ``_is_databag()``".
        :param should_repr_strings: Whether leaf values are rendered with
            ``safe_repr``; ``None`` means "ask ``_should_repr_strings()``".
        :param remaining_depth: Remaining nesting budget, or ``None`` for no
            limit. Databags default to ``MAX_DATABAG_DEPTH``.
        :param remaining_breadth: Maximum number of items per container, or
            ``None`` for no limit. Databags default to
            ``MAX_DATABAG_BREADTH``.
        :returns: The serialized value. ``None`` is returned when the depth
            budget is exhausted outside of a databag.
        """
        if should_repr_strings is None:
            should_repr_strings = _should_repr_strings()

        if is_databag is None:
            is_databag = _is_databag()

        if is_databag and remaining_depth is None:
            remaining_depth = MAX_DATABAG_DEPTH
        if is_databag and remaining_breadth is None:
            remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        # Depth budget exhausted: annotate, then either fall back to a
        # stripped repr (databag) or drop the value entirely.
        if remaining_depth is not None and remaining_depth <= 0:
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(strip_string(safe_repr(obj)))
            return None

        # Inside databags, the first repr processor that returns something
        # other than NotImplemented decides the result.
        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        if obj is None or isinstance(obj, (bool, number_types)):
            # Infinite and NaN floats are always rendered via safe_repr, even
            # when should_repr_strings is off.
            if should_repr_strings or (isinstance(obj, float) and
                                       (math.isinf(obj) or math.isnan(obj))):
                return safe_repr(obj)
            else:
                return obj

        elif isinstance(obj, datetime):
            return (text_type(format_timestamp(obj))
                    if not should_repr_strings else safe_repr(obj))

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(iteritems(obj))

            rv_dict = {}  # type: Dict[str, Any]
            i = 0

            for k, v in iteritems(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                str_k = text_type(k)
                # The conditional expression binds looser than the
                # subtraction: (remaining_depth - 1) if not None else None.
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth -
                    1 if remaining_depth is not None else None,
                    remaining_breadth=remaining_breadth,
                )
                # Every entry is stored, including those that serialize to
                # None, and every entry counts toward the breadth limit.
                rv_dict[str_k] = v
                i += 1

            return rv_dict

        elif not isinstance(obj, serializable_str_types) and isinstance(
                obj, (Set, Sequence)):
            # Sets and sequences are both emitted as lists.
            rv_list = []

            for i, v in enumerate(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        remaining_depth=remaining_depth -
                        1 if remaining_depth is not None else None,
                        remaining_breadth=remaining_breadth,
                    ))

            return rv_list

        # Remaining leaf values are converted to a string.
        if should_repr_strings:
            obj = safe_repr(obj)
        else:
            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        # Allow span descriptions to be longer than other strings.
        #
        # For database auto-instrumented spans, the description contains
        # potentially long SQL queries that are most useful when not truncated.
        # Because arbitrarily large events may be discarded by the server as a
        # protection mechanism, we dynamically limit the description length
        # later in _truncate_span_descriptions.
        if (smart_transaction_trimming and len(path) == 3
                and path[0] == "spans" and path[-1] == "description"):
            # The untruncated length is recorded and the string is returned
            # without strip_string / _flatten_annotated.
            span_description_bytes.append(len(obj))
            return obj
        return _flatten_annotated(strip_string(obj))
Beispiel #7
0
def test_safe_repr_regressions():
    """Non-ASCII text must appear verbatim in the output of ``safe_repr``."""
    word = u"лошадь"
    rendered = safe_repr(word)
    assert word in rendered
Beispiel #8
0
def test_safe_repr_never_broken_for_strings(x):
    """``safe_repr`` must return text without the "broken repr" marker."""
    rendered = safe_repr(x)
    assert isinstance(rendered, text_type)
    assert u"broken repr" not in rendered
 def large_str_processor(value, hint):
     """Repr processor that shortens ``bytes``/``str`` values over 1024 items.

     Returns the ``safe_repr`` of the first 1024 items followed by a suffix
     giving the full length. Any other value yields ``NotImplemented``. The
     ``hint`` argument is not used.
     """
     if not isinstance(value, (bytes, str)):
         return NotImplemented

     size = len(value)
     if size <= 1024:
         return NotImplemented

     return safe_repr(value[:1024]) + f"..., len={size}"
def test_safe_repr_non_printable(prefix, character):
    """Check that non-printable characters are escaped"""
    text = prefix + character

    # Text input first ...
    text_repr = safe_repr(text)
    assert character not in text_repr

    # ... then the same content as UTF-8 encoded bytes.
    bytes_repr = safe_repr(text.encode("utf-8"))
    assert character not in bytes_repr
    def _serialize_node_impl(
        obj, max_depth, max_breadth, is_databag, should_repr_strings
    ):
        # type: (Any, Optional[int], Optional[int], Optional[bool], Optional[bool]) -> Any
        """Serialize ``obj``, recursing into mappings and sequences.

        :param obj: The value to serialize.
        :param max_depth: Absolute depth limit compared against
            ``len(path)``, or ``None`` for no limit.
        :param max_breadth: Maximum number of items per container, or
            ``None`` for no limit.
        :param is_databag: Whether the value is in a databag; a falsy value
            is re-derived from ``should_repr_strings`` and the current path.
        :param should_repr_strings: Whether leaf values are rendered with
            ``safe_repr``; a falsy value is re-derived from the current path.
        :returns: The serialized value. ``None`` is returned when the depth
            limit is hit outside of a databag.
        """
        # Both None and False trigger the path check, so an inherited True is
        # the only value that skips it.
        if not should_repr_strings:
            should_repr_strings = (
                _startswith_path(
                    ("exception", "values", None, "stacktrace", "frames", None, "vars")
                )
                or _startswith_path(
                    ("threads", "values", None, "stacktrace", "frames", None, "vars")
                )
                or _startswith_path(("stacktrace", "frames", None, "vars"))
            )

        # Primitives and datetimes return before any depth/databag handling.
        if obj is None or isinstance(obj, (bool, number_types)):
            return obj if not should_repr_strings else safe_repr(obj)

        if isinstance(obj, datetime):
            return (
                text_type(obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
                if not should_repr_strings
                else safe_repr(obj)
            )

        if not is_databag:
            is_databag = (
                should_repr_strings
                or _startswith_path(("request", "data"))
                or _startswith_path(("breadcrumbs", None))
                or _startswith_path(("extra",))
            )

        # Limits are only defaulted when the caller supplied neither of them
        # and the value is in a databag.
        # NOTE(review): the breadth default is also offset by the current
        # depth -- confirm that is intended.
        cur_depth = len(path)
        if max_depth is None and max_breadth is None and is_databag:
            max_depth = cur_depth + MAX_DATABAG_DEPTH
            max_breadth = cur_depth + MAX_DATABAG_BREADTH

        if max_depth is None:
            remaining_depth = None
        else:
            remaining_depth = max_depth - cur_depth

        obj = _flatten_annotated(obj)

        # Depth budget exhausted: annotate, then either fall back to a
        # stripped repr (databag) or drop the value entirely.
        if remaining_depth is not None and remaining_depth <= 0:
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(strip_string(safe_repr(obj)))
            return None

        # Inside databags, the first repr processor that returns something
        # other than NotImplemented decides the result.
        if global_repr_processors and is_databag:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        if isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            # NOTE(review): `>=` also annotates when len(obj) equals
            # max_breadth exactly, although nothing is cut off -- confirm.
            if max_breadth is not None and len(obj) >= max_breadth:
                rv_dict = dict(itertools.islice(iteritems(obj), None, max_breadth))
                _annotate(len=len(obj))
            else:
                if type(obj) is dict:
                    rv_dict = dict(obj)
                else:
                    rv_dict = dict(iteritems(obj))

            # The copy is rewritten in place: each original key is popped and
            # its serialized value is stored under the text form of the key.
            # Iterating over list(rv_dict) keeps the loop safe while mutating.
            for k in list(rv_dict):
                str_k = text_type(k)
                v = _serialize_node(
                    rv_dict.pop(k),
                    max_depth=max_depth,
                    max_breadth=max_breadth,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                )
                # Entries that serialize to None are left out.
                if v is not None:
                    rv_dict[str_k] = v

            return rv_dict
        elif not isinstance(obj, string_types) and isinstance(obj, Sequence):
            if max_breadth is not None and len(obj) >= max_breadth:
                rv_list = list(obj)[:max_breadth]
                _annotate(len=len(obj))
            else:
                rv_list = list(obj)

            # Elements of the copy are replaced in place by their serialized
            # form; None results are kept.
            for i in range(len(rv_list)):
                rv_list[i] = _serialize_node(
                    rv_list[i],
                    max_depth=max_depth,
                    max_breadth=max_breadth,
                    segment=i,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                )

            return rv_list

        # Remaining leaf values end up as a stripped string.
        if should_repr_strings:
            obj = safe_repr(obj)
        else:
            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        return _flatten_annotated(strip_string(obj))