Example #1
def _key(obj, context):
    """Return key for memoization."""

    # Use an arbitrary sentinel in place of None, since a return value of
    # None is used for control flow in .to_bytes.
    if obj is None:
        return NONESENSE

    def is_simple(obj):
        return (isinstance(obj, bytes) or isinstance(obj, bytearray)
                or isinstance(obj, string_types)  # noqa: F821
                or isinstance(obj, float) or isinstance(obj, int)
                or isinstance(obj, bool) or obj is None)

    if is_simple(obj):
        return obj

    if isinstance(obj, tuple):
        if all(map(is_simple, obj)):
            return obj

    if isinstance(obj, list):
        if all(map(is_simple, obj)):
            return ("__l", tuple(obj))

    if (type_util.is_type(obj, "pandas.core.frame.DataFrame")
            or type_util.is_type(obj, "numpy.ndarray")
            or inspect.isbuiltin(obj) or inspect.isroutine(obj)
            or inspect.iscode(obj)):
        return id(obj)

    return None
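
A quick illustration of how this key mapping behaves (a hypothetical usage sketch; it assumes NONESENSE, string_types, and type_util are in scope as in the original module):

assert _key("hello", None) == "hello"          # simple values pass through
assert _key((1, "a"), None) == (1, "a")        # tuples of simple values too
assert _key([1, 2], None) == ("__l", (1, 2))   # lists are tagged so [x] keys differ from (x,)
assert _key(object(), None) is None            # unsupported types get no memoization key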
Example #2
def _key(obj, context):
    """Return key for memoization."""

    if obj is None:
        return b"none:"  # special value so we can hash None

    def is_simple(obj):
        return (isinstance(obj, bytes) or isinstance(obj, bytearray)
                or isinstance(obj, string_types)  # noqa: F821
                or isinstance(obj, float) or isinstance(obj, int)
                or isinstance(obj, bool) or obj is None)

    if is_simple(obj):
        return obj

    if isinstance(obj, tuple):
        if all(map(is_simple, obj)):
            return obj

    if isinstance(obj, list):
        if all(map(is_simple, obj)):
            return ("__l", tuple(obj))

    if (type_util.is_type(obj, "pandas.core.frame.DataFrame")
            or type_util.is_type(obj, "numpy.ndarray")
            or inspect.isbuiltin(obj) or inspect.isroutine(obj)
            or inspect.iscode(obj)):
        return id(obj)

    return None
Example #3
def _key(obj: Optional[Any]) -> Any:
    """Return key for memoization."""

    if obj is None:
        return None

    def is_simple(obj):
        return (isinstance(obj, bytes) or isinstance(obj, bytearray)
                or isinstance(obj, str) or isinstance(obj, float)
                or isinstance(obj, int) or isinstance(obj, bool)
                or obj is None)

    if is_simple(obj):
        return obj

    if isinstance(obj, tuple):
        if all(map(is_simple, obj)):
            return obj

    if isinstance(obj, list):
        if all(map(is_simple, obj)):
            return ("__l", tuple(obj))

    if (type_util.is_type(obj, "pandas.core.frame.DataFrame")
            or type_util.is_type(obj, "numpy.ndarray")
            or inspect.isbuiltin(obj) or inspect.isroutine(obj)
            or inspect.iscode(obj)):
        return id(obj)

    return NoResult
Example #4
def convert_anything_to_df(df):
    """Try to convert different formats to a Pandas Dataframe.

    Parameters
    ----------
    df : ndarray, Iterable, dict, DataFrame, Styler, None, or any

    Returns
    -------
    pandas.DataFrame

    """
    if type_util.is_type(df, "pandas.core.frame.DataFrame"):
        return df

    if _is_pandas_styler(df):
        return df.data

    import pandas as pd

    if type_util.is_type(df, "numpy.ndarray") and len(df.shape) == 0:
        return pd.DataFrame([])

    # Try to convert to pandas.DataFrame. This will raise an error if df is
    # not compatible with the pandas.DataFrame constructor.
    return pd.DataFrame(df)
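
A hypothetical usage sketch, assuming pandas and numpy are installed and the function is imported:

import numpy as np

df1 = convert_anything_to_df({"a": [1, 2], "b": [3, 4]})  # dict -> DataFrame
df2 = convert_anything_to_df(np.zeros((2, 3)))            # 2-D ndarray -> DataFrame
df3 = convert_anything_to_df(np.array(0.5))               # 0-D ndarray -> empty DataFrame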
Example #5
    def test_dict_items(self):
        a = types.MappingProxyType({"a": 1}).items()
        b = types.MappingProxyType({"a": 1}).items()
        c = types.MappingProxyType({"c": 1}).items()

        assert is_type(a, "builtins.dict_items")
        self.assertEqual(get_hash(a), get_hash(b))
        self.assertNotEqual(get_hash(a), get_hash(c))
Example #6
    def test_compiled_ffi(self):
        self._build_cffi("foo")
        self._build_cffi("bar")
        from cffi_bin._foo import ffi as foo
        from cffi_bin._bar import ffi as bar

        # Note: We've verified that all properties on CompiledFFI objects
        # are global, though we haven't verified `error` either way.
        assert is_type(foo, "builtins.CompiledFFI")
        self.assertEqual(get_hash(foo), get_hash(bar))
Example #7
    def test_torch_c_tensorbase(self):
        a = torch.ones([1, 1]).__reduce__()[1][2]
        b = torch.ones([1, 1], requires_grad=True).__reduce__()[1][2]
        c = torch.ones([1, 2]).__reduce__()[1][2]

        assert is_type(a, "torch._C._TensorBase")
        self.assertEqual(get_hash(a), get_hash(b))
        self.assertNotEqual(get_hash(a), get_hash(c))

        b.mean().backward()
        # Calling backward on a tensorbase doesn't seem to affect the gradient
        self.assertEqual(get_hash(a), get_hash(b))
Example #8
    def test_getset_descriptor(self):
        class A:
            x = 1

        class B:
            x = 1

        a = A.__dict__["__dict__"]
        b = B.__dict__["__dict__"]
        assert is_type(a, "builtins.getset_descriptor")

        self.assertEqual(get_hash(a), get_hash(a))
        self.assertNotEqual(get_hash(a), get_hash(b))
Example #9
        def _check_and_convert_to_indices(opt, default_values):
            if default_values is None and None not in opt:
                return None

            if not isinstance(default_values, list):
                # This check comes before the others because calling `if not x`
                # (done right below) when x is a pd.Series or np.ndarray raises
                # a ValueError.
                if is_type(default_values, "numpy.ndarray") or is_type(
                        default_values, "pandas.core.series.Series"):
                    default_values = list(default_values)
                elif not default_values or default_values in opt:
                    default_values = [default_values]
                else:
                    default_values = list(default_values)

            for value in default_values:
                if value not in opt:
                    raise StreamlitAPIException(
                        "Every Multiselect default value must exist in options"
                    )

            return [opt.index(value) for value in default_values]
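
A hypothetical usage sketch, treating the nested helper as if it were in scope:

opt = ["red", "green", "blue"]
assert _check_and_convert_to_indices(opt, "green") == [1]             # scalar default
assert _check_and_convert_to_indices(opt, ["red", "blue"]) == [0, 2]  # list of defaults
assert _check_and_convert_to_indices(opt, None) is None               # nothing selected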
Example #10
def _marshall_av_media(
    coordinates: str,
    proto: Union[AudioProto, VideoProto],
    data: Data,
    mimetype: str,
) -> None:
    """Fill audio or video proto based on contents of data.

    Given a string, check whether it's a URL; if so, send it out without
    modification. Otherwise assume the string is a filename and let any OS
    errors raise.

    Load data either from a file or through bytes-processing methods into an
    InMemoryFile object. Pack the proto with the generated Tornado-based URL.
    """
    # Audio and Video methods have already checked if this is a URL by this point.

    if isinstance(data, str):
        # Assume it's a filename or blank.  Allow OS-based file errors.
        with open(data, "rb") as fh:
            this_file = in_memory_file_manager.add(fh.read(), mimetype,
                                                   coordinates)
            proto.url = this_file.url
            return

    data_as_bytes: bytes
    if data is None:
        # Allow empty values so media players can be shown without media.
        return
    elif isinstance(data, bytes):
        data_as_bytes = data
    elif isinstance(data, io.BytesIO):
        data.seek(0)
        data_as_bytes = data.getvalue()
    elif isinstance(data, io.RawIOBase) or isinstance(data, io.BufferedReader):
        data.seek(0)
        read_data = data.read()
        if read_data is None:
            return
        else:
            data_as_bytes = read_data
    elif type_util.is_type(data, "numpy.ndarray"):
        data_as_bytes = data.tobytes()
    else:
        raise RuntimeError("Invalid binary data format: %s" % type(data))

    this_file = in_memory_file_manager.add(data_as_bytes, mimetype,
                                           coordinates)
    proto.url = this_file.url
Example #11
def marshall(
    proto: PlotlyChartProto,
    figure_or_data: FigureOrData,
    use_container_width: bool,
    sharing: SharingMode,
    **kwargs: Any,
) -> None:
    """Marshall a proto with a Plotly spec.

    See DeltaGenerator.plotly_chart for docs.
    """
    # NOTE: "figure_or_data" is the name used in Plotly's .plot() method
    # for their main parameter. I don't like the name, but it's best to keep
    # it in sync with what Plotly calls it.

    import plotly.tools

    if type_util.is_type(figure_or_data, "matplotlib.figure.Figure"):
        figure = plotly.tools.mpl_to_plotly(figure_or_data)

    else:
        figure = plotly.tools.return_figure_from_figure_or_data(
            figure_or_data, validate_figure=True)

    if not isinstance(sharing, str) or sharing.lower() not in SHARING_MODES:
        raise ValueError("Invalid sharing mode for Plotly chart: %s" % sharing)

    proto.use_container_width = use_container_width

    if sharing == "streamlit":
        import plotly.utils

        config = dict(kwargs.get("config", {}))
        # Copy over some kwargs to config dict. Plotly does the same in plot().
        config.setdefault("showLink", kwargs.get("show_link", False))
        config.setdefault("linkText", kwargs.get("link_text", False))

        proto.figure.spec = json.dumps(figure,
                                       cls=plotly.utils.PlotlyJSONEncoder)
        proto.figure.config = json.dumps(config)

    else:
        url = _plot_to_url_or_load_cached_url(figure,
                                              sharing=sharing,
                                              auto_open=False,
                                              **kwargs)
        proto.url = _get_embed_url(url)
Example #12
    def _to_bytes(self, obj: Any, context: Optional[Context]) -> bytes:
        """Hash objects to bytes, including code with dependencies.

        Python's built-in `hash` does not produce consistent results across
        runs.
        """

        if isinstance(obj, unittest.mock.Mock):
            # Mock objects can appear to be infinitely
            # deep, so we don't try to hash them at all.
            return self.to_bytes(id(obj))

        elif isinstance(obj, bytes) or isinstance(obj, bytearray):
            return obj

        elif type_util.get_fqn_type(obj) in self._hash_funcs:
            # Escape hatch for unsupported objects
            hash_func = self._hash_funcs[type_util.get_fqn_type(obj)]
            try:
                output = hash_func(obj)
            except BaseException as e:
                raise UserHashError(e, obj, hash_func=hash_func)

            return self.to_bytes(output)

        elif isinstance(obj, str):
            return obj.encode()

        elif isinstance(obj, float):
            return self.to_bytes(hash(obj))

        elif isinstance(obj, int):
            return _int_to_bytes(obj)

        elif isinstance(obj, (list, tuple)):
            h = hashlib.new("md5")
            for item in obj:
                self.update(h, item, context)
            return h.digest()

        elif isinstance(obj, dict):
            h = hashlib.new("md5")
            for item in obj.items():
                self.update(h, item, context)
            return h.digest()

        elif obj is None:
            return b"0"

        elif obj is True:
            return b"1"

        elif obj is False:
            return b"0"

        elif type_util.is_type(
                obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                    obj, "pandas.core.series.Series"):
            import pandas as pd

            if len(obj) >= _PANDAS_ROWS_LARGE:
                obj = obj.sample(n=_PANDAS_SAMPLE_SIZE, random_state=0)
            try:
                return b"%s" % pd.util.hash_pandas_object(obj).sum()
            except TypeError:
                # Use pickle if pandas cannot hash the object, for example if
                # it contains unhashable objects.
                return b"%s" % pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)

        elif type_util.is_type(obj, "numpy.ndarray"):
            h = hashlib.new("md5")
            self.update(h, obj.shape)

            if obj.size >= _NP_SIZE_LARGE:
                import numpy as np

                state = np.random.RandomState(0)
                obj = state.choice(obj.flat, size=_NP_SAMPLE_SIZE)

            self.update(h, obj.tobytes())
            return h.digest()

        elif inspect.isbuiltin(obj):
            return bytes(obj.__name__.encode())

        elif any(
                type_util.is_type(obj, typename)
                for typename in _FFI_TYPE_NAMES):
            return self.to_bytes(None)

        elif type_util.is_type(obj,
                               "builtins.mappingproxy") or type_util.is_type(
                                   obj, "builtins.dict_items"):
            return self.to_bytes(dict(obj))

        elif type_util.is_type(obj, "builtins.getset_descriptor"):
            return bytes(obj.__qualname__.encode())

        elif isinstance(obj, UploadedFile):
            # UploadedFile is a BytesIO (thus IOBase) but has a name.
            # It does not have a timestamp, so this must come before
            # temporary files.
            h = hashlib.new("md5")
            self.update(h, obj.name)
            self.update(h, obj.tell())
            self.update(h, obj.getvalue())
            return h.digest()

        elif hasattr(
                obj,
                "name") and (isinstance(obj, io.IOBase)
                             # Handle temporary files used during testing
                             or isinstance(obj, tempfile._TemporaryFileWrapper
                                           )  # type: ignore[attr-defined]
                             ):
            # Hash files as name + last modification date + offset.
            # NB: we're using hasattr("name") to differentiate between
            # on-disk and in-memory StringIO/BytesIO file representations.
            # That means that this condition must come *before* the next
            # condition, which just checks for StringIO/BytesIO.
            h = hashlib.new("md5")
            obj_name = getattr(obj, "name",
                               "wonthappen")  # Just to appease MyPy.
            self.update(h, obj_name)
            self.update(h, os.path.getmtime(obj_name))
            self.update(h, obj.tell())
            return h.digest()

        elif isinstance(obj, Pattern):
            return self.to_bytes([obj.pattern, obj.flags])

        elif isinstance(obj, io.StringIO) or isinstance(obj, io.BytesIO):
            # Hash in-memory StringIO/BytesIO by their full contents
            # and seek position.
            h = hashlib.new("md5")
            self.update(h, obj.tell())
            self.update(h, obj.getvalue())
            return h.digest()

        elif any(
                type_util.get_fqn(x) == "sqlalchemy.pool.base.Pool"
                for x in type(obj).__bases__):
            # Get connect_args from the closure of the creator function. It includes
            # arguments parsed from the URL and those passed in via `connect_args`.
            # However, if a custom `creator` function is passed in, we don't
            # expect to get this data.
            cargs = obj._creator.__closure__
            cargs = [cargs[0].cell_contents, cargs[1].cell_contents
                     ] if cargs else None

            # Sort kwargs since hashing dicts is sensitive to key order
            if cargs:
                cargs[1] = dict(
                    collections.OrderedDict(
                        sorted(cargs[1].items(), key=lambda t: t[0])))

            reduce_data = obj.__reduce__()

            # Remove thread related objects
            for attr in [
                    "_overflow_lock",
                    "_pool",
                    "_conn",
                    "_fairy",
                    "_threadconns",
                    "logger",
            ]:
                reduce_data[2].pop(attr, None)

            return self.to_bytes([reduce_data, cargs])

        elif type_util.is_type(obj, "sqlalchemy.engine.base.Engine"):
            # Remove the url because it's overwritten by creator and connect_args
            reduce_data = obj.__reduce__()
            reduce_data[2].pop("url", None)
            reduce_data[2].pop("logger", None)

            return self.to_bytes(reduce_data)

        elif type_util.is_type(obj, "numpy.ufunc"):
            # For numpy.remainder, this returns remainder.
            return bytes(obj.__name__.encode())

        elif type_util.is_type(obj, "socket.socket"):
            return self.to_bytes(id(obj))

        elif any(
                type_util.get_fqn(x) == "torch.nn.modules.module.Module"
                for x in type(obj).__bases__):
            return self.to_bytes(id(obj))

        elif type_util.is_type(obj,
                               "tensorflow.python.client.session.Session"):
            return self.to_bytes(id(obj))

        elif type_util.is_type(obj, "torch.Tensor") or type_util.is_type(
                obj, "torch._C._TensorBase"):
            return self.to_bytes([obj.detach().numpy(), obj.grad])

        elif any(
                type_util.is_type(obj, typename)
                for typename in _KERAS_TYPE_NAMES):
            return self.to_bytes(id(obj))

        elif type_util.is_type(
                obj,
                "tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject",
        ):
            return self.to_bytes(id(obj))

        elif inspect.isroutine(obj):
            if hasattr(obj, "__wrapped__"):
                # Ignore the wrapper of wrapped functions.
                return self.to_bytes(obj.__wrapped__)

            if obj.__module__.startswith("streamlit"):
                # Ignore streamlit modules even if they are in the CWD
                # (e.g. during development).
                return self.to_bytes("%s.%s" % (obj.__module__, obj.__name__))

            h = hashlib.new("md5")

            if self._file_should_be_hashed(obj.__code__.co_filename):
                context = _get_context(obj)
                if obj.__defaults__:
                    self.update(h, obj.__defaults__, context)
                h.update(self._code_to_bytes(obj.__code__, context, func=obj))
            else:
                # Don't hash code that is not in the current working directory.
                self.update(h, obj.__module__)
                self.update(h, obj.__name__)
            return h.digest()

        elif inspect.iscode(obj):
            if context is None:
                raise RuntimeError("context must be defined when hashing code")
            return self._code_to_bytes(obj, context)

        elif inspect.ismodule(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # so the current warning is quite annoying...
            # st.warning(('Streamlit does not support hashing modules. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name for internal modules.
            return self.to_bytes(obj.__name__)

        elif inspect.isclass(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # (e.g. in every "except" statement) so the current warning is
            # quite annoying...
            # st.warning(('Streamlit does not support hashing classes. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name of classes.
            return self.to_bytes(obj.__name__)

        elif isinstance(obj, functools.partial):
            # The return value of functools.partial is not a plain function:
            # it's a callable object that remembers the original function plus
            # the values you bound to it. So here we need to special-case it.
            h = hashlib.new("md5")
            self.update(h, obj.args)
            self.update(h, obj.func)
            self.update(h, obj.keywords)
            return h.digest()

        else:
            # As a last resort, hash the output of the object's __reduce__ method
            h = hashlib.new("md5")
            try:
                reduce_data = obj.__reduce__()
            except BaseException as e:
                raise UnhashableTypeError(e, obj)

            for item in reduce_data:
                self.update(h, item, context)
            return h.digest()
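
The hasher above leans on an _int_to_bytes helper that is not shown here. A minimal sketch of such a helper, assuming the goal is an integer encoding that is stable across runs (unlike Python's built-in hash()):

def _int_to_bytes(i: int) -> bytes:
    # Smallest signed little-endian encoding that holds the value;
    # the +8 leaves room for the sign bit. (Hypothetical sketch.)
    num_bytes = (i.bit_length() + 8) // 8
    return i.to_bytes(num_bytes, "little", signed=True)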
Example #13
    def _to_bytes(self, obj, context):
        """Hash objects to bytes, including code with dependencies.
        Python's built-in `hash` does not produce consistent results across
        runs."""

        try:
            if _is_magicmock(obj):
                # MagicMock can result in objects that appear to be infinitely
                # deep, so we don't try to hash them at all.
                return self.to_bytes(id(obj))
            elif isinstance(obj, bytes) or isinstance(obj, bytearray):
                return obj
            elif isinstance(obj, string_types):  # noqa: F821
                # Don't allow the user to override string since
                # str == bytes on python 2
                return obj.encode()
            elif type(obj) in self.hash_funcs:
                # Escape hatch for unsupported objects
                return self.to_bytes(self.hash_funcs[type(obj)](obj))
            elif isinstance(obj, float):
                return self.to_bytes(hash(obj))
            elif isinstance(obj, int):
                return _int_to_bytes(obj)
            elif isinstance(obj, list) or isinstance(obj, tuple):
                h = hashlib.new(self.name)

                # Hash the name of the container so that ["a"] hashes differently from ("a",)
                # Otherwise we'd only be hashing the data and the hashes would be the same.
                self._update(h, type(obj).__name__.encode() + b":")
                for e in obj:
                    self._update(h, e, context)
                return h.digest()
            elif isinstance(obj, dict):
                h = hashlib.new(self.name)

                self._update(h, type(obj).__name__.encode() + b":")
                for e in obj.items():
                    self._update(h, e, context)
                return h.digest()
            elif obj is None:
                # Special string since hashes change between sessions.
                # We don't use Python's `hash` since hashes are not consistent
                # across runs.
                return NONESENSE
            elif obj is True:
                return b"bool:1"
            elif obj is False:
                return b"bool:0"
            elif type_util.is_type(
                    obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                        obj, "pandas.core.series.Series"):
                import pandas as pd

                if len(obj) >= PANDAS_ROWS_LARGE:
                    obj = obj.sample(n=PANDAS_SAMPLE_SIZE, random_state=0)
                try:
                    return pd.util.hash_pandas_object(obj).sum()
                except TypeError:
                    # Use pickle if pandas cannot hash the object, for example if
                    # it contains unhashable objects.
                    return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
            elif type_util.is_type(obj, "numpy.ndarray"):
                h = hashlib.new(self.name)
                self._update(h, obj.shape)

                if obj.size >= NP_SIZE_LARGE:
                    import numpy as np

                    state = np.random.RandomState(0)
                    obj = state.choice(obj.flat, size=NP_SAMPLE_SIZE)

                self._update(h, obj.tobytes())
                return h.digest()
            elif inspect.isbuiltin(obj):
                return self.to_bytes(obj.__name__)
            elif hasattr(obj, "name") and (
                    isinstance(obj, io.IOBase)
                    # Handle temporary files used during testing
                    or isinstance(obj, tempfile._TemporaryFileWrapper) or
                (not compatibility.is_running_py3()
                 and isinstance(obj, file))):
                # Hash files as name + last modification date + offset.
                h = hashlib.new(self.name)
                self._update(h, obj.name)
                self._update(h, os.path.getmtime(obj.name))
                self._update(h, obj.tell())
                return h.digest()
            elif inspect.isroutine(obj):
                if hasattr(obj, "__wrapped__"):
                    # Ignore the wrapper of wrapped functions.
                    return self.to_bytes(obj.__wrapped__)

                if obj.__module__.startswith("streamlit"):
                    # Ignore streamlit modules even if they are in the CWD
                    # (e.g. during development).
                    return self.to_bytes("%s.%s" %
                                         (obj.__module__, obj.__name__))

                h = hashlib.new(self.name)
                if self._file_should_be_hashed(obj.__code__.co_filename):
                    context = _get_context(obj)
                    if obj.__defaults__:
                        self._update(h, obj.__defaults__, context)
                    h.update(self._code_to_bytes(obj.__code__, context))
                else:
                    # Don't hash code that is not in the current working directory.
                    self._update(h, obj.__module__)
                    self._update(h, obj.__name__)
                return h.digest()
            elif inspect.iscode(obj):
                return self._code_to_bytes(obj, context)
            elif inspect.ismodule(obj):
                # TODO: Figure out how to best show this kind of warning to the
                # user. In the meantime, show nothing. This scenario is too common,
                # so the current warning is quite annoying...
                # st.warning(('Streamlit does not support hashing modules. '
                #             'We did not hash `%s`.') % obj.__name__)
                # TODO: Hash more than just the name for internal modules.
                return self.to_bytes(obj.__name__)
            elif inspect.isclass(obj):
                # TODO: Figure out how to best show this kind of warning to the
                # user. In the meantime, show nothing. This scenario is too common,
                # (e.g. in every "except" statement) so the current warning is
                # quite annoying...
                # st.warning(('Streamlit does not support hashing classes. '
                #             'We did not hash `%s`.') % obj.__name__)
                # TODO: Hash more than just the name of classes.
                return self.to_bytes(obj.__name__)
            elif isinstance(obj, functools.partial):
                # The return value of functools.partial is not a plain function:
                # it's a callable object that remembers the original function plus
                # the values you bound to it. So here we need to special-case it.
                h = hashlib.new(self.name)
                self._update(h, obj.args)
                self._update(h, obj.func)
                self._update(h, obj.keywords)
                return h.digest()
            else:
                # As a last resort
                h = hashlib.new(self.name)

                self._update(h, type(obj).__name__.encode() + b":")
                for e in obj.__reduce__():
                    self._update(h, e, context)
                return h.digest()
        except UnhashableType as e:
            raise e
        except Exception as e:
            LOGGER.error(e)
            msg = _hashing_error_message(type(obj))
            raise UnhashableType(msg)
Example #14
def _is_magicmock(obj):
    return type_util.is_type(obj,
                             "unittest.mock.MagicMock") or type_util.is_type(
                                 obj, "mock.mock.MagicMock")
Example #15
    def _to_bytes(self, obj, context):
        """Hash objects to bytes, including code with dependencies.
        Python's built-in `hash` does not produce consistent results across
        runs."""

        try:
            if _is_magicmock(obj):
                # MagicMock can result in objects that appear to be infinitely
                # deep, so we don't try to hash them at all.
                return self.to_bytes(id(obj))
            elif isinstance(obj, bytes) or isinstance(obj, bytearray):
                return obj
            elif isinstance(obj, string_types):  # noqa: F821
                return obj.encode()
            elif isinstance(obj, float):
                return self.to_bytes(hash(obj))
            elif isinstance(obj, int):
                return _int_to_bytes(obj)
            elif isinstance(obj, list) or isinstance(obj, tuple):
                h = hashlib.new(self.name)
                # Add the type to distinguish x from [x].
                self._update(h, type(obj).__name__.encode() + b":")
                for e in obj:
                    self._update(h, e, context)
                return h.digest()
            elif obj is None:
                # Special string since hashes change between sessions.
                # We don't use Python's `hash` since hashes are not consistent
                # across runs.
                return b"none:"
            elif obj is True:
                return b"bool:1"
            elif obj is False:
                return b"bool:0"
            elif type_util.is_type(
                    obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                        obj, "pandas.core.series.Series"):
                import pandas as pd

                if len(obj) >= PANDAS_ROWS_LARGE:
                    obj = obj.sample(n=PANDAS_SAMPLE_SIZE, random_state=0)
                try:
                    return pd.util.hash_pandas_object(obj).sum()
                except TypeError:
                    # Use pickle if pandas cannot hash the object, for example if
                    # it contains unhashable objects.
                    return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
            elif type_util.is_type(obj, "numpy.ndarray"):
                h = hashlib.new(self.name)
                self._update(h, obj.shape)

                if obj.size >= NP_SIZE_LARGE:
                    import numpy as np

                    state = np.random.RandomState(0)
                    obj = state.choice(obj.flat, size=NP_SAMPLE_SIZE)

                self._update(h, obj.tobytes())
                return h.digest()
            elif inspect.isbuiltin(obj):
                return self.to_bytes(obj.__name__)
            elif hasattr(obj, "name") and (
                    isinstance(obj, io.IOBase) or
                (isinstance(obj.name, string_types)  # noqa: F821
                 and os.path.exists(obj.name))):
                # Hash files as name + last modification date + offset.
                h = hashlib.new(self.name)
                self._update(h, obj.name)
                self._update(h, os.path.getmtime(obj.name))
                self._update(h, obj.tell())
                return h.digest()
            elif inspect.isroutine(obj):
                if hasattr(obj, "__wrapped__"):
                    # Ignore the wrapper of wrapped functions.
                    return self.to_bytes(obj.__wrapped__)

                if obj.__module__.startswith("streamlit"):
                    # Ignore streamlit modules even if they are in the CWD
                    # (e.g. during development).
                    return self.to_bytes("%s.%s" %
                                         (obj.__module__, obj.__name__))

                h = hashlib.new(self.name)
                filepath = os.path.abspath(obj.__code__.co_filename)

                if file_util.file_is_in_folder_glob(
                        filepath, self._get_main_script_directory()
                ) and not self._folder_black_list.is_blacklisted(filepath):
                    context = _get_context(obj)
                    if obj.__defaults__:
                        self._update(h, obj.__defaults__, context)
                    h.update(self._code_to_bytes(obj.__code__, context))
                else:
                    # Don't hash code that is not in the current working directory.
                    self._update(h, obj.__module__)
                    self._update(h, obj.__name__)
                return h.digest()
            elif inspect.iscode(obj):
                return self._code_to_bytes(obj, context)
            elif inspect.ismodule(obj):
                # TODO: Figure out how to best show this kind of warning to the
                # user. In the meantime, show nothing. This scenario is too common,
                # so the current warning is quite annoying...
                # st.warning(('Streamlit does not support hashing modules. '
                #             'We did not hash `%s`.') % obj.__name__)
                # TODO: Hash more than just the name for internal modules.
                return self.to_bytes(obj.__name__)
            elif inspect.isclass(obj):
                # TODO: Figure out how to best show this kind of warning to the
                # user. In the meantime, show nothing. This scenario is too common,
                # (e.g. in every "except" statement) so the current warning is
                # quite annoying...
                # st.warning(('Streamlit does not support hashing classes. '
                #             'We did not hash `%s`.') % obj.__name__)
                # TODO: Hash more than just the name of classes.
                return self.to_bytes(obj.__name__)
            elif isinstance(obj, functools.partial):
                # The return value of functools.partial is not a plain function:
                # it's a callable object that remembers the original function plus
                # the values you bound to it. So here we need to special-case it.
                h = hashlib.new(self.name)
                self._update(h, obj.args)
                self._update(h, obj.func)
                self._update(h, obj.keywords)
                return h.digest()
            else:
                try:
                    # As a last resort, we pickle the object to hash it.
                    return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
                except:
                    st.warning(
                        _hashing_error_message(
                            "Streamlit cannot hash an object of type %s." %
                            type(obj)))
        except:
            st.warning(
                _hashing_error_message(
                    "Streamlit failed to hash an object of type %s." %
                    type(obj)))
Example #16
def _is_pandas_styler(obj):
    return type_util.is_type(obj, "pandas.io.formats.style.Styler")
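
A hypothetical usage sketch, assuming pandas is installed:

import pandas as pd

styler = pd.DataFrame({"a": [1, 2]}).style.highlight_max()
assert _is_pandas_styler(styler)
assert not _is_pandas_styler(pd.DataFrame())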
Example #17
    def write(self, *args, **kwargs):
        """Write arguments to the app.

        This is the Swiss Army knife of Streamlit commands: it does different
        things depending on what you throw at it. Unlike other Streamlit commands,
        write() has some unique properties:

        1. You can pass in multiple arguments, all of which will be written.
        2. Its behavior depends on the input types as follows.
        3. It returns None, so its "slot" in the App cannot be reused.

        Parameters
        ----------
        *args : any
            One or many objects to print to the App.

            Arguments are handled as follows:

            - write(string)     : Prints the formatted Markdown string, with
                support for LaTeX expressions and emoji shortcodes.
                See docs for st.markdown for more.
            - write(data_frame) : Displays the DataFrame as a table.
            - write(error)      : Prints an exception specially.
            - write(func)       : Displays information about a function.
            - write(module)     : Displays information about the module.
            - write(dict)       : Displays dict in an interactive widget.
            - write(obj)        : The default is to print str(obj).
            - write(mpl_fig)    : Displays a Matplotlib figure.
            - write(altair)     : Displays an Altair chart.
            - write(keras)      : Displays a Keras model.
            - write(graphviz)   : Displays a Graphviz graph.
            - write(plotly_fig) : Displays a Plotly figure.
            - write(bokeh_fig)  : Displays a Bokeh figure.
            - write(sympy_expr) : Prints SymPy expression using LaTeX.

        unsafe_allow_html : bool
            This is a keyword-only argument that defaults to False.

            By default, any HTML tags found in strings will be escaped and
            therefore treated as pure text. This behavior may be turned off by
            setting this argument to True.

            That said, we *strongly advise against it*. It is hard to write secure
            HTML, so by using this argument you may be compromising your users'
            security. For more information, see:

            https://github.com/streamlit/streamlit/issues/152

            **Also note that `unsafe_allow_html` is a temporary measure and may be
            removed from Streamlit at any time.**

            If you decide to turn on HTML anyway, we ask you to please tell us your
            exact use case here:
            https://discuss.streamlit.io/t/96 .

            This will help us come up with safe APIs that allow you to do what you
            want.

        Example
        -------

        Its basic use case is to draw Markdown-formatted text whenever the
        input is a string:

        >>> write('Hello, *World!* :sunglasses:')

        ..  output::
            https://static.streamlit.io/0.50.2-ZWk9/index.html?id=Pn5sjhgNs4a8ZbiUoSTRxE
            height: 50px

        As mentioned earlier, `st.write()` also accepts other data formats, such as
        numbers, data frames, styled data frames, and assorted objects:

        >>> st.write(1234)
        >>> st.write(pd.DataFrame({
        ...     'first column': [1, 2, 3, 4],
        ...     'second column': [10, 20, 30, 40],
        ... }))

        ..  output::
            https://static.streamlit.io/0.25.0-2JkNY/index.html?id=FCp9AMJHwHRsWSiqMgUZGD
            height: 250px

        Finally, you can pass in multiple arguments to do things like:

        >>> st.write('1 + 1 = ', 2)
        >>> st.write('Below is a DataFrame:', data_frame, 'Above is a dataframe.')

        ..  output::
            https://static.streamlit.io/0.25.0-2JkNY/index.html?id=DHkcU72sxYcGarkFbf4kK1
            height: 300px

        Oh, one more thing: `st.write` accepts chart objects too! For example:

        >>> import pandas as pd
        >>> import numpy as np
        >>> import altair as alt
        >>>
        >>> df = pd.DataFrame(
        ...     np.random.randn(200, 3),
        ...     columns=['a', 'b', 'c'])
        ...
        >>> c = alt.Chart(df).mark_circle().encode(
        ...     x='a', y='b', size='c', color='c', tooltip=['a', 'b', 'c'])
        >>>
        >>> st.write(c)

        ..  output::
            https://static.streamlit.io/0.25.0-2JkNY/index.html?id=8jmmXR8iKoZGV4kXaKGYV5
            height: 200px

        """
        string_buffer = []  # type: List[str]
        unsafe_allow_html = kwargs.get("unsafe_allow_html", False)

        # This bans some valid cases like: e = st.empty(); e.write("a", "b").
        # BUT: 1) such cases are rare, 2) this rule is easy to understand,
        # and 3) this rule should be removed once we have st.container()
        if not self.dg._is_top_level and len(args) > 1:
            raise StreamlitAPIException(
                "Cannot replace a single element with multiple elements.\n\n"
                "The `write()` method only supports multiple elements when "
                "inserting elements rather than replacing. That is, only "
                "when called as `st.write()` or `st.sidebar.write()`.")

        def flush_buffer():
            if string_buffer:
                self.dg.markdown(
                    " ".join(string_buffer),
                    unsafe_allow_html=unsafe_allow_html,
                )
                string_buffer[:] = []

        for arg in args:
            # Order matters!
            if isinstance(arg, str):
                string_buffer.append(arg)
            elif type_util.is_dataframe_like(arg):
                flush_buffer()
                if len(np.shape(arg)) > 2:
                    self.dg.text(arg)
                else:
                    self.dg.dataframe(arg)
            elif isinstance(arg, Exception):
                flush_buffer()
                self.dg.exception(arg)
            elif isinstance(arg, HELP_TYPES):
                flush_buffer()
                self.dg.help(arg)
            elif type_util.is_altair_chart(arg):
                flush_buffer()
                self.dg.altair_chart(arg)
            elif type_util.is_type(arg, "matplotlib.figure.Figure"):
                flush_buffer()
                self.dg.pyplot(arg)
            elif type_util.is_plotly_chart(arg):
                flush_buffer()
                self.dg.plotly_chart(arg)
            elif type_util.is_type(arg, "bokeh.plotting.figure.Figure"):
                flush_buffer()
                self.dg.bokeh_chart(arg)
            elif type_util.is_graphviz_chart(arg):
                flush_buffer()
                self.dg.graphviz_chart(arg)
            elif type_util.is_sympy_expession(arg):
                flush_buffer()
                self.dg.latex(arg)
            elif type_util.is_keras_model(arg):
                from tensorflow.python.keras.utils import vis_utils

                flush_buffer()
                dot = vis_utils.model_to_dot(arg)
                self.dg.graphviz_chart(dot.to_string())
            elif isinstance(arg, (dict, list)):
                flush_buffer()
                self.dg.json(arg)
            elif type_util.is_namedtuple(arg):
                flush_buffer()
                self.dg.json(json.dumps(arg._asdict()))
            elif type_util.is_pydeck(arg):
                flush_buffer()
                self.dg.pydeck_chart(arg)
            else:
                string_buffer.append("`%s`" % str(arg).replace("`", "\\`"))

        flush_buffer()
Example #18
    def _to_bytes(self, obj: Any) -> bytes:
        """Hash objects to bytes, including code with dependencies.

        Python's built-in `hash` does not produce consistent results across
        runs.
        """

        if isinstance(obj, unittest.mock.Mock):
            # Mock objects can appear to be infinitely
            # deep, so we don't try to hash them at all.
            return self.to_bytes(id(obj))

        elif isinstance(obj, bytes) or isinstance(obj, bytearray):
            return obj

        elif isinstance(obj, str):
            return obj.encode()

        elif isinstance(obj, float):
            return self.to_bytes(hash(obj))

        elif isinstance(obj, int):
            return _int_to_bytes(obj)

        elif isinstance(obj, (list, tuple)):
            h = hashlib.new("md5")
            for item in obj:
                self.update(h, item)
            return h.digest()

        elif isinstance(obj, dict):
            h = hashlib.new("md5")
            for item in obj.items():
                self.update(h, item)
            return h.digest()

        elif obj is None:
            return b"0"

        elif obj is True:
            return b"1"

        elif obj is False:
            return b"0"

        elif type_util.is_type(
                obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                    obj, "pandas.core.series.Series"):
            import pandas as pd

            if len(obj) >= _PANDAS_ROWS_LARGE:
                obj = obj.sample(n=_PANDAS_SAMPLE_SIZE, random_state=0)
            try:
                return b"%s" % pd.util.hash_pandas_object(obj).sum()
            except TypeError:
                # Use pickle if pandas cannot hash the object, for example if
                # it contains unhashable objects.
                return b"%s" % pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)

        elif type_util.is_type(obj, "numpy.ndarray"):
            h = hashlib.new("md5")
            self.update(h, obj.shape)

            if obj.size >= _NP_SIZE_LARGE:
                import numpy as np

                state = np.random.RandomState(0)
                obj = state.choice(obj.flat, size=_NP_SAMPLE_SIZE)

            self.update(h, obj.tobytes())
            return h.digest()

        elif inspect.isbuiltin(obj):
            return bytes(obj.__name__.encode())

        elif type_util.is_type(obj,
                               "builtins.mappingproxy") or type_util.is_type(
                                   obj, "builtins.dict_items"):
            return self.to_bytes(dict(obj))

        elif type_util.is_type(obj, "builtins.getset_descriptor"):
            return bytes(obj.__qualname__.encode())

        elif isinstance(obj, UploadedFile):
            # UploadedFile is a BytesIO (thus IOBase) but has a name.
            # It does not have a timestamp, so this must come before
            # temporary files.
            h = hashlib.new("md5")
            self.update(h, obj.name)
            self.update(h, obj.tell())
            self.update(h, obj.getvalue())
            return h.digest()

        elif hasattr(obj, "name") and (
                isinstance(obj, io.IOBase)
                # Handle temporary files used during testing
                or isinstance(obj, tempfile._TemporaryFileWrapper)):
            # Hash files as name + last modification date + offset.
            # NB: we're using hasattr("name") to differentiate between
            # on-disk and in-memory StringIO/BytesIO file representations.
            # That means that this condition must come *before* the next
            # condition, which just checks for StringIO/BytesIO.
            h = hashlib.new("md5")
            obj_name = getattr(obj, "name",
                               "wonthappen")  # Just to appease MyPy.
            self.update(h, obj_name)
            self.update(h, os.path.getmtime(obj_name))
            self.update(h, obj.tell())
            return h.digest()

        elif isinstance(obj, Pattern):
            return self.to_bytes([obj.pattern, obj.flags])

        elif isinstance(obj, io.StringIO) or isinstance(obj, io.BytesIO):
            # Hash in-memory StringIO/BytesIO by their full contents
            # and seek position.
            h = hashlib.new("md5")
            self.update(h, obj.tell())
            self.update(h, obj.getvalue())
            return h.digest()

        elif type_util.is_type(obj, "numpy.ufunc"):
            # For numpy.remainder, this returns remainder.
            return bytes(obj.__name__.encode())

        elif inspect.ismodule(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # so the current warning is quite annoying...
            # st.warning(('Streamlit does not support hashing modules. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name for internal modules.
            return self.to_bytes(obj.__name__)

        elif inspect.isclass(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # (e.g. in every "except" statement) so the current warning is
            # quite annoying...
            # st.warning(('Streamlit does not support hashing classes. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name of classes.
            return self.to_bytes(obj.__name__)

        elif isinstance(obj, functools.partial):
            # The return value of functools.partial is not a plain function:
            # it's a callable object that remembers the original function plus
            # the values you bound to it. So here we need to special-case it.
            h = hashlib.new("md5")
            self.update(h, obj.args)
            self.update(h, obj.func)
            self.update(h, obj.keywords)
            return h.digest()

        else:
            # As a last resort, hash the output of the object's __reduce__ method
            h = hashlib.new("md5")
            try:
                reduce_data = obj.__reduce__()
            except BaseException as e:
                raise UnhashableTypeError() from e

            for item in reduce_data:
                self.update(h, item)
            return h.digest()
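
The branches above repeatedly call a self.update(...) helper that is not shown. A minimal sketch, under the assumption that it simply folds an object's stable byte representation into the running hashlib digest:

def update(self, h, obj):
    # Hypothetical: convert obj to stable bytes via to_bytes, then feed
    # them into the running hashlib object.
    h.update(self.to_bytes(obj))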
Example #19
def write(*args, **kwargs):
    """Write arguments to the app.

    This is the swiss-army knife of Streamlit commands. It does different
    things depending on what you throw at it.

    Unlike other Streamlit commands, write() has some unique properties:

        1. You can pass in multiple arguments, all of which will be written.
        2. Its behavior depends on the input types as follows.
        3. It returns None, so its "slot" in the App cannot be reused.

    Parameters
    ----------
    *args : any
        One or many objects to print to the App.

        Arguments are handled as follows:

            - write(string)     : Prints the formatted Markdown string.
            - write(data_frame) : Displays the DataFrame as a table.
            - write(error)      : Prints an exception specially.
            - write(func)       : Displays information about a function.
            - write(module)     : Displays information about the module.
            - write(dict)       : Displays dict in an interactive widget.
            - write(obj)        : The default is to print str(obj).
            - write(mpl_fig)    : Displays a Matplotlib figure.
            - write(altair)     : Displays an Altair chart.
            - write(keras)      : Displays a Keras model.
            - write(graphviz)   : Displays a Graphviz graph.
            - write(plotly_fig) : Displays a Plotly figure.
            - write(bokeh_fig)  : Displays a Bokeh figure.
            - write(sympy_expr) : Prints SymPy expression using LaTeX.

    unsafe_allow_html : bool
        This is a keyword-only argument that defaults to False.

        By default, any HTML tags found in strings will be escaped and
        therefore treated as pure text. This behavior may be turned off by
        setting this argument to True.

        That said, we *strongly advise against it*. It is hard to write secure
        HTML, so by using this argument you may be compromising your users'
        security. For more information, see:

        https://github.com/streamlit/streamlit/issues/152

        *Also note that `unsafe_allow_html` is a temporary measure and may be
        removed from Streamlit at any time.*

        If you decide to turn on HTML anyway, we ask you to please tell us your
        exact use case here:

        https://discuss.streamlit.io/t/96

        This will help us come up with safe APIs that allow you to do what you
        want.

    Example
    -------

    Its simplest use case is to draw Markdown-formatted text whenever the
    input is a string:

    >>> write('Hello, *World!*')

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=DUJaq97ZQGiVAFi6YvnihF
       height: 50px

    As mentioned earlier, `st.write()` also accepts other data formats, such as
    numbers, data frames, styled data frames, and assorted objects:

    >>> st.write(1234)
    >>> st.write(pd.DataFrame({
    ...     'first column': [1, 2, 3, 4],
    ...     'second column': [10, 20, 30, 40],
    ... }))

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=FCp9AMJHwHRsWSiqMgUZGD
       height: 250px

    Finally, you can pass in multiple arguments to do things like:

    >>> st.write('1 + 1 = ', 2)
    >>> st.write('Below is a DataFrame:', data_frame, 'Above is a dataframe.')

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=DHkcU72sxYcGarkFbf4kK1
       height: 300px

    Oh, one more thing: `st.write` accepts chart objects too! For example:

    >>> import pandas as pd
    >>> import numpy as np
    >>> import altair as alt
    >>>
    >>> df = pd.DataFrame(
    ...     np.random.randn(200, 3),
    ...     columns=['a', 'b', 'c'])
    ...
    >>> c = alt.Chart(df).mark_circle().encode(
    ...     x='a', y='b', size='c', color='c')
    >>>
    >>> st.write(c)

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=8jmmXR8iKoZGV4kXaKGYV5
       height: 200px

    """
    # Python2 doesn't support this syntax
    #   def write(*args, unsafe_allow_html=False)
    # so we do this instead:
    unsafe_allow_html = kwargs.get("unsafe_allow_html", False)

    try:
        string_buffer = []

        def flush_buffer():
            if string_buffer:
                markdown(
                    " ".join(string_buffer), unsafe_allow_html=unsafe_allow_html
                )  # noqa: F821
                string_buffer[:] = []

        for arg in args:
            # Order matters!
            if isinstance(arg, string_types):  # noqa: F821
                string_buffer.append(arg)
            elif type(arg).__name__ in _DATAFRAME_LIKE_TYPES:
                flush_buffer()
                if len(_np.shape(arg)) > 2:
                    text(arg)
                else:
                    dataframe(arg)  # noqa: F821
            elif isinstance(arg, Exception):
                flush_buffer()
                exception(arg)  # noqa: F821
            elif isinstance(arg, _HELP_TYPES):
                flush_buffer()
                help(arg)
            elif _type_util.is_altair_chart(arg):
                flush_buffer()
                altair_chart(arg)
            elif _type_util.is_type(arg, "matplotlib.figure.Figure"):
                flush_buffer()
                pyplot(arg)
            elif _type_util.is_plotly_chart(arg):
                flush_buffer()
                plotly_chart(arg)
            elif _type_util.is_type(arg, "bokeh.plotting.figure.Figure"):
                flush_buffer()
                bokeh_chart(arg)
            elif _type_util.is_graphviz_chart(arg):
                flush_buffer()
                graphviz_chart(arg)
            elif _type_util.is_sympy_expession(arg):
                flush_buffer()
                latex(arg)
            elif _type_util.is_keras_model(arg):
                from tensorflow.python.keras.utils import vis_utils

                flush_buffer()
                dot = vis_utils.model_to_dot(arg)
                graphviz_chart(dot.to_string())
            elif (type(arg) in dict_types) or (isinstance(arg, list)):  # noqa: F821
                flush_buffer()
                json(arg)
            elif _type_util.is_namedtuple(arg):
                flush_buffer()
                json(_json.dumps(arg._asdict()))
            else:
                string_buffer.append("`%s`" % str(arg).replace("`", "\\`"))

        flush_buffer()

    except Exception:
        _, exc, exc_tb = _sys.exc_info()
        exception(exc, exc_tb)  # noqa: F821
Example #20
    def _to_bytes(self, obj, context):
        """Hash objects to bytes, including code with dependencies.

        Python's built-in `hash` does not produce consistent results across
        runs.
        """

        if _is_magicmock(obj):
            # MagicMock can result in objects that appear to be infinitely
            # deep, so we don't try to hash them at all.
            return self.to_bytes(id(obj))

        elif isinstance(obj, bytes) or isinstance(obj, bytearray):
            return obj

        elif type_util.get_fqn_type(obj) in self._hash_funcs:
            # Escape hatch for unsupported objects
            hash_func = self._hash_funcs[type_util.get_fqn_type(obj)]
            try:
                output = hash_func(obj)
            except BaseException as e:
                raise UserHashError(e, obj, hash_func=hash_func)

            return self.to_bytes(output)

        elif isinstance(obj, str):
            return obj.encode()

        elif isinstance(obj, float):
            return self.to_bytes(hash(obj))

        elif isinstance(obj, int):
            return _int_to_bytes(obj)

        elif isinstance(obj, (list, tuple)):
            h = hashlib.new("md5")
            for item in obj:
                self.update(h, item, context)
            return h.digest()

        elif isinstance(obj, dict):
            h = hashlib.new("md5")
            for item in obj.items():
                self.update(h, item, context)
            return h.digest()

        elif obj is None:
            return b"0"

        elif obj is True:
            return b"1"

        elif obj is False:
            return b"0"

        elif type_util.is_type(
                obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                    obj, "pandas.core.series.Series"):
            import pandas as pd

            if len(obj) >= _PANDAS_ROWS_LARGE:
                obj = obj.sample(n=_PANDAS_SAMPLE_SIZE, random_state=0)
            try:
                return b"%s" % pd.util.hash_pandas_object(obj).sum()
            except TypeError:
                # Use pickle if pandas cannot hash the object, for example if
                # it contains unhashable objects.
                return b"%s" % pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)

        elif type_util.is_type(obj, "numpy.ndarray"):
            h = hashlib.new("md5")
            self.update(h, obj.shape)

            if obj.size >= _NP_SIZE_LARGE:
                import numpy as np

                state = np.random.RandomState(0)
                obj = state.choice(obj.flat, size=_NP_SAMPLE_SIZE)

            self.update(h, obj.tobytes())
            return h.digest()

        elif inspect.isbuiltin(obj):
            return obj.__name__.encode()

        elif hasattr(
                obj,
                "name") and (isinstance(obj, io.IOBase)
                             # Handle temporary files used during testing
                             or isinstance(obj, tempfile._TemporaryFileWrapper
                                           )  # type: ignore[attr-defined]
                             ):
            # Hash files as name + last modification date + offset.
            # NB: we're using hasattr("name") to differentiate between
            # on-disk and in-memory StringIO/BytesIO file representations.
            # That means that this condition must come *before* the next
            # condition, which just checks for StringIO/BytesIO.
            h = hashlib.new("md5")
            obj_name = getattr(obj, "name",
                               "wonthappen")  # Just to appease MyPy.
            self.update(h, obj_name)
            self.update(h, os.path.getmtime(obj_name))
            self.update(h, obj.tell())
            return h.digest()

        elif isinstance(obj, Pattern):
            return self.to_bytes([obj.pattern, obj.flags])

        elif isinstance(obj, io.StringIO) or isinstance(obj, io.BytesIO):
            # Hash in-memory StringIO/BytesIO by their full contents
            # and seek position.
            h = hashlib.new("md5")
            self.update(h, obj.tell())
            self.update(h, obj.getvalue())
            return h.digest()

        elif type_util.is_type(obj, "numpy.ufunc"):
            # For numpy.remainder, this returns remainder.
            return obj.__name__.encode()

        elif type_util.is_type(obj,
                               "tensorflow.python.client.session.Session"):
            return self.to_bytes(id(obj))

        elif inspect.isroutine(obj):
            if hasattr(obj, "__wrapped__"):
                # Ignore the wrapper of wrapped functions.
                return self.to_bytes(obj.__wrapped__)

            if obj.__module__.startswith("streamlit"):
                # Ignore streamlit modules even if they are in the CWD
                # (e.g. during development).
                return self.to_bytes("%s.%s" % (obj.__module__, obj.__name__))

            h = hashlib.new("md5")

            if self._file_should_be_hashed(obj.__code__.co_filename):
                context = _get_context(obj)
                if obj.__defaults__:
                    self.update(h, obj.__defaults__, context)
                h.update(self._code_to_bytes(obj.__code__, context))
            else:
                # Don't hash code that is not in the current working directory.
                self.update(h, obj.__module__)
                self.update(h, obj.__name__)
            return h.digest()

        elif inspect.iscode(obj):
            return self._code_to_bytes(obj, context)

        elif inspect.ismodule(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # so the current warning is quite annoying...
            # st.warning(('Streamlit does not support hashing modules. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name for internal modules.
            return self.to_bytes(obj.__name__)

        elif inspect.isclass(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # (e.g. in every "except" statement) so the current warning is
            # quite annoying...
            # st.warning(('Streamlit does not support hashing classes. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name of classes.
            return self.to_bytes(obj.__name__)

        elif isinstance(obj, functools.partial):
            # The return value of functools.partial is not a plain function:
            # it's a callable object that remembers the original function plus
            # the values you bound to it. So here we need to special-case it.
            h = hashlib.new("md5")
            self.update(h, obj.args)
            self.update(h, obj.func)
            self.update(h, obj.keywords)
            return h.digest()

        else:
            # As a last resort, hash the output of the object's __reduce__ method
            h = hashlib.new("md5")
            try:
                reduce_data = obj.__reduce__()
            except BaseException as e:
                raise UnhashableTypeError(e, obj)

            for item in reduce_data:
                self.update(h, item, context)
            return h.digest()