def _key(obj, context): """Return key for memoization.""" # use arbitrary value in place of None since the return of None # is used for control flow in .to_bytes if obj is None: return NONESENSE def is_simple(obj): return (isinstance(obj, bytes) or isinstance(obj, bytearray) or isinstance(obj, string_types) # noqa: F821 or isinstance(obj, float) or isinstance(obj, int) or isinstance(obj, bool) or obj is None) if is_simple(obj): return obj if isinstance(obj, tuple): if all(map(is_simple, obj)): return obj if isinstance(obj, list): if all(map(is_simple, obj)): return ("__l", tuple(obj)) if (type_util.is_type(obj, "pandas.core.frame.DataFrame") or type_util.is_type(obj, "numpy.ndarray") or inspect.isbuiltin(obj) or inspect.isroutine(obj) or inspect.iscode(obj)): return id(obj) return None
def _key(obj, context): """Return key for memoization.""" if obj is None: return b"none:" # special value so we can hash None def is_simple(obj): return (isinstance(obj, bytes) or isinstance(obj, bytearray) or isinstance(obj, string_types) # noqa: F821 or isinstance(obj, float) or isinstance(obj, int) or isinstance(obj, bool) or obj is None) if is_simple(obj): return obj if isinstance(obj, tuple): if all(map(is_simple, obj)): return obj if isinstance(obj, list): if all(map(is_simple, obj)): return ("__l", tuple(obj)) if (type_util.is_type(obj, "pandas.core.frame.DataFrame") or type_util.is_type(obj, "numpy.ndarray") or inspect.isbuiltin(obj) or inspect.isroutine(obj) or inspect.iscode(obj)): return id(obj) return None
def _key(obj: Optional[Any]) -> Any:
    """Return key for memoization."""

    if obj is None:
        return None

    def is_simple(obj):
        return (
            isinstance(obj, bytes)
            or isinstance(obj, bytearray)
            or isinstance(obj, str)
            or isinstance(obj, float)
            or isinstance(obj, int)
            or isinstance(obj, bool)
            or obj is None
        )

    if is_simple(obj):
        return obj

    if isinstance(obj, tuple):
        if all(map(is_simple, obj)):
            return obj

    if isinstance(obj, list):
        if all(map(is_simple, obj)):
            return ("__l", tuple(obj))

    if (
        type_util.is_type(obj, "pandas.core.frame.DataFrame")
        or type_util.is_type(obj, "numpy.ndarray")
        or inspect.isbuiltin(obj)
        or inspect.isroutine(obj)
        or inspect.iscode(obj)
    ):
        return id(obj)

    return NoResult
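# A minimal behavior sketch for the three _key variants above, assuming the
# sentinels they use (NONESENSE, b"none:", NoResult) and type_util are in
# scope. Only the "no key" / None sentinel differs per variant; the shared
# logic behaves like this:
#
#   _key(42)                 -> 42                  # simple values key to themselves
#   _key(("a", 1))           -> ("a", 1)            # tuples of simple values too
#   _key(["a", 1])           -> ("__l", ("a", 1))   # lists are tagged so [x] != (x,)
#   _key(some_dataframe)     -> id(some_dataframe)  # big objects key by identity
#   _key(object())           -> None / NoResult     # no fast key; fall back to hashing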
def convert_anything_to_df(df):
    """Try to convert different formats to a Pandas DataFrame.

    Parameters
    ----------
    df : ndarray, Iterable, dict, DataFrame, Styler, None, or any

    Returns
    -------
    pandas.DataFrame

    """
    if type_util.is_type(df, "pandas.core.frame.DataFrame"):
        return df

    if _is_pandas_styler(df):
        return df.data

    import pandas as pd

    if type_util.is_type(df, "numpy.ndarray") and len(df.shape) == 0:
        return pd.DataFrame([])

    # Try to convert to pandas.DataFrame. This will raise an error if df is
    # not compatible with the pandas.DataFrame constructor.
    return pd.DataFrame(df)
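# Usage sketch for convert_anything_to_df (assumes pandas and numpy are
# installed; `df` below is a hypothetical DataFrame). The inputs are
# illustrative, not exhaustive:
#
#   import numpy as np
#
#   convert_anything_to_df({"a": [1, 2]})    # dict -> DataFrame with column "a"
#   convert_anything_to_df(np.ones((2, 3)))  # 2-D ndarray -> 2x3 DataFrame
#   convert_anything_to_df(np.array(5))      # 0-dim ndarray -> empty DataFrame
#   convert_anything_to_df(df.style)         # Styler -> its underlying .data frame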
def test_dict_items(self):
    a = types.MappingProxyType({"a": 1}).items()
    b = types.MappingProxyType({"a": 1}).items()
    c = types.MappingProxyType({"c": 1}).items()

    assert is_type(a, "builtins.dict_items")
    self.assertEqual(get_hash(a), get_hash(b))
    self.assertNotEqual(get_hash(a), get_hash(c))
def test_compiled_ffi(self):
    self._build_cffi("foo")
    self._build_cffi("bar")
    from cffi_bin._foo import ffi as foo
    from cffi_bin._bar import ffi as bar

    # Note: we've verified that all properties on CompiledFFI objects are
    # global, except `error`, which we have not verified either way.
    assert is_type(foo, "builtins.CompiledFFI")
    self.assertEqual(get_hash(foo), get_hash(bar))
def test_torch_c_tensorbase(self):
    a = torch.ones([1, 1]).__reduce__()[1][2]
    b = torch.ones([1, 1], requires_grad=True).__reduce__()[1][2]
    c = torch.ones([1, 2]).__reduce__()[1][2]

    assert is_type(a, "torch._C._TensorBase")
    self.assertEqual(get_hash(a), get_hash(b))
    self.assertNotEqual(get_hash(a), get_hash(c))

    b.mean().backward()
    # Calling backward on a TensorBase doesn't seem to affect the gradient,
    # so the hashes should still match.
    self.assertEqual(get_hash(a), get_hash(b))
def test_getset_descriptor(self):
    class A:
        x = 1

    class B:
        x = 1

    a = A.__dict__["__dict__"]
    b = B.__dict__["__dict__"]

    assert is_type(a, "builtins.getset_descriptor")
    self.assertEqual(get_hash(a), get_hash(a))
    self.assertNotEqual(get_hash(a), get_hash(b))
def _check_and_convert_to_indices(opt, default_values):
    if default_values is None and None not in opt:
        return None

    if not isinstance(default_values, list):
        # Check for ndarray/Series before the truthiness test below, because
        # `if not default_values` raises a ValueError when default_values is
        # a pd.Series or np.array.
        if is_type(default_values, "numpy.ndarray") or is_type(
                default_values, "pandas.core.series.Series"):
            default_values = list(default_values)
        elif not default_values or default_values in opt:
            default_values = [default_values]
        else:
            default_values = list(default_values)

    for value in default_values:
        if value not in opt:
            raise StreamlitAPIException(
                "Every Multiselect default value must exist in options"
            )

    return [opt.index(value) for value in default_values]
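# Behavior sketch for _check_and_convert_to_indices, assuming is_type and
# StreamlitAPIException are in scope:
#
#   opt = ["red", "green", "blue"]
#   _check_and_convert_to_indices(opt, "green")          # -> [1]  (scalar wrapped)
#   _check_and_convert_to_indices(opt, ["blue", "red"])  # -> [2, 0]
#   _check_and_convert_to_indices(opt, None)             # -> None (no defaults)
#   _check_and_convert_to_indices(opt, ["purple"])       # raises StreamlitAPIException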
def _marshall_av_media(
    coordinates: str,
    proto: Union[AudioProto, VideoProto],
    data: Data,
    mimetype: str,
) -> None:
    """Fill audio or video proto based on contents of data.

    Given a string, check if it's a url; if so, send it out without
    modification. Otherwise assume the string is a filename and let any OS
    errors raise. Load data either from a file or through bytes-processing
    methods into an InMemoryFile object. Pack the proto with the generated
    Tornado-based URL.
    """
    # Audio and Video methods have already checked whether this is a URL by
    # this point.

    if isinstance(data, str):
        # Assume it's a filename or blank. Allow OS-based file errors.
        with open(data, "rb") as fh:
            this_file = in_memory_file_manager.add(fh.read(), mimetype, coordinates)
            proto.url = this_file.url
        return

    data_as_bytes: bytes
    if data is None:
        # Allow empty values so media players can be shown without media.
        return
    elif isinstance(data, bytes):
        data_as_bytes = data
    elif isinstance(data, io.BytesIO):
        data.seek(0)
        data_as_bytes = data.getvalue()
    elif isinstance(data, io.RawIOBase) or isinstance(data, io.BufferedReader):
        data.seek(0)
        read_data = data.read()
        if read_data is None:
            return
        else:
            data_as_bytes = read_data
    elif type_util.is_type(data, "numpy.ndarray"):
        data_as_bytes = data.tobytes()
    else:
        raise RuntimeError("Invalid binary data format: %s" % type(data))

    this_file = in_memory_file_manager.add(data_as_bytes, mimetype, coordinates)
    proto.url = this_file.url
def marshall(
    proto: PlotlyChartProto,
    figure_or_data: FigureOrData,
    use_container_width: bool,
    sharing: SharingMode,
    **kwargs: Any,
) -> None:
    """Marshall a proto with a Plotly spec.

    See DeltaGenerator.plotly_chart for docs.
    """
    # NOTE: "figure_or_data" is the name used in Plotly's .plot() method
    # for their main parameter. I don't like the name, but it's best to keep
    # it in sync with what Plotly calls it.

    import plotly.tools

    if type_util.is_type(figure_or_data, "matplotlib.figure.Figure"):
        figure = plotly.tools.mpl_to_plotly(figure_or_data)
    else:
        figure = plotly.tools.return_figure_from_figure_or_data(
            figure_or_data, validate_figure=True)

    if not isinstance(sharing, str) or sharing.lower() not in SHARING_MODES:
        raise ValueError("Invalid sharing mode for Plotly chart: %s" % sharing)

    proto.use_container_width = use_container_width

    if sharing == "streamlit":
        import plotly.utils

        config = dict(kwargs.get("config", {}))
        # Copy over some kwargs to the config dict. Plotly does the same in plot().
        config.setdefault("showLink", kwargs.get("show_link", False))
        config.setdefault("linkText", kwargs.get("link_text", False))

        proto.figure.spec = json.dumps(figure, cls=plotly.utils.PlotlyJSONEncoder)
        proto.figure.config = json.dumps(config)
    else:
        url = _plot_to_url_or_load_cached_url(
            figure, sharing=sharing, auto_open=False, **kwargs)
        proto.url = _get_embed_url(url)
def _to_bytes(self, obj: Any, context: Optional[Context]) -> bytes:
    """Hash objects to bytes, including code with dependencies.

    Python's built-in `hash` does not produce consistent results across runs.
    """
    if isinstance(obj, unittest.mock.Mock):
        # Mock objects can appear to be infinitely deep, so we don't try to
        # hash them at all.
        return self.to_bytes(id(obj))
    elif isinstance(obj, bytes) or isinstance(obj, bytearray):
        return obj
    elif type_util.get_fqn_type(obj) in self._hash_funcs:
        # Escape hatch for unsupported objects.
        hash_func = self._hash_funcs[type_util.get_fqn_type(obj)]
        try:
            output = hash_func(obj)
        except BaseException as e:
            raise UserHashError(e, obj, hash_func=hash_func)
        return self.to_bytes(output)
    elif isinstance(obj, str):
        return obj.encode()
    elif isinstance(obj, float):
        return self.to_bytes(hash(obj))
    elif isinstance(obj, int):
        return _int_to_bytes(obj)
    elif isinstance(obj, (list, tuple)):
        h = hashlib.new("md5")
        for item in obj:
            self.update(h, item, context)
        return h.digest()
    elif isinstance(obj, dict):
        h = hashlib.new("md5")
        for item in obj.items():
            self.update(h, item, context)
        return h.digest()
    elif obj is None:
        return b"0"
    elif obj is True:
        return b"1"
    elif obj is False:
        return b"0"
    elif type_util.is_type(obj, "pandas.core.frame.DataFrame") or type_util.is_type(
            obj, "pandas.core.series.Series"):
        import pandas as pd

        if len(obj) >= _PANDAS_ROWS_LARGE:
            obj = obj.sample(n=_PANDAS_SAMPLE_SIZE, random_state=0)
        try:
            return b"%s" % pd.util.hash_pandas_object(obj).sum()
        except TypeError:
            # Use pickle if pandas cannot hash the object, for example if
            # it contains unhashable objects.
            return b"%s" % pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
    elif type_util.is_type(obj, "numpy.ndarray"):
        h = hashlib.new("md5")
        self.update(h, obj.shape)

        if obj.size >= _NP_SIZE_LARGE:
            import numpy as np

            state = np.random.RandomState(0)
            obj = state.choice(obj.flat, size=_NP_SAMPLE_SIZE)

        self.update(h, obj.tobytes())
        return h.digest()
    elif inspect.isbuiltin(obj):
        return bytes(obj.__name__.encode())
    elif any(type_util.is_type(obj, typename) for typename in _FFI_TYPE_NAMES):
        return self.to_bytes(None)
    elif type_util.is_type(obj, "builtins.mappingproxy") or type_util.is_type(
            obj, "builtins.dict_items"):
        return self.to_bytes(dict(obj))
    elif type_util.is_type(obj, "builtins.getset_descriptor"):
        return bytes(obj.__qualname__.encode())
    elif isinstance(obj, UploadedFile):
        # UploadedFile is a BytesIO (thus IOBase) but has a name.
        # It does not have a timestamp, so this must come before the
        # temporary-file check below.
        h = hashlib.new("md5")
        self.update(h, obj.name)
        self.update(h, obj.tell())
        self.update(h, obj.getvalue())
        return h.digest()
    elif hasattr(obj, "name") and (
        isinstance(obj, io.IOBase)
        # Handle temporary files used during testing.
        or isinstance(obj, tempfile._TemporaryFileWrapper)  # type: ignore[attr-defined]
    ):
        # Hash files as name + last modification date + offset.
        # NB: we're using hasattr("name") to differentiate between
        # on-disk and in-memory StringIO/BytesIO file representations.
        # That means that this condition must come *before* the next
        # condition, which just checks for StringIO/BytesIO.
        h = hashlib.new("md5")
        obj_name = getattr(obj, "name", "wonthappen")  # Just to appease MyPy.
        self.update(h, obj_name)
        self.update(h, os.path.getmtime(obj_name))
        self.update(h, obj.tell())
        return h.digest()
    elif isinstance(obj, Pattern):
        return self.to_bytes([obj.pattern, obj.flags])
    elif isinstance(obj, io.StringIO) or isinstance(obj, io.BytesIO):
        # Hash in-memory StringIO/BytesIO by their full contents
        # and seek position.
        h = hashlib.new("md5")
        self.update(h, obj.tell())
        self.update(h, obj.getvalue())
        return h.digest()
    elif any(
        type_util.get_fqn(x) == "sqlalchemy.pool.base.Pool"
        for x in type(obj).__bases__
    ):
        # Get connect_args from the closure of the creator function. It includes
        # arguments parsed from the URL and those passed in via `connect_args`.
        # However, if a custom `creator` function is passed in, then we don't
        # expect to get this data.
        cargs = obj._creator.__closure__
        cargs = [cargs[0].cell_contents, cargs[1].cell_contents] if cargs else None

        # Sort kwargs since hashing dicts is sensitive to key order.
        if cargs:
            cargs[1] = dict(
                collections.OrderedDict(
                    sorted(cargs[1].items(), key=lambda t: t[0])))

        reduce_data = obj.__reduce__()

        # Remove thread-related objects.
        for attr in [
            "_overflow_lock",
            "_pool",
            "_conn",
            "_fairy",
            "_threadconns",
            "logger",
        ]:
            reduce_data[2].pop(attr, None)

        return self.to_bytes([reduce_data, cargs])
    elif type_util.is_type(obj, "sqlalchemy.engine.base.Engine"):
        # Remove the url because it's overwritten by creator and connect_args.
        reduce_data = obj.__reduce__()
        reduce_data[2].pop("url", None)
        reduce_data[2].pop("logger", None)
        return self.to_bytes(reduce_data)
    elif type_util.is_type(obj, "numpy.ufunc"):
        # For numpy.remainder, this returns "remainder".
        return bytes(obj.__name__.encode())
    elif type_util.is_type(obj, "socket.socket"):
        return self.to_bytes(id(obj))
    elif any(
        type_util.get_fqn(x) == "torch.nn.modules.module.Module"
        for x in type(obj).__bases__
    ):
        return self.to_bytes(id(obj))
    elif type_util.is_type(obj, "tensorflow.python.client.session.Session"):
        return self.to_bytes(id(obj))
    elif type_util.is_type(obj, "torch.Tensor") or type_util.is_type(
            obj, "torch._C._TensorBase"):
        return self.to_bytes([obj.detach().numpy(), obj.grad])
    elif any(type_util.is_type(obj, typename) for typename in _KERAS_TYPE_NAMES):
        return self.to_bytes(id(obj))
    elif type_util.is_type(
        obj,
        "tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject",
    ):
        return self.to_bytes(id(obj))
    elif inspect.isroutine(obj):
        if hasattr(obj, "__wrapped__"):
            # Ignore the wrapper of wrapped functions.
            return self.to_bytes(obj.__wrapped__)

        if obj.__module__.startswith("streamlit"):
            # Ignore streamlit modules even if they are in the CWD
            # (e.g. during development).
            return self.to_bytes("%s.%s" % (obj.__module__, obj.__name__))

        h = hashlib.new("md5")
        if self._file_should_be_hashed(obj.__code__.co_filename):
            context = _get_context(obj)
            if obj.__defaults__:
                self.update(h, obj.__defaults__, context)
            h.update(self._code_to_bytes(obj.__code__, context, func=obj))
        else:
            # Don't hash code that is not in the current working directory.
            self.update(h, obj.__module__)
            self.update(h, obj.__name__)
        return h.digest()
    elif inspect.iscode(obj):
        if context is None:
            raise RuntimeError("context must be defined when hashing code")
        return self._code_to_bytes(obj, context)
    elif inspect.ismodule(obj):
        # TODO: Figure out how to best show this kind of warning to the
        # user. In the meantime, show nothing. This scenario is too common,
        # so the current warning is quite annoying...
        # st.warning(('Streamlit does not support hashing modules. '
        #             'We did not hash `%s`.') % obj.__name__)
        # TODO: Hash more than just the name for internal modules.
        return self.to_bytes(obj.__name__)
    elif inspect.isclass(obj):
        # TODO: Figure out how to best show this kind of warning to the
        # user. In the meantime, show nothing. This scenario is too common
        # (e.g. in every "except" statement), so the current warning is
        # quite annoying...
        # st.warning(('Streamlit does not support hashing classes. '
        #             'We did not hash `%s`.') % obj.__name__)
        # TODO: Hash more than just the name of classes.
        return self.to_bytes(obj.__name__)
    elif isinstance(obj, functools.partial):
        # The return value of functools.partial is not a plain function:
        # it's a callable object that remembers the original function plus
        # the values you pickled into it. So here we need to special-case it.
        h = hashlib.new("md5")
        self.update(h, obj.args)
        self.update(h, obj.func)
        self.update(h, obj.keywords)
        return h.digest()
    else:
        # As a last resort, hash the output of the object's __reduce__ method.
        h = hashlib.new("md5")
        try:
            reduce_data = obj.__reduce__()
        except BaseException as e:
            raise UnhashableTypeError(e, obj)

        for item in reduce_data:
            self.update(h, item, context)
        return h.digest()
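# The `_hash_funcs` escape hatch above is what powers the public `hash_funcs`
# parameter of st.cache. A minimal usage sketch (assumes streamlit is
# installed; `load_data` and the sqlite3 connection are illustrative):
#
#   import streamlit as st
#
#   @st.cache(hash_funcs={"sqlite3.Connection": id})
#   def load_data(conn):
#       ...  # hash the connection by identity instead of raising
#
# The dict key may be a type or its fully-qualified name, and the value is any
# callable returning something the hasher already knows how to handle.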
def _to_bytes(self, obj, context):
    """Hash objects to bytes, including code with dependencies.

    Python's built-in `hash` does not produce consistent results across runs.
    """
    try:
        if _is_magicmock(obj):
            # MagicMock can result in objects that appear to be infinitely
            # deep, so we don't try to hash them at all.
            return self.to_bytes(id(obj))
        elif isinstance(obj, bytes) or isinstance(obj, bytearray):
            return obj
        elif isinstance(obj, string_types):  # noqa: F821
            # Don't allow the user to override string, since
            # str == bytes on Python 2.
            return obj.encode()
        elif type(obj) in self.hash_funcs:
            # Escape hatch for unsupported objects.
            return self.to_bytes(self.hash_funcs[type(obj)](obj))
        elif isinstance(obj, float):
            return self.to_bytes(hash(obj))
        elif isinstance(obj, int):
            return _int_to_bytes(obj)
        elif isinstance(obj, list) or isinstance(obj, tuple):
            h = hashlib.new(self.name)
            # Hash the name of the container so that ["a"] hashes differently
            # from ("a",). Otherwise we'd only be hashing the data, and the
            # hashes would be the same.
            self._update(h, type(obj).__name__.encode() + b":")
            for e in obj:
                self._update(h, e, context)
            return h.digest()
        elif isinstance(obj, dict):
            h = hashlib.new(self.name)
            self._update(h, type(obj).__name__.encode() + b":")
            for e in obj.items():
                self._update(h, e, context)
            return h.digest()
        elif obj is None:
            # Special string, since hashes change between sessions.
            # We don't use Python's `hash` since hashes are not consistent
            # across runs.
            return NONESENSE
        elif obj is True:
            return b"bool:1"
        elif obj is False:
            return b"bool:0"
        elif type_util.is_type(
                obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                obj, "pandas.core.series.Series"):
            import pandas as pd

            if len(obj) >= PANDAS_ROWS_LARGE:
                obj = obj.sample(n=PANDAS_SAMPLE_SIZE, random_state=0)
            try:
                return pd.util.hash_pandas_object(obj).sum()
            except TypeError:
                # Use pickle if pandas cannot hash the object, for example
                # if it contains unhashable objects.
                return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        elif type_util.is_type(obj, "numpy.ndarray"):
            h = hashlib.new(self.name)
            self._update(h, obj.shape)

            if obj.size >= NP_SIZE_LARGE:
                import numpy as np

                state = np.random.RandomState(0)
                obj = state.choice(obj.flat, size=NP_SAMPLE_SIZE)

            self._update(h, obj.tobytes())
            return h.digest()
        elif inspect.isbuiltin(obj):
            return self.to_bytes(obj.__name__)
        elif hasattr(obj, "name") and (
            isinstance(obj, io.IOBase)
            # Handle temporary files used during testing.
            or isinstance(obj, tempfile._TemporaryFileWrapper)
            or (not compatibility.is_running_py3() and isinstance(obj, file))
        ):
            # Hash files as name + last modification date + offset.
            h = hashlib.new(self.name)
            self._update(h, obj.name)
            self._update(h, os.path.getmtime(obj.name))
            self._update(h, obj.tell())
            return h.digest()
        elif inspect.isroutine(obj):
            if hasattr(obj, "__wrapped__"):
                # Ignore the wrapper of wrapped functions.
                return self.to_bytes(obj.__wrapped__)

            if obj.__module__.startswith("streamlit"):
                # Ignore streamlit modules even if they are in the CWD
                # (e.g. during development).
                return self.to_bytes("%s.%s" % (obj.__module__, obj.__name__))

            h = hashlib.new(self.name)
            if self._file_should_be_hashed(obj.__code__.co_filename):
                context = _get_context(obj)
                if obj.__defaults__:
                    self._update(h, obj.__defaults__, context)
                h.update(self._code_to_bytes(obj.__code__, context))
            else:
                # Don't hash code that is not in the current working
                # directory.
                self._update(h, obj.__module__)
                self._update(h, obj.__name__)
            return h.digest()
        elif inspect.iscode(obj):
            return self._code_to_bytes(obj, context)
        elif inspect.ismodule(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too
            # common, so the current warning is quite annoying...
            # st.warning(('Streamlit does not support hashing modules. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name for internal modules.
            return self.to_bytes(obj.__name__)
        elif inspect.isclass(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too
            # common (e.g. in every "except" statement), so the current
            # warning is quite annoying...
            # st.warning(('Streamlit does not support hashing classes. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name of classes.
            return self.to_bytes(obj.__name__)
        elif isinstance(obj, functools.partial):
            # The return value of functools.partial is not a plain function:
            # it's a callable object that remembers the original function
            # plus the values you pickled into it. So here we need to
            # special-case it.
            h = hashlib.new(self.name)
            self._update(h, obj.args)
            self._update(h, obj.func)
            self._update(h, obj.keywords)
            return h.digest()
        else:
            # As a last resort.
            h = hashlib.new(self.name)
            self._update(h, type(obj).__name__.encode() + b":")
            for e in obj.__reduce__():
                self._update(h, e, context)
            return h.digest()
    except UnhashableType as e:
        raise e
    except Exception as e:
        LOGGER.error(e)
        msg = _hashing_error_message(type(obj))
        raise UnhashableType(msg)
def _is_magicmock(obj):
    return type_util.is_type(obj, "unittest.mock.MagicMock") or type_util.is_type(
        obj, "mock.mock.MagicMock")
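# Quick sanity-check sketch for _is_magicmock, assuming the standard library's
# unittest.mock (the standalone `mock` backport is also matched above):
#
#   from unittest import mock
#   assert _is_magicmock(mock.MagicMock())
#   assert not _is_magicmock(object())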
def _to_bytes(self, obj, context):
    """Hash objects to bytes, including code with dependencies.

    Python's built-in `hash` does not produce consistent results across runs.
    """
    try:
        if _is_magicmock(obj):
            # MagicMock can result in objects that appear to be infinitely
            # deep, so we don't try to hash them at all.
            return self.to_bytes(id(obj))
        elif isinstance(obj, bytes) or isinstance(obj, bytearray):
            return obj
        elif isinstance(obj, string_types):  # noqa: F821
            return obj.encode()
        elif isinstance(obj, float):
            return self.to_bytes(hash(obj))
        elif isinstance(obj, int):
            return _int_to_bytes(obj)
        elif isinstance(obj, list) or isinstance(obj, tuple):
            h = hashlib.new(self.name)
            # Add the type so that x is distinguished from [x].
            self._update(h, type(obj).__name__.encode() + b":")
            for e in obj:
                self._update(h, e, context)
            return h.digest()
        elif obj is None:
            # Special string, since hashes change between sessions.
            # We don't use Python's `hash` since hashes are not consistent
            # across runs.
            return b"none:"
        elif obj is True:
            return b"bool:1"
        elif obj is False:
            return b"bool:0"
        elif type_util.is_type(
                obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                obj, "pandas.core.series.Series"):
            import pandas as pd

            if len(obj) >= PANDAS_ROWS_LARGE:
                obj = obj.sample(n=PANDAS_SAMPLE_SIZE, random_state=0)
            try:
                return pd.util.hash_pandas_object(obj).sum()
            except TypeError:
                # Use pickle if pandas cannot hash the object, for example
                # if it contains unhashable objects.
                return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        elif type_util.is_type(obj, "numpy.ndarray"):
            h = hashlib.new(self.name)
            self._update(h, obj.shape)

            if obj.size >= NP_SIZE_LARGE:
                import numpy as np

                state = np.random.RandomState(0)
                obj = state.choice(obj.flat, size=NP_SAMPLE_SIZE)

            self._update(h, obj.tobytes())
            return h.digest()
        elif inspect.isbuiltin(obj):
            return self.to_bytes(obj.__name__)
        elif hasattr(obj, "name") and (
            isinstance(obj, io.IOBase)
            or (isinstance(obj.name, string_types)  # noqa: F821
                and os.path.exists(obj.name))
        ):
            # Hash files as name + last modification date + offset.
            h = hashlib.new(self.name)
            self._update(h, obj.name)
            self._update(h, os.path.getmtime(obj.name))
            self._update(h, obj.tell())
            return h.digest()
        elif inspect.isroutine(obj):
            if hasattr(obj, "__wrapped__"):
                # Ignore the wrapper of wrapped functions.
                return self.to_bytes(obj.__wrapped__)

            if obj.__module__.startswith("streamlit"):
                # Ignore streamlit modules even if they are in the CWD
                # (e.g. during development).
                return self.to_bytes("%s.%s" % (obj.__module__, obj.__name__))

            h = hashlib.new(self.name)
            filepath = os.path.abspath(obj.__code__.co_filename)

            if file_util.file_is_in_folder_glob(
                filepath, self._get_main_script_directory()
            ) and not self._folder_black_list.is_blacklisted(filepath):
                context = _get_context(obj)
                if obj.__defaults__:
                    self._update(h, obj.__defaults__, context)
                h.update(self._code_to_bytes(obj.__code__, context))
            else:
                # Don't hash code that is not in the current working
                # directory.
                self._update(h, obj.__module__)
                self._update(h, obj.__name__)
            return h.digest()
        elif inspect.iscode(obj):
            return self._code_to_bytes(obj, context)
        elif inspect.ismodule(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too
            # common, so the current warning is quite annoying...
            # st.warning(('Streamlit does not support hashing modules. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name for internal modules.
            return self.to_bytes(obj.__name__)
        elif inspect.isclass(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too
            # common (e.g. in every "except" statement), so the current
            # warning is quite annoying...
            # st.warning(('Streamlit does not support hashing classes. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name of classes.
            return self.to_bytes(obj.__name__)
        elif isinstance(obj, functools.partial):
            # The return value of functools.partial is not a plain function:
            # it's a callable object that remembers the original function
            # plus the values you pickled into it. So here we need to
            # special-case it.
            h = hashlib.new(self.name)
            self._update(h, obj.args)
            self._update(h, obj.func)
            self._update(h, obj.keywords)
            return h.digest()
        else:
            try:
                # As a last resort, we pickle the object to hash it.
                return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
            except:
                st.warning(
                    _hashing_error_message(
                        "Streamlit cannot hash an object of type %s."
                        % type(obj)))
    except:
        st.warning(
            _hashing_error_message(
                "Streamlit failed to hash an object of type %s." % type(obj)))
def _is_pandas_styler(obj):
    return type_util.is_type(obj, "pandas.io.formats.style.Styler")
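# Quick check sketch for _is_pandas_styler (assumes pandas is installed;
# pandas exposes Stylers via the DataFrame.style accessor):
#
#   import pandas as pd
#   df = pd.DataFrame({"a": [1, 2]})
#   assert _is_pandas_styler(df.style)
#   assert not _is_pandas_styler(df)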
def write(self, *args, **kwargs):
    """Write arguments to the app.

    This is the Swiss Army knife of Streamlit commands: it does different
    things depending on what you throw at it. Unlike other Streamlit
    commands, write() has some unique properties:

    1. You can pass in multiple arguments, all of which will be written.
    2. Its behavior depends on the input types as follows.
    3. It returns None, so its "slot" in the app cannot be reused.

    Parameters
    ----------
    *args : any
        One or many objects to print to the app.

        Arguments are handled as follows:

        - write(string)     : Prints the formatted Markdown string, with
          support for LaTeX expressions and emoji shortcodes. See docs for
          st.markdown for more.
        - write(data_frame) : Displays the DataFrame as a table.
        - write(error)      : Prints an exception specially.
        - write(func)       : Displays information about a function.
        - write(module)     : Displays information about the module.
        - write(dict)       : Displays dict in an interactive widget.
        - write(obj)        : The default is to print str(obj).
        - write(mpl_fig)    : Displays a Matplotlib figure.
        - write(altair)     : Displays an Altair chart.
        - write(keras)      : Displays a Keras model.
        - write(graphviz)   : Displays a Graphviz graph.
        - write(plotly_fig) : Displays a Plotly figure.
        - write(bokeh_fig)  : Displays a Bokeh figure.
        - write(sympy_expr) : Prints SymPy expression using LaTeX.

    unsafe_allow_html : bool
        This is a keyword-only argument that defaults to False.

        By default, any HTML tags found in strings will be escaped and
        therefore treated as pure text. This behavior may be turned off by
        setting this argument to True.

        That said, *we strongly advise against it*. It is hard to write
        secure HTML, so by using this argument you may be compromising your
        users' security. For more information, see:

        https://github.com/streamlit/streamlit/issues/152

        **Also note that `unsafe_allow_html` is a temporary measure and may
        be removed from Streamlit at any time.**

        If you decide to turn on HTML anyway, we ask you to please tell us
        your exact use case here:
        https://discuss.streamlit.io/t/96 .

        This will help us come up with safe APIs that allow you to do what
        you want.

    Example
    -------

    Its basic use case is to draw Markdown-formatted text, whenever the
    input is a string:

    >>> write('Hello, *World!* :sunglasses:')

    .. output::
       https://static.streamlit.io/0.50.2-ZWk9/index.html?id=Pn5sjhgNs4a8ZbiUoSTRxE
       height: 50px

    As mentioned earlier, `st.write()` also accepts other data formats, such
    as numbers, data frames, styled data frames, and assorted objects:

    >>> st.write(1234)
    >>> st.write(pd.DataFrame({
    ...     'first column': [1, 2, 3, 4],
    ...     'second column': [10, 20, 30, 40],
    ... }))

    .. output::
       https://static.streamlit.io/0.25.0-2JkNY/index.html?id=FCp9AMJHwHRsWSiqMgUZGD
       height: 250px

    Finally, you can pass in multiple arguments to do things like:

    >>> st.write('1 + 1 = ', 2)
    >>> st.write('Below is a DataFrame:', data_frame, 'Above is a dataframe.')

    .. output::
       https://static.streamlit.io/0.25.0-2JkNY/index.html?id=DHkcU72sxYcGarkFbf4kK1
       height: 300px

    Oh, one more thing: `st.write` accepts chart objects too! For example:

    >>> import pandas as pd
    >>> import numpy as np
    >>> import altair as alt
    >>>
    >>> df = pd.DataFrame(
    ...     np.random.randn(200, 3),
    ...     columns=['a', 'b', 'c'])
    ...
    >>> c = alt.Chart(df).mark_circle().encode(
    ...     x='a', y='b', size='c', color='c', tooltip=['a', 'b', 'c'])
    >>>
    >>> st.write(c)

    .. output::
       https://static.streamlit.io/0.25.0-2JkNY/index.html?id=8jmmXR8iKoZGV4kXaKGYV5
       height: 200px

    """
    string_buffer = []  # type: List[str]
    unsafe_allow_html = kwargs.get("unsafe_allow_html", False)

    # This bans some valid cases like: e = st.empty(); e.write("a", "b").
    # BUT: 1) such cases are rare, 2) this rule is easy to understand,
    # and 3) this rule should be removed once we have st.container().
    if not self.dg._is_top_level and len(args) > 1:
        raise StreamlitAPIException(
            "Cannot replace a single element with multiple elements.\n\n"
            "The `write()` method only supports multiple elements when "
            "inserting elements rather than replacing. That is, only "
            "when called as `st.write()` or `st.sidebar.write()`."
        )

    def flush_buffer():
        if string_buffer:
            self.dg.markdown(
                " ".join(string_buffer),
                unsafe_allow_html=unsafe_allow_html,
            )
            string_buffer[:] = []

    for arg in args:
        # Order matters!
        if isinstance(arg, str):
            string_buffer.append(arg)
        elif type_util.is_dataframe_like(arg):
            flush_buffer()
            if len(np.shape(arg)) > 2:
                self.dg.text(arg)
            else:
                self.dg.dataframe(arg)
        elif isinstance(arg, Exception):
            flush_buffer()
            self.dg.exception(arg)
        elif isinstance(arg, HELP_TYPES):
            flush_buffer()
            self.dg.help(arg)
        elif type_util.is_altair_chart(arg):
            flush_buffer()
            self.dg.altair_chart(arg)
        elif type_util.is_type(arg, "matplotlib.figure.Figure"):
            flush_buffer()
            self.dg.pyplot(arg)
        elif type_util.is_plotly_chart(arg):
            flush_buffer()
            self.dg.plotly_chart(arg)
        elif type_util.is_type(arg, "bokeh.plotting.figure.Figure"):
            flush_buffer()
            self.dg.bokeh_chart(arg)
        elif type_util.is_graphviz_chart(arg):
            flush_buffer()
            self.dg.graphviz_chart(arg)
        elif type_util.is_sympy_expession(arg):
            flush_buffer()
            self.dg.latex(arg)
        elif type_util.is_keras_model(arg):
            from tensorflow.python.keras.utils import vis_utils

            flush_buffer()
            dot = vis_utils.model_to_dot(arg)
            self.dg.graphviz_chart(dot.to_string())
        elif isinstance(arg, (dict, list)):
            flush_buffer()
            self.dg.json(arg)
        elif type_util.is_namedtuple(arg):
            flush_buffer()
            self.dg.json(json.dumps(arg._asdict()))
        elif type_util.is_pydeck(arg):
            flush_buffer()
            self.dg.pydeck_chart(arg)
        else:
            string_buffer.append("`%s`" % str(arg).replace("`", "\\`"))

    flush_buffer()
def _to_bytes(self, obj: Any) -> bytes:
    """Hash objects to bytes, including code with dependencies.

    Python's built-in `hash` does not produce consistent results across runs.
    """
    if isinstance(obj, unittest.mock.Mock):
        # Mock objects can appear to be infinitely deep, so we don't try to
        # hash them at all.
        return self.to_bytes(id(obj))
    elif isinstance(obj, bytes) or isinstance(obj, bytearray):
        return obj
    elif isinstance(obj, str):
        return obj.encode()
    elif isinstance(obj, float):
        return self.to_bytes(hash(obj))
    elif isinstance(obj, int):
        return _int_to_bytes(obj)
    elif isinstance(obj, (list, tuple)):
        h = hashlib.new("md5")
        for item in obj:
            self.update(h, item)
        return h.digest()
    elif isinstance(obj, dict):
        h = hashlib.new("md5")
        for item in obj.items():
            self.update(h, item)
        return h.digest()
    elif obj is None:
        return b"0"
    elif obj is True:
        return b"1"
    elif obj is False:
        return b"0"
    elif type_util.is_type(obj, "pandas.core.frame.DataFrame") or type_util.is_type(
            obj, "pandas.core.series.Series"):
        import pandas as pd

        if len(obj) >= _PANDAS_ROWS_LARGE:
            obj = obj.sample(n=_PANDAS_SAMPLE_SIZE, random_state=0)
        try:
            return b"%s" % pd.util.hash_pandas_object(obj).sum()
        except TypeError:
            # Use pickle if pandas cannot hash the object, for example if
            # it contains unhashable objects.
            return b"%s" % pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
    elif type_util.is_type(obj, "numpy.ndarray"):
        h = hashlib.new("md5")
        self.update(h, obj.shape)

        if obj.size >= _NP_SIZE_LARGE:
            import numpy as np

            state = np.random.RandomState(0)
            obj = state.choice(obj.flat, size=_NP_SAMPLE_SIZE)

        self.update(h, obj.tobytes())
        return h.digest()
    elif inspect.isbuiltin(obj):
        return bytes(obj.__name__.encode())
    elif type_util.is_type(obj, "builtins.mappingproxy") or type_util.is_type(
            obj, "builtins.dict_items"):
        return self.to_bytes(dict(obj))
    elif type_util.is_type(obj, "builtins.getset_descriptor"):
        return bytes(obj.__qualname__.encode())
    elif isinstance(obj, UploadedFile):
        # UploadedFile is a BytesIO (thus IOBase) but has a name.
        # It does not have a timestamp, so this must come before the
        # temporary-file check below.
        h = hashlib.new("md5")
        self.update(h, obj.name)
        self.update(h, obj.tell())
        self.update(h, obj.getvalue())
        return h.digest()
    elif hasattr(obj, "name") and (
        isinstance(obj, io.IOBase)
        # Handle temporary files used during testing.
        or isinstance(obj, tempfile._TemporaryFileWrapper)
    ):
        # Hash files as name + last modification date + offset.
        # NB: we're using hasattr("name") to differentiate between
        # on-disk and in-memory StringIO/BytesIO file representations.
        # That means that this condition must come *before* the next
        # condition, which just checks for StringIO/BytesIO.
        h = hashlib.new("md5")
        obj_name = getattr(obj, "name", "wonthappen")  # Just to appease MyPy.
        self.update(h, obj_name)
        self.update(h, os.path.getmtime(obj_name))
        self.update(h, obj.tell())
        return h.digest()
    elif isinstance(obj, Pattern):
        return self.to_bytes([obj.pattern, obj.flags])
    elif isinstance(obj, io.StringIO) or isinstance(obj, io.BytesIO):
        # Hash in-memory StringIO/BytesIO by their full contents
        # and seek position.
        h = hashlib.new("md5")
        self.update(h, obj.tell())
        self.update(h, obj.getvalue())
        return h.digest()
    elif type_util.is_type(obj, "numpy.ufunc"):
        # For numpy.remainder, this returns "remainder".
        return bytes(obj.__name__.encode())
    elif inspect.ismodule(obj):
        # TODO: Figure out how to best show this kind of warning to the
        # user. In the meantime, show nothing. This scenario is too common,
        # so the current warning is quite annoying...
        # st.warning(('Streamlit does not support hashing modules. '
        #             'We did not hash `%s`.') % obj.__name__)
        # TODO: Hash more than just the name for internal modules.
        return self.to_bytes(obj.__name__)
    elif inspect.isclass(obj):
        # TODO: Figure out how to best show this kind of warning to the
        # user. In the meantime, show nothing. This scenario is too common
        # (e.g. in every "except" statement), so the current warning is
        # quite annoying...
        # st.warning(('Streamlit does not support hashing classes. '
        #             'We did not hash `%s`.') % obj.__name__)
        # TODO: Hash more than just the name of classes.
        return self.to_bytes(obj.__name__)
    elif isinstance(obj, functools.partial):
        # The return value of functools.partial is not a plain function:
        # it's a callable object that remembers the original function plus
        # the values you pickled into it. So here we need to special-case it.
        h = hashlib.new("md5")
        self.update(h, obj.args)
        self.update(h, obj.func)
        self.update(h, obj.keywords)
        return h.digest()
    else:
        # As a last resort, hash the output of the object's __reduce__ method.
        h = hashlib.new("md5")
        try:
            reduce_data = obj.__reduce__()
        except BaseException as e:
            raise UnhashableTypeError() from e

        for item in reduce_data:
            self.update(h, item)
        return h.digest()
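# A sketch of what the __reduce__ fallback above buys us, assuming a plain
# user-defined class (`Point` is illustrative):
#
#   class Point:
#       def __init__(self, x, y):
#           self.x, self.y = x, y
#
#   # Point(1, 2).__reduce__() yields (reconstructor, args, state-dict) data
#   # that is hashed by content, so two equal-valued instances produce the
#   # same digest across runs -- unlike id() or Python's built-in hash().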
def write(*args, **kwargs):
    """Write arguments to the app.

    This is the Swiss Army knife of Streamlit commands. It does different
    things depending on what you throw at it. Unlike other Streamlit
    commands, write() has some unique properties:

    1. You can pass in multiple arguments, all of which will be written.
    2. Its behavior depends on the input types as follows.
    3. It returns None, so its "slot" in the app cannot be reused.

    Parameters
    ----------
    *args : any
        One or many objects to print to the app.

        Arguments are handled as follows:

        - write(string)     : Prints the formatted Markdown string.
        - write(data_frame) : Displays the DataFrame as a table.
        - write(error)      : Prints an exception specially.
        - write(func)       : Displays information about a function.
        - write(module)     : Displays information about the module.
        - write(dict)       : Displays dict in an interactive widget.
        - write(obj)        : The default is to print str(obj).
        - write(mpl_fig)    : Displays a Matplotlib figure.
        - write(altair)     : Displays an Altair chart.
        - write(keras)      : Displays a Keras model.
        - write(graphviz)   : Displays a Graphviz graph.
        - write(plotly_fig) : Displays a Plotly figure.
        - write(bokeh_fig)  : Displays a Bokeh figure.
        - write(sympy_expr) : Prints SymPy expression using LaTeX.

    unsafe_allow_html : bool
        This is a keyword-only argument that defaults to False.

        By default, any HTML tags found in strings will be escaped and
        therefore treated as pure text. This behavior may be turned off by
        setting this argument to True.

        That said, *we strongly advise against it*. It is hard to write
        secure HTML, so by using this argument you may be compromising your
        users' security. For more information, see:

        https://github.com/streamlit/streamlit/issues/152

        *Also note that `unsafe_allow_html` is a temporary measure and may
        be removed from Streamlit at any time.*

        If you decide to turn on HTML anyway, we ask you to please tell us
        your exact use case here:
        https://discuss.streamlit.io/t/96 .

        This will help us come up with safe APIs that allow you to do what
        you want.

    Example
    -------

    Its simplest use case is to draw Markdown-formatted text, whenever the
    input is a string:

    >>> write('Hello, *World!*')

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=DUJaq97ZQGiVAFi6YvnihF
       height: 50px

    As mentioned earlier, `st.write()` also accepts other data formats, such
    as numbers, data frames, styled data frames, and assorted objects:

    >>> st.write(1234)
    >>> st.write(pd.DataFrame({
    ...     'first column': [1, 2, 3, 4],
    ...     'second column': [10, 20, 30, 40],
    ... }))

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=FCp9AMJHwHRsWSiqMgUZGD
       height: 250px

    Finally, you can pass in multiple arguments to do things like:

    >>> st.write('1 + 1 = ', 2)
    >>> st.write('Below is a DataFrame:', data_frame, 'Above is a dataframe.')

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=DHkcU72sxYcGarkFbf4kK1
       height: 300px

    Oh, one more thing: `st.write` accepts chart objects too! For example:

    >>> import pandas as pd
    >>> import numpy as np
    >>> import altair as alt
    >>>
    >>> df = pd.DataFrame(
    ...     np.random.randn(200, 3),
    ...     columns=['a', 'b', 'c'])
    ...
    >>> c = alt.Chart(df).mark_circle().encode(
    ...     x='a', y='b', size='c', color='c')
    >>>
    >>> st.write(c)

    .. output::
       https://share.streamlit.io/0.25.0-2JkNY/index.html?id=8jmmXR8iKoZGV4kXaKGYV5
       height: 200px

    """
    # Python 2 doesn't support this syntax
    #   def write(*args, unsafe_allow_html=False)
    # so we do this instead:
    unsafe_allow_html = kwargs.get("unsafe_allow_html", False)

    try:
        string_buffer = []

        def flush_buffer():
            if string_buffer:
                markdown(
                    " ".join(string_buffer), unsafe_allow_html=unsafe_allow_html
                )  # noqa: F821
                string_buffer[:] = []

        for arg in args:
            # Order matters!
            if isinstance(arg, string_types):  # noqa: F821
                string_buffer.append(arg)
            elif type(arg).__name__ in _DATAFRAME_LIKE_TYPES:
                flush_buffer()
                if len(_np.shape(arg)) > 2:
                    text(arg)
                else:
                    dataframe(arg)  # noqa: F821
            elif isinstance(arg, Exception):
                flush_buffer()
                exception(arg)  # noqa: F821
            elif isinstance(arg, _HELP_TYPES):
                flush_buffer()
                help(arg)
            elif _type_util.is_altair_chart(arg):
                flush_buffer()
                altair_chart(arg)
            elif _type_util.is_type(arg, "matplotlib.figure.Figure"):
                flush_buffer()
                pyplot(arg)
            elif _type_util.is_plotly_chart(arg):
                flush_buffer()
                plotly_chart(arg)
            elif _type_util.is_type(arg, "bokeh.plotting.figure.Figure"):
                flush_buffer()
                bokeh_chart(arg)
            elif _type_util.is_graphviz_chart(arg):
                flush_buffer()
                graphviz_chart(arg)
            elif _type_util.is_sympy_expession(arg):
                flush_buffer()
                latex(arg)
            elif _type_util.is_keras_model(arg):
                from tensorflow.python.keras.utils import vis_utils

                flush_buffer()
                dot = vis_utils.model_to_dot(arg)
                graphviz_chart(dot.to_string())
            elif (type(arg) in dict_types) or (isinstance(arg, list)):  # noqa: F821
                flush_buffer()
                json(arg)
            elif _type_util.is_namedtuple(arg):
                flush_buffer()
                json(_json.dumps(arg._asdict()))
            else:
                string_buffer.append("`%s`" % str(arg).replace("`", "\\`"))

        flush_buffer()
    except Exception:
        _, exc, exc_tb = _sys.exc_info()
        exception(exc, exc_tb)  # noqa: F821
def _to_bytes(self, obj, context):
    """Hash objects to bytes, including code with dependencies.

    Python's built-in `hash` does not produce consistent results across runs.
    """
    if _is_magicmock(obj):
        # MagicMock can result in objects that appear to be infinitely
        # deep, so we don't try to hash them at all.
        return self.to_bytes(id(obj))
    elif isinstance(obj, bytes) or isinstance(obj, bytearray):
        return obj
    elif type_util.get_fqn_type(obj) in self._hash_funcs:
        # Escape hatch for unsupported objects.
        hash_func = self._hash_funcs[type_util.get_fqn_type(obj)]
        try:
            output = hash_func(obj)
        except BaseException as e:
            raise UserHashError(e, obj, hash_func=hash_func)
        return self.to_bytes(output)
    elif isinstance(obj, str):
        return obj.encode()
    elif isinstance(obj, float):
        return self.to_bytes(hash(obj))
    elif isinstance(obj, int):
        return _int_to_bytes(obj)
    elif isinstance(obj, (list, tuple)):
        h = hashlib.new("md5")
        for item in obj:
            self.update(h, item, context)
        return h.digest()
    elif isinstance(obj, dict):
        h = hashlib.new("md5")
        for item in obj.items():
            self.update(h, item, context)
        return h.digest()
    elif obj is None:
        return b"0"
    elif obj is True:
        return b"1"
    elif obj is False:
        return b"0"
    elif type_util.is_type(obj, "pandas.core.frame.DataFrame") or type_util.is_type(
            obj, "pandas.core.series.Series"):
        import pandas as pd

        if len(obj) >= _PANDAS_ROWS_LARGE:
            obj = obj.sample(n=_PANDAS_SAMPLE_SIZE, random_state=0)
        try:
            return b"%s" % pd.util.hash_pandas_object(obj).sum()
        except TypeError:
            # Use pickle if pandas cannot hash the object, for example if
            # it contains unhashable objects.
            return b"%s" % pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
    elif type_util.is_type(obj, "numpy.ndarray"):
        h = hashlib.new("md5")
        self.update(h, obj.shape)

        if obj.size >= _NP_SIZE_LARGE:
            import numpy as np

            state = np.random.RandomState(0)
            obj = state.choice(obj.flat, size=_NP_SAMPLE_SIZE)

        self.update(h, obj.tobytes())
        return h.digest()
    elif inspect.isbuiltin(obj):
        return obj.__name__.encode()
    elif hasattr(obj, "name") and (
        isinstance(obj, io.IOBase)
        # Handle temporary files used during testing.
        or isinstance(obj, tempfile._TemporaryFileWrapper)  # type: ignore[attr-defined]
    ):
        # Hash files as name + last modification date + offset.
        # NB: we're using hasattr("name") to differentiate between
        # on-disk and in-memory StringIO/BytesIO file representations.
        # That means that this condition must come *before* the next
        # condition, which just checks for StringIO/BytesIO.
        h = hashlib.new("md5")
        obj_name = getattr(obj, "name", "wonthappen")  # Just to appease MyPy.
        self.update(h, obj_name)
        self.update(h, os.path.getmtime(obj_name))
        self.update(h, obj.tell())
        return h.digest()
    elif isinstance(obj, Pattern):
        return self.to_bytes([obj.pattern, obj.flags])
    elif isinstance(obj, io.StringIO) or isinstance(obj, io.BytesIO):
        # Hash in-memory StringIO/BytesIO by their full contents
        # and seek position.
        h = hashlib.new("md5")
        self.update(h, obj.tell())
        self.update(h, obj.getvalue())
        return h.digest()
    elif type_util.is_type(obj, "numpy.ufunc"):
        # For numpy.remainder, this returns "remainder".
        return obj.__name__.encode()
    elif type_util.is_type(obj, "tensorflow.python.client.session.Session"):
        return self.to_bytes(id(obj))
    elif inspect.isroutine(obj):
        if hasattr(obj, "__wrapped__"):
            # Ignore the wrapper of wrapped functions.
            return self.to_bytes(obj.__wrapped__)

        if obj.__module__.startswith("streamlit"):
            # Ignore streamlit modules even if they are in the CWD
            # (e.g. during development).
            return self.to_bytes("%s.%s" % (obj.__module__, obj.__name__))

        h = hashlib.new("md5")
        if self._file_should_be_hashed(obj.__code__.co_filename):
            context = _get_context(obj)
            if obj.__defaults__:
                self.update(h, obj.__defaults__, context)
            h.update(self._code_to_bytes(obj.__code__, context))
        else:
            # Don't hash code that is not in the current working directory.
            self.update(h, obj.__module__)
            self.update(h, obj.__name__)
        return h.digest()
    elif inspect.iscode(obj):
        return self._code_to_bytes(obj, context)
    elif inspect.ismodule(obj):
        # TODO: Figure out how to best show this kind of warning to the
        # user. In the meantime, show nothing. This scenario is too common,
        # so the current warning is quite annoying...
        # st.warning(('Streamlit does not support hashing modules. '
        #             'We did not hash `%s`.') % obj.__name__)
        # TODO: Hash more than just the name for internal modules.
        return self.to_bytes(obj.__name__)
    elif inspect.isclass(obj):
        # TODO: Figure out how to best show this kind of warning to the
        # user. In the meantime, show nothing. This scenario is too common
        # (e.g. in every "except" statement), so the current warning is
        # quite annoying...
        # st.warning(('Streamlit does not support hashing classes. '
        #             'We did not hash `%s`.') % obj.__name__)
        # TODO: Hash more than just the name of classes.
        return self.to_bytes(obj.__name__)
    elif isinstance(obj, functools.partial):
        # The return value of functools.partial is not a plain function:
        # it's a callable object that remembers the original function plus
        # the values you pickled into it. So here we need to special-case it.
        h = hashlib.new("md5")
        self.update(h, obj.args)
        self.update(h, obj.func)
        self.update(h, obj.keywords)
        return h.digest()
    else:
        # As a last resort, hash the output of the object's __reduce__ method.
        h = hashlib.new("md5")
        try:
            reduce_data = obj.__reduce__()
        except BaseException as e:
            raise UnhashableTypeError(e, obj)

        for item in reduce_data:
            self.update(h, item, context)
        return h.digest()
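# All of the _to_bytes variants above call an _int_to_bytes helper that is
# not shown in this section. A plausible sketch (the real implementation may
# differ; the key property is determinism across runs, unlike hash()):
#
#   def _int_to_bytes(i):
#       num_bytes = (i.bit_length() + 8) // 8
#       return i.to_bytes(num_bytes, "little", signed=True)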