def marshall(proto: ArrowProto, data: Data, default_uuid: Optional[str] = None) -> None:
    """Serialize dataframe-like data into the ``data`` field of an Arrow proto.

    Parameters
    ----------
    proto : proto.Arrow
        Output. The protobuf for Streamlit Arrow proto.

    data : pandas.DataFrame, pandas.Styler, pyarrow.Table, numpy.ndarray, Iterable, dict, or None
        Something that is or can be converted to a dataframe.

    default_uuid : Optional[str]
        If pandas.Styler UUID is not provided, this value will be used.
        Only consulted when `data` is a pandas.Styler; other elements
        (e.g. charts) can leave it unset.

    """
    if type_util.is_pandas_styler(data):
        # Callers pass a string default_uuid exactly when the data is a
        # `Styler` (and `None` otherwise), so this guards an internal
        # invariant before the styling information is marshalled.
        assert isinstance(
            default_uuid, str
        ), "Default UUID must be a string for Styler data."
        _marshall_styler(proto, data, default_uuid)

    # pyarrow tables are serialized directly; everything else goes through
    # a pandas DataFrame first.
    if isinstance(data, pa.Table):
        serialized = type_util.pyarrow_table_to_bytes(data)
    else:
        frame = type_util.convert_anything_to_df(data)
        serialized = type_util.data_frame_to_bytes(frame)

    proto.data = serialized
def marshall_data_frame(data, proto_df):
    """Convert dataframe-like data into a proto.DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame, numpy.ndarray, Iterable, dict, DataFrame, Styler, or None
        Something that is or can be converted to a dataframe.

    proto_df : proto.DataFrame
        Output. The protobuf for a Streamlit DataFrame proto. Its `data`,
        `columns`, `index` and `style` fields are filled in place.

    """
    df = type_util.convert_anything_to_df(data)

    # Convert df into an iterable of columns (each of type Series).
    # The generator is lazy: columns are pulled positionally, one at a
    # time, as _marshall_table consumes them.
    df_data = (df.iloc[:, col] for col in range(len(df.columns)))

    _marshall_table(df_data, proto_df.data)
    _marshall_index(df.columns, proto_df.columns)
    _marshall_index(df.index, proto_df.index)

    # Styling is only available when the caller handed us a Styler; the
    # converted DataFrame itself carries none.
    styler = data if type_util.is_pandas_styler(data) else None
    _marshall_styles(proto_df.style, df, styler)
def marshall_data_frame(data, proto_df):
    """Fill a proto.DataFrame from dataframe-like data (legacy serialization).

    Parameters
    ----------
    data : pandas.DataFrame, numpy.ndarray, Iterable, dict, DataFrame, Styler, or None
        Something that is or can be converted to a dataframe.

    proto_df : proto.DataFrame
        Output. The protobuf for a Streamlit DataFrame proto.

    Raises
    ------
    StreamlitAPIException
        If `data` is a pyarrow.Table, which this code path rejects.

    """
    if isinstance(data, pa.Table):
        # NOTE(review): the whitespace inside this message follows the
        # flattened source text; confirm it against the original
        # multi-line literal.
        raise errors.StreamlitAPIException(
            " pyarrow tables are not supported by Streamlit's legacy DataFrame "
            'serialization (i.e. with `config.dataFrameSerialization = "legacy"`). '
            "To be able to use pyarrow tables, please enable pyarrow by changing "
            'the config setting, `config.dataFrameSerialization = "arrow"` '
        )

    frame = type_util.convert_anything_to_df(data)

    # Lazily yield each column (a Series) by position for the table
    # marshaller to consume.
    column_count = len(frame.columns)
    column_series = (frame.iloc[:, position] for position in range(column_count))
    _marshall_table(column_series, proto_df.data)

    _marshall_index(frame.columns, proto_df.columns)
    _marshall_index(frame.index, proto_df.index)

    # Only a Styler input carries styling information.
    if type_util.is_pandas_styler(data):
        styler = data
    else:
        styler = None
    _marshall_styles(proto_df.style, frame, styler)
def test_pyarrow_table(self):
    """A pyarrow.Table converts to a pandas DataFrame with the same shape."""
    source_frame = pd.DataFrame({"a": [1], "b": [2], "c": [3]})
    arrow_table = pa.Table.from_pandas(source_frame)

    converted = type_util.convert_anything_to_df(arrow_table)

    # One row, three columns, and exactly a DataFrame (not a subclass).
    self.assertEqual(type(converted), pd.DataFrame)
    self.assertEqual(converted.shape, (1, 3))
def test_styler(self):
    """A pandas.Styler converts to a pandas DataFrame with the same shape."""
    base_frame = pd.DataFrame({"a": [1], "b": [2], "c": [3]})
    percent_styler = base_frame.style.format("{:.2%}")

    converted = type_util.convert_anything_to_df(percent_styler)

    # One row, three columns, and exactly a DataFrame (not a subclass).
    self.assertEqual(type(converted), pd.DataFrame)
    self.assertEqual(converted.shape, (1, 3))
def last_index_for_melted_dataframes(data):
    """Return the last index label of `data`, or None when there is none.

    `data` is converted to a DataFrame only if type_util reports it as
    dataframe-compatible. None is returned both for incompatible input and
    for a converted frame whose index is empty.
    """
    if not type_util.is_dataframe_compatible(data):
        return None

    frame_index = type_util.convert_anything_to_df(data).index
    return frame_index[-1] if frame_index.size > 0 else None
def test_empty_numpy_array(self):
    """An empty numpy array converts to an empty, single-column DataFrame."""
    empty_array = np.array([])

    converted = type_util.convert_anything_to_df(empty_array)

    # Zero rows but still one column.
    self.assertEqual(type(converted), pd.DataFrame)
    self.assertEqual(converted.shape, (0, 1))
def test_dict_of_lists(self):
    """A dict of equal-length lists converts to a pandas DataFrame."""
    columns_by_name = {"a": [1], "b": [2], "c": [3]}

    converted = type_util.convert_anything_to_df(columns_by_name)

    # Each dict key becomes a column; each list contributes one row.
    self.assertEqual(type(converted), pd.DataFrame)
    self.assertEqual(converted.shape, (1, 3))
def generate_chart(chart_type, data, width=0, height=0):
    """Build an interactive Altair chart of the given type from `data`.

    `data` is converted to a DataFrame if needed and melted into long form
    keyed by its index, so each original column becomes a series coloured
    by "variable" and plotted by "value".

    Parameters
    ----------
    chart_type : str
        Suffix of the Altair ``mark_*`` method to use; "area" and "bar"
        additionally affect opacity and the x-axis type.
    data : pandas.DataFrame, something convertible to one, or None
        The data to plot. pyarrow tables are rejected.
    width, height : int
        Passed straight to ``alt.Chart``.

    Raises
    ------
    StreamlitAPIException
        If `data` is a pyarrow.Table.

    """
    if data is None:
        # An empty-ish dict stands in for None because with None the x axis
        # labels rotate 90 degrees (cause unknown; still needs debugging).
        data = {"": []}

    if isinstance(data, pa.Table):
        # NOTE(review): the whitespace inside this message follows the
        # flattened source text; confirm it against the original
        # multi-line literal.
        raise errors.StreamlitAPIException(
            " pyarrow tables are not supported by Streamlit's legacy DataFrame "
            'serialization (i.e. with `config.dataFrameSerialization = "legacy"`). '
            "To be able to use pyarrow tables, please enable pyarrow by changing "
            'the config setting, `config.dataFrameSerialization = "arrow"` '
        )

    if not isinstance(data, pd.DataFrame):
        data = type_util.convert_anything_to_df(data)

    # An unnamed index shows up as "index" after reset_index().
    x_field = data.index.name
    if x_field is None:
        x_field = "index"

    melted = pd.melt(data.reset_index(), id_vars=[x_field])

    opacity = {"value": 0.7 if chart_type == "area" else 1.0}

    # Date-valued axes get a "utc" scale so times are displayed in UTC
    # rather than the viewer's local time zone. (By default, vega-lite
    # shows time data in the browser's local time zone, regardless of the
    # time zone the data specifies:
    # https://vega.github.io/vega-lite/docs/timeunit.html#output).
    if _is_date_column(melted, x_field):
        x_scale = alt.Scale(type="utc")
    else:
        x_scale = alt.Undefined

    if _is_date_column(melted, "value"):
        y_scale = alt.Scale(type="utc")
    else:
        y_scale = alt.Undefined

    # Bar charts should have a discrete (ordinal) x-axis, UNLESS type is date/time
    # https://github.com/streamlit/streamlit/pull/2097#issuecomment-714802475
    if chart_type == "bar" and not _is_date_column(melted, x_field):
        x_type = "ordinal"
    else:
        x_type = alt.Undefined

    base_chart = alt.Chart(melted, width=width, height=height)
    make_mark = getattr(base_chart, "mark_" + chart_type)
    encoded = make_mark().encode(
        alt.X(x_field, title="", scale=x_scale, type=x_type),
        alt.Y("value", title="", scale=y_scale),
        alt.Color("variable", title="", type="nominal"),
        alt.Tooltip([x_field, "value", "variable"]),
        opacity=opacity,
    )
    return encoded.interactive()
def _generate_chart(
    chart_type: ChartType, data: Data, width: int = 0, height: int = 0
) -> Chart:
    """Derive the chart spec from the chart's type, data columns and index.

    `data` is converted to a DataFrame if needed and melted into long form
    keyed by its index, then encoded with the index on x, "value" on y and
    "variable" as the colour.
    """
    if data is None:
        # An empty-ish dict stands in for None because with None the x axis
        # labels rotate 90 degrees (cause unknown; still needs debugging).
        data = {"": []}

    if not isinstance(data, pd.DataFrame):
        data = type_util.convert_anything_to_df(data)

    # An unnamed index shows up as "index" after reset_index().
    x_field = data.index.name
    if x_field is None:
        x_field = "index"

    melted = pd.melt(data.reset_index(), id_vars=[x_field])

    opacity = {"value": 0.7 if chart_type == ChartType.AREA else 1.0}

    # Date-valued axes get a "utc" scale so times are displayed in UTC
    # rather than the viewer's local time zone. (By default, vega-lite
    # shows time data in the browser's local time zone, regardless of the
    # time zone the data specifies:
    # https://vega.github.io/vega-lite/docs/timeunit.html#output).
    if _is_date_column(melted, x_field):
        x_scale = alt.Scale(type="utc")
    else:
        x_scale = alt.Undefined

    if _is_date_column(melted, "value"):
        y_scale = alt.Scale(type="utc")
    else:
        y_scale = alt.Undefined

    # Bar charts should have a discrete (ordinal) x-axis, UNLESS type is date/time
    # https://github.com/streamlit/streamlit/pull/2097#issuecomment-714802475
    if chart_type == ChartType.BAR and not _is_date_column(melted, x_field):
        x_type = "ordinal"
    else:
        x_type = alt.Undefined

    base_chart = alt.Chart(melted, width=width, height=height)
    make_mark = getattr(base_chart, "mark_" + chart_type.value)
    encoded = make_mark().encode(
        alt.X(x_field, title="", scale=x_scale, type=x_type),
        alt.Y("value", title="", scale=y_scale),
        alt.Color("variable", title="", type="nominal"),
        alt.Tooltip([x_field, "value", "variable"]),
        opacity=opacity,
    )
    return encoded.interactive()
def marshall(proto, data, default_uuid=None):
    """Fill an ArrowTable proto from dataframe-like data.

    Parameters
    ----------
    proto : proto.ArrowTable
        Output. The protobuf for a Streamlit ArrowTable proto.

    data : pandas.DataFrame, pandas.Styler, numpy.ndarray, Iterable, dict, or None
        Something that is or can be converted to a dataframe.

    default_uuid : str or None
        Forwarded to the Styler marshalling step when `data` is a
        pandas.Styler; not used for any other input.

    """
    # Styling information has to come from the Styler itself, before the
    # data is reduced to a plain DataFrame.
    if type_util.is_pandas_styler(data):
        _marshall_styler(proto, data, default_uuid)

    frame = type_util.convert_anything_to_df(data)

    _marshall_index(proto, frame.index)
    _marshall_columns(proto, frame.columns)
    cell_values = frame.to_numpy()
    _marshall_data(proto, cell_values)
def generate_chart(chart_type, data, width=0, height=0):
    """Build an interactive Altair chart of the given type from `data`.

    `data` is converted to a DataFrame if needed and melted into long form
    keyed by its index, then encoded with the index on x, "value" on y and
    "variable" as the colour.

    Parameters
    ----------
    chart_type : str
        Suffix of the Altair ``mark_*`` method to use; "area" lowers the
        opacity to 0.7.
    data : pandas.DataFrame, something convertible to one, or None
        The data to plot.
    width, height : int
        Passed straight to ``alt.Chart``.

    """
    if data is None:
        # An empty-ish dict stands in for None because with None the x axis
        # labels rotate 90 degrees (cause unknown; still needs debugging).
        data = {"": []}

    if not isinstance(data, pd.DataFrame):
        data = type_util.convert_anything_to_df(data)

    # An unnamed index shows up as "index" after reset_index().
    x_field = data.index.name
    if x_field is None:
        x_field = "index"

    melted = pd.melt(data.reset_index(), id_vars=[x_field])

    opacity = {"value": 0.7 if chart_type == "area" else 1.0}

    # Date-valued axes get a "utc" scale so times are displayed in UTC
    # rather than the viewer's local time zone. (By default, vega-lite
    # shows time data in the browser's local time zone, regardless of the
    # time zone the data specifies:
    # https://vega.github.io/vega-lite/docs/timeunit.html#output).
    if _is_date_column(melted, x_field):
        x_scale = alt.Scale(type="utc")
    else:
        x_scale = alt.Undefined

    if _is_date_column(melted, "value"):
        y_scale = alt.Scale(type="utc")
    else:
        y_scale = alt.Undefined

    base_chart = alt.Chart(melted, width=width, height=height)
    make_mark = getattr(base_chart, "mark_" + chart_type)
    encoded = make_mark().encode(
        alt.X(x_field, title="", scale=x_scale),
        alt.Y("value", title="", scale=y_scale),
        alt.Color("variable", title="", type="nominal"),
        alt.Tooltip([x_field, "value", "variable"]),
        opacity=opacity,
    )
    return encoded.interactive()
def _maybe_melt_data_for_add_rows(data, delta_type, last_index):
    """Melt `data` for delta types that render melted dataframes.

    For delta types listed in DELTA_TYPES_THAT_MELT_DATAFRAMES or
    ARROW_DELTA_TYPES_THAT_MELT_DATAFRAMES, the data is converted to a
    DataFrame if needed, a RangeIndex is shifted to continue after
    `last_index`, and the frame is melted with its index as the id column.
    For every other delta type the inputs are returned untouched.

    Returns
    -------
    tuple
        ``(data, last_index)``: the possibly melted data and the possibly
        advanced last index.

    Raises
    ------
    StreamlitAPIException
        If the RangeIndex "step" or "stop" attribute comes back as None.

    """
    import pandas as pd

    # Some delta types need the data reshaped; otherwise the input data and
    # the data actually used by vega_lite differ and an error is thrown.
    melts_dataframes = (
        delta_type in DELTA_TYPES_THAT_MELT_DATAFRAMES
        or delta_type in ARROW_DELTA_TYPES_THAT_MELT_DATAFRAMES
    )
    if not melts_dataframes:
        return data, last_index

    if not isinstance(data, pd.DataFrame):
        data = type_util.convert_anything_to_df(data)

    # Exact type check on purpose: only a plain RangeIndex is re-based.
    if type(data.index) is pd.RangeIndex:
        # The step is read from the incoming index, the stop from the
        # index produced by dropping the predefined one.
        step = _get_pandas_index_attr(data, "step")
        data = data.reset_index(drop=True)
        stop_after_reset = _get_pandas_index_attr(data, "stop")

        if step is None or stop_after_reset is None:
            raise StreamlitAPIException("'RangeIndex' object has no attribute 'step'")

        new_start = last_index + step
        new_stop = new_start + stop_after_reset

        data.index = pd.RangeIndex(start=new_start, stop=new_stop, step=step)
        last_index = new_stop - 1

    # An unnamed index shows up as "index" after reset_index().
    id_column = data.index.name
    if id_column is None:
        id_column = "index"

    melted = pd.melt(data.reset_index(), id_vars=[id_column])
    return melted, last_index