def marshall_data_frame(data, proto_df):
    """Fill a legacy DataFrame protobuf from dataframe-like input.

    Parameters
    ----------
    data : pandas.DataFrame, numpy.ndarray, Iterable, dict, DataFrame, Styler, or None
        Something that is, or can be converted to, a dataframe via
        ``type_util.convert_anything_to_df``.

    proto_df : proto.DataFrame
        Output. Its ``data``, ``columns``, ``index`` and ``style`` fields are
        handed to the marshalling helpers to be populated.

    Raises
    ------
    StreamlitAPIException
        If ``data`` is a ``pyarrow.Table``.

    """
    if isinstance(data, pa.Table):
        # The message text is kept verbatim; do not re-wrap the literal.
        unsupported_msg = """ pyarrow tables are not supported by Streamlit's legacy DataFrame serialization (i.e. with `config.dataFrameSerialization = "legacy"`). To be able to use pyarrow tables, please enable pyarrow by changing the config setting, `config.dataFrameSerialization = "arrow"` """
        raise errors.StreamlitAPIException(unsupported_msg)

    frame = type_util.convert_anything_to_df(data)

    # Lazily yield each column as a Series, selected by position.
    column_series = (frame.iloc[:, pos] for pos in range(frame.shape[1]))

    _marshall_table(column_series, proto_df.data)
    _marshall_index(frame.columns, proto_df.columns)
    _marshall_index(frame.index, proto_df.index)

    # Only a pandas Styler carries style information; anything else gets None.
    if type_util.is_pandas_styler(data):
        styler = data
    else:
        styler = None
    _marshall_styles(proto_df.style, frame, styler)
def generate_chart(chart_type, data, width=0, height=0):
    """Build an interactive Altair chart from dataframe-like data.

    Parameters
    ----------
    chart_type : str
        Suffix of the ``alt.Chart.mark_*`` method to call. "area" and "bar"
        receive special handling (opacity and x-axis type respectively).
    data : pandas.DataFrame or anything convertible to one, or None
        The data to plot. ``None`` is replaced by an empty-ish dict.
    width : int
        Forwarded to ``alt.Chart``.
    height : int
        Forwarded to ``alt.Chart``.

    Returns
    -------
    The chart produced by ``.encode(...).interactive()``.

    Raises
    ------
    StreamlitAPIException
        If ``data`` is a ``pyarrow.Table``.

    """
    if data is None:
        # Use an empty-ish dict because if we use None the x axis labels rotate
        # 90 degrees. No idea why. Need to debug.
        data = {"": []}

    if isinstance(data, pa.Table):
        # The message text is kept verbatim; do not re-wrap the literal.
        unsupported_msg = """ pyarrow tables are not supported by Streamlit's legacy DataFrame serialization (i.e. with `config.dataFrameSerialization = "legacy"`). To be able to use pyarrow tables, please enable pyarrow by changing the config setting, `config.dataFrameSerialization = "arrow"` """
        raise errors.StreamlitAPIException(unsupported_msg)

    if not isinstance(data, pd.DataFrame):
        data = type_util.convert_anything_to_df(data)

    # An unnamed index becomes a column called "index" after reset_index().
    index_name = "index" if data.index.name is None else data.index.name

    # Reshape to long format: the index column is kept as the identifier and
    # the remaining columns are unpivoted into "variable" / "value".
    data = pd.melt(data.reset_index(), id_vars=[index_name])

    opacity = {"value": 0.7 if chart_type == "area" else 1.0}

    # Set the X and Y axes' scale to "utc" if they contain date values.
    # This causes time data to be displayed in UTC, rather than the user's
    # local time zone. (By default, vega-lite displays time data in the
    # browser's local time zone, regardless of which time zone the data
    # specifies: https://vega.github.io/vega-lite/docs/timeunit.html#output).
    if _is_date_column(data, index_name):
        x_scale = alt.Scale(type="utc")
    else:
        x_scale = alt.Undefined

    if _is_date_column(data, "value"):
        y_scale = alt.Scale(type="utc")
    else:
        y_scale = alt.Undefined

    # Bar charts should have a discrete (ordinal) x-axis, UNLESS type is date/time
    # https://github.com/streamlit/streamlit/pull/2097#issuecomment-714802475
    discrete_bar = chart_type == "bar" and not _is_date_column(data, index_name)
    x_type = "ordinal" if discrete_bar else alt.Undefined

    base_chart = alt.Chart(data, width=width, height=height)
    marked_chart = getattr(base_chart, "mark_" + chart_type)()

    x_channel = alt.X(index_name, title="", scale=x_scale, type=x_type)
    y_channel = alt.Y("value", title="", scale=y_scale)
    color_channel = alt.Color("variable", title="", type="nominal")
    tooltip_channel = alt.Tooltip([index_name, "value", "variable"])

    encoded_chart = marked_chart.encode(
        x_channel,
        y_channel,
        color_channel,
        tooltip_channel,
        opacity=opacity,
    )
    return encoded_chart.interactive()
def convert_anything_to_df(df):
    """Try to convert different formats to a pandas DataFrame.

    Parameters
    ----------
    df : ndarray, Iterable, dict, DataFrame, Styler, pa.Table, None, list, or any
        The object to convert.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when it already is a DataFrame, the Styler's underlying
        ``data``, the result of ``pa.Table.to_pandas()``, or a DataFrame built
        by the ``pandas.DataFrame`` constructor.

    Raises
    ------
    StreamlitAPIException
        If the ``pandas.DataFrame`` constructor raises ``ValueError`` for
        ``df``. The original error is attached as ``__cause__``.

    """
    # This is inefficient as the data will be converted back to Arrow
    # when marshalled to protobuf, but area/bar/line charts need
    # DataFrame magic to generate the correct output.
    if isinstance(df, pa.Table):
        return df.to_pandas()

    if is_type(df, _PANDAS_DF_TYPE_STR):
        return df

    if is_pandas_styler(df):
        return df.data

    # NOTE(review): pandas is imported only after the early returns,
    # presumably to defer the import cost - confirm before hoisting.
    import pandas as pd

    # A zero-dimensional ndarray is mapped to an empty DataFrame.
    if is_type(df, "numpy.ndarray") and len(df.shape) == 0:
        return pd.DataFrame([])

    # Try to convert to pandas.DataFrame. This will raise an error if df is
    # not compatible with the pandas.DataFrame constructor.
    try:
        return pd.DataFrame(df)
    except ValueError as err:
        # Chain explicitly so the pandas error stays visible as the cause.
        raise errors.StreamlitAPIException(""" Unable to convert object of type `%(type)s` to `pandas.DataFrame`. Offending object: ```py %(object)s ```""" % {
            "type": type(df),
            "object": df,
        }) from err
def convert_anything_to_df(df):
    """Try to convert different formats to a pandas DataFrame.

    Parameters
    ----------
    df : ndarray, Iterable, dict, DataFrame, Styler, None, list, or any
        The object to convert.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when it already is a DataFrame, the Styler's underlying
        ``data``, or a DataFrame built by the ``pandas.DataFrame`` constructor.

    Raises
    ------
    StreamlitAPIException
        If the ``pandas.DataFrame`` constructor raises ``ValueError`` for
        ``df``. The original error is attached as ``__cause__``.

    """
    if is_type(df, _PANDAS_DF_TYPE_STR):
        return df

    if is_pandas_styler(df):
        return df.data

    # NOTE(review): pandas is imported only after the early returns,
    # presumably to defer the import cost - confirm before hoisting.
    import pandas as pd

    # A zero-dimensional ndarray is mapped to an empty DataFrame.
    if is_type(df, "numpy.ndarray") and len(df.shape) == 0:
        return pd.DataFrame([])

    # Try to convert to pandas.DataFrame. This will raise an error if df is
    # not compatible with the pandas.DataFrame constructor.
    try:
        return pd.DataFrame(df)
    except ValueError as err:
        # Chain explicitly so the pandas error stays visible as the cause.
        raise errors.StreamlitAPIException(""" Unable to convert object of type `%(type)s` to `pandas.DataFrame`. Offending object: ```py %(object)s ```""" % {
            "type": type(df),
            "object": df,
        }) from err
def data_frame_to_bytes(df: DataFrame) -> bytes:
    """Serialize pandas.DataFrame to bytes using Apache Arrow.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe to convert.

    Returns
    -------
    bytes
        The result of ``pyarrow_table_to_bytes`` applied to the Arrow table
        built from ``df``.

    Raises
    ------
    NumpyDtypeException
        If conversion fails with a message containing
        "Could not convert dtype".
    StreamlitAPIException
        For any other failure during conversion or serialization. In both
        cases the original error is attached as ``__cause__``.

    """
    try:
        table = pa.Table.from_pandas(df)
        return pyarrow_table_to_bytes(table)
    except Exception as e:
        # Deliberately broad: every failure is re-raised as a Streamlit
        # error, explicitly chained to the original exception.
        dtype_error_marker = "Could not convert dtype"
        if dtype_error_marker in str(e):
            raise errors.NumpyDtypeException(""" Unable to convert `numpy.dtype` to `pyarrow.DataType`. This is likely due to a bug in Arrow (see https://issues.apache.org/jira/browse/ARROW-14087). As a temporary workaround, you can convert the DataFrame cells to strings with `df.astype(str)`. """) from e
        raise errors.StreamlitAPIException(e) from e