예제 #1
0
def marshall_data_frame(data, proto_df):
    """Fill a legacy DataFrame protobuf from dataframe-like input.

    Parameters
    ----------
    data : pandas.DataFrame, numpy.ndarray, Iterable, dict, DataFrame, Styler, or None
        Anything `type_util.convert_anything_to_df` accepts. pyarrow tables
        are rejected up front.

    proto_df : proto.DataFrame
        Output. Its `data`, `columns`, `index` and `style` fields are handed
        to the `_marshall_*` helpers to be populated.

    Raises
    ------
    StreamlitAPIException
        If `data` is a `pa.Table`.
    """
    if isinstance(data, pa.Table):
        unsupported_message = """
pyarrow tables are not supported  by Streamlit's legacy DataFrame serialization (i.e. with `config.dataFrameSerialization = "legacy"`).

To be able to use pyarrow tables, please enable pyarrow by changing the config setting,
`config.dataFrameSerialization = "arrow"`
"""
        raise errors.StreamlitAPIException(unsupported_message)

    frame = type_util.convert_anything_to_df(data)

    # Lazily yield each column as a Series, addressed by position rather
    # than by label.
    column_count = len(frame.columns)
    columns_by_position = (
        frame.iloc[:, position] for position in range(column_count)
    )

    _marshall_table(columns_by_position, proto_df.data)
    _marshall_index(frame.columns, proto_df.columns)
    _marshall_index(frame.index, proto_df.index)

    # Styles are only forwarded when the caller passed a pandas Styler.
    if type_util.is_pandas_styler(data):
        styler = data
    else:
        styler = None
    _marshall_styles(proto_df.style, frame, styler)
예제 #2
0
def generate_chart(chart_type, data, width=0, height=0):
    """Build an interactive Altair chart of the given mark type.

    Parameters
    ----------
    chart_type : str
        Suffix of the Altair `mark_*` method to use; the code special-cases
        "area" (opacity) and "bar" (ordinal x-axis).
    data : pandas.DataFrame, anything convertible to one, or None
        Data to plot. pyarrow tables are rejected.
    width : int
        Passed straight to `alt.Chart`.
    height : int
        Passed straight to `alt.Chart`.

    Returns
    -------
    The object returned by calling `.interactive()` on the encoded chart.

    Raises
    ------
    StreamlitAPIException
        If `data` is a `pa.Table`.
    """
    if data is None:
        # An empty-ish dict is used instead of None because with None the
        # x axis labels rotate 90 degrees (cause not yet understood).
        data = {"": []}

    if isinstance(data, pa.Table):
        unsupported_message = """
pyarrow tables are not supported  by Streamlit's legacy DataFrame serialization (i.e. with `config.dataFrameSerialization = "legacy"`).

To be able to use pyarrow tables, please enable pyarrow by changing the config setting,
`config.dataFrameSerialization = "arrow"`
"""
        raise errors.StreamlitAPIException(unsupported_message)

    if not isinstance(data, pd.DataFrame):
        data = type_util.convert_anything_to_df(data)

    # Name of the column that will carry the index after reset_index().
    x_field = data.index.name
    if x_field is None:
        x_field = "index"

    # Reshape to long form: one row per (index, variable, value) triple.
    data = pd.melt(data.reset_index(), id_vars=[x_field])

    opacity = {"value": 0.7 if chart_type == "area" else 1.0}

    # Axes holding date values get a "utc" scale so that time data is shown
    # in UTC rather than the user's local time zone. (By default, vega-lite
    # displays time data in the browser's local time zone, regardless of
    # which time zone the data specifies:
    # https://vega.github.io/vega-lite/docs/timeunit.html#output).
    if _is_date_column(data, x_field):
        x_scale = alt.Scale(type="utc")
    else:
        x_scale = alt.Undefined

    if _is_date_column(data, "value"):
        y_scale = alt.Scale(type="utc")
    else:
        y_scale = alt.Undefined

    # Bar charts should have a discrete (ordinal) x-axis, UNLESS type is date/time
    # https://github.com/streamlit/streamlit/pull/2097#issuecomment-714802475
    if chart_type == "bar" and not _is_date_column(data, x_field):
        x_type = "ordinal"
    else:
        x_type = alt.Undefined

    base_chart = alt.Chart(data, width=width, height=height)
    make_mark = getattr(base_chart, "mark_" + chart_type)
    encoded_chart = make_mark().encode(
        alt.X(x_field, title="", scale=x_scale, type=x_type),
        alt.Y("value", title="", scale=y_scale),
        alt.Color("variable", title="", type="nominal"),
        alt.Tooltip([x_field, "value", "variable"]),
        opacity=opacity,
    )
    return encoded_chart.interactive()
예제 #3
0
def convert_anything_to_df(df):
    """Try to convert different formats to a pandas DataFrame.

    Parameters
    ----------
    df : ndarray, Iterable, dict, DataFrame, Styler, pa.Table, None, list, or any
        The object to convert.

    Returns
    -------
    pandas.DataFrame
        - a `pa.Table` is converted with `to_pandas()`;
        - an existing DataFrame is returned as-is (not copied);
        - a pandas Styler yields its underlying `.data`;
        - a zero-dimensional numpy array yields an empty DataFrame;
        - anything else is passed to the `pandas.DataFrame` constructor.

    Raises
    ------
    StreamlitAPIException
        If the `pandas.DataFrame` constructor raises `ValueError` for `df`.
        The original `ValueError` is attached as `__cause__`.

    """
    # This is inefficient as the data will be converted back to Arrow
    # when marshalled to protobuf, but area/bar/line charts need
    # DataFrame magic to generate the correct output.
    if isinstance(df, pa.Table):
        return df.to_pandas()

    if is_type(df, _PANDAS_DF_TYPE_STR):
        return df

    if is_pandas_styler(df):
        return df.data

    # Imported here rather than at module level, as in the original code.
    import pandas as pd

    if is_type(df, "numpy.ndarray") and len(df.shape) == 0:
        return pd.DataFrame([])

    # Try to convert to pandas.DataFrame. This will raise an error if df is
    # not compatible with the pandas.DataFrame constructor.
    try:
        return pd.DataFrame(df)

    except ValueError as err:
        # Chain explicitly so the pandas error is kept as the direct cause.
        raise errors.StreamlitAPIException("""
Unable to convert object of type `%(type)s` to `pandas.DataFrame`.

Offending object:
```py
%(object)s
```""" % {
            "type": type(df),
            "object": df,
        }) from err
예제 #4
0
def convert_anything_to_df(df):
    """Try to convert different formats to a pandas DataFrame.

    Parameters
    ----------
    df : ndarray, Iterable, dict, DataFrame, Styler, None, list, or any
        The object to convert.

    Returns
    -------
    pandas.DataFrame
        - an existing DataFrame is returned as-is (not copied);
        - a pandas Styler yields its underlying `.data`;
        - a zero-dimensional numpy array yields an empty DataFrame;
        - anything else is passed to the `pandas.DataFrame` constructor.

    Raises
    ------
    StreamlitAPIException
        If the `pandas.DataFrame` constructor raises `ValueError` for `df`.
        The original `ValueError` is attached as `__cause__`.

    """
    if is_type(df, _PANDAS_DF_TYPE_STR):
        return df

    if is_pandas_styler(df):
        return df.data

    # Imported here rather than at module level, as in the original code.
    import pandas as pd

    if is_type(df, "numpy.ndarray") and len(df.shape) == 0:
        return pd.DataFrame([])

    # Try to convert to pandas.DataFrame. This will raise an error if df is
    # not compatible with the pandas.DataFrame constructor.
    try:
        return pd.DataFrame(df)

    except ValueError as err:
        # Chain explicitly so the pandas error is kept as the direct cause.
        raise errors.StreamlitAPIException("""
Unable to convert object of type `%(type)s` to `pandas.DataFrame`.

Offending object:
```py
%(object)s
```""" % {
            "type": type(df),
            "object": df,
        }) from err
예제 #5
0
def data_frame_to_bytes(df: DataFrame) -> bytes:
    """Serialize pandas.DataFrame to bytes using Apache Arrow.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe to convert.

    Returns
    -------
    bytes
        The result of `pyarrow_table_to_bytes` applied to the Arrow table
        built from `df`.

    Raises
    ------
    NumpyDtypeException
        If the conversion fails with a message containing
        "Could not convert dtype".
    StreamlitAPIException
        For any other failure during conversion or serialization.

    In both cases the original exception is attached as `__cause__`.

    """
    try:
        table = pa.Table.from_pandas(df)
        return pyarrow_table_to_bytes(table)
    except Exception as e:
        # The dtype failure is recognized by its message text, since that
        # is the only signal this code inspects.
        dtype_error_marker = "Could not convert dtype"
        if dtype_error_marker in str(e):
            raise errors.NumpyDtypeException("""
Unable to convert `numpy.dtype` to `pyarrow.DataType`.
This is likely due to a bug in Arrow (see https://issues.apache.org/jira/browse/ARROW-14087).
As a temporary workaround, you can convert the DataFrame cells to strings with `df.astype(str)`.
""") from e
        raise errors.StreamlitAPIException(e) from e