def test_arideda_numfeature():
    """
    Check that the first item returned by ``arid_eda`` has eight rows and one
    column per requested feature.
    """
    features = ["sepalLength", "sepalWidth"]
    summary, _chart = aa.arid_eda(
        data.iris(), "species", "categorical", features
    )
    expected_shape = (8, len(features))
    assert summary.shape == expected_shape
def test_multiscatterplot_args_alternate():
    """Build a multiscatterplot from iris columns passed as data, not names."""
    iris = data.iris()
    feature_series = [
        iris[name] for name in ("sepalLength", "sepalWidth", "petalLength")
    ]
    species = iris["species"]
    return ar.multiscatterplot(columns=feature_series, color=species)
def test_arideda_return():
    """
    Check that the second item returned by ``arid_eda`` is an
    ``alt.HConcatChart``.
    """
    features = ["sepalLength", "sepalWidth"]
    result = aa.arid_eda(data.iris(), "species", "categorical", features)
    _, chart = result
    assert isinstance(chart, alt.HConcatChart)
def test_arideda_features():
    """
    Check that calling ``arid_eda`` with a valid feature list yields a pandas
    DataFrame as the first returned item.
    """
    features = ["sepalLength", "sepalWidth"]
    frame, _chart = aa.arid_eda(
        data.iris(), "species", "categorical", features
    )
    # pd.DataFrame is the same class object as pd.core.frame.DataFrame.
    assert isinstance(frame, pd.DataFrame)
def test_iris_column_names():
    """Check the type and column names of the iris dataset.

    The column names are compared after sorting so that the assertion does
    not depend on the order in which the loader returns the columns.
    The raw accessor is expected to return ``bytes``.
    """
    iris = data.iris()
    assert isinstance(iris, pd.DataFrame)
    assert sorted(iris.columns) == [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    raw = data.iris.raw()
    assert isinstance(raw, bytes)
def test_download_iris():
    """Check the iris dataset when loaded with ``use_local=False``.

    The column names are compared after sorting so that the assertion does
    not depend on the order in which the loader returns the columns.
    The raw accessor is expected to return ``bytes``.
    """
    iris = data.iris(use_local=False)
    assert isinstance(iris, pd.DataFrame)
    assert sorted(iris.columns) == [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    raw = data.iris.raw(use_local=False)
    assert isinstance(raw, bytes)
def test_scatterplot():
    """Build a scatterplot of the iris frame with column-name encodings."""
    encodings = {
        "x": "petalWidth",
        "y": "petalLength",
        "color": "sepalWidth",
        "tooltip": "species",
    }
    return ar.scatterplot(data.iris(), **encodings)
def test_scatterplot_alternate_data():
    """Build a scatterplot by passing iris columns themselves as encodings."""
    iris = data.iris()
    petal_width = iris["petalWidth"]
    petal_length = iris["petalLength"]
    sepal_width = iris["sepalWidth"]
    species = iris["species"]
    return ar.scatterplot(
        x=petal_width,
        y=petal_length,
        color=sepal_width,
        tooltip=species,
    )
def make_static_chart():
    """
    Build an interactive circle chart of the iris data.

    Petal length is on x, petal width on y, species drives the color and
    sepal width is shown as the tooltip.
    """
    base = alt.Chart(data=data.iris())
    circles = base.mark_circle(size=60)
    encoded = circles.encode(
        x='petalLength:Q',
        y='petalWidth:Q',
        color='species:N',
        tooltip='sepalWidth:Q',
    )
    return encoded.interactive()
def test_arideda_empty_df():
    """
    Check that an AssertionError is raised when the response type is neither
    categorical nor continuous (here ``"ORDINAL"``).
    """
    features = ["sepalLength", "sepalWidth"]
    with pytest.raises(AssertionError):
        aa.arid_eda(data.iris(), "species", "ORDINAL", features)
def test_response_type_incorrect():
    """
    Check that an AssertionError is raised when ``petalLength`` is passed as
    the response together with the ``"categorical"`` response type.
    """
    features = ["sepalLength", "sepalWidth"]
    with pytest.raises(AssertionError):
        aa.arid_eda(data.iris(), "petalLength", "categorical", features)
def example_scatterplot():
    """Return a 200x200 circle chart of iris petal length vs. sepal length.

    Points are colored by species.
    """
    import altair as alt
    from vega_datasets import data

    iris = data.iris()
    circles = alt.Chart(iris).mark_circle()
    encoded = circles.encode(
        x='petalLength:Q',
        y='sepalLength:Q',
        color='species:N',
    )
    return encoded.properties(width=200, height=200)
def test_iris_column_names():
    """Check the type and the (sorted) column names of the iris dataset."""
    expected_columns = [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    frame = data.iris()
    assert type(frame) is pd.DataFrame
    assert sorted(frame.columns) == expected_columns

    # The raw accessor must hand back the undecoded payload.
    raw = data.iris.raw()
    assert type(raw) is bytes
def test_download_iris():
    """Check the iris dataset when it is requested with ``use_local=False``."""
    expected_columns = [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    frame = data.iris(use_local=False)
    assert type(frame) is pd.DataFrame
    assert sorted(frame.columns) == expected_columns

    # The raw accessor must hand back the undecoded payload.
    raw = data.iris.raw(use_local=False)
    assert type(raw) is bytes
def example_scatterplot_matrix():
    """Return an interactive scatterplot matrix of the four iris measurements.

    Each 150x150 cell plots one measurement against another, colored by
    species; the same four fields are repeated over rows and columns.
    """
    import altair as alt
    from vega_datasets import data

    iris = data.iris()
    cell = alt.Chart(iris).mark_circle().encode(
        x=alt.X(alt.repeat('column'), type='quantitative'),
        y=alt.Y(alt.repeat('row'), type='quantitative'),
        color='species:N',
    ).properties(width=150, height=150)

    row_fields = ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']
    column_fields = ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']
    matrix = cell.repeat(row=row_fields, column=column_fields)
    return matrix.interactive()
def test_arideda_returns_tuple():
    """
    Check that ``arid_eda`` returns exactly two items.
    """
    features = ["sepalLength", "sepalWidth"]
    result = aa.arid_eda(data.iris(), "species", "categorical", features)
    assert len(result) == 2
def test_boxplot_cast():
    """Build a boxplot over every iris column except the last one."""
    frame = data.iris()
    all_but_last = list(frame.columns[:-1])
    return ar.boxplot(frame, columns=all_but_last)
def load_data():
    """Return the iris dataset as produced by ``data.iris()``."""
    return data.iris()
def test_multiscatterplot_args():
    """Build a multiscatterplot with explicit columns and a color field."""
    frame = data.iris()
    # A second load supplies the column labels; the last label is dropped.
    feature_columns = data.iris().columns[:-1]
    return ar.multiscatterplot(
        frame,
        columns=feature_columns,
        color="species",
    )
def test_multiscatterplot_defaults():
    """Build a multiscatterplot of the iris frame using only defaults."""
    iris = data.iris()
    return ar.multiscatterplot(iris)
from __future__ import annotations

import typing as t

from vega_datasets import data

if t.TYPE_CHECKING:
    from pandas.core.frame import DataFrame

# Statistics computed for every aggregated column below.
SUMMARY_STATS = ["max", "min", "mean", "std", "count"]

# --- cars: preview, describe, then Horsepower statistics grouped by Year ---
df: DataFrame = data.cars()
print(df.head())
print(df.describe())
print(df.columns)
grouped_df = df.groupby(by=["Year"]).agg({"Horsepower": SUMMARY_STATS})
print(grouped_df)

# Visual separator between the two dataset reports.
print("")
print("----------------------------------------")
print("")

# --- iris: preview, describe, then sepal statistics grouped by species ---
# ``df`` is re-bound without repeating its annotation: declaring the same
# name twice is reported as a redefinition by static type checkers.
df = data.iris()
print(df.head())
print(df.describe())
print(df.columns)
grouped_df = df.groupby(by=["species"]).agg({
    "sepalLength": SUMMARY_STATS,
    "sepalWidth": SUMMARY_STATS,
})
print(grouped_df)
In [70]: # scatter plot # data x_scatter = cars.Weight_in_lbs y_scatter = cars.Miles_per_Gallon # plot scatter_plot = figure(plot_width=500, plot_height=300, x_axis_label='Weight_in_lbs', y_axis_label='Miles_per_Gallon') scatter_plot.circle(x_scatter, y_scatter, size=15, line_color='navy', fill_color='orange', fill_alpha=0.5) show(scatter_plot) Other scatter-plot marker types include: cross, x, diamond, diamond_cross, circle_x, circle_cross, triangle, inverted_triangle, square, square_x, square_cross, and asterisk. In [71]: # vega data sets iris data iris = vds.iris() iris.tail() Out[71]: petalLength petalWidth sepalLength sepalWidth species 145 5.2 2.3 6.7 3.0 virginica 146 5.0 1.9 6.3 2.5 virginica 147 5.2 2.0 6.5 3.0 virginica 148 5.4 2.3 6.2 3.4 virginica 149 5.1 1.8 5.9 3.0 virginica In [72]: # scatter plot subgroups using iris data from bokeh.transform import factor_cmap, factor_mark # data # use vega_datasets iris data
def test_boxplot_melted():
    """Build a boxplot of ``petalLength`` grouped by ``species``."""
    iris = data.iris()
    return ar.boxplot(
        iris,
        columns="petalLength",
        group_by="species",
    )
"""
Parallel Coordinates Example
----------------------------
A `Parallel Coordinates <https://en.wikipedia.org/wiki/Parallel_coordinates>`_
chart is a chart that lets you visualize the individual data points by drawing
a single line for each of them.
Such a chart can be created in Altair, but requires some data preprocessing
to transform the data into a suitable representation.
This example shows a parallel coordinates chart with the Iris dataset.
"""
# category: other charts
import altair as alt
from vega_datasets import data

source = data.iris()

# Reshape to long form: the row index and species are kept as identifiers,
# every remaining column becomes a (variable, value) pair.
source_transformed = source.reset_index().melt(id_vars=['species', 'index'])

# One line per original row (detail='index:N'), colored by species.
lines = alt.Chart(source_transformed).mark_line()
lines.encode(
    x='variable:N',
    y='value:Q',
    color='species:N',
    detail='index:N',
    opacity=alt.value(0.5),
).properties(width=500)
from vega_datasets import data

# Load the iris dataset and print the result of describe() on it.
df = data.iris()
summary = df.describe()
print(summary)
import altair as alt
from vega_datasets import data

alt.renderers.enable('notebook')

iris = data.iris()

# Point chart of petal length vs. petal width, colored by species.
encodings = {'x': 'petalLength', 'y': 'petalWidth', 'color': 'species'}
chart = alt.Chart(iris).mark_point().encode(**encodings)

# Display in the Chrome browser
chart.show()

# pip install altair vega_datasets vega
# pip install altair_viewer
# alt.renderers.enable('altair_viewer')

# Rendering
# https://altair-viz.github.io/user_guide/display_frontends.html#display-general
import altair as alt
from vega_datasets import data

source = data.iris()
base = alt.Chart(source)

# Fixed axis domains used by the scatter plot and by the histogram bins.
xscale = alt.Scale(domain=(4.0, 8.0))
yscale = alt.Scale(domain=(1.9, 4.55))

# Mark properties for the area histogram.
area_args = dict(opacity=.3, interpolate='step')

# Scatter plot of sepal length vs. sepal width, colored by species.
points = base.mark_circle().encode(
    alt.X('sepalLength', scale=xscale),
    alt.Y('sepalWidth', scale=yscale),
    color='species',
)

# Histogram of sepal length per species.
top_hist = (
    base.mark_area(**area_args)
    .encode(
        alt.X(
            'sepalLength:Q',
            # when using bins, the axis scale is set through
            # the bin extent, so we do not specify the scale here
            # (which would be ignored anyway)
            bin=alt.Bin(maxbins=20, extent=xscale.domain),
            stack=None,
            title='',
        ),
        alt.Y('count()', stack=None, title=''),
        alt.Color('species:N'),
    )
    .properties(height=60)
)
def test_multiscatter_defaults():
    """Build a multiscatter chart of the iris frame using only defaults."""
    iris = data.iris()
    return ar.multiscatter(iris)