def test_parallel_coordinates():
    """Check the spec returned by ``pdvega.parallel_coordinates``.

    Two calls are exercised: one over every column with ``alpha=0.5`` and
    one restricted to ``cols=['x', 'y']``. Both must yield a line mark with
    the expected encodings and the expected set of melted variables.
    """
    frame = pd.DataFrame({
        'x': range(10),
        'y': range(10),
        'z': range(10),
        'c': list('ABABABABAB'),
    })
    # Encoding channel -> expected Vega-Lite type, shared by both calls.
    channel_types = (
        ('x', 'nominal'),
        ('y', 'quantitative'),
        ('color', 'nominal'),
        ('detail', 'quantitative'),
    )

    # All columns, explicit opacity.
    chart = pdvega.parallel_coordinates(frame, 'c', alpha=0.5)
    utils.validate_vegalite(chart.spec)
    utils.check_encodings(
        chart.spec,
        x='variable',
        y='value',
        color='c',
        detail='index',
        opacity=utils.IGNORE,
    )
    encoding = chart.spec['encoding']
    assert chart.spec['mark'] == 'line'
    for channel, expected_type in channel_types:
        assert encoding[channel]['type'] == expected_type
    assert encoding['opacity']['value'] == 0.5
    melted = utils.get_data(chart.spec)
    assert set(pd.unique(melted['variable'])) == {'x', 'y', 'z'}

    # Subset of columns, no alpha.
    chart = pdvega.parallel_coordinates(frame, 'c', cols=['x', 'y'])
    utils.validate_vegalite(chart.spec)
    utils.check_encodings(
        chart.spec,
        x='variable',
        y='value',
        color='c',
        detail='index',
    )
    encoding = chart.spec['encoding']
    assert chart.spec['mark'] == 'line'
    for channel, expected_type in channel_types:
        assert encoding[channel]['type'] == expected_type
    melted = utils.get_data(chart.spec)
    assert set(pd.unique(melted['variable'])) == {'x', 'y'}
def test_lag_plot(lag):
    """Check ``pdvega.lag_plot`` for both Series and DataFrame input.

    ``lag`` is supplied by the caller (presumably a pytest parametrization
    or fixture defined outside this block -- confirm).
    """
    frame = pd.DataFrame({'x': range(10), 'y': range(10)})
    n_rows = frame.shape[0]
    shifted_label = 'y(t + {0})'.format(lag)

    # Series input: a single lagged pair of columns.
    chart = pdvega.lag_plot(frame['x'], lag=lag)
    lagged = utils.get_data(chart)
    spec = chart.to_dict()
    assert spec['mark'] == 'point'
    for axis in ('x', 'y'):
        assert spec['encoding'][axis]['type'] == 'quantitative'
    utils.check_encodings(chart, x='y(t)', y=shifted_label)
    assert lagged.shape == (n_rows - lag, 2)

    # DataFrame input: an extra column distinguishes the variables.
    chart = pdvega.lag_plot(frame, lag=lag)
    lagged = utils.get_data(chart)
    spec = chart.to_dict()
    assert spec['mark'] == 'point'
    for axis in ('x', 'y'):
        assert spec['encoding'][axis]['type'] == 'quantitative'
    assert spec['encoding']['color']['type'] == 'nominal'
    utils.check_encodings(chart, x='y(t)', y=shifted_label, color='variable')
    assert lagged.shape == (2 * (n_rows - lag), 3)
def test_scatter_common_columns():
    """Scatter plot where the colour column is the same column as ``y``."""
    frame = pd.DataFrame({
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.scatter(x="x", y="y", c="y")

    utils.validate_vegalite(chart)
    assert chart.mark == "point"
    utils.check_encodings(chart, x="x", y="y", color="y")
def test_df_hist(stacked, histtype):
    """Check the spec of a DataFrame histogram for each ``histtype``.

    ``stacked`` and ``histtype`` are supplied by the caller (presumably a
    pytest parametrization defined outside this block -- confirm).
    """
    frame = pd.DataFrame({'x': range(10), 'y': range(10)})
    # Expected mark definition keyed by the requested histogram type.
    expected_marks = {
        'bar': 'bar',
        'step': {'type': 'line', 'interpolate': 'step'},
        'stepfilled': {'type': 'area', 'interpolate': 'step'},
    }

    chart = frame.vgplot.hist(bins=5, stacked=stacked, histtype=histtype)
    assert chart.spec['mark'] == expected_marks[histtype]

    expected_encodings = dict(x='value', y=utils.IGNORE, color='variable')
    if not stacked:
        # Only the unstacked histogram carries an opacity encoding.
        expected_encodings['opacity'] = utils.IGNORE
    utils.check_encodings(chart.spec, **expected_encodings)

    if stacked:
        expected_stack = 'zero'
    else:
        expected_stack = None
    assert chart.spec['encoding']['x']['bin'] == {'maxbins': 5}
    assert chart.spec['encoding']['y']['aggregate'] == 'count'
    assert chart.spec['encoding']['y']['stack'] == expected_stack
def test_df_hist(stacked, histtype, maxbins):
    """Check a DataFrame histogram chart for each ``histtype`` and bin count.

    ``stacked``, ``histtype`` and ``maxbins`` are supplied by the caller
    (presumably a pytest parametrization defined outside this block --
    confirm).
    """
    frame = pd.DataFrame({"x": range(10), "y": range(10)})
    # Expected mark definition keyed by the requested histogram type.
    expected_marks = {
        "bar": "bar",
        "step": {"type": "line", "interpolate": "step"},
        "stepfilled": {"type": "area", "interpolate": "step"},
    }

    chart = frame.vgplot.hist(bins=maxbins, stacked=stacked, histtype=histtype)
    assert chart.mark == expected_marks[histtype]

    expected_encodings = dict(x="value", y=utils.IGNORE, color="variable")
    if not stacked:
        # Only the unstacked histogram carries an opacity encoding.
        expected_encodings["opacity"] = utils.IGNORE
    utils.check_encodings(chart, **expected_encodings)

    if stacked:
        expected_stack = "zero"
    else:
        expected_stack = None
    assert chart["encoding"]["x"]["bin"] == {"maxbins": maxbins}
    assert chart["encoding"]["y"]["aggregate"] == "count"
    assert chart["encoding"]["y"]["stack"] == expected_stack
def test_df_kde_y():
    """KDE plot of a single DataFrame column selected with ``y``."""
    frame = pd.DataFrame({"x": range(10), "y": range(10)})
    chart = frame.vgplot.kde(y="y", bw_method="scott")

    assert chart.mark == "line"
    utils.check_encodings(chart, x=" ", y="Density", color=utils.IGNORE)

    # Only the requested column should appear in the chart data.
    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
def test_scatter_common_columns():
    """Scatter spec where the colour column is the same column as ``y``."""
    frame = pd.DataFrame({
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.scatter(x='x', y='y', c='y')

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'circle'
    utils.check_encodings(chart.spec, x='x', y='y', color='y')
def test_andrews_curves():
    """Check the chart returned by ``pdvega.andrews_curves``.

    The chart must be a line mark with the expected encodings, and its data
    must hold ``samples`` rows per input row.
    """
    frame = pd.DataFrame({
        'x': range(10),
        'y': range(10),
        'z': range(10),
        'c': list('ABABABABAB'),
    })
    n_samples = 120
    n_points = len(frame)

    chart = pdvega.andrews_curves(frame, 'c', samples=n_samples, alpha=0.5)
    utils.validate_vegalite(chart)
    utils.check_encodings(
        chart,
        x='t',
        y=' ',
        color='c',
        detail='sample',
        opacity=utils.IGNORE,
    )

    spec = chart.to_dict()
    encoding = spec['encoding']
    assert spec['mark'] == 'line'
    channel_types = (
        ('x', 'quantitative'),
        ('y', 'quantitative'),
        ('color', 'nominal'),
        ('detail', 'quantitative'),
    )
    for channel, expected_type in channel_types:
        assert encoding[channel]['type'] == expected_type
    assert encoding['opacity']['value'] == 0.5

    curve_data = utils.get_data(chart)
    assert len(curve_data) == n_samples * n_points
def test_df_kde_y():
    """KDE spec of a single DataFrame column selected with ``y``."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10)})
    chart = frame.vgplot.kde(y='y', bw_method='scott')

    assert chart.spec['mark'] == 'line'
    utils.check_encodings(chart.spec, x=' ', y='Density', color=utils.IGNORE)

    # Only the requested column should appear in the embedded data.
    embedded = utils.get_data(chart.spec)
    variables = set(pd.unique(embedded['variable']))
    assert variables == {'y'}
def test_line_simple():
    """Default DataFrame line plot: every column becomes a coloured line."""
    frame = pd.DataFrame({
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.line()

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'line'
    utils.check_encodings(chart.spec, x='index', y='value', color='variable')

    embedded = utils.get_data(chart.spec)
    variables = set(pd.unique(embedded['variable']))
    assert variables == {'x', 'y'}
def test_df_hexbin_C():
    """Hexbin spec with a ``C`` column: binned rect mark, mean-aggregated colour."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10), 'C': range(10)})
    gridsize = 10
    chart = frame.vgplot.hexbin(x='x', y='y', C='C', gridsize=gridsize)

    assert chart.spec['mark'] == 'rect'
    utils.check_encodings(chart.spec, x='x', y='y', color='C')

    # Both axes are binned with the requested grid size.
    for axis in ('x', 'y'):
        assert chart.spec['encoding'][axis]['bin'] == {"maxbins": gridsize}
    assert chart.spec['encoding']['color']['aggregate'] == "mean"
def test_ser_kde():
    """KDE plot of a named Series: line mark with the series name on ``y``."""
    series = pd.Series(range(10), name="x")
    chart = series.vgplot.kde(bw_method="scott")

    assert chart.mark == "line"
    utils.check_encodings(chart, x=' ', y='x')
def test_df_hexbin_C():
    """Hexbin chart with a ``C`` column: binned rect mark, mean-aggregated colour."""
    frame = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)})
    gridsize = 10
    chart = frame.vgplot.hexbin(x="x", y="y", C="C", gridsize=gridsize)

    assert chart.mark == "rect"
    utils.check_encodings(chart, x="x", y="y", color="C")

    # Both axes are binned with the requested grid size.
    for axis in ("x", "y"):
        assert chart["encoding"][axis]["bin"] == alt.Bin(maxbins=gridsize)
    assert chart["encoding"]["color"]["aggregate"] == "mean"
def test_bar_stacked():
    """Stacked DataFrame bar chart: ``y`` encoding stacks from zero."""
    frame = pd.DataFrame({
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.bar(stacked=True)

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, x="index", y="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}
    assert chart["encoding"]["y"]["stack"] == "zero"
def test_bar_stacked():
    """Stacked DataFrame bar spec: ``y`` encoding stacks from zero."""
    frame = pd.DataFrame({
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.bar(stacked=True)

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'bar'
    utils.check_encodings(chart.spec, x='index', y='value', color='variable')

    embedded = utils.get_data(chart.spec)
    variables = set(pd.unique(embedded['variable']))
    assert variables == {'x', 'y'}
    assert chart.spec['encoding']['y']['stack'] == "zero"
def test_barh_xy():
    """Horizontal bar chart with explicit ``x``/``y``: axes are swapped, no stacking."""
    frame = pd.DataFrame({
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.barh(x="x", y="y")

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    # The value channel lands on x and the category column on y.
    utils.check_encodings(chart, x="value", y="x", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["x"]["stack"] is None
def test_line_simple():
    """Default DataFrame line chart: every column becomes a coloured line."""
    frame = pd.DataFrame({
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.line()

    utils.validate_vegalite(chart)
    assert chart.mark == "line"
    utils.check_encodings(chart, x="index", y="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}
def test_scatter_color_size():
    """Scatter chart with both colour (``c``) and size (``s``) columns."""
    columns = {
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
        "c": range(5),
        "s": range(5),
    }
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.scatter(x="x", y="y", c="c", s="s")

    utils.validate_vegalite(chart)
    assert chart.mark == "point"
    utils.check_encodings(chart, x="x", y="y", color="c", size="s")
def test_scatter_color_size():
    """Scatter spec with both colour (``c``) and size (``s``) columns."""
    columns = {
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
        'c': range(5),
        's': range(5),
    }
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.scatter(x='x', y='y', c='c', s='s')

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'circle'
    utils.check_encodings(chart.spec, x='x', y='y', color='c', size='s')
def test_df_area_xy_unstacked():
    """Unstacked area chart with explicit ``x``/``y``: only ``y`` is plotted."""
    columns = {
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
        "z": range(5),
    }
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.area(x="x", y="y", stacked=False)

    utils.validate_vegalite(chart)
    assert chart.mark == "area"
    utils.check_encodings(chart, x="x", y="value", color="variable")

    # The unselected column "z" must not appear in the chart data.
    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["y"]["stack"] is None
def test_barh_simple():
    """Default horizontal bar spec: index on ``y``, values on ``x``, no stacking."""
    frame = pd.DataFrame({
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.barh()

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'bar'
    utils.check_encodings(
        chart.spec,
        y='index',
        x='value',
        color='variable',
        opacity=utils.IGNORE,
    )

    embedded = utils.get_data(chart.spec)
    variables = set(pd.unique(embedded['variable']))
    assert variables == {'x', 'y'}
    assert chart.spec['encoding']['x']['stack'] is None
def test_df_area_xy_unstacked():
    """Unstacked area spec with explicit ``x``/``y``: only ``y`` is plotted."""
    columns = {
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
        'z': range(5),
    }
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.area(x='x', y='y', stacked=False)

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'area'
    utils.check_encodings(chart.spec, x='x', y='value', color='variable')

    # The unselected column 'z' must not appear in the embedded data.
    embedded = utils.get_data(chart.spec)
    variables = set(pd.unique(embedded['variable']))
    assert variables == {'y'}
    assert chart.spec['encoding']['y']['stack'] is None
def test_df_area_unstacked():
    """Default unstacked area chart: no stacking and an opacity of 0.7."""
    frame = pd.DataFrame({
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.area(stacked=False)

    utils.validate_vegalite(chart)
    assert chart.mark == "area"
    utils.check_encodings(
        chart,
        x="index",
        y="value",
        color="variable",
        opacity=utils.IGNORE,
    )

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}
    assert chart["encoding"]["y"]["stack"] is None
    assert chart["encoding"]["opacity"]["value"] == 0.7
def test_df_area_unstacked():
    """Default unstacked area spec: no stacking and an opacity of 0.7."""
    frame = pd.DataFrame({
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
    })
    chart = frame.vgplot.area(stacked=False)

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'area'
    utils.check_encodings(
        chart.spec,
        x='index',
        y='value',
        color='variable',
        opacity=utils.IGNORE,
    )

    embedded = utils.get_data(chart.spec)
    variables = set(pd.unique(embedded['variable']))
    assert variables == {'x', 'y'}
    assert chart.spec['encoding']['y']['stack'] is None
    assert chart.spec['encoding']['opacity']['value'] == 0.7
def test_series_hist(histtype):
    """Check the spec of a Series histogram for each ``histtype``.

    ``histtype`` is supplied by the caller (presumably a pytest
    parametrization defined outside this block -- confirm).
    """
    series = pd.Series(range(10))
    # Expected mark definition keyed by the requested histogram type.
    expected_marks = {
        'bar': 'bar',
        'step': {'type': 'line', 'interpolate': 'step'},
        'stepfilled': {'type': 'area', 'interpolate': 'step'},
    }

    chart = series.vgplot.hist(bins=5, histtype=histtype)
    assert chart.spec['mark'] == expected_marks[histtype]
    utils.check_encodings(chart.spec, x='0', y=utils.IGNORE)

    assert chart.spec['encoding']['x']['bin'] == {'maxbins': 5}
    assert chart.spec['encoding']['y']['aggregate'] == 'count'
def test_series_hist(histtype, maxbins):
    """Check a Series histogram chart for each ``histtype`` and bin count.

    ``histtype`` and ``maxbins`` are supplied by the caller (presumably a
    pytest parametrization defined outside this block -- confirm).
    """
    series = pd.Series(range(10))
    # Expected mark definition keyed by the requested histogram type.
    expected_marks = {
        "bar": "bar",
        "step": {"type": "line", "interpolate": "step"},
        "stepfilled": {"type": "area", "interpolate": "step"},
    }

    chart = series.vgplot.hist(bins=maxbins, histtype=histtype)
    assert chart.mark == expected_marks[histtype]
    utils.check_encodings(chart, x="0", y=utils.IGNORE)

    assert chart["encoding"]["x"]["bin"] == {"maxbins": maxbins}
    assert chart["encoding"]["y"]["aggregate"] == "count"
def test_series_barh():
    """Horizontal bar spec of a Series: index on ``y``, values on ``x``."""
    series = pd.Series([4, 5, 4, 5], index=['A', 'B', 'C', 'D'])
    chart = series.vgplot.barh()

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'bar'
    utils.check_encodings(chart.spec, y='index', x='0')
def test_df_hexbin_Cfunc():
    """Hexbin with ``reduce_C_function=min`` yields a ``"min"`` colour aggregate."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10), 'C': range(10)})
    chart = frame.vgplot.hexbin(x='x', y='y', C='C', reduce_C_function=min)

    utils.check_encodings(chart.spec, x='x', y='y', color='C')
    color_encoding = chart.spec['encoding']['color']
    assert color_encoding['aggregate'] == "min"
def test_series_line():
    """Line spec of an unnamed Series: index on ``x``, values on ``y``."""
    series = pd.Series([3, 2, 3, 2, 3])
    chart = series.vgplot.line()

    utils.validate_vegalite(chart.spec)
    assert chart.spec['mark'] == 'line'
    utils.check_encodings(chart.spec, x='index', y='0')
def test_ser_kde():
    """KDE spec of a named Series: line mark with the series name on ``y``."""
    series = pd.Series(range(10), name='x')
    chart = series.vgplot.kde(bw_method='scott')

    assert chart.spec['mark'] == 'line'
    utils.check_encodings(chart.spec, x=' ', y='x')