Example #1
0
def test_parallel_coordinates():
    """Check parallel_coordinates with no ``cols`` argument and with a subset."""
    frame = pd.DataFrame({
        'x': range(10),
        'y': range(10),
        'z': range(10),
        'c': list('ABABABABAB'),
    })
    # Encoding channel -> expected vega-lite type, shared by both calls.
    expected_types = (
        ('x', 'nominal'),
        ('y', 'quantitative'),
        ('color', 'nominal'),
        ('detail', 'quantitative'),
    )

    # First call: no ``cols`` argument, explicit alpha.
    plot = pdvega.parallel_coordinates(frame, 'c', alpha=0.5)
    utils.validate_vegalite(plot.spec)
    utils.check_encodings(plot.spec, x='variable', y='value',
                          color='c', detail='index', opacity=utils.IGNORE)
    encoding = plot.spec['encoding']
    assert plot.spec['mark'] == 'line'
    for channel, kind in expected_types:
        assert encoding[channel]['type'] == kind
    assert encoding['opacity']['value'] == 0.5
    melted = utils.get_data(plot.spec)
    assert set(pd.unique(melted['variable'])) == {'x', 'y', 'z'}

    # Second call: restrict the plotted columns to x and y.
    plot = pdvega.parallel_coordinates(frame, 'c', cols=['x', 'y'])
    utils.validate_vegalite(plot.spec)
    utils.check_encodings(plot.spec, x='variable', y='value',
                          color='c', detail='index')
    encoding = plot.spec['encoding']
    assert plot.spec['mark'] == 'line'
    for channel, kind in expected_types:
        assert encoding[channel]['type'] == kind
    melted = utils.get_data(plot.spec)
    assert set(pd.unique(melted['variable'])) == {'x', 'y'}
Example #2
0
def test_lag_plot(lag):
    """Check lag_plot for Series input and for DataFrame input.

    ``lag`` is supplied by the test harness; it determines the expected
    y-axis title and the expected number of rows in the plotted data.
    """
    frame = pd.DataFrame({'x': range(10), 'y': range(10)})
    n_rows = frame.shape[0]
    y_title = 'y(t + {0})'.format(lag)

    # Series input: two columns, ``lag`` fewer rows than the input.
    chart = pdvega.lag_plot(frame['x'], lag=lag)
    lagged = utils.get_data(chart)

    chart_dict = chart.to_dict()
    assert chart_dict['mark'] == 'point'
    for axis in ('x', 'y'):
        assert chart_dict['encoding'][axis]['type'] == 'quantitative'

    utils.check_encodings(chart, x='y(t)', y=y_title)
    assert lagged.shape == (n_rows - lag, 2)

    # DataFrame input: an extra color channel, and rows for both columns.
    chart = pdvega.lag_plot(frame, lag=lag)
    lagged = utils.get_data(chart)
    chart_dict = chart.to_dict()

    assert chart_dict['mark'] == 'point'
    for axis in ('x', 'y'):
        assert chart_dict['encoding'][axis]['type'] == 'quantitative'
    assert chart_dict['encoding']['color']['type'] == 'nominal'
    utils.check_encodings(chart, x='y(t)', y=y_title, color='variable')
    assert lagged.shape == (2 * (n_rows - lag), 3)
Example #3
0
def test_scatter_common_columns():
    """Scatter plot whose color column is the same column used for y."""
    points = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = points.vgplot.scatter(x="x", y="y", c="y")

    utils.validate_vegalite(chart)
    assert chart.mark == "point"

    expected_channels = dict(x="x", y="y", color="y")
    utils.check_encodings(chart, **expected_channels)
Example #4
0
def test_df_hist(stacked, histtype):
    """Check DataFrame histogram mark, encodings, binning and stacking.

    ``stacked`` and ``histtype`` are supplied by the test harness.
    """
    frame = pd.DataFrame({'x': range(10), 'y': range(10)})

    # Expected mark for each ``histtype`` value.
    expected_marks = {
        'bar': 'bar',
        'step': dict(type='line', interpolate='step'),
        'stepfilled': dict(type='area', interpolate='step'),
    }

    plot = frame.vgplot.hist(bins=5, stacked=stacked, histtype=histtype)
    assert plot.spec['mark'] == expected_marks[histtype]

    # Only the unstacked case lists an opacity channel; a stacked
    # histogram has no default opacity.
    channels = dict(x='value', y=utils.IGNORE, color='variable')
    if not stacked:
        channels['opacity'] = utils.IGNORE
    utils.check_encodings(plot.spec, **channels)

    expected_stack = 'zero' if stacked else None
    assert plot.spec['encoding']['x']['bin'] == {'maxbins': 5}
    assert plot.spec['encoding']['y']['aggregate'] == 'count'
    assert plot.spec['encoding']['y']['stack'] == expected_stack
Example #5
0
def test_df_hist(stacked, histtype, maxbins):
    """Check DataFrame histogram mark, encodings, binning and stacking.

    ``stacked``, ``histtype`` and ``maxbins`` are supplied by the test
    harness.
    """
    frame = pd.DataFrame({"x": range(10), "y": range(10)})

    # Expected mark for each ``histtype`` value.
    expected_marks = {
        "bar": "bar",
        "step": dict(type="line", interpolate="step"),
        "stepfilled": dict(type="area", interpolate="step"),
    }

    chart = frame.vgplot.hist(bins=maxbins, stacked=stacked, histtype=histtype)
    assert chart.mark == expected_marks[histtype]

    # Only the unstacked case lists an opacity channel; a stacked
    # histogram has no default opacity.
    channels = dict(x="value", y=utils.IGNORE, color="variable")
    if not stacked:
        channels["opacity"] = utils.IGNORE
    utils.check_encodings(chart, **channels)

    expected_stack = "zero" if stacked else None
    assert chart["encoding"]["x"]["bin"] == {"maxbins": maxbins}
    assert chart["encoding"]["y"]["aggregate"] == "count"
    assert chart["encoding"]["y"]["stack"] == expected_stack
Example #6
0
def test_df_kde_y():
    """KDE of a DataFrame restricted to column ``y``."""
    frame = pd.DataFrame({"x": range(10), "y": range(10)})
    chart = frame.vgplot.kde(y="y", bw_method="scott")

    assert chart.mark == "line"
    utils.check_encodings(chart, x=" ", y="Density", color=utils.IGNORE)

    # Only the requested column may appear in the plotted data.
    plotted_variables = set(pd.unique(chart.data["variable"]))
    assert plotted_variables == {"y"}
Example #7
0
def test_scatter_common_columns():
    """Scatter plot whose color column is the same column used for y."""
    points = pd.DataFrame({'x': [1, 4, 2, 3, 5], 'y': [6, 3, 4, 5, 2]})
    plot = points.vgplot.scatter(x='x', y='y', c='y')

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'circle'

    expected_channels = dict(x='x', y='y', color='y')
    utils.check_encodings(plot.spec, **expected_channels)
Example #8
0
def test_andrews_curves():
    """Check the andrews_curves chart: encodings, mark, types and data size.

    The sample count is defined once and passed to ``andrews_curves`` so
    that the row count asserted at the end cannot drift away from the
    value actually used in the call.
    """
    data = pd.DataFrame({
        'x': range(10),
        'y': range(10),
        'z': range(10),
        'c': list('ABABABABAB')
    })
    n_samples = 120
    n_points = len(data)
    plot = pdvega.andrews_curves(data, 'c', samples=n_samples, alpha=0.5)
    utils.validate_vegalite(plot)
    utils.check_encodings(plot,
                          x='t',
                          y=' ',
                          color='c',
                          detail='sample',
                          opacity=utils.IGNORE)

    spec = plot.to_dict()
    enc = spec['encoding']
    assert spec['mark'] == 'line'
    assert enc['x']['type'] == 'quantitative'
    assert enc['y']['type'] == 'quantitative'
    assert enc['color']['type'] == 'nominal'
    assert enc['detail']['type'] == 'quantitative'
    assert enc['opacity']['value'] == 0.5

    # The plotted data is expected to hold n_samples rows per input row.
    df = utils.get_data(plot)
    assert len(df) == n_samples * n_points
Example #9
0
def test_df_kde_y():
    """KDE of a DataFrame restricted to column ``y``."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10)})
    plot = frame.vgplot.kde(y='y', bw_method='scott')

    assert plot.spec['mark'] == 'line'
    utils.check_encodings(plot.spec, x=' ', y='Density', color=utils.IGNORE)

    # Only the requested column may appear in the plotted data.
    plotted = utils.get_data(plot.spec)
    plotted_variables = set(pd.unique(plotted['variable']))
    assert plotted_variables == {'y'}
Example #10
0
def test_line_simple():
    """Line plot of a whole DataFrame with no explicit x or y."""
    frame = pd.DataFrame({'x': [1, 4, 2, 3, 5], 'y': [6, 3, 4, 5, 2]})
    plot = frame.vgplot.line()

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'line'
    utils.check_encodings(plot.spec, x='index', y='value', color='variable')

    # Both columns should show up as variables in the plotted data.
    plotted = utils.get_data(plot.spec)
    plotted_variables = set(pd.unique(plotted['variable']))
    assert plotted_variables == {'x', 'y'}
Example #11
0
def test_df_hexbin_C():
    """Hexbin with a ``C`` column: rect mark, binned axes, mean color."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10), 'C': range(10)})
    gridsize = 10
    plot = frame.vgplot.hexbin(x='x', y='y', C='C', gridsize=gridsize)

    assert plot.spec['mark'] == 'rect'
    utils.check_encodings(plot.spec, x='x', y='y', color='C')

    # Both axes are binned using ``gridsize`` as the bin limit.
    for axis in ('x', 'y'):
        assert plot.spec['encoding'][axis]['bin'] == {"maxbins": gridsize}
    assert plot.spec['encoding']['color']['aggregate'] == "mean"
Example #12
0
def test_ser_kde():
    """KDE of a named Series: line mark, y channel named after the Series."""
    series = pd.Series(range(10), name="x")
    chart = series.vgplot.kde(bw_method="scott")

    assert chart.mark == "line"
    expected_channels = dict(x=' ', y='x')
    utils.check_encodings(chart, **expected_channels)
Example #13
0
def test_df_hexbin_C():
    """Hexbin with a ``C`` column: rect mark, binned axes, mean color."""
    frame = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)})
    gridsize = 10
    chart = frame.vgplot.hexbin(x="x", y="y", C="C", gridsize=gridsize)

    assert chart.mark == "rect"
    utils.check_encodings(chart, x="x", y="y", color="C")

    # Both axes are binned using ``gridsize`` as the bin limit.
    for axis in ("x", "y"):
        assert chart["encoding"][axis]["bin"] == alt.Bin(maxbins=gridsize)
    assert chart["encoding"]["color"]["aggregate"] == "mean"
Example #14
0
def test_bar_stacked():
    """Stacked bar chart of a whole DataFrame."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.bar(stacked=True)

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, x="index", y="value", color="variable")

    # Both columns should show up as variables in the plotted data.
    plotted_variables = set(pd.unique(chart.data["variable"]))
    assert plotted_variables == {"x", "y"}

    y_encoding = chart["encoding"]["y"]
    assert y_encoding["stack"] == "zero"
Example #15
0
def test_bar_stacked():
    """Stacked bar chart of a whole DataFrame."""
    frame = pd.DataFrame({'x': [1, 4, 2, 3, 5], 'y': [6, 3, 4, 5, 2]})
    plot = frame.vgplot.bar(stacked=True)

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'bar'
    utils.check_encodings(plot.spec, x='index', y='value', color='variable')

    # Both columns should show up as variables in the plotted data.
    plotted = utils.get_data(plot.spec)
    plotted_variables = set(pd.unique(plotted['variable']))
    assert plotted_variables == {'x', 'y'}

    y_encoding = plot.spec['encoding']['y']
    assert y_encoding['stack'] == "zero"
Example #16
0
def test_barh_xy():
    """Horizontal bar chart with explicit x and y columns."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.barh(x="x", y="y")

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    # The chart's x channel carries the values; column "x" is on the y channel.
    utils.check_encodings(chart, x="value", y="x", color="variable")

    plotted_variables = set(pd.unique(chart.data["variable"]))
    assert plotted_variables == {"y"}

    x_encoding = chart["encoding"]["x"]
    assert x_encoding["stack"] is None
Example #17
0
def test_line_simple():
    """Line plot of a whole DataFrame with no explicit x or y."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.line()

    utils.validate_vegalite(chart)
    assert chart.mark == "line"
    utils.check_encodings(chart, x="index", y="value", color="variable")

    # Both columns should show up as variables in the plotted data.
    plotted_variables = set(pd.unique(chart.data["variable"]))
    assert plotted_variables == {"x", "y"}
Example #18
0
def test_scatter_color_size():
    """Scatter plot with separate color and size columns."""
    columns = {
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
        "c": range(5),
        "s": range(5),
    }
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.scatter(x="x", y="y", c="c", s="s")

    utils.validate_vegalite(chart)
    assert chart.mark == "point"

    expected_channels = dict(x="x", y="y", color="c", size="s")
    utils.check_encodings(chart, **expected_channels)
Example #19
0
def test_scatter_color_size():
    """Scatter plot with separate color and size columns."""
    columns = {
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
        'c': range(5),
        's': range(5),
    }
    frame = pd.DataFrame(columns)
    plot = frame.vgplot.scatter(x='x', y='y', c='c', s='s')

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'circle'

    expected_channels = dict(x='x', y='y', color='c', size='s')
    utils.check_encodings(plot.spec, **expected_channels)
Example #20
0
def test_df_area_xy_unstacked():
    """Unstacked area chart with explicit x and y columns."""
    columns = {
        "x": [1, 4, 2, 3, 5],
        "y": [6, 3, 4, 5, 2],
        "z": range(5),
    }
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.area(x="x", y="y", stacked=False)

    utils.validate_vegalite(chart)
    assert chart.mark == "area"
    utils.check_encodings(chart, x="x", y="value", color="variable")

    # Only the requested y column may appear; "z" must not be plotted.
    plotted_variables = set(pd.unique(chart.data["variable"]))
    assert plotted_variables == {"y"}

    y_encoding = chart["encoding"]["y"]
    assert y_encoding["stack"] is None
Example #21
0
def test_barh_simple():
    """Horizontal bar chart of a whole DataFrame with no explicit x or y."""
    frame = pd.DataFrame({'x': [1, 4, 2, 3, 5], 'y': [6, 3, 4, 5, 2]})
    plot = frame.vgplot.barh()

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'bar'

    # The index is on the y channel and the values on x.
    expected_channels = dict(y='index',
                             x='value',
                             color='variable',
                             opacity=utils.IGNORE)
    utils.check_encodings(plot.spec, **expected_channels)

    plotted = utils.get_data(plot.spec)
    plotted_variables = set(pd.unique(plotted['variable']))
    assert plotted_variables == {'x', 'y'}

    x_encoding = plot.spec['encoding']['x']
    assert x_encoding['stack'] is None
Example #22
0
def test_df_area_xy_unstacked():
    """Unstacked area chart with explicit x and y columns."""
    columns = {
        'x': [1, 4, 2, 3, 5],
        'y': [6, 3, 4, 5, 2],
        'z': range(5),
    }
    frame = pd.DataFrame(columns)
    plot = frame.vgplot.area(x='x', y='y', stacked=False)

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'area'
    utils.check_encodings(plot.spec, x='x', y='value', color='variable')

    # Only the requested y column may appear; 'z' must not be plotted.
    plotted = utils.get_data(plot.spec)
    plotted_variables = set(pd.unique(plotted['variable']))
    assert plotted_variables == {'y'}

    y_encoding = plot.spec['encoding']['y']
    assert y_encoding['stack'] is None
Example #23
0
def test_df_area_unstacked():
    """Unstacked area chart of a whole DataFrame, including its opacity."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.area(stacked=False)

    utils.validate_vegalite(chart)
    assert chart.mark == "area"

    expected_channels = dict(x="index",
                             y="value",
                             color="variable",
                             opacity=utils.IGNORE)
    utils.check_encodings(chart, **expected_channels)

    plotted_variables = set(pd.unique(chart.data["variable"]))
    assert plotted_variables == {"x", "y"}

    # Unstacked: no stacking on y, and an opacity value of 0.7.
    assert chart["encoding"]["y"]["stack"] is None
    assert chart["encoding"]["opacity"]["value"] == 0.7
Example #24
0
def test_df_area_unstacked():
    """Unstacked area chart of a whole DataFrame, including its opacity."""
    frame = pd.DataFrame({'x': [1, 4, 2, 3, 5], 'y': [6, 3, 4, 5, 2]})
    plot = frame.vgplot.area(stacked=False)

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'area'

    expected_channels = dict(x='index',
                             y='value',
                             color='variable',
                             opacity=utils.IGNORE)
    utils.check_encodings(plot.spec, **expected_channels)

    plotted = utils.get_data(plot.spec)
    plotted_variables = set(pd.unique(plotted['variable']))
    assert plotted_variables == {'x', 'y'}

    # Unstacked: no stacking on y, and an opacity value of 0.7.
    assert plot.spec['encoding']['y']['stack'] is None
    assert plot.spec['encoding']['opacity']['value'] == 0.7
Example #25
0
def test_series_hist(histtype):
    """Check Series histogram mark, encodings and binning.

    ``histtype`` is supplied by the test harness.
    """
    series = pd.Series(range(10))

    # Expected mark for each ``histtype`` value.
    expected_marks = {
        'bar': 'bar',
        'step': dict(type='line', interpolate='step'),
        'stepfilled': dict(type='area', interpolate='step'),
    }

    plot = series.vgplot.hist(bins=5, histtype=histtype)
    assert plot.spec['mark'] == expected_marks[histtype]

    # The Series is unnamed, so the x channel is expected to be '0'.
    utils.check_encodings(plot.spec, x='0', y=utils.IGNORE)
    assert plot.spec['encoding']['x']['bin'] == {'maxbins': 5}
    assert plot.spec['encoding']['y']['aggregate'] == 'count'
Example #26
0
def test_series_hist(histtype, maxbins):
    """Check Series histogram mark, encodings and binning.

    ``histtype`` and ``maxbins`` are supplied by the test harness.
    """
    series = pd.Series(range(10))

    # Expected mark for each ``histtype`` value.
    expected_marks = {
        "bar": "bar",
        "step": dict(type="line", interpolate="step"),
        "stepfilled": dict(type="area", interpolate="step"),
    }

    chart = series.vgplot.hist(bins=maxbins, histtype=histtype)
    assert chart.mark == expected_marks[histtype]

    # The Series is unnamed, so the x channel is expected to be "0".
    utils.check_encodings(chart, x="0", y=utils.IGNORE)
    assert chart["encoding"]["x"]["bin"] == {"maxbins": maxbins}
    assert chart["encoding"]["y"]["aggregate"] == "count"
Example #27
0
def test_series_barh():
    """Horizontal bar chart of a Series with a string index."""
    labels = ['A', 'B', 'C', 'D']
    series = pd.Series([4, 5, 4, 5], index=labels)
    plot = series.vgplot.barh()

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'bar'
    # The index is on the y channel and the unnamed Series values on x.
    utils.check_encodings(plot.spec, y='index', x='0')
Example #28
0
def test_df_hexbin_Cfunc():
    """Hexbin with ``reduce_C_function=min`` yields a "min" color aggregate."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10), 'C': range(10)})
    plot = frame.vgplot.hexbin(x='x', y='y', C='C', reduce_C_function=min)

    utils.check_encodings(plot.spec, x='x', y='y', color='C')
    color_encoding = plot.spec['encoding']['color']
    assert color_encoding['aggregate'] == "min"
Example #29
0
def test_series_line():
    """Line plot of an unnamed Series."""
    series = pd.Series([3, 2, 3, 2, 3])
    plot = series.vgplot.line()

    utils.validate_vegalite(plot.spec)
    assert plot.spec['mark'] == 'line'
    # The index is on the x channel and the unnamed Series values on y.
    expected_channels = dict(x='index', y='0')
    utils.check_encodings(plot.spec, **expected_channels)
Example #30
0
def test_ser_kde():
    """KDE of a named Series: line mark, y channel named after the Series."""
    series = pd.Series(range(10), name='x')
    plot = series.vgplot.kde(bw_method='scott')

    assert plot.spec['mark'] == 'line'
    expected_channels = dict(x=' ', y='x')
    utils.check_encodings(plot.spec, **expected_channels)