예제 #1
0
def test_categorical_mean(ddf):
    """Check ``ds.by(..., ds.mean(...))`` on points for both float columns.

    The same expected values are compared twice: once grouped by the
    ``'cat'`` column (category labels ``'a'``..``'d'``) and once grouped by
    ``'cat_int'`` passed through ``ds.category_modulo`` with ``modulo=4`` and
    ``offset=10`` (category labels ``0``..``3``).
    """
    expected_values = np.array([
        [[2, nan, nan, nan], [nan, nan, 12, nan]],
        [[nan, 7, nan, nan], [nan, nan, nan, 17]],
    ])
    float_columns = ('f32', 'f64')

    # Grouped by the 'cat' column.
    expected = xr.DataArray(
        expected_values,
        coords=coords + [['a', 'b', 'c', 'd']],
        dims=dims + ['cat'],
    )
    for column in float_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.mean(column)))
        assert_eq_xr(result, expected)

    # Grouped by 'cat_int' through category_modulo.
    expected = xr.DataArray(
        expected_values,
        coords=coords + [range(4)],
        dims=dims + ['cat_int'],
    )
    for column in float_columns:
        grouping = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(grouping, ds.mean(column)))
        assert_eq_xr(result, expected)
예제 #2
0
def test_categorical_sum(ddf):
    """Check ``ds.by('cat', ds.sum(...))`` on points for int and float columns.

    The integer columns (``'i32'``, ``'i64'``) and the float columns
    (``'f32'``, ``'f64'``) are each compared against their own expected
    array; the two arrays differ only in the first cell (10 vs 8.0).
    """
    def expected_for(values):
        # Wrap raw sums in a DataArray labelled with the four categories.
        return xr.DataArray(
            values,
            coords=coords + [['a', 'b', 'c', 'd']],
            dims=dims + ['cat'],
        )

    int_sums = np.array([
        [[10, 0, 0, 0], [0, 0, 60, 0]],
        [[0, 35, 0, 0], [0, 0, 0, 85]],
    ])
    expected = expected_for(int_sums)
    for column in ('i32', 'i64'):
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, expected)

    # NOTE(review): the first cell is 8.0 rather than 10, presumably because
    # the float columns contain a NaN -- confirm against the fixture.
    float_sums = np.array([
        [[8.0, 0, 0, 0], [0, 0, 60.0, 0]],
        [[0, 35.0, 0, 0], [0, 0, 0, 85.0]],
    ])
    expected = expected_for(float_sums)
    for column in ('f32', 'f64'):
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, expected)
예제 #3
0
def test_categorical_var(ddf):
    """Check ``ds.by('cat', ds.var(...))`` on points for both float columns.

    ``assert_eq_xr`` is called with a third positional argument of ``True``.
    NOTE(review): presumably this requests an approximate comparison --
    confirm against the helper's signature.
    """
    expected_values = np.array([
        [[2.5, nan, nan, nan], [nan, nan, 2., nan]],
        [[nan, 2., nan, nan], [nan, nan, nan, 2.]],
    ])
    expected = xr.DataArray(
        expected_values,
        coords=coords + [['a', 'b', 'c', 'd']],
        dims=dims + ['cat'],
    )

    for column in ('f32', 'f64'):
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.var(column)))
        assert_eq_xr(result, expected, True)
예제 #4
0
def test_categorical_var(ddf):
    """Check ``ds.by('cat', ds.var(...))`` on points for both float columns.

    The test is skipped when ``ddf._meta`` is a ``cudf.DataFrame``.
    ``assert_eq_xr`` is called with a third positional argument of ``True``.
    NOTE(review): presumably this requests an approximate comparison --
    confirm against the helper's signature.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        pytest.skip("The 'var' reduction is not yet supported on the GPU")

    sol = np.array([[[2.5, nan, nan, nan], [nan, nan, 2., nan]],
                    [[nan, 2., nan, nan], [nan, nan, nan, 2.]]])
    out = xr.DataArray(sol,
                       coords=(coords + [['a', 'b', 'c', 'd']]),
                       dims=(dims + ['cat']))

    for col in ('f32', 'f64'):
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.var(col)))
        assert_eq_xr(agg, out, True)
예제 #5
0
def _compute_datashader_assets(data, x, aggregate_col, aggregate_fn,
                               color_palette):
    """Pick the datashader aggregator and colour mapping for column ``x``.

    Parameters
    ----------
    data : dataframe
        Indexed as ``data[x]``; the column's ``dtype`` decides the branch.
    x : str
        Column to inspect (and to group by when it is categorical).
    aggregate_col : str
        Column handed to the reduction looked up on ``ds``.
    aggregate_fn : str
        Name of the reduction attribute on ``ds`` (looked up via ``getattr``).
    color_palette : sequence
        Colours; used as-is or zipped with the categories.

    Returns
    -------
    tuple
        ``(aggregator, cmap)``. For a categorical column ``cmap`` is
        ``{"color_key": {category: colour}}``; otherwise it is
        ``{"cmap": color_palette}`` and ``aggregator`` is ``None`` when
        ``aggregate_fn`` is falsy.
    """
    is_categorical = isinstance(data[x].dtype,
                                cudf.core.dtypes.CategoricalDtype)

    if not is_categorical:
        reduction = None
        if aggregate_fn:
            reduction = getattr(ds, aggregate_fn)(aggregate_col)
        return reduction, {"cmap": color_palette}

    # NOTE(review): if ds_version is a plain str this is a lexicographic
    # comparison -- confirm it is a version object.
    if ds_version >= "0.11":
        reduction = ds.by(x, getattr(ds, aggregate_fn)(aggregate_col))
    else:
        print("only count_cat supported by datashader <=0.10")
        reduction = ds.count_cat(x)

    # zip() stops at the shorter of the categories and the palette.
    category_colours = dict(zip(list(data[x].cat.categories), color_palette))
    return reduction, {"color_key": category_colours}
예제 #6
0
    def _get_points(self):
        """Return a datashaded HoloViews plot of ``self.embeddings``.

        When ``self.label_flag`` is truthy and ``self.classes`` is not
        ``None``, one ``hv.Points`` element is built per class index and the
        overlay is datashaded with a per-class count aggregator, then
        multiplied with an overlay of zero-size points carrying the class
        colours. Otherwise the embeddings are datashaded without classes.
        Plot size and hidden axes are applied before returning.
        """
        coords_2d = self.embeddings
        labels = self.classes

        if not (self.label_flag and labels is not None):
            points = datashade(hv.Points(coords_2d))
        else:
            frame = pd.DataFrame(coords_2d)
            frame.columns = ['ivis 1', 'ivis 2']
            frame['label'] = labels
            n_classes = len(np.unique(labels))
            # (index, colour) pairs; the index is compared against ``labels``.
            palette = list(enumerate(Sets1to3[0:n_classes]))

            per_class = {}
            for idx, colour in palette:
                element = hv.Points(frame.values[labels == idx, :],
                                    ['ivis 1', 'ivis 2'],
                                    'k',
                                    label=str(idx))
                per_class[idx] = element.opts(color=colour, size=0)

            shaded = datashade(hv.NdOverlay(per_class, kdims=['k']),
                               aggregator=ds.by('k', ds.count()))
            spread = dynspread(shaded)

            colour_markers = hv.NdOverlay({
                idx: hv.Points([0, 0]).opts(color=colour, size=0)
                for idx, colour in palette
            })
            points = colour_markers * spread

        points.opts(height=400, width=500, xaxis=None, yaxis=None)
        return points
예제 #7
0
def test_count_cat(ddf):
    """Check categorical counting on points through several groupings.

    Covers ``ds.count_cat('cat')``, ``ds.by`` with ``ds.category_modulo`` on
    ``'cat_int'``, and ``ds.by`` with ``ds.category_binning`` on the integer
    and float columns.
    """
    def check_binned(columns, values):
        # Bin each column over [0, 20] into 4 bins and compare the counts.
        for column in columns:
            expected = xr.DataArray(values,
                                    coords=coords + [range(5)],
                                    dims=dims + [column])
            binned = ds.category_binning(column, 0, 20, 4)
            result = c.points(ddf, 'x', 'y', ds.by(binned, ds.count()))
            assert_eq_xr(result, expected)

    counts = np.array([
        [[5, 0, 0, 0], [0, 0, 5, 0]],
        [[0, 5, 0, 0], [0, 0, 0, 5]],
    ])

    expected = xr.DataArray(counts,
                            coords=coords + [['a', 'b', 'c', 'd']],
                            dims=dims + ['cat'])
    result = c.points(ddf, 'x', 'y', ds.count_cat('cat'))
    assert_eq_xr(result, expected)

    # categorizing by (cat_int-10)%4 ought to give the same result
    expected = xr.DataArray(counts,
                            coords=coords + [range(4)],
                            dims=dims + ['cat_int'])
    grouping = ds.category_modulo('cat_int', modulo=4, offset=10)
    result = c.points(ddf, 'x', 'y', ds.by(grouping, ds.count()))
    assert_eq_xr(result, expected)

    # The binning checks live here because they expect the same result with
    # only slight tweaks.

    # add an extra category (this will count nans and out of bounds)
    counts = np.append(counts, [[[0], [0]], [[0], [0]]], axis=2)

    # Binning the integer arange columns using [0,20] into 4 bins gives the
    # same result as for count_cat.
    check_binned(('i32', 'i64'), counts)

    # As above, but for the float arange columns. Element 2 has a nan, so the
    # first bin is one short, and the nan bin is +1.
    counts[0, 0, 0] = 4
    counts[0, 0, 4] = 1
    check_binned(('f32', 'f64'), counts)
예제 #8
0
def test_categorical_std(ddf):
    """Check ``ds.by(..., ds.std(...))`` on points for both float columns.

    The test is skipped when ``ddf._meta`` is a ``cudf.DataFrame``. The
    expected values are the square roots of the expected variances and are
    compared for three groupings: the ``'cat'`` column,
    ``ds.category_modulo`` on ``'cat_int'``, and ``ds.category_binning`` on
    the float column itself.

    Only the ``'cat'`` comparison passes ``True`` as the third positional
    argument to ``assert_eq_xr``. NOTE(review): presumably this requests an
    approximate comparison -- confirm against the helper's signature.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        pytest.skip("The 'std' reduction is not yet supported on the GPU")

    sol = np.sqrt(
        np.array([[[2.5, nan, nan, nan], [nan, nan, 2., nan]],
                  [[nan, 2., nan, nan], [nan, nan, nan, 2.]]]))
    float_cols = ('f32', 'f64')

    # Grouped by the 'cat' column.
    out = xr.DataArray(sol,
                       coords=(coords + [['a', 'b', 'c', 'd']]),
                       dims=(dims + ['cat']))
    for col in float_cols:
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.std(col)))
        assert_eq_xr(agg, out, True)

    # Grouped by 'cat_int' through category_modulo.
    out = xr.DataArray(sol,
                       coords=(coords + [range(4)]),
                       dims=(dims + ['cat_int']))
    for col in float_cols:
        agg = c.points(
            ddf, 'x', 'y',
            ds.by(ds.category_modulo('cat_int', modulo=4, offset=10),
                  ds.std(col)))
        assert_eq_xr(agg, out)

    # add an extra category (this will count nans and out of bounds)
    sol = np.append(sol, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for col in float_cols:
        out = xr.DataArray(sol,
                           coords=(coords + [range(5)]),
                           dims=(dims + [col]))
        agg = c.points(ddf, 'x', 'y',
                       ds.by(ds.category_binning(col, 0, 20, 4), ds.std(col)))
        assert_eq_xr(agg, out)
예제 #9
0
def test_categorical_mean(ddf):
    """Check ``ds.by(..., ds.mean(...))`` on points for both float columns.

    Three groupings are compared against the same expected means: the
    ``'cat'`` column, ``ds.category_modulo`` on ``'cat_int'``, and
    ``ds.category_binning`` on the float column itself (which adds a fifth,
    all-NaN category to the expected array).
    """
    float_columns = ('f32', 'f64')
    means = np.array([
        [[2, nan, nan, nan], [nan, nan, 12, nan]],
        [[nan, 7, nan, nan], [nan, nan, nan, 17]],
    ])

    # Grouped by the 'cat' column.
    expected = xr.DataArray(means,
                            coords=coords + [['a', 'b', 'c', 'd']],
                            dims=dims + ['cat'])
    for column in float_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.mean(column)))
        assert_eq_xr(result, expected)

    # Grouped by 'cat_int' through category_modulo.
    expected = xr.DataArray(means,
                            coords=coords + [range(4)],
                            dims=dims + ['cat_int'])
    for column in float_columns:
        grouping = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(grouping, ds.mean(column)))
        assert_eq_xr(result, expected)

    # add an extra category (this will count nans and out of bounds)
    padded_means = np.append(means, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for column in float_columns:
        expected = xr.DataArray(padded_means,
                                coords=coords + [range(5)],
                                dims=dims + [column])
        binned = ds.category_binning(column, 0, 20, 4)
        result = c.points(ddf, 'x', 'y', ds.by(binned, ds.mean(column)))
        assert_eq_xr(result, expected)
예제 #10
0
def test_categorical_mean_binning(ddf):
    """Check ``ds.mean`` grouped by ``ds.category_binning`` on float columns.

    The test is skipped when ``ddf._meta`` is a ``cudf.DataFrame``. Each
    float column is binned over ``[0, 20]`` into 4 bins and the per-bin means
    are compared against the expected array, which carries a fifth, all-NaN
    category.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        pytest.skip(
            "The categorical binning of 'mean' reduction is not yet supported on the GPU"
        )
    sol = np.array([[[2, nan, nan, nan], [nan, nan, 12, nan]],
                    [[nan, 7, nan, nan], [nan, nan, nan, 17]]])

    # add an extra category (this will count nans and out of bounds)
    sol = np.append(sol, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for col in ('f32', 'f64'):
        out = xr.DataArray(sol,
                           coords=(coords + [range(5)]),
                           dims=(dims + [col]))
        agg = c.points(ddf, 'x', 'y',
                       ds.by(ds.category_binning(col, 0, 20, 4), ds.mean(col)))
        assert_eq_xr(agg, out)
예제 #11
0
def test_categorical_sum(ddf):
    """Check ``ds.by(..., ds.sum(...))`` on points for int and float columns.

    Integer columns are checked grouped by ``'cat'`` and by
    ``ds.category_modulo`` on ``'cat_int'``; float columns are checked
    grouped by ``'cat'`` only. The expected arrays hold NaN wherever no sum
    is expected.
    """
    int_columns = ('i32', 'i64')
    float_columns = ('f32', 'f64')

    int_sums = np.array([
        [[10, nan, nan, nan], [nan, nan, 60, nan]],
        [[nan, 35, nan, nan], [nan, nan, nan, 85]],
    ])

    # Integer columns grouped by the 'cat' column.
    expected = xr.DataArray(int_sums,
                            coords=coords + [['a', 'b', 'c', 'd']],
                            dims=dims + ['cat'])
    for column in int_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, expected)

    # Integer columns grouped by 'cat_int' through category_modulo.
    expected = xr.DataArray(int_sums,
                            coords=coords + [range(4)],
                            dims=dims + ['cat_int'])
    for column in int_columns:
        grouping = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(grouping, ds.sum(column)))
        assert_eq_xr(result, expected)

    # NOTE(review): the first cell is 8.0 rather than 10, presumably because
    # the float columns contain a NaN -- confirm against the fixture.
    float_sums = np.array([
        [[8.0, nan, nan, nan], [nan, nan, 60.0, nan]],
        [[nan, 35.0, nan, nan], [nan, nan, nan, 85.0]],
    ])
    expected = xr.DataArray(float_sums,
                            coords=coords + [['a', 'b', 'c', 'd']],
                            dims=dims + ['cat'])
    for column in float_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, expected)
예제 #12
0
def create_plot(
    data,
    x,
    y,
    plot_type="scatter",
    selected=None,
    show_selected=True,
    slow_render=False,
    legend=True,
    colours=True,
    smaller_axes_limits=False,
    bounds=None,
    legend_position=None,
):
    """Build a HoloViews plot of ``data[x]`` against ``data[y]``.

    By default the element is datashaded and spread with ``dynspread``.

    Parameters
    ----------
    data : dataframe
        Table holding the columns to plot; must expose ``.columns`` and
        support boolean-mask indexing.
    x, y : str
        Names of the columns used for the two axes.
    plot_type : {"scatter", "line"}
        ``"scatter"`` builds ``hv.Points``; ``"line"`` builds ``hv.Path``.
    selected : object with a ``.data`` mapping, optional
        Source of the highlighted point. It is drawn only when
        ``show_selected`` is true, ``selected.data`` holds exactly one row
        and (if ``bounds`` is given) that row lies inside the bounds;
        otherwise an empty scatter is overlaid.
    show_selected : bool
        Whether to overlay the selected point.
    slow_render : bool
        If true, use the raw element instead of the datashaded plot.
    legend : bool
        If true (and ``colours`` is true), overlay zero-size points that
        carry the label names and colours.
    colours : bool
        If true, aggregate by ``config.settings["label_col"]`` and colour
        with ``config.settings["label_colours"]``.
    smaller_axes_limits : bool
        If true, limit each axis to mean +/- 4 standard deviations (never
        wider than the data range), widened to include the selected point.
    bounds : sequence of four numbers, optional
        ``(x_min, y_max, x_max, y_min)``; rows outside are dropped.
    legend_position : str, optional
        Passed to ``.opts(legend_position=...)`` when not ``None``.

    Returns
    -------
    HoloViews object
        The assembled plot.

    Raises
    ------
    AssertionError
        If ``x`` or ``y`` is not a column of ``data``.
    ValueError
        If ``plot_type`` is neither ``"scatter"`` nor ``"line"``.
    """
    assert x in list(
        data.columns), f"Column {x} is not a column in your dataframe."
    assert y in list(
        data.columns), f"Column {y} is not a column in your dataframe."

    # Fail fast: without this check an unknown plot_type would leave ``p``
    # unbound and surface later as an unrelated error.
    if plot_type not in ("scatter", "line"):
        raise ValueError(
            f"Unknown plot_type {plot_type!r}; expected 'scatter' or 'line'.")

    if bounds is not None:
        data = data[data[x] >= bounds[0]]
        data = data[data[y] <= bounds[1]]
        data = data[data[x] <= bounds[2]]
        data = data[data[y] >= bounds[3]]

    if plot_type == "scatter":
        p = hv.Points(
            data,
            [x, y],
        ).opts(active_tools=["pan", "wheel_zoom"])
    else:
        p = hv.Path(
            data,
            [x, y],
        ).opts(active_tools=["pan", "wheel_zoom"])

    # Evaluated once: ``selected`` is rebound to a DataFrame below, which
    # never turns a ``None`` into a non-``None`` or vice versa.
    highlight_selected = show_selected and (selected is not None)

    if highlight_selected:
        cols = list(data.columns)

        if len(selected.data[cols[0]]) == 1:
            selected = pd.DataFrame(selected.data, columns=cols, index=[0])
            if bounds is not None:
                outside_bounds = ((selected[x][0] < bounds[0])
                                  or (selected[y][0] > bounds[1])
                                  or (selected[x][0] > bounds[2])
                                  or (selected[y][0] < bounds[3]))
                if outside_bounds:
                    # Selected point was filtered out: overlay nothing.
                    selected = pd.DataFrame(columns=cols)
        else:
            selected = pd.DataFrame(columns=cols)

        selected_plot = hv.Scatter(
            selected,
            x,
            y,
        ).opts(
            fill_color="black",
            marker="circle",
            size=10,
            active_tools=["pan", "wheel_zoom"],
        )

    if colours:
        color_key = config.settings["label_colours"]

        # Zero-size points whose only purpose is to carry label and colour.
        color_points = hv.NdOverlay({
            config.settings["labels_to_strings"][f"{n}"]:
            hv.Points([0, 0],
                      label=config.settings["labels_to_strings"][f"{n}"]).opts(
                          style=dict(color=color_key[n], size=0))
            for n in color_key
        })

    if smaller_axes_limits:

        max_x = np.max(data[x])
        min_x = np.min(data[x])

        max_y = np.max(data[y])
        min_y = np.min(data[y])

        x_sd = np.std(data[x])
        x_mu = np.mean(data[x])
        y_sd = np.std(data[y])
        y_mu = np.mean(data[y])

        # Clip to mean +/- 4 sd without exceeding the actual data range.
        max_x = np.min([x_mu + 4 * x_sd, max_x])
        min_x = np.max([x_mu - 4 * x_sd, min_x])

        max_y = np.min([y_mu + 4 * y_sd, max_y])
        min_y = np.max([y_mu - 4 * y_sd, min_y])

        if highlight_selected and selected.shape[0] > 0:
            # Widen the limits so the selected point is always visible.
            max_x = np.max([max_x, np.max(selected[x])])
            min_x = np.min([min_x, np.min(selected[x])])

            max_y = np.max([max_y, np.max(selected[y])])
            min_y = np.min([min_y, np.min(selected[y])])

    # Assemble the datashade / opts arguments once rather than per branch.
    shade_kwargs = {}
    if colours:
        shade_kwargs = dict(
            color_key=color_key,
            aggregator=ds.by(config.settings["label_col"], ds.count()),
        )

    if smaller_axes_limits:
        opts_kwargs = dict(xlim=(min_x, max_x),
                           ylim=(min_y, max_y),
                           responsive=True)
    else:
        opts_kwargs = dict(responsive=True)

    plot = dynspread(
        datashade(p, **shade_kwargs).opts(**opts_kwargs),
        threshold=0.75,
        how="saturate",
    )

    if smaller_axes_limits and not colours:
        # NOTE(review): redim.range is only applied on the uncoloured path;
        # confirm the dimension names 'xdim'/'ydim' are intended.
        plot = plot.redim.range(xdim=(min_x, max_x), ydim=(min_y, max_y))

    if slow_render:
        plot = p

    if highlight_selected:
        plot = plot * selected_plot

    if legend and colours:
        plot = plot * color_points

    if legend_position is not None:
        plot = plot.opts(legend_position=legend_position)

    return plot
예제 #13
0
    def plot(self, x_var=None, y_var=None):
        """Create a datashaded scatter plot of the data on the chosen axes.

        All rows of ``self.df`` are drawn with every label mapped to the same
        grey, the rows of ``self.sample_region`` are drawn on top using the
        configured label colours, and a black circle marks the selected
        point when ``self.src.data`` holds exactly one row. The data is
        rendered through HoloViews' datashader operations, allowing large
        numbers of points to be drawn at once with full maneuverability.

        Parameters
        ----------
        x_var, y_var : str, optional
            Column names for the two axes. They default to
            ``self.X_variable`` and ``self.Y_variable``.

        Returns
        -------
        plot : Holoviews Object
            A Holoviews plot

        """
        x_name = self.X_variable if x_var is None else x_var
        y_name = self.Y_variable if y_var is None else y_var

        background = hv.Points(self.df, [x_name, y_name])
        background = background.opts(active_tools=["pan", "wheel_zoom"])

        region = hv.Points(self.sample_region, [x_name, y_name])
        region = region.opts(active_tools=["pan", "wheel_zoom"])

        # A selection is only shown when the source holds exactly one row.
        columns = list(self.df.columns)
        if len(self.src.data[columns[0]]) == 1:
            chosen = pd.DataFrame(self.src.data, columns=columns, index=[0])
        else:
            chosen = pd.DataFrame(columns=columns)

        chosen_marker = hv.Scatter(chosen, x_name, y_name).opts(
            fill_color="black",
            marker="circle",
            size=10,
            active_tools=["pan", "wheel_zoom"],
        )

        label_colours = config.settings["label_colours"]

        # Zero-size points whose only purpose is to carry label and colour.
        legend_markers = hv.NdOverlay({
            config.settings["labels_to_strings"][f"{n}"]:
            hv.Points([0, 0],
                      label=config.settings["labels_to_strings"][f"{n}"]).opts(
                          style=dict(color=label_colours[n], size=0))
            for n in label_colours
        })

        x_values = self.df[x_name]
        y_values = self.df[y_name]

        x_mean, x_std = np.mean(x_values), np.std(x_values)
        y_mean, y_std = np.mean(y_values), np.std(y_values)

        # Clip to mean +/- 4 sd without exceeding the actual data range.
        x_hi = np.min([x_mean + 4 * x_std, np.max(x_values)])
        x_lo = np.max([x_mean - 4 * x_std, np.min(x_values)])
        y_hi = np.min([y_mean + 4 * y_std, np.max(y_values)])
        y_lo = np.max([y_mean - 4 * y_std, np.min(y_values)])

        if chosen.shape[0] > 0:
            # Widen the limits so the selected point is always visible.
            x_hi = np.max([x_hi, np.max(chosen[x_name])])
            x_lo = np.min([x_lo, np.min(chosen[x_name])])
            y_hi = np.max([y_hi, np.max(chosen[y_name])])
            y_lo = np.min([y_lo, np.min(chosen[y_name])])

        axis_limits = dict(xlim=(x_lo, x_hi), ylim=(y_lo, y_hi))

        # Every label gets the same grey for the background layer.
        grey_key = dict.fromkeys(label_colours.keys(), "#333333")

        shaded_background = datashade(
            background,
            color_key=grey_key,
            aggregator=ds.by(config.settings["label_col"], ds.count()),
        ).opts(responsive=True, alpha=0.5, shared_axes=False, **axis_limits)
        all_points = dynspread(shaded_background, threshold=0.3, how="over")

        shaded_region = datashade(
            region,
            color_key=label_colours,
            aggregator=ds.by(config.settings["label_col"], ds.count()),
            min_alpha=70,
            alpha=100,
        ).opts(responsive=True, shared_axes=False, **axis_limits)
        region_points = dynspread(shaded_region,
                                  threshold=0.7,
                                  how="saturate")

        layered = all_points * region_points * chosen_marker * legend_markers
        return layered.opts(shared_axes=False)
# Table used further down to build the segment overlay; it is indexed there by
# its 'concentration' and 'parameter' columns.
# NOTE(review): presumably holds MLEs with confidence intervals -- confirm
# against _create_df_mles_conf_ints.
df_mle = _create_df_mles_conf_ints()

# Colours for the datashaded overlay and the segments.
# NOTE(review): presumably one per concentration -- confirm the count matches.
colors = ['crimson', 'orange', 'greenyellow', 'forestgreen', 'blue']

# Generate the base Points figure: one Points element per 'concentration'
# value, overlaid. ``df_reps_mles`` is defined outside this section.
points = hv.Points(
    data=df_reps_mles,
    kdims=['alpha', 'beta (1/s)'],
    vdims='concentration',
).groupby('concentration').overlay()

# Use datashader so we're not plotting tons of points: count points per
# 'concentration' category, colour them with ``colors`` and spread the result.
plot = hv.operation.datashader.dynspread(
    hv.operation.datashader.datashade(
        points,
        aggregator=datashader.by('concentration', datashader.count()),
        color_key=colors,
    ))

# make segments to show range of D
D_segments = hv.NdOverlay({
    concentration: hv.Segments((
        df_mle.loc[(df_mle['concentration'] == concentration) &
                   (df_mle['parameter'] == 'alpha'), 'conf_start'],
        df_mle.loc[(df_mle['concentration'] == concentration) &
                   (df_mle['parameter'] == 'beta (1/s)'), 'mle'],
        df_mle.loc[(df_mle['concentration'] == concentration) &
                   (df_mle['parameter'] == 'alpha'), 'conf_end'],
        df_mle.loc[(df_mle['concentration'] == concentration) &
                   (df_mle['parameter'] == 'beta (1/s)'), 'mle'],
    ), ).opts(color=color, line_width=2)