Example #1
    def test_default_groups(self, df, orient):

        other = {"x": "y", "y": "x"}[orient]
        gb = GroupBy(["grp2"])
        res = Norm()(df, gb, orient)
        for _, grp in res.groupby("grp2"):
            assert grp[other].max() == pytest.approx(1)
Example #2
def test_apply_no_grouper(df):

    df = df[["x", "y"]]
    res = GroupBy(["a"]).apply(df, lambda x: x.sort_values("x"))
    assert_array_equal(res.columns, ["x", "y"])
    assert_array_equal(res["x"], df["x"].sort_values())
    assert_array_equal(res["y"], df.loc[np.argsort(df["x"]), "y"])
Example #3
def test_agg_one_grouper(df):

    res = GroupBy(["a"]).agg(df, {"y": "max"})
    assert_array_equal(res.index, [0, 1])
    assert_array_equal(res.columns, ["a", "y"])
    assert_array_equal(res["a"], ["a", "b"])
    assert_array_equal(res["y"], [.8, .5])
Example #4
    def __call__(self, data: DataFrame, groupby: GroupBy,
                 orient: str) -> DataFrame:

        # TODO where to ensure that other semantic variables are sorted properly?
        # TODO why are we not using the passed in groupby here?
        groupers = ["col", "row", orient]
        return GroupBy(groupers).apply(data, self._stack, orient)
Example #5
    def test_faceted(self, toy_df_facets):

        groupby = GroupBy(["color", "group"])
        res = Stack()(toy_df_facets, groupby, "x")

        assert_array_equal(res["x"], [0, 0, 1, 0, 1, 2])
        assert_array_equal(res["y"], [1, 3, 3, 1, 2, 3])
        assert_array_equal(res["baseline"], [0, 1, 0, 0, 0, 0])
Example #6
    def test_basic(self, toy_df):

        groupby = GroupBy(["color", "group"])
        res = Stack()(toy_df, groupby, "x")

        assert_array_equal(res["x"], [0, 0, 1])
        assert_array_equal(res["y"], [1, 3, 3])
        assert_array_equal(res["baseline"], [0, 1, 0])
Example #7
    def test_faceted_drop(self, toy_df_facets):

        groupby = GroupBy(["x", "grp", "col"])
        res = Dodge(empty="drop")(toy_df_facets, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3, 1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, 1, 0, 1, 2])
        assert_array_almost_equal(res["width"], [.4] * 6)
Example #8
    def test_faceted_default(self, toy_df_facets):

        groupby = GroupBy(["x", "grp", "col"])
        res = Dodge()(toy_df_facets, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3, 1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, .8, .2, .8, 2.2])
        assert_array_almost_equal(res["width"], [.4] * 6)
Example #9
    def test_widths_drop(self, toy_df_widths):

        groupby = GroupBy(["x", "grp"])
        res = Dodge(empty="drop")(toy_df_widths, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3])
        assert_array_almost_equal(res["x"], [-.08, .32, 1])
        assert_array_almost_equal(res["width"], [.64, .16, .2])
Example #10
    def test_gap(self, toy_df):

        groupby = GroupBy(["x", "grp"])
        res = Dodge(gap=.25)(toy_df, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, 1.2])
        assert_array_almost_equal(res["width"], [.3, .3, .3])
Example #11
    def test_drop(self, toy_df):

        groupby = GroupBy(["x", "grp"])
        res = Dodge("drop")(toy_df, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, 1])
        assert_array_almost_equal(res["width"], [.4, .4, .4])
Example #12
    def test_baseline_homogeneity_check(self, toy_df):

        toy_df["baseline"] = [0, 1, 2]
        groupby = GroupBy(["color", "group"])
        move = Stack()
        err = "Stack move cannot be used when baselines"
        with pytest.raises(RuntimeError, match=err):
            move(toy_df, groupby, "x")
Example #13
def test_apply_one_grouper(df):

    res = GroupBy(["a"]).apply(df, lambda x: x.sort_values("x"))
    assert_array_equal(res.index, [0, 1, 2, 3, 4])
    assert_array_equal(res.columns, ["a", "b", "x", "y"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b"])
    assert_array_equal(res["b"], ["g", "h", "f", "f", "h"])
    assert_array_equal(res["x"], [1, 1, 2, 2, 3])
Example #14
    def triple_args(self):

        groupby = GroupBy(["group", "a", "s"])

        class Scale:
            scale_type = "continuous"

        return groupby, "x", {"x": Scale()}
Example #15
    def test_fill(self, toy_df):

        groupby = GroupBy(["x", "grp"])
        res = Dodge(empty="fill")(toy_df, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3]),
        assert_array_almost_equal(res["x"], [-.2, .2, 1])
        assert_array_almost_equal(res["width"], [.4, .4, .8])
Example #16
def test_agg_two_groupers(df):

    res = GroupBy(["a", "x"]).agg(df, {"y": "min"})
    assert_array_equal(res.index, [0, 1, 2, 3, 4, 5])
    assert_array_equal(res.columns, ["a", "x", "y"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b", "b"])
    assert_array_equal(res["x"], [1, 2, 3, 1, 2, 3])
    assert_array_equal(res["y"], [.2, .8, np.nan, np.nan, .4, .5])
Example #17
    def test_orient(self, toy_df):

        df = toy_df.assign(x=toy_df["y"], y=toy_df["x"])

        groupby = GroupBy(["y", "grp"])
        res = Dodge("drop")(df, groupby, "y")

        assert_array_equal(res["x"], [1, 2, 3])
        assert_array_almost_equal(res["y"], [-.2, .2, 1])
        assert_array_almost_equal(res["width"], [.4, .4, .4])
Example #18
    def test_no_grouper(self, df):

        groupby = GroupBy(["group"])
        res = PolyFit(order=1, gridsize=100)(df[["x", "y"]], groupby, "x", {})

        assert_array_equal(res.columns, ["x", "y"])

        grid = np.linspace(df["x"].min(), df["x"].max(), 100)
        assert_array_equal(res["x"], grid)
        assert_array_almost_equal(res["y"].diff().diff().dropna(),
                                  np.zeros(grid.size - 2))
Example #19
def test_agg_two_groupers_ordered(df):

    order = {"b": ["h", "g", "f"], "x": [3, 2, 1]}
    res = GroupBy(order).agg(df, {"a": "min", "y": lambda x: x.iloc[0]})
    assert_array_equal(res.index, [0, 1, 2, 3, 4, 5, 6, 7, 8])
    assert_array_equal(res.columns, ["a", "b", "x", "y"])
    assert_array_equal(res["b"], ["h", "h", "h", "g", "g", "g", "f", "f", "f"])
    assert_array_equal(res["x"], [3, 2, 1, 3, 2, 1, 3, 2, 1])

    T, F = True, False
    assert_array_equal(res["a"].isna(), [F, T, F, T, T, F, T, F, T])
    assert_array_equal(res["a"].dropna(), ["b", "a", "a", "a"])
    assert_array_equal(res["y"].dropna(), [.5, .3, .2, .8])
Example #20
    def __call__(self, data, groupby, orient, scales):

        # TODO better to do this as an isinstance check?
        # We are only asking about Nominal scales now,
        # but presumably would apply to Ordinal too?
        scale_type = scales[orient].__class__.__name__.lower()
        grouping_vars = [v for v in data if v in groupby.order]
        if not grouping_vars or self.common_bins is True:
            bin_kws = self._define_bin_params(data, orient, scale_type)
            data = groupby.apply(data, self._eval, orient, bin_kws)
        else:
            if self.common_bins is False:
                bin_groupby = GroupBy(grouping_vars)
            else:
                bin_groupby = GroupBy(self.common_bins)
            data = bin_groupby.apply(
                data,
                self._get_bins_and_eval,
                orient,
                groupby,
                scale_type,
            )

        # TODO Make this an option?
        # (This needs to be tested if enabled, and maybe should be in _eval)
        # other = {"x": "y", "y": "x"}[orient]
        # data = data[data[other] > 0]

        if not grouping_vars or self.common_norm is True:
            data = self._normalize(data, orient)
        else:
            if self.common_norm is False:
                norm_grouper = grouping_vars
            else:
                norm_grouper = self.common_norm
            normalize = partial(self._normalize, orient=orient)
            data = GroupBy(norm_grouper).apply(data, normalize)

        return data
Example #21
def test_apply_replace_columns(df):

    def add_sorted_cumsum(df):

        x = df["x"].sort_values()
        z = df.loc[x.index, "y"].cumsum()
        return pd.DataFrame(dict(x=x.values, z=z.values))

    res = GroupBy(["a"]).apply(df, add_sorted_cumsum)
    assert_array_equal(res.index, df.index)
    assert_array_equal(res.columns, ["a", "x", "z"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b"])
    assert_array_equal(res["x"], [1, 1, 2, 2, 3])
    assert_array_equal(res["z"], [.2, .5, 1.3, .4, .9])
Example #22
    def test_one_grouper(self, df):

        groupby = GroupBy(["group"])
        gridsize = 50
        res = PolyFit(gridsize=gridsize)(df, groupby, "x", {})

        assert res.columns.to_list() == ["x", "y", "group"]

        ngroups = df["group"].nunique()
        assert_array_equal(res.index, np.arange(ngroups * gridsize))

        for _, part in res.groupby("group"):
            grid = np.linspace(part["x"].min(), part["x"].max(), gridsize)
            assert_array_equal(part["x"], grid)
            assert part["y"].diff().diff().dropna().abs().gt(0).all()
Example #23
    def __call__(self, data, groupby, orient, scales):

        scale_type = scales[orient].scale_type
        grouping_vars = [v for v in data if v in groupby.order]
        if not grouping_vars or self.common_bins is True:
            bin_kws = self._define_bin_params(data, orient, scale_type)
            data = groupby.apply(data, self._eval, orient, bin_kws)
        else:
            if self.common_bins is False:
                bin_groupby = GroupBy(grouping_vars)
            else:
                bin_groupby = GroupBy(self.common_bins)
            data = bin_groupby.apply(
                data,
                self._get_bins_and_eval,
                orient,
                groupby,
                scale_type,
            )

        # TODO Make this an option?
        # (This needs to be tested if enabled, and maybe should be in _eval)
        # other = {"x": "y", "y": "x"}[orient]
        # data = data[data[other] > 0]

        if not grouping_vars or self.common_norm is True:
            data = self._normalize(data, orient)
        else:
            if self.common_norm is False:
                norm_grouper = grouping_vars
            else:
                norm_grouper = self.common_norm
            normalize = partial(self._normalize, orient=orient)
            data = GroupBy(norm_grouper).apply(data, normalize)

        return data
Example #24
    def test_single_semantic(self, df, grp):

        groupby = GroupBy(["x", grp])
        res = Dodge()(df, groupby, "x")

        levels = categorical_order(df[grp])
        w, n = 0.8, len(levels)

        shifts = np.linspace(0, w - w / n, n)
        shifts -= shifts.mean()

        assert_series_equal(res["y"], df["y"])
        assert_series_equal(res["width"], df["width"] / n)

        for val, shift in zip(levels, shifts):
            rows = df[grp] == val
            assert_series_equal(res.loc[rows, "x"], df.loc[rows, "x"] + shift)
Example #25
    def test_two_semantics(self, df):

        groupby = GroupBy(["x", "grp2", "grp3"])
        res = Dodge()(df, groupby, "x")

        levels = categorical_order(df["grp2"]), categorical_order(df["grp3"])
        w, n = 0.8, len(levels[0]) * len(levels[1])

        shifts = np.linspace(0, w - w / n, n)
        shifts -= shifts.mean()

        assert_series_equal(res["y"], df["y"])
        assert_series_equal(res["width"], df["width"] / n)

        for (v2, v3), shift in zip(product(*levels), shifts):
            rows = (df["grp2"] == v2) & (df["grp3"] == v3)
            assert_series_equal(res.loc[rows, "x"], df.loc[rows, "x"] + shift)
Example #26
def test_apply_mutate_columns(df):

    xx = np.arange(0, 5)
    hats = []

    def polyfit(df):
        fit = np.polyfit(df["x"], df["y"], 1)
        hat = np.polyval(fit, xx)
        hats.append(hat)
        return pd.DataFrame(dict(x=xx, y=hat))

    res = GroupBy(["a"]).apply(df, polyfit)
    assert_array_equal(res.index, np.arange(xx.size * 2))
    assert_array_equal(res.columns, ["a", "x", "y"])
    assert_array_equal(res["a"], ["a"] * xx.size + ["b"] * xx.size)
    assert_array_equal(res["x"], xx.tolist() + xx.tolist())
    assert_array_equal(res["y"], np.concatenate(hats))
Example #27
    def get_groupby(self, df, orient):

        other = {"x": "y", "y": "x"}[orient]
        cols = [c for c in df if c != other]
        return GroupBy(cols)
Example #28
def test_init_requires_order():

    with pytest.raises(ValueError, match="GroupBy requires at least one"):
        GroupBy([])
Example #29
def test_init_from_dict():
    order = {"a": [3, 2, 1], "c": None, "b": ["x", "y", "z"]}
    g = GroupBy(order)
    assert g.order == order
Example #30
def test_at_least_one_grouping_variable_required(df):

    with pytest.raises(ValueError, match="No grouping variables are present"):
        GroupBy(["z"]).agg(df, x="mean")