def test_bin_transform_simple(data):
    """Check that a ``bin`` transform creates the requested output column(s).

    The ``as`` entry is exercised first as a single column name and then as
    a list of two column names.
    """
    for target in ("xbin", ["xbin1", "xbin2"]):
        result = apply(data, {"bin": True, "field": "x", "as": target})
        # Normalise the scalar form so both cases share one membership check.
        wanted = [target] if isinstance(target, str) else target
        for column in wanted:
            assert column in result.columns
def test_bin_transform(data):
    """Verify the output column names produced by a default ``bin`` transform.

    Covers ``as`` given as one name and ``as`` given as a pair of names.
    """
    single = apply(data, {'bin': True, 'field': 'x', 'as': 'xbin'})
    assert 'xbin' in single.columns

    paired = apply(data, {'bin': True, 'field': 'x', 'as': ['xbin1', 'xbin2']})
    assert all(name in paired.columns for name in ('xbin1', 'xbin2'))
def test_bin_transform_simple(data: pd.DataFrame) -> None:
    """Check that binning ``x`` adds the column(s) named by ``as``.

    Two specs are applied: one with a single output name and one with a
    two-element list of output names.
    """
    base = {"bin": True, "field": "x"}

    scalar_out = altair_transform.apply(data, {**base, "as": "xbin"})
    assert "xbin" in scalar_out.columns

    list_out = altair_transform.apply(data, {**base, "as": ["xbin1", "xbin2"]})
    assert {"xbin1", "xbin2"} <= set(list_out.columns)
def test_quantile_transform_groupby(data: pd.DataFrame) -> None:
    """Check that a grouped quantile transform equals per-group application.

    The transform is applied once to the whole frame with ``groupby`` set,
    and once to each group's rows in isolation; the matching rows of the
    combined output must equal the isolated output.
    """
    groupby_col = "c"
    spec = {"quantile": "x", "step": 0.1, "groupby": [groupby_col]}

    combined = altair_transform.apply(data, spec)
    assert list(combined.columns) == ["c", "prob", "value"]

    for level in data[groupby_col].unique():
        subset = data[data[groupby_col] == level]
        per_group = altair_transform.apply(subset, spec)

        # Align the slice of the combined output with the per-group frame:
        # same column order, fresh 0-based index.
        rows = combined[combined[groupby_col] == level]
        from_combined = rows[per_group.columns].reset_index(drop=True)

        assert_frame_equal(per_group, from_combined)
def test_flatten_transform(data: pd.DataFrame) -> None:
    """Flatten ``x`` alone, then ``x`` and ``y`` together, and check the rows.

    Both results are expected to have nine rows with columns ``x, y, cat``.
    When ``y`` is flattened alongside ``x``, the expected ``y`` column
    contains NaN entries.
    """
    expected_columns = ["x", "y", "cat"]
    expected_cats = list("AAABBBBCC")

    only_x = altair_transform.apply(data, {"flatten": ["x"]})
    assert only_x.shape == (9, 3)
    assert only_x.columns.tolist() == expected_columns
    assert_equal(only_x.x.values, range(1, 10))
    assert_equal(only_x.cat.values, expected_cats)

    both = altair_transform.apply(data, {"flatten": ["x", "y"]})
    assert both.shape == (9, 3)
    assert both.columns.tolist() == expected_columns
    assert_equal(both.x.values, range(1, 10))
    assert_equal(both.y.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(both.cat.values, expected_cats)
def test_fold_transform(data: pd.DataFrame, as_: Optional[List[str]]) -> None:
    """Check the ``fold`` transform on columns ``y1`` and ``y2``.

    Parameters
    ----------
    data
        Input frame; the test reads its ``x``, ``y1`` and ``y2`` columns.
    as_
        Output names for the key and value columns, or ``None`` to omit
        ``as`` from the spec, in which case ``["key", "value"]`` is expected.
    """
    if as_ is None:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"]})
        as_ = ["key", "value"]
    else:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"], "as": as_})

    expected = pd.DataFrame({
        "x": np.repeat(data["x"], 2),
        # One ("y1", "y2") pair per input row. Derived from the frame length
        # rather than a literal so the expectation tracks the input size like
        # every other column here does.
        as_[0]: len(data) * ["y1", "y2"],
        # Column-major ravel interleaves y1[i], y2[i] row by row.
        as_[1]: np.ravel((data["y1"], data["y2"]), "F"),
        "y1": np.repeat(data["y1"], 2),
        "y2": np.repeat(data["y2"], 2),
    }).reset_index(drop=True)
    assert_frame_equal(out, expected)
def test_flatten_transform_with_as(data: pd.DataFrame):
    """Check ``flatten`` when the flattened values go to renamed columns.

    With ``as`` given, the expected output lists the new columns first and
    still contains the original ``x``, ``y`` and ``cat`` columns.
    """
    y_only = altair_transform.apply(data, {"flatten": ["y"], "as": ["yflat"]})
    assert y_only.shape == (6, 4)
    assert y_only.columns.tolist() == ["yflat", "x", "y", "cat"]
    assert_equal(y_only.yflat.values, range(1, 7))
    assert_equal(y_only.cat.values, list("AABBCC"))

    spec = {"flatten": ["x", "y"], "as": ["xflat", "yflat"]}
    both = altair_transform.apply(data, spec)
    assert both.shape == (9, 5)
    assert both.columns.tolist() == ["xflat", "yflat", "x", "y", "cat"]
    assert_equal(both.xflat.values, range(1, 10))
    assert_equal(both.yflat.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(both.cat.values, list("AAABBBBCC"))
def test_pivot_transform_limit(data: pd.DataFrame) -> None:
    """Check that ``limit`` caps the number of pivoted columns.

    The expected frame has one row holding the sum of ``x`` for each of the
    first two values of ``c`` in sorted order.
    """
    spec = {"pivot": "c", "value": "x", "limit": 2}

    kept_keys = sorted(data.c.unique())[:2]
    sums = {}
    for key in kept_keys:
        sums[key] = [data.x[data.c == key].sum()]
    expected = pd.DataFrame(sums)

    actual = altair_transform.apply(data, spec)
    assert_frame_equal(actual, expected)
def test_window_against_js(
    driver,
    data: pd.DataFrame,
    groupby: Optional[List[str]],
    sort: Optional[str],
    frame: Optional[List[Optional[int]]],
) -> None:
    """Compare the window transform with the reference from ``driver``.

    A running ``sum`` of ``x`` is computed by ``altair_transform.apply`` and
    by ``driver.apply`` (per the test name, presumably a JavaScript
    implementation); the two frames must agree up to a loose float tolerance.

    Parameters
    ----------
    driver
        Object exposing ``apply(data, transform)`` that yields the reference.
    data
        Input frame.
    groupby, sort, frame
        Optional window settings; each is added to the spec only when it is
        not ``None``.
    """
    transform: Dict[str, Any] = {
        "window": [{"op": "sum", "field": "x", "as": "xsum"}],
        "ignorePeers": False,
    }
    if groupby is not None:
        transform["groupby"] = groupby
    if sort is not None:
        transform["sort"] = [{"field": sort}]
    if frame is not None:
        transform["frame"] = frame

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Compare with columns in a canonical order. ``rtol``/``atol`` of 0.5e-3
    # is what ``check_less_precise=True`` meant; that keyword was deprecated
    # in pandas 1.1 and removed in 2.0.
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        rtol=0.5e-3,
        atol=0.5e-3,
    )
def test_regression_against_js(
    driver,
    data: pd.DataFrame,
    method: str,
    params: str,
    groupby: Optional[List[str]],
) -> None:
    """Compare the regression transform with the reference from ``driver``.

    ``y`` is regressed on ``x`` by ``altair_transform.apply`` and by
    ``driver.apply`` (per the test name, presumably a JavaScript
    implementation); the two frames must agree up to a loose float tolerance.

    Parameters
    ----------
    driver
        Object exposing ``apply(data, transform)`` that yields the reference.
    data
        Input frame.
    method
        Value passed as the transform's ``method``.
    params
        Value passed as the transform's ``params``.
    groupby
        Optional grouping fields; added to the spec only when truthy.
    """
    transform: Dict[str, Any] = {
        "regression": "y",
        "on": "x",
        "method": method,
        "params": params,
    }
    if groupby:
        transform["groupby"] = groupby

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Account for differences in handling of undefined between browsers.
    if params and not groupby and got.shape != want.shape:
        got["keys"] = [None]

    # Compare with columns in a canonical order. ``rtol``/``atol`` of 0.5e-3
    # is what ``check_less_precise=True`` meant; that keyword was deprecated
    # in pandas 1.1 and removed in 2.0.
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        rtol=0.5e-3,
        atol=0.5e-3,
    )
def test_quantile_against_js(
    driver,
    data: pd.DataFrame,
    step: Optional[float],
    groupby: Optional[List[str]],
    probs: Optional[List[float]],
    as_: Optional[List[str]],
) -> None:
    """Compare the quantile transform with the reference from ``driver``.

    Quantiles of ``x`` are computed by ``altair_transform.apply`` and by
    ``driver.apply`` (per the test name, presumably a JavaScript
    implementation); the two frames must agree up to a loose float tolerance.

    Parameters
    ----------
    driver
        Object exposing ``apply(data, transform)`` that yields the reference.
    data
        Input frame.
    step, groupby, probs, as_
        Optional transform settings; each is added to the spec (``as_`` under
        the key ``"as"``) only when it is not ``None``.
    """
    transform: Dict[str, Any] = {"quantile": "x"}
    optional_settings = (
        ("step", step),
        ("groupby", groupby),
        ("probs", probs),
        ("as", as_),
    )
    for key, value in optional_settings:
        if value is not None:
            transform[key] = value

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Compare with columns in a canonical order. ``rtol``/``atol`` of 0.5e-3
    # is what ``check_less_precise=True`` meant; that keyword was deprecated
    # in pandas 1.1 and removed in 2.0.
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        rtol=0.5e-3,
        atol=0.5e-3,
    )
def test_poly_vs_linear(groupby: List[str], method: str, order: int) -> None:
    """Check that a named regression method equals a ``poly`` fit of ``order``.

    The same small frame is regressed twice: once with ``method`` and once
    with ``method="poly"`` plus ``order``. The results must match, ignoring
    dtypes. ``groupby`` is forwarded to both specs only when non-empty.
    """
    frame = pd.DataFrame(
        {
            "x": [0, 1, 2, 3, 4, 1, 2, 3],
            "y": [2, 4, 6, 8, 10, 2, 3, 4],
            "g": [0, 0, 0, 0, 0, 1, 1, 1],
        }
    )
    extra = {"groupby": groupby} if groupby else {}

    named_spec = {"regression": "y", "on": "x", "method": method, **extra}
    by_name = altair_transform.apply(frame, named_spec)

    poly_spec = {
        "regression": "y",
        "on": "x",
        "method": "poly",
        "order": order,
        **extra,
    }
    by_order = altair_transform.apply(frame, poly_spec)

    assert_frame_equal(by_name, by_order, check_dtype=False)
def test_linear() -> None:
    """Check the default regression output on exactly linear data.

    For ``y = 2 * x + 2`` on ``x = 0..4`` the expected result is the two end
    points of the fitted line, ``(0, 2)`` and ``(4, 10)``.
    """
    frame = pd.DataFrame({"x": [0, 1, 2, 3, 4], "y": [2, 4, 6, 8, 10]})
    fitted = altair_transform.apply(frame, {"regression": "y", "on": "x"})

    endpoints = pd.DataFrame({"x": [0.0, 4.0], "y": [2.0, 10.0]})
    assert_frame_equal(fitted, endpoints, check_dtype=False)
def test_calculate_transform(data):
    """Check that ``calculate`` adds column ``z`` equal to ``x + y``."""
    actual = apply(data, {"calculate": "datum.x + datum.y", "as": "z"})

    # Build the expectation on a copy so the input frame is left untouched.
    expected = data.copy()
    expected["z"] = data["x"] + data["y"]

    assert actual.equals(expected)
def test_calculate_transform(data):
    """Check that the ``calculate`` expression result is stored under ``as``.

    The spec sums ``datum.x`` and ``datum.y`` into ``z``; the result must
    equal the input frame with that extra column.
    """
    spec = {"calculate": "datum.x + datum.y", "as": "z"}
    computed = altair_transform.apply(data, spec)

    reference = data.copy()
    column_sum = data.x + data.y
    reference["z"] = column_sum

    assert_frame_equal(computed, reference)
def test_filter_transform(
    data: pd.DataFrame,
    filter: Union[str, Dict[str, Any]],
    calc: Callable[[pd.DataFrame], pd.DataFrame],
):
    """Check a ``filter`` transform against an equivalent pandas selection.

    Parameters
    ----------
    data
        Input frame.
    filter
        Filter specification placed under the ``"filter"`` key of the spec.
    calc
        Callable returning the rows of ``data`` expected to survive.
    """
    spec = {"filter": filter}
    actual = altair_transform.apply(data, spec)

    # The expectation is compared after its index is reset to 0..n-1.
    selected = calc(data)
    expected = selected.reset_index(drop=True)

    assert_frame_equal(actual, expected)
def test_calculate_transform(data):
    """Check that ``calculate`` writes ``x + y`` into a new ``z`` column."""
    spec = {'calculate': 'datum.x + datum.y', 'as': 'z'}
    result = apply(data, spec)

    wanted = data.copy()
    wanted['z'] = data['x'] + data['y']

    assert result.equals(wanted)
def test_fold_transform(as_):
    """Check the ``fold`` transform on a fixed three-row frame.

    ``as_`` supplies the key/value output names; when it is ``None`` the
    spec omits ``as`` and the names ``key`` and ``value`` are expected.
    """
    data = pd.DataFrame({
        "x": [1, 2, 3],
        "y1": ["A", "B", "C"],
        "y2": ["D", "E", "F"]
    })

    spec = {"fold": ["y1", "y2"]}
    if as_ is not None:
        spec["as"] = as_
    else:
        as_ = ["key", "value"]
    out = apply(data, spec)

    key_name, value_name = as_[0], as_[1]
    xs = data["x"].tolist()
    expected = pd.DataFrame({
        "x": xs + xs,
        key_name: ["y1"] * 3 + ["y2"] * 3,
        value_name: data["y1"].tolist() + data["y2"].tolist(),
    })
    assert out.equals(expected)
def test_sample_transform(data, N):
    """Check the shape and contents of a ``sample`` transform's output.

    The result must have ``min(N, rows)`` rows and all columns, and every
    returned row must equal the input row at the same index position.
    """
    sampled = apply(data, {'sample': N})
    total_rows, total_cols = data.shape

    # Ensure the shape is correct
    assert sampled.shape == (min(N, total_rows), total_cols)

    # Ensure the contents are correct
    source_rows = data.iloc[sampled.index]
    assert sampled.equals(source_rows)
def test_sample_transform(data, N):
    """Check that ``sample`` returns at most ``N`` unmodified input rows."""
    spec = {"sample": N}
    subset = altair_transform.apply(data, spec)

    # Ensure the shape is correct
    n_rows, n_cols = data.shape
    expected_shape = (min(N, n_rows), n_cols)
    assert subset.shape == expected_shape

    # Ensure the contents are correct
    assert_frame_equal(subset, data.iloc[subset.index])
def test_flatten_transform_with_as():
    """Check ``flatten`` with ``as`` on a frame of list-valued columns.

    In this version the renamed columns are expected to replace the
    flattened originals, so both results have three columns.
    """
    frame = pd.DataFrame({
        "x": [[1, 2, 3], [4, 5, 6, 7], [8, 9]],
        "y": [[1, 2], [3, 4], [5, 6]],
        "cat": list("ABC"),
    })

    y_only = apply(frame, {"flatten": ["y"], "as": ["yflat"]})
    assert y_only.shape == (6, 3)
    assert y_only.columns.tolist() == ["yflat", "x", "cat"]
    assert_equal(y_only.yflat.values, range(1, 7))
    assert_equal(y_only.cat.values, list("AABBCC"))

    both = apply(frame, {"flatten": ["x", "y"], "as": ["xflat", "yflat"]})
    assert both.shape == (9, 3)
    assert both.columns.tolist() == ["xflat", "yflat", "cat"]
    assert_equal(both.xflat.values, range(1, 10))
    # ``y`` lists are shorter than ``x`` lists, so NaN is expected as padding.
    assert_equal(both.yflat.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(both.cat.values, list("AAABBBBCC"))
def test_flatten_transform_with_as():
    """Check ``flatten`` output names and values when ``as`` is supplied.

    Flattens ``y`` alone into ``yflat``, then ``x`` and ``y`` together into
    ``xflat`` and ``yflat``, on a three-row frame of list-valued columns.
    """
    source = pd.DataFrame({
        'x': [[1, 2, 3], [4, 5, 6, 7], [8, 9]],
        'y': [[1, 2], [3, 4], [5, 6]],
        'cat': list('ABC')
    })

    # Case 1: a single flattened field.
    single_spec = {'flatten': ['y'], 'as': ['yflat']}
    single = apply(source, single_spec)
    assert single.shape == (6, 3)
    assert single.columns.tolist() == ['yflat', 'x', 'cat']
    assert_equal(single.yflat.values, range(1, 7))
    assert_equal(single.cat.values, list('AABBCC'))

    # Case 2: two fields flattened in parallel; NaN is expected where the
    # ``y`` lists are shorter than the ``x`` lists.
    double_spec = {'flatten': ['x', 'y'], 'as': ['xflat', 'yflat']}
    double = apply(source, double_spec)
    assert double.shape == (9, 3)
    assert double.columns.tolist() == ['xflat', 'yflat', 'cat']
    assert_equal(double.xflat.values, range(1, 10))
    assert_equal(double.yflat.values,
                 [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(double.cat.values, list('AAABBBBCC'))
def test_window_transform_basic(data):
    """Check that a ``sum`` window over ``x`` equals the cumulative sum."""
    spec = {'window': [{'op': 'sum', 'field': 'x', 'as': 'xsum'}]}
    result = apply(data, spec)

    # The comparison is made against a float-typed cumulative sum.
    running_total = data['x'].cumsum().astype(float)
    assert result['xsum'].equals(running_total)
def test_bin_against_js(driver, data: pd.DataFrame, transform: Dict[str, Any]) -> None:
    """Compare a bin transform with the reference from ``driver``.

    ``transform`` is applied by ``altair_transform.apply`` and by
    ``driver.apply`` (per the test name, presumably a JavaScript
    implementation); the two frames must agree up to a loose float tolerance.

    Parameters
    ----------
    driver
        Object exposing ``apply(data, transform)`` that yields the reference.
    data
        Input frame.
    transform
        Transform specification passed unchanged to both implementations.
    """
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Compare with columns in a canonical order. ``rtol``/``atol`` of 0.5e-3
    # is what ``check_less_precise=True`` meant; that keyword was deprecated
    # in pandas 1.1 and removed in 2.0.
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        rtol=0.5e-3,
        atol=0.5e-3,
    )
def test_lookup_transform(data: pd.DataFrame, lookup_key: str) -> None:
    """Check the ``lookup`` transform against an equivalent ``pd.merge``.

    A four-row secondary table keyed by ``lookup_key`` supplies field ``z``
    for each value of ``c`` in ``data``. When the key column has a different
    name than ``c``, it is dropped from the expected frame.
    """
    table = pd.DataFrame({lookup_key: list("ABCD"), "z": [3, 1, 4, 5]})
    source = {"data": to_values(table), "key": lookup_key, "fields": ["z"]}
    spec = {"lookup": "c", "from": source}

    actual = altair_transform.apply(data, spec)

    expected = pd.merge(data, table, left_on="c", right_on=lookup_key)
    if lookup_key != "c":
        expected = expected.drop(lookup_key, axis=1)

    assert_frame_equal(actual, expected)
def test_multiple_transforms(data):
    """Check that a list of transforms is applied in sequence.

    The first step computes ``xy_mean``; the second filters on it, so it
    only works if the first step's output feeds the second.
    """
    steps = [
        {'calculate': '0.5 * (datum.x + datum.y)', 'as': 'xy_mean'},
        {'filter': 'datum.x < datum.xy_mean'},
    ]
    actual = apply(data, steps)

    expected = data.copy()
    expected['xy_mean'] = 0.5 * (data.x + data.y)
    keep = expected.x < expected.xy_mean
    expected = expected[keep]

    assert actual.equals(expected)
def test_window_transform_grouped(data):
    """Check a ``sum`` window over ``x`` computed separately per ``y`` group."""
    spec = {
        'window': [{'op': 'sum', 'field': 'x', 'as': 'xsum'}],
        'groupby': ['y'],
    }
    result = apply(data, spec)

    # A rolling window as long as the frame, with min_periods=1, gives a
    # running sum inside each group.
    rolling = data.groupby('y').rolling(len(data), min_periods=1)
    group_sums = rolling['x'].sum()
    # Drop the group level and restore the original row order.
    expected = group_sums.reset_index('y', drop=True).sort_index()

    assert result['xsum'].equals(expected)
def test_window_transform_grouped(data):
    """Check that ``groupby`` makes the window ``sum`` restart per group.

    The expectation is a per-group running sum of ``x`` built with pandas
    and re-ordered to the input's index.
    """
    window_op = {"op": "sum", "field": "x", "as": "xsum"}
    out = apply(data, {"window": [window_op], "groupby": ["y"]})

    n_rows = len(data)
    per_group = data.groupby("y").rolling(n_rows, min_periods=1)["x"].sum()
    expected = per_group.reset_index("y", drop=True)
    expected = expected.sort_index()

    assert out["xsum"].equals(expected)
def test_window_transform_sorted(data):
    """Check a ``sum`` window over ``x`` when rows are sorted by ``x``.

    The expectation is the cumulative sum taken in ascending ``x`` order
    and then put back in the original index order.
    """
    spec = {
        'window': [{'op': 'sum', 'field': 'x', 'as': 'xsum'}],
        'sort': [{'field': 'x'}],
    }
    result = apply(data, spec)

    ordered = data['x'].sort_values()
    expected = ordered.cumsum().sort_index().astype(float)

    assert result['xsum'].equals(expected)
def test_bin_transform_step(nice: bool, step: int) -> None:
    """Check that an explicit bin ``step`` yields evenly spaced bin values.

    Values 0..99 are binned; the distinct ``xbin`` values must be ``step``
    apart and no row may be left without a bin.
    """
    frame = pd.DataFrame({"x": np.arange(100)})
    spec = {"bin": {"step": step, "nice": nice}, "field": "x", "as": "xbin"}
    binned = altair_transform.apply(frame, spec)

    edges = np.sort(binned.xbin.unique())
    gaps = edges[1:] - edges[:-1]
    assert np.allclose(gaps, step)
    assert not binned.xbin.isnull().any()