def test_multiple_aggregators_with_dict_api():
    """Check that a dict of name -> list-of-aggregators is refused by ``Series.agg``."""
    nested_spec = {"foo": ["min", "max"], "bar": ["sum", "mean"]}
    ser = Series(range(6), dtype="int64", name="series")

    # Nested renaming is expected to raise rather than return a result.
    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        ser.agg(nested_spec)
def test_no_args_raises(self):
    """``agg`` on a grouped Series needs a spec, but an empty list is accepted."""
    grouped = Series([1, 2]).groupby([0, 1])

    # Calling agg without any argument is expected to be a TypeError.
    with pytest.raises(TypeError, match="Must provide"):
        grouped.agg()

    # An explicit empty list of functions is allowed; the test expects an
    # empty frame back.
    tm.assert_frame_equal(grouped.agg([]), DataFrame())
def test_demo():
    """Demonstrate ``Series.agg`` with a list of reducers and with a renaming dict."""
    ser = Series(range(6), dtype="int64", name="series")

    # A list of reducer names gives one entry per reducer, labelled by name.
    tm.assert_series_equal(
        ser.agg(["min", "max"]),
        Series([0, 5], index=["min", "max"], name="series"),
    )

    # A flat dict labels the reducer's output with the dict key.
    tm.assert_series_equal(
        ser.agg({"foo": "min"}),
        Series([0], index=["foo"], name="series"),
    )
def test_non_callable_aggregates(self): # test agg using non-callable series attributes s = Series([1, 2, None]) # Calling agg w/ just a string arg same as calling s.arg result = s.agg("size") expected = s.size assert result == expected # test when mixed w/ callable reducers result = s.agg(["size", "count", "mean"]) expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) tm.assert_series_equal(result[expected.index], expected)
def test_non_callable_aggregates(self): # test agg using non-callable series attributes s = Series([1, 2, None]) # Calling agg w/ just a string arg same as calling s.arg result = s.agg('size') expected = s.size assert result == expected # test when mixed w/ callable reducers result = s.agg(['size', 'count', 'mean']) expected = Series( OrderedDict([('size', 3.0), ('count', 2.0), ('mean', 1.5)])) assert_series_equal(result[expected.index], expected)
def test_non_callable_aggregates(self): # test agg using non-callable series attributes s = Series([1, 2, None]) # Calling agg w/ just a string arg same as calling s.arg result = s.agg('size') expected = s.size assert result == expected # test when mixed w/ callable reducers result = s.agg(['size', 'count', 'mean']) expected = Series(OrderedDict({'size': 3.0, 'count': 2.0, 'mean': 1.5})) assert_series_equal(result[expected.index], expected)
def numerical_summary(series: pd.Series) -> dict:
    """Compute descriptive statistics for a numeric series.

    Args:
        series: series to summarize

    Returns:
        A dict with the aggregates ``mean``, ``std``, ``var``, ``max``,
        ``min``, ``median``, ``kurt``, ``skew``, ``sum`` and ``mad`` (mean
        absolute deviation around the mean), the quantiles ``quantile_5``,
        ``quantile_25``, ``quantile_50``, ``quantile_75`` and ``quantile_95``,
        and the derived values ``iqr``, ``range`` and ``cv`` (``std / mean``,
        NaN when the mean is zero).
    """
    aggregates = [
        "mean",
        "std",
        "var",
        "max",
        "min",
        "median",
        "kurt",
        "skew",
        "sum",
    ]
    summary = series.agg(aggregates).to_dict()

    # ``Series.mad`` (and therefore the "mad" aggregate name) was removed in
    # pandas 2.0, so compute the mean absolute deviation explicitly. This is
    # the same definition pandas used: mean(|x - mean(x)|), skipping NaN.
    summary["mad"] = float((series - summary["mean"]).abs().mean())

    quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]
    for percentile, value in series.quantile(quantiles).to_dict().items():
        # round() guards against float error such as 0.05 * 100 == 5.000000000000001
        summary[f"quantile_{round(percentile * 100):d}"] = value
    summary["iqr"] = summary["quantile_75"] - summary["quantile_25"]

    summary["range"] = summary["max"] - summary["min"]
    # Avoid dividing by a zero mean; ``np.NaN`` was removed in NumPy 2.0.
    summary["cv"] = summary["std"] / summary["mean"] if summary["mean"] else np.nan

    # TODO: only calculations for histogram, not the plotting
    # summary['image'] = plotting.histogram(series)
    return summary
def test_demo(self):
    """Demonstrate the accepted ``Series.agg`` specs and the rejected nested one."""
    ser = Series(range(6), dtype="int64", name="series")

    # Each accepted spec is paired with the Series it is expected to produce.
    accepted = [
        (["min", "max"], Series([0, 5], index=["min", "max"], name="series")),
        ({"foo": "min"}, Series([0], index=["foo"], name="series")),
    ]
    for spec, expected in accepted:
        tm.assert_series_equal(ser.agg(spec), expected)

    # Nested renaming (dict value is a list) is expected to raise.
    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        ser.agg({"foo": ["min", "max"]})
def test_agg_args(args, kwargs, increment): # GH 43357 def f(x, a=0, b=0, c=0): return x + a + 10 * b + 100 * c s = Series([1, 2]) result = s.agg(f, 0, *args, **kwargs) expected = s + increment tm.assert_series_equal(result, expected)
def test_demo(self): # demonstration tests s = Series(range(6), dtype='int64', name='series') result = s.agg(['min', 'max']) expected = Series([0, 5], index=['min', 'max'], name='series') tm.assert_series_equal(result, expected) result = s.agg({'foo': 'min'}) expected = Series([0], index=['foo'], name='series') tm.assert_series_equal(result, expected) # nested renaming with tm.assert_produces_warning(FutureWarning): result = s.agg({'foo': ['min', 'max']}) expected = DataFrame({ 'foo': [0, 5] }, index=['min', 'max']).unstack().rename('series') tm.assert_series_equal(result, expected)
def test_demo(self): # demonstration tests s = Series(range(6), dtype='int64', name='series') result = s.agg(['min', 'max']) expected = Series([0, 5], index=['min', 'max'], name='series') tm.assert_series_equal(result, expected) result = s.agg({'foo': 'min'}) expected = Series([0], index=['foo'], name='series') tm.assert_series_equal(result, expected) # nested renaming with tm.assert_produces_warning(FutureWarning): result = s.agg({'foo': ['min', 'max']}) expected = DataFrame( {'foo': [0, 5]}, index=['min', 'max']).unstack().rename('series') tm.assert_series_equal(result, expected)
def test_demo(self): # demonstration tests s = Series(range(6), dtype="int64", name="series") result = s.agg(["min", "max"]) expected = Series([0, 5], index=["min", "max"], name="series") tm.assert_series_equal(result, expected) result = s.agg({"foo": "min"}) expected = Series([0], index=["foo"], name="series") tm.assert_series_equal(result, expected) # nested renaming with tm.assert_produces_warning(FutureWarning): result = s.agg({"foo": ["min", "max"]}) expected = (DataFrame({ "foo": [0, 5] }, index=["min", "max"]).unstack().rename("series")) tm.assert_series_equal(result, expected)
def get_extremes(data: pd.Series) -> pd.Series:
    """Select the entries of `data` that equal its minimum or maximum.

    Args:
        data (pandas.Series): Data array.

    Returns:
        pandas.Series: The elements of `data`, with their original index
        labels, whose value equals the minimum or the maximum of `data`.
        Every occurrence of an extreme value is kept.
    """
    # Pass the reducer names instead of the builtins ``min``/``max``: pandas
    # 2.1 deprecated silently mapping the builtins to Series.min/Series.max,
    # and calling the builtins directly is not NaN-aware.
    extremes = data.agg(["min", "max"])
    return data[data.isin(extremes)]
def numerical_basic_summary(series: pd.Series) -> dict:
    """Compute the basic aggregates of a series.

    Args:
        series: series to summarize

    Returns:
        A dict keyed by `mean`, `std`, `var`, `min`, `max` and `sum`, holding
        the corresponding aggregate of `series`.
    """
    basic_stats = series.agg(["mean", "std", "var", "min", "max", "sum"])
    return basic_stats.to_dict()
def test_multiple_aggregators_with_dict_api(self): s = Series(range(6), dtype='int64', name='series') # nested renaming with tm.assert_produces_warning(FutureWarning): result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']}) expected = DataFrame( {'foo': [5.0, np.nan, 0.0, np.nan], 'bar': [np.nan, 2.5, np.nan, 15.0]}, columns=['foo', 'bar'], index=['max', 'mean', 'min', 'sum']).unstack().rename('series') tm.assert_series_equal(result.reindex_like(expected), expected)
def range_summary(series: pd.Series) -> dict:
    """Report the smallest and largest value of a series and their spread.

    Args:
        series: series to summarize

    Returns:
        A dict with `min`, `max` and `range`, where `range` is `max - min`.
    """
    bounds = series.agg(["min", "max"]).to_dict()
    return {**bounds, "range": bounds["max"] - bounds["min"]}
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype): # GH#46560 kernel = arithmetic_win_operators ser = Series([1], dtype=dtype) expanding = ser.expanding() op = getattr(expanding, kernel) if numeric_only and dtype is object: msg = f"Expanding.{kernel} does not implement numeric_only" with pytest.raises(NotImplementedError, match=msg): op(numeric_only=numeric_only) else: result = op(numeric_only=numeric_only) expected = ser.agg([kernel]).reset_index(drop=True).astype(float) tm.assert_series_equal(result, expected)
def test_multiple_aggregators_with_dict_api(self): s = Series(range(6), dtype="int64", name="series") # nested renaming with tm.assert_produces_warning(FutureWarning): result = s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]}) expected = (DataFrame( { "foo": [5.0, np.nan, 0.0, np.nan], "bar": [np.nan, 2.5, np.nan, 15.0] }, columns=["foo", "bar"], index=["max", "mean", "min", "sum"], ).unstack().rename("series")) tm.assert_series_equal(result.reindex_like(expected), expected)
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype): # GH#46560 kernel = arithmetic_win_operators ser = Series([1], dtype=dtype) ewm = ser.ewm(span=2, min_periods=1) op = getattr(ewm, kernel, None) if op is None: # Nothing to test return if numeric_only and dtype is object: msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only" with pytest.raises(NotImplementedError, match=msg): op(numeric_only=numeric_only) else: result = op(numeric_only=numeric_only) expected = ser.agg([kernel]).reset_index(drop=True).astype(float) tm.assert_series_equal(result, expected)
def bar_by_group(x: pd.Series, g: pd.Series, aggfunc="mean", *args, **kwargs):
    """
    Draw a bar chart of ``x`` aggregated with ``aggfunc`` within the groups of ``g``.

    ``g`` is first reduced with ``g.top_n(3)`` (a helper defined outside this
    function; presumably it keeps the three most frequent groups - confirm).
    When ``aggfunc`` is ``"mean"``, the overall mean of ``x`` is drawn as a
    dashed line and a z = 2.58 interval around it as dotted lines.
    ``*args`` and ``**kwargs`` are accepted but not used.

    Returns the ``(fig, ax)`` pair created by ``plt.subplots()``.

    >>> mpg = data('mpg')
    >>> bar_by_group(mpg.hwy, mpg['class'])
    (<Figure size ... with 1 Axes>, <matplotlib.axes._subplots.AxesSubplot object at ...>)
    """
    groups = g.top_n(3)
    fig, ax = plt.subplots()

    aggregated = x.groupby(groups).agg(aggfunc)
    aggregated.plot.bar(ax=ax, color="pink", width=1)
    ax.set(title=f"{aggfunc} of {x.name} by {groups.name}")

    # Reference lines are only drawn for the mean.
    if aggfunc != "mean":
        return fig, ax

    overall = x.agg(aggfunc)
    z = 2.58  # 99% ci
    half_width = z * (x.std() / math.sqrt(x.shape[0]))
    upper, lower = overall + half_width, overall - half_width
    # The lines span x = -0.5 .. 3.5 on the bar axis.
    ax.hlines(overall, -0.5, 3.5, ls="--", color="gray")
    ax.hlines([lower, upper], -0.5, 3.5, ls=":", color="gray")
    return fig, ax
def test_series_agg_nested_renamer():
    """``Series.agg`` must refuse a dict whose value is a list of aggregators."""
    ser = Series(range(6), dtype="int64", name="series")
    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        ser.agg({"foo": ["min", "max"]})
def test_mangle_series_groupby(self): gr = Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) result = gr.agg([lambda x: 0, lambda x: 1]) expected = DataFrame({"<lambda_0>": [0, 0], "<lambda_1>": [1, 1]}) tm.assert_frame_equal(result, expected)
def test_mangled(self): gr = Series([1, 2, 3]).groupby([0, 0, 1]) result = gr.agg(a=lambda x: 0, b=lambda x: 1) expected = DataFrame({"a": [0, 0], "b": [1, 1]}) tm.assert_frame_equal(result, expected)
def test_series_named_agg_duplicates_no_raises(self): # GH28426 gr = Series([1, 2, 3]).groupby([0, 0, 1]) grouped = gr.agg(a="sum", b="sum") expected = DataFrame({"a": [3, 3], "b": [3, 3]}) tm.assert_frame_equal(expected, grouped)
def test_series_nested_renamer(renamer):
    """Each supplied ``renamer`` spec must be rejected as a nested renamer."""
    expected_msg = "nested renamer is not supported"
    ser = Series(range(6), dtype="int64", name="series")
    with pytest.raises(SpecificationError, match=expected_msg):
        ser.agg(renamer)
# Show the Series, its raw values and its index labels
print(ten)
print(ten.values)
print("Indexes:")
print(list(ten.index))

# Boolean-mask selection: keep only the entries greater than zero
is_positive = nums > 0
pos_nums = nums[is_positive]
print(f"There are {len(pos_nums)} positive numbers")

# A Series can be handed straight to matplotlib
_, axis = plt.subplots()
axis.plot(pos_nums)
plt.show()

# Aggregations: the quoted names refer to reducer functions of the Series
print(nums.agg(['min', 'max', 'sum']))
smallest_value, largest_value = ten.min(), ten.max()
print(smallest_value, largest_value)
print('3 largest:\n', ten.nlargest(3), sep='')
print('3 smallest:\n', ten.nsmallest(3), sep='')

# Element-wise mapping
print("Adding 50 to ten numbers:")
print(ten.map(lambda x: x + 50))
print(ten.map('I am {}'.format))
# transform accepts a list, i.e. several "maps" at once
print(ten.transform([np.sqrt, np.exp]))

# apply forwards extra positional arguments (given in `args`) to the function
print("Cubing ten numbers:")
cubes = ten.apply(operator.pow, args=(3, ))
print(cubes)
# The same computation through the Series' own pow method
cubes2 = ten.pow(3)