def test_series_set_equal_length_object_by_mask(replace_data):
    """Compare boolean-mask assignment of ``replace_data`` with pandas.

    The same assignment is performed on a pandas Series and on the Series
    built from it with ``Series.from_pandas``; after each assignment both
    are cast to float and compared with ``assert_eq``.
    """

    def _value_for_pandas():
        # pandas gets the converted object whenever a conversion is offered
        if hasattr(replace_data, "to_pandas"):
            return replace_data.to_pandas()
        return replace_data

    expected = pd.Series([1, 2, 3, 4, 5])
    actual = Series.from_pandas(expected)

    # Lengths match in trivial case: the mask is True everywhere
    full_mask_pd = pd.Series([True] * len(expected))
    full_mask_gd = Series.from_pandas(full_mask_pd)

    expected[full_mask_pd] = _value_for_pandas()
    actual[full_mask_gd] = replace_data
    assert_eq(expected.astype("float"), actual.astype("float"))

    # Test partial masking, using a mask derived from a comparison
    expected[expected > 1] = _value_for_pandas()
    actual[actual > 1] = replace_data
    assert_eq(expected.astype("float"), actual.astype("float"))
def test_series_set_item(psr, arg):
    """Set the scalar 11 at ``arg`` on both Series and compare the results.

    ``psr`` is converted with ``Series.from_pandas`` before either object is
    modified, so both start from the same contents.
    """
    converted = Series.from_pandas(psr)

    new_value = 11
    psr[arg] = new_value
    converted[arg] = new_value

    assert_eq(psr, converted)
def test_scan_boolean_cumprod():
    """Cumulative product of a boolean Series should agree with pandas."""
    values = Series([0, -1, -300, 23, 4, -3, 0, 0, 100])

    # cumprod test: scan the boolean result of ``values > 0`` directly, then
    # scan the same comparison after converting it to pandas
    result = (values > 0).cumprod()
    reference = (values > 0).to_pandas().cumprod()

    assert_eq(reference, result)
def test_series_setitem_index():
    """Assign a Series with a reversed index to a column and compare.

    The replacement Series lists the frame's labels in the opposite order
    ([3, 2, 1] versus [1, 2, 3]); the resulting frames are compared with
    dtype checking disabled.
    """
    pdf = pd.DataFrame(
        data={"b": [-1, -2, -3], "c": [1, 2, 3]},
        index=[1, 2, 3],
    )
    pandas_column = pd.Series(data=[12, 11, 10], index=[3, 2, 1])
    pdf["b"] = pandas_column

    gdf = DataFrame(
        data={"b": [-1, -2, -3], "c": [1, 2, 3]},
        index=[1, 2, 3],
    )
    gpu_column = Series(data=[12, 11, 10], index=[3, 2, 1])
    gdf["b"] = gpu_column

    assert_eq(pdf, gdf, check_dtype=False)
def test_column_set_equal_length_object_by_mask():
    """Exercise boolean-mask assignment directly on ``_column`` objects."""
    # Series.__setitem__ might bypass some of the cases
    # handled in column.__setitem__ so this test is needed
    replacement = Series([100, 200, 300, 400, 500])._column

    # Every mask entry is True: the result should equal the replacement
    target = Series([0, 0, 1, 1, 1])._column
    all_true = Series([True, True, True, True, True])._column
    target[all_true] = replacement
    assert_eq(Series(target), Series(replacement))

    # Alternating mask: positions with False keep their original values
    target = Series([0, 0, 1, 1, 1])._column
    alternating = Series([True, False, True, False, True])._column
    target[alternating] = replacement
    assert_eq(Series(target), Series([100, 0, 300, 1, 500]))
def test_cummin(dtype, nelem):
    """Compare ``cummin`` with pandas on generated data.

    The check runs once on a standalone Series and once on a Series stored
    as column "a" of a DataFrame. Results are compared to 4 decimals for
    float32 and 6 decimals otherwise.
    """
    # to keep data in range, int8 input is generated with low=-2, high=2
    bounds = {"low": -2, "high": 2} if dtype == np.int8 else {}
    data = gen_rand(dtype, nelem, **bounds)

    if dtype == np.float32:
        decimal = 4
    else:
        decimal = 6

    # series
    gpu_series = Series(data)
    pandas_series = pd.Series(data)
    np.testing.assert_array_almost_equal(
        gpu_series.cummin().to_array(),
        pandas_series.cummin(),
        decimal=decimal,
    )

    # dataframe series (named series)
    gpu_frame = DataFrame()
    gpu_frame["a"] = Series(data)
    pandas_frame = pd.DataFrame()
    pandas_frame["a"] = pd.Series(data)
    np.testing.assert_array_almost_equal(
        gpu_frame.a.cummin().to_array(),
        pandas_frame.a.cummin(),
        decimal=decimal,
    )
def test_cummin_masked():
    """Check ``cummin`` on data containing a ``None`` entry.

    Float dtypes are compared against pandas directly. For each dtype in
    ``INTEGER_TYPES`` the expected result is a float64 Series with NaN at
    the position of the ``None`` input.
    """
    # NOTE(review): another ``test_cummin_masked`` is defined later in this
    # source with different integer expectations; if both live in the same
    # module the later one shadows this one - confirm which is intended.
    values = [1, 2, None, 4, 5]

    for float_dtype in ("float32", "float64"):
        gpu_series = Series(values).astype(float_dtype)
        pandas_series = pd.Series(values).astype(float_dtype)
        assert_eq(gpu_series.cummin(), pandas_series.cummin())

    for int_dtype in INTEGER_TYPES:
        gpu_series = Series(values).astype(int_dtype)
        reference = pd.Series([1, 1, np.nan, 1, 1]).astype("float64")
        assert_eq(gpu_series.cummin(), reference)
def test_cumsum_masked():
    """Check ``cumsum`` on data containing a ``None`` entry.

    Float dtypes are compared against pandas directly. For each dtype in
    ``INTEGER_TYPES`` the expected result is a float64 Series with NaN at
    the position of the ``None`` input.
    """
    values = [1, 2, None, 4, 5]

    for float_dtype in ("float32", "float64"):
        gpu_series = Series(values).astype(float_dtype)
        pandas_series = pd.Series(values).astype(float_dtype)
        assert_eq(gpu_series.cumsum(), pandas_series.cumsum())

    for int_dtype in INTEGER_TYPES:
        gpu_series = Series(values).astype(int_dtype)
        result = gpu_series.cumsum()
        reference = pd.Series([1, 3, np.nan, 7, 12], dtype="float64")
        assert_eq(result, reference)
def test_cummin_masked():
    """Check ``cummin`` on data containing a ``None`` entry.

    Float dtypes are compared against pandas directly. For the listed
    integer dtypes the expected Series is cast to the same dtype and holds
    -1 at the position of the ``None`` input.
    """
    # NOTE(review): another ``test_cummin_masked`` appears earlier in this
    # source with different integer expectations (NaN / float64); if both
    # live in the same module this definition shadows it - confirm which
    # one is intended.
    values = [1, 2, None, 4, 5]

    for float_dtype in ("float32", "float64"):
        gpu_series = Series(values).astype(float_dtype)
        pandas_series = pd.Series(values).astype(float_dtype)
        assert_eq(gpu_series.cummin(), pandas_series.cummin())

    for int_dtype in ("int8", "int16", "int32", "int64"):
        reference = pd.Series([1, 1, -1, 1, 1]).astype(int_dtype)
        gpu_series = Series(values).astype(int_dtype)
        assert_eq(gpu_series.cummin(), reference)
assert_eq(pdf, gdf) psr_a = pdf["a"] gsr_a = gdf["a"] psr_a.replace(500, 501, inplace=True) gsr_a.replace(500, 501, inplace=True) assert_eq(pdf, gdf) @pytest.mark.parametrize( "replace_data", [ [100, 200, 300, 400, 500], Series([100, 200, 300, 400, 500]), Series([100, 200, 300, 400, 500], index=[2, 3, 4, 5, 6]), ], ) def test_series_set_equal_length_object_by_mask(replace_data): psr = pd.Series([1, 2, 3, 4, 5]) gsr = Series.from_pandas(psr) # Lengths match in trivial case pd_bool_col = pd.Series([True] * len(psr)) gd_bool_col = Series.from_pandas(pd_bool_col) psr[pd_bool_col] = ( replace_data.to_pandas() if hasattr(replace_data, "to_pandas")