def test_fillna_dataframe(fill_type, inplace):
    """Compare ``DataFrame.fillna`` against pandas for several fill kinds.

    Parameters
    ----------
    fill_type : str
        ``"scalar"`` fills with the integer 5, ``"series"`` fills with a
        three-element series, and any other value fills with a per-column
        mapping that mixes a scalar and a series.
    inplace : bool
        Forwarded to ``gdf.fillna``; when truthy the frame itself is
        compared rather than the value returned by the call.
    """
    pdf = pd.DataFrame({"a": [1, 2, None], "b": [None, None, 5]})
    gdf = DataFrame.from_pandas(pdf)

    if fill_type == "scalar":
        fill_value_pd = fill_value_cudf = 5
    elif fill_type == "series":
        fill_value_pd = pd.Series([3, 4, 5])
        fill_value_cudf = Series.from_pandas(fill_value_pd)
    else:
        column_b_fill = pd.Series([3, 4, 5])
        fill_value_pd = {"a": 5, "b": column_b_fill}
        fill_value_cudf = {
            "a": 5,
            "b": Series.from_pandas(column_b_fill),
        }

    if not isinstance(fill_value_pd, pd.Series):
        expect = pdf.fillna(fill_value_pd)
    else:
        # https://github.com/pandas-dev/pandas/issues/27197
        # pandas DataFrame.fillna with a Series fill value is not working,
        # so the expectation is assembled one column at a time instead.
        expect = pd.DataFrame()
        for name in pdf.columns:
            expect[name] = pdf[name].fillna(fill_value_pd)

    returned = gdf.fillna(fill_value_cudf, inplace=inplace)

    # With inplace=True the mutated frame is the thing under test.
    assert_eq(expect, gdf if inplace else returned)
def test_string_numeric_astype(dtype):
    """Check that casting a numeric/datetime Series to ``"str"`` matches pandas.

    Parameters
    ----------
    dtype : str
        Dtype name; its prefix (``bool``, ``int``, ``float`` or
        ``datetime64``) selects the sample data used for the cast.
    """
    samples_by_prefix = (
        ("bool", [1, 0, 1, 0, 1]),
        ("int", [1, 2, 3, 4, 5]),
        ("float", [1.0, 2.0, 3.0, 4.0, 5.0]),
        (
            "datetime64",
            [1000000000, 2000000000, 3000000000, 4000000000, 5000000000],
        ),
    )
    # No fallback on purpose: a dtype with an unrecognised prefix leaves
    # ``data`` unbound and the test errors out when it is first used.
    for prefix, values in samples_by_prefix:
        if dtype.startswith(prefix):
            data = values
            break

    if dtype.startswith("datetime64"):
        ps = pd.Series(data, dtype="datetime64[ns]")
        gs = Series.from_pandas(ps)
        # Pandas datetime64 --> str typecasting returns arbitrary format
        # depending on the data, so a fixed format is used for the
        # expectation unless we choose to match that behavior.
        expect = ps.dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        ps = pd.Series(data, dtype=dtype)
        gs = Series(data, dtype=dtype)
        expect = ps.astype("str")

    assert_eq(expect, gs.astype("str"))
def test_series_replace():
    """Exercise ``Series.replace`` with scalar, categorical, list and Series
    arguments and compare each result with a hand-written expectation."""
    source = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    expected_scalar = np.array([5, 1, 2, 3, 4])
    replaced_scalar = source.replace(0, 5)
    np.testing.assert_equal(replaced_scalar.to_array(), expected_scalar)

    # Categorical
    pd_categories = pd.Series(["one", "two", "three"], dtype="category")
    pd_expected = pd_categories.replace("one", "two")
    gd_categories = Series.from_pandas(pd_categories)
    gd_replaced = gd_categories.replace("one", "two")
    pd.testing.assert_series_equal(gd_replaced.to_pandas(), pd_expected)

    # List input
    expected_ints = np.array([5, 6, 2, 3, 4])
    replaced_ints = source.replace([0, 1], [5, 6])
    np.testing.assert_equal(replaced_ints.to_array(), expected_ints)

    expected_floats = np.array([5.5, 6.5, 2, 3, 4])
    replaced_floats = source.replace([0, 1], [5.5, 6.5])
    np.testing.assert_equal(replaced_floats.to_array(), expected_floats)

    # Series input
    expected_from_series = np.array([5, 5, 5, 3, 4])
    replaced_from_series = source.replace(source[:3], 5)
    np.testing.assert_equal(
        replaced_from_series.to_array(), expected_from_series
    )
def test_groupby_series_level_zero(agg):
    """Group a Series by index level 0 and compare the aggregation with pandas.

    Parameters
    ----------
    agg : str
        Name of the aggregation method looked up on the groupby object and
        called without arguments.
    """
    psr = pd.Series([1, 2, 3], index=[0, 1, 1])
    gsr = Series.from_pandas(psr)

    expect = getattr(psr.groupby(level=0), agg)()
    got = getattr(gsr.groupby(level=0), agg)()

    assert_eq(expect, got)
def test_groupby_series_level_zero(agg):
    """Group a Series by index level 0 and compare the aggregation with pandas.

    Parameters
    ----------
    agg : str
        Name of the aggregation method looked up on the groupby object and
        called without arguments. For ``"count"`` the dtype comparison is
        skipped.
    """
    psr = pd.Series([1, 2, 3], index=[0, 1, 1])
    gsr = Series.from_pandas(psr)

    expect = getattr(psr.groupby(level=0), agg)()
    got = getattr(gsr.groupby(level=0), agg)()

    # Dtypes are compared for every aggregation except "count".
    assert_eq(expect, got, check_dtype=agg != "count")
def test_fillna_string(fill_type, inplace):
    """Compare ``Series.fillna`` on string data against pandas.

    Parameters
    ----------
    fill_type : str
        ``"scalar"`` fills with ``"a"``; ``"series"`` fills with a
        four-element series of strings. No other value is handled, so
        anything else leaves the fill values unbound.
    inplace : bool
        Forwarded to ``sr.fillna``; when truthy the series itself is
        compared rather than the value returned by the call.
    """
    psr = pd.Series(["z", None, "z", None])

    # Each library receives a fill value of its own type.
    if fill_type == "scalar":
        fill_value_pd = fill_value_cudf = "a"
    elif fill_type == "series":
        fill_value_pd = pd.Series(["a", "b", "c", "d"])
        fill_value_cudf = Series.from_pandas(fill_value_pd)

    sr = Series.from_pandas(psr)
    expect = psr.fillna(fill_value_pd)
    returned = sr.fillna(fill_value_cudf, inplace=inplace)

    assert_eq(expect, sr if inplace else returned)
def test_to_from_pandas_nulls(data, nulls):
    """Round-trip a pandas Series through ``Series.from_pandas`` and
    ``to_pandas`` after optionally overwriting values with NaT.

    Parameters
    ----------
    data
        Object exposing ``copy()``; the copy is wrapped in a ``pd.Series``.
        Presumably datetime values supplied by a fixture - confirm against
        the parametrization.
    nulls : str
        ``"some"`` sets every second element (positions 0, 2, 4, ...) to
        NaT, ``"all"`` sets every element to NaT; any other value leaves
        the data untouched.
    """
    pd_data = pd.Series(data.copy())
    nat = np.datetime64("nat", "ns")

    if nulls == "some":
        # Fill half the values with NaT
        even_positions = list(range(0, len(pd_data), 2))
        pd_data[even_positions] = nat
    elif nulls == "all":
        # Fill all the values with NaT
        pd_data[:] = nat

    round_tripped = Series.from_pandas(pd_data).to_pandas()

    assert_eq(pd_data, round_tripped)
def test_to_from_pandas_nulls(data, nulls):
    """Round-trip a millisecond-datetime pandas Series through
    ``Series.from_pandas`` and ``to_pandas`` after optionally inserting NaT.

    Parameters
    ----------
    data
        Object exposing ``copy()`` whose copy supports
        ``astype('datetime64[ms]')``; presumably a NumPy array supplied by
        a fixture - confirm against the parametrization.
    nulls : str
        ``'some'`` sets every second element (positions 0, 2, 4, ...) to
        NaT, ``'all'`` sets every element to NaT; any other value leaves
        the data untouched.
    """
    as_milliseconds = data.copy().astype("datetime64[ms]")
    pd_data = pd.Series(as_milliseconds)
    nat = np.datetime64("nat")

    if nulls == "some":
        # Fill half the values with NaT
        even_positions = list(range(0, len(pd_data), 2))
        pd_data[even_positions] = nat
    elif nulls == "all":
        # Fill all the values with NaT
        pd_data[:] = nat

    round_tripped = Series.from_pandas(pd_data).to_pandas()

    assert_eq(pd_data, round_tripped)
def test_series_replace():
    """Exercise ``Series.replace`` with scalar, categorical, list and Series
    arguments, then on a 401-element series with and without a null."""
    source = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    np.testing.assert_equal(
        source.replace(0, 5).to_array(), np.array([5, 1, 2, 3, 4])
    )

    # Categorical
    pd_categories = pd.Series(["one", "two", "three"], dtype="category")
    pd_expected = pd_categories.replace("one", "two")
    gd_replaced = Series.from_pandas(pd_categories).replace("one", "two")
    pd.testing.assert_series_equal(gd_replaced.to_pandas(), pd_expected)

    # List input
    np.testing.assert_equal(
        source.replace([0, 1], [5, 6]).to_array(),
        np.array([5, 6, 2, 3, 4]),
    )
    np.testing.assert_equal(
        source.replace([0, 1], [5.5, 6.5]).to_array(),
        np.array([5.5, 6.5, 2, 3, 4]),
    )

    # Series input
    np.testing.assert_equal(
        source.replace(source[:3], 5).to_array(),
        np.array([5, 5, 5, 3, 4]),
    )

    targets = (22, 323, 27, 0)
    total = 401  # 400 integers plus one trailing None

    # large input containing null
    with_null = Series(list(range(400)) + [None])

    to_null = with_null.replace(list(targets), None)
    assert to_null.null_count == 5
    assert len(to_null.to_array()) == total - 5

    to_minus_one = with_null.replace(list(targets), -1)
    assert to_minus_one.null_count == 1
    assert len(to_minus_one.to_array()) == total - 1

    # large input not containing nulls
    without_null = with_null.fillna(-11)

    to_null = without_null.replace(list(targets), None)
    assert to_null.null_count == 4
    assert len(to_null.to_array()) == total - 4

    to_minus_one = without_null.replace(list(targets), -1)
    assert to_minus_one.null_count == 0
    assert len(to_minus_one.to_array()) == total
def test_series_with_nulls_where(fill_value):
    """Compare ``Series.where`` against pandas on data with leading nulls.

    The series holds three ``None`` entries followed by 0..4, and is masked
    in turn by ``> 0``, ``< 0`` and ``== 0``.

    Parameters
    ----------
    fill_value
        Replacement passed as the second argument of ``where``.
    """
    psr = pd.Series([None] * 3 + list(range(5)))
    sr = Series.from_pandas(psr)

    predicates = (
        lambda s: s > 0,
        lambda s: s < 0,
        lambda s: s == 0,
    )
    for keep in predicates:
        expect = psr.where(keep(psr), fill_value)
        got = sr.where(keep(sr), fill_value)
        assert_eq(expect, got)
def test_series_where(data_dtype, fill_value):
    """Compare ``Series.where`` against pandas on the values 0..9.

    The series is masked in turn by ``> 0``, ``< 0`` and ``== 0``.

    Parameters
    ----------
    data_dtype
        Dtype used to build the pandas series.
    fill_value
        Replacement passed as the second argument of ``where``.
    """
    psr = pd.Series(list(range(10)), dtype=data_dtype)
    sr = Series.from_pandas(psr)

    predicates = (
        lambda s: s > 0,
        lambda s: s < 0,
        lambda s: s == 0,
    )
    for keep in predicates:
        expect = psr.where(keep(psr), fill_value)
        got = sr.where(keep(sr), fill_value)
        assert_eq(expect, got)
def test_fillna_string(fill_type, inplace):
    """Compare ``Series.fillna`` on string data against pandas.

    The pandas expectation is built with a pandas fill value and the series
    under test receives the equivalent ``Series`` converted from it, so
    neither library is handed the other's series type.

    Parameters
    ----------
    fill_type : str
        ``'scalar'`` fills with ``'a'``; ``'series'`` fills with a
        four-element series of strings. No other value is handled, so
        anything else leaves the fill values unbound.
    inplace : bool
        Forwarded to ``sr.fillna``; when truthy the series itself is
        compared rather than the value returned by the call.
    """
    psr = pd.Series(['z', None, 'z', None])

    if fill_type == 'scalar':
        fill_value_pd = 'a'
        fill_value_cudf = fill_value_pd
    elif fill_type == 'series':
        # pandas gets a pandas Series; the series under test gets the
        # same values converted to its own Series type.
        fill_value_pd = pd.Series(['a', 'b', 'c', 'd'])
        fill_value_cudf = Series.from_pandas(fill_value_pd)

    sr = Series.from_pandas(psr)

    expect = psr.fillna(fill_value_pd)
    got = sr.fillna(fill_value_cudf, inplace=inplace)

    if inplace:
        # With inplace=True the mutated series is the thing under test.
        got = sr

    assert_eq(expect, got)
def test_fillna_datetime(fill_type, inplace):
    """Compare ``Series.fillna`` on datetime data against pandas.

    Parameters
    ----------
    fill_type : str
        ``'scalar'`` fills with a fixed timestamp; ``'series'`` fills with
        the original dates shifted by one day. No other value is handled,
        so anything else leaves ``fill_value`` unbound.
    inplace : bool
        Forwarded to ``sr.fillna``; when truthy the series itself is
        compared rather than the value returned by the call.
    """
    dates = pd.date_range('2010-01-01', '2020-01-10', freq='1y')
    psr = pd.Series(dates)

    # The fill value is derived before any entries are blanked out below.
    if fill_type == 'scalar':
        fill_value = pd.Timestamp('2010-01-02')
    elif fill_type == 'series':
        fill_value = psr + pd.Timedelta('1d')

    # Blank out two entries so there is something to fill.
    psr[[5, 9]] = None
    sr = Series.from_pandas(psr)

    expect = psr.fillna(fill_value)
    returned = sr.fillna(fill_value, inplace=inplace)

    assert_eq(expect, sr if inplace else returned)
def test_fillna_datetime(fill_type, inplace):
    """Compare ``Series.fillna`` on datetime data against pandas.

    Parameters
    ----------
    fill_type : str
        ``"scalar"`` fills with a fixed timestamp; ``"series"`` fills with
        the original dates shifted by one day. No other value is handled,
        so anything else leaves ``fill_value`` unbound.
    inplace : bool
        Forwarded to ``sr.fillna``; when truthy the series itself is
        compared rather than the value returned by the call.
    """
    psr = pd.Series(pd.date_range("2010-01-01", "2020-01-10", freq="1y"))

    # The fill value is derived before any entries are blanked out below.
    if fill_type == "scalar":
        fill_value = pd.Timestamp("2010-01-02")
    elif fill_type == "series":
        one_day = pd.Timedelta("1d")
        fill_value = psr + one_day

    # Blank out two entries so there is something to fill.
    missing_positions = [5, 9]
    psr[missing_positions] = None
    sr = Series.from_pandas(psr)

    expect = psr.fillna(fill_value)
    returned = sr.fillna(fill_value, inplace=inplace)

    assert_eq(expect, sr if inplace else returned)
def test_fillna_categorical(fill_type, null_value, inplace):
    """Check ``Series.fillna`` on categorical data against a fixed expectation.

    Parameters
    ----------
    fill_type : str
        ``'scalar'`` fills with ``'c'``; ``'series'`` fills with a
        categorical pandas series. No other value is handled, so anything
        else leaves ``fill_value`` unbound.
    null_value
        Placeholder used for the two missing entries of the input data.
    inplace : bool
        Forwarded to ``sr.fillna``; when truthy the series itself is
        compared rather than the value returned by the call.
    """

    def _categorical(values):
        # Build a pandas Series with the category dtype.
        return pd.Series(values, dtype='category')

    data = _categorical(['a', 'b', 'a', null_value, 'c', null_value])
    sr = Series.from_pandas(data)

    if fill_type == 'scalar':
        fill_value = 'c'
        expect = _categorical(['a', 'b', 'a', 'c', 'c', 'c'])
    elif fill_type == 'series':
        fill_value = _categorical(['c', 'c', 'c', 'c', 'c', 'a'])
        expect = _categorical(['a', 'b', 'a', 'c', 'c', 'a'])

    returned = sr.fillna(fill_value, inplace=inplace)

    assert_eq(expect, sr if inplace else returned)
def test_fillna_categorical(fill_type, null_value, inplace):
    """Check ``Series.fillna`` on categorical data against a fixed expectation.

    Parameters
    ----------
    fill_type : str
        ``"scalar"`` fills with ``"c"``; ``"series"`` fills with a
        categorical pandas series. No other value is handled, so anything
        else leaves ``fill_value`` unbound.
    null_value
        Placeholder used for the two missing entries of the input data.
    inplace : bool
        Forwarded to ``sr.fillna``; when truthy the series itself is
        compared rather than the value returned by the call.
    """

    def _categorical(values):
        # Build a pandas Series with the category dtype.
        return pd.Series(values, dtype="category")

    raw_values = ["a", "b", "a", null_value, "c", null_value]
    sr = Series.from_pandas(_categorical(raw_values))

    if fill_type == "scalar":
        fill_value = "c"
        expect = _categorical(["a", "b", "a", "c", "c", "c"])
    elif fill_type == "series":
        fill_value = _categorical(["c", "c", "c", "c", "c", "a"])
        expect = _categorical(["a", "b", "a", "c", "c", "a"])

    returned = sr.fillna(fill_value, inplace=inplace)

    assert_eq(expect, sr if inplace else returned)