def test_series_fillna_invalid_dtype(data_dtype):
    """Check that ``fillna`` rejects a fill value it cannot safely cast.

    A four-element Series containing one null is built with ``data_dtype``
    and filled with the float ``2.5``. The call is expected to raise
    ``TypeError`` with a message naming the Python type of the fill value
    and the type of the Series' dtype.

    ``data_dtype`` is supplied from outside this function (presumably a
    pytest parametrization over dtypes that cannot hold 2.5 -- confirm).
    """
    series = Series([1, 2, None, 3], dtype=data_dtype)
    fill_value = 2.5

    with pytest.raises(TypeError) as exc_info:
        series.fillna(fill_value)

    source_name = type(fill_value).__name__
    target_name = series.dtype.type.__name__
    message_template = "Cannot safely cast non-equivalent {} to {}"
    # ExceptionInfo.match treats its argument as a regular expression and
    # searches for it in the string form of the raised exception.
    exc_info.match(message_template.format(source_name, target_name))
def test_series_fillna_numerical(
    data_dtype, fill_dtype, fill_type, null_value, inplace
):
    """Check ``fillna`` on a numeric Series with a scalar or Series filler.

    The input Series is ``[0, 1, null_value, 2, null_value]`` with dtype
    ``data_dtype``. Depending on ``fill_type`` the fill value is either a
    random integer in ``[0, 5)`` or a five-element pandas Series of such
    integers. The filled result is compared against a NumPy array in which
    positions 2 and 4 hold the corresponding fill values.

    ``inplace`` is forwarded to ``fillna``; when it is truthy the original
    Series is inspected instead of the return value. ``fill_dtype`` is
    accepted but not used by the body.
    """
    # TODO: These tests should use Pandas' nullable int type
    # when we support a recent enough version of Pandas
    # https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
    if fill_type == "scalar":
        fill_value = np.random.randint(0, 5)
        third, fifth = fill_value, fill_value
    elif fill_type == "series":
        fill_value = pd.Series(
            np.random.randint(0, 5, (5,)), dtype=data_dtype
        )
        # Only the entries aligned with the null positions are consumed.
        third, fifth = fill_value[2], fill_value[4]

    expect = np.array([0, 1, third, 2, fifth], dtype=data_dtype)

    series = Series([0, 1, null_value, 2, null_value], dtype=data_dtype)
    returned = series.fillna(fill_value, inplace=inplace)
    # With inplace set, the filled data is read from the original object.
    filled = series if inplace else returned

    np.testing.assert_equal(expect, filled.to_array())
def test_series_replace():
    """Exercise ``Series.replace`` across several kinds of input.

    Covered cases, in order:

    * scalar-for-scalar replacement on an integer Series;
    * replacement on a categorical Series, compared with pandas, for both
      an existing ("two") and a new ("five") replacement value;
    * list-for-list replacement, including the expectation that float
      replacements on the integer Series raise ``TypeError``;
    * an array of values to replace with a single scalar;
    * a 401-element Series, with and without a null, where four values are
      replaced by ``None`` or ``-1`` and the null count is checked.
    """

    def check_nulls(series, replacement, expected_nulls):
        # Replace four fixed values, then verify the null count and that
        # to_array() yields one element per non-null entry.
        replaced = series.replace([22, 323, 27, 0], replacement)
        assert replaced.null_count == expected_nulls
        assert len(replaced.to_array()) == (401 - expected_nulls)

    base = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    assert_eq(np.array([5, 1, 2, 3, 4]), base.replace(0, 5).to_array())

    # Categorical
    pd_categorical = pd.Series(["one", "two", "three"], dtype="category")
    expected_existing = pd_categorical.replace("one", "two")
    categorical = Series.from_pandas(pd_categorical)
    assert_eq(expected_existing, categorical.replace("one", "two"))

    expected_new = pd_categorical.replace("one", "five")
    assert_eq(expected_new, categorical.replace("one", "five"))

    # List input
    assert_eq(
        np.array([5, 6, 2, 3, 4]),
        base.replace([0, 1], [5, 6]).to_array(),
    )

    with pytest.raises(TypeError):
        base.replace([0, 1], [5.5, 6.5])

    # Series input
    leading_values = base[:3].to_array()
    assert_eq(
        np.array([5, 5, 5, 3, 4]),
        base.replace(leading_values, 5).to_array(),
    )

    # large input containing null
    with_null = Series(list(range(400)) + [None])
    check_nulls(with_null, None, 5)
    check_nulls(with_null, -1, 1)

    # large input not containing nulls
    without_null = with_null.fillna(-11)
    check_nulls(without_null, None, 4)
    check_nulls(without_null, -1, 0)
def test_series_fillna(data, index, value):
    """Compare ``fillna`` with a Series filler against pandas.

    A pandas Series and a ``Series`` are built from the same ``data``.
    ``index`` is applied to both only when it is not ``None`` and its
    length equals ``len(data)``; otherwise no index is passed. Each is then
    filled with a Series built from ``value`` and the two results are
    compared with ``assert_eq``.
    """
    index_usable = index is not None and len(index) == len(data)
    effective_index = index if index_usable else None

    reference = pd.Series(data, index=effective_index)
    subject = Series(data, index=effective_index)

    expect = reference.fillna(pd.Series(value))
    got = subject.fillna(Series(value))

    assert_eq(expect, got)
def test_series_replace():
    """Exercise ``Series.replace`` and compare against NumPy / pandas.

    Covered cases, in order:

    * scalar-for-scalar replacement on an integer Series;
    * replacement on a categorical Series, compared with pandas;
    * list-for-list replacement with integer and with float replacements;
    * a Series slice as the set of values to replace with one scalar;
    * a 401-element Series, with and without a null, where four values are
      replaced by ``None`` or ``-1`` and the null count is checked.
    """
    numbers = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    scalar_result = numbers.replace(0, 5)
    np.testing.assert_equal(
        scalar_result.to_array(), np.array([5, 1, 2, 3, 4])
    )

    # Categorical
    pd_categorical = pd.Series(["one", "two", "three"], dtype="category")
    pd_expected = pd_categorical.replace("one", "two")
    categorical = Series.from_pandas(pd_categorical)
    categorical_result = categorical.replace("one", "two")
    pd.testing.assert_series_equal(
        categorical_result.to_pandas(), pd_expected
    )

    # List input
    int_list_result = numbers.replace([0, 1], [5, 6])
    np.testing.assert_equal(
        int_list_result.to_array(), np.array([5, 6, 2, 3, 4])
    )

    float_list_result = numbers.replace([0, 1], [5.5, 6.5])
    np.testing.assert_equal(
        float_list_result.to_array(), np.array([5.5, 6.5, 2, 3, 4])
    )

    # Series input
    series_result = numbers.replace(numbers[:3], 5)
    np.testing.assert_equal(
        series_result.to_array(), np.array([5, 5, 5, 3, 4])
    )

    # large input containing null
    large = Series(list(range(400)) + [None])

    nulled = large.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 5
    assert len(nulled.to_array()) == (401 - 5)

    negated = large.replace([22, 323, 27, 0], -1)
    assert negated.null_count == 1
    assert len(negated.to_array()) == (401 - 1)

    # large input not containing nulls
    large = large.fillna(-11)

    nulled_after_fill = large.replace([22, 323, 27, 0], None)
    assert nulled_after_fill.null_count == 4
    assert len(nulled_after_fill.to_array()) == (401 - 4)

    negated_after_fill = large.replace([22, 323, 27, 0], -1)
    assert negated_after_fill.null_count == 0
    assert len(negated_after_fill.to_array()) == 401