def test_series_replace_with_nulls():
    """Replace values with nulls and verify the result after ``fillna(-10)``.

    Scalar, list and Series forms of ``to_replace`` are exercised. Every
    result is null-filled with -10 so it can be compared against a plain
    NumPy array.
    """
    int_series = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical: a single scalar replaced by null.
    want_scalar = np.array([-10, 1, 2, 3, 4])
    got_scalar = int_series.replace(0, None).fillna(-10)
    np.testing.assert_equal(got_scalar.to_array(), want_scalar)

    # List input: one target becomes null, the other a regular value.
    want_list = np.array([-10, 6, 2, 3, 4])
    got_list = int_series.replace([0, 1], [None, 6]).fillna(-10)
    np.testing.assert_equal(got_list.to_array(), want_list)

    # From here on the input itself already ends with a null entry.
    want_float = np.array([5.5, 6.5, 2, 3, 4, -10])
    null_series = Series([0, 1, 2, 3, 4, None])
    got_float = null_series.replace([0, 1], [5.5, 6.5]).fillna(-10)
    np.testing.assert_equal(got_float.to_array(), want_float)

    # Series input: the first three elements are the values to replace.
    want_from_series = np.array([-10, -10, -10, 3, 4, -10])
    got_from_series = null_series.replace(null_series[:3], None).fillna(-10)
    np.testing.assert_equal(got_from_series.to_array(), want_from_series)

    # Mixed null / float replacement values.
    want_mixed = np.array([-10, 6.5, 2, 3, 4, -10])
    got_mixed = null_series.replace([0, 1], [None, 6.5]).fillna(-10)
    np.testing.assert_equal(got_mixed.to_array(), want_mixed)
def test_series_replace():
    """Check ``Series.replace`` for numeric, categorical and large inputs.

    Results are compared with ``assert_eq``. Replacing integers with a list
    of floats is expected to raise ``TypeError``.
    """
    int_series = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical: scalar -> scalar.
    want_scalar = np.array([5, 1, 2, 3, 4])
    got_scalar = int_series.replace(0, 5)
    assert_eq(want_scalar, got_scalar.to_array())

    # Categorical: replace with an existing category, then with a new one.
    pd_cat = pd.Series(["one", "two", "three"], dtype="category")
    pd_existing = pd_cat.replace("one", "two")
    gd_cat = Series.from_pandas(pd_cat)
    gd_existing = gd_cat.replace("one", "two")
    assert_eq(pd_existing, gd_existing)

    pd_new = pd_cat.replace("one", "five")
    gd_new = gd_cat.replace("one", "five")
    assert_eq(pd_new, gd_new)

    # List input.
    want_list = np.array([5, 6, 2, 3, 4])
    got_list = int_series.replace([0, 1], [5, 6])
    assert_eq(want_list, got_list.to_array())

    # Float replacements on an integer Series are expected to be rejected.
    with pytest.raises(TypeError):
        int_series.replace([0, 1], [5.5, 6.5])

    # Series input, handed over as a host array of the first three values.
    want_from_series = np.array([5, 5, 5, 3, 4])
    got_from_series = int_series.replace(int_series[:3].to_array(), 5)
    assert_eq(want_from_series, got_from_series.to_array())

    # Large input containing null: 400 integers plus one trailing null.
    with_null = Series(list(range(400)) + [None])

    nulled = with_null.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 5
    assert len(nulled.to_array()) == (401 - 5)

    filled = with_null.replace([22, 323, 27, 0], -1)
    assert filled.null_count == 1
    assert len(filled.to_array()) == (401 - 1)

    # Large input not containing nulls.
    no_null = with_null.fillna(-11)

    nulled = no_null.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 4
    assert len(nulled.to_array()) == (401 - 4)

    filled = no_null.replace([22, 323, 27, 0], -1)
    assert filled.null_count == 0
    assert len(filled.to_array()) == 401
def test_series_replace_with_nulls():
    """Replace values with nulls and verify the result after ``fillna(-10)``.

    Results are compared with ``assert_eq``. Replacing integers with a list
    of floats is expected to raise ``TypeError``.
    """
    int_series = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical: a single scalar replaced by null.
    want_scalar = np.array([-10, 1, 2, 3, 4])
    got_scalar = int_series.replace(0, None).fillna(-10)
    assert_eq(want_scalar, got_scalar.to_array())

    # List input: one target becomes null, the other a regular value.
    want_list = np.array([-10, 6, 2, 3, 4])
    got_list = int_series.replace([0, 1], [None, 6]).fillna(-10)
    assert_eq(want_list, got_list.to_array())

    # From here on the input itself already ends with a null entry.
    null_series = Series([0, 1, 2, 3, 4, None])
    with pytest.raises(TypeError):
        null_series.replace([0, 1], [5.5, 6.5]).fillna(-10)

    # Series input: a cudf Series indexed by the first three elements.
    want_from_series = np.array([-10, -10, -10, 3, 4, -10])
    to_replace = cudf.Series([-10] * 3, index=null_series[:3])
    got_from_series = null_series.replace(to_replace, None).fillna(-10)
    assert_eq(want_from_series, got_from_series.to_array())

    # Mixed null / integer replacement values.
    want_mixed = np.array([-10, 6, 2, 3, 4, -10])
    got_mixed = null_series.replace([0, 1], [None, 6]).fillna(-10)
    assert_eq(want_mixed, got_mixed.to_array())
def test_series_replace():
    """Check ``Series.replace`` for numeric, categorical and large inputs.

    Results are compared with the NumPy and pandas testing helpers. This
    variant expects replacing integers with floats to succeed.
    """
    int_series = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical: scalar -> scalar.
    want_scalar = np.array([5, 1, 2, 3, 4])
    got_scalar = int_series.replace(0, 5)
    np.testing.assert_equal(got_scalar.to_array(), want_scalar)

    # Categorical: compare against the pandas result.
    pd_cat = pd.Series(["one", "two", "three"], dtype="category")
    pd_replaced = pd_cat.replace("one", "two")
    gd_cat = Series.from_pandas(pd_cat)
    gd_replaced = gd_cat.replace("one", "two")
    pd.testing.assert_series_equal(gd_replaced.to_pandas(), pd_replaced)

    # List input: integer replacements first, then float replacements.
    want_int_list = np.array([5, 6, 2, 3, 4])
    got_int_list = int_series.replace([0, 1], [5, 6])
    np.testing.assert_equal(got_int_list.to_array(), want_int_list)

    want_float_list = np.array([5.5, 6.5, 2, 3, 4])
    got_float_list = int_series.replace([0, 1], [5.5, 6.5])
    np.testing.assert_equal(got_float_list.to_array(), want_float_list)

    # Series input: the first three elements are the values to replace.
    want_from_series = np.array([5, 5, 5, 3, 4])
    got_from_series = int_series.replace(int_series[:3], 5)
    np.testing.assert_equal(got_from_series.to_array(), want_from_series)

    # Large input containing null: 400 integers plus one trailing null.
    with_null = Series(list(range(400)) + [None])

    nulled = with_null.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 5
    assert len(nulled.to_array()) == (401 - 5)

    filled = with_null.replace([22, 323, 27, 0], -1)
    assert filled.null_count == 1
    assert len(filled.to_array()) == (401 - 1)

    # Large input not containing nulls.
    no_null = with_null.fillna(-11)

    nulled = no_null.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 4
    assert len(nulled.to_array()) == (401 - 4)

    filled = no_null.replace([22, 323, 27, 0], -1)
    assert filled.null_count == 0
    assert len(filled.to_array()) == 401
def test_series_multiple_times_with_nulls():
    """Repeat an all-null ``replace`` three times and check each result."""
    source = Series([1, 2, 3, None])
    all_null = Series([None, None, None, None], dtype=np.int64)

    for _ in range(3):
        result = source.replace([1, 2, 3], None)
        assert_eq(all_null, result)
        # BUG: #2695
        # The throwaway series below acquires a chunk of memory and fills it
        # with values; those values may linger after the memory is released.
        # A subsequent replace call might reuse that memory, so the space
        # used for its mask may hold junk values. If the mask is not updated
        # properly the result would be wrong; allocating here helps verify
        # that scenario.
        Series([1, 1, 1, None])
def test_replace_inplace():
    """Compare cudf and pandas ``replace`` with a focus on ``inplace=True``.

    Covers numeric and categorical Series, a DataFrame, an empty
    ``to_replace`` array, dict-based DataFrame replacement and the
    ``TypeError`` expected for a scalar target with an empty list value.
    """
    data = np.array([5, 1, 2, 3, 4])

    # Numeric Series, scalar replacement in place.
    gsr = Series(data)
    psr = pd.Series(data)
    gsr_copy = gsr.copy()
    psr_copy = psr.copy()

    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)
    gsr.replace(5, 0, inplace=True)
    psr.replace(5, 0, inplace=True)
    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)

    # Numeric Series, dict replacement (results discarded) and no-arg call.
    gsr = Series(data)
    psr = pd.Series(data)
    gsr_copy = gsr.copy()
    psr_copy = psr.copy()

    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)
    gsr.replace({5: 0, 3: -5})
    psr.replace({5: 0, 3: -5})
    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)

    g_noarg = gsr.replace()
    p_noarg = psr.replace()
    assert_eq(g_noarg, p_noarg)

    # Categorical Series, in place.
    psr = pd.Series(["one", "two", "three"], dtype="category")
    gsr = Series.from_pandas(psr)
    gsr_copy = gsr.copy()
    psr_copy = psr.copy()

    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)
    gsr.replace("one", "two", inplace=True)
    psr.replace("one", "two", inplace=True)
    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)

    # DataFrame, in place.
    pdf = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]})
    gdf = DataFrame.from_pandas(pdf)
    pdf_copy = pdf.copy()
    gdf_copy = gdf.copy()

    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)
    pdf.replace(5, 0, inplace=True)
    gdf.replace(5, 0, inplace=True)
    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)

    # Empty array of values to replace.
    p_ints = pd.Series([1, 2, 3, 45])
    g_ints = Series.from_pandas(p_ints)
    empty_targets = np.array([]).astype(int)

    assert_eq(p_ints.replace(empty_targets, -1), g_ints.replace(empty_targets, -1))

    p_ints.replace(empty_targets, 77, inplace=True)
    g_ints.replace(empty_targets, 77, inplace=True)
    assert_eq(p_ints, g_ints)

    # Dict-based and empty-list DataFrame replacement.
    pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    gdf = DataFrame.from_pandas(pdf)

    assert_eq(
        pdf.replace({"a": 2}, {"a": -33}),
        gdf.replace({"a": 2}, {"a": -33}),
    )
    assert_eq(
        pdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
        gdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
    )
    assert_eq(pdf.replace([], []), gdf.replace([], []))

    # Scalar target with an empty list value must raise in both libraries.
    with pytest.raises(TypeError):
        pdf.replace(-1, [])
    with pytest.raises(TypeError):
        gdf.replace(-1, [])
def test_replace_strings():
    """Replace one string value and compare the pandas and cudf results."""
    pandas_sr = pd.Series(["a", "b", "c", "d"])
    cudf_sr = Series(["a", "b", "c", "d"])

    expected = pandas_sr.replace("a", "e")
    actual = cudf_sr.replace("a", "e")
    assert_eq(expected, actual)
def test_replace_inplace():
    """Compare cudf and pandas ``replace`` with a focus on ``inplace=True``.

    Covers numeric and categorical Series, a DataFrame, an empty
    ``to_replace`` array and dict-based DataFrame replacement. The final
    check uses ``assert_exceptions_equal`` on a scalar target with an empty
    list value.
    """
    data = np.array([5, 1, 2, 3, 4])

    # Numeric Series, scalar replacement in place.
    gsr = Series(data)
    psr = pd.Series(data)
    gsr_copy = gsr.copy()
    psr_copy = psr.copy()

    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)
    gsr.replace(5, 0, inplace=True)
    psr.replace(5, 0, inplace=True)
    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)

    # Numeric Series, dict replacement (results discarded) and no-arg call.
    gsr = Series(data)
    psr = pd.Series(data)
    gsr_copy = gsr.copy()
    psr_copy = psr.copy()

    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)
    gsr.replace({5: 0, 3: -5})
    psr.replace({5: 0, 3: -5})
    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)

    g_noarg = gsr.replace()
    p_noarg = psr.replace()
    assert_eq(g_noarg, p_noarg)

    # Categorical Series, in place.
    psr = pd.Series(["one", "two", "three"], dtype="category")
    gsr = Series.from_pandas(psr)
    gsr_copy = gsr.copy()
    psr_copy = psr.copy()

    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)
    gsr.replace("one", "two", inplace=True)
    psr.replace("one", "two", inplace=True)
    assert_eq(gsr, psr)
    assert_eq(gsr_copy, psr_copy)

    # DataFrame, in place.
    pdf = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]})
    gdf = DataFrame.from_pandas(pdf)
    pdf_copy = pdf.copy()
    gdf_copy = gdf.copy()

    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)
    pdf.replace(5, 0, inplace=True)
    gdf.replace(5, 0, inplace=True)
    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)

    # Empty array of values to replace.
    p_ints = pd.Series([1, 2, 3, 45])
    g_ints = Series.from_pandas(p_ints)
    empty_targets = np.array([]).astype(int)

    assert_eq(p_ints.replace(empty_targets, -1), g_ints.replace(empty_targets, -1))

    p_ints.replace(empty_targets, 77, inplace=True)
    g_ints.replace(empty_targets, 77, inplace=True)
    assert_eq(p_ints, g_ints)

    # Dict-based and empty-list DataFrame replacement.
    pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    gdf = DataFrame.from_pandas(pdf)

    assert_eq(
        pdf.replace({"a": 2}, {"a": -33}),
        gdf.replace({"a": 2}, {"a": -33}),
    )
    assert_eq(
        pdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
        gdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
    )
    assert_eq(pdf.replace([], []), gdf.replace([], []))

    # Scalar target with an empty list value: both libraries are called with
    # the same keyword arguments; error messages are not compared.
    assert_exceptions_equal(
        lfunc=pdf.replace,
        rfunc=gdf.replace,
        lfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}),
        rfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}),
        compare_error_message=False,
    )