def test_dataframe_setitem_new_columns(df, arg, value):
    """Check that ``frame[arg] = value`` matches between pandas and cudf.

    ``df``, ``arg`` and ``value`` come from parametrization that is not
    visible in this function.  The comparison also checks dtypes.
    """
    gdf = DataFrame.from_pandas(df)

    # A pandas DataFrame on the right-hand side is converted so that the
    # cudf assignment receives a cudf DataFrame; any other value is passed
    # through unchanged.
    gpu_value = (
        DataFrame.from_pandas(value)
        if isinstance(value, pd.DataFrame)
        else value
    )

    df[arg] = value
    gdf[arg] = gpu_value

    assert_eq(df, gdf, check_dtype=True)
def test_kernel_shallow_copy():
    """A shallow copy must equal its source after ``add_one`` runs on "a".

    NOTE(review): whether ``to_gpu_array()`` hands back a view or a copy of
    the column data is not visible here; if it is a copy, the final
    assertion holds trivially -- confirm.
    """
    host_frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=["a", "b", "c"],
    )
    gdf = DataFrame.from_pandas(host_frame)
    shallow = gdf.copy(deep=False)

    column_a = gdf["a"]
    # presumably a numba CUDA launch configuration (1 block,
    # len(column_a) threads) -- add_one is defined outside this function.
    launch = add_one[1, len(column_a)]
    launch(column_a.to_gpu_array())

    assert_eq(gdf, shallow)
def test_cudf_dataframe_copy(copy_fn, ncols, data_type):
    """Check that ``copy_fn`` returns a frame equal to the cudf original.

    Builds ``ncols`` columns labelled "a", "b", ... each holding 20 random
    integers from [0, 1000) cast to ``data_type``.
    """
    pdf = pd.DataFrame()
    for offset in range(ncols):
        label = chr(ord("a") + offset)
        values = np.random.randint(0, 1000, 20)
        pdf[label] = pd.Series(values).astype(data_type)

    gdf = DataFrame.from_pandas(pdf)
    duplicate = copy_fn(gdf)

    assert_eq(gdf, duplicate)
def test_kernel_deep_copy():
    """A deep copy must differ from its source after ``add_one`` runs on "b".

    The kernel is handed the column's ``data_array_view``; the test then
    compares the whitespace-split string renderings of both frames.
    """
    host_frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=["a", "b", "c"],
    )
    gdf = DataFrame.from_pandas(host_frame)
    deep = gdf.copy(deep=True)

    column_b = gdf["b"]
    # presumably a numba CUDA launch configuration (1 block,
    # len(column_b) threads) -- add_one is defined outside this function.
    launch = add_one[1, len(column_b)]
    launch(column_b._column.data_array_view)

    source_tokens = gdf.to_string().split()
    copy_tokens = deep.to_string().split()
    assert source_tokens != copy_tokens
def test_dataframe_copy_shallow():
    """Overwriting "b" on a shallow copy must show through the original.

    The same steps run on a pandas frame and its cudf counterpart; each
    original's "b" column is then compared with its shallow copy's "b".

    NOTE(review): the pandas half relies on ``copy(deep=False)`` sharing
    column data on assignment, which may depend on the pandas version --
    confirm.
    """
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=["a", "b", "c"],
    )
    gdf = DataFrame.from_pandas(pdf)

    shallow_pdf = pdf.copy(deep=False)
    shallow_gdf = gdf.copy(deep=False)

    # pandas first, then cudf; a fresh list literal for each assignment.
    for shallow in (shallow_pdf, shallow_gdf):
        shallow["b"] = [0, 0, 0]

    for original, shallow in ((pdf, shallow_pdf), (gdf, shallow_gdf)):
        assert_eq(original["b"], shallow["b"])
def test_kernel_deep_copy():
    """A deep copy must differ from its source after ``add_one`` runs on "b".

    The kernel is handed the GPU array of the series' underlying buffer;
    the test then compares the whitespace-split string renderings of both
    frames.
    """
    host_frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=["a", "b", "c"],
    )
    gdf = DataFrame.from_pandas(host_frame)
    deep = gdf.copy(deep=True)

    column_b = gdf["b"]
    # column.to_gpu_array goes through to_dense_buffer, which returns a
    # copy, so the buffer is accessed directly and to_gpu_array is called
    # on it instead.
    launch = add_one[1, len(column_b)]
    launch(column_b.data.to_gpu_array())

    source_tokens = gdf.to_string().split()
    copy_tokens = deep.to_string().split()
    assert source_tokens != copy_tokens
def test_dataframe_deep_copy_and_insert(copy_parameters):
    """Check whether overwriting "b" on a copy reaches the original.

    ``copy_parameters`` is a mapping with two keys read here:
    ``"fn"``, the callable used to copy both the pandas and cudf frames,
    and ``"expected_equality"``, the value both equality results must
    match.
    """
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=["a", "b", "c"],
    )
    gdf = DataFrame.from_pandas(pdf)

    copier = copy_parameters["fn"]
    copied_pdf = copier(pdf)
    copied_gdf = copier(gdf)

    copied_pdf["b"] = [0, 0, 0]
    copied_gdf["b"] = [0, 0, 0]

    pandas_match = np.array_equal(pdf["b"].values, copied_pdf["b"].values)
    cudf_match = np.array_equal(
        gdf["b"].to_array(),
        copied_gdf["b"].to_array(),
    )

    expected = copy_parameters["expected_equality"]
    assert pandas_match == expected
    assert cudf_match == expected
def test_cudf_dataframe_copy_then_insert(copy_fn, ncols, data_type):
    """Check that adding column "aa" to a copy leaves the original distinct.

    Builds ``ncols`` random columns labelled "a", "b", ..., copies both the
    cudf and the pandas frame with ``copy_fn``, inserts "aa" into each copy
    and asserts that each copy's string rendering differs from its
    original's.
    """

    def random_column():
        # 20 random integers from [0, 1000), cast to the dtype under test.
        return pd.Series(np.random.randint(0, 1000, 20)).astype(data_type)

    pdf = pd.DataFrame()
    for offset in range(ncols):
        pdf[chr(ord("a") + offset)] = random_column()

    gdf = DataFrame.from_pandas(pdf)

    copied_gdf = copy_fn(gdf)
    copied_pdf = copy_fn(pdf)

    # The cudf copy is filled first so the order of random draws is stable.
    copied_gdf["aa"] = random_column()
    copied_pdf["aa"] = random_column()

    assert copied_pdf.to_string().split() != pdf.to_string().split()
    assert copied_gdf.to_string().split() != gdf.to_string().split()
def test_setitem_dataframe_series_inplace(df):
    """Compare in-place ``replace`` on column "a" between pandas and cudf.

    Two access patterns are exercised: calling ``replace(..., inplace=True)``
    directly on ``frame["a"]``, and calling it on a series previously bound
    to a local name.  After each step the pandas and cudf frames are
    compared.
    """
    gdf = DataFrame.from_pandas(df)

    # Step 1: replace on the series returned by indexing, pandas then cudf.
    for frame in (df, gdf):
        frame["a"].replace(1, 500, inplace=True)
    assert_eq(df, gdf)

    # Step 2: bind both series first, then replace through the bound names.
    pandas_a = df["a"]
    cudf_a = gdf["a"]
    for series in (pandas_a, cudf_a):
        series.replace(500, 501, inplace=True)
    assert_eq(df, gdf)
def test_dataframe_setitem_bool_mask_scaler(df, arg, value):
    """Check that ``frame[arg] = value`` matches between pandas and cudf.

    Going by the test name, ``arg`` is presumably a boolean mask and
    ``value`` a scalar; both come from parametrization that is not visible
    in this function -- confirm.
    """
    gdf = DataFrame.from_pandas(df)

    # Same assignment on the pandas frame first, then on the cudf frame.
    for frame in (df, gdf):
        frame[arg] = value

    assert_eq(df, gdf)