def test_dataframe_setitem_from_masked_object():
    """Check null handling for NaN / None inputs across several constructors.

    A float array with exactly 20 NaNs (out of 100 values) is fed to
    ``Series`` and ``DataFrame.from_pandas`` from host memory, and to
    ``Series`` from a device array.  With ``nan_as_null=True`` the NaNs
    must be reported as 20 nulls; with ``nan_as_null=False`` the null
    count must be 0.  Finally, assigning a Python list that contains two
    ``None`` entries as a column must yield two nulls.
    """
    total = 100
    n_missing = 20

    # Host data: `n_missing` NaNs scattered at shuffled positions.
    values = np.random.randn(total)
    nan_positions = np.zeros(total, dtype=bool)
    nan_positions[:n_missing] = True
    np.random.shuffle(nan_positions)
    values[nan_positions] = np.nan

    # Case 1: Series built from the host array.
    host_series_null = Series(values, nan_as_null=True)
    assert host_series_null.has_null_mask
    assert host_series_null.null_count == n_missing
    host_series_nan = Series(values, nan_as_null=False)
    assert host_series_nan.null_count == 0

    # Case 2: DataFrame converted from a pandas frame.
    frame_null = DataFrame.from_pandas(
        pd.DataFrame({'a': values}), nan_as_null=True
    )
    assert frame_null['a'].has_null_mask
    assert frame_null['a'].null_count == n_missing
    frame_nan = DataFrame.from_pandas(
        pd.DataFrame({'a': values}), nan_as_null=False
    )
    assert frame_nan['a'].null_count == 0

    # Case 3: Series built from an array already on the device.
    device_values = rmm.to_device(values)
    device_series_null = Series(device_values, nan_as_null=True)
    assert device_series_null.has_null_mask
    assert device_series_null.null_count == n_missing
    device_series_nan = Series(device_values, nan_as_null=False)
    assert device_series_nan.null_count == 0

    # Case 4: column assignment from a list holding two None entries.
    frame_from_list = DataFrame()
    items = [1, 2, None, 4, 5, 6, None, 8, 9]
    frame_from_list['lst'] = items
    assert frame_from_list['lst'].has_null_mask
    assert frame_from_list['lst'].null_count == 2
def test_dataframe_column_name_indexing():
    """Exercise column lookup with string and integer column names.

    Three scenarios are covered:

    * a frame holding one string-named and one integer-named column,
      each read back individually;
    * every non-empty combination of columns of a mixed-name frame,
      compared against the same selection done in pandas;
    * a frame whose column names are all integers, round-tripped from
      pandas and compared with ``assert_eq``.
    """
    # --- single-column lookup by str and by int name ---
    gpu_frame = DataFrame()
    payload = np.asarray(range(10), dtype=np.int32)
    gpu_frame['a'] = payload
    gpu_frame[1] = payload
    np.testing.assert_equal(
        gpu_frame['a'].to_array(),
        np.asarray(range(10), dtype=np.int32),
    )
    np.testing.assert_equal(
        gpu_frame[1].to_array(),
        np.asarray(range(10), dtype=np.int32),
    )

    # --- multi-column selection over mixed str/int names ---
    host_frame = pd.DataFrame()
    nelem = 10
    host_frame['key1'] = np.random.randint(0, 5, nelem)
    host_frame['key2'] = np.random.randint(0, 3, nelem)
    host_frame[1] = np.arange(1, 1 + nelem)
    host_frame[2] = np.random.random(nelem)
    gpu_frame = DataFrame.from_pandas(host_frame)

    n_columns = len(host_frame.columns)
    for width in range(1, n_columns + 1):
        for subset in combinations(host_frame.columns, width):
            expected = host_frame[list(subset)]
            actual = gpu_frame[list(subset)].to_pandas()
            assert expected.equals(actual)

    # --- frame with only numeric column names ---
    numeric_host = pd.DataFrame()
    for label in range(0, 10):
        numeric_host[label] = range(nelem)
    numeric_gpu = DataFrame.from_pandas(numeric_host)
    assert_eq(numeric_gpu, numeric_host)
def test_kernel_shallow_copy():
    """Run a kernel on a column and compare the frame with its shallow copy.

    A 3x3 frame is converted from pandas, shallow-copied with
    ``copy(deep=False)``, and ``add_one`` (defined elsewhere in this
    module) is launched over the device array of column ``'a'``.  The
    frame and its shallow copy must still compare equal afterwards.
    """
    host_frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=['a', 'b', 'c'],
    )
    source = DataFrame.from_pandas(host_frame)
    shallow = source.copy(deep=False)

    column = source['a']
    # One block with one thread per element of the column.
    # NOTE(review): if ``to_gpu_array()`` hands back a copy, the kernel
    # never touches ``source`` and this check passes trivially — confirm.
    launch = add_one[1, len(column)]
    launch(column.to_gpu_array())

    assert_eq(source, shallow)
def test_kernel_deep_copy():
    """Verify that a deep copy is isolated from in-place kernel writes.

    A 3x3 frame is converted from pandas and deep-copied with
    ``copy(deep=True)``.  ``add_one`` (defined elsewhere in this module)
    is then launched over column ``'b'`` of the source frame.  The
    string renderings of the source and of the deep copy must differ
    afterwards, i.e. the copy must not observe the modification.
    """
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=['a', 'b', 'c'],
    )
    gdf = DataFrame.from_pandas(pdf)
    cdf = gdf.copy(deep=True)
    sr = gdf['b']

    # ``sr.to_gpu_array()`` goes through ``to_dense_buffer``, which
    # returns a copy, so a kernel launched on it would leave ``gdf``
    # untouched and the inequality below could not hold.  Launch on the
    # underlying data buffer instead so the write lands in ``gdf``.
    add_one[1, len(sr)](sr.data.to_gpu_array())

    assert not gdf.to_string().split() == cdf.to_string().split()
def test_cudf_dataframe_copy(copy_fn, ncols, data_type):
    """Check that copying a frame yields an equal frame.

    Parameters
    ----------
    copy_fn : callable
        Applied to the GPU frame to produce the copy under test.
    ncols : int
        Number of columns to generate; they are named ``'a'``, ``'b'``,
        ... by offsetting from ``ord('a')``.
    data_type
        Passed to ``astype`` for every generated column.
    """
    host_frame = pd.DataFrame()
    first_label = ord('a')
    for position in range(ncols):
        random_values = pd.Series(np.random.randint(0, 1000, 20))
        host_frame[chr(position + first_label)] = random_values.astype(
            data_type
        )

    gpu_frame = DataFrame.from_pandas(host_frame)
    duplicate = copy_fn(gpu_frame)
    assert_eq(gpu_frame, duplicate)
def test_dataframe_copy_shallow():
    """Overwrite a column on shallow copies and compare with the originals.

    Both the pandas frame and the GPU frame are shallow-copied with
    ``copy(deep=False)``; column ``'b'`` of each copy is then assigned
    ``[0, 0, 0]``.  The test asserts that column ``'b'`` of each
    original compares equal to column ``'b'`` of its copy.
    """
    host_frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=['a', 'b', 'c'],
    )
    gpu_frame = DataFrame.from_pandas(host_frame)

    host_shallow = host_frame.copy(deep=False)
    gpu_shallow = gpu_frame.copy(deep=False)

    # Same replacement on both copies, pandas first.
    host_shallow['b'] = [0, 0, 0]
    gpu_shallow['b'] = [0, 0, 0]

    assert_eq(host_frame['b'], host_shallow['b'])
    assert_eq(gpu_frame['b'], gpu_shallow['b'])
def test_kernel_deep_copy():
    """Verify that a deep copy does not see in-place kernel writes.

    A 3x3 frame is converted from pandas and deep-copied.  ``add_one``
    (defined elsewhere in this module) is launched over the data buffer
    of column ``'b'`` of the source frame; afterwards the whitespace-split
    string renderings of the source and the deep copy must differ.
    """
    host_frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=['a', 'b', 'c'],
    )
    source = DataFrame.from_pandas(host_frame)
    deep = source.copy(deep=True)
    column = source['b']

    # column.to_gpu_array calls to_dense_buffer, which returns a copy;
    # go through the buffer directly and take its gpu array instead.
    launch = add_one[1, len(column)]
    launch(column.data.to_gpu_array())

    source_tokens = source.to_string().split()
    deep_tokens = deep.to_string().split()
    assert source_tokens != deep_tokens
def test_dataframe_deep_copy_and_insert(copy_parameters):
    """Overwrite a column on a copy and check whether the original follows.

    Parameters
    ----------
    copy_parameters : mapping
        ``copy_parameters['fn']`` is applied to both the pandas frame and
        the GPU frame to produce the copies.
        ``copy_parameters['expected_equality']`` is the value the
        "column 'b' is unchanged relative to the copy" comparison must
        produce for both libraries.
    """
    make_copy = copy_parameters['fn']
    expected = copy_parameters['expected_equality']

    host_frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=['a', 'b', 'c'],
    )
    gpu_frame = DataFrame.from_pandas(host_frame)

    host_copy = make_copy(host_frame)
    gpu_copy = make_copy(gpu_frame)

    # Replace column 'b' on the copies only.
    host_copy['b'] = [0, 0, 0]
    gpu_copy['b'] = [0, 0, 0]

    host_matches = np.array_equal(
        host_frame['b'].values, host_copy['b'].values
    )
    gpu_matches = np.array_equal(
        gpu_frame['b'].to_array(), gpu_copy['b'].to_array()
    )

    assert host_matches == expected
    assert gpu_matches == expected
def test_cudf_dataframe_copy_then_insert(copy_fn, ncols, data_type):
    """Check that inserting a column into a copy leaves the original alone.

    Parameters
    ----------
    copy_fn : callable
        Applied to both the GPU frame and the pandas frame to produce
        the copies.
    ncols : int
        Number of initial columns, named ``'a'``, ``'b'``, ... by
        offsetting from ``ord('a')``.
    data_type
        Passed to ``astype`` for every generated column.

    After a new column ``'aa'`` is added to each copy, the string
    rendering of each copy must differ from that of its original.
    """
    host_frame = pd.DataFrame()
    first_label = ord('a')
    for position in range(ncols):
        random_values = pd.Series(np.random.randint(0, 1000, 20))
        host_frame[chr(position + first_label)] = random_values.astype(
            data_type
        )
    gpu_frame = DataFrame.from_pandas(host_frame)

    gpu_copy = copy_fn(gpu_frame)
    host_copy = copy_fn(host_frame)

    # New column goes into the copies only (GPU copy first, then pandas).
    extra_for_gpu = pd.Series(np.random.randint(0, 1000, 20))
    gpu_copy['aa'] = extra_for_gpu.astype(data_type)
    extra_for_host = pd.Series(np.random.randint(0, 1000, 20))
    host_copy['aa'] = extra_for_host.astype(data_type)

    assert host_copy.to_string().split() != host_frame.to_string().split()
    assert gpu_copy.to_string().split() != gpu_frame.to_string().split()
def test_dataframe_append_empty():
    """Assign a scalar as a new column and compare against pandas.

    The scalar ``100`` is assigned to ``'newcol'`` on both the GPU frame
    and the pandas frame.  The new column must have as many entries as
    the pandas frame has rows, and the GPU frame converted back to
    pandas must equal the pandas frame.
    """
    host_frame = pd.DataFrame(
        {
            "key": [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
            "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        }
    )
    gpu_frame = DataFrame.from_pandas(host_frame)

    # Scalar assignment: GPU frame first, then pandas.
    gpu_frame['newcol'] = 100
    host_frame['newcol'] = 100

    assert len(gpu_frame['newcol']) == len(host_frame)
    assert len(host_frame['newcol']) == len(host_frame)
    pd.testing.assert_frame_equal(gpu_frame.to_pandas(), host_frame)
def test_dataframe_deep_copy_and_insert(copy_parameters):
    """Replace column ``"b"`` on copies and compare with the originals.

    ``copy_parameters["fn"]`` builds the copy of both the pandas frame
    and the GPU frame.  After ``"b"`` is overwritten with zeros on each
    copy, the result of comparing the original's ``"b"`` with the copy's
    ``"b"`` must equal ``copy_parameters["expected_equality"]`` for both
    libraries.
    """
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    pandas_original = pd.DataFrame(rows, columns=["a", "b", "c"])
    gpu_original = DataFrame.from_pandas(pandas_original)

    pandas_copy = copy_parameters["fn"](pandas_original)
    gpu_copy = copy_parameters["fn"](gpu_original)

    pandas_copy["b"] = [0, 0, 0]
    gpu_copy["b"] = [0, 0, 0]

    # Compare the raw column values of each original with its copy.
    pandas_same = np.array_equal(
        pandas_original["b"].values,
        pandas_copy["b"].values,
    )
    gpu_same = np.array_equal(
        gpu_original["b"].to_array(),
        gpu_copy["b"].to_array(),
    )

    assert pandas_same == copy_parameters["expected_equality"]
    assert gpu_same == copy_parameters["expected_equality"]
def test_dataframe_shape():
    """The GPU frame must report the same ``shape`` as its pandas source.

    The source frame has four rows and two columns, one of which holds
    a ``None`` entry.
    """
    host_frame = pd.DataFrame(
        {
            'a': [0, 1, 2, 3],
            'b': [0.1, 0.2, None, 0.3],
        }
    )
    gpu_frame = DataFrame.from_pandas(host_frame)

    expected_shape = host_frame.shape
    assert expected_shape == gpu_frame.shape
def test_dataframe_boolean_mask_with_None():
    """Boolean-list indexing must match pandas on a frame holding ``None``.

    The same four-element boolean list is used to index the pandas frame
    and the GPU frame; the whitespace-split string renderings of the two
    results must be identical.
    """
    host_frame = pd.DataFrame(
        {
            'a': [0, 1, 2, 3],
            'b': [0.1, 0.2, None, 0.3],
        }
    )
    gpu_frame = DataFrame.from_pandas(host_frame)

    # Keep rows 0 and 2 in both libraries.
    host_selected = host_frame[[True, False, True, False]]
    gpu_selected = gpu_frame[[True, False, True, False]]

    host_tokens = host_selected.to_string().split()
    gpu_tokens = gpu_selected.to_string().split()
    assert host_tokens == gpu_tokens