Beispiel #1
0
def test_dataframe_setitem_from_masked_object():
    """Check how NaN / None inputs are turned into nulls on construction.

    Four construction paths are exercised with an array in which 20 of the
    100 entries are NaN:

    * ``Series`` from a host array,
    * ``DataFrame.from_pandas``,
    * ``Series`` from a device array,
    * column assignment from a Python list containing ``None``.

    With ``nan_as_null=True`` the test expects a null mask and 20 nulls;
    with ``nan_as_null=False`` it expects zero nulls.
    """
    n_rows = 100
    n_missing = 20

    # Build random data and knock out ``n_missing`` randomly placed entries.
    ary = np.random.randn(n_rows)
    mask = np.zeros(n_rows, dtype=bool)
    mask[:n_missing] = True
    np.random.shuffle(mask)
    ary[mask] = np.nan

    # 1) Series built from a host array.
    host_as_null = Series(ary, nan_as_null=True)
    assert host_as_null.has_null_mask
    assert host_as_null.null_count == n_missing
    host_as_nan = Series(ary, nan_as_null=False)
    assert host_as_nan.null_count == 0

    # 2) DataFrame built from a pandas DataFrame.
    frame_as_null = DataFrame.from_pandas(
        pd.DataFrame({'a': ary}), nan_as_null=True
    )
    assert frame_as_null['a'].has_null_mask
    assert frame_as_null['a'].null_count == n_missing
    frame_as_nan = DataFrame.from_pandas(
        pd.DataFrame({'a': ary}), nan_as_null=False
    )
    assert frame_as_nan['a'].null_count == 0

    # 3) Series built from a device array.
    gpu_ary = rmm.to_device(ary)
    device_as_null = Series(gpu_ary, nan_as_null=True)
    assert device_as_null.has_null_mask
    assert device_as_null.null_count == n_missing
    device_as_nan = Series(gpu_ary, nan_as_null=False)
    assert device_as_nan.null_count == 0

    # 4) Column assigned from a list holding two ``None`` entries.
    list_frame = DataFrame()
    values_with_none = [1, 2, None, 4, 5, 6, None, 8, 9]
    list_frame['lst'] = values_with_none
    assert list_frame['lst'].has_null_mask
    assert list_frame['lst'].null_count == 2
Beispiel #2
0
def test_dataframe_column_name_indexing():
    """Exercise column access with string, integer and mixed column names.

    Three scenarios are checked: direct setitem/getitem with a string and an
    integer key, selection by every non-empty combination of mixed-type
    column names, and a frame whose column names are all integers.
    """
    # Scenario 1: one string-named and one integer-named column.
    gdf = DataFrame()
    data = np.asarray(range(10), dtype=np.int32)
    gdf['a'] = data
    gdf[1] = data
    for key in ('a', 1):
        np.testing.assert_equal(gdf[key].to_array(),
                                np.asarray(range(10), dtype=np.int32))

    # Scenario 2: select every non-empty subset of mixed-type column names.
    pdf = pd.DataFrame()
    nelem = 10
    pdf['key1'] = np.random.randint(0, 5, nelem)
    pdf['key2'] = np.random.randint(0, 3, nelem)
    pdf[1] = np.arange(1, 1 + nelem)
    pdf[2] = np.random.random(nelem)
    gdf = DataFrame.from_pandas(pdf)
    for subset_size in range(1, len(pdf.columns) + 1):
        for subset in combinations(pdf.columns, subset_size):
            expected = pdf[list(subset)]
            actual = gdf[list(subset)].to_pandas()
            assert expected.equals(actual)

    # Scenario 3: only numeric column names.
    numeric_pdf = pd.DataFrame()
    for column in range(10):
        numeric_pdf[column] = range(nelem)
    numeric_gdf = DataFrame.from_pandas(numeric_pdf)
    assert_eq(numeric_gdf, numeric_pdf)
Beispiel #3
0
def test_kernel_shallow_copy():
    """Verify that a shallow copy stays equal after an in-place kernel write.

    The ``add_one`` kernel is launched on the device buffer backing column
    ``'a'`` of the original frame. The frame and its ``copy(deep=False)``
    must still compare equal afterwards.
    """
    pdf = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                       columns=['a', 'b', 'c'])
    gdf = DataFrame.from_pandas(pdf)
    cdf = gdf.copy(deep=False)
    sr = gdf['a']
    # Launch on the column's underlying buffer instead of
    # ``sr.to_gpu_array()``: the latter may return a dense copy, in which
    # case the kernel would never touch ``gdf`` and the equality check below
    # would pass without testing anything.
    add_one[1, len(sr)](sr.data.to_gpu_array())
    assert_eq(gdf, cdf)
Beispiel #4
0
def test_kernel_deep_copy():
    """Verify that a deep copy is unaffected by an in-place kernel write.

    The ``add_one`` kernel is launched on the device buffer backing column
    ``'b'`` of the original frame. The frame and its ``copy(deep=True)``
    must render differently afterwards.
    """
    pdf = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                       columns=['a', 'b', 'c'])
    gdf = DataFrame.from_pandas(pdf)
    cdf = gdf.copy(deep=True)
    sr = gdf['b']
    # Launch on the column's underlying buffer instead of
    # ``sr.to_gpu_array()``: the latter may return a dense copy, in which
    # case ``gdf`` is never modified and the inequality below fails for a
    # reason unrelated to copy semantics.
    add_one[1, len(sr)](sr.data.to_gpu_array())
    assert gdf.to_string().split() != cdf.to_string().split()
Beispiel #5
0
def test_cudf_dataframe_copy(copy_fn, ncols, data_type):
    """Check that a freshly made copy compares equal to its source frame.

    Parameters
    ----------
    copy_fn : callable
        Applied to the cudf DataFrame to produce the copy.
    ncols : int
        Number of columns to generate; they are named 'a', 'b', ...
    data_type
        Type each generated column is cast to via ``astype``.
    """
    pdf = pd.DataFrame()
    for offset in range(ncols):
        random_values = pd.Series(np.random.randint(0, 1000, 20))
        column_name = chr(ord('a') + offset)
        pdf[column_name] = random_values.astype(data_type)

    original = DataFrame.from_pandas(pdf)
    duplicate = copy_fn(original)
    assert_eq(original, duplicate)
Beispiel #6
0
def test_dataframe_copy_shallow():
    """Compare column 'b' of a frame and its shallow copy after overwrite.

    Column ``'b'`` is overwritten with zeros on the shallow copy of both the
    pandas and the cudf frame; each original column is then asserted equal
    to the corresponding column of its copy.
    """
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    pdf = pd.DataFrame(rows, columns=['a', 'b', 'c'])
    gdf = DataFrame.from_pandas(pdf)

    copy_pdf = pdf.copy(deep=False)
    copy_gdf = gdf.copy(deep=False)

    # Overwrite the same column on both shallow copies.
    copy_pdf['b'] = [0] * 3
    copy_gdf['b'] = [0] * 3

    assert_eq(pdf['b'], copy_pdf['b'])
    assert_eq(gdf['b'], copy_gdf['b'])
def test_kernel_deep_copy():
    """Verify that a deep copy is unaffected by an in-place kernel write.

    The ``add_one`` kernel is launched on the buffer behind column ``'b'``
    of the original frame; the original and its deep copy must then render
    differently.
    """
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    pdf = pd.DataFrame(rows, columns=['a', 'b', 'c'])
    gdf = DataFrame.from_pandas(pdf)
    cdf = gdf.copy(deep=True)
    sr = gdf['b']

    # column.to_gpu_array calls to_dense_buffer, which returns a copy, so
    # the buffer has to be accessed directly before asking for a gpu array.
    device_view = sr.data.to_gpu_array()
    add_one[1, len(sr)](device_view)

    original_tokens = gdf.to_string().split()
    copy_tokens = cdf.to_string().split()
    assert original_tokens != copy_tokens
Beispiel #8
0
def test_dataframe_deep_copy_and_insert(copy_parameters):
    """Check that pandas and cudf agree on copy-then-overwrite semantics.

    Parameters
    ----------
    copy_parameters : dict
        ``'fn'`` is the copy callable applied to both frames;
        ``'expected_equality'`` is whether column ``'b'`` of the original
        should still equal that of the copy after the copy's column has
        been overwritten with zeros.
    """
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    pdf = pd.DataFrame(rows, columns=['a', 'b', 'c'])
    gdf = DataFrame.from_pandas(pdf)

    make_copy = copy_parameters['fn']
    copy_pdf = make_copy(pdf)
    copy_gdf = make_copy(gdf)

    # Overwrite column 'b' on the copies only.
    copy_pdf['b'] = [0] * 3
    copy_gdf['b'] = [0] * 3

    pdf_is_equal = np.array_equal(pdf['b'].values, copy_pdf['b'].values)
    gdf_is_equal = np.array_equal(gdf['b'].to_array(),
                                  copy_gdf['b'].to_array())

    expected = copy_parameters['expected_equality']
    assert pdf_is_equal == expected
    assert gdf_is_equal == expected
Beispiel #9
0
def test_cudf_dataframe_copy_then_insert(copy_fn, ncols, data_type):
    """Check that inserting a column into a copy does not alter the source.

    Parameters
    ----------
    copy_fn : callable
        Applied to both the cudf and the pandas frame to produce the copies.
    ncols : int
        Number of columns to generate; they are named 'a', 'b', ...
    data_type
        Type each generated column is cast to via ``astype``.
    """
    pdf = pd.DataFrame()
    for offset in range(ncols):
        random_values = pd.Series(np.random.randint(0, 1000, 20))
        column_name = chr(ord('a') + offset)
        pdf[column_name] = random_values.astype(data_type)

    df = DataFrame.from_pandas(pdf)
    copy_df = copy_fn(df)
    copy_pdf = copy_fn(pdf)

    # Add a brand-new column to each copy; the originals never get 'aa'.
    extra_for_cudf = pd.Series(np.random.randint(0, 1000, 20))
    copy_df['aa'] = extra_for_cudf.astype(data_type)
    extra_for_pandas = pd.Series(np.random.randint(0, 1000, 20))
    copy_pdf['aa'] = extra_for_pandas.astype(data_type)

    assert copy_pdf.to_string().split() != pdf.to_string().split()
    assert copy_df.to_string().split() != df.to_string().split()
Beispiel #10
0
def test_dataframe_append_empty():
    """Check that assigning a scalar creates a full-length new column.

    The scalar ``100`` is assigned to a new column on both the cudf and the
    pandas frame; the new column must have one entry per row and the two
    frames must match.
    """
    keys = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]
    values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    pdf = pd.DataFrame({"key": keys, "value": values})
    gdf = DataFrame.from_pandas(pdf)

    gdf['newcol'] = 100
    pdf['newcol'] = 100

    n_rows = len(pdf)
    assert len(gdf['newcol']) == n_rows
    assert len(pdf['newcol']) == n_rows
    pd.testing.assert_frame_equal(gdf.to_pandas(), pdf)
Beispiel #11
0
def test_dataframe_deep_copy_and_insert(copy_parameters):
    """Check that pandas and cudf agree on copy-then-overwrite semantics.

    Parameters
    ----------
    copy_parameters : dict
        ``"fn"`` is the copy callable applied to both frames;
        ``"expected_equality"`` is whether column ``"b"`` of the original
        should still equal that of the copy after the copy's column has
        been overwritten with zeros.
    """
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    pdf = pd.DataFrame(rows, columns=["a", "b", "c"])
    gdf = DataFrame.from_pandas(pdf)

    make_copy = copy_parameters["fn"]
    copy_pdf = make_copy(pdf)
    copy_gdf = make_copy(gdf)

    # Overwrite column "b" on the copies only.
    copy_pdf["b"] = [0] * 3
    copy_gdf["b"] = [0] * 3

    original_host = pdf["b"].values
    copied_host = copy_pdf["b"].values
    pdf_is_equal = np.array_equal(original_host, copied_host)

    original_device = gdf["b"].to_array()
    copied_device = copy_gdf["b"].to_array()
    gdf_is_equal = np.array_equal(original_device, copied_device)

    expected = copy_parameters["expected_equality"]
    assert pdf_is_equal == expected
    assert gdf_is_equal == expected
Beispiel #12
0
def test_dataframe_shape():
    """Check that a cudf frame reports the same shape as its pandas source."""
    columns = {'a': [0, 1, 2, 3], 'b': [0.1, 0.2, None, 0.3]}
    pdf = pd.DataFrame(columns)
    gdf = DataFrame.from_pandas(pdf)

    expected_shape = pdf.shape
    assert expected_shape == gdf.shape
Beispiel #13
0
def test_dataframe_boolean_mask_with_None():
    """Check boolean-list row selection on a frame containing a ``None``.

    The same boolean list is applied to the pandas and the cudf frame, and
    the string renderings of the two results are compared token by token.
    """
    columns = {'a': [0, 1, 2, 3], 'b': [0.1, 0.2, None, 0.3]}
    pdf = pd.DataFrame(columns)
    gdf = DataFrame.from_pandas(pdf)

    keep_rows = [True, False, True, False]
    pdf_masked = pdf[keep_rows]
    gdf_masked = gdf[keep_rows]

    expected_tokens = pdf_masked.to_string().split()
    actual_tokens = gdf_masked.to_string().split()
    assert expected_tokens == actual_tokens