Example #1
0
def test_operator_func_between_series(dtype, func, has_nulls, fill_value):
    """Compare a named binary-operator method of cudf Series with pandas.

    Two random arrays are wrapped as cudf Series (with random null masks
    when ``has_nulls == 'some'``), converted to pandas, and the method
    named ``func`` is called with ``fill_value`` on both pairs. The two
    results are then compared with ``utils.assert_eq``.
    """
    nelem = 1000
    arr1 = utils.gen_rand(dtype, nelem) * 10000
    # Keeping a low value because CUDA 'pow' has 2 full range error
    arr2 = utils.gen_rand(dtype, nelem) * 100

    if has_nulls == 'some':
        nulls1 = utils.random_bitmask(nelem)
        nulls2 = utils.random_bitmask(nelem)
        sr1 = Series.from_masked_array(arr1, nulls1)
        sr2 = Series.from_masked_array(arr2, nulls2)
    else:
        sr1 = Series(arr1)
        sr2 = Series(arr2)

    psr1 = sr1.to_pandas()
    psr2 = sr2.to_pandas()

    expect = getattr(psr1, func)(psr2, fill_value=fill_value)
    got = getattr(sr1, func)(sr2, fill_value=fill_value)

    # This is being done because of the various gymnastics required to support
    # equality for null values. cudf.Series().to_pandas() replaces nulls with
    # None and so a bool Series becomes object Series. Which does not match the
    # output of equality op in pandas which remains a bool. Furthermore, NaN
    # values are treated as not comparable and always return False in a bool op
    # except in not-equal op where bool(Nan != Nan) gives True.
    #
    # ``np.bool_`` is used rather than the ``np.bool`` alias, which was
    # deprecated and then removed from NumPy.
    if got.dtype == np.bool_:
        got = got.fillna(True) if func == 'ne' else got.fillna(False)

    utils.assert_eq(expect, got)
Example #2
0
def test_searchsorted(side, obj_class, vals_class):
    """Check ``searchsorted`` on a sorted masked cudf object against pandas.

    ``obj_class`` selects whether the searched object is left as a Series,
    turned into an index, or reduced to its ``_column``; ``vals_class``
    selects whether the looked-up values are a Series or an index.
    """
    nelem = 1000

    haystack_data = gen_rand("float64", nelem)
    haystack_mask = random_bitmask(nelem)
    needle_data = gen_rand("float64", nelem)
    needle_mask = random_bitmask(nelem)

    sr = cudf.Series.from_masked_array(haystack_data, haystack_mask)
    vals = cudf.Series.from_masked_array(needle_data, needle_mask)

    # The searched object is sorted before any conversion takes place.
    sr = sr.sort_values()

    # Searched object: Series (default), Index, or Column.
    if obj_class == "index":
        sr = cudf.Series.as_index(sr)
    elif obj_class == "column":
        sr = sr._column

    # Looked-up values: Series (default) or Index.
    if vals_class == "index":
        vals = cudf.Series.as_index(vals)

    expect = sr.to_pandas().searchsorted(vals.to_pandas(), side)
    got = sr.searchsorted(vals, side)

    assert_eq(expect, cupy.asnumpy(got))
Example #3
0
def test_reflected_ops_cudf_scalar(funcs, dtype, obj_class):
    """Apply a (cpu, gpu) pair of callables to the same random data.

    ``funcs`` unpacks into the callable applied to the host array and the
    callable applied to the cudf object; their results must be close.
    """
    host_op, device_op = funcs

    # Fixed seed, then the random host data.
    np.random.seed(12)
    host_values = utils.gen_rand(dtype, 100, low=10)

    use_index = obj_class == "Index"

    # Device-side operand, optionally converted to an index.
    device_operand = Series(host_values)
    if use_index:
        device_operand = as_index(device_operand)

    device_result = device_op(device_operand)

    if use_index:
        # NOTE(review): the converted operand is not read again below.
        device_operand = Series(device_operand)

    # Host-side reference result.
    host_result = host_op(host_values)

    np.testing.assert_allclose(host_result, device_result.to_array())
Example #4
0
def test_sum_decimal(dtype, nelem):
    """Sum of decimal strings: pandas ``Decimal`` objects vs cudf ``dtype``."""
    scaled = gen_rand("int64", nelem) / 100
    data = list(map(str, scaled))

    host_decimals = pd.Series(list(map(Decimal, data)))
    expected = host_decimals.sum()

    device_decimals = cudf.Series(data).astype(dtype)
    got = device_decimals.sum()

    assert_eq(expected, got)
Example #5
0
def test_product_decimal(dtype):
    """Product of three decimal strings: pandas ``Decimal`` vs cudf."""
    scaled = gen_rand("int8", 3) / 10
    data = list(map(str, scaled))

    host_decimals = pd.Series(list(map(Decimal, data)))
    expected = host_decimals.product()

    device_decimals = cudf.Series(data).astype(dtype)
    got = device_decimals.product()

    assert_eq(expected, got)
Example #6
0
def test_sum_of_squares_decimal(dtype):
    """``sum_of_squares`` on cudf vs ``pow(2).sum()`` on pandas Decimals."""
    scaled = gen_rand("int8", 3) / 10
    data = list(map(str, scaled))

    host_decimals = pd.Series(list(map(Decimal, data)))
    expected = host_decimals.pow(2).sum()

    device_decimals = cudf.Series(data).astype(dtype)
    got = device_decimals.sum_of_squares()

    assert_eq(expected, got)
Example #7
0
def test_max(dtype, nelem):
    """``Series.max()`` must equal the maximum of the underlying host data.

    ``dtype`` may be a NumPy scalar class, a dtype name or a ``np.dtype``
    instance; it is normalised to the scalar class before being used as a
    cast, so the expected value can be built for any of those spellings.
    """
    # Calling ``dtype(...)`` directly only works for scalar classes.
    scalar_type = np.dtype(dtype).type

    data = gen_rand(dtype, nelem)
    sr = Series(data)

    got = sr.max()
    expect = scalar_type(data.max())

    assert expect == got
Example #8
0
def test_sum(dtype, nelem):
    """``Series.sum()`` must approximately equal the sum of the host data.

    ``dtype`` may be a NumPy scalar class, a dtype name or a ``np.dtype``
    instance; it is normalised to the scalar class so that both the cast
    and the float32 tolerance check behave the same for every spelling.
    """
    # Calling ``dtype(...)`` directly only works for scalar classes, and a
    # dtype name would never compare equal to ``np.float32`` below.
    scalar_type = np.dtype(dtype).type

    data = gen_rand(dtype, nelem)
    sr = Series(data)

    got = sr.sum()
    expect = scalar_type(data.sum())

    # Fewer significant digits are required for float32.
    significant = 4 if scalar_type == np.float32 else 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
Example #9
0
def test_min(dtype, nelem):
    """``Series.min()`` must equal the minimum of the underlying host data."""
    # Work with the NumPy scalar class so it can be used as a cast below.
    scalar_type = np.dtype(dtype).type
    data = gen_rand(scalar_type, nelem)

    got = Series(data).min()
    expect = scalar_type(data.min())

    assert expect == got
Example #10
0
def test_series_binop(binop, obj_class):
    """Apply ``binop`` to two cudf operands and to the same data in pandas."""
    nelem = 1000
    lhs_values = utils.gen_rand("float64", nelem) * 10000
    # Keeping a low value because CUDA 'pow' has 2 full range error
    rhs_values = utils.gen_rand("float64", nelem) * 10

    use_index = obj_class == "Index"

    lhs = Series(lhs_values)
    rhs = Series(rhs_values)
    if use_index:
        lhs = as_index(lhs)
        rhs = as_index(rhs)

    result = binop(lhs, rhs)
    expect = binop(pd.Series(lhs_values), pd.Series(rhs_values))

    # Index results are wrapped in a Series before the comparison.
    if use_index:
        result = Series(result)

    utils.assert_eq(result, expect)
Example #11
0
def test_cummin(dtype, nelem):
    """``cummin`` on a cudf Series and on a DataFrame column vs pandas."""
    # to keep data in range for int8
    bounds = {"low": -2, "high": 2} if dtype == np.int8 else {}
    data = gen_rand(dtype, nelem, **bounds)

    decimal = 4 if dtype == np.float32 else 6

    # Plain series.
    device_series = Series(data)
    host_series = pd.Series(data)
    np.testing.assert_array_almost_equal(
        device_series.cummin(), host_series.cummin(), decimal=decimal
    )

    # Named series, i.e. a column taken from a dataframe.
    device_frame = DataFrame()
    device_frame["a"] = Series(data)
    host_frame = pd.DataFrame()
    host_frame["a"] = pd.Series(data)
    np.testing.assert_array_almost_equal(
        device_frame.a.cummin(), host_frame.a.cummin(), decimal=decimal
    )
Example #12
0
def test_searchsorted(side, obj_class):
    """Check ``searchsorted`` on a sorted masked cudf object against pandas.

    The searched object is a cudf Series built from random data and a
    random null mask, sorted, and converted to an index when
    ``obj_class == "index"``. The values looked up are another masked
    Series. The cudf result is compared with pandas' ``searchsorted`` on
    the ``to_pandas()`` counterparts.
    """
    nelem = 1000
    column_data = gen_rand("float64", nelem)
    column_mask = random_bitmask(nelem)

    values_data = gen_rand("float64", nelem)
    values_mask = random_bitmask(nelem)

    sr = cudf.Series.from_masked_array(column_data, column_mask)
    vals = cudf.Series.from_masked_array(values_data, values_mask)

    sr = sr.sort_values()

    # Convert to an index only for the "index" case; the guard previously
    # tested for "series", which swapped the two labels.
    if obj_class == "index":
        sr = cudf.Series.as_index(sr)

    psr = sr.to_pandas()
    pvals = vals.to_pandas()

    expect = psr.searchsorted(pvals, side)
    got = sr.searchsorted(vals, side)

    assert_eq(expect, got.to_array())
Example #13
0
    def gen_df():
        """Build a pandas frame of random float64 columns named by letters.

        Reads ``num_cols``, ``num_rows`` and ``nulls`` from the enclosing
        scope. When ``nulls == 'some'``, half of the rows of every column
        (chosen at random, without replacement) are set to NaN.
        """
        frame = pd.DataFrame()
        from string import ascii_lowercase

        # Distinct letter positions, one per column.
        letter_ids = np.random.choice(num_cols + 5, num_cols, replace=False)

        for letter_id in letter_ids:
            column_name = ascii_lowercase[letter_id]
            values = utils.gen_rand('float64', num_rows) * 10000
            if nulls == 'some':
                nan_rows = np.random.choice(
                    num_rows, size=int(num_rows / 2), replace=False
                )
                values[nan_rows] = np.nan
            frame[column_name] = values
        return frame
Example #14
0
def test_sum_masked(nelem):
    """Sum of a masked float64 Series vs the sum of the selected host data."""
    dtype = np.float64
    data = gen_rand(dtype, nelem)

    # Packed mask, its per-element expansion, and the resulting null count.
    packed_mask = utils.random_bitmask(nelem)
    expanded_mask = utils.expand_bits_to_bytes(packed_mask)[:nelem]
    n_nulls = utils.count_zero(expanded_mask)

    masked_series = Series.from_masked_array(data, packed_mask, n_nulls)
    got = masked_series.sum()

    # Reference: sum only the host elements selected by the expanded mask.
    keep = np.asarray(expanded_mask, dtype=np.bool_)[: data.size]
    expect = data[keep].sum()

    significant = 4 if dtype == np.float32 else 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
Example #15
0
def test_sum_of_squares(dtype, nelem):
    """``Series.sum_of_squares()`` vs ``(data ** 2).sum()`` on the host."""
    data = gen_rand(dtype, nelem)

    got = Series(data).sum_of_squares()
    expect = (data ** 2).sum()

    # Non-"i" kinds: approximate comparison with a per-dtype accuracy.
    if np.dtype(dtype).kind != "i":
        np.testing.assert_approx_equal(
            expect, got, significant=accuracy_for_dtype[dtype]
        )
        return

    # Signed integers: only compare when the host value is within range.
    within_range = 0 <= expect <= np.iinfo(dtype).max
    if not within_range:
        print("overflow, passing")
        return
    np.testing.assert_array_almost_equal(expect, got)
Example #16
0
def test_product(dtype, nelem):
    """``Series.product()`` must approximately equal ``np.prod`` of the data.

    For signed-integer dtypes the data is all ones with up to 30 randomly
    chosen positions overwritten; other dtypes use ``gen_rand``.
    """
    if np.dtype(dtype).kind == "i":
        data = np.ones(nelem, dtype=dtype)
        # Set at most 30 items to [0..2) to keep the value within 2^32
        for _ in range(30):
            data[random.randrange(nelem)] = random.random() * 2
    else:
        data = gen_rand(dtype, nelem)

    sr = Series(data)

    got = sr.product()
    # ``np.prod`` replaces the deprecated ``np.product`` alias.
    expect = np.prod(data)

    # Fewer significant digits are required for float32.
    significant = 4 if dtype == np.float32 else 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
Example #17
0
def test_product(dtype, nelem):
    """``Series.product()`` must approximately equal ``np.prod`` of the data.

    The NumPy global RNG is seeded first. For integer dtypes (signed or
    unsigned) the data is all ones with up to 30 randomly chosen positions
    overwritten; other dtypes use ``gen_rand``.
    """
    np.random.seed(0)
    dtype = np.dtype(dtype).type
    if np.dtype(dtype).kind in {"u", "i"}:
        data = np.ones(nelem, dtype=dtype)
        # Set at most 30 items to [0..2) to keep the value within 2^32
        for _ in range(30):
            data[np.random.randint(low=0, high=nelem,
                                   size=1)] = (np.random.uniform() * 2)
    else:
        data = gen_rand(dtype, nelem)

    sr = Series(data)

    got = sr.product()
    # ``np.prod`` replaces the deprecated ``np.product`` alias.
    expect = np.prod(data)

    # Fewer significant digits are required for float32.
    significant = 4 if dtype == np.float32 else 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
Example #18
0
def math_op_test(dtype, fn, nelem=128, test_df=False, positive_only=False):
    """Apply ``fn`` to matching pandas and cudf inputs and compare results.

    The inputs are Series built from the same random values, or
    single-column DataFrames (column ``0``) when ``test_df`` is true.
    Both results are printed before being compared.
    """
    randvals = gen_rand(dtype, nelem, positive_only=positive_only)
    host_series = pd.Series(randvals.astype(dtype))
    device_series = cudf.Series(host_series)

    # Default to the bare series; wrap them in frames on request.
    device_input, host_input = device_series, host_series
    if test_df:
        device_input = cudf.DataFrame()
        device_input[0] = device_series
        host_input = pd.DataFrame()
        host_input[0] = host_series

    expect = fn(host_input)
    got = fn(device_input)

    for label, value in (("got", got), ("expect", expect)):
        print(label)
        print(value)
    assert_eq(expect, got)
Example #19
0
def test_reflected_ops_scalar(func, dtype, obj_class):
    """Apply ``func`` to a cudf object and to the host array it came from."""
    # Fixed seed, then the random host data.
    np.random.seed(12)
    host_values = utils.gen_rand(dtype, 100, low=10)

    use_index = obj_class == 'Index'

    # Device-side operand, optionally converted to an index.
    device_operand = Series(host_values)
    if use_index:
        device_operand = as_index(device_operand)

    device_result = func(device_operand)

    if use_index:
        # NOTE(review): the converted operand is not read again below.
        device_operand = Series(device_operand)

    # Host-side reference result.
    host_result = func(host_values)

    np.testing.assert_allclose(host_result, device_result)
Example #20
0
def math_op_test(
    dtype, fn, nelem=128, test_df=False, positive_only=False, check_dtype=True
):
    """Apply ``fn`` to matching pandas and cudf inputs and compare results.

    The NumPy global RNG is seeded first. The inputs are Series built from
    the same random values, or single-column DataFrames (column ``0``)
    when ``test_df`` is true. ``check_dtype`` is forwarded to
    ``assert_eq``.
    """

    def as_single_column_frame(frame_cls, series):
        # Empty frame with the series stored under column 0.
        frame = frame_cls()
        frame[0] = series
        return frame

    np.random.seed(0)
    randvals = gen_rand(dtype, nelem, positive_only=positive_only)
    host_series = pd.Series(randvals.astype(dtype))
    device_series = cudf.Series(host_series)

    if test_df:
        device_input = as_single_column_frame(cudf.DataFrame, device_series)
        host_input = as_single_column_frame(pd.DataFrame, host_series)
    else:
        device_input, host_input = device_series, host_series

    expect = fn(host_input)
    got = fn(device_input)

    assert_eq(expect, got, check_dtype=check_dtype)