예제 #1
0
파일: test_search.py 프로젝트: rongou/cudf
def test_searchsorted(side, obj_class, vals_class):
    """Compare cudf ``searchsorted`` with pandas on masked float64 data.

    Parameters
    ----------
    side
        Forwarded unchanged to both the pandas and the cudf
        ``searchsorted`` call.
    obj_class
        Selects the form of the sorted reference object. ``"index"`` and
        ``"column"`` get special handling; any other value keeps the Series.
    vals_class
        Selects the form of the searched values. ``"index"`` gets special
        handling; any other value keeps the Series.
    """
    nelem = 1000
    column_data = gen_rand("float64", nelem)
    column_mask = random_bitmask(nelem)

    values_data = gen_rand("float64", nelem)
    values_mask = random_bitmask(nelem)

    sr = cudf.Series.from_masked_array(column_data, column_mask)
    vals = cudf.Series.from_masked_array(values_data, values_mask)

    # The reference object is sorted before it is searched.
    sr = sr.sort_values()

    # Reference object can be Series, Index, or Column
    if obj_class == "index":
        # reset_index() returns a new object unless inplace=True, so the
        # result has to be bound; the bare call used to be a no-op.
        # NOTE(review): this still yields a Series with a fresh default
        # index, not an Index object -- confirm whether an explicit Index
        # conversion was intended here.
        sr = sr.reset_index(drop=True)
    elif obj_class == "column":
        sr = sr._column

    # Values can be Series or Index
    if vals_class == "index":
        # Same as above: keep the returned object instead of discarding it.
        vals = vals.reset_index(drop=True)

    psr = sr.to_pandas()
    pvals = vals.to_pandas()

    expect = psr.searchsorted(pvals, side)
    got = sr.searchsorted(vals, side)

    assert_eq(expect, cupy.asnumpy(got))
예제 #2
0
def test_sum_decimal(dtype, nelem):
    """Check that a cudf sum after ``astype(dtype)`` matches a Decimal sum.

    Random int64 values are divided by 100 and stringified; the same strings
    feed a pandas Series of ``Decimal`` objects (the expected value) and a
    cudf Series cast to ``dtype`` (the value under test).
    """
    np.random.seed(0)
    scaled = gen_rand("int64", nelem) / 100
    data = list(map(str, scaled))

    host_series = pd.Series(list(map(Decimal, data)))
    expected = host_series.sum()

    device_series = cudf.Series(data).astype(dtype)
    got = device_series.sum()

    assert_eq(expected, got)
예제 #3
0
def test_sum_of_squares_decimal(dtype):
    """Check cudf ``sum_of_squares`` on ``dtype`` against a Decimal reference.

    Three random int8 values are divided by 10 and stringified; the expected
    value squares and sums them as ``Decimal`` objects in pandas.
    """
    np.random.seed(0)
    scaled = gen_rand("int8", 3) / 10
    data = list(map(str, scaled))

    host_series = pd.Series(list(map(Decimal, data)))
    expected = host_series.pow(2).sum()

    device_series = cudf.Series(data).astype(dtype)
    got = device_series.sum_of_squares()

    assert_eq(expected, got)
예제 #4
0
def test_product_decimal(dtype):
    """Check cudf ``product`` on ``dtype`` against a Decimal reference.

    Three random int8 values are divided by 10 and stringified; the expected
    value is the pandas product of the corresponding ``Decimal`` objects.
    """
    np.random.seed(0)
    scaled = gen_rand("int8", 3) / 10
    data = list(map(str, scaled))

    host_series = pd.Series(list(map(Decimal, data)))
    expected = host_series.product()

    device_series = cudf.Series(data).astype(dtype)
    got = device_series.product()

    assert_eq(expected, got)
예제 #5
0
def test_sum(dtype, nelem):
    """Check ``Series.sum`` against the NumPy sum of the same random data."""
    scalar_type = cudf.dtype(dtype).type
    host_values = gen_rand(scalar_type, nelem)

    actual = Series(host_values).sum()
    expected = host_values.sum()

    # float32 results are compared to fewer significant digits.
    if scalar_type == np.float32:
        digits = 4
    else:
        digits = 6
    np.testing.assert_approx_equal(expected, actual, significant=digits)
예제 #6
0
def test_max(dtype, nelem):
    """Check ``Series.max`` against the NumPy maximum of the same data."""
    scalar_type = cudf.dtype(dtype).type
    host_values = gen_rand(scalar_type, nelem)

    actual = Series(host_values).max()
    # The NumPy maximum is wrapped in the scalar type before comparing.
    expected = scalar_type(host_values.max())

    assert expected == actual
예제 #7
0
def test_cummin(dtype, nelem):
    """Check cudf ``cummin`` against pandas for a Series and a frame column."""
    # int8 input is drawn with explicit low/high bounds to keep data in range.
    range_kwargs = {"low": -2, "high": 2} if dtype == np.int8 else {}
    values = gen_rand(dtype, nelem, **range_kwargs)

    # float32 is compared to fewer decimal places than the other dtypes.
    places = 6
    if dtype == np.float32:
        places = 4

    # Plain series.
    device_result = cudf.Series(values).cummin().to_numpy()
    host_result = pd.Series(values).cummin()
    np.testing.assert_array_almost_equal(
        device_result, host_result, decimal=places
    )

    # Named series taken from a dataframe column.
    device_frame = cudf.DataFrame()
    device_frame["a"] = cudf.Series(values)
    host_frame = pd.DataFrame()
    host_frame["a"] = pd.Series(values)

    device_col_result = device_frame.a.cummin().to_numpy()
    host_col_result = host_frame.a.cummin()
    np.testing.assert_array_almost_equal(
        device_col_result, host_col_result, decimal=places
    )
예제 #8
0
def test_sum_masked(nelem):
    """Check ``Series.sum`` on a masked float64 series against NumPy.

    The expected value sums only the entries selected by the expanded mask.
    """
    dtype = np.float64
    values = gen_rand(dtype, nelem)

    packed_mask = utils.random_bitmask(nelem)
    # Expand the packed mask and trim it to one entry per element.
    expanded = utils.expand_bits_to_bytes(packed_mask)[:nelem]
    n_nulls = utils.count_zero(expanded)

    masked_series = Series.from_masked_array(values, packed_mask, n_nulls)
    actual = masked_series.sum()

    selector = np.asarray(expanded, dtype=np.bool_)[: values.size]
    expected = values[selector].sum()

    digits = 4 if dtype == np.float32 else 6
    np.testing.assert_approx_equal(expected, actual, significant=digits)
예제 #9
0
def test_product(dtype, nelem):
    """Check ``Series.product`` against the pandas product of the same data.

    Integer dtypes start from an array of ones with up to 30 randomly chosen
    positions overwritten; every other dtype uses ``gen_rand`` output.
    """
    np.random.seed(0)
    scalar_type = cudf.dtype(dtype).type
    is_integer = cudf.dtype(scalar_type).kind in {"u", "i"}

    if not is_integer:
        values = gen_rand(scalar_type, nelem)
    else:
        values = np.ones(nelem, dtype=scalar_type)
        # Set at most 30 items to [0..2) to keep the value within 2^32
        for _ in range(30):
            # The replacement is drawn before the position so the random
            # stream is consumed in the same order as a single assignment
            # statement (right-hand side first, then the subscript).
            replacement = np.random.uniform() * 2
            position = np.random.randint(low=0, high=nelem, size=1)
            values[position] = replacement

    actual = Series(values).product()
    expected = pd.Series(values).product()

    digits = 4 if scalar_type == np.float32 else 6
    np.testing.assert_approx_equal(expected, actual, significant=digits)
예제 #10
0
def test_sum_of_squares(dtype, nelem):
    """Check ``sum_of_squares`` on a Series and on a DataFrame built from it.

    Non-integer dtypes are compared approximately using the precision from
    ``accuracy_for_dtype``. Integer dtypes are compared only when the NumPy
    reference lies within ``[0, iinfo(dtype).max]``; otherwise the check is
    skipped with a printed notice.
    """
    scalar_type = cudf.dtype(dtype).type
    values = gen_rand(scalar_type, nelem)
    series = Series(values)
    frame = cudf.DataFrame(series)

    series_result = series.sum_of_squares()
    frame_result = frame.sum_of_squares()
    expected = (values ** 2).sum()

    if cudf.dtype(scalar_type).kind not in {"u", "i"}:
        np.testing.assert_approx_equal(
            expected, series_result, significant=accuracy_for_dtype[scalar_type]
        )
        np.testing.assert_approx_equal(
            expected,
            frame_result.iloc[0],
            significant=accuracy_for_dtype[scalar_type],
        )
        return

    # Integer path: a reference outside the dtype's range cannot be compared.
    if not (0 <= expected <= np.iinfo(scalar_type).max):
        print("overflow, passing")
        return

    np.testing.assert_array_almost_equal(expected, series_result)
    np.testing.assert_array_almost_equal(expected, frame_result.iloc[0])
예제 #11
0
def math_op_test(dtype,
                 fn,
                 nelem=128,
                 test_df=False,
                 positive_only=False,
                 check_dtype=True):
    """Apply ``fn`` to matching pandas and cudf inputs and compare results.

    Parameters
    ----------
    dtype
        Passed to ``gen_rand`` and used to cast the generated values.
    fn
        Callable applied to the pandas input and to the cudf input.
    nelem
        Number of elements requested from ``gen_rand``.
    test_df
        When true, each series is placed in column ``0`` of an otherwise
        empty DataFrame and ``fn`` receives the frames instead.
    positive_only
        Forwarded to ``gen_rand``.
    check_dtype
        Forwarded to ``assert_eq``.
    """
    np.random.seed(0)
    raw = gen_rand(dtype, nelem, positive_only=positive_only)
    host_series = pd.Series(raw.astype(dtype))
    device_series = cudf.Series(host_series)

    # Default to the bare series; wrap them in frames only on request.
    host_input, device_input = host_series, device_series
    if test_df:
        device_input = cudf.DataFrame()
        device_input[0] = device_series
        host_input = pd.DataFrame()
        host_input[0] = host_series

    expected = fn(host_input)
    actual = fn(device_input)

    assert_eq(expected, actual, check_dtype=check_dtype)