Ejemplo n.º 1
0
def test_index_append_error(data, other):
    """Check the errors raised when appending incompatible inputs to an Index.

    Both append directions are expected to raise ``TypeError`` with the same
    message; appending a Series instead of an Index is then compared against
    pandas through ``assert_exceptions_equal``.
    """
    left_index = cudf.core.index.as_index(data)
    right_index = cudf.core.index.as_index(other)

    # The message names the object dtype first, then the dtype of ``other``
    # when ``data`` is object-typed, otherwise the dtype of ``data`` itself.
    if left_index.dtype == np.dtype("object"):
        got_dtype = right_index.dtype
    else:
        got_dtype = left_index.dtype

    expected_pattern = re.escape(
        f"cudf does not support appending an Index of "
        f"dtype `{np.dtype('object')}` with an Index "
        f"of dtype `{got_dtype}`, please type-cast "
        f"either one of them to same dtypes.")

    with pytest.raises(TypeError, match=expected_pattern):
        left_index.append(right_index)

    with pytest.raises(TypeError, match=expected_pattern):
        right_index.append(left_index)

    other_series = right_index.to_series()

    assert_exceptions_equal(
        lfunc=left_index.to_pandas().append,
        rfunc=left_index.append,
        lfunc_args_and_kwargs=([[other_series.to_pandas()]], ),
        rfunc_args_and_kwargs=([[other_series]], ),
        expected_error_message=r"all inputs must be Index",
    )
Ejemplo n.º 2
0
def test_fillna_categorical(psr_data, fill_value, inplace):
    """Compare ``fillna`` between pandas and cudf for the given fill value.

    When the cudf fill value is a Series whose dtype differs from the data's
    dtype, the raised exceptions are compared; otherwise the results are.
    """
    pd_series = psr_data.copy(deep=True)
    gd_series = Series.from_pandas(pd_series)

    # Only pandas Series fill values need converting for the cudf call.
    fill_value_cudf = (cudf.from_pandas(fill_value) if isinstance(
        fill_value, pd.Series) else fill_value)

    dtype_mismatch = (isinstance(fill_value_cudf, cudf.Series)
                      and gd_series.dtype != fill_value_cudf.dtype)

    if not dtype_mismatch:
        expected = pd_series.fillna(fill_value, inplace=inplace)
        got = gd_series.fillna(fill_value_cudf, inplace=inplace)

        # With inplace=True the originals are compared instead of the
        # values returned by fillna.
        if inplace:
            expected, got = pd_series, gd_series

        assert_eq(expected, got)
        return

    assert_exceptions_equal(
        lfunc=pd_series.fillna,
        rfunc=gd_series.fillna,
        lfunc_args_and_kwargs=([fill_value], {"inplace": inplace}),
        rfunc_args_and_kwargs=([fill_value_cudf], {"inplace": inplace}),
    )
Ejemplo n.º 3
0
def test_categorical_remove_categories(pd_str_cat, inplace):
    """Compare ``cat.remove_categories`` between pandas and cudf Series."""
    pandas_series = pd.Series(pd_str_cat.copy())
    cudf_series = cudf.Series(pd_str_cat.copy())

    assert_eq(pandas_series, cudf_series)
    assert str(pandas_series) == str(cudf_series)

    pandas_result = pandas_series.cat.remove_categories(["a"],
                                                        inplace=inplace)
    cudf_result = cudf_series.cat.remove_categories(["a"], inplace=inplace)

    # A ``None`` return means the caller's Series is the one to inspect.
    if pandas_result is None:
        pandas_result = pandas_series
    if cudf_result is None:
        cudf_result = cudf_series

    assert "a" not in pandas_result.cat.categories.to_list()
    assert "a" not in cudf_result.cat.categories.to_pandas().to_list()

    assert_eq(pandas_result, cudf_result)

    # Removing ["a", "d"] is expected to fail with the message below
    # (presumably because at least one label is not a current category).
    removal_kwargs = {"inplace": inplace}
    assert_exceptions_equal(
        lfunc=cudf_series.to_pandas().cat.remove_categories,
        rfunc=cudf_series.cat.remove_categories,
        lfunc_args_and_kwargs=([["a", "d"]], dict(removal_kwargs)),
        rfunc_args_and_kwargs=([["a", "d"]], dict(removal_kwargs)),
        expected_error_message="removals must all be in old categories",
    )
Ejemplo n.º 4
0
def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op):
    """Compare Series-with-scalar add/sub between pandas and cudf.

    The Series-on-the-left form is checked for both operations.  With the
    scalar on the left, addition results are compared, while subtraction is
    checked through ``assert_exceptions_equal``.
    """
    cudf_series = cudf.Series(data=data, dtype=dtype)
    pandas_series = cudf_series.to_pandas()

    # Series on the left-hand side.
    if op == "add":
        expected, actual = (pandas_series + other_scalars,
                            cudf_series + other_scalars)
    elif op == "sub":
        expected, actual = (pandas_series - other_scalars,
                            cudf_series - other_scalars)

    assert_eq(expected, actual)

    # Scalar on the left-hand side.
    if op == "sub":
        assert_exceptions_equal(
            lfunc=operator.sub,
            rfunc=operator.sub,
            lfunc_args_and_kwargs=([other_scalars, pandas_series], ),
            rfunc_args_and_kwargs=([other_scalars, cudf_series], ),
            compare_error_message=False,
        )
    elif op == "add":
        expected = other_scalars + pandas_series
        actual = other_scalars + cudf_series

        assert_eq(expected, actual)
Ejemplo n.º 5
0
def test_categorical_compare_unordered():
    """Check ==, != and < on an unordered categorical Series."""
    categorical = pd.Categorical(["a", "a", "b", "c", "a"],
                                 categories=["a", "b", "c"])
    pandas_series = pd.Series(categorical)
    cudf_series = cudf.Series(categorical)

    # Equality against itself: every element must be True.
    equal_mask = cudf_series == cudf_series
    assert equal_mask.dtype == np.bool_
    assert type(equal_mask[0]) is np.bool_
    assert np.all(equal_mask.to_array())
    assert np.all(pandas_series == pandas_series)

    # Inequality against itself: no element may be True.
    not_equal_mask = cudf_series != cudf_series
    assert not np.any(not_equal_mask.to_array())
    assert not np.any(pandas_series != pandas_series)

    assert not pandas_series.cat.ordered
    assert not cudf_series.cat.ordered

    # An ordering comparison on the unordered data is checked through
    # assert_exceptions_equal on both libraries.
    assert_exceptions_equal(
        lfunc=operator.lt,
        rfunc=operator.lt,
        lfunc_args_and_kwargs=([pandas_series, pandas_series],),
        rfunc_args_and_kwargs=([cudf_series, cudf_series],),
    )
Ejemplo n.º 6
0
def test_series_drop_raises():
    """Compare ``Series.drop`` errors for labels missing from the index."""
    cudf_series = cudf.Series([10, 20, 30], index=["x", "y", "z"], name="c")
    pandas_series = cudf_series.to_pandas()
    not_found_message = "One or more values not found in axis"

    # A string label that is not in the index.
    assert_exceptions_equal(
        lfunc=pandas_series.drop,
        rfunc=cudf_series.drop,
        lfunc_args_and_kwargs=(["p"],),
        rfunc_args_and_kwargs=(["p"],),
        expected_error_message=not_found_message,
    )

    # An integer label, whose type differs from the string index labels.
    assert_exceptions_equal(
        lfunc=pandas_series.drop,
        rfunc=cudf_series.drop,
        lfunc_args_and_kwargs=([3],),
        rfunc_args_and_kwargs=([3],),
        expected_error_message=not_found_message,
    )

    # With errors="ignore" the results are compared instead.
    expect = pandas_series.drop("p", errors="ignore")
    actual = cudf_series.drop("p", errors="ignore")

    assert_eq(actual, expect)
Ejemplo n.º 7
0
def test_categorical_reductions(op):
    """Compare the exception raised by reduction ``op`` on categorical data.

    Only the exception itself is compared; message comparison is disabled.
    """
    cudf_series = cudf.Series([1, 2, 3, None], dtype="category")
    pandas_series = cudf_series.to_pandas()

    pandas_reduction = getattr(pandas_series, op)
    cudf_reduction = getattr(cudf_series, op)

    utils.assert_exceptions_equal(
        lfunc=pandas_reduction,
        rfunc=cudf_reduction,
        compare_error_message=False,
    )
Ejemplo n.º 8
0
def test_cat_series_binop_error():
    """Check the error messages for adding categorical and numeric columns.

    Both callables passed to ``assert_exceptions_equal`` operate on cudf
    objects; exception-type checking is disabled and only the expected
    message is pinned.
    """
    frame = cudf.DataFrame()
    frame["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc"))
    frame["b"] = np.arange(len(frame))

    categorical_col = frame["a"]
    numeric_col = frame["b"]

    # Categorical operand on the left-hand side.
    assert_exceptions_equal(
        lfunc=operator.add,
        rfunc=operator.add,
        lfunc_args_and_kwargs=([categorical_col, numeric_col],),
        rfunc_args_and_kwargs=([categorical_col, numeric_col],),
        check_exception_type=False,
        expected_error_message="Series of dtype `category` cannot "
        "perform the operation: add",
    )

    # Numeric operand on the left-hand side.
    assert_exceptions_equal(
        lfunc=operator.add,
        rfunc=operator.add,
        lfunc_args_and_kwargs=([numeric_col, categorical_col],),
        rfunc_args_and_kwargs=([numeric_col, categorical_col],),
        check_exception_type=False,
        expected_error_message="'add' operator not supported",
    )
Ejemplo n.º 9
0
def test_groupby_nonempty_no_keys(pdf):
    """Compare the exception from ``groupby([])`` between pandas and cudf.

    Error messages are not compared.
    """
    gdf = cudf.from_pandas(pdf)

    assert_exceptions_equal(
        lfunc=lambda: pdf.groupby([]),
        rfunc=lambda: gdf.groupby([]),
        compare_error_message=False,
    )
Ejemplo n.º 10
0
def test_timedelta_ops_datetime_inputs(datetime_data, timedelta_data,
                                       datetime_dtype, timedelta_dtype, ops):
    """Compare datetime/timedelta arithmetic between pandas and cudf.

    ``datetime <ops> timedelta`` results are always compared.  For the
    reversed operand order, "add" compares results while "sub" is checked
    through ``assert_exceptions_equal`` with a pinned message.
    """
    cudf_datetime = cudf.Series(datetime_data, dtype=datetime_dtype)
    cudf_timedelta = cudf.Series(timedelta_data, dtype=timedelta_dtype)

    pandas_datetime = cudf_datetime.to_pandas()
    pandas_timedelta = cudf_timedelta.to_pandas()

    # Datetime Series as the receiver of the operation.
    expected = getattr(pandas_datetime, ops)(pandas_timedelta)
    actual = getattr(cudf_datetime, ops)(cudf_timedelta)
    assert_eq(expected, actual)

    # Timedelta Series as the receiver / left operand.
    if ops == "sub":
        sub_error = re.escape(
            f"Subtraction of {cudf_timedelta.dtype} with "
            f"{cudf_datetime.dtype} cannot be performed.")
        assert_exceptions_equal(
            lfunc=operator.sub,
            rfunc=operator.sub,
            lfunc_args_and_kwargs=([pandas_timedelta, pandas_datetime], ),
            rfunc_args_and_kwargs=([cudf_timedelta, cudf_datetime], ),
            expected_error_message=sub_error,
        )
    elif ops == "add":
        expected = getattr(pandas_timedelta, ops)(pandas_datetime)
        actual = getattr(cudf_timedelta, ops)(cudf_datetime)
        assert_eq(expected, actual)
Ejemplo n.º 11
0
def test_datetime_unsupported_reductions(op):
    """Compare the exception raised by reduction ``op`` on datetime data."""
    cudf_series = cudf.Series([1, 2, 3, None], dtype="datetime64[ns]")
    pandas_series = cudf_series.to_pandas()

    pandas_reduction = getattr(pandas_series, op)
    cudf_reduction = getattr(cudf_series, op)

    utils.assert_exceptions_equal(
        lfunc=pandas_reduction,
        rfunc=cudf_reduction,
    )
Ejemplo n.º 12
0
def test_to_datetime_errors(data):
    """Compare ``to_datetime`` exceptions between pandas and cudf."""
    pandas_containers = (pd.Series, pd.DataFrame, pd.Index)

    # pandas containers are converted to cudf; anything else is passed to
    # cudf unchanged.
    if isinstance(data, pandas_containers):
        gd_data = cudf.from_pandas(data)
    else:
        gd_data = data

    assert_exceptions_equal(
        lfunc=pd.to_datetime,
        rfunc=cudf.to_datetime,
        lfunc_args_and_kwargs=([data], ),
        rfunc_args_and_kwargs=([gd_data], ),
    )
Ejemplo n.º 13
0
def test_multiindex_sample_basic(n, frac, replace, axis):
    """Compare ``sample`` on a cudf index built from a frame against pandas.

    pandas ``DataFrame.sample`` is attempted first.  If it raises, the
    exceptions raised by pandas and cudf are compared; otherwise only the
    shapes of the two sampled outputs are compared.
    """
    # as we currently don't support column with same name
    if axis == 1 and replace:
        return

    pdf = pd.DataFrame(
        {
            "a": [1, 2, 3, 4, 5],
            "float": [0.05, 0.2, 0.3, 0.2, 0.25],
            "int": [1, 3, 5, 4, 2],
        },
    )
    mul_index = cudf.Index(DataFrame.from_pandas(pdf))

    # One definition of the arguments keeps the pandas and cudf calls in
    # sync; each call below receives its own copy.
    sample_kwargs = {
        "n": n,
        "frac": frac,
        "replace": replace,
        "random_state": 0,
        "axis": axis,
    }

    try:
        pout = pdf.sample(**sample_kwargs)
    except Exception:
        # Catch ``Exception`` rather than ``BaseException`` so that
        # KeyboardInterrupt / SystemExit are never swallowed and rerouted
        # into the exception-comparison path.
        assert_exceptions_equal(
            lfunc=pdf.sample,
            rfunc=mul_index.sample,
            lfunc_args_and_kwargs=([], dict(sample_kwargs)),
            rfunc_args_and_kwargs=([], dict(sample_kwargs)),
        )
    else:
        gout = mul_index.sample(**sample_kwargs)
        assert pout.shape == gout.shape
Ejemplo n.º 14
0
def test_index_difference_sort_error():
    """Compare ``Index.difference(..., sort=True)`` exceptions."""
    pandas_index = pd.Index([1, 2, 3])
    cudf_index = cudf.Index([1, 2, 3])

    assert_exceptions_equal(
        lfunc=pandas_index.difference,
        rfunc=cudf_index.difference,
        lfunc_args_and_kwargs=([pandas_index], {"sort": True}),
        rfunc_args_and_kwargs=([cudf_index], {"sort": True}),
    )
Ejemplo n.º 15
0
def test_categorical_setitem_invalid():
    """Compare exceptions from setting position 0 to 5 on categorical data.

    The Series are built from ``[1, 2, 3]``, so 5 is not among the values
    they were created with.
    """
    values = [1, 2, 3]
    pandas_series = pd.Series(values, dtype="category")
    cudf_series = cudf.Series(values, dtype="category")

    assert_exceptions_equal(
        lfunc=pandas_series.__setitem__,
        rfunc=cudf_series.__setitem__,
        lfunc_args_and_kwargs=([0, 5], {}),
        rfunc_args_and_kwargs=([0, 5], {}),
    )
Ejemplo n.º 16
0
def test_multiindex_rename_error(names):
    """Compare ``MultiIndex.rename(names=...)`` exceptions."""
    levels = [["python", "cobra"], [2018, 2019]]
    pandas_index = pd.MultiIndex.from_product(levels)
    cudf_index = cudf.from_pandas(pandas_index)

    rename_kwargs = {"names": names}

    assert_exceptions_equal(
        lfunc=pandas_index.rename,
        rfunc=cudf_index.rename,
        lfunc_args_and_kwargs=([], dict(rename_kwargs)),
        rfunc_args_and_kwargs=([], dict(rename_kwargs)),
    )
Ejemplo n.º 17
0
def test_series_fillna_error():
    """Compare exceptions from ``Series.fillna`` given a DataFrame value."""
    pandas_series = pd.Series([1, 2, None, 3, None])
    cudf_series = cudf.from_pandas(pandas_series)

    pandas_fill = pd.DataFrame({"a": [1, 2, 3]})
    cudf_fill = cudf.DataFrame({"a": [1, 2, 3]})

    assert_exceptions_equal(
        lfunc=pandas_series.fillna,
        rfunc=cudf_series.fillna,
        lfunc_args_and_kwargs=([pandas_fill], ),
        rfunc_args_and_kwargs=([cudf_fill], ),
    )
Ejemplo n.º 18
0
def test_index_set_names_error(idx, level, names):
    """Compare ``Index.set_names(names=..., level=...)`` exceptions."""
    pandas_index = idx.copy()
    cudf_index = cudf.from_pandas(idx)

    set_names_kwargs = {"names": names, "level": level}

    assert_exceptions_equal(
        lfunc=pandas_index.set_names,
        rfunc=cudf_index.set_names,
        lfunc_args_and_kwargs=([], dict(set_names_kwargs)),
        rfunc_args_and_kwargs=([], dict(set_names_kwargs)),
    )
Ejemplo n.º 19
0
def test_raise_data_error():
    """Compare exceptions from ``groupby("a").mean`` with a string column.

    Error messages are not compared.
    """
    pandas_frame = pd.DataFrame({
        "a": [1, 2, 3, 4],
        "b": ["a", "b", "c", "d"],
    })
    cudf_frame = cudf.from_pandas(pandas_frame)

    pandas_mean = pandas_frame.groupby("a").mean
    cudf_mean = cudf_frame.groupby("a").mean

    assert_exceptions_equal(
        lfunc=pandas_mean,
        rfunc=cudf_mean,
        compare_error_message=False,
    )
Ejemplo n.º 20
0
def test_timedelta_unsupported_reductions(op):
    """Compare the exception raised by reduction ``op`` on timedelta data."""
    cudf_series = cudf.Series([1, 2, 3, None], dtype="timedelta64[ns]")
    pandas_series = cudf_series.to_pandas()

    # The expected message spells out "kurtosis" for the "kurt" reduction.
    reduction_name = "kurtosis" if op == "kurt" else op
    expected_message = re.escape("cannot perform " + reduction_name +
                                 " with type timedelta64[ns]")

    utils.assert_exceptions_equal(
        lfunc=getattr(pandas_series, op),
        rfunc=getattr(cudf_series, op),
        expected_error_message=expected_message,
    )
Ejemplo n.º 21
0
def test_column_view_invalid_numeric_to_numeric(data, from_dtype, to_dtype):
    """Compare ``view(to_dtype)`` exceptions for a NumPy array and a column."""
    host_array = np.asarray(data, dtype=from_dtype)
    device_column = as_column(data, dtype=from_dtype)

    view_args = ([to_dtype], )

    assert_exceptions_equal(
        lfunc=host_array.view,
        rfunc=device_column.view,
        lfunc_args_and_kwargs=view_args,
        rfunc_args_and_kwargs=view_args,
        expected_error_message="Can not divide",
    )
Ejemplo n.º 22
0
def test_multiindex_loc_rows_0(pdf, gdf, pdfIndex):
    """Compare ``.loc`` exceptions for the key ``(("d",), slice(None))``."""
    pdf.index = pdfIndex
    gdf.index = cudf.from_pandas(pdfIndex)

    # Row key ("d",) combined with a full slice.
    loc_key = (("d", ), slice(None))

    assert_exceptions_equal(
        lfunc=pdf.loc.__getitem__,
        rfunc=gdf.loc.__getitem__,
        lfunc_args_and_kwargs=([loc_key], ),
        rfunc_args_and_kwargs=([loc_key], ),
    )
Ejemplo n.º 23
0
def test_duplicated_with_misspelled_column_name(subset):
    """Compare ``drop_duplicates(subset)`` exceptions; messages are ignored."""
    column_values = [0, 0, 1]
    df = DataFrame({
        "A": list(column_values),
        "B": list(column_values),
        "C": list(column_values),
    })
    gdf = cudf.DataFrame.from_pandas(df)

    assert_exceptions_equal(
        lfunc=df.drop_duplicates,
        rfunc=gdf.drop_duplicates,
        lfunc_args_and_kwargs=([subset], ),
        rfunc_args_and_kwargs=([subset], ),
        compare_error_message=False,
    )
Ejemplo n.º 24
0
def test_add_categories_error(data, add):
    """Compare ``cat.add_categories(add)`` exceptions; messages are ignored."""
    pandas_series = pd.Series(data, dtype="category")
    cudf_series = cudf.Series(data, dtype="category")

    assert_exceptions_equal(
        lfunc=pandas_series.cat.add_categories,
        rfunc=cudf_series.cat.add_categories,
        lfunc_args_and_kwargs=([add],),
        rfunc_args_and_kwargs=([add],),
        compare_error_message=False,
    )
Ejemplo n.º 25
0
def test_datetime_to_datetime_error():
    """Check the error for a format string passed as 2nd positional argument.

    The expected message shows the format string being reported as an
    invalid ``errors`` value.  Exception types are not compared.
    """
    positional_args = ["02-Oct-2017 09:30", "%d-%B-%Y %H:%M"]
    expected_message = re.escape(
        "errors parameter has to be either one of: ['ignore', 'raise', "
        "'coerce', 'warn'], found: %d-%B-%Y %H:%M")

    assert_exceptions_equal(
        lfunc=pd.to_datetime,
        rfunc=cudf.to_datetime,
        lfunc_args_and_kwargs=(list(positional_args), ),
        rfunc_args_and_kwargs=(list(positional_args), ),
        check_exception_type=False,
        expected_error_message=expected_message,
    )
Ejemplo n.º 26
0
def test_dataframe_loc_outbound():
    """Compare ``.loc[11]`` exceptions on 10-row frames."""
    size = 10

    # Draw the host data first (integers, then floats) and load the same
    # arrays into both frames.
    ha = np.random.randint(low=0, high=100, size=size).astype(np.int32)
    hb = np.random.random(size).astype(np.float32)

    df = DataFrame()
    df["a"] = ha
    df["b"] = hb

    pdf = pd.DataFrame()
    pdf["a"] = ha
    pdf["b"] = hb

    assert_exceptions_equal(
        lfunc=lambda: pdf.loc[11],
        rfunc=lambda: df.loc[11],
    )
Ejemplo n.º 27
0
def test_df_different_index_shape(df2, binop):
    """Compare ``binop`` exceptions between a fixed frame and ``df2``."""
    df1 = cudf.DataFrame([1, 2, 3], index=[1, 2, 3])

    pandas_operands = [df1.to_pandas(), df2.to_pandas()]
    cudf_operands = [df1, df2]

    utils.assert_exceptions_equal(
        lfunc=binop,
        rfunc=binop,
        lfunc_args_and_kwargs=(pandas_operands, ),
        rfunc_args_and_kwargs=(cudf_operands, ),
    )
Ejemplo n.º 28
0
def test_pandas_concat_compatibility_axis1_eq_index():
    """Compare ``concat(axis=1)`` exceptions for a Series with index [1, 1, 1].

    The second Series has a duplicated index label; the first has a unique
    index.
    """
    values = ["a", "b", "c"]
    cudf_first = gd.Series(values, index=[0, 1, 2])
    cudf_second = gd.Series(values, index=[1, 1, 1])
    pandas_first = cudf_first.to_pandas()
    pandas_second = cudf_second.to_pandas()

    assert_exceptions_equal(
        lfunc=pd.concat,
        rfunc=gd.concat,
        lfunc_args_and_kwargs=(
            [],
            {"objs": [pandas_first, pandas_second], "axis": 1},
        ),
        rfunc_args_and_kwargs=(
            [],
            {"objs": [cudf_first, cudf_second], "axis": 1},
        ),
    )
Ejemplo n.º 29
0
def test_multiindex_set_names_error(level, names):
    """Compare ``MultiIndex.set_names(names=..., level=...)`` exceptions."""
    levels = [["python", "cobra"], [2018, 2019], ["aab", "bcd"]]
    pandas_index = pd.MultiIndex.from_product(levels)
    cudf_index = cudf.from_pandas(pandas_index)

    set_names_kwargs = {"names": names, "level": level}

    assert_exceptions_equal(
        lfunc=pandas_index.set_names,
        rfunc=cudf_index.set_names,
        lfunc_args_and_kwargs=([], dict(set_names_kwargs)),
        rfunc_args_and_kwargs=([], dict(set_names_kwargs)),
    )
Ejemplo n.º 30
0
def test_timedelta_datetime_cast_invalid():
    """Compare ``astype`` exceptions for timedelta <-> datetime casts."""
    # timedelta64[ns] -> datetime64[ns]
    timedelta_series = cudf.Series([1, 2, 3], dtype="timedelta64[ns]")
    pandas_timedelta = timedelta_series.to_pandas()

    assert_exceptions_equal(
        lfunc=pandas_timedelta.astype,
        rfunc=timedelta_series.astype,
        lfunc_args_and_kwargs=(["datetime64[ns]"], ),
        rfunc_args_and_kwargs=(["datetime64[ns]"], ),
    )

    # datetime64[ns] -> timedelta64[ns]
    datetime_series = cudf.Series([1, 2, 3], dtype="datetime64[ns]")
    pandas_datetime = datetime_series.to_pandas()

    assert_exceptions_equal(
        lfunc=pandas_datetime.astype,
        rfunc=datetime_series.astype,
        lfunc_args_and_kwargs=(["timedelta64[ns]"], ),
        rfunc_args_and_kwargs=(["timedelta64[ns]"], ),
    )