Ejemplo n.º 1
0
def test_cat_series_binop_error():
    """Check the errors reported when adding categorical and numeric columns.

    Both callables passed to ``assert_exceptions_equal`` receive cudf Series,
    and the exception-type check is switched off; an expected error message
    is supplied for each operand order.
    """
    frame = cudf.DataFrame()
    frame["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc"))
    frame["b"] = np.arange(len(frame))

    cat_col = frame["a"]
    num_col = frame["b"]

    # Categorical operand on the left-hand side.
    categorical_lhs_message = ("Series of dtype `category` cannot "
                               "perform the operation: add")
    assert_exceptions_equal(
        lfunc=operator.add,
        rfunc=operator.add,
        lfunc_args_and_kwargs=([cat_col, num_col], ),
        rfunc_args_and_kwargs=([cat_col, num_col], ),
        check_exception_type=False,
        expected_error_message=categorical_lhs_message,
    )

    # Numerical operand on the left-hand side.
    numerical_lhs_message = "'add' operator not supported"
    assert_exceptions_equal(
        lfunc=operator.add,
        rfunc=operator.add,
        lfunc_args_and_kwargs=([num_col, cat_col], ),
        rfunc_args_and_kwargs=([num_col, cat_col], ),
        check_exception_type=False,
        expected_error_message=numerical_lhs_message,
    )
Ejemplo n.º 2
0
def test_series_drop_raises():
    """Dropping labels missing from the index must fail alike in both libraries.

    Also checks that ``errors="ignore"`` yields equal results for a missing
    label.
    """
    cudf_series = cudf.Series([10, 20, 30], index=["x", "y", "z"], name="c")
    pandas_series = cudf_series.to_pandas()
    not_found = "One or more values not found in axis"

    # A string label that is not part of the index.
    assert_exceptions_equal(
        lfunc=pandas_series.drop,
        rfunc=cudf_series.drop,
        lfunc_args_and_kwargs=(["p"], ),
        rfunc_args_and_kwargs=(["p"], ),
        expected_error_message=not_found,
    )

    # An integer label against the string index (dtype mismatch).
    assert_exceptions_equal(
        lfunc=pandas_series.drop,
        rfunc=cudf_series.drop,
        lfunc_args_and_kwargs=([3], ),
        rfunc_args_and_kwargs=([3], ),
        expected_error_message=not_found,
    )

    # With errors="ignore" the results of both libraries are compared.
    expect = pandas_series.drop("p", errors="ignore")
    actual = cudf_series.drop("p", errors="ignore")
    assert_eq(actual, expect)
Ejemplo n.º 3
0
def test_categorical_compare_unordered():
    """Exercise ==, != and < on an unordered categorical Series.

    Equality and inequality are checked on the results directly; the
    ordering operator is handed to ``assert_exceptions_equal`` for the
    pandas and cudf series.
    """
    categorical = pd.Categorical(
        ["a", "a", "b", "c", "a"], categories=["a", "b", "c"]
    )
    pandas_series = pd.Series(categorical)
    cudf_series = cudf.Series(categorical)

    # Equality: boolean result, every element True.
    eq_result = cudf_series == cudf_series
    assert eq_result.dtype == np.bool_
    assert type(eq_result[0]) == np.bool_
    assert np.all(eq_result.to_numpy())
    assert np.all(pandas_series == pandas_series)

    # Inequality: no element True.
    ne_result = cudf_series != cudf_series
    assert not np.any(ne_result.to_numpy())
    assert not np.any(pandas_series != pandas_series)

    # Both series report themselves as unordered.
    assert not pandas_series.cat.ordered
    assert not cudf_series.cat.ordered

    # Ordering operator on the unordered categoricals.
    assert_exceptions_equal(
        lfunc=operator.lt,
        rfunc=operator.lt,
        lfunc_args_and_kwargs=([pandas_series, pandas_series], ),
        rfunc_args_and_kwargs=([cudf_series, cudf_series], ),
    )
Ejemplo n.º 4
0
def test_timedelta_datetime_cast_invalid():
    """Casting between timedelta64[ns] and datetime64[ns] is expected to fail.

    Each direction is run through ``assert_exceptions_equal`` with an
    escaped expected error message.
    """
    # timedelta64[ns] -> datetime64[ns]
    cudf_series = cudf.Series([1, 2, 3], dtype="timedelta64[ns]")
    pandas_series = cudf_series.to_pandas()
    timedelta_message = re.escape(
        "cannot astype a timedelta from timedelta64[ns] to datetime64[ns]"
    )
    assert_exceptions_equal(
        lfunc=pandas_series.astype,
        rfunc=cudf_series.astype,
        lfunc_args_and_kwargs=(["datetime64[ns]"], ),
        rfunc_args_and_kwargs=(["datetime64[ns]"], ),
        expected_error_message=timedelta_message,
    )

    # datetime64[ns] -> timedelta64[ns]
    cudf_series = cudf.Series([1, 2, 3], dtype="datetime64[ns]")
    pandas_series = cudf_series.to_pandas()
    datetime_message = re.escape("cannot astype a datetimelike from "
                                 "datetime64[ns] to timedelta64[ns]")
    assert_exceptions_equal(
        lfunc=pandas_series.astype,
        rfunc=cudf_series.astype,
        lfunc_args_and_kwargs=(["timedelta64[ns]"], ),
        rfunc_args_and_kwargs=(["timedelta64[ns]"], ),
        expected_error_message=datetime_message,
    )
Ejemplo n.º 5
0
def test_fillna_categorical(psr_data, fill_value, inplace):
    """Compare ``fillna`` between pandas and cudf for the given fill value.

    If the fill value converts to a cudf Series whose dtype differs from the
    dtype of the cudf series under test, the call is routed through
    ``assert_exceptions_equal``; otherwise the filled results are compared.
    """
    pandas_series = psr_data.copy(deep=True)
    cudf_series = cudf.from_pandas(pandas_series)

    # Series fill values need a cudf counterpart; anything else is reused.
    cudf_fill = (cudf.from_pandas(fill_value)
                 if isinstance(fill_value, pd.Series) else fill_value)

    dtype_mismatch = (isinstance(cudf_fill, cudf.Series)
                      and cudf_series.dtype != cudf_fill.dtype)
    if dtype_mismatch:
        assert_exceptions_equal(
            lfunc=pandas_series.fillna,
            rfunc=cudf_series.fillna,
            lfunc_args_and_kwargs=([fill_value], {"inplace": inplace}),
            rfunc_args_and_kwargs=([cudf_fill], {"inplace": inplace}),
        )
        return

    expected = pandas_series.fillna(fill_value, inplace=inplace)
    got = cudf_series.fillna(cudf_fill, inplace=inplace)

    # For in-place fills the series themselves are compared.
    if inplace:
        expected = pandas_series
        got = cudf_series

    assert_eq(expected, got)
Ejemplo n.º 6
0
def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op):
    """Compare series-with-scalar "add"/"sub" between pandas and cudf.

    The series-on-the-left result is compared first. With the scalar on the
    left, "add" results are compared, while "sub" is passed to
    ``assert_exceptions_equal`` without comparing error messages.
    """
    cudf_series = cudf.Series(data=data, dtype=dtype)
    pandas_series = cudf_series.to_pandas()

    # Series on the left-hand side.
    if op == "add":
        expected = pandas_series + other_scalars
        actual = cudf_series + other_scalars
    elif op == "sub":
        expected = pandas_series - other_scalars
        actual = cudf_series - other_scalars

    assert_eq(expected, actual)

    # Scalar on the left-hand side.
    if op == "add":
        expected = other_scalars + pandas_series
        actual = other_scalars + cudf_series
        assert_eq(expected, actual)
    elif op == "sub":
        assert_exceptions_equal(
            lfunc=operator.sub,
            rfunc=operator.sub,
            lfunc_args_and_kwargs=([other_scalars, pandas_series], ),
            rfunc_args_and_kwargs=([other_scalars, cudf_series], ),
            compare_error_message=False,
        )
Ejemplo n.º 7
0
def test_categorical_reductions(op):
    """Run the reduction named ``op`` on a categorical Series in both libraries.

    The bound methods are passed to ``utils.assert_exceptions_equal`` with
    error-message comparison disabled.
    """
    cudf_series = cudf.Series([1, 2, 3, None], dtype="category")
    pandas_series = cudf_series.to_pandas()

    pandas_reduction = getattr(pandas_series, op)
    cudf_reduction = getattr(cudf_series, op)

    utils.assert_exceptions_equal(
        lfunc=pandas_reduction,
        rfunc=cudf_reduction,
        compare_error_message=False,
    )
Ejemplo n.º 8
0
def test_column_set_unequal_length_object_by_mask():
    """Mask assignment with a wrongly sized replacement, in both libraries.

    The mask selects three positions; the replacements hold two and four
    values. Each case uses freshly built series and error messages are not
    compared.
    """
    values = [1, 2, 3, 4, 5]
    too_short = [8, 9]
    too_long = [8, 9, 10, 11]
    selector = [True, True, False, True, False]

    for replacement in (too_short, too_long):
        pandas_series = pd.Series(values)
        cudf_series = cudf.Series(values)
        assert_exceptions_equal(
            lfunc=pandas_series.__setitem__,
            rfunc=cudf_series.__setitem__,
            lfunc_args_and_kwargs=([selector, replacement], {}),
            rfunc_args_and_kwargs=([selector, replacement], {}),
            compare_error_message=False,
        )
Ejemplo n.º 9
0
def test_timedelta_ops_datetime_inputs(datetime_data, timedelta_data,
                                       datetime_dtype, timedelta_dtype, ops):
    """Compare datetime/timedelta series arithmetic between pandas and cudf.

    ``ops`` names the Series method to call. The datetime-first result is
    always compared. With the timedelta first, "add" results are compared
    and "sub" is passed to ``assert_exceptions_equal`` with an expected
    message built from the two cudf dtypes.
    """
    cudf_datetimes = cudf.Series(datetime_data, dtype=datetime_dtype)
    cudf_timedeltas = cudf.Series(timedelta_data, dtype=timedelta_dtype)

    pandas_datetimes = cudf_datetimes.to_pandas()
    pandas_timedeltas = cudf_timedeltas.to_pandas()

    # Datetime operand first.
    expected = getattr(pandas_datetimes, ops)(pandas_timedeltas)
    actual = getattr(cudf_datetimes, ops)(cudf_timedeltas)
    assert_eq(expected, actual)

    # Timedelta operand first.
    if ops == "add":
        expected = getattr(pandas_timedeltas, ops)(pandas_datetimes)
        actual = getattr(cudf_timedeltas, ops)(cudf_datetimes)
        assert_eq(expected, actual)
    elif ops == "sub":
        subtraction_message = re.escape(
            f"Subtraction of {cudf_timedeltas.dtype} with "
            f"{cudf_datetimes.dtype} cannot be performed.")
        assert_exceptions_equal(
            lfunc=operator.sub,
            rfunc=operator.sub,
            lfunc_args_and_kwargs=([pandas_timedeltas, pandas_datetimes], ),
            rfunc_args_and_kwargs=([cudf_timedeltas, cudf_datetimes], ),
            expected_error_message=subtraction_message,
        )
Ejemplo n.º 10
0
def test_categorical_remove_categories(pd_str_cat, inplace):
    """Compare ``cat.remove_categories`` between pandas and cudf.

    Category "a" is removed and the results compared; a removal list that
    also contains "d" is then passed to ``assert_exceptions_equal``.
    """
    pandas_series = pd.Series(pd_str_cat.copy())
    cudf_series = cudf.Series(pd_str_cat.copy())

    assert_eq(pandas_series, cudf_series)
    assert str(pandas_series) == str(cudf_series)

    pandas_removed = pandas_series.cat.remove_categories(
        ["a"], inplace=inplace)
    cudf_removed = cudf_series.cat.remove_categories(["a"], inplace=inplace)

    # A None return value means the original series is the one to inspect.
    if pandas_removed is None:
        pandas_removed = pandas_series
    if cudf_removed is None:
        cudf_removed = cudf_series

    assert "a" not in pandas_removed.cat.categories.to_list()
    assert "a" not in cudf_removed.cat.categories.to_pandas().to_list()

    assert_eq(pandas_removed, cudf_removed)

    # Removal list ["a", "d"], with the expected error message below.
    assert_exceptions_equal(
        lfunc=cudf_series.to_pandas().cat.remove_categories,
        rfunc=cudf_series.cat.remove_categories,
        lfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
        rfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
        expected_error_message="removals must all be in old categories",
    )
Ejemplo n.º 11
0
def test_to_datetime_errors(data):
    """Pass ``data`` to ``pd.to_datetime`` and ``cudf.to_datetime`` and compare.

    pandas Series, DataFrame and Index inputs are converted with
    ``cudf.from_pandas`` for the cudf call; other inputs are passed as-is.
    """
    pandas_input = data
    pandas_containers = (pd.Series, pd.DataFrame, pd.Index)
    cudf_input = (cudf.from_pandas(pandas_input)
                  if isinstance(pandas_input, pandas_containers)
                  else pandas_input)

    assert_exceptions_equal(
        lfunc=pd.to_datetime,
        rfunc=cudf.to_datetime,
        lfunc_args_and_kwargs=([pandas_input], ),
        rfunc_args_and_kwargs=([cudf_input], ),
    )
Ejemplo n.º 12
0
def test_multiindex_rename_error(names):
    """Call ``rename(names=names)`` on a two-level MultiIndex in both libraries.

    The pandas and cudf bound methods go through ``assert_exceptions_equal``.
    """
    levels = [["python", "cobra"], [2018, 2019]]
    pandas_index = pd.MultiIndex.from_product(levels)
    cudf_index = cudf.from_pandas(pandas_index)

    assert_exceptions_equal(
        lfunc=pandas_index.rename,
        rfunc=cudf_index.rename,
        lfunc_args_and_kwargs=([], {"names": names}),
        rfunc_args_and_kwargs=([], {"names": names}),
    )
Ejemplo n.º 13
0
def test_series_fillna_error():
    """Call ``Series.fillna`` with a DataFrame fill value in both libraries.

    Each library receives a DataFrame of its own kind and the calls are
    compared by ``assert_exceptions_equal``.
    """
    pandas_series = pd.Series([1, 2, None, 3, None])
    cudf_series = cudf.from_pandas(pandas_series)

    pandas_fill = pd.DataFrame({"a": [1, 2, 3]})
    cudf_fill = cudf.DataFrame({"a": [1, 2, 3]})

    assert_exceptions_equal(
        lfunc=pandas_series.fillna,
        rfunc=cudf_series.fillna,
        lfunc_args_and_kwargs=([pandas_fill], ),
        rfunc_args_and_kwargs=([cudf_fill], ),
    )
Ejemplo n.º 14
0
def test_categorical_setitem_invalid():
    """Assign the value 5 at position 0 of a categorical Series of 1, 2, 3.

    The pandas and cudf ``__setitem__`` calls are compared by
    ``assert_exceptions_equal``.
    """
    values = [1, 2, 3]
    pandas_series = pd.Series(values, dtype="category")
    cudf_series = cudf.Series(values, dtype="category")

    assert_exceptions_equal(
        lfunc=pandas_series.__setitem__,
        rfunc=cudf_series.__setitem__,
        lfunc_args_and_kwargs=([0, 5], {}),
        rfunc_args_and_kwargs=([0, 5], {}),
    )
Ejemplo n.º 15
0
def test_interpolate_dataframe_error_cases(data, kwargs):
    """Call ``DataFrame.interpolate(**kwargs)`` in both libraries and compare.

    The frame is built in cudf from ``data`` and converted to pandas for the
    reference call.
    """
    cudf_frame = cudf.DataFrame(data)
    pandas_frame = cudf_frame.to_pandas()

    assert_exceptions_equal(
        lfunc=pandas_frame.interpolate,
        rfunc=cudf_frame.interpolate,
        lfunc_args_and_kwargs=([], kwargs),
        rfunc_args_and_kwargs=([], kwargs),
    )
Ejemplo n.º 16
0
def test_datetime_to_datetime_error():
    """Pass a format string as the second positional ``to_datetime`` argument.

    The expected message complains about the ``errors`` parameter; exception
    types are not compared.
    """
    errors_message = re.escape(
        "errors parameter has to be either one of: ['ignore', 'raise', "
        "'coerce', 'warn'], found: %d-%B-%Y %H:%M")

    assert_exceptions_equal(
        lfunc=pd.to_datetime,
        rfunc=cudf.to_datetime,
        lfunc_args_and_kwargs=(["02-Oct-2017 09:30", "%d-%B-%Y %H:%M"], ),
        rfunc_args_and_kwargs=(["02-Oct-2017 09:30", "%d-%B-%Y %H:%M"], ),
        check_exception_type=False,
        expected_error_message=errors_message,
    )
Ejemplo n.º 17
0
def test_duplicated_with_misspelled_column_name(subset):
    """Call ``drop_duplicates(subset)`` on a frame with columns A, B and C.

    The pandas and cudf calls are compared by ``assert_exceptions_equal``
    without comparing error messages.
    """
    pandas_frame = DataFrame(
        {"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}
    )
    cudf_frame = cudf.DataFrame.from_pandas(pandas_frame)

    assert_exceptions_equal(
        lfunc=pandas_frame.drop_duplicates,
        rfunc=cudf_frame.drop_duplicates,
        lfunc_args_and_kwargs=([subset], ),
        rfunc_args_and_kwargs=([subset], ),
        compare_error_message=False,
    )
Ejemplo n.º 18
0
def test_add_categories_error(data, add):
    """Call ``cat.add_categories(add)`` on a categorical Series in both libraries.

    The calls are compared by ``assert_exceptions_equal`` without comparing
    error messages.
    """
    pandas_series = pd.Series(data, dtype="category")
    cudf_series = cudf.Series(data, dtype="category")

    assert_exceptions_equal(
        lfunc=pandas_series.cat.add_categories,
        rfunc=cudf_series.cat.add_categories,
        lfunc_args_and_kwargs=([add], ),
        rfunc_args_and_kwargs=([add], ),
        compare_error_message=False,
    )
Ejemplo n.º 19
0
def test_multiindex_loc_rows_0(pdf, gdf, pdfIndex):
    """Look up the row key ("d",) through ``.loc`` on MultiIndexed frames.

    Both frames receive the given index (converted for cudf) before the
    ``.loc.__getitem__`` calls are compared by ``assert_exceptions_equal``.
    """
    gdfIndex = cudf.from_pandas(pdfIndex)
    pdf.index = pdfIndex
    gdf.index = gdfIndex

    # Row key ("d",) combined with a full slice.
    pandas_key = (("d", ), slice(None, None, None))
    cudf_key = (("d", ), slice(None, None, None))

    assert_exceptions_equal(
        lfunc=pdf.loc.__getitem__,
        rfunc=gdf.loc.__getitem__,
        lfunc_args_and_kwargs=([pandas_key], ),
        rfunc_args_and_kwargs=([cudf_key], ),
    )
Ejemplo n.º 20
0
def test_column_view_invalid_numeric_to_numeric(data, from_dtype, to_dtype):
    """Call ``view(to_dtype)`` on a NumPy array and on a column built alike.

    Both are built from ``data`` with ``from_dtype``; the expected error
    message is "Can not divide".
    """
    host_array = np.asarray(data, dtype=from_dtype)
    device_column = as_column(data, dtype=from_dtype)

    assert_exceptions_equal(
        lfunc=host_array.view,
        rfunc=device_column.view,
        lfunc_args_and_kwargs=([to_dtype],),
        rfunc_args_and_kwargs=([to_dtype],),
        expected_error_message="Can not divide",
    )
Ejemplo n.º 21
0
def test_dataframe_loc_outbound():
    """Look up label 11 through ``.loc`` on ten-row pandas and cudf frames.

    Both frames share the same random int32 and float32 columns; the two
    lookups are compared by ``assert_exceptions_equal``.
    """
    size = 10
    cudf_frame = cudf.DataFrame()

    host_a = np.random.randint(low=0, high=100, size=size).astype(np.int32)
    cudf_frame["a"] = host_a
    host_b = np.random.random(size).astype(np.float32)
    cudf_frame["b"] = host_b

    pandas_frame = pd.DataFrame()
    pandas_frame["a"] = host_a
    pandas_frame["b"] = host_b

    assert_exceptions_equal(
        lfunc=lambda: pandas_frame.loc[11],
        rfunc=lambda: cudf_frame.loc[11],
    )
Ejemplo n.º 22
0
def test_multiindex_set_names_error(level, names):
    """Call ``set_names(names=names, level=level)`` on a three-level MultiIndex.

    The pandas and cudf calls are compared by ``assert_exceptions_equal``.
    """
    levels = [["python", "cobra"], [2018, 2019], ["aab", "bcd"]]
    pandas_index = pd.MultiIndex.from_product(levels)
    cudf_index = cudf.from_pandas(pandas_index)

    assert_exceptions_equal(
        lfunc=pandas_index.set_names,
        rfunc=cudf_index.set_names,
        lfunc_args_and_kwargs=([], {"names": names, "level": level}),
        rfunc_args_and_kwargs=([], {"names": names, "level": level}),
    )
Ejemplo n.º 23
0
def test_timedelta_datetime_cast_invalid():
    """Cast between timedelta64[ns] and datetime64[ns] in both directions.

    For each direction the pandas and cudf ``astype`` calls are compared by
    ``assert_exceptions_equal``.
    """
    directions = (
        ("timedelta64[ns]", "datetime64[ns]"),
        ("datetime64[ns]", "timedelta64[ns]"),
    )

    for source_dtype, target_dtype in directions:
        cudf_series = cudf.Series([1, 2, 3], dtype=source_dtype)
        pandas_series = cudf_series.to_pandas()

        assert_exceptions_equal(
            lfunc=pandas_series.astype,
            rfunc=cudf_series.astype,
            lfunc_args_and_kwargs=([target_dtype], ),
            rfunc_args_and_kwargs=([target_dtype], ),
        )
Ejemplo n.º 24
0
def test_dataframe_drop_duplicates_method():
    """Compare ``drop_duplicates`` between cudf and pandas on a small frame.

    Covers the default call, single-column and multi-column subsets,
    ``keep="last"``, the ``None`` return of an in-place call, and subsets
    naming columns the frame does not have.
    """
    pdf = DataFrame(
        [(1, 2, "a"), (2, 3, "b"), (3, 4, "c"), (2, 3, "d"), (3, 5, "c")],
        columns=["n1", "n2", "s1"],
    )
    gdf = cudf.DataFrame.from_pandas(pdf)
    assert_df(gdf.drop_duplicates(), pdf.drop_duplicates())

    # Single-column subsets, comparing only that column with a reset index.
    for column in ("n1", "n2", "s1"):
        got = gdf.drop_duplicates(column)[column].reset_index(drop=True)
        expected = pdf.drop_duplicates(column)[column].reset_index(drop=True)
        assert_eq(got, expected)

    # keep="last": the cudf result is sorted by index before the reset.
    got_last = gdf.drop_duplicates("s1", keep="last")["s1"]
    got_last = got_last.sort_index().reset_index(drop=True)
    expected_last = pdf.drop_duplicates("s1", keep="last")["s1"]
    expected_last = expected_last.reset_index(drop=True)
    assert_eq(got_last, expected_last)

    # The in-place call returns None.
    assert gdf.drop_duplicates("s1", inplace=True) is None

    # Rebuild the cudf frame after the in-place call, then compare frames.
    gdf = cudf.DataFrame.from_pandas(pdf)
    for subset in ("n1", "n2", "s1", ["n1", "n2"], ["n1", "s1"]):
        assert_df(gdf.drop_duplicates(subset), pdf.drop_duplicates(subset))

    # Subsets naming columns that are not in the frame.
    assert_exceptions_equal(
        lfunc=pdf.drop_duplicates,
        rfunc=gdf.drop_duplicates,
        lfunc_args_and_kwargs=(["n3"], ),
        rfunc_args_and_kwargs=(["n3"], ),
        expected_error_message="columns {'n3'} do not exist",
    )

    assert_exceptions_equal(
        lfunc=pdf.drop_duplicates,
        rfunc=gdf.drop_duplicates,
        lfunc_args_and_kwargs=([["n1", "n4", "n3"]], ),
        rfunc_args_and_kwargs=([["n1", "n4", "n3"]], ),
        expected_error_message="columns {'n[34]', 'n[34]'} do not exist",
    )
Ejemplo n.º 25
0
def test_str_to_datetime_error():
    """Cast a string Series holding the text "None" to datetime64[s].

    The pandas and cudf ``astype`` calls are compared with an expected
    message; exception types are not compared.
    """
    pandas_series = pd.Series(
        ["2001-01-01", "2002-02-02", "2000-01-05", "None"]
    )
    cudf_series = Series(["2001-01-01", "2002-02-02", "2000-01-05", "None"])

    conversion_message = re.escape(
        "Could not convert `None` value to datetime")

    assert_exceptions_equal(
        lfunc=pandas_series.astype,
        rfunc=cudf_series.astype,
        lfunc_args_and_kwargs=(["datetime64[s]"], ),
        rfunc_args_and_kwargs=(["datetime64[s]"], ),
        check_exception_type=False,
        expected_error_message=conversion_message,
    )
Ejemplo n.º 26
0
def test_categorical_unary_ceil():
    """Compare ``ceil`` on a categorical Series between pandas and cudf.

    The pandas side is ``getattr(series, "ceil")``; the cudf side calls
    ``ceil`` with no arguments. Exception types are not compared.
    """
    categorical = pd.Categorical(
        ["a", "a", "b", "c", "a"], categories=["a", "b", "c"]
    )
    pandas_series = pd.Series(categorical)
    cudf_series = cudf.Series(categorical)

    ceil_message = ("Series of dtype `category` cannot "
                    "perform the operation: ceil")

    assert_exceptions_equal(
        lfunc=getattr,
        rfunc=cudf_series.ceil,
        lfunc_args_and_kwargs=([pandas_series, "ceil"], ),
        check_exception_type=False,
        expected_error_message=ceil_message,
    )
Ejemplo n.º 27
0
def test_categorical_binary_add():
    """Add a categorical Series to itself in pandas and in cudf.

    The two additions are compared by ``assert_exceptions_equal`` with an
    expected error message.
    """
    categorical = pd.Categorical(
        ["a", "a", "b", "c", "a"], categories=["a", "b", "c"]
    )
    pandas_series = pd.Series(categorical)
    cudf_series = cudf.Series(categorical)

    add_message = ("Series of dtype `category` cannot perform "
                   "the operation: add")

    assert_exceptions_equal(
        lfunc=operator.add,
        rfunc=operator.add,
        lfunc_args_and_kwargs=([pandas_series, pandas_series], ),
        rfunc_args_and_kwargs=([cudf_series, cudf_series], ),
        expected_error_message=add_message,
    )
Ejemplo n.º 28
0
def test_timedelta_unsupported_reductions(op):
    """Run the reduction named ``op`` on a timedelta64[ns] Series in both libraries.

    The expected message spells "kurt" out as "kurtosis" and uses any other
    ``op`` name unchanged.
    """
    cudf_series = cudf.Series([1, 2, 3, None], dtype="timedelta64[ns]")
    pandas_series = cudf_series.to_pandas()

    pandas_reduction = getattr(pandas_series, op)
    cudf_reduction = getattr(cudf_series, op)

    reported_name = "kurtosis" if op == "kurt" else op
    reduction_message = re.escape(
        "cannot perform " + reported_name + " with type timedelta64[ns]"
    )

    utils.assert_exceptions_equal(
        lfunc=pandas_reduction,
        rfunc=cudf_reduction,
        expected_error_message=reduction_message,
    )
Ejemplo n.º 29
0
def test_multiindex_column_shape():
    """Assign a one-entry MultiIndex as the columns of a zero-column frame.

    The pandas and cudf indexes are first checked for equality; the
    ``columns`` assignment on each frame is then compared by
    ``assert_exceptions_equal``.
    """
    pdf = pd.DataFrame(np.random.rand(5, 0))
    gdf = cudf.from_pandas(pdf)
    pdfIndex = pd.MultiIndex([["a", "b", "c"]], [[0]])
    pdfIndex.names = ["alpha"]
    gdfIndex = cudf.from_pandas(pdfIndex)
    assert_eq(pdfIndex, gdfIndex)

    # Use ``setattr`` with positional arguments. Passing ``a``/``b``/``c``
    # keywords to ``operator.setitem`` is rejected by its C implementation,
    # which raises the same TypeError on both sides before either frame is
    # touched, so the comparison would succeed without testing anything.
    # NOTE(review): messages are not compared because the cudf wording is
    # not visible here -- tighten this if both libraries agree.
    assert_exceptions_equal(
        lfunc=setattr,
        rfunc=setattr,
        lfunc_args_and_kwargs=([pdf, "columns", pdfIndex], ),
        rfunc_args_and_kwargs=([gdf, "columns", gdfIndex], ),
        compare_error_message=False,
    )
Ejemplo n.º 30
0
def test_series_replace_errors():
    """Exercise invalid ``Series.replace`` arguments.

    Two cudf-only checks expect a TypeError about mismatched types; the
    remaining cases compare the pandas and cudf calls through
    ``assert_exceptions_equal``.
    """
    gsr = cudf.Series([1, 2, None, 3, None])
    psr = gsr.to_pandas()

    # cudf only: replacing a number with a string.
    scalar_mismatch = re.escape("to_replace and value should be of same types,"
                                "got to_replace dtype: int64 and "
                                "value dtype: object")
    with pytest.raises(TypeError, match=scalar_mismatch):
        gsr.replace(1, "a")

    # cudf only: a string series with numeric to_replace, string values.
    # From here on ``gsr`` is the string series; ``psr`` is unchanged.
    gsr = cudf.Series(["a", "b", "c"])
    list_mismatch = re.escape("to_replace and value should be of same types,"
                              "got to_replace dtype: int64 and "
                              "value dtype: object")
    with pytest.raises(TypeError, match=list_mismatch):
        gsr.replace([1, 2], ["a", "b"])

    # Dict to_replace together with an explicit value.
    assert_exceptions_equal(
        lfunc=psr.replace,
        rfunc=gsr.replace,
        lfunc_args_and_kwargs=([{"a": 1}, 1], ),
        rfunc_args_and_kwargs=([{"a": 1}, 1], ),
    )

    # Replacement lists of different lengths.
    length_message = re.escape(
        "Replacement lists must be of same length. "
        "Expected 2, got 1.")
    assert_exceptions_equal(
        lfunc=psr.replace,
        rfunc=gsr.replace,
        lfunc_args_and_kwargs=([[1, 2], [1]], ),
        rfunc_args_and_kwargs=([[1, 2], [1]], ),
        expected_error_message=length_message,
    )

    # A bare object() as to_replace.
    assert_exceptions_equal(
        lfunc=psr.replace,
        rfunc=gsr.replace,
        lfunc_args_and_kwargs=([object(), [1]], ),
        rfunc_args_and_kwargs=([object(), [1]], ),
    )

    # Dict to_replace with a bare object() as value.
    assert_exceptions_equal(
        lfunc=psr.replace,
        rfunc=gsr.replace,
        lfunc_args_and_kwargs=([{"a": 1}, object()], ),
        rfunc_args_and_kwargs=([{"a": 1}, object()], ),
    )