Beispiel #1
0
def test_fillna_dataframe(fill_type, inplace):
    """Compare DataFrame.fillna with pandas for scalar, Series and dict fills.

    ``fill_type`` selects the kind of fill value ("scalar", "series", or
    anything else for a per-column dict); ``inplace`` is forwarded to the
    ``fillna`` call on the frame built with ``DataFrame.from_pandas``.
    """
    pandas_frame = pd.DataFrame({"a": [1, 2, None], "b": [None, None, 5]})
    converted_frame = DataFrame.from_pandas(pandas_frame)

    # Each fill value is built twice: a pandas flavour for the reference
    # result and a converted flavour for the frame under test.
    if fill_type == "scalar":
        fill_value_pd = fill_value_cudf = 5
    elif fill_type == "series":
        fill_value_pd = pd.Series([3, 4, 5])
        fill_value_cudf = Series.from_pandas(fill_value_pd)
    else:
        fill_value_pd = {"a": 5, "b": pd.Series([3, 4, 5])}
        fill_value_cudf = {
            "a": fill_value_pd["a"],
            "b": Series.from_pandas(fill_value_pd["b"]),
        }

    if isinstance(fill_value_pd, pd.Series):
        # pandas DataFrame.fillna with a Series fill value is not working
        # (https://github.com/pandas-dev/pandas/issues/27197), so the
        # expectation is assembled one column at a time instead.
        expect = pd.DataFrame()
        for column_name in pandas_frame.columns:
            expect[column_name] = pandas_frame[column_name].fillna(
                fill_value_pd
            )
    else:
        expect = pandas_frame.fillna(fill_value_pd)

    returned = converted_frame.fillna(fill_value_cudf, inplace=inplace)

    # With inplace=True the frame itself carries the result.
    got = converted_frame if inplace else returned

    assert_eq(expect, got)
Beispiel #2
0
def test_string_series_compare(obj, cmpop, cmp_obj):
    """Apply ``cmpop`` to converted operands and to the pandas originals.

    Any operand that is a ``pd.Series`` is converted with
    ``Series.from_pandas``; other operands are passed through unchanged.
    """
    left = Series.from_pandas(obj) if isinstance(obj, pd.Series) else obj
    right = (
        Series.from_pandas(cmp_obj)
        if isinstance(cmp_obj, pd.Series)
        else cmp_obj
    )

    got = cmpop(left, right)
    expected = cmpop(obj, cmp_obj)

    utils.assert_eq(expected, got)
Beispiel #3
0
def test_series_where(data_dtype, fill_value):
    """Check ``Series.where`` for three masks (> 0, < 0, == 0).

    When ``fill_value`` does not survive a round trip through the series
    dtype, a ``TypeError`` is expected; otherwise the result is compared
    with pandas.
    """
    psr = pd.Series(list(range(10)), dtype=data_dtype)
    sr = Series.from_pandas(psr)

    mask_builders = (
        lambda values: values > 0,
        lambda values: values < 0,
        lambda values: values == 0,
    )

    for build_mask in mask_builders:
        if sr.dtype.type(fill_value) != fill_value:
            with pytest.raises(TypeError):
                sr.where(build_mask(sr), fill_value)
            continue

        # Cast back to original dtype as pandas automatically upcasts
        expect = psr.where(build_mask(psr), fill_value).astype(psr.dtype)
        got = sr.where(build_mask(sr), fill_value)
        assert_eq(expect, got)
Beispiel #4
0
def test_string_numeric_astype(dtype):
    """Cast bool/int/float/datetime series to ``"str"`` and compare with pandas.

    ``dtype`` is a dtype name; the sample data is chosen by its prefix.
    """
    samples_by_prefix = (
        ("bool", [1, 0, 1, 0, 1]),
        ("int", [1, 2, 3, 4, 5]),
        ("float", [1.0, 2.0, 3.0, 4.0, 5.0]),
        (
            "datetime64",
            [1000000000, 2000000000, 3000000000, 4000000000, 5000000000],
        ),
    )
    # First matching prefix wins; no match leaves ``data`` undefined.
    for prefix, values in samples_by_prefix:
        if dtype.startswith(prefix):
            data = values
            break

    if dtype.startswith("datetime64"):
        ps = pd.Series(data, dtype="datetime64[ns]")
        gs = Series.from_pandas(ps)
        # Pandas datetime64 --> str typecasting returns arbitrary format
        # depending on the data, so a fixed format is used for the
        # expectation unless we choose to match that behavior.
        expect = ps.dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        ps = pd.Series(data, dtype=dtype)
        gs = Series(data, dtype=dtype)
        expect = ps.astype("str")

    got = gs.astype("str")

    assert_eq(expect, got)
Beispiel #5
0
def test_fillna_categorical(psr_data, fill_value, inplace):
    """Compare ``fillna`` on a converted series with pandas.

    The input is deep-copied first. When the converted fill value is a
    ``cudf.Series`` whose dtype differs from the series dtype, both
    libraries are expected to raise matching exceptions; otherwise the
    filled results are compared.
    """
    psr = psr_data.copy(deep=True)
    gsr = Series.from_pandas(psr)

    fill_value_cudf = (
        cudf.from_pandas(fill_value)
        if isinstance(fill_value, pd.Series)
        else fill_value
    )

    dtype_mismatch = (
        isinstance(fill_value_cudf, cudf.Series)
        and gsr.dtype != fill_value_cudf.dtype
    )

    if not dtype_mismatch:
        expected = psr.fillna(fill_value, inplace=inplace)
        got = gsr.fillna(fill_value_cudf, inplace=inplace)

        # With inplace=True the series themselves carry the results.
        if inplace:
            expected, got = psr, gsr

        assert_eq(expected, got)
        return

    assert_exceptions_equal(
        lfunc=psr.fillna,
        rfunc=gsr.fillna,
        lfunc_args_and_kwargs=([fill_value], {"inplace": inplace}),
        rfunc_args_and_kwargs=([fill_value_cudf], {"inplace": inplace}),
    )
Beispiel #6
0
def test_groupby_series_level_zero(agg):
    """Group a series by index level 0 and compare the ``agg`` result with pandas.

    Dtype checking is skipped for the "count" aggregation.
    """
    pandas_series = pd.Series([1, 2, 3], index=[0, 1, 1])
    converted_series = Series.from_pandas(pandas_series)

    pandas_groups = pandas_series.groupby(level=0)
    converted_groups = converted_series.groupby(level=0)

    # ``agg`` names the aggregation method to call on each groupby object.
    pdresult = getattr(pandas_groups, agg)()
    gdresult = getattr(converted_groups, agg)()

    assert_eq(pdresult, gdresult, check_dtype=agg != "count")
Beispiel #7
0
def test_groupby_series_level_zero(agg):
    """Group a series by index level 0 and compare the ``agg`` result with pandas.

    Dtype checking is skipped for aggregations listed in
    ``_index_type_aggs``.
    """
    pandas_series = pd.Series([1, 2, 3], index=[2, 5, 5])
    converted_series = Series.from_pandas(pandas_series)

    pandas_groups = pandas_series.groupby(level=0)
    converted_groups = converted_series.groupby(level=0)

    # ``agg`` names the aggregation method to call on each groupby object.
    pdresult = getattr(pandas_groups, agg)()
    gdresult = getattr(converted_groups, agg)()

    assert_groupby_results_equal(
        pdresult, gdresult, check_dtype=agg not in _index_type_aggs
    )
Beispiel #8
0
def test_string_table_view_creation():
    """Slice the first element of a mostly-null string series and compare with pandas."""
    # 25 strings followed by 2027 nulls.
    values = ["hi"] * 25 + [None] * 2027
    psr = pd.Series(values)
    gsr = Series.from_pandas(psr)

    assert_eq(psr[:1], gsr[:1])
Beispiel #9
0
def test_fillna_string(fill_type, inplace):
    """Fill nulls in a string series with a scalar or a series and compare with pandas."""
    psr = pd.Series(["z", None, "z", None])
    sr = Series.from_pandas(psr)

    if fill_type == "scalar":
        fill_value_pd = fill_value_cudf = "a"
    elif fill_type == "series":
        fill_value_pd = pd.Series(["a", "b", "c", "d"])
        fill_value_cudf = Series.from_pandas(fill_value_pd)

    expect = psr.fillna(fill_value_pd)
    returned = sr.fillna(fill_value_cudf, inplace=inplace)

    # With inplace=True the series itself carries the result.
    got = sr if inplace else returned

    assert_eq(expect, got)
Beispiel #10
0
def test_series_clip(data, lower, upper, inplace):
    """Compare ``Series.clip`` with pandas for the given bounds.

    ``data`` is wrapped in a pandas Series, which is then converted with
    ``Series.from_pandas``. ``lower``/``upper`` are forwarded to ``clip``
    on both sides; ``inplace`` is forwarded only to the converted series.
    """
    psr = pd.Series(data)
    # Convert the pandas Series (not the raw ``data``) so both sides start
    # from the same object and ``from_pandas`` receives a pandas input.
    gsr = Series.from_pandas(psr)

    expect = psr.clip(lower=lower, upper=upper)
    got = gsr.clip(lower=lower, upper=upper, inplace=inplace)

    # With inplace=True the series itself carries the clipped values.
    assert_eq(expect, gsr if inplace is True else got)
Beispiel #11
0
def test_series_replace():
    """Exercise ``Series.replace`` with scalar, categorical, list and array inputs.

    Also checks that float replacements into an integer series raise
    ``TypeError`` and that null counts are as expected after replacing
    values with ``None`` or ``-1`` in a 401-element series.
    """
    base = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    replaced = base.replace(0, 5)
    assert_eq(np.array([5, 1, 2, 3, 4]), replaced.to_array())

    # Categorical
    pandas_cat = pd.Series(["one", "two", "three"], dtype="category")
    converted_cat = Series.from_pandas(pandas_cat)
    for new_category in ("two", "five"):
        expected_cat = pandas_cat.replace("one", new_category)
        got_cat = converted_cat.replace("one", new_category)
        assert_eq(expected_cat, got_cat)

    # List input
    replaced = base.replace([0, 1], [5, 6])
    assert_eq(np.array([5, 6, 2, 3, 4]), replaced.to_array())

    with pytest.raises(TypeError):
        base.replace([0, 1], [5.5, 6.5])

    # Series input
    replaced = base.replace(base[:3].to_array(), 5)
    assert_eq(np.array([5, 5, 5, 3, 4]), replaced.to_array())

    targets = (22, 323, 27, 0)

    # large input containing null
    with_null = Series(list(range(400)) + [None])
    for replacement, expected_nulls in ((None, 5), (-1, 1)):
        replaced = with_null.replace(list(targets), replacement)
        assert replaced.null_count == expected_nulls
        assert len(replaced.to_array()) == (401 - expected_nulls)

    # large input not containing nulls
    without_null = with_null.fillna(-11)
    for replacement, expected_nulls in ((None, 4), (-1, 0)):
        replaced = without_null.replace(list(targets), replacement)
        assert replaced.null_count == expected_nulls
        assert len(replaced.to_array()) == (401 - expected_nulls)
Beispiel #12
0
def test_string_replace_multi():
    """Compare list-based ``str.replace`` with the equivalent pandas calls."""
    # Two patterns at once versus two chained pandas replacements.
    ps = pd.Series(["hello", "goodbye"])
    gs = Series(["hello", "goodbye"])
    expect = ps.str.replace("e", "E").str.replace("o", "O")
    got = gs.str.replace(["e", "o"], ["E", "O"])

    assert_eq(expect, got)

    # Single-pattern list, once with regex=True and once with regex=False.
    pattern_cases = (
        (["foo", "fuz", np.nan], True),
        (["f.o", "fuz", np.nan], False),
    )
    for values, use_regex in pattern_cases:
        ps = pd.Series(values)
        gs = Series.from_pandas(ps)

        expect = ps.str.replace("f.", "ba", regex=use_regex)
        got = gs.str.replace(["f."], ["ba"], regex=use_regex)
        assert_eq(expect, got)
Beispiel #13
0
def test_to_from_pandas_nulls(data, nulls):
    """Round-trip a series through ``from_pandas``/``to_pandas`` with NaT values.

    ``nulls`` selects how much of the data is overwritten with NaT:
    "some" (every other element), "all", or anything else for none.
    """
    pd_data = pd.Series(data.copy())
    not_a_time = np.datetime64("nat", "ns")

    if nulls == "some":
        # Fill half the values with NaT
        every_other = list(range(0, len(pd_data), 2))
        pd_data[every_other] = not_a_time
    elif nulls == "all":
        # Fill all the values with NaT
        pd_data[:] = not_a_time

    round_tripped = Series.from_pandas(pd_data).to_pandas()

    assert_eq(pd_data, round_tripped)
Beispiel #14
0
def test_series_replace():
    """Exercise ``Series.replace`` with scalar, categorical, list and Series inputs.

    Also checks null counts after replacing values with ``None`` or
    ``-1`` in a 401-element series, with and without an initial null.
    """
    base = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    replaced = base.replace(0, 5)
    np.testing.assert_equal(replaced.to_array(), np.array([5, 1, 2, 3, 4]))

    # Categorical
    pandas_cat = pd.Series(["one", "two", "three"], dtype="category")
    expected_cat = pandas_cat.replace("one", "two")
    got_cat = Series.from_pandas(pandas_cat).replace("one", "two")
    pd.testing.assert_series_equal(got_cat.to_pandas(), expected_cat)

    # List input (integer replacements, then float replacements)
    list_cases = (
        ([5, 6], [5, 6, 2, 3, 4]),
        ([5.5, 6.5], [5.5, 6.5, 2, 3, 4]),
    )
    for new_values, expected_values in list_cases:
        replaced = base.replace([0, 1], new_values)
        np.testing.assert_equal(
            replaced.to_array(), np.array(expected_values)
        )

    # Series input
    replaced = base.replace(base[:3], 5)
    np.testing.assert_equal(replaced.to_array(), np.array([5, 5, 5, 3, 4]))

    targets = (22, 323, 27, 0)

    # large input containing null
    with_null = Series(list(range(400)) + [None])
    for replacement, expected_nulls in ((None, 5), (-1, 1)):
        replaced = with_null.replace(list(targets), replacement)
        assert replaced.null_count == expected_nulls
        assert len(replaced.to_array()) == (401 - expected_nulls)

    # large input not containing nulls
    without_null = with_null.fillna(-11)
    for replacement, expected_nulls in ((None, 4), (-1, 0)):
        replaced = without_null.replace(list(targets), replacement)
        assert replaced.null_count == expected_nulls
        assert len(replaced.to_array()) == (401 - expected_nulls)
Beispiel #15
0
def test_series_where(data_dtype, fill_value):
    """Compare ``Series.where`` with pandas for three masks (> 0, < 0, == 0)."""
    psr = pd.Series(list(range(10)), dtype=data_dtype)
    sr = Series.from_pandas(psr)

    mask_builders = (
        lambda values: values > 0,
        lambda values: values < 0,
        lambda values: values == 0,
    )

    # The same mask expression is applied to the pandas and converted series.
    for build_mask in mask_builders:
        expect = psr.where(build_mask(psr), fill_value)
        got = sr.where(build_mask(sr), fill_value)
        assert_eq(expect, got)
Beispiel #16
0
def test_series_with_nulls_where(fill_value):
    """Compare ``Series.where`` with pandas on a series that starts with nulls."""
    # Three leading nulls followed by 0..4.
    psr = pd.Series([None] * 3 + list(range(5)))
    sr = Series.from_pandas(psr)

    mask_builders = (
        lambda values: values > 0,
        lambda values: values < 0,
        lambda values: values == 0,
    )

    # The same mask expression is applied to the pandas and converted series.
    for build_mask in mask_builders:
        expect = psr.where(build_mask(psr), fill_value)
        got = sr.where(build_mask(sr), fill_value)
        assert_eq(expect, got)
Beispiel #17
0
def test_fillna_categorical(psr, fill_value, inplace):
    """Compare ``fillna`` on a converted series with pandas.

    ``psr`` is the pandas input, ``fill_value`` is a scalar or a
    ``pd.Series`` (converted with ``cudf.from_pandas`` for the converted
    side), and ``inplace`` is forwarded to both ``fillna`` calls.
    """
    # Work on a private copy: with inplace=True the pandas ``fillna`` call
    # below mutates its receiver, and the caller's object may be reused by
    # other test cases.
    psr = psr.copy(deep=True)
    gsr = Series.from_pandas(psr)

    if isinstance(fill_value, pd.Series):
        fill_value_cudf = cudf.from_pandas(fill_value)
    else:
        fill_value_cudf = fill_value

    expected = psr.fillna(fill_value, inplace=inplace)
    got = gsr.fillna(fill_value_cudf, inplace=inplace)

    # With inplace=True the series themselves carry the results.
    if inplace:
        expected = psr
        got = gsr

    assert_eq(expected, got)
Beispiel #18
0
def test_fillna_datetime(fill_type, inplace):
    """Fill nulls in a datetime series with a timestamp or a series and compare with pandas."""
    yearly = pd.date_range("2010-01-01", "2020-01-10", freq="1y")
    psr = pd.Series(yearly)

    # The series fill value is derived before any entries are nulled out.
    if fill_type == "scalar":
        fill_value = pd.Timestamp("2010-01-02")
    elif fill_type == "series":
        fill_value = psr + pd.Timedelta("1d")

    psr[[5, 9]] = None
    sr = Series.from_pandas(psr)

    expect = psr.fillna(fill_value)
    returned = sr.fillna(fill_value, inplace=inplace)

    # With inplace=True the series itself carries the result.
    got = sr if inplace else returned

    assert_eq(expect, got)
Beispiel #19
0
def test_fillna_categorical(fill_type, null_value, inplace):
    """Fill nulls in a categorical series and compare with a hand-built expectation.

    ``null_value`` is the placeholder put in positions 3 and 5 of the
    input; ``fill_type`` selects a scalar or a categorical series fill.
    """
    raw_values = ["a", "b", "a", null_value, "c", null_value]
    data = pd.Series(raw_values, dtype="category")
    sr = Series.from_pandas(data)

    if fill_type == "scalar":
        fill_value = "c"
        expected_values = ["a", "b", "a", "c", "c", "c"]
        expect = pd.Series(expected_values, dtype="category")
    elif fill_type == "series":
        fill_values = ["c", "c", "c", "c", "c", "a"]
        fill_value = pd.Series(fill_values, dtype="category")
        expected_values = ["a", "b", "a", "c", "c", "a"]
        expect = pd.Series(expected_values, dtype="category")

    returned = sr.fillna(fill_value, inplace=inplace)

    # With inplace=True the series itself carries the result.
    got = sr if inplace else returned

    assert_eq(expect, got)
Beispiel #20
0
def test_numeric_series_replace_dtype(series_dtype, replacement):
    """Check ``Series.replace`` dtype handling for scalar and list arguments.

    A replacement that does not survive a round trip through the series
    dtype is expected to raise ``TypeError``; otherwise results are
    compared with pandas after casting back to the original dtype.
    """
    psr = pd.Series([0, 1, 2, 3, 4, 5], dtype=series_dtype)
    sr = Series.from_pandas(psr)

    # Scalar replacement with a scalar target, then with a list target.
    for to_replace in (1, [2, 3]):
        if sr.dtype.type(replacement) != replacement:
            with pytest.raises(TypeError):
                sr.replace(to_replace, replacement)
            continue

        expect = psr.replace(to_replace, replacement).astype(psr.dtype)
        got = sr.replace(to_replace, replacement)
        assert_eq(expect, got)

    # If to_replace is a scalar and replacement is a list
    with pytest.raises(TypeError):
        sr.replace(0, [replacement, 2])

    # Both list of unequal length
    with pytest.raises(ValueError):
        sr.replace([0, 1], [replacement])

    # Both lists of equal length
    replacement_pair = [replacement, replacement]
    float_into_integer = (
        np.dtype(type(replacement)).kind == "f"
        and sr.dtype.kind in {"i", "u"}
    )
    if float_into_integer or sr.dtype.type(replacement) != replacement:
        with pytest.raises(TypeError):
            sr.replace([2, 3], replacement_pair)
    else:
        expect = psr.replace([2, 3], replacement_pair).astype(psr.dtype)
        got = sr.replace([2, 3], replacement_pair)
        assert_eq(expect, got)
Beispiel #21
0
def test_replace_inplace():
    """Compare ``replace`` (in-place and not) on series and frames with pandas.

    Each section builds matching pandas and converted objects, applies
    the same ``replace`` call to both, and asserts they stay equal.
    Copies taken before the call are compared with each other afterwards.
    """
    # --- Series, scalar replacement, inplace=True ---
    data = np.array([5, 1, 2, 3, 4])
    sr = Series(data)
    psr = pd.Series(data)

    sr_copy = sr.copy()
    psr_copy = psr.copy()

    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    sr.replace(5, 0, inplace=True)
    psr.replace(5, 0, inplace=True)
    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)

    # --- Series, dict replacement without inplace (results discarded) ---
    sr = Series(data)
    psr = pd.Series(data)

    sr_copy = sr.copy()
    psr_copy = psr.copy()

    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    sr.replace({5: 0, 3: -5})
    psr.replace({5: 0, 3: -5})
    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    # replace() called with no arguments on both sides.
    srr = sr.replace()
    psrr = psr.replace()
    assert_eq(srr, psrr)

    # --- Categorical series, inplace=True ---
    psr = pd.Series(["one", "two", "three"], dtype="category")
    sr = Series.from_pandas(psr)

    sr_copy = sr.copy()
    psr_copy = psr.copy()

    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    sr.replace("one", "two", inplace=True)
    psr.replace("one", "two", inplace=True)
    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)

    # --- DataFrame, scalar replacement, inplace=True ---
    pdf = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]})
    gdf = DataFrame.from_pandas(pdf)

    pdf_copy = pdf.copy()
    gdf_copy = gdf.copy()
    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)
    pdf.replace(5, 0, inplace=True)
    gdf.replace(5, 0, inplace=True)
    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)

    # --- Series, empty integer array as to_replace ---
    pds = pd.Series([1, 2, 3, 45])
    gds = Series.from_pandas(pds)
    vals = np.array([]).astype(int)

    assert_eq(pds.replace(vals, -1), gds.replace(vals, -1))

    pds.replace(vals, 77, inplace=True)
    gds.replace(vals, 77, inplace=True)
    assert_eq(pds, gds)

    # --- DataFrame, dict-keyed and empty-list replacements ---
    pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    gdf = DataFrame.from_pandas(pdf)

    assert_eq(pdf.replace({"a": 2}, {"a": -33}),
              gdf.replace({"a": 2}, {"a": -33}))

    assert_eq(
        pdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
        gdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
    )

    assert_eq(
        pdf.replace([], []),
        gdf.replace([], []),
    )

    # Scalar to_replace with an empty-list value: both sides are handed to
    # assert_exceptions_equal, with error-message comparison turned off.
    assert_exceptions_equal(
        lfunc=pdf.replace,
        rfunc=gdf.replace,
        lfunc_args_and_kwargs=([], {
            "to_replace": -1,
            "value": []
        }),
        rfunc_args_and_kwargs=([], {
            "to_replace": -1,
            "value": []
        }),
        compare_error_message=False,
    )
Beispiel #22
0
def test_replace_inplace():
    """Compare ``replace`` (in-place and not) on series and frames with pandas.

    Each section builds matching pandas and converted objects, applies
    the same ``replace`` call to both, and asserts they stay equal.
    Copies taken before the call are compared with each other afterwards.
    """
    # --- Series, scalar replacement, inplace=True ---
    data = np.array([5, 1, 2, 3, 4])
    sr = Series(data)
    psr = pd.Series(data)

    sr_copy = sr.copy()
    psr_copy = psr.copy()

    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    sr.replace(5, 0, inplace=True)
    psr.replace(5, 0, inplace=True)
    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)

    # --- Series, dict replacement without inplace (results discarded) ---
    sr = Series(data)
    psr = pd.Series(data)

    sr_copy = sr.copy()
    psr_copy = psr.copy()

    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    sr.replace({5: 0, 3: -5})
    psr.replace({5: 0, 3: -5})
    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    # replace() called with no arguments on both sides.
    srr = sr.replace()
    psrr = psr.replace()
    assert_eq(srr, psrr)

    # --- Categorical series, inplace=True ---
    psr = pd.Series(["one", "two", "three"], dtype="category")
    sr = Series.from_pandas(psr)

    sr_copy = sr.copy()
    psr_copy = psr.copy()

    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)
    sr.replace("one", "two", inplace=True)
    psr.replace("one", "two", inplace=True)
    assert_eq(sr, psr)
    assert_eq(sr_copy, psr_copy)

    # --- DataFrame, scalar replacement, inplace=True ---
    pdf = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]})
    gdf = DataFrame.from_pandas(pdf)

    pdf_copy = pdf.copy()
    gdf_copy = gdf.copy()
    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)
    pdf.replace(5, 0, inplace=True)
    gdf.replace(5, 0, inplace=True)
    assert_eq(pdf, gdf)
    assert_eq(pdf_copy, gdf_copy)

    # --- Series, empty integer array as to_replace ---
    pds = pd.Series([1, 2, 3, 45])
    gds = Series.from_pandas(pds)
    vals = np.array([]).astype(int)

    assert_eq(pds.replace(vals, -1), gds.replace(vals, -1))

    pds.replace(vals, 77, inplace=True)
    gds.replace(vals, 77, inplace=True)
    assert_eq(pds, gds)

    # --- DataFrame, dict-keyed and empty-list replacements ---
    pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    gdf = DataFrame.from_pandas(pdf)

    assert_eq(pdf.replace({"a": 2}, {"a": -33}),
              gdf.replace({"a": 2}, {"a": -33}))

    assert_eq(
        pdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
        gdf.replace({"a": [2, 5]}, {"a": [9, 10]}),
    )

    assert_eq(
        pdf.replace([], []),
        gdf.replace([], []),
    )

    # Scalar to_replace with an empty-list value is expected to raise
    # TypeError on the pandas frame and on the converted frame.
    with pytest.raises(TypeError):
        pdf.replace(-1, [])

    with pytest.raises(TypeError):
        gdf.replace(-1, [])