Exemple #1
0
def test_fillna_dataframe(fill_type, inplace):
    """Compare ``DataFrame.fillna`` on a cudf frame with the pandas result.

    ``fill_type`` selects the fill value: ``"scalar"`` uses a plain number,
    ``"series"`` uses a Series, and any other value uses a per-column dict
    mixing a scalar and a Series. ``inplace`` is forwarded to the cudf
    ``fillna`` call.
    """
    pdf = pd.DataFrame({"a": [1, 2, None], "b": [None, None, 5]})
    gdf = DataFrame.from_pandas(pdf)

    # Build the pandas fill value and its cudf counterpart together.
    if fill_type == "scalar":
        fill_value_pd = fill_value_cudf = 5
    elif fill_type == "series":
        fill_value_pd = pd.Series([3, 4, 5])
        fill_value_cudf = Series.from_pandas(fill_value_pd)
    else:
        column_b_fill = pd.Series([3, 4, 5])
        fill_value_pd = {"a": 5, "b": column_b_fill}
        fill_value_cudf = {"a": 5, "b": Series.from_pandas(column_b_fill)}

    # https://github.com/pandas-dev/pandas/issues/27197
    # pandas DataFrame.fillna with a Series is not working, so the expected
    # frame is assembled by filling each column on its own.
    if isinstance(fill_value_pd, pd.Series):
        expect = pd.DataFrame(
            {col: pdf[col].fillna(fill_value_pd) for col in pdf.columns}
        )
    else:
        expect = pdf.fillna(fill_value_pd)

    returned = gdf.fillna(fill_value_cudf, inplace=inplace)
    # With inplace set, the frame itself is compared, not the return value.
    got = gdf if inplace else returned

    assert_eq(expect, got)
Exemple #2
0
def test_string_numeric_astype(dtype):
    """Check that casting a Series of ``dtype`` to ``"str"`` matches pandas.

    ``dtype`` is a dtype name; the sample data is chosen from its prefix
    ("bool", "int", "float" or "datetime64").
    """
    is_datetime = dtype.startswith("datetime64")

    if dtype.startswith("bool"):
        data = [1, 0, 1, 0, 1]
    elif dtype.startswith("int"):
        data = [1, 2, 3, 4, 5]
    elif dtype.startswith("float"):
        data = [1.0, 2.0, 3.0, 4.0, 5.0]
    elif is_datetime:
        data = [1000000000, 2000000000, 3000000000, 4000000000, 5000000000]

    if is_datetime:
        # Datetime input is always built as datetime64[ns] on the pandas
        # side and converted from there.
        ps = pd.Series(data, dtype="datetime64[ns]")
        gs = Series.from_pandas(ps)
        # Pandas datetime64 --> str typecasting returns arbitrary format
        # depending on the data, so making it consistent unless we choose
        # to match the behavior
        expect = ps.dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        ps = pd.Series(data, dtype=dtype)
        gs = Series(data, dtype=dtype)
        expect = ps.astype("str")

    got = gs.astype("str")

    assert_eq(expect, got)
Exemple #3
0
def test_series_replace():
    """Exercise ``Series.replace`` with scalar, categorical, list and
    Series arguments on a five-element input."""
    base = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    scalar_result = base.replace(0, 5)
    np.testing.assert_equal(
        scalar_result.to_array(), np.array([5, 1, 2, 3, 4])
    )

    # Categorical: the pandas result is the reference
    pd_categories = pd.Series(["one", "two", "three"], dtype='category')
    pd_expected = pd_categories.replace("one", "two")
    gd_categories = Series.from_pandas(pd_categories)
    gd_result = gd_categories.replace("one", "two")
    pd.testing.assert_series_equal(gd_result.to_pandas(), pd_expected)

    # List input, integer replacements
    int_list_result = base.replace([0, 1], [5, 6])
    np.testing.assert_equal(
        int_list_result.to_array(), np.array([5, 6, 2, 3, 4])
    )

    # List input, floating-point replacements
    float_list_result = base.replace([0, 1], [5.5, 6.5])
    np.testing.assert_equal(
        float_list_result.to_array(), np.array([5.5, 6.5, 2, 3, 4])
    )

    # Series input: the first three values are all replaced by 5
    series_result = base.replace(base[:3], 5)
    np.testing.assert_equal(
        series_result.to_array(), np.array([5, 5, 5, 3, 4])
    )
def test_groupby_series_level_zero(agg):
    """Compare a ``groupby(level=0)`` aggregation between pandas and cudf.

    Parameters
    ----------
    agg : str
        Name of the aggregation method looked up on both groupby objects.
    """
    pdf = pd.Series([1, 2, 3], index=[0, 1, 1])
    gdf = Series.from_pandas(pdf)
    pdg = pdf.groupby(level=0)
    gdg = gdf.groupby(level=0)
    pdresult = getattr(pdg, agg)()
    gdresult = getattr(gdg, agg)()
    # NOTE(review): the dtype is not compared for "count"; presumably the
    # two libraries return different integer widths for counts -- confirm.
    check_dtype = agg != "count"
    assert_eq(pdresult, gdresult, check_dtype=check_dtype)
Exemple #5
0
def test_groupby_series_level_zero(agg):
    """Compare a ``groupby(level=0)`` aggregation between pandas and cudf.

    ``agg`` is the name of the aggregation method called on both groupby
    objects; the result dtype is compared for every aggregation but 'count'.
    """
    pd_series = pd.Series([1, 2, 3], index=[0, 1, 1])
    gd_series = Series.from_pandas(pd_series)

    pd_grouped = pd_series.groupby(level=0)
    gd_grouped = gd_series.groupby(level=0)

    expected = getattr(pd_grouped, agg)()
    actual = getattr(gd_grouped, agg)()

    assert_eq(expected, actual, check_dtype=(agg != 'count'))
Exemple #6
0
def test_fillna_string(fill_type, inplace):
    """Compare string ``Series.fillna`` with pandas.

    ``fill_type`` is ``"scalar"`` or ``"series"``; ``inplace`` is forwarded
    to the cudf ``fillna`` call.
    """
    psr = pd.Series(["z", None, "z", None])
    sr = Series.from_pandas(psr)

    # Each library gets a fill value of its own type.
    if fill_type == "scalar":
        fill_value_pd = fill_value_cudf = "a"
    elif fill_type == "series":
        fill_value_pd = pd.Series(["a", "b", "c", "d"])
        fill_value_cudf = Series.from_pandas(fill_value_pd)

    expect = psr.fillna(fill_value_pd)
    returned = sr.fillna(fill_value_cudf, inplace=inplace)

    # With inplace set, the series itself is compared.
    got = sr if inplace else returned

    assert_eq(expect, got)
Exemple #7
0
def test_to_from_pandas_nulls(data, nulls):
    """Round-trip a pandas Series through ``Series.from_pandas`` /
    ``to_pandas`` and check it is unchanged.

    ``nulls`` controls how many values are overwritten with NaT first:
    ``"some"`` (every second value from index 0), ``"all"``, or anything
    else for none.
    """
    pd_data = pd.Series(data.copy())
    nat = np.datetime64("nat", "ns")

    if nulls == "some":
        # Fill half the values with NaT
        every_other = list(range(0, len(pd_data), 2))
        pd_data[every_other] = nat
    elif nulls == "all":
        # Fill all the values with NaT
        pd_data[:] = nat

    round_tripped = Series.from_pandas(pd_data).to_pandas()

    assert_eq(pd_data, round_tripped)
Exemple #8
0
def test_to_from_pandas_nulls(data, nulls):
    """Round-trip a millisecond-datetime pandas Series through
    ``Series.from_pandas`` / ``to_pandas`` and check it is unchanged.

    ``nulls`` controls how many values are overwritten with NaT first:
    ``'some'`` (every second value from index 0), ``'all'``, or anything
    else for none.
    """
    as_milliseconds = data.copy().astype('datetime64[ms]')
    pd_data = pd.Series(as_milliseconds)
    nat = np.datetime64('nat')

    if nulls == 'some':
        # Fill half the values with NaT
        every_other = list(range(0, len(pd_data), 2))
        pd_data[every_other] = nat
    elif nulls == 'all':
        # Fill all the values with NaT
        pd_data[:] = nat

    round_tripped = Series.from_pandas(pd_data).to_pandas()

    assert_eq(pd_data, round_tripped)
Exemple #9
0
def test_series_replace():
    """Exercise ``Series.replace`` on a small input and a 401-element input.

    The small input covers scalar, categorical, list and Series arguments.
    The large input checks null counts after replacing four values with
    ``None`` or ``-1``, with and without a null already present.
    """
    base = Series(np.array([0, 1, 2, 3, 4]))

    # Numerical
    scalar_result = base.replace(0, 5)
    np.testing.assert_equal(
        scalar_result.to_array(), np.array([5, 1, 2, 3, 4])
    )

    # Categorical: the pandas result is the reference
    pd_categories = pd.Series(["one", "two", "three"], dtype="category")
    pd_expected = pd_categories.replace("one", "two")
    gd_categories = Series.from_pandas(pd_categories)
    gd_result = gd_categories.replace("one", "two")
    pd.testing.assert_series_equal(gd_result.to_pandas(), pd_expected)

    # List input, integer replacements
    int_list_result = base.replace([0, 1], [5, 6])
    np.testing.assert_equal(
        int_list_result.to_array(), np.array([5, 6, 2, 3, 4])
    )

    # List input, floating-point replacements
    float_list_result = base.replace([0, 1], [5.5, 6.5])
    np.testing.assert_equal(
        float_list_result.to_array(), np.array([5.5, 6.5, 2, 3, 4])
    )

    # Series input: the first three values are all replaced by 5
    series_result = base.replace(base[:3], 5)
    np.testing.assert_equal(
        series_result.to_array(), np.array([5, 5, 5, 3, 4])
    )

    total = 401

    # large input containing null
    with_null = Series(list(range(400)) + [None])

    nulled = with_null.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 5
    assert len(nulled.to_array()) == (total - 5)

    substituted = with_null.replace([22, 323, 27, 0], -1)
    assert substituted.null_count == 1
    assert len(substituted.to_array()) == (total - 1)

    # large input not containing nulls
    without_null = with_null.fillna(-11)

    nulled = without_null.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 4
    assert len(nulled.to_array()) == (total - 4)

    substituted = without_null.replace([22, 323, 27, 0], -1)
    assert substituted.null_count == 0
    assert len(substituted.to_array()) == total
Exemple #10
0
def test_series_with_nulls_where(fill_value):
    """Compare ``Series.where`` with pandas on data that starts with nulls.

    The series holds three ``None`` values followed by 0..4; the masks
    ``> 0``, ``< 0`` and ``== 0`` are checked in turn, with ``fill_value``
    as the replacement.
    """
    psr = pd.Series([None] * 3 + list(range(5)))
    sr = Series.from_pandas(psr)

    conditions = (
        lambda s: s > 0,
        lambda s: s < 0,
        lambda s: s == 0,
    )
    for condition in conditions:
        expect = psr.where(condition(psr), fill_value)
        got = sr.where(condition(sr), fill_value)
        assert_eq(expect, got)
Exemple #11
0
def test_series_where(data_dtype, fill_value):
    """Compare ``Series.where`` with pandas on the values 0..9.

    ``data_dtype`` is the dtype of the input series and ``fill_value`` the
    replacement passed to ``where``; the masks ``> 0``, ``< 0`` and ``== 0``
    are checked in turn.
    """
    psr = pd.Series(list(range(10)), dtype=data_dtype)
    sr = Series.from_pandas(psr)

    def compare(make_mask):
        # Build the mask separately for each library, then compare results.
        expect = psr.where(make_mask(psr), fill_value)
        got = sr.where(make_mask(sr), fill_value)
        assert_eq(expect, got)

    compare(lambda s: s > 0)
    compare(lambda s: s < 0)
    compare(lambda s: s == 0)
Exemple #12
0
def test_fillna_string(fill_type, inplace):
    """Compare string ``Series.fillna`` with pandas.

    Parameters
    ----------
    fill_type : str
        ``'scalar'`` to fill with a single string, ``'series'`` to fill
        with a Series of strings.
    inplace : bool
        Forwarded to the cudf ``fillna`` call.
    """
    psr = pd.Series(['z', None, 'z', None])

    # Each library receives a fill value of its own type: the pandas
    # reference must not be handed a cudf Series.
    if fill_type == 'scalar':
        fill_value_pd = 'a'
        fill_value_cudf = fill_value_pd
    elif fill_type == 'series':
        fill_value_pd = pd.Series(['a', 'b', 'c', 'd'])
        fill_value_cudf = Series.from_pandas(fill_value_pd)

    sr = Series.from_pandas(psr)

    expect = psr.fillna(fill_value_pd)
    got = sr.fillna(fill_value_cudf, inplace=inplace)

    # With inplace set, the series itself is compared.
    if inplace:
        got = sr

    assert_eq(expect, got)
Exemple #13
0
def test_fillna_datetime(fill_type, inplace):
    """Compare datetime ``Series.fillna`` with pandas.

    ``fill_type`` is ``'scalar'`` (a single timestamp) or ``'series'`` (the
    input shifted by one day); ``inplace`` is forwarded to the cudf
    ``fillna`` call.
    """
    yearly = pd.date_range('2010-01-01', '2020-01-10', freq='1y')
    psr = pd.Series(yearly)

    # The series fill value is derived before any nulls are written below.
    if fill_type == 'scalar':
        fill_value = pd.Timestamp('2010-01-02')
    elif fill_type == 'series':
        fill_value = psr + pd.Timedelta('1d')

    # Null out two positions so there is something to fill.
    psr[[5, 9]] = None
    sr = Series.from_pandas(psr)

    expect = psr.fillna(fill_value)
    returned = sr.fillna(fill_value, inplace=inplace)

    # With inplace set, the series itself is compared.
    got = sr if inplace else returned

    assert_eq(expect, got)
Exemple #14
0
def test_fillna_datetime(fill_type, inplace):
    """Compare datetime ``Series.fillna`` with pandas.

    ``fill_type`` is ``"scalar"`` (a single timestamp) or ``"series"`` (the
    input shifted by one day); ``inplace`` is forwarded to the cudf
    ``fillna`` call.
    """
    psr = pd.Series(pd.date_range("2010-01-01", "2020-01-10", freq="1y"))

    # The series fill value is derived before any nulls are written below.
    if fill_type == "scalar":
        fill_value = pd.Timestamp("2010-01-02")
    elif fill_type == "series":
        one_day = pd.Timedelta("1d")
        fill_value = psr + one_day

    # Null out two positions so there is something to fill.
    null_positions = [5, 9]
    psr[null_positions] = None
    sr = Series.from_pandas(psr)

    expect = psr.fillna(fill_value)
    filled = sr.fillna(fill_value, inplace=inplace)

    # With inplace set, the series itself is compared.
    got = sr if inplace else filled

    assert_eq(expect, got)
Exemple #15
0
def test_fillna_categorical(fill_type, null_value, inplace):
    """Check categorical ``Series.fillna`` against hand-written results.

    ``null_value`` is the placeholder put in the two null positions of the
    input; ``fill_type`` is ``'scalar'`` or ``'series'``; ``inplace`` is
    forwarded to the cudf ``fillna`` call.
    """
    raw_values = ['a', 'b', 'a', null_value, 'c', null_value]
    data = pd.Series(raw_values, dtype='category')
    sr = Series.from_pandas(data)

    if fill_type == 'scalar':
        fill_value = 'c'
        expected_values = ['a', 'b', 'a', 'c', 'c', 'c']
    elif fill_type == 'series':
        fill_value = pd.Series(['c', 'c', 'c', 'c', 'c', 'a'],
                               dtype='category')
        expected_values = ['a', 'b', 'a', 'c', 'c', 'a']

    returned = sr.fillna(fill_value, inplace=inplace)
    # With inplace set, the series itself is compared.
    got = sr if inplace else returned

    expect = pd.Series(expected_values, dtype='category')

    assert_eq(expect, got)
Exemple #16
0
def test_fillna_categorical(fill_type, null_value, inplace):
    """Check categorical ``Series.fillna`` against hand-written results.

    ``null_value`` is the placeholder put in the two null positions of the
    input; ``fill_type`` is ``"scalar"`` or ``"series"``; ``inplace`` is
    forwarded to the cudf ``fillna`` call.
    """
    category = "category"
    data = pd.Series(
        ["a", "b", "a", null_value, "c", null_value], dtype=category
    )
    sr = Series.from_pandas(data)

    if fill_type == "scalar":
        fill_value = "c"
        expect = pd.Series(["a", "b", "a", "c", "c", "c"], dtype=category)
    elif fill_type == "series":
        fill_value = pd.Series(
            ["c", "c", "c", "c", "c", "a"], dtype=category
        )
        expect = pd.Series(["a", "b", "a", "c", "c", "a"], dtype=category)

    filled = sr.fillna(fill_value, inplace=inplace)
    # With inplace set, the series itself is compared.
    got = sr if inplace else filled

    assert_eq(expect, got)