Esempio n. 1
0
def test_str_null_to_datetime():
    """Cast date strings with a null-like last entry to ``datetime64[s]``.

    For a trailing ``"NaT"`` string and for a trailing ``None``, the pandas
    cast and the cast on the ``Series`` under test are compared with
    ``assert_eq``. For a trailing literal ``"None"`` string, the pandas cast
    must raise (any exception type) and the ``Series`` cast must raise
    ``ValueError``.
    """
    target = "datetime64[s]"
    dates = ["2001-01-01", "2002-02-02", "2000-01-05"]

    # Null-like final elements for which both casts are expected to agree.
    for null_like in ("NaT", None):
        values = dates + [null_like]
        psr = pd.Series(values)
        gsr = Series(values)
        assert_eq(psr.astype(target), gsr.astype(target))

    # The literal string "None" is not a null marker; both casts must fail.
    values = dates + ["None"]
    psr = pd.Series(values)
    gsr = Series(values)

    try:
        psr.astype(target)
    except Exception:
        pandas_raised = True
    else:
        pandas_raised = False

    if not pandas_raised:
        raise Exception("Expected psr.astype('datetime64[s]') to fail")

    with pytest.raises(ValueError):
        gsr.astype(target)
Esempio n. 2
0
def test_typecast_on_join_categorical(dtype_l, dtype_r):
    """Inner-join two frames where exactly one join column is categorical.

    Combinations in which neither side or both sides are ``"category"`` are
    skipped. The expected join column takes the dtype of the categorical
    side, and its categories are set from that side's data cast to ``int``.
    The comparison is made with ``check_dtype=False``.
    """
    left_is_cat = dtype_l == "category"
    right_is_cat = dtype_r == "category"
    if not left_is_cat and not right_is_cat:
        pytest.skip("at least one side must be category for this set of tests")
    if left_is_cat and right_is_cat:
        pytest.skip("Can't determine which categorical to use")

    payload = ["a", "b", "c", "d", "e"]
    left_keys = Series([1, 2, 3, 4, 5], dtype=dtype_l)
    right_keys = Series([1, 2, 3, 4, 6], dtype=dtype_r)

    # Exactly one side is categorical at this point; it dictates the
    # expected dtype and categories of the joined column.
    cat_keys = left_keys if left_is_cat else right_keys
    exp_dtype = cat_keys.dtype
    exp_categories = cat_keys.astype(int)._column

    gdf_l = DataFrame({"join_col": left_keys, "B": payload})
    gdf_r = DataFrame({"join_col": right_keys, "B": payload})

    # Keys 1-4 are the ones present on both sides.
    matched_payload = ["a", "b", "c", "d"]
    expect = DataFrame(
        {
            "join_col": Series([1, 2, 3, 4], dtype=exp_dtype),
            "B_x": matched_payload,
            "B_y": matched_payload,
        }
    )
    expect["join_col"] = expect["join_col"].cat.set_categories(exp_categories)

    got = gdf_l.merge(gdf_r, on="join_col", how="inner")
    assert_eq(expect, got, check_dtype=False)
Esempio n. 3
0
def test_str_to_datetime_error():
    """Check that casting the literal string ``"None"`` to datetime fails.

    The pandas cast must raise (any exception type); once it has, the cast
    on the ``Series`` under test must raise ``ValueError``.
    """
    values = ["2001-01-01", "2002-02-02", "2000-01-05", "None"]
    target = "datetime64[s]"
    psr = pd.Series(values)
    gsr = Series(values)

    try:
        psr.astype(target)
    except Exception:
        pandas_raised = True
    else:
        pandas_raised = False

    # Without a pandas failure there is no reference behaviour to match.
    if not pandas_raised:
        raise AssertionError("Expected psr.astype('datetime64[s]') to fail")

    with pytest.raises(ValueError):
        gsr.astype(target)
Esempio n. 4
0
def test_datetime_series_binops_pandas(lhs_dtype, rhs_dtype):
    """Compare datetime comparison operators against pandas.

    Two pandas datetime series over the same date span (one at a 400h
    frequency, one at 401h) are wrapped in ``Series`` and cast to
    ``lhs_dtype`` and ``rhs_dtype``. The test first checks that casting back
    to ``datetime64[ns]`` matches the pandas input, then that ``<``, ``>``,
    ``==``, ``<=`` and ``>=`` give the same result as pandas.
    """
    start, stop = "20010101", "20020215"
    pd_lhs = pd.Series(pd.date_range(start, stop, freq="400h", name="times"))
    pd_rhs = pd.Series(pd.date_range(start, stop, freq="401h", name="times"))
    gdf_lhs = Series(pd_lhs).astype(lhs_dtype)
    gdf_rhs = Series(pd_rhs).astype(rhs_dtype)

    # Casting back to nanoseconds should reproduce the pandas input.
    assert_eq(pd_lhs, gdf_lhs.astype("datetime64[ns]"))
    assert_eq(pd_rhs, gdf_rhs.astype("datetime64[ns]"))

    comparisons = (
        lambda a, b: a < b,
        lambda a, b: a > b,
        lambda a, b: a == b,
        lambda a, b: a <= b,
        lambda a, b: a >= b,
    )
    for compare in comparisons:
        assert_eq(compare(pd_lhs, pd_rhs), compare(gdf_lhs, gdf_rhs))
Esempio n. 5
0
def test_string_numeric_astype(dtype):
    """Cast non-string data to ``str`` and compare with pandas.

    ``dtype`` is a dtype name whose prefix (``bool``, ``int``, ``float`` or
    ``datetime64``) selects the sample data. For datetime dtypes the pandas
    series is always built as ``datetime64[ns]`` and the ``Series`` under
    test is created from it with ``Series.from_pandas``.
    """
    is_datetime = dtype.startswith("datetime64")

    if is_datetime:
        data = [1000000000, 2000000000, 3000000000, 4000000000, 5000000000]
    elif dtype.startswith("float"):
        data = [1.0, 2.0, 3.0, 4.0, 5.0]
    elif dtype.startswith("int"):
        data = [1, 2, 3, 4, 5]
    elif dtype.startswith("bool"):
        data = [1, 0, 1, 0, 1]

    if is_datetime:
        ps = pd.Series(data, dtype="datetime64[ns]")
        gs = Series.from_pandas(ps)
        # pandas picks a data-dependent format for datetime64 -> str, so the
        # expectation is rendered with one fixed format instead.
        expect = ps.dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        ps = pd.Series(data, dtype=dtype)
        gs = Series(data, dtype=dtype)
        expect = ps.astype("str")

    got = gs.astype("str")
    assert_eq(expect, got)
Esempio n. 6
0
def test_string_astype(dtype):
    """Cast string data to ``dtype`` and compare with pandas.

    The sample strings are chosen from the dtype name: integer, float,
    boolean or ISO-style timestamp literals, or plain text for ``"str"`` and
    ``"object"``.
    """
    if dtype in ("str", "object"):
        data = ["ab", "cd", "ef", "gh", "ij"]
    elif dtype.startswith("datetime64"):
        data = [
            "2019-06-04T00:00:00Z",
            "2019-06-04T12:12:12Z",
            "2019-06-03T00:00:00Z",
            "2019-05-04T00:00:00Z",
            "2018-06-04T00:00:00Z",
        ]
    elif dtype.startswith("bool"):
        data = ["True", "False", "True", "False", "False"]
    elif dtype.startswith("float"):
        data = ["1.0", "2.0", "3.0", "4.0", "5.0"]
    elif dtype.startswith("int"):
        data = ["1", "2", "3", "4", "5"]

    ps = pd.Series(data)
    gs = Series(data)

    # pandas str -> bool yields True for every non-empty string, so the
    # boolean expectation is computed by comparing against "True" instead.
    expect = (ps == "True") if dtype.startswith("bool") else ps.astype(dtype)
    got = gs.astype(dtype)

    assert_eq(expect, got)
Esempio n. 7
0
    def func(index):
        """Check ``binop`` on an int32-cast series against the NumPy result.

        ``binop`` is taken from the enclosing scope, which is not visible in
        this excerpt. ``index`` is accepted but not used in the body.
        """
        samples = np.random.random(100) * 10
        series = Series(samples)

        got = binop(series.astype("int32"), series)
        want = binop(samples.astype("int32"), samples)

        np.testing.assert_almost_equal(got.to_array(), want, decimal=5)
Esempio n. 8
0
def test_typecast_from_datetime_to_datetime(data, from_dtype, to_dtype):
    """Cast a column from ``from_dtype`` to ``to_dtype`` and compare to NumPy.

    ``data`` is first cast to ``from_dtype``; the column of a ``Series``
    built from it is then cast to ``to_dtype`` and checked against the same
    cast done on the array.
    """
    source = data.astype(from_dtype)
    column = Series(source)._column

    expected = source.astype(to_dtype)
    casted_column = column.astype(to_dtype)

    np.testing.assert_equal(expected, casted_column.to_array())
Esempio n. 9
0
def test_typecast_to_from_datetime(data, from_dtype, to_dtype):
    """Round-trip ``from_dtype`` -> ``to_dtype`` -> ``from_dtype``.

    The round trip is done on the array and on a ``Series`` built from it,
    and the two results are compared.
    """
    source = data.astype(from_dtype)
    series = Series(source)

    np_forward = source.astype(to_dtype)
    expected = np_forward.astype(from_dtype)

    gdf_forward = series.astype(to_dtype)
    actual = gdf_forward.astype(from_dtype)

    np.testing.assert_equal(expected, np.array(actual))
Esempio n. 10
0
def test_string_empty_astype(dtype):
    """Cast an empty string series to ``dtype`` and compare with pandas."""
    empty = []

    expect = pd.Series(empty, dtype="str").astype(dtype)
    got = Series(empty, dtype="str").astype(dtype)

    assert_eq(expect, got)
Esempio n. 11
0
def test_typecast_from_datetime_to_int64_to_datetime(data, dtype):
    """Cast to ``int64`` and then to ``dtype``, comparing with NumPy.

    A pandas series is built from a copy of ``data``; the cast chain is
    applied to its array form and to a ``Series`` built from it.
    """
    pandas_series = pd.Series(data.copy())
    as_array = np.array(pandas_series)
    series = Series(pandas_series)

    expected = as_array.astype(np.int64).astype(dtype)

    as_int = series.astype(np.int64)
    actual = as_int.astype(dtype)

    np.testing.assert_equal(expected, actual.to_array())
Esempio n. 12
0
def test_typecast_from_datetime(data, dtype):
    """Cast to ``dtype`` and compare the result with NumPy.

    A pandas series is built from a copy of ``data``; the cast is applied to
    its array form and to a ``Series`` built from it.
    """
    pandas_series = pd.Series(data.copy())
    as_array = np.array(pandas_series)
    series = Series(pandas_series)

    expected = as_array.astype(dtype)
    actual = series.astype(dtype)

    np.testing.assert_equal(expected, np.array(actual))
Esempio n. 13
0
def test_string_empty_numeric_astype(dtype):
    """Cast an empty series of ``dtype`` to ``str`` and compare with pandas.

    For any ``datetime64`` dtype the pandas series is built as
    ``datetime64[ns]``; the ``Series`` under test always uses ``dtype``.
    """
    data = []

    is_datetime = dtype.startswith("datetime64")
    pandas_dtype = "datetime64[ns]" if is_datetime else dtype
    ps = pd.Series(data, dtype=pandas_dtype)
    gs = Series(data, dtype=dtype)

    assert_eq(ps.astype("str"), gs.astype("str"))
Esempio n. 14
0
def test_date_minmax():
    """Compare min and max of a ``datetime64[ms]`` cast against NumPy.

    The input is 1000 random normal samples, cast to ``datetime64[ms]`` both
    as an array and as a ``Series``.
    """
    unit = "datetime64[ms]"
    samples = np.random.normal(size=10 ** 3)
    series = Series(samples)

    np_dates = samples.astype(unit)
    gdf_dates = series.astype(unit)

    assert np_dates.min() == gdf_dates.min()
    assert np_dates.max() == gdf_dates.max()
Esempio n. 15
0
def test_str_null_to_datetime(data, dtype):
    """Cast ``data`` to ``dtype`` and compare the result with pandas."""
    expect = pd.Series(data).astype(dtype)
    got = Series(data).astype(dtype)

    assert_eq(expect, got)