def test_str_null_to_datetime():
    """Check string -> ``datetime64[s]`` casts when the data has null-like entries.

    A trailing ``"NaT"`` string or ``None`` must cast to the same result as
    pandas. The literal string ``"None"`` is not a valid date: pandas is
    expected to fail on it, and ``Series.astype`` must raise ``ValueError``.
    """
    valid_dates = ["2001-01-01", "2002-02-02", "2000-01-05"]

    # Null-like trailing values that both implementations should cast alike.
    for null_value in ("NaT", None):
        values = valid_dates + [null_value]
        psr = pd.Series(values)
        gsr = Series(values)
        assert_eq(psr.astype("datetime64[s]"), gsr.astype("datetime64[s]"))

    invalid_values = valid_dates + ["None"]
    psr = pd.Series(invalid_values)
    gsr = Series(invalid_values)

    try:
        psr.astype("datetime64[s]")
    except Exception:
        # Any pandas failure is acceptable here; its exact type is not checked.
        pass
    else:
        raise Exception("Expected psr.astype('datetime64[s]') to fail")

    with pytest.raises(ValueError):
        gsr.astype("datetime64[s]")
def test_typecast_on_join_categorical(dtype_l, dtype_r):
    """Inner-merge two frames where exactly one join column is categorical.

    The test is skipped unless exactly one of ``dtype_l`` / ``dtype_r`` equals
    ``"category"``. The expected ``join_col`` is built with the categorical
    side's dtype and with categories taken from that side cast to ``int``.
    The comparison ignores dtypes (``check_dtype=False``).

    NOTE(review): ``dtype_l`` and ``dtype_r`` are presumably supplied by
    pytest parametrization defined outside this view.
    """
    left_is_cat = dtype_l == "category"
    right_is_cat = dtype_r == "category"
    if not (left_is_cat or right_is_cat):
        pytest.skip("at least one side must be category for this set of tests")
    if left_is_cat and right_is_cat:
        pytest.skip("Can't determine which categorical to use")

    other_data = ["a", "b", "c", "d", "e"]
    join_data_l = Series([1, 2, 3, 4, 5], dtype=dtype_l)
    join_data_r = Series([1, 2, 3, 4, 6], dtype=dtype_r)

    # Past the skips exactly one side is categorical; it drives the expectation.
    categorical_side = join_data_l if left_is_cat else join_data_r
    exp_dtype = categorical_side.dtype
    exp_categories = categorical_side.astype(int)._column

    gdf_l = DataFrame({"join_col": join_data_l, "B": other_data})
    gdf_r = DataFrame({"join_col": join_data_r, "B": other_data})

    # Keys 1..4 are the only ones present on both sides.
    matched_keys = [1, 2, 3, 4]
    matched_other = ["a", "b", "c", "d"]
    expect = DataFrame(
        {
            "join_col": Series(matched_keys, dtype=exp_dtype),
            "B_x": matched_other,
            "B_y": matched_other,
        }
    )
    expect["join_col"] = expect["join_col"].cat.set_categories(exp_categories)

    got = gdf_l.merge(gdf_r, on="join_col", how="inner")

    assert_eq(expect, got, check_dtype=False)
def test_str_to_datetime_error():
    """Casting the literal string ``"None"`` to ``datetime64[s]`` must fail.

    pandas is expected to raise on this input; once it has, ``Series.astype``
    must raise ``ValueError``. If pandas does not raise, the test fails with
    an ``AssertionError``.
    """
    values = ["2001-01-01", "2002-02-02", "2000-01-05", "None"]
    psr = pd.Series(values)
    gsr = Series(values)

    try:
        psr.astype("datetime64[s]")
    except Exception:
        # pandas rejected the input, so the Series under test must as well.
        with pytest.raises(ValueError):
            gsr.astype("datetime64[s]")
        return

    raise AssertionError("Expected psr.astype('datetime64[s]') to fail")
def test_datetime_series_binops_pandas(lhs_dtype, rhs_dtype):
    """Compare datetime comparison operators against pandas.

    Two pandas date ranges (400h and 401h steps) are wrapped in ``Series``
    and cast to ``lhs_dtype`` / ``rhs_dtype``. Casting back to
    ``datetime64[ns]`` must reproduce the pandas data, and ``<``, ``>``,
    ``==``, ``<=``, ``>=`` must give the same results as pandas.

    NOTE(review): ``lhs_dtype`` and ``rhs_dtype`` are presumably supplied by
    pytest parametrization defined outside this view.
    """
    pd_lhs = pd.Series(
        pd.date_range("20010101", "20020215", freq="400h", name="times")
    )
    pd_rhs = pd.Series(
        pd.date_range("20010101", "20020215", freq="401h", name="times")
    )
    gdf_lhs = Series(pd_lhs).astype(lhs_dtype)
    gdf_rhs = Series(pd_rhs).astype(rhs_dtype)

    # Round-tripping through the requested dtype must not alter the values.
    assert_eq(pd_lhs, gdf_lhs.astype("datetime64[ns]"))
    assert_eq(pd_rhs, gdf_rhs.astype("datetime64[ns]"))

    comparisons = (
        lambda a, b: a < b,
        lambda a, b: a > b,
        lambda a, b: a == b,
        lambda a, b: a <= b,
        lambda a, b: a >= b,
    )
    for compare in comparisons:
        assert_eq(compare(pd_lhs, pd_rhs), compare(gdf_lhs, gdf_rhs))
def test_string_numeric_astype(dtype):
    """Cast bool/int/float/datetime data to ``"str"`` and compare with pandas.

    Sample data is chosen from the prefix of ``dtype``. For datetime dtypes
    the pandas series is always built as ``datetime64[ns]`` and the Series
    under test is created from it via ``Series.from_pandas``.

    NOTE(review): ``dtype`` is presumably a dtype name string supplied by
    pytest parametrization defined outside this view.
    """
    if dtype.startswith("datetime64"):
        data = [1000000000, 2000000000, 3000000000, 4000000000, 5000000000]
        ps = pd.Series(data, dtype="datetime64[ns]")
        gs = Series.from_pandas(ps)
        # Pandas datetime64 --> str typecasting returns arbitrary format
        # depending on the data, so making it consistent unless we choose to
        # match the behavior
        expect = ps.dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        if dtype.startswith("bool"):
            data = [1, 0, 1, 0, 1]
        elif dtype.startswith("int"):
            data = [1, 2, 3, 4, 5]
        elif dtype.startswith("float"):
            data = [1.0, 2.0, 3.0, 4.0, 5.0]
        ps = pd.Series(data, dtype=dtype)
        gs = Series(data, dtype=dtype)
        expect = ps.astype("str")

    got = gs.astype("str")

    assert_eq(expect, got)
def test_string_astype(dtype):
    """Cast string data to ``dtype`` and compare the result with pandas.

    The input strings are chosen to be parseable as the target dtype. For
    bool targets the expectation is computed as ``ps == "True"`` instead of
    a pandas cast.

    NOTE(review): ``dtype`` is presumably a dtype name string supplied by
    pytest parametrization defined outside this view.
    """
    samples_by_prefix = {
        "int": ["1", "2", "3", "4", "5"],
        "float": ["1.0", "2.0", "3.0", "4.0", "5.0"],
        "bool": ["True", "False", "True", "False", "False"],
        "datetime64": [
            "2019-06-04T00:00:00Z",
            "2019-06-04T12:12:12Z",
            "2019-06-03T00:00:00Z",
            "2019-05-04T00:00:00Z",
            "2018-06-04T00:00:00Z",
        ],
    }

    if dtype in ("str", "object"):
        data = ["ab", "cd", "ef", "gh", "ij"]
    else:
        for prefix, samples in samples_by_prefix.items():
            if dtype.startswith(prefix):
                data = samples
                break

    ps = pd.Series(data)
    gs = Series(data)

    # Pandas str --> bool typecasting always returns True if there's a string
    expect = (ps == "True") if dtype.startswith("bool") else ps.astype(dtype)
    got = gs.astype(dtype)

    assert_eq(expect, got)
def func(index):
    """Apply ``binop`` to an int32-cast Series and compare against numpy.

    Draws 100 random values scaled by 10, applies ``binop`` to the int32
    cast of the data and the original data, once through ``Series`` and once
    through numpy, and checks the results agree to 5 decimal places.

    ``index`` is not used in the body. NOTE(review): ``binop`` is not defined
    in this block; presumably it comes from an enclosing scope, and ``index``
    is a positional argument required by whatever invokes this helper.
    """
    values = np.random.random(100) * 10
    series = Series(values)

    series_as_int = series.astype("int32")
    result = binop(series_as_int, series)

    values_as_int = values.astype("int32")
    expect = binop(values_as_int, values)

    np.testing.assert_almost_equal(result.to_array(), expect, decimal=5)
def test_typecast_from_datetime_to_datetime(data, from_dtype, to_dtype):
    """Cast a column from ``from_dtype`` to ``to_dtype`` and compare with numpy.

    The cast under test is performed on the Series' underlying ``_column``
    rather than on the Series itself.

    NOTE(review): ``data`` is assumed to be a numpy array and the dtype
    arguments datetime dtypes (per the test name); all are presumably
    supplied by fixtures/parametrization outside this view.
    """
    source = data.astype(from_dtype)
    column = Series(source)._column

    expected = source.astype(to_dtype)
    actual = column.astype(to_dtype)

    np.testing.assert_equal(expected, actual.to_array())
def test_typecast_to_from_datetime(data, from_dtype, to_dtype):
    """Round-trip ``from_dtype -> to_dtype -> from_dtype`` and compare with numpy.

    NOTE(review): ``data`` is assumed to be a numpy array; it and the dtype
    arguments are presumably supplied by fixtures/parametrization outside
    this view.
    """
    source = data.astype(from_dtype)
    series = Series(source)

    expected = source.astype(to_dtype).astype(from_dtype)

    converted = series.astype(to_dtype)
    round_tripped = converted.astype(from_dtype)

    np.testing.assert_equal(expected, np.array(round_tripped))
def test_string_empty_astype(dtype):
    """Cast an empty ``"str"`` series to ``dtype`` and compare with pandas.

    NOTE(review): ``dtype`` is presumably supplied by pytest parametrization
    defined outside this view.
    """
    no_values = []
    pandas_empty = pd.Series(no_values, dtype="str")
    series_empty = Series(no_values, dtype="str")

    assert_eq(pandas_empty.astype(dtype), series_empty.astype(dtype))
def test_typecast_from_datetime_to_int64_to_datetime(data, dtype):
    """Cast data to ``int64`` and then to ``dtype``, comparing with numpy.

    The input is copied before being wrapped, so the caller's ``data`` is
    not shared with the pandas series built here.

    NOTE(review): ``data`` is assumed to hold datetimes (per the test name);
    it and ``dtype`` are presumably supplied by fixtures/parametrization
    outside this view.
    """
    pandas_series = pd.Series(data.copy())
    numpy_values = np.array(pandas_series)
    series = Series(pandas_series)

    expected = numpy_values.astype(np.int64).astype(dtype)

    as_int64 = series.astype(np.int64)
    actual = as_int64.astype(dtype)

    np.testing.assert_equal(expected, actual.to_array())
def test_typecast_from_datetime(data, dtype):
    """Cast data to ``dtype`` and compare the result with numpy's cast.

    NOTE(review): ``data`` is assumed to hold datetimes (per the test name);
    it and ``dtype`` are presumably supplied by fixtures/parametrization
    outside this view.
    """
    pandas_series = pd.Series(data.copy())
    numpy_values = np.array(pandas_series)
    series = Series(pandas_series)

    expected = numpy_values.astype(dtype)
    actual = series.astype(dtype)

    np.testing.assert_equal(expected, np.array(actual))
def test_string_empty_numeric_astype(dtype):
    """Cast an empty series of ``dtype`` to ``"str"`` and compare with pandas.

    For datetime dtypes the pandas series is always created as
    ``datetime64[ns]``; the Series under test always uses ``dtype`` as given.

    NOTE(review): ``dtype`` is presumably a dtype name string supplied by
    pytest parametrization defined outside this view.
    """
    no_values = []

    pandas_dtype = "datetime64[ns]" if dtype.startswith("datetime64") else dtype
    pandas_empty = pd.Series(no_values, dtype=pandas_dtype)
    series_empty = Series(no_values, dtype=dtype)

    assert_eq(pandas_empty.astype("str"), series_empty.astype("str"))
def test_date_minmax():
    """``min`` and ``max`` of ``datetime64[ms]`` data must match numpy.

    The data is 1000 normally distributed random floats, cast to
    ``datetime64[ms]`` by numpy and by the Series under test.
    """
    raw = np.random.normal(size=10 ** 3)
    series = Series(raw)

    numpy_dates = raw.astype("datetime64[ms]")
    series_dates = series.astype("datetime64[ms]")

    for reduction in ("min", "max"):
        numpy_value = getattr(numpy_dates, reduction)()
        series_value = getattr(series_dates, reduction)()
        assert numpy_value == series_value
def test_str_null_to_datetime(data, dtype):
    """Cast ``data`` to ``dtype`` and compare the result with pandas.

    NOTE(review): per the test name, ``data`` presumably holds date strings
    with null-like entries and ``dtype`` is a datetime dtype; both are
    presumably supplied by pytest parametrization outside this view.
    """
    pandas_result = pd.Series(data).astype(dtype)
    series_result = Series(data).astype(dtype)

    assert_eq(pandas_result, series_result)