def test_datetime_series_binops_numpy(lhs_dtype, rhs_dtype):
    """Check datetime Series comparison operators against NumPy.

    Two date ranges over the same span (400h and 401h frequency) are cast
    to ``lhs_dtype`` / ``rhs_dtype`` on both the Series side and the NumPy
    side; each comparison operator must then give the same element-wise
    result as the matching NumPy ufunc.
    """
    start, stop = "20010101", "20020215"
    pd_lhs = pd.Series(pd.date_range(start, stop, freq="400h", name="times"))
    pd_rhs = pd.Series(pd.date_range(start, stop, freq="401h", name="times"))

    gdf_lhs = Series(pd_lhs).astype(lhs_dtype)
    gdf_rhs = Series(pd_rhs).astype(rhs_dtype)
    np_lhs = np.array(pd_lhs).astype(lhs_dtype)
    np_rhs = np.array(pd_rhs).astype(rhs_dtype)

    # The casts must agree before the comparison results are meaningful.
    np.testing.assert_equal(np_lhs, gdf_lhs.to_array())
    np.testing.assert_equal(np_rhs, gdf_rhs.to_array())

    # (NumPy ufunc, equivalent Python operator) pairs, checked in order.
    operator_pairs = (
        (np.less, lambda a, b: a < b),
        (np.greater, lambda a, b: a > b),
        (np.equal, lambda a, b: a == b),
        (np.less_equal, lambda a, b: a <= b),
        (np.greater_equal, lambda a, b: a >= b),
    )
    for np_op, series_op in operator_pairs:
        np.testing.assert_equal(
            np_op(np_lhs, np_rhs),
            series_op(gdf_lhs, gdf_rhs).to_array(),
        )
def test_series_argsort(nelem, dtype, asc):
    """Compare ``Series.argsort`` with NumPy's mergesort-based argsort.

    ``asc`` selects ascending or descending order; the descending reference
    is obtained by argsorting the negated values.
    """
    np.random.seed(0)
    values = (100 * np.random.random(nelem)).astype(dtype)
    sr = Series(values)
    res = sr.argsort(ascending=asc)

    host_values = sr.to_array()
    sort_keys = host_values if asc else -host_values
    expected = np.argsort(sort_keys, kind="mergesort")

    np.testing.assert_array_equal(expected, res.to_array())
def test_categorical_basic():
    """Round-trip a small pandas Categorical through Series and an index.

    Verifies the codes, the ``.cat`` attributes (categories, ordered, codes
    and their dtype), the whitespace-separated tokens of ``str(sr)``, and
    the codes exposed by the index built from the same categorical.
    """
    cat = pd.Categorical(
        ["a", "a", "b", "c", "a"], categories=["a", "b", "c"]
    )
    cudf_cat = as_index(cat)

    pdsr = pd.Series(cat)
    sr = Series(cat)
    np.testing.assert_array_equal(cat.codes, sr.to_array())

    # Attribute parity between the pandas and the Series accessors.
    assert tuple(pdsr.cat.categories) == tuple(sr.cat.categories)
    assert pdsr.cat.ordered == sr.cat.ordered
    np.testing.assert_array_equal(
        pdsr.cat.codes.values, sr.cat.codes.to_array()
    )
    np.testing.assert_array_equal(pdsr.cat.codes.dtype, sr.cat.codes.dtype)

    # Only whitespace-separated tokens are compared; zip() stops at the
    # shorter token list, so trailing tokens of the longer one are ignored.
    string = str(sr)
    expect_str = """ 0 a 1 a 2 b 3 c 4 a """
    got_tokens = string.split()
    expected_tokens = expect_str.split()
    assert all(
        got == expected for got, expected in zip(got_tokens, expected_tokens)
    )

    assert_eq(cat.codes, cudf_cat.codes.to_array())
def test_series_compare_scalar(nelem, cmpop, obj_class, dtype):
    """Compare a Series (or index) with a scalar on either side of ``cmpop``.

    Parameters
    ----------
    nelem : int
        Number of random elements to generate. Must be at least 1 because
        the scalar operand is drawn from the generated array.
    cmpop : callable
        Binary comparison taking ``(lhs, rhs)``.
    obj_class : str
        ``"Index"`` converts the Series with ``as_index`` before comparing;
        any other value compares the Series directly.
    dtype : dtype-like
        dtype the random integers are cast to.
    """
    # Size the input by ``nelem`` so that varying it actually varies the
    # test instead of always running on a fixed 100 elements.
    arr1 = np.random.randint(0, 100, nelem).astype(dtype)
    sr1 = Series(arr1)
    # ``.item()`` turns the NumPy scalar into a plain Python scalar.
    rhs = random.choice(arr1).item()

    if obj_class == "Index":
        sr1 = as_index(sr1)

    # Exercise the scalar on both the right and the left of the operator.
    result1 = cmpop(sr1, rhs)
    result2 = cmpop(rhs, sr1)

    if obj_class == "Index":
        # Wrap back into Series so ``to_array`` is used uniformly below.
        result1 = Series(result1)
        result2 = Series(result2)

    np.testing.assert_equal(result1.to_array(), cmpop(arr1, rhs))
    np.testing.assert_equal(result2.to_array(), cmpop(rhs, arr1))
def test_series_sort_index(nelem, asc):
    """Sorting by value and then by index must restore the original order.

    With ``asc`` false the index sort is descending, so the result is
    reversed before comparing with the original values.
    """
    np.random.seed(0)
    sr = Series(100 * np.random.random(nelem))
    orig = sr.to_array()

    by_value = sr.sort_values()
    by_index = by_value.sort_index(ascending=asc)
    got = by_index.to_array()

    # A descending index sort yields the original data back to front.
    got = got if asc else got[::-1]
    np.testing.assert_array_equal(orig, got)
def test_series_compare(cmpop, obj_class, dtype):
    """Compare two Series (or indexes) element-wise with ``cmpop``.

    Each object is compared with itself and the two are compared with each
    other; every result must match ``cmpop`` applied to the NumPy inputs.
    """
    arr1 = np.random.randint(0, 100, 100).astype(dtype)
    arr2 = np.random.randint(0, 100, 100).astype(dtype)

    use_index = obj_class == "Index"
    lhs = Series(arr1)
    rhs = Series(arr2)
    if use_index:
        lhs = as_index(lhs)
        rhs = as_index(rhs)

    # Evaluate every comparison before asserting on any of them.
    device_pairs = ((lhs, lhs), (rhs, rhs), (lhs, rhs))
    results = [cmpop(a, b) for a, b in device_pairs]
    if use_index:
        results = [Series(res) for res in results]

    host_pairs = ((arr1, arr1), (arr2, arr2), (arr1, arr2))
    for res, (host_a, host_b) in zip(results, host_pairs):
        np.testing.assert_equal(res.to_array(), cmpop(host_a, host_b))
def test_series_binop_scalar(nelem, binop, obj_class):
    """Apply ``binop`` between a Series (or index) and a Python scalar.

    The scalar is drawn from the generated data, and the result is compared
    with ``binop`` applied to the NumPy array using an approximate check.
    """
    arr = np.random.random(nelem)
    rhs = random.choice(arr).item()

    use_index = obj_class == "Index"
    sr = Series(arr)
    lhs = as_index(sr) if use_index else sr

    result = binop(lhs, rhs)
    if use_index:
        result = Series(result)

    np.testing.assert_almost_equal(result.to_array(), binop(arr, rhs))
def test_categorical_empty():
    """An empty pandas Categorical must round-trip through Series.

    Checks the codes plus the ``.cat`` attributes (categories, ordered,
    codes and their dtype) against the pandas Series built from the same
    categorical.
    """
    cat = pd.Categorical([])
    pdsr = pd.Series(cat)
    sr = Series(cat)
    np.testing.assert_array_equal(cat.codes, sr.to_array())

    # Attribute parity between the pandas and the Series accessors.
    pd_accessor = pdsr.cat
    assert tuple(pd_accessor.categories) == tuple(sr.cat.categories)
    assert pd_accessor.ordered == sr.cat.ordered

    np.testing.assert_array_equal(
        pdsr.cat.codes.values, sr.cat.codes.to_array()
    )
    np.testing.assert_array_equal(pdsr.cat.codes.dtype, sr.cat.codes.dtype)
def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype):
    """Apply ``binop`` to two Series (or indexes) of possibly mixed dtypes.

    Random values in [0, 100) are cast to ``lhs_dtype`` and ``rhs_dtype``;
    the result must match ``binop`` applied to the NumPy arrays.
    """
    arr1 = (np.random.random(100) * 100).astype(lhs_dtype)
    arr2 = (np.random.random(100) * 100).astype(rhs_dtype)

    use_index = obj_class == "Index"
    lhs, rhs = Series(arr1), Series(arr2)
    if use_index:
        lhs, rhs = as_index(lhs), as_index(rhs)

    result = binop(lhs, rhs)
    if use_index:
        result = Series(result)

    np.testing.assert_almost_equal(result.to_array(), binop(arr1, arr2))
def test_series_cmpop_mixed_dtype(cmpop, lhs_dtype, rhs_dtype, obj_class):
    """Compare two operands of different dtypes with ``cmpop``.

    Five random values per side are cast to ``lhs_dtype`` / ``rhs_dtype``
    and the comparison result is checked against NumPy.
    """
    nelem = 5
    lhs = (np.random.random(nelem) * nelem).astype(lhs_dtype)
    rhs = (np.random.random(nelem) * nelem).astype(rhs_dtype)

    use_index = obj_class == "Index"
    left, right = Series(lhs), Series(rhs)
    if use_index:
        left, right = as_index(left), as_index(right)

    # NOTE(review): both operands are wrapped in Series again here, so the
    # "Index" variant may not compare index objects directly - confirm
    # whether that is intentional.
    result = cmpop(Series(left), Series(right))
    if use_index:
        result = Series(result)

    np.testing.assert_array_equal(result.to_array(), cmpop(lhs, rhs))
def test_categorical_integer():
    """Categorical with entries outside the declared categories.

    Two of the five input values ("_") are not among the categories; the
    test expects a null count of 2, codes matching pandas once nulls are
    filled, a matching codes dtype, and a specific token sequence from
    ``str(sr)``.
    """
    cat = pd.Categorical(
        ["a", "_", "_", "c", "a"], categories=["a", "b", "c"]
    )
    pdsr = pd.Series(cat)
    sr = Series(cat)

    np.testing.assert_array_equal(cat.codes, sr.to_array(fillna="pandas"))
    assert sr.null_count == 2

    # Nulls are filled with -1 before comparing with the pandas codes.
    filled_codes = sr.cat.codes.fillna(-1)
    np.testing.assert_array_equal(
        pdsr.cat.codes.values, filled_codes.to_array()
    )
    np.testing.assert_equal(pdsr.cat.codes.dtype, sr.cat.codes.dtype)

    # Compare the printed form token by token, ignoring whitespace layout.
    string = str(sr)
    expect_str = """ 0 a 1 null 2 null 3 c 4 a dtype: category Categories (3, object): [a, b, c] """
    got_tokens = string.split()
    expected_tokens = expect_str.split()
    assert got_tokens == expected_tokens
def test_typecast_to_different_datetime_resolutions(data, dtype):
    """Casting a Series to ``dtype`` must match the same cast done by NumPy.

    ``data`` is copied into a pandas Series, which feeds both the NumPy
    reference cast and the Series cast under test.
    """
    pd_data = pd.Series(data.copy())
    expected = np.array(pd_data).astype(dtype)
    casted = Series(pd_data).astype(dtype)
    np.testing.assert_equal(expected, casted.to_array())