class TestCasting:
    """Casting tests for the nullable integer arrays.

    Covers ``pd.Index`` construction, ``Index.astype``/``Series.astype``
    and rejection of lossy float input.  ``all_data`` and (in
    ``test_construct_cast_invalid``) ``dtype`` are not defined in this
    class; presumably they are pytest fixtures supplied elsewhere.
    """

    @pytest.mark.parametrize('dropna', [True, False])
    def test_construct_index(self, all_data, dropna):
        """An Index built from an integer array equals the object Index."""
        # ensure that we do not coerce to Float64Index, rather
        # keep as Index
        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        result = pd.Index(integer_array(other, dtype=all_data.dtype))
        expected = pd.Index(other, dtype=object)

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('dropna', [True, False])
    def test_astype_index(self, all_data, dropna):
        """``Index.astype(dtype)`` agrees with casting via ``object``."""
        # as an int/uint index to Index
        all_data = all_data[:10]
        if dropna:
            other = all_data[~all_data.isna()]
        else:
            other = all_data

        dtype = all_data.dtype
        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        result = idx.astype(dtype)
        expected = idx.astype(object).astype(dtype)
        tm.assert_index_equal(result, expected)

    def test_astype(self, all_data):
        """``Series.astype`` to the own, an Int8, the NumPy and object dtype.

        ``ints`` holds only the non-missing entries of the first ten
        values; ``mixed`` is the unfiltered slice.
        """
        all_data = all_data[:10]

        ints = all_data[~all_data.isna()]
        mixed = all_data
        dtype = Int8Dtype()

        # coerce to same type - ints
        s = pd.Series(ints)
        result = s.astype(all_data.dtype)
        expected = pd.Series(ints)
        tm.assert_series_equal(result, expected)

        # coerce to a different extension dtype (Int8) - ints
        s = pd.Series(ints)
        result = s.astype(dtype)
        expected = pd.Series(ints, dtype=dtype)
        tm.assert_series_equal(result, expected)

        # coerce to same numpy_dtype - ints
        s = pd.Series(ints)
        result = s.astype(all_data.dtype.numpy_dtype)
        expected = pd.Series(ints._data.astype(
            all_data.dtype.numpy_dtype))
        tm.assert_series_equal(result, expected)

        # coerce to same type - mixed
        s = pd.Series(mixed)
        result = s.astype(all_data.dtype)
        expected = pd.Series(mixed)
        tm.assert_series_equal(result, expected)

        # coerce to a different extension dtype (Int8) - mixed
        s = pd.Series(mixed)
        result = s.astype(dtype)
        expected = pd.Series(mixed, dtype=dtype)
        tm.assert_series_equal(result, expected)

        # coerce to same numpy_dtype - mixed: expected to be rejected
        s = pd.Series(mixed)
        with pytest.raises(ValueError):
            s.astype(all_data.dtype.numpy_dtype)

        # coerce to object
        s = pd.Series(mixed)
        result = s.astype('object')
        expected = pd.Series(np.asarray(mixed))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        'dtype', [Int8Dtype(), 'Int8', UInt32Dtype(), 'UInt32'])
    def test_astype_specific_casting(self, dtype):
        """Int64 Series cast to ``dtype`` equals one built with ``dtype``."""
        s = pd.Series([1, 2, 3], dtype='Int64')
        result = s.astype(dtype)
        expected = pd.Series([1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

        # same check with a missing value present
        s = pd.Series([1, 2, 3, None], dtype='Int64')
        result = s.astype(dtype)
        expected = pd.Series([1, 2, 3, None], dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_construct_cast_invalid(self, dtype):
        """Non-integral floats raise ``TypeError`` matching "cannot safely"."""
        msg = "cannot safely"
        arr = [1.2, 2.3, 3.7]
        with pytest.raises(TypeError, match=msg):
            integer_array(arr, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(arr).astype(dtype)

        # same input with a trailing NaN
        arr = [1.2, 2.3, 3.7, np.nan]
        with pytest.raises(TypeError, match=msg):
            integer_array(arr, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(arr).astype(dtype)
def test_astype_to_larger_numpy():
    """Int32/UInt32 arrays without missing values cast to int64/uint64.

    The result is compared against a plain NumPy array of the wider dtype.
    """
    a = pd.array([1, 2], dtype="Int32")
    result = a.astype("int64")
    expected = np.array([1, 2], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    a = pd.array([1, 2], dtype="UInt32")
    result = a.astype("uint64")
    expected = np.array([1, 2], dtype="uint64")
    tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize(
    "dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    """An Int64 Series cast to ``dtype`` equals one built with ``dtype``.

    Checked once without and once with a missing value.
    """
    s = pd.Series([1, 2, 3], dtype="Int64")
    result = s.astype(dtype)
    expected = pd.Series([1, 2, 3], dtype=dtype)
    tm.assert_series_equal(result, expected)

    s = pd.Series([1, 2, 3, None], dtype="Int64")
    result = s.astype(dtype)
    expected = pd.Series([1, 2, 3, None], dtype=dtype)
    tm.assert_series_equal(result, expected)


def test_astype_floating():
    arr = pd.array([1, 2, None], dtype="Int64")
    result = arr.astype("Float64")
        assert np.dtype(dtype.type).kind == "i"
    else:
        assert np.dtype(dtype.type).kind == "u"
    assert dtype.name is not None


@pytest.mark.parametrize(
    "dtype, expected",
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    """``repr`` of each integer dtype instance equals the paired string."""
    assert repr(dtype) == expected


def test_repr_array():
    """``repr`` of a three-element integer array containing one ``None``."""
    result = repr(integer_array([1, None, 3]))
    expected = "<IntegerArray>\n[1, NaN, 3]\nLength: 3, dtype: Int64"
    assert result == expected


def test_repr_array_long():
    data = integer_array([1, 2, None] * 1000)
if dtype.is_signed_integer:
    assert np.dtype(dtype.type).kind == 'i'
else:
    assert np.dtype(dtype.type).kind == 'u'
assert dtype.name is not None


@pytest.mark.parametrize('dtype, expected', [
    (Int8Dtype(), 'Int8Dtype()'),
    (Int16Dtype(), 'Int16Dtype()'),
    (Int32Dtype(), 'Int32Dtype()'),
    (Int64Dtype(), 'Int64Dtype()'),
    (UInt8Dtype(), 'UInt8Dtype()'),
    (UInt16Dtype(), 'UInt16Dtype()'),
    (UInt32Dtype(), 'UInt32Dtype()'),
    (UInt64Dtype(), 'UInt64Dtype()'),
])
def test_repr_dtype(dtype, expected):
    """``repr`` of each integer dtype instance equals the paired string."""
    assert repr(dtype) == expected


def test_repr_array():
    """``repr`` of a three-element integer array containing one ``None``."""
    result = repr(integer_array([1, None, 3]))
    # the expected text is three lines: type header, values, footer
    expected = (
        '<IntegerArray>\n'
        '[1, NaN, 3]\n'
        'Length: 3, dtype: Int64'
    )
    assert result == expected
class TestCasting:
    """Casting behaviour of the nullable integer arrays.

    Exercises Index construction, ``astype`` on Index/Series/array and
    ``to_numpy`` conversion.  ``all_data`` and ``dtype`` (where it is
    not parametrized) come from outside this class, presumably as
    pytest fixtures.
    """

    @pytest.mark.parametrize("dropna", [True, False])
    def test_construct_index(self, all_data, dropna):
        """An Index built from an integer array equals the object Index."""
        # The result must stay a generic Index and must not be coerced
        # to a Float64Index.
        head = all_data[:10]
        other = np.array(head[~head.isna()]) if dropna else head

        built = pd.Index(integer_array(other, dtype=head.dtype))
        reference = pd.Index(other, dtype=object)
        tm.assert_index_equal(built, reference)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_astype_index(self, all_data, dropna):
        """``Index.astype(dtype)`` agrees with casting via ``object``."""
        head = all_data[:10]
        other = head[~head.isna()] if dropna else head

        target = head.dtype
        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        direct = idx.astype(target)
        via_object = idx.astype(object).astype(target)
        tm.assert_index_equal(direct, via_object)

    def test_astype(self, all_data):
        """``Series.astype`` to the own, an Int8, the NumPy and object dtype."""
        head = all_data[:10]
        no_na = head[~head.isna()]
        own_dtype = head.dtype
        int8 = Int8Dtype()

        # --- only the non-missing values ---
        # cast to the dtype the data already has
        tm.assert_series_equal(
            pd.Series(no_na).astype(own_dtype), pd.Series(no_na))

        # cast to a different extension dtype
        tm.assert_series_equal(
            pd.Series(no_na).astype(int8), pd.Series(no_na, dtype=int8))

        # cast to the backing NumPy dtype
        tm.assert_series_equal(
            pd.Series(no_na).astype(own_dtype.numpy_dtype),
            pd.Series(no_na._data.astype(own_dtype.numpy_dtype)),
        )

        # --- the unfiltered slice ---
        # cast to the dtype the data already has
        tm.assert_series_equal(
            pd.Series(head).astype(own_dtype), pd.Series(head))

        # cast to a different extension dtype
        tm.assert_series_equal(
            pd.Series(head).astype(int8), pd.Series(head, dtype=int8))

        # casting to the backing NumPy dtype is expected to be rejected
        unfiltered = pd.Series(head)
        msg = r"cannot convert to .*-dtype NumPy array with missing values.*"
        with pytest.raises(ValueError, match=msg):
            unfiltered.astype(own_dtype.numpy_dtype)

        # cast to object
        tm.assert_series_equal(
            pd.Series(head).astype("object"), pd.Series(np.asarray(head)))

    def test_astype_to_larger_numpy(self):
        """Int32/UInt32 arrays without NA cast to the wider NumPy dtype."""
        for ext_dtype, numpy_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
            a = pd.array([1, 2], dtype=ext_dtype)
            result = a.astype(numpy_dtype)
            expected = np.array([1, 2], dtype=numpy_dtype)
            tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
    def test_astype_specific_casting(self, dtype):
        """Int64 Series cast to ``dtype`` equals one built with ``dtype``."""
        # once without and once with a missing value
        for values in ([1, 2, 3], [1, 2, 3, None]):
            result = pd.Series(values, dtype="Int64").astype(dtype)
            expected = pd.Series(values, dtype=dtype)
            tm.assert_series_equal(result, expected)

    def test_construct_cast_invalid(self, dtype):
        """Non-integral floats raise ``TypeError`` matching "cannot safely"."""
        msg = "cannot safely"
        # once without and once with a trailing NaN
        for arr in ([1.2, 2.3, 3.7], [1.2, 2.3, 3.7, np.nan]):
            with pytest.raises(TypeError, match=msg):
                integer_array(arr, dtype=dtype)

            with pytest.raises(TypeError, match=msg):
                pd.Series(arr).astype(dtype)

    @pytest.mark.parametrize("in_series", [True, False])
    def test_to_numpy_na_nan(self, in_series):
        """``to_numpy`` substitutes ``na_value`` for the missing entry."""
        a = pd.array([0, 1, None], dtype="Int64")
        if in_series:
            a = pd.Series(a)

        # (target dtype, fill value, expected values)
        cases = [
            ("float64", np.nan, [0.0, 1.0, np.nan]),
            ("int64", -1, [0, 1, -1]),
            ("bool", False, [False, True, False]),
        ]
        for numpy_dtype, fill, values in cases:
            result = a.to_numpy(dtype=numpy_dtype, na_value=fill)
            expected = np.array(values, dtype=numpy_dtype)
            tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("in_series", [True, False])
    @pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
    def test_to_numpy_dtype(self, dtype, in_series):
        """``to_numpy(dtype=...)`` on data without NA gives that dtype."""
        a = pd.array([0, 1], dtype="Int64")
        if in_series:
            a = pd.Series(a)

        tm.assert_numpy_array_equal(
            a.to_numpy(dtype=dtype), np.array([0, 1], dtype=dtype))

    @pytest.mark.parametrize("dtype", ["float64", "int64", "bool"])
    def test_to_numpy_na_raises(self, dtype):
        """``to_numpy`` with NA and no ``na_value`` raises ``ValueError``."""
        a = pd.array([0, 1, None], dtype="Int64")
        # the error message is expected to mention the requested dtype
        with pytest.raises(ValueError, match=dtype):
            a.to_numpy(dtype=dtype)

    def test_astype_str(self):
        """``astype(str)`` and ``astype("str")`` give an object array."""
        a = pd.array([1, 2, None], dtype="Int64")
        expected = np.array(["1", "2", "<NA>"], dtype=object)

        for target in (str, "str"):
            tm.assert_numpy_array_equal(a.astype(target), expected)

    def test_astype_boolean(self):
        """Casting to ``"boolean"`` maps zero to False, non-zero to True."""
        # https://github.com/pandas-dev/pandas/issues/31102
        a = pd.array([1, 0, -1, 2, None], dtype="Int64")
        expected = pd.array([True, False, True, True, None], dtype="boolean")
        tm.assert_extension_array_equal(a.astype("boolean"), expected)
def test_date_extract_(sample_df: DataFrame):
    """Run a date-extract step for every listed date part and compare frames.

    Each requested part is written to a column named ``date_<part>``.
    The result is compared against a hand-written expected frame, and
    the result dtypes are checked to contain no ``UInt32Dtype``.
    """
    date_parts = [
        'year',
        'month',
        'day',
        'week',
        'quarter',
        'dayOfWeek',
        'dayOfYear',
        'isoYear',
        'isoWeek',
        'isoDayOfWeek',
        'firstDayOfYear',
        'firstDayOfMonth',
        'firstDayOfWeek',
        'firstDayOfQuarter',
        'firstDayOfIsoWeek',
        'previousDay',
        'firstDayOfPreviousYear',
        'firstDayOfPreviousMonth',
        'firstDayOfPreviousWeek',
        'firstDayOfPreviousQuarter',
        'firstDayOfPreviousIsoWeek',
        'previousYear',
        'previousMonth',
        'previousWeek',
        'previousQuarter',
        'previousIsoWeek',
        'hour',
        'minutes',
        'seconds',
        'milliseconds',
    ]
    # One output column per requested part, in the same order.
    output_columns = ['date_' + part for part in date_parts]

    step = DateExtractStep(
        name='dateextract',
        column='date',
        dateInfo=date_parts,
        newColumns=output_columns,
    )
    df_result = execute_date_extract(step, sample_df)

    # Expected values, one entry per input row; the last row is missing.
    expected_columns = {
        'date': to_datetime(
            [
                '2021-03-29T00:00:00.000Z',
                '2020-12-13T00:00:00.000Z',
                '2020-07-29T00:00:00.000Z',
                '2019-04-09T01:02:03.004Z',
                '2017-01-02T00:00:00.000Z',
                '2016-01-01T00:00:00.000Z',
                None,
            ]
        ),
        'date_year': [2021, 2020, 2020, 2019, 2017, 2016, None],
        'date_month': [3, 12, 7, 4, 1, 1, None],
        'date_day': [29, 13, 29, 9, 2, 1, None],
        'date_week': [13, 50, 30, 14, 1, 0, None],
        'date_quarter': [1, 4, 3, 2, 1, 1, None],
        'date_dayOfWeek': [2, 1, 4, 3, 2, 6, None],
        'date_dayOfYear': [88, 348, 211, 99, 2, 1, None],
        'date_isoYear': [2021, 2020, 2020, 2019, 2017, 2015, None],
        'date_isoWeek': [13, 50, 31, 15, 1, 53, None],
        'date_isoDayOfWeek': [1, 7, 3, 2, 1, 5, None],
        'date_firstDayOfYear': to_datetime(
            [
                "2021-01-01T00:00:00.000Z",
                "2020-01-01T00:00:00.000Z",
                "2020-01-01T00:00:00.000Z",
                "2019-01-01T00:00:00.000Z",
                "2017-01-01T00:00:00.000Z",
                "2016-01-01T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfMonth': to_datetime(
            [
                "2021-03-01T00:00:00.000Z",
                "2020-12-01T00:00:00.000Z",
                "2020-07-01T00:00:00.000Z",
                "2019-04-01T00:00:00.000Z",
                "2017-01-01T00:00:00.000Z",
                "2016-01-01T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfWeek': to_datetime(
            [
                "2021-03-28T00:00:00.000Z",
                "2020-12-13T00:00:00.000Z",
                "2020-07-26T00:00:00.000Z",
                "2019-04-07T00:00:00.000Z",
                "2017-01-01T00:00:00.000Z",
                "2015-12-27T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfQuarter': to_datetime(
            [
                "2021-01-01T00:00:00.000Z",
                "2020-10-01T00:00:00.000Z",
                "2020-07-01T00:00:00.000Z",
                "2019-04-01T00:00:00.000Z",
                "2017-01-01T00:00:00.000Z",
                "2016-01-01T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfIsoWeek': to_datetime(
            [
                "2021-03-29T00:00:00.000Z",
                "2020-12-07T00:00:00.000Z",
                "2020-07-27T00:00:00.000Z",
                "2019-04-08T00:00:00.000Z",
                "2017-01-02T00:00:00.000Z",
                "2015-12-28T00:00:00.000Z",
                None,
            ]
        ),
        'date_previousDay': to_datetime(
            [
                "2021-03-28T00:00:00.000Z",
                "2020-12-12T00:00:00.000Z",
                "2020-07-28T00:00:00.000Z",
                "2019-04-08T00:00:00.000Z",
                "2017-01-01T00:00:00.000Z",
                "2015-12-31T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfPreviousYear': to_datetime(
            [
                "2020-01-01T00:00:00.000Z",
                "2019-01-01T00:00:00.000Z",
                "2019-01-01T00:00:00.000Z",
                "2018-01-01T00:00:00.000Z",
                "2016-01-01T00:00:00.000Z",
                "2015-01-01T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfPreviousMonth': to_datetime(
            [
                "2021-02-01T00:00:00.000Z",
                "2020-11-01T00:00:00.000Z",
                "2020-06-01T00:00:00.000Z",
                "2019-03-01T00:00:00.000Z",
                "2016-12-01T00:00:00.000Z",
                "2015-12-01T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfPreviousWeek': to_datetime(
            [
                "2021-03-21T00:00:00.000Z",
                "2020-12-06T00:00:00.000Z",
                "2020-07-19T00:00:00.000Z",
                "2019-03-31T00:00:00.000Z",
                "2016-12-25T00:00:00.000Z",
                "2015-12-20T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfPreviousQuarter': to_datetime(
            [
                "2020-10-01T00:00:00.000Z",
                "2020-07-01T00:00:00.000Z",
                "2020-04-01T00:00:00.000Z",
                "2019-01-01T00:00:00.000Z",
                "2016-10-01T00:00:00.000Z",
                "2015-10-01T00:00:00.000Z",
                None,
            ]
        ),
        'date_firstDayOfPreviousIsoWeek': to_datetime(
            [
                "2021-03-22T00:00:00.000Z",
                "2020-11-30T00:00:00.000Z",
                "2020-07-20T00:00:00.000Z",
                "2019-04-01T00:00:00.000Z",
                "2016-12-26T00:00:00.000Z",
                "2015-12-21T00:00:00.000Z",
                None,
            ]
        ),
        'date_previousYear': [2020, 2019, 2019, 2018, 2016, 2015, None],
        'date_previousMonth': [2, 11, 6, 3, 12, 12, None],
        'date_previousQuarter': [4, 3, 2, 1, 4, 4, None],
        'date_previousWeek': [12, 49, 29, 13, 52, 51, None],
        'date_previousIsoWeek': [12, 49, 30, 14, 52, 52, None],
        'date_hour': [0, 0, 0, 1, 0, 0, None],
        'date_minutes': [0, 0, 0, 2, 0, 0, None],
        'date_seconds': [0, 0, 0, 3, 0, 0, None],
        'date_milliseconds': [0, 0, 0, 4, 0, 0, None],
    }
    expected_result = DataFrame(expected_columns)

    assert_dataframes_equals(df_result, expected_result)

    # Ensure there are no unsigned int types in result:
    result_dtypes = list(df_result.dtypes)
    assert UInt32Dtype() not in result_dtypes
tm.assert_series_equal(result, expected)


def test_astype_to_larger_numpy():
    """Int32/UInt32 arrays without missing values cast to int64/uint64.

    The result is compared against a plain NumPy array of the wider dtype.
    """
    a = pd.array([1, 2], dtype="Int32")
    result = a.astype("int64")
    expected = np.array([1, 2], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    a = pd.array([1, 2], dtype="UInt32")
    result = a.astype("uint64")
    expected = np.array([1, 2], dtype="uint64")
    tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    """An Int64 Series cast to ``dtype`` equals one built with ``dtype``.

    Checked once without and once with a missing value.
    """
    s = pd.Series([1, 2, 3], dtype="Int64")
    result = s.astype(dtype)
    expected = pd.Series([1, 2, 3], dtype=dtype)
    tm.assert_series_equal(result, expected)

    s = pd.Series([1, 2, 3, None], dtype="Int64")
    result = s.astype(dtype)
    expected = pd.Series([1, 2, 3, None], dtype=dtype)
    tm.assert_series_equal(result, expected)


def test_astype_dt64():
    # GH#32435
    arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9
class TestCasting:
    """Casting checks for the nullable integer arrays.

    ``all_data`` and ``dtype`` (where it is not parametrized) are not
    defined in this class; presumably they are pytest fixtures.
    """

    @pytest.mark.parametrize("dropna", [True, False])
    def test_construct_index(self, all_data, dropna):
        """An Index built from an integer array equals the object Index."""
        # Guard against coercion to Float64Index: the result has to
        # remain a plain Index.
        sample = all_data[:10]
        other = np.array(sample[~sample.isna()]) if dropna else sample

        from_array = pd.Index(integer_array(other, dtype=sample.dtype))
        as_object = pd.Index(other, dtype=object)
        tm.assert_index_equal(from_array, as_object)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_astype_index(self, all_data, dropna):
        """``Index.astype(dtype)`` agrees with casting via ``object``."""
        sample = all_data[:10]
        other = sample[~sample.isna()] if dropna else sample

        ext_dtype = sample.dtype
        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        result = idx.astype(ext_dtype)
        expected = idx.astype(object).astype(ext_dtype)
        tm.assert_index_equal(result, expected)

    def test_astype(self, all_data):
        """``Series.astype`` to the own, an Int8, the NumPy and object dtype."""
        sample = all_data[:10]
        present = sample[~sample.isna()]
        ext_dtype = sample.dtype
        other_dtype = Int8Dtype()

        # Non-missing values only: same dtype, other extension dtype,
        # then the backing NumPy dtype.
        tm.assert_series_equal(
            pd.Series(present).astype(ext_dtype), pd.Series(present))
        tm.assert_series_equal(
            pd.Series(present).astype(other_dtype),
            pd.Series(present, dtype=other_dtype),
        )
        tm.assert_series_equal(
            pd.Series(present).astype(ext_dtype.numpy_dtype),
            pd.Series(present._data.astype(ext_dtype.numpy_dtype)),
        )

        # Unfiltered slice: same dtype and other extension dtype.
        tm.assert_series_equal(
            pd.Series(sample).astype(ext_dtype), pd.Series(sample))
        tm.assert_series_equal(
            pd.Series(sample).astype(other_dtype),
            pd.Series(sample, dtype=other_dtype),
        )

        # Unfiltered slice to the backing NumPy dtype is expected to fail.
        unfiltered = pd.Series(sample)
        with pytest.raises(ValueError):
            unfiltered.astype(ext_dtype.numpy_dtype)

        # Unfiltered slice to object.
        tm.assert_series_equal(
            pd.Series(sample).astype("object"),
            pd.Series(np.asarray(sample)),
        )

    def test_astype_to_larger_numpy(self):
        """Int32/UInt32 arrays without NA cast to the wider NumPy dtype."""
        widenings = {"Int32": "int64", "UInt32": "uint64"}
        for source, target in widenings.items():
            a = pd.array([1, 2], dtype=source)
            tm.assert_numpy_array_equal(
                a.astype(target), np.array([1, 2], dtype=target))

    @pytest.mark.parametrize(
        "dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
    def test_astype_specific_casting(self, dtype):
        """Int64 Series cast to ``dtype`` equals one built with ``dtype``."""
        # first without, then with a missing value
        for data in ([1, 2, 3], [1, 2, 3, None]):
            s = pd.Series(data, dtype="Int64")
            tm.assert_series_equal(s.astype(dtype), pd.Series(data, dtype=dtype))

    def test_construct_cast_invalid(self, dtype):
        """Non-integral floats raise ``TypeError`` matching "cannot safely"."""
        msg = "cannot safely"
        floats = [1.2, 2.3, 3.7]
        # first the plain floats, then the same values followed by NaN
        for arr in (floats, floats + [np.nan]):
            with pytest.raises(TypeError, match=msg):
                integer_array(arr, dtype=dtype)

            with pytest.raises(TypeError, match=msg):
                pd.Series(arr).astype(dtype)

    def test_coerce_to_ndarray_float_NA_rasies(self):
        """``_coerce_to_ndarray`` to float with ``pd.NA`` raises TypeError."""
        a = pd.array([0, 1, 2], dtype="Int64")
        # the error message is expected to mention "NAType"
        with pytest.raises(TypeError, match="NAType"):
            a._coerce_to_ndarray(dtype="float", na_value=pd.NA)