Exemple #1
0
def test_to_integer_array_dtype_keyword(constructor):
    # an explicit ``dtype`` keyword decides the dtype of the result
    from_list = constructor([1, 2], dtype="Int8")
    assert from_list.dtype == Int8Dtype()

    # ... and it takes precedence over the dtype the input ndarray carries
    int8_values = np.array([1, 2], dtype="int8")
    from_ndarray = constructor(int8_values, dtype="Int32")
    assert from_ndarray.dtype == Int32Dtype()
Exemple #2
0
def test_to_integer_array_dtype_keyword():
    # an explicit ``dtype`` keyword decides the dtype of the result
    from_list = integer_array([1, 2], dtype='int8')
    assert from_list.dtype == Int8Dtype()

    # ... and it takes precedence over the dtype the input ndarray carries
    int8_values = np.array([1, 2], dtype='int8')
    from_ndarray = integer_array(int8_values, dtype='int32')
    assert from_ndarray.dtype == Int32Dtype()
Exemple #3
0
def test_to_integer_array_inferred_dtype():
    # with no dtype keyword, the dtype of an ndarray input is respected
    cases = [('int8', Int8Dtype()), ('int32', Int32Dtype())]
    for np_dtype, expected in cases:
        result = integer_array(np.array([1, 2], dtype=np_dtype))
        assert result.dtype == expected

    # a plain list carries no dtype, so the result is always Int64
    assert integer_array([1, 2]).dtype == Int64Dtype()
Exemple #4
0
def test_to_integer_array_inferred_dtype(constructor):
    # with no dtype keyword, the dtype of an ndarray input is respected
    cases = [("int8", Int8Dtype()), ("int32", Int32Dtype())]
    for np_dtype, expected in cases:
        result = constructor(np.array([1, 2], dtype=np_dtype))
        assert result.dtype == expected

    # a plain list carries no dtype, so the result is always Int64
    assert constructor([1, 2]).dtype == Int64Dtype()
Exemple #5
0
    def test_astype(self, all_data):
        # Series.astype on the first ten values, once with the missing
        # entries dropped ("clean") and once left as they are ("raw")
        all_data = all_data[:10]
        own_dtype = all_data.dtype
        np_dtype = own_dtype.numpy_dtype
        int8 = Int8Dtype()

        clean = all_data[~all_data.isna()]
        raw = all_data

        # clean -> its own extension dtype
        result = pd.Series(clean).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(clean))

        # clean -> the Int8 extension dtype
        result = pd.Series(clean).astype(int8)
        tm.assert_series_equal(result, pd.Series(clean, dtype=int8))

        # clean -> the ``numpy_dtype`` of its own dtype
        result = pd.Series(clean).astype(np_dtype)
        tm.assert_series_equal(
            result, pd.Series(clean._data.astype(np_dtype)))

        # raw -> its own extension dtype
        result = pd.Series(raw).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(raw))

        # raw -> the Int8 extension dtype
        result = pd.Series(raw).astype(int8)
        tm.assert_series_equal(result, pd.Series(raw, dtype=int8))

        # raw -> the ``numpy_dtype`` of its own dtype is expected to raise
        with pytest.raises(ValueError):
            pd.Series(raw).astype(np_dtype)

        # raw -> object
        result = pd.Series(raw).astype('object')
        tm.assert_series_equal(result, pd.Series(np.asarray(raw)))
Exemple #6
0
def test_astype(all_data):
    # Series.astype on the first ten values, once with the missing
    # entries dropped ("clean") and once left as they are ("raw")
    all_data = all_data[:10]
    own_dtype = all_data.dtype
    np_dtype = own_dtype.numpy_dtype
    int8 = Int8Dtype()

    clean = all_data[~all_data.isna()]
    raw = all_data

    # clean -> its own extension dtype
    result = pd.Series(clean).astype(own_dtype)
    tm.assert_series_equal(result, pd.Series(clean))

    # clean -> the Int8 extension dtype
    result = pd.Series(clean).astype(int8)
    tm.assert_series_equal(result, pd.Series(clean, dtype=int8))

    # clean -> the ``numpy_dtype`` of its own dtype
    result = pd.Series(clean).astype(np_dtype)
    tm.assert_series_equal(result, pd.Series(clean._data.astype(np_dtype)))

    # raw -> its own extension dtype
    result = pd.Series(raw).astype(own_dtype)
    tm.assert_series_equal(result, pd.Series(raw))

    # raw -> the Int8 extension dtype
    result = pd.Series(raw).astype(int8)
    tm.assert_series_equal(result, pd.Series(raw, dtype=int8))

    # raw -> the ``numpy_dtype`` of its own dtype is expected to raise
    msg = r"cannot convert to .*-dtype NumPy array with missing values.*"
    with pytest.raises(ValueError, match=msg):
        pd.Series(raw).astype(np_dtype)

    # raw -> object
    result = pd.Series(raw).astype("object")
    tm.assert_series_equal(result, pd.Series(np.asarray(raw)))
Exemple #7
0
class TestCasting(object):
    # construction and casting checks for the integer extension arrays

    @pytest.mark.parametrize('dropna', [True, False])
    def test_construct_index(self, all_data, dropna):
        # an Index built from an integer array must equal the object-dtype
        # Index of the same values, i.e. no coercion to Float64Index
        all_data = all_data[:10]
        # with ``dropna`` the missing entries are removed and the rest is
        # turned into an ndarray first
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        built = pd.Index(integer_array(other, dtype=all_data.dtype))
        as_object = pd.Index(other, dtype=object)
        tm.assert_index_equal(built, as_object)

    @pytest.mark.parametrize('dropna', [True, False])
    def test_astype_index(self, all_data, dropna):
        # casting an Index to the integer extension dtype directly gives
        # the same Index as casting it through object dtype first
        all_data = all_data[:10]
        other = all_data[~all_data.isna()] if dropna else all_data
        target = all_data.dtype

        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        direct = idx.astype(target)
        via_object = idx.astype(object).astype(target)
        tm.assert_index_equal(direct, via_object)

    def test_astype(self, all_data):
        # Series.astype on the first ten values, once with the missing
        # entries dropped ("clean") and once left as they are ("raw")
        all_data = all_data[:10]
        own_dtype = all_data.dtype
        np_dtype = own_dtype.numpy_dtype
        int8 = Int8Dtype()

        clean = all_data[~all_data.isna()]
        raw = all_data

        # clean -> its own extension dtype
        result = pd.Series(clean).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(clean))

        # clean -> the Int8 extension dtype
        result = pd.Series(clean).astype(int8)
        tm.assert_series_equal(result, pd.Series(clean, dtype=int8))

        # clean -> the ``numpy_dtype`` of its own dtype
        result = pd.Series(clean).astype(np_dtype)
        tm.assert_series_equal(
            result, pd.Series(clean._data.astype(np_dtype)))

        # raw -> its own extension dtype
        result = pd.Series(raw).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(raw))

        # raw -> the Int8 extension dtype
        result = pd.Series(raw).astype(int8)
        tm.assert_series_equal(result, pd.Series(raw, dtype=int8))

        # raw -> the ``numpy_dtype`` of its own dtype is expected to raise
        with pytest.raises(ValueError):
            pd.Series(raw).astype(np_dtype)

        # raw -> object
        result = pd.Series(raw).astype('object')
        tm.assert_series_equal(result, pd.Series(np.asarray(raw)))

    @pytest.mark.parametrize(
        'dtype',
        [Int8Dtype(), 'Int8', UInt32Dtype(), 'UInt32'])
    def test_astype_specific_casting(self, dtype):
        # casting an Int64 Series to ``dtype`` matches constructing the
        # Series with ``dtype`` directly, with and without a missing value
        for values in ([1, 2, 3], [1, 2, 3, None]):
            result = pd.Series(values, dtype='Int64').astype(dtype)
            expected = pd.Series(values, dtype=dtype)
            tm.assert_series_equal(result, expected)

    def test_construct_cast_invalid(self, dtype):
        # non-integral floats must be refused, both by the array
        # constructor and by Series.astype, with or without a NaN present
        msg = "cannot safely"
        for arr in ([1.2, 2.3, 3.7], [1.2, 2.3, 3.7, np.nan]):
            with pytest.raises(TypeError, match=msg):
                integer_array(arr, dtype=dtype)

            with pytest.raises(TypeError, match=msg):
                pd.Series(arr).astype(dtype)
Exemple #8
0

def test_dtypes(dtype):
    # smoke test: the numpy kind of ``dtype.type`` has to agree with the
    # signedness the dtype reports, and the dtype has to carry a name
    expected_kind = "i" if dtype.is_signed_integer else "u"
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None


@pytest.mark.parametrize(
    "dtype, expected",
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    # each listed dtype must render as its own constructor call
    rendered = repr(dtype)
    assert rendered == expected


def test_repr_array():
    # Render the repr of an integer array built from [1, None, 3].
    # NOTE(review): the visible snippet stops here; the comparison against
    # an expected string is not shown.
    result = repr(integer_array([1, None, 3]))
Exemple #9
0
def test_astype_to_larger_numpy():
    # casting to a wider numpy integer dtype returns a plain ndarray of
    # that dtype, for the signed and the unsigned case alike
    for ea_dtype, np_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
        result = pd.array([1, 2], dtype=ea_dtype).astype(np_dtype)
        expected = np.array([1, 2], dtype=np_dtype)
        tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    # casting an Int64 Series to ``dtype`` matches constructing the Series
    # with ``dtype`` directly, with and without a missing value
    for values in ([1, 2, 3], [1, 2, 3, None]):
        result = pd.Series(values, dtype="Int64").astype(dtype)
        expected = pd.Series(values, dtype=dtype)
        tm.assert_series_equal(result, expected)


def test_astype_floating():
    # Cast an "Int64" array that holds a missing value to "Float64".
    # NOTE(review): the visible snippet stops here; the assertion on
    # ``result`` is not shown.
    arr = pd.array([1, 2, None], dtype="Int64")
    result = arr.astype("Float64")
Exemple #10
0
    elif request.param == 'data_missing':
        return data_missing


def test_dtypes(dtype):
    # smoke test: the numpy kind of ``dtype.type`` has to agree with the
    # signedness the dtype reports, and the dtype has to carry a name
    expected_kind = 'i' if dtype.is_signed_integer else 'u'
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None


@pytest.mark.parametrize('dtype, expected', [
    (Int8Dtype(), 'Int8Dtype()'),
    (Int16Dtype(), 'Int16Dtype()'),
    (Int32Dtype(), 'Int32Dtype()'),
    (Int64Dtype(), 'Int64Dtype()'),
    (UInt8Dtype(), 'UInt8Dtype()'),
    (UInt16Dtype(), 'UInt16Dtype()'),
    (UInt32Dtype(), 'UInt32Dtype()'),
    (UInt64Dtype(), 'UInt64Dtype()'),
])
def test_repr_dtype(dtype, expected):
    # each listed dtype must render as its own constructor call
    rendered = repr(dtype)
    assert rendered == expected


def test_repr_array():
    result = repr(integer_array([1, None, 3]))
    expected = (
Exemple #11
0
class TestCasting:
    # construction, casting and to_numpy checks for the integer
    # extension arrays

    @pytest.mark.parametrize("dropna", [True, False])
    def test_construct_index(self, all_data, dropna):
        # an Index built from an integer array must equal the object-dtype
        # Index of the same values, i.e. no coercion to Float64Index
        all_data = all_data[:10]
        # with ``dropna`` the missing entries are removed and the rest is
        # turned into an ndarray first
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        built = pd.Index(integer_array(other, dtype=all_data.dtype))
        as_object = pd.Index(other, dtype=object)
        tm.assert_index_equal(built, as_object)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_astype_index(self, all_data, dropna):
        # casting an Index to the integer extension dtype directly gives
        # the same Index as casting it through object dtype first
        all_data = all_data[:10]
        other = all_data[~all_data.isna()] if dropna else all_data
        target = all_data.dtype

        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        direct = idx.astype(target)
        via_object = idx.astype(object).astype(target)
        tm.assert_index_equal(direct, via_object)

    def test_astype(self, all_data):
        # Series.astype on the first ten values, once with the missing
        # entries dropped ("clean") and once left as they are ("raw")
        all_data = all_data[:10]
        own_dtype = all_data.dtype
        np_dtype = own_dtype.numpy_dtype
        int8 = Int8Dtype()

        clean = all_data[~all_data.isna()]
        raw = all_data

        # clean -> its own extension dtype
        result = pd.Series(clean).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(clean))

        # clean -> the Int8 extension dtype
        result = pd.Series(clean).astype(int8)
        tm.assert_series_equal(result, pd.Series(clean, dtype=int8))

        # clean -> the ``numpy_dtype`` of its own dtype
        result = pd.Series(clean).astype(np_dtype)
        tm.assert_series_equal(result, pd.Series(clean._data.astype(np_dtype)))

        # raw -> its own extension dtype
        result = pd.Series(raw).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(raw))

        # raw -> the Int8 extension dtype
        result = pd.Series(raw).astype(int8)
        tm.assert_series_equal(result, pd.Series(raw, dtype=int8))

        # raw -> the ``numpy_dtype`` of its own dtype is expected to raise
        msg = r"cannot convert to .*-dtype NumPy array with missing values.*"
        with pytest.raises(ValueError, match=msg):
            pd.Series(raw).astype(np_dtype)

        # raw -> object
        result = pd.Series(raw).astype("object")
        tm.assert_series_equal(result, pd.Series(np.asarray(raw)))

    def test_astype_to_larger_numpy(self):
        # casting to a wider numpy integer dtype returns a plain ndarray of
        # that dtype, for the signed and the unsigned case alike
        for ea_dtype, np_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
            result = pd.array([1, 2], dtype=ea_dtype).astype(np_dtype)
            expected = np.array([1, 2], dtype=np_dtype)
            tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
    def test_astype_specific_casting(self, dtype):
        # casting an Int64 Series to ``dtype`` matches constructing the
        # Series with ``dtype`` directly, with and without a missing value
        for values in ([1, 2, 3], [1, 2, 3, None]):
            result = pd.Series(values, dtype="Int64").astype(dtype)
            expected = pd.Series(values, dtype=dtype)
            tm.assert_series_equal(result, expected)

    def test_construct_cast_invalid(self, dtype):
        # non-integral floats must be refused, both by the array
        # constructor and by Series.astype, with or without a NaN present
        msg = "cannot safely"
        for arr in ([1.2, 2.3, 3.7], [1.2, 2.3, 3.7, np.nan]):
            with pytest.raises(TypeError, match=msg):
                integer_array(arr, dtype=dtype)

            with pytest.raises(TypeError, match=msg):
                pd.Series(arr).astype(dtype)

    @pytest.mark.parametrize("in_series", [True, False])
    def test_to_numpy_na_nan(self, in_series):
        # to_numpy puts the requested ``na_value`` where the entry is
        # missing; checked on the bare array and wrapped in a Series
        a = pd.array([0, 1, None], dtype="Int64")
        if in_series:
            a = pd.Series(a)

        cases = [
            ("float64", np.nan, [0.0, 1.0, np.nan]),
            ("int64", -1, [0, 1, -1]),
            ("bool", False, [False, True, False]),
        ]
        for np_dtype, fill, values in cases:
            result = a.to_numpy(dtype=np_dtype, na_value=fill)
            expected = np.array(values, dtype=np_dtype)
            tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("in_series", [True, False])
    @pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
    def test_to_numpy_dtype(self, dtype, in_series):
        # without missing values, to_numpy converts straight to ``dtype``
        data = pd.array([0, 1], dtype="Int64")
        obj = pd.Series(data) if in_series else data

        converted = obj.to_numpy(dtype=dtype)
        tm.assert_numpy_array_equal(converted, np.array([0, 1], dtype=dtype))

    @pytest.mark.parametrize("dtype", ["float64", "int64", "bool"])
    def test_to_numpy_na_raises(self, dtype):
        # with a missing value and no ``na_value`` given, the conversion
        # raises and the message mentions the requested dtype
        with_na = pd.array([0, 1, None], dtype="Int64")
        with pytest.raises(ValueError, match=dtype):
            with_na.to_numpy(dtype=dtype)

    def test_astype_str(self):
        # both spellings of the string target give an object ndarray in
        # which the missing entry shows up as "<NA>"
        a = pd.array([1, 2, None], dtype="Int64")
        expected = np.array(["1", "2", "<NA>"], dtype=object)

        for target in (str, "str"):
            tm.assert_numpy_array_equal(a.astype(target), expected)

    def test_astype_boolean(self):
        # https://github.com/pandas-dev/pandas/issues/31102
        # non-zero values become True, zero becomes False and the missing
        # entry stays missing
        result = pd.array([1, 0, -1, 2, None], dtype="Int64").astype("boolean")
        expected = pd.array([True, False, True, True, None], dtype="boolean")
        tm.assert_extension_array_equal(result, expected)
Exemple #12
0
    tm.assert_series_equal(result, expected)


def test_astype_to_larger_numpy():
    # casting to a wider numpy integer dtype returns a plain ndarray of
    # that dtype, for the signed and the unsigned case alike
    for ea_dtype, np_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
        result = pd.array([1, 2], dtype=ea_dtype).astype(np_dtype)
        expected = np.array([1, 2], dtype=np_dtype)
        tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    # casting an Int64 Series to ``dtype`` matches constructing the Series
    # with ``dtype`` directly, with and without a missing value
    for values in ([1, 2, 3], [1, 2, 3, None]):
        result = pd.Series(values, dtype="Int64").astype(dtype)
        expected = pd.Series(values, dtype=dtype)
        tm.assert_series_equal(result, expected)


def test_astype_dt64():
    # GH#32435
    # Build an array containing pd.NA and scale it by 10 ** 9.
    # NOTE(review): presumably seconds -> nanoseconds ahead of a datetime64
    # cast; the visible snippet stops here, so confirm against the full test.
    arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9
Exemple #13
0
class TestCasting:
    # construction and casting checks for the integer extension arrays

    @pytest.mark.parametrize("dropna", [True, False])
    def test_construct_index(self, all_data, dropna):
        # an Index built from an integer array must equal the object-dtype
        # Index of the same values, i.e. no coercion to Float64Index
        all_data = all_data[:10]
        # with ``dropna`` the missing entries are removed and the rest is
        # turned into an ndarray first
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        built = pd.Index(integer_array(other, dtype=all_data.dtype))
        as_object = pd.Index(other, dtype=object)
        tm.assert_index_equal(built, as_object)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_astype_index(self, all_data, dropna):
        # casting an Index to the integer extension dtype directly gives
        # the same Index as casting it through object dtype first
        all_data = all_data[:10]
        other = all_data[~all_data.isna()] if dropna else all_data
        target = all_data.dtype

        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        direct = idx.astype(target)
        via_object = idx.astype(object).astype(target)
        tm.assert_index_equal(direct, via_object)

    def test_astype(self, all_data):
        # Series.astype on the first ten values, once with the missing
        # entries dropped ("clean") and once left as they are ("raw")
        all_data = all_data[:10]
        own_dtype = all_data.dtype
        np_dtype = own_dtype.numpy_dtype
        int8 = Int8Dtype()

        clean = all_data[~all_data.isna()]
        raw = all_data

        # clean -> its own extension dtype
        result = pd.Series(clean).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(clean))

        # clean -> the Int8 extension dtype
        result = pd.Series(clean).astype(int8)
        tm.assert_series_equal(result, pd.Series(clean, dtype=int8))

        # clean -> the ``numpy_dtype`` of its own dtype
        result = pd.Series(clean).astype(np_dtype)
        tm.assert_series_equal(result, pd.Series(clean._data.astype(np_dtype)))

        # raw -> its own extension dtype
        result = pd.Series(raw).astype(own_dtype)
        tm.assert_series_equal(result, pd.Series(raw))

        # raw -> the Int8 extension dtype
        result = pd.Series(raw).astype(int8)
        tm.assert_series_equal(result, pd.Series(raw, dtype=int8))

        # raw -> the ``numpy_dtype`` of its own dtype is expected to raise
        with pytest.raises(ValueError):
            pd.Series(raw).astype(np_dtype)

        # raw -> object
        result = pd.Series(raw).astype("object")
        tm.assert_series_equal(result, pd.Series(np.asarray(raw)))

    def test_astype_to_larger_numpy(self):
        # casting to a wider numpy integer dtype returns a plain ndarray of
        # that dtype, for the signed and the unsigned case alike
        for ea_dtype, np_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
            result = pd.array([1, 2], dtype=ea_dtype).astype(np_dtype)
            expected = np.array([1, 2], dtype=np_dtype)
            tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
    def test_astype_specific_casting(self, dtype):
        # casting an Int64 Series to ``dtype`` matches constructing the
        # Series with ``dtype`` directly, with and without a missing value
        for values in ([1, 2, 3], [1, 2, 3, None]):
            result = pd.Series(values, dtype="Int64").astype(dtype)
            expected = pd.Series(values, dtype=dtype)
            tm.assert_series_equal(result, expected)

    def test_construct_cast_invalid(self, dtype):
        # non-integral floats must be refused, both by the array
        # constructor and by Series.astype, with or without a NaN present
        msg = "cannot safely"
        for arr in ([1.2, 2.3, 3.7], [1.2, 2.3, 3.7, np.nan]):
            with pytest.raises(TypeError, match=msg):
                integer_array(arr, dtype=dtype)

            with pytest.raises(TypeError, match=msg):
                pd.Series(arr).astype(dtype)

    def test_coerce_to_ndarray_float_NA_rasies(self):
        # pd.NA is refused as the ``na_value`` when coercing to a float
        # ndarray; the TypeError message mentions "NAType"
        ints = pd.array([0, 1, 2], dtype="Int64")
        with pytest.raises(TypeError, match="NAType"):
            ints._coerce_to_ndarray(dtype="float", na_value=pd.NA)