Example #1
    def test_astype_str(self):
        # GH4405
        digits = string.digits
        s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)])
        s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0])
        types = (compat.text_type, np.str_)
        for typ in types:
            for s in (s1, s2):
                res = s.astype(typ)
                expec = s.map(compat.text_type)
                assert_series_equal(res, expec)

        # GH9757
        # Test str and unicode on python 2.x and just str on python 3.x
        for tt in set([str, compat.text_type]):
            ts = Series([Timestamp('2010-01-04 00:00:00')])
            s = ts.astype(tt)
            expected = Series([tt('2010-01-04')])
            assert_series_equal(s, expected)

            ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')])
            s = ts.astype(tt)
            expected = Series([tt('2010-01-04 00:00:00-05:00')])
            assert_series_equal(s, expected)

            td = Series([Timedelta(1, unit='d')])
            s = td.astype(tt)
            expected = Series([tt('1 days 00:00:00.000000000')])
            assert_series_equal(s, expected)
Example #2
    def test_astype_categorical_to_categorical(self, name, dtype_ordered,
                                               series_ordered):
        # GH 10696/18593
        s_data = list('abcaacbab')
        s_dtype = CategoricalDtype(list('bac'), ordered=series_ordered)
        s = Series(s_data, dtype=s_dtype, name=name)

        # unspecified categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = s.astype(dtype)
        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
        expected = Series(s_data, name=name, dtype=exp_dtype)
        tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = s.astype('category', ordered=dtype_ordered)
        tm.assert_series_equal(result, expected)

        # different categories
        dtype = CategoricalDtype(list('adc'), dtype_ordered)
        result = s.astype(dtype)
        expected = Series(s_data, name=name, dtype=dtype)
        tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = s.astype(
                'category', categories=list('adc'), ordered=dtype_ordered)
        tm.assert_series_equal(result, expected)

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            expected = s
            result = s.astype('category')
            tm.assert_series_equal(result, expected)
Example #3
    def test_astype_categorical_to_other(self):

        value = np.random.RandomState(0).randint(0, 10000, 100)
        df = DataFrame({'value': value})
        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
        cat_labels = Categorical(labels, labels)

        df = df.sort_values(by=['value'], ascending=True)
        df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
                                   right=False, labels=cat_labels)

        s = df['value_group']
        expected = s
        tm.assert_series_equal(s.astype('category'), expected)
        tm.assert_series_equal(s.astype(CategoricalDtype()), expected)
        msg = (r"could not convert string to float|"
               r"invalid literal for float\(\)")
        with pytest.raises(ValueError, match=msg):
            s.astype('float64')

        cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
        exp = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
        tm.assert_series_equal(cat.astype('str'), exp)
        s2 = Series(Categorical(['1', '2', '3', '4']))
        exp2 = Series([1, 2, 3, 4]).astype(int)
        tm.assert_series_equal(s2.astype('int'), exp2)

        # object dtype doesn't sort correctly, so just compare that we have
        # the same values
        def cmp(a, b):
            tm.assert_almost_equal(
                np.sort(np.unique(a)), np.sort(np.unique(b)))

        expected = Series(np.array(s.values), name='value_group')
        cmp(s.astype('object'), expected)
        cmp(s.astype(np.object_), expected)

        # array conversion
        tm.assert_almost_equal(np.array(s), np.array(s.values))

        # valid conversion
        for valid in [lambda x: x.astype('category'),
                      lambda x: x.astype(CategoricalDtype()),
                      lambda x: x.astype('object').astype('category'),
                      lambda x: x.astype('object').astype(
                          CategoricalDtype())
                      ]:

            result = valid(s)
            # compare series values
            # internal .categories can't be compared because it is sorted
            tm.assert_series_equal(result, s, check_categorical=False)

        # invalid conversion (these are NOT a dtype)
        msg = (r"invalid type <class 'pandas\.core\.arrays\.categorical\."
               "Categorical'> for astype")
        for invalid in [lambda x: x.astype(Categorical),
                        lambda x: x.astype('object').astype(Categorical)]:
            with pytest.raises(TypeError, match=msg):
                invalid(s)
Example #4
    def test_astype_cast_nan_inf_int(self, dtype, value):
        # gh-14265: check NaN and inf raise error when converting to int
        msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'
        s = Series([value])

        with tm.assert_raises_regex(ValueError, msg):
            s.astype(dtype)
Example #5
    def test_astype_datetime64tz(self):
        s = Series(date_range('20130101', periods=3, tz='US/Eastern'))

        # astype
        result = s.astype(object)
        expected = Series(s.astype(object), dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
        tm.assert_series_equal(result, s)

        # astype - object, preserves on construction
        result = Series(s.astype(object))
        expected = s.astype(object)
        tm.assert_series_equal(result, expected)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype('datetime64[ns, US/Eastern]')
        tm.assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        tm.assert_series_equal(result, s)

        result = s.astype('datetime64[ns, CET]')
        expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
        tm.assert_series_equal(result, expected)
Example #6
    def test_interp_scipy_basic(self):
        tm._skip_if_no_scipy()
        s = Series([1, 3, np.nan, 12, np.nan, 25])
        # slinear
        expected = Series([1., 3., 7.5, 12., 18.5, 25.])
        result = s.interpolate(method='slinear')
        assert_series_equal(result, expected)

        result = s.interpolate(method='slinear', downcast='infer')
        assert_series_equal(result, expected)
        # nearest
        expected = Series([1, 3, 3, 12, 12, 25])
        result = s.interpolate(method='nearest')
        assert_series_equal(result, expected.astype('float'))

        result = s.interpolate(method='nearest', downcast='infer')
        assert_series_equal(result, expected)
        # zero
        expected = Series([1, 3, 3, 12, 12, 25])
        result = s.interpolate(method='zero')
        assert_series_equal(result, expected.astype('float'))

        result = s.interpolate(method='zero', downcast='infer')
        assert_series_equal(result, expected)
        # quadratic
        expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
        result = s.interpolate(method='quadratic')
        assert_series_equal(result, expected)

        result = s.interpolate(method='quadratic', downcast='infer')
        assert_series_equal(result, expected)
        # cubic
        expected = Series([1., 3., 6.8, 12., 18.2, 25.])
        result = s.interpolate(method='cubic')
        assert_series_equal(result, expected)
Example #7
    def test_astype_generic_timestamp_no_frequency(self, dtype):
        # see gh-15524, gh-15987
        data = [1]
        s = Series(data)

        msg = "dtype has no unit. Please pass in"
        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)
Example #8
    def test_arg_for_errors_in_astype(self):
        # see gh-14878
        s = Series([1, 2, 3])

        with pytest.raises(ValueError):
            s.astype(np.float64, errors=False)

        s.astype(np.int8, errors='raise')
Example #9
    def test_astype_extension_dtypes_duplicate_col(self, dtype):
        # GH 24704
        a1 = Series([0, np.nan, 4], name='a')
        a2 = Series([np.nan, 3, 5], name='a')
        df = concat([a1, a2], axis=1)

        result = df.astype(dtype)
        expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
        assert_frame_equal(result, expected)
Example #10
    def test_astype_categories_deprecation(self):

        # deprecated 17636
        s = Series(['a', 'b', 'a'])
        expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = s.astype('category', categories=['a', 'b'], ordered=True)
        tm.assert_series_equal(result, expected)
Example #11
    def test_arg_for_errors_in_astype(self):
        # see gh-14878
        s = Series([1, 2, 3])

        msg = (r"Expected value of kwarg 'errors' to be one of \['raise',"
               r" 'ignore'\]\. Supplied value is 'False'")
        with pytest.raises(ValueError, match=msg):
            s.astype(np.float64, errors=False)

        s.astype(np.int8, errors='raise')
Example #12
    def test_astype_generic_timestamp_no_frequency(self, dtype):
        # see gh-15524, gh-15987
        data = [1]
        s = Series(data)

        msg = ((r"The '{dtype}' dtype has no unit\. "
                r"Please pass in '{dtype}\[ns\]' instead.")
               .format(dtype=dtype.__name__))
        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)
Example #13
    def test_astype_cast_nan_inf_int(self):
        # GH14265, check nan and inf raise error when converting to int
        types = [np.int32, np.int64]
        values = [np.nan, np.inf]
        msg = r'Cannot convert non-finite values \(NA or inf\) to integer'

        for this_type in types:
            for this_val in values:
                s = Series([this_val])
                with self.assertRaisesRegexp(ValueError, msg):
                    s.astype(this_type)
Example #14
    def test_astype_dict_like(self, dtype_class):
        # see gh-7271
        s = Series(range(0, 10, 2), name='abc')

        dt1 = dtype_class({'abc': str})
        result = s.astype(dt1)
        expected = Series(['0', '2', '4', '6', '8'], name='abc')
        tm.assert_series_equal(result, expected)

        dt2 = dtype_class({'abc': 'float64'})
        result = s.astype(dt2)
        expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64',
                          name='abc')
        tm.assert_series_equal(result, expected)

        dt3 = dtype_class({'abc': str, 'def': str})
        with pytest.raises(KeyError):
            s.astype(dt3)

        dt4 = dtype_class({0: str})
        with pytest.raises(KeyError):
            s.astype(dt4)

        # GH16717
        # if dtypes provided is empty, it should error
        dt5 = dtype_class({})
        with pytest.raises(KeyError):
            s.astype(dt5)
Example #15
    def test_astype_dict(self):
        # GH7271
        s = Series(range(0, 10, 2), name='abc')

        result = s.astype({'abc': str})
        expected = Series(['0', '2', '4', '6', '8'], name='abc')
        assert_series_equal(result, expected)

        result = s.astype({'abc': 'float64'})
        expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64',
                          name='abc')
        assert_series_equal(result, expected)

        self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str})
        self.assertRaises(KeyError, s.astype, {0: str})
Example #16
    def test_astype_datetimes(self):
        import pandas.tslib as tslib

        s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
        s = s.astype('O')
        self.assertEqual(s.dtype, np.object_)

        s = Series([datetime(2001, 1, 2, 0, 0)])
        s = s.astype('O')
        self.assertEqual(s.dtype, np.object_)

        s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
        s[1] = np.nan
        self.assertEqual(s.dtype, 'M8[ns]')
        s = s.astype('O')
        self.assertEqual(s.dtype, np.object_)
Example #17
    def test_is_equal_dtype(self):

        # test dtype comparisons between cats

        c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False)
        c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False)
        c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True)
        assert c1.is_dtype_equal(c1)
        assert c2.is_dtype_equal(c2)
        assert c3.is_dtype_equal(c3)
        assert c1.is_dtype_equal(c2)
        assert not c1.is_dtype_equal(c3)
        assert not c1.is_dtype_equal(Index(list('aabca')))
        assert not c1.is_dtype_equal(c1.astype(object))
        assert c1.is_dtype_equal(CategoricalIndex(c1))
        assert (c1.is_dtype_equal(
            CategoricalIndex(c1, categories=list('cab'))))
        assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))

        # GH 16659
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1.is_dtype_equal(s1)
        assert c2.is_dtype_equal(s2)
        assert c3.is_dtype_equal(s3)
        assert c1.is_dtype_equal(s2)
        assert not c1.is_dtype_equal(s3)
        assert not c1.is_dtype_equal(s1.astype(object))
Example #18
    def test_custom_grouper(self):

        dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1),
                            end=datetime(2005, 1, 10))

        data = np.array([1]*len(dti))
        s = Series(data, index=dti)

        b = TimeGrouper(Minute(5))
        g = s.groupby(b)

        self.assertEquals(g.ngroups, 2593)

        # construct expected val
        arr = [5] * 2592
        arr.append(1)
        idx = dti[0:-1:5]
        idx = idx.append(DatetimeIndex([np.datetime64(dti[-1])]))
        expect = Series(arr, index=idx)

        # cython returns float for now
        result = g.agg(np.sum)
        assert_series_equal(result, expect.astype(float))

        data = np.random.rand(len(dti), 10)
        df = DataFrame(data, index=dti)
        r = df.groupby(b).agg(np.sum)

        self.assertEquals(len(r.columns), 10)
        self.assertEquals(len(r.index), 2593)
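Grouping with TimeGrouper(Minute(5)) and aggregating, as in the test above, is the same operation the resample API exposes directly. A minimal sketch of that equivalence follows; the toy index and the assertion are illustrative, not taken from the test.

import numpy as np
from pandas import Series, date_range

# One value per minute, mirroring the index built in the test above.
idx = date_range(start='2005-01-01', end='2005-01-10', freq='Min')
s = Series(np.ones(len(idx)), index=idx)

# Resampling at a 5-minute frequency and summing; every complete bin
# holds five one-valued points, so it sums to 5.
result = s.resample('5Min').sum()
assert result.iloc[0] == 5.0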
Example #19
    def test_timedelta64_conversions(self):
        startdate = Series(date_range('2013-01-01', '2013-01-03'))
        enddate = Series(date_range('2013-03-01', '2013-03-03'))

        s1 = enddate - startdate
        s1[2] = np.nan

        for m in [1, 3, 10]:
            for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']:

                # op
                expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
                result = s1 / np.timedelta64(m, unit)
                assert_series_equal(result, expected)

                if m == 1 and unit != 'ns':

                    # astype
                    result = s1.astype("timedelta64[{0}]".format(unit))
                    assert_series_equal(result, expected)

                # reverse op
                expected = s1.apply(
                    lambda x: Timedelta(np.timedelta64(m, unit)) / x)
                result = np.timedelta64(m, unit) / s1

        # astype
        s = Series(date_range('20130101', periods=3))
        result = s.astype(object)
        self.assertIsInstance(result.iloc[0], datetime)
        self.assertTrue(result.dtype == np.object_)

        result = s1.astype(object)
        self.assertIsInstance(result.iloc[0], timedelta)
        self.assertTrue(result.dtype == np.object_)
Example #20
    def test_hourly(self):
        rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24),
                                freq='H')
        data_hourly = np.random.randint(100, 350, rng_hourly.size)
        ts_hourly = Series(data_hourly, index=rng_hourly)

        grouped = ts_hourly.groupby(ts_hourly.index.year)
        hoy = grouped.apply(lambda x: x.reset_index(drop=True))
        hoy = hoy.index.droplevel(0).values
        hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24
        hoy += 1

        annual = pivot_annual(ts_hourly)

        ts_hourly = ts_hourly.astype(float)
        for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]:
            subset = ts_hourly[hoy == i]
            subset.index = [x.year for x in subset.index]

            result = annual[i].dropna()
            tm.assert_series_equal(result, subset, check_names=False)
            self.assertEqual(result.name, i)

        leaps = ts_hourly[(ts_hourly.index.month == 2) & (
            ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)]
        hour = leaps.index.dayofyear[0] * 24 - 23
        leaps.index = leaps.index.year
        leaps.name = 1417
        tm.assert_series_equal(annual[hour].dropna(), leaps)
Example #21
    def test_custom_grouper(self):

        dti = DatetimeIndex(freq="Min", start=datetime(2005, 1, 1), end=datetime(2005, 1, 10))

        data = np.array([1] * len(dti))
        s = Series(data, index=dti)

        b = TimeGrouper(Minute(5))
        g = s.groupby(b)

        # check all cython functions work
        funcs = ["add", "mean", "prod", "ohlc", "min", "max", "var"]
        for f in funcs:
            g._cython_agg_general(f)

        self.assertEquals(g.ngroups, 2593)
        self.assert_(notnull(g.mean()).all())

        # construct expected val
        arr = [5] * 2592
        arr.append(1)
        idx = dti[0:-1:5]
        idx = idx.append(DatetimeIndex([np.datetime64(dti[-1])]))
        expect = Series(arr, index=idx)

        # cython returns float for now
        result = g.agg(np.sum)
        assert_series_equal(result, expect.astype(float))

        data = np.random.rand(len(dti), 10)
        df = DataFrame(data, index=dti)
        r = df.groupby(b).agg(np.sum)

        self.assertEquals(len(r.columns), 10)
        self.assertEquals(len(r.index), 2593)
Example #22
    def test_astype_categoricaldtype(self):
        s = Series(['a', 'b', 'a'])
        result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
        expected = Series(Categorical(['a', 'b', 'a'], ordered=True))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(['a', 'b'], ordered=False))
        expected = Series(Categorical(['a', 'b', 'a'], ordered=False))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(['a', 'b', 'c'], ordered=False))
        expected = Series(Categorical(['a', 'b', 'a'],
                                      categories=['a', 'b', 'c'],
                                      ordered=False))
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(['a', 'b', 'c']))
Example #23
    def test_astype(self):
        s = Series(np.random.randn(5), name='foo')

        for dtype in ['float32', 'float64', 'int64', 'int32']:
            astyped = s.astype(dtype)
            self.assertEqual(astyped.dtype, dtype)
            self.assertEqual(astyped.name, s.name)
Example #24
    def test_freq_conversion(self):

        # doc example

        # series
        td = Series(date_range('20130101', periods=4)) - \
            Series(date_range('20121201', periods=4))
        td[2] += timedelta(minutes=5, seconds=3)
        td[3] = np.nan

        result = td / np.timedelta64(1, 'D')
        expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan
                           ])
        assert_series_equal(result, expected)

        result = td.astype('timedelta64[D]')
        expected = Series([31, 31, 31, np.nan])
        assert_series_equal(result, expected)

        result = td / np.timedelta64(1, 's')
        expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3,
                           np.nan])
        assert_series_equal(result, expected)

        result = td.astype('timedelta64[s]')
        assert_series_equal(result, expected)

        # tdi
        td = TimedeltaIndex(td)

        result = td / np.timedelta64(1, 'D')
        expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan])
        assert_index_equal(result, expected)

        result = td.astype('timedelta64[D]')
        expected = Index([31, 31, 31, np.nan])
        assert_index_equal(result, expected)

        result = td / np.timedelta64(1, 's')
        expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3,
                          np.nan])
        assert_index_equal(result, expected)

        result = td.astype('timedelta64[s]')
        assert_index_equal(result, expected)
Example #25
    def test_astype_datetime(self):
        s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))

        s = s.astype('O')
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0)])

        s = s.astype('O')
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])

        s[1] = np.nan
        assert s.dtype == 'M8[ns]'

        s = s.astype('O')
        assert s.dtype == np.object_
Example #26
    def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
        # tests all units
        # gh-19223
        dtype = "{}[{}]".format(dtype, unit)
        arr = np.array([1, 2, 3], dtype=arr_dtype)
        s = Series(arr)
        result = s.astype(dtype)
        expected = Series(arr.astype(dtype))

        tm.assert_series_equal(result, expected)
Example #27
    def test_astype_generic_timestamp_deprecated(self):
        # see gh-15524
        data = [1]

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            s = Series(data)
            dtype = np.datetime64
            result = s.astype(dtype)
            expected = Series(data, dtype=dtype)
            tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            s = Series(data)
            dtype = np.timedelta64
            result = s.astype(dtype)
            expected = Series(data, dtype=dtype)
            tm.assert_series_equal(result, expected)
Example #28
    def test_astype_cast_object_int(self):
        arr = Series(["car", "house", "tree", "1"])

        self.assertRaises(ValueError, arr.astype, int)
        self.assertRaises(ValueError, arr.astype, np.int64)
        self.assertRaises(ValueError, arr.astype, np.int8)

        arr = Series(['1', '2', '3', '4'], dtype=object)
        result = arr.astype(int)
        self.assert_numpy_array_equal(result, np.arange(1, 5))
Example #29
    def test_astype_str_cast(self):
        # see gh-9757
        ts = Series([Timestamp('2010-01-04 00:00:00')])
        s = ts.astype(str)

        expected = Series([str('2010-01-04')])
        tm.assert_series_equal(s, expected)

        ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')])
        s = ts.astype(str)

        expected = Series([str('2010-01-04 00:00:00-05:00')])
        tm.assert_series_equal(s, expected)

        td = Series([Timedelta(1, unit='d')])
        s = td.astype(str)

        expected = Series([str('1 days 00:00:00.000000000')])
        tm.assert_series_equal(s, expected)
Example #30
    def test_interp_scipy_basic(self):
        tm._skip_if_no_scipy()

        s = Series([1, 3, np.nan, 12, np.nan, 25])
        # slinear
        expected = Series([1., 3., 7.5, 12., 18.5, 25.])
        result = s.interpolate(method='slinear')
        assert_series_equal(result, expected)

        result = s.interpolate(method='slinear', downcast='infer')
        assert_series_equal(result, expected)
        # nearest
        expected = Series([1, 3, 3, 12, 12, 25])
        result = s.interpolate(method='nearest')
        assert_series_equal(result, expected.astype('float'))

        result = s.interpolate(method='nearest', downcast='infer')
        assert_series_equal(result, expected)
        # zero
        expected = Series([1, 3, 3, 12, 12, 25])
        result = s.interpolate(method='zero')
        assert_series_equal(result, expected.astype('float'))

        result = s.interpolate(method='zero', downcast='infer')
        assert_series_equal(result, expected)
        # quadratic
        # GH #15662.
        # new cubic and quadratic interpolation algorithms from scipy 0.19.0.
        # previously `splmake` was used. See scipy/scipy#6710
        if _is_scipy_ge_0190:
            expected = Series([1, 3., 6.823529, 12., 18.058824, 25.])
        else:
            expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
        result = s.interpolate(method='quadratic')
        assert_series_equal(result, expected)

        result = s.interpolate(method='quadratic', downcast='infer')
        assert_series_equal(result, expected)
        # cubic
        expected = Series([1., 3., 6.8, 12., 18.2, 25.])
        result = s.interpolate(method='cubic')
        assert_series_equal(result, expected)
Example #31
    def setup(self, dtype, M, offset_factor):
        offset = int(M * offset_factor)
        tmp = Series(np.random.randint(offset, M + offset, 10**6))
        self.series = tmp.astype(dtype)
        self.values = np.arange(M).astype(dtype)
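The setup method above is only a fragment of a benchmark class. A minimal sketch of how it might sit inside an asv-style benchmark follows; the class name, parameter grid and the timed isin call are assumptions for illustration, not from the original source.

import numpy as np
from pandas import Series


class IsInBenchmark:
    # Hypothetical parameter grid; the real benchmark defines its own values.
    params = [["int64", "float64"], [1000], [0.0, 0.5]]
    param_names = ["dtype", "M", "offset_factor"]

    def setup(self, dtype, M, offset_factor):
        offset = int(M * offset_factor)
        tmp = Series(np.random.randint(offset, M + offset, 10**6))
        self.series = tmp.astype(dtype)
        self.values = np.arange(M).astype(dtype)

    def time_isin(self, dtype, M, offset_factor):
        # The timed operation is an assumption based on the setup attributes.
        self.series.isin(self.values)

In an asv run, setup is called for each parameter combination before the corresponding time_ method is measured.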
Example #32
def infer_problem_type(y: Series, silent=False) -> str:
    """ Identifies which type of prediction problem we are interested in (if user has not specified).
        I.e. binary classification, multi-class classification, or regression.
    """
    if len(y) == 0:
        raise ValueError("provided labels cannot have length = 0")
    y = y.dropna()  # Remove missing values from y (there should not be any, though, as they were removed in Learner.general_data_processing())
    num_rows = len(y)

    unique_values = y.unique()

    MULTICLASS_LIMIT = 1000  # if numeric and class count would be above this amount, assume it is regression
    if num_rows > 1000:
        REGRESS_THRESHOLD = 0.05  # if the unique-ratio is less than this, we assume multiclass classification, even when labels are integers
    else:
        REGRESS_THRESHOLD = 0.1

    unique_count = len(unique_values)
    if unique_count == 2:
        problem_type = BINARY
        reason = "only two unique label-values observed"
    elif y.dtype.name in ['object', 'category']:
        problem_type = MULTICLASS
        reason = f"dtype of label-column == {y.dtype.name}"
    elif np.issubdtype(y.dtype, np.floating):
        unique_ratio = unique_count / float(num_rows)
        if (unique_ratio <= REGRESS_THRESHOLD) and (unique_count <=
                                                    MULTICLASS_LIMIT):
            try:
                can_convert_to_int = np.array_equal(y, y.astype(int))
                if can_convert_to_int:
                    problem_type = MULTICLASS
                    reason = "dtype of label-column == float, but few unique label-values observed and label-values can be converted to int"
                else:
                    problem_type = REGRESSION
                    reason = "dtype of label-column == float and label-values can't be converted to int"
            except:
                problem_type = REGRESSION
                reason = "dtype of label-column == float and label-values can't be converted to int"
        else:
            problem_type = REGRESSION
            reason = "dtype of label-column == float and many unique label-values observed"
    elif np.issubdtype(y.dtype, np.integer):
        unique_ratio = unique_count / float(num_rows)
        if (unique_ratio <= REGRESS_THRESHOLD) and (unique_count <=
                                                    MULTICLASS_LIMIT):
            problem_type = MULTICLASS  # TODO: Check if integers are from 0 to n-1 for n unique values, if they have a wide spread, it could still be regression
            reason = "dtype of label-column == int, but few unique label-values observed"
        else:
            problem_type = REGRESSION
            reason = "dtype of label-column == int and many unique label-values observed"
    else:
        raise NotImplementedError(f'label dtype {y.dtype} not supported!')
    if not silent:
        logger.log(
            25,
            f"AutoGluon infers your prediction problem is: '{problem_type}' (because {reason})."
        )

        # TODO: Move this outside of this function so it is visible even if problem type was not inferred.
        if problem_type in [BINARY, MULTICLASS]:
            if unique_count > 10:
                logger.log(
                    20,
                    f'\tFirst 10 (of {unique_count}) unique label values:  {list(unique_values[:10])}'
                )
            else:
                logger.log(
                    20,
                    f'\t{unique_count} unique label values:  {list(unique_values)}'
                )
        elif problem_type == REGRESSION:
            y_max = y.max()
            y_min = y.min()
            y_mean = y.mean()
            y_stddev = y.std()
            logger.log(
                20,
                f'\tLabel info (max, min, mean, stddev): ({y_max}, {y_min}, {round(y_mean, 5)}, {round(y_stddev, 5)})'
            )

        logger.log(
            25,
            f"\tIf '{problem_type}' is not the correct problem_type, please manually specify the problem_type argument in fit() (You may specify problem_type as one of: {[BINARY, MULTICLASS, REGRESSION]})"
        )
    return problem_type
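A brief usage sketch of the decision rules above, with a toy label Series for each branch; the constant stand-ins and the expected outcomes in the comments follow the logic of infer_problem_type and are illustrative, not output captured from the original module.

import numpy as np
import pandas as pd

# Stand-ins for the module-level constants referenced above (assumed values).
BINARY, MULTICLASS, REGRESSION = 'binary', 'multiclass', 'regression'

y_binary = pd.Series([0, 1, 0, 1, 1])                    # two unique values -> BINARY
y_multiclass = pd.Series(['cat', 'dog', 'bird', 'dog'])  # object dtype -> MULTICLASS
y_regression = pd.Series(np.random.rand(500))            # many unique floats -> REGRESSION

# Expected outcomes when passed to infer_problem_type:
#   infer_problem_type(y_binary)     -> 'binary'
#   infer_problem_type(y_multiclass) -> 'multiclass'
#   infer_problem_type(y_regression) -> 'regression'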
Example #33
    def test_constructor_dtype_timedelta64(self):

        # basic
        td = Series([timedelta(days=i) for i in range(3)])
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        td = Series([timedelta(days=1)])
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        td = Series(
            [timedelta(days=1),
             timedelta(days=2),
             np.timedelta64(1, 's')])

        self.assertEqual(td.dtype, 'timedelta64[ns]')

        # mixed with NaT
        from pandas import tslib
        td = Series([timedelta(days=1), tslib.NaT], dtype='m8[ns]')
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        td = Series([timedelta(days=1), np.nan], dtype='m8[ns]')
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        td = Series([np.timedelta64(300000000), pd.NaT], dtype='m8[ns]')
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        # improved inference
        # GH5689
        td = Series([np.timedelta64(300000000), pd.NaT])
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        # because iNaT is int, not coerced to timedelta
        td = Series([np.timedelta64(300000000), tslib.iNaT])
        self.assertEqual(td.dtype, 'object')

        td = Series([np.timedelta64(300000000), np.nan])
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        td = Series([pd.NaT, np.timedelta64(300000000)])
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        td = Series([np.timedelta64(1, 's')])
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        # these are frequency conversion astypes
        # for t in ['s', 'D', 'us', 'ms']:
        #    self.assertRaises(TypeError, td.astype, 'm8[%s]' % t)

        # valid astype
        td.astype('int64')

        # invalid casting
        self.assertRaises(TypeError, td.astype, 'int32')

        # this is an invalid casting
        def f():
            Series([timedelta(days=1), 'foo'], dtype='m8[ns]')

        self.assertRaises(Exception, f)

        # leave as object here
        td = Series([timedelta(days=i) for i in range(3)] + ['foo'])
        self.assertEqual(td.dtype, 'object')

        # these will correctly infer a timedelta
        s = Series([None, pd.NaT, '1 Day'])
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        s = Series([np.nan, pd.NaT, '1 Day'])
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        s = Series([pd.NaT, None, '1 Day'])
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        s = Series([pd.NaT, np.nan, '1 Day'])
        self.assertEqual(s.dtype, 'timedelta64[ns]')
Example #34
    def test_td64_series_astype_object(self):
        tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]')
        result = tdser.astype(object)
        assert isinstance(result.iloc[0], timedelta)
        assert result.dtype == np.object_
Example #35
    def test_constructor_dtype_timedelta64(self):

        # basic
        td = Series([timedelta(days=i) for i in range(3)])
        assert td.dtype == 'timedelta64[ns]'

        td = Series([timedelta(days=1)])
        assert td.dtype == 'timedelta64[ns]'

        td = Series([timedelta(days=1), timedelta(days=2), np.timedelta64(
            1, 's')])

        assert td.dtype == 'timedelta64[ns]'

        # mixed with NaT
        td = Series([timedelta(days=1), NaT], dtype='m8[ns]')
        assert td.dtype == 'timedelta64[ns]'

        td = Series([timedelta(days=1), np.nan], dtype='m8[ns]')
        assert td.dtype == 'timedelta64[ns]'

        td = Series([np.timedelta64(300000000), pd.NaT], dtype='m8[ns]')
        assert td.dtype == 'timedelta64[ns]'

        # improved inference
        # GH5689
        td = Series([np.timedelta64(300000000), NaT])
        assert td.dtype == 'timedelta64[ns]'

        # because iNaT is int, not coerced to timedelta
        td = Series([np.timedelta64(300000000), iNaT])
        assert td.dtype == 'object'

        td = Series([np.timedelta64(300000000), np.nan])
        assert td.dtype == 'timedelta64[ns]'

        td = Series([pd.NaT, np.timedelta64(300000000)])
        assert td.dtype == 'timedelta64[ns]'

        td = Series([np.timedelta64(1, 's')])
        assert td.dtype == 'timedelta64[ns]'

        # these are frequency conversion astypes
        # for t in ['s', 'D', 'us', 'ms']:
        #    pytest.raises(TypeError, td.astype, 'm8[%s]' % t)

        # valid astype
        td.astype('int64')

        # invalid casting
        msg = (r"cannot astype a timedelta from \[timedelta64\[ns\]\] to"
               r" \[int32\]")
        with pytest.raises(TypeError, match=msg):
            td.astype('int32')

        # this is an invalid casting
        msg = "Could not convert object to NumPy timedelta"
        with pytest.raises(ValueError, match=msg):
            Series([timedelta(days=1), 'foo'], dtype='m8[ns]')

        # leave as object here
        td = Series([timedelta(days=i) for i in range(3)] + ['foo'])
        assert td.dtype == 'object'

        # these will correctly infer a timedelta
        s = Series([None, pd.NaT, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'
        s = Series([np.nan, pd.NaT, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'
        s = Series([pd.NaT, None, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'
        s = Series([pd.NaT, np.nan, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'
Example #36
class TestSeriesPeriod(object):
    def setup_method(self, method):
        self.series = Series(period_range('2000-01-01', periods=10, freq='D'))

    def test_auto_conversion(self):
        series = Series(list(period_range('2000-01-01', periods=10, freq='D')))
        assert series.dtype == 'Period[D]'

        series = pd.Series([
            pd.Period('2011-01-01', freq='D'),
            pd.Period('2011-02-01', freq='D')
        ])
        assert series.dtype == 'Period[D]'

    def test_getitem(self):
        assert self.series[1] == pd.Period('2000-01-02', freq='D')

        result = self.series[[2, 4]]
        exp = pd.Series([
            pd.Period('2000-01-03', freq='D'),
            pd.Period('2000-01-05', freq='D')
        ],
                        index=[2, 4],
                        dtype='Period[D]')
        tm.assert_series_equal(result, exp)
        assert result.dtype == 'Period[D]'

    def test_isna(self):
        # GH 13737
        s = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('NaT', freq='M')])
        tm.assert_series_equal(s.isna(), Series([False, True]))
        tm.assert_series_equal(s.notna(), Series([True, False]))

    def test_fillna(self):
        # GH 13737
        s = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('NaT', freq='M')])

        res = s.fillna(pd.Period('2012-01', freq='M'))
        exp = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('2012-01', freq='M')])
        tm.assert_series_equal(res, exp)
        assert res.dtype == 'Period[M]'

    def test_dropna(self):
        # GH 13737
        s = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('NaT', freq='M')])
        tm.assert_series_equal(s.dropna(),
                               Series([pd.Period('2011-01', freq='M')]))

    def test_between(self):
        left, right = self.series[[2, 7]]
        result = self.series.between(left, right)
        expected = (self.series >= left) & (self.series <= right)
        tm.assert_series_equal(result, expected)

    # ---------------------------------------------------------------------
    # NaT support

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet",
                       strict=True)
    def test_NaT_scalar(self):
        series = Series([0, 1000, 2000, pd._libs.iNaT], dtype='period[D]')

        val = series[3]
        assert pd.isna(val)

        series[2] = val
        assert pd.isna(series[2])

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet",
                       strict=True)
    def test_NaT_cast(self):
        result = Series([np.nan]).astype('period[D]')
        expected = Series([pd.NaT])
        tm.assert_series_equal(result, expected)

    def test_set_none(self):
        self.series[3] = None
        assert self.series[3] is pd.NaT

        self.series[3:5] = None
        assert self.series[4] is pd.NaT

    def test_set_nan(self):
        # Do we want to allow this?
        self.series[5] = np.nan
        assert self.series[5] is pd.NaT

        self.series[5:7] = np.nan
        assert self.series[6] is pd.NaT

    def test_intercept_astype_object(self):
        expected = self.series.astype('object')

        df = DataFrame({
            'a': self.series,
            'b': np.random.randn(len(self.series))
        })

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

        df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

    def test_align_series(self, join_type):
        rng = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts.align(ts[::2], join=join_type)

    def test_truncate(self):
        # GH 17717
        idx1 = pd.PeriodIndex([
            pd.Period('2017-09-02'),
            pd.Period('2017-09-02'),
            pd.Period('2017-09-03')
        ])
        series1 = pd.Series([1, 2, 3], index=idx1)
        result1 = series1.truncate(after='2017-09-02')

        expected_idx1 = pd.PeriodIndex(
            [pd.Period('2017-09-02'),
             pd.Period('2017-09-02')])
        tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1))

        idx2 = pd.PeriodIndex([
            pd.Period('2017-09-03'),
            pd.Period('2017-09-02'),
            pd.Period('2017-09-03')
        ])
        series2 = pd.Series([1, 2, 3], index=idx2)
        result2 = series2.sort_index().truncate(after='2017-09-02')

        expected_idx2 = pd.PeriodIndex([pd.Period('2017-09-02')])
        tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))

    @pytest.mark.parametrize(
        'input_vals',
        [[Period('2016-01', freq='M'),
          Period('2016-02', freq='M')],
         [Period('2016-01-01', freq='D'),
          Period('2016-01-02', freq='D')],
         [
             Period('2016-01-01 00:00:00', freq='H'),
             Period('2016-01-01 01:00:00', freq='H')
         ],
         [
             Period('2016-01-01 00:00:00', freq='M'),
             Period('2016-01-01 00:01:00', freq='M')
         ],
         [
             Period('2016-01-01 00:00:00', freq='S'),
             Period('2016-01-01 00:00:01', freq='S')
         ]])
    def test_end_time_timevalues(self, input_vals):
        # GH 17157
        # Check that the time part of the Period is adjusted by end_time
        # when using the dt accessor on a Series
        input_vals = PeriodArray._from_sequence(np.asarray(input_vals))

        s = Series(input_vals)
        result = s.dt.end_time
        expected = s.apply(lambda x: x.end_time)
        tm.assert_series_equal(result, expected)
Example #37
    def expected(self, dtype):
        arr = np.arange(5).astype(dtype)
        ser = Series(arr)
        ser = ser.astype(object)
        ser.values[0] = np.timedelta64(4, "ns")
        return ser
Example #38
class TestSeriesPeriod(object):
    def setup_method(self, method):
        self.series = Series(period_range('2000-01-01', periods=10, freq='D'))

    def test_auto_conversion(self):
        series = Series(list(period_range('2000-01-01', periods=10, freq='D')))
        assert series.dtype == 'object'

        series = pd.Series([
            pd.Period('2011-01-01', freq='D'),
            pd.Period('2011-02-01', freq='D')
        ])
        assert series.dtype == 'object'

    def test_getitem(self):
        assert self.series[1] == pd.Period('2000-01-02', freq='D')

        result = self.series[[2, 4]]
        exp = pd.Series([
            pd.Period('2000-01-03', freq='D'),
            pd.Period('2000-01-05', freq='D')
        ],
                        index=[2, 4])
        tm.assert_series_equal(result, exp)
        assert result.dtype == 'object'

    def test_isnull(self):
        # GH 13737
        s = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('NaT', freq='M')])
        tm.assert_series_equal(s.isnull(), Series([False, True]))
        tm.assert_series_equal(s.notnull(), Series([True, False]))

    def test_fillna(self):
        # GH 13737
        s = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('NaT', freq='M')])

        res = s.fillna(pd.Period('2012-01', freq='M'))
        exp = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('2012-01', freq='M')])
        tm.assert_series_equal(res, exp)
        assert res.dtype == 'object'

        res = s.fillna('XXX')
        exp = Series([pd.Period('2011-01', freq='M'), 'XXX'])
        tm.assert_series_equal(res, exp)
        assert res.dtype == 'object'

    def test_dropna(self):
        # GH 13737
        s = Series(
            [pd.Period('2011-01', freq='M'),
             pd.Period('NaT', freq='M')])
        tm.assert_series_equal(s.dropna(),
                               Series([pd.Period('2011-01', freq='M')]))

    def test_series_comparison_scalars(self):
        val = pd.Period('2000-01-04', freq='D')
        result = self.series > val
        expected = pd.Series([x > val for x in self.series])
        tm.assert_series_equal(result, expected)

        val = self.series[5]
        result = self.series > val
        expected = pd.Series([x > val for x in self.series])
        tm.assert_series_equal(result, expected)

    def test_between(self):
        left, right = self.series[[2, 7]]
        result = self.series.between(left, right)
        expected = (self.series >= left) & (self.series <= right)
        tm.assert_series_equal(result, expected)

    # ---------------------------------------------------------------------
    # NaT support
    """
    # ToDo: Enable when support period dtype
    def test_NaT_scalar(self):
        series = Series([0, 1000, 2000, iNaT], dtype='period[D]')

        val = series[3]
        assert isnull(val)

        series[2] = val
        assert isnull(series[2])

    def test_NaT_cast(self):
        result = Series([np.nan]).astype('period[D]')
        expected = Series([NaT])
        tm.assert_series_equal(result, expected)
    """

    def test_set_none_nan(self):
        # currently Period is stored as object dtype, not as NaT
        self.series[3] = None
        assert self.series[3] is None

        self.series[3:5] = None
        assert self.series[4] is None

        self.series[5] = np.nan
        assert np.isnan(self.series[5])

        self.series[5:7] = np.nan
        assert np.isnan(self.series[6])

    def test_intercept_astype_object(self):
        expected = self.series.astype('object')

        df = DataFrame({
            'a': self.series,
            'b': np.random.randn(len(self.series))
        })

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

        df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

    def test_comp_series_period_scalar(self):
        # GH 13200
        for freq in ['M', '2M', '3M']:
            base = Series([
                Period(x, freq=freq)
                for x in ['2011-01', '2011-02', '2011-03', '2011-04']
            ])
            p = Period('2011-02', freq=freq)

            exp = pd.Series([False, True, False, False])
            tm.assert_series_equal(base == p, exp)
            tm.assert_series_equal(p == base, exp)

            exp = pd.Series([True, False, True, True])
            tm.assert_series_equal(base != p, exp)
            tm.assert_series_equal(p != base, exp)

            exp = pd.Series([False, False, True, True])
            tm.assert_series_equal(base > p, exp)
            tm.assert_series_equal(p < base, exp)

            exp = pd.Series([True, False, False, False])
            tm.assert_series_equal(base < p, exp)
            tm.assert_series_equal(p > base, exp)

            exp = pd.Series([False, True, True, True])
            tm.assert_series_equal(base >= p, exp)
            tm.assert_series_equal(p <= base, exp)

            exp = pd.Series([True, True, False, False])
            tm.assert_series_equal(base <= p, exp)
            tm.assert_series_equal(p >= base, exp)

            # different base freq
            msg = "Input has different freq=A-DEC from Period"
            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
                base <= Period('2011', freq='A')

            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
                Period('2011', freq='A') >= base

    def test_comp_series_period_series(self):
        # GH 13200
        for freq in ['M', '2M', '3M']:
            base = Series([
                Period(x, freq=freq)
                for x in ['2011-01', '2011-02', '2011-03', '2011-04']
            ])

            s = Series([
                Period(x, freq=freq)
                for x in ['2011-02', '2011-01', '2011-03', '2011-05']
            ])

            exp = Series([False, False, True, False])
            tm.assert_series_equal(base == s, exp)

            exp = Series([True, True, False, True])
            tm.assert_series_equal(base != s, exp)

            exp = Series([False, True, False, False])
            tm.assert_series_equal(base > s, exp)

            exp = Series([True, False, False, True])
            tm.assert_series_equal(base < s, exp)

            exp = Series([False, True, True, False])
            tm.assert_series_equal(base >= s, exp)

            exp = Series([True, False, True, True])
            tm.assert_series_equal(base <= s, exp)

            s2 = Series([
                Period(x, freq='A') for x in ['2011', '2011', '2011', '2011']
            ])

            # different base freq
            msg = "Input has different freq=A-DEC from Period"
            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
                base <= s2

    def test_comp_series_period_object(self):
        # GH 13200
        base = Series([
            Period('2011', freq='A'),
            Period('2011-02', freq='M'),
            Period('2013', freq='A'),
            Period('2011-04', freq='M')
        ])

        s = Series([
            Period('2012', freq='A'),
            Period('2011-01', freq='M'),
            Period('2013', freq='A'),
            Period('2011-05', freq='M')
        ])

        exp = Series([False, False, True, False])
        tm.assert_series_equal(base == s, exp)

        exp = Series([True, True, False, True])
        tm.assert_series_equal(base != s, exp)

        exp = Series([False, True, False, False])
        tm.assert_series_equal(base > s, exp)

        exp = Series([True, False, False, True])
        tm.assert_series_equal(base < s, exp)

        exp = Series([False, True, True, False])
        tm.assert_series_equal(base >= s, exp)

        exp = Series([True, False, True, True])
        tm.assert_series_equal(base <= s, exp)

    def test_align_series(self):
        rng = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(rng)), index=rng)

        result = ts + ts[::2]
        expected = ts + ts
        expected[1::2] = np.nan
        tm.assert_series_equal(result, expected)

        result = ts + _permute(ts[::2])
        tm.assert_series_equal(result, expected)

        # it works!
        for kind in ['inner', 'outer', 'left', 'right']:
            ts.align(ts[::2], join=kind)
        msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
            ts + ts.asfreq('D', how="end")
Example #39
    def test_dt_accessor_api_for_categorical(self):
        # https://github.com/pandas-dev/pandas/issues/10661
        from pandas.core.indexes.accessors import Properties

        s_dr = Series(date_range("1/1/2015", periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range("1/1/2015", freq="D", periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range("1 days", "10 days"))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        get_ops = lambda x: x._datetimelike_ops

        test_data = [
            ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr),
            ("Period", get_ops(PeriodArray), s_pr, c_pr),
            ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr),
        ]

        assert isinstance(c_dr.dt, Properties)

        special_func_defs = [
            ("strftime", ("%Y-%m-%d", ), {}),
            ("tz_convert", ("EST", ), {}),
            ("round", ("D", ), {}),
            ("floor", ("D", ), {}),
            ("ceil", ("D", ), {}),
            ("asfreq", ("D", ), {}),
            # FIXME: don't leave commented-out
            # ('tz_localize', ("UTC",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # the series is already localized
        _ignore_names = ["tz_localize", "components"]

        for name, attr_names, s, c in test_data:
            func_names = [
                f for f in dir(s.dt)
                if not (f.startswith("_") or f in attr_names
                        or f in _special_func_names or f in _ignore_names)
            ]

            func_defs = [(f, (), {}) for f in func_names]
            for f_def in special_func_defs:
                if f_def[0] in dir(s.dt):
                    func_defs.append(f_def)

            for func, args, kwargs in func_defs:
                with warnings.catch_warnings():
                    if func == "to_period":
                        # dropping TZ
                        warnings.simplefilter("ignore", UserWarning)
                    res = getattr(c.dt, func)(*args, **kwargs)
                    exp = getattr(s.dt, func)(*args, **kwargs)

                tm.assert_equal(res, exp)

            for attr in attr_names:
                res = getattr(c.dt, attr)
                exp = getattr(s.dt, attr)

                if isinstance(res, DataFrame):
                    tm.assert_frame_equal(res, exp)
                elif isinstance(res, Series):
                    tm.assert_series_equal(res, exp)
                else:
                    tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype("category")
        msg = "Can only use .dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        assert not hasattr(invalid, "str")
Example #40
    def test_astype_categories_raises(self):
        # deprecated GH#17636, removed in GH#27141
        s = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            s.astype("category", categories=["a", "b"], ordered=True)
Example #41
    def test_astype_from_categorical(self, items):
        ser = Series(items)
        exp = Series(Categorical(items))
        res = ser.astype("category")
        tm.assert_series_equal(res, exp)
Example #42
    def test_astype_categorical_to_other(self):
        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
        ser = Series(np.random.RandomState(0).randint(0, 10000,
                                                      100)).sort_values()
        ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat)

        expected = ser
        tm.assert_series_equal(ser.astype("category"), expected)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
        msg = r"Cannot cast object dtype to float64"
        with pytest.raises(ValueError, match=msg):
            ser.astype("float64")

        cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
        tm.assert_series_equal(cat.astype("str"), exp)
        s2 = Series(Categorical(["1", "2", "3", "4"]))
        exp2 = Series([1, 2, 3, 4]).astype("int64")
        tm.assert_series_equal(s2.astype("int"), exp2)

        # object dtype doesn't sort correctly, so just compare that we have
        # the same values
        def cmp(a, b):
            tm.assert_almost_equal(np.sort(np.unique(a)),
                                   np.sort(np.unique(b)))

        expected = Series(np.array(ser.values), name="value_group")
        cmp(ser.astype("object"), expected)
        cmp(ser.astype(np.object_), expected)

        # array conversion
        tm.assert_almost_equal(np.array(ser), np.array(ser.values))

        tm.assert_series_equal(ser.astype("category"), ser)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)

        roundtrip_expected = ser.cat.set_categories(
            ser.cat.categories.sort_values()).cat.remove_unused_categories()
        result = ser.astype("object").astype("category")
        tm.assert_series_equal(result, roundtrip_expected)
        result = ser.astype("object").astype(CategoricalDtype())
        tm.assert_series_equal(result, roundtrip_expected)
Example #43
    def restore(self, col: pd.Series) -> pd.Series:
        """Restore column when to_pandas."""
        return col.astype(self.dtype)
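The restore helper above casts a column back to a dtype remembered earlier. A minimal sketch of how such a helper could be wired up and round-tripped is below; the ColumnMeta class and the example data are assumptions for illustration, not the original class.

import pandas as pd


class ColumnMeta:
    """Remembers a column's original dtype so it can be restored later."""

    def __init__(self, dtype):
        self.dtype = dtype

    def restore(self, col: pd.Series) -> pd.Series:
        """Restore column when to_pandas."""
        return col.astype(self.dtype)


# Round-trip: a categorical column downgraded to object, then restored.
original = pd.Series(['a', 'b', 'a'], dtype='category')
meta = ColumnMeta(original.dtype)
restored = meta.restore(original.astype(object))
assert restored.dtype == original.dtype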
Example #44
    def test_constructor_with_datetime_tz(self):

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
        self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
        self.assertTrue(is_datetime64tz_dtype(s.dtype))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # export
        result = s.values
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == 'datetime64[ns]')

        exp = pd.DatetimeIndex(result)
        exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz)
        self.assert_index_equal(dr, exp)

        # indexing
        result = s.iloc[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))
        result = s[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))

        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # astype
        result = s.astype(object)
        expected = Series(DatetimeIndex(s._values).asobject)
        assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
        assert_series_equal(result, s)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype('datetime64[ns, US/Eastern]')
        assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        assert_series_equal(result, s)

        result = s.astype('datetime64[ns, CET]')
        expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
        assert_series_equal(result, expected)

        # short str
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # formatting with NaT
        result = s.shift()
        self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
        self.assertTrue('NaT' in str(result))

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')
        ])
        self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
        self.assertTrue(lib.infer_dtype(s) == 'datetime64')

        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')
        ])
        self.assertTrue(s.dtype == 'object')
        self.assertTrue(lib.infer_dtype(s) == 'datetime')

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)
Example #45
0
def test_where_setitem_invalid():
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    # slice
    s = Series(list('abc'))

    def f():
        s[0:3] = list(range(27))

    pytest.raises(ValueError, f)

    s[0:3] = list(range(3))
    expected = Series([0, 1, 2])
    assert_series_equal(
        s.astype(np.int64),
        expected,
    )

    # slice with step
    s = Series(list('abcdef'))

    def f():
        s[0:4:2] = list(range(27))

    pytest.raises(ValueError, f)

    s = Series(list('abcdef'))
    s[0:4:2] = list(range(2))
    expected = Series([0, 'b', 1, 'd', 'e', 'f'])
    assert_series_equal(s, expected)

    # neg slices
    s = Series(list('abcdef'))

    def f():
        s[:-1] = list(range(27))

    pytest.raises(ValueError, f)

    s[-3:-1] = list(range(2))
    expected = Series(['a', 'b', 'c', 0, 1, 'f'])
    assert_series_equal(s, expected)

    # list
    s = Series(list('abc'))

    def f():
        s[[0, 1, 2]] = list(range(27))

    pytest.raises(ValueError, f)

    s = Series(list('abc'))

    def f():
        s[[0, 1, 2]] = list(range(2))

    pytest.raises(ValueError, f)

    # scalar
    s = Series(list('abc'))
    s[0] = list(range(10))
    expected = Series([list(range(10)), 'b', 'c'])
    assert_series_equal(s, expected)
Example #46
0
    def test_astype_cast_object_int(self):
        arr = Series(["1", "2", "3", "4"], dtype=object)
        result = arr.astype(int)

        tm.assert_series_equal(result, Series(np.arange(1, 5)))
Example #47
0
def to_bool(series: pd.Series) -> pd.Series:
    dtype = hasnan_bool_name if series.hasnans else bool
    return series.astype(dtype)
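
A self-contained sketch of the behaviour above, assuming hasnan_bool_name resolves to pandas' nullable "boolean" dtype (in the original code it comes from configuration, so that value is an assumption here):

import numpy as np
import pandas as pd

hasnan_bool_name = "boolean"  # assumption: nullable boolean dtype name from config

def to_bool(series: pd.Series) -> pd.Series:
    # NaN-free columns become plain numpy bool; columns with NaN get the nullable dtype.
    dtype = hasnan_bool_name if series.hasnans else bool
    return series.astype(dtype)

print(to_bool(pd.Series([True, False])).dtype)          # bool
print(to_bool(pd.Series([True, np.nan, False])).dtype)  # boolean (NaN kept as <NA>)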
Example #48
0
 def test_astype_cast_object_int_fail(self, dtype):
     arr = Series(["car", "house", "tree", "1"])
     msg = r"invalid literal for int\(\) with base 10: 'car'"
     with pytest.raises(ValueError, match=msg):
         arr.astype(dtype)
Example #49
0
    def test_astype_cast_object_int(self):
        arr = Series(['1', '2', '3', '4'], dtype=object)
        result = arr.astype(int)

        tm.assert_series_equal(result, Series(np.arange(1, 5)))
Example #50
0
    def test_astype(self, dtype):
        s = Series(np.random.randn(5), name="foo")
        as_typed = s.astype(dtype)

        assert as_typed.dtype == dtype
        assert as_typed.name == s.name
Example #51
0
 def test_astype_cast_object_int_fail(self, dtype):
     arr = Series(["car", "house", "tree", "1"])
     with pytest.raises(ValueError):
         arr.astype(dtype)
Example #52
0
 def test_astype_to_str_preserves_na(self, value, string_value):
     # https://github.com/pandas-dev/pandas/issues/36904
     s = Series(["a", "b", value], dtype=object)
     result = s.astype(str)
     expected = Series(["a", "b", string_value], dtype=object)
     tm.assert_series_equal(result, expected)
Example #53
0
    def test_constructor_dtype_datetime64(self):

        s = Series(iNaT, dtype='M8[ns]', index=lrange(5))
        assert isna(s).all()

        # in theory this should be all nulls, but since
        # we are not specifying a dtype it is ambiguous
        s = Series(iNaT, index=lrange(5))
        assert not isna(s).all()

        s = Series(nan, dtype='M8[ns]', index=lrange(5))
        assert isna(s).all()

        s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]')
        assert isna(s[1])
        assert s.dtype == 'M8[ns]'

        s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]')
        assert isna(s[1])
        assert s.dtype == 'M8[ns]'

        # GH3416
        dates = [
            np.datetime64(datetime(2013, 1, 1)),
            np.datetime64(datetime(2013, 1, 2)),
            np.datetime64(datetime(2013, 1, 3)),
        ]

        s = Series(dates)
        assert s.dtype == 'M8[ns]'

        s.iloc[0] = np.nan
        assert s.dtype == 'M8[ns]'

        # GH3414 related
        expected = Series([
            datetime(2013, 1, 1),
            datetime(2013, 1, 2),
            datetime(2013, 1, 3),
        ], dtype='datetime64[ns]')

        result = Series(
            Series(dates).astype(np.int64) / 1000000, dtype='M8[ms]')
        tm.assert_series_equal(result, expected)

        result = Series(dates, dtype='datetime64[ns]')
        tm.assert_series_equal(result, expected)

        expected = Series([
            pd.NaT,
            datetime(2013, 1, 2),
            datetime(2013, 1, 3),
        ], dtype='datetime64[ns]')
        result = Series([np.nan] + dates[1:], dtype='datetime64[ns]')
        tm.assert_series_equal(result, expected)

        dts = Series(dates, dtype='datetime64[ns]')

        # valid astype
        dts.astype('int64')

        # invalid casting
        msg = (r"cannot astype a datetimelike from \[datetime64\[ns\]\] to"
               r" \[int32\]")
        with pytest.raises(TypeError, match=msg):
            dts.astype('int32')

        # ints are ok
        # we test with np.int64 to get similar results on
        # windows / 32-bit platforms
        result = Series(dts, dtype=np.int64)
        expected = Series(dts.astype(np.int64))
        tm.assert_series_equal(result, expected)

        # invalid dates can be held as object
        result = Series([datetime(2, 1, 1)])
        assert result[0] == datetime(2, 1, 1, 0, 0)

        result = Series([datetime(3000, 1, 1)])
        assert result[0] == datetime(3000, 1, 1, 0, 0)

        # don't mix types
        result = Series([Timestamp('20130101'), 1], index=['a', 'b'])
        assert result['a'] == Timestamp('20130101')
        assert result['b'] == 1

        # GH6529
        # coerce datetime64 non-ns properly
        dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M')
        values2 = dates.view(np.ndarray).astype('datetime64[ns]')
        expected = Series(values2, index=dates)

        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, dates)
            assert_series_equal(result, expected)

        # GH 13876
        # coerce to non-ns to object properly
        expected = Series(values2, index=dates, dtype=object)
        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, index=dates, dtype=object)
            assert_series_equal(result, expected)

        # leave datetime.date alone
        dates2 = np.array([d.date() for d in dates.to_pydatetime()],
                          dtype=object)
        series1 = Series(dates2, dates)
        tm.assert_numpy_array_equal(series1.values, dates2)
        assert series1.dtype == object

        # these will correctly infer a datetime
        s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'

        # tz-aware (UTC and other tz's)
        # GH 8411
        dr = date_range('20130101', periods=3)
        assert Series(dr).iloc[0].tz is None
        dr = date_range('20130101', periods=3, tz='UTC')
        assert str(Series(dr).iloc[0].tz) == 'UTC'
        dr = date_range('20130101', periods=3, tz='US/Eastern')
        assert str(Series(dr).iloc[0].tz) == 'US/Eastern'

        # non-convertible
        s = Series([1479596223000, -1479590, pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a NaT it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a nan it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan])
        assert s.dtype == 'object'
        assert s[2] is np.nan
        assert 'NaN' in str(s)
Example #54
0
 def test_astype_from_float_to_str(self, dtype):
     # https://github.com/pandas-dev/pandas/issues/36451
     s = Series([0.1], dtype=dtype)
     result = s.astype(str)
     expected = Series(["0.1"])
     tm.assert_series_equal(result, expected)
Example #55
0
def test_append_raise(setup_path):

    with ensure_clean_store(setup_path) as store:

        # test append with invalid input to get good error messages

        # list in column
        df = tm.makeDataFrame()
        df["invalid"] = [["a"]] * len(df)
        assert df.dtypes["invalid"] == np.object_
        msg = re.escape(
            """Cannot serialize the column [invalid]
because its data contents are not [string] but [mixed] object dtype"""
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # multiple invalid columns
        df["invalid2"] = [["a"]] * len(df)
        df["invalid3"] = [["a"]] * len(df)
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # datetime with embedded nans as object
        df = tm.makeDataFrame()
        s = Series(datetime.datetime(2001, 1, 2), index=df.index)
        s = s.astype(object)
        s[0:5] = np.nan
        df["invalid"] = s
        assert df.dtypes["invalid"] == np.object_
        msg = "too many timezones in this block, create separate data columns"
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # directly ndarray
        msg = "value must be None, Series, or DataFrame"
        with pytest.raises(TypeError, match=msg):
            store.append("df", np.arange(10))

        # series directly
        msg = re.escape(
            "cannot properly create the storer for: "
            "[group->df,value-><class 'pandas.core.series.Series'>]"
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", Series(np.arange(10)))

        # appending an incompatible table
        df = tm.makeDataFrame()
        store.append("df", df)

        df["foo"] = "foo"
        msg = re.escape(
            "invalid combination of [non_index_axes] on appending data "
            "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table "
            "[(1, ['A', 'B', 'C', 'D'])]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # incompatible type (GH 41897)
        _maybe_remove(store, "df")
        df["foo"] = Timestamp("20130101")
        store.append("df", df)
        df["foo"] = "bar"
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->bytes24,kind->string,shape->(1, 30)] "
            "vs current table "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->datetime64,kind->datetime64,shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)
Example #56
0
 def test_td64_series_astype_object(self):
     tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
     result = tdser.astype(object)
     assert isinstance(result.iloc[0], timedelta)
     assert result.dtype == np.object_
Example #57
0
def test_split_join_roundtrip(any_string_dtype):
    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
    result = ser.str.split("_").str.join("_")
    expected = ser.astype(object)
    tm.assert_series_equal(result, expected)
Example #58
0
 def test_dt64_series_astype_object(self):
     dt64ser = Series(date_range("20130101", periods=3))
     result = dt64ser.astype(object)
     assert isinstance(result.iloc[0], datetime)
     assert result.dtype == np.object_
Example #59
0
def infer_problem_type(y: Series):
    """ Identifies which type of prediction problem we are interested in (if user has not specified).
        I.e. binary classification, multi-class classification, or regression.
    """
    if len(y) == 0:
        raise ValueError("provided labels cannot have length = 0")
    y = y.dropna()  # Remove missing values from y (there should not be any though as they were removed in Learner.general_data_processing())
    num_rows = len(y)

    unique_values = y.unique()
    unique_count = len(unique_values)
    if unique_count > 10:
        logger.log(
            20,
            f'Here are the first 10 unique label values in your data:  {list(unique_values[:10])}'
        )
    else:
        logger.log(
            20,
            f'Here are the {unique_count} unique label values in your data:  {list(unique_values)}'
        )

    MULTICLASS_LIMIT = 1000  # if numeric and class count would be above this amount, assume it is regression
    if num_rows > 1000:
        REGRESS_THRESHOLD = 0.05  # if the unique-ratio is less than this, we assume multiclass classification, even when labels are integers
    else:
        REGRESS_THRESHOLD = 0.1

    if unique_count == 2:
        problem_type = BINARY
        reason = "only two unique label-values observed"
    elif y.dtype.name in ['object', 'category']:
        problem_type = MULTICLASS
        reason = f"dtype of label-column == {y.dtype.name}"
    elif np.issubdtype(y.dtype, np.floating):
        unique_ratio = unique_count / float(num_rows)
        if (unique_ratio <= REGRESS_THRESHOLD) and (unique_count <=
                                                    MULTICLASS_LIMIT):
            try:
                can_convert_to_int = np.array_equal(y, y.astype(int))
                if can_convert_to_int:
                    problem_type = MULTICLASS
                    reason = "dtype of label-column == float, but few unique label-values observed and label-values can be converted to int"
                else:
                    problem_type = REGRESSION
                    reason = "dtype of label-column == float and label-values can't be converted to int"
            except:
                problem_type = REGRESSION
                reason = "dtype of label-column == float and label-values can't be converted to int"
        else:
            problem_type = REGRESSION
            reason = "dtype of label-column == float and many unique label-values observed"
    elif np.issubdtype(y.dtype, np.integer):
        unique_ratio = unique_count / float(num_rows)
        if (unique_ratio <= REGRESS_THRESHOLD) and (unique_count <=
                                                    MULTICLASS_LIMIT):
            problem_type = MULTICLASS  # TODO: Check if integers are from 0 to n-1 for n unique values, if they have a wide spread, it could still be regression
            reason = "dtype of label-column == int, but few unique label-values observed"
        else:
            problem_type = REGRESSION
            reason = "dtype of label-column == int and many unique label-values observed"
    else:
        raise NotImplementedError(f'label dtype {y.dtype} not supported!')
    logger.log(
        25,
        f"AutoGluon infers your prediction problem is: {problem_type}  (because {reason})."
    )
    logger.log(
        25,
        f"If this is wrong, please specify `problem_type` argument in fit() instead "
        f"(You may specify problem_type as one of: {[BINARY, MULTICLASS, REGRESSION]})\n"
    )
    return problem_type
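
A toy, self-contained illustration (not the AutoGluon implementation) of the unique-ratio heuristic used above: integer labels whose ratio of unique values to rows is low are treated as multiclass, while a high ratio suggests regression; thresholds and return strings are assumptions for the sketch.

import numpy as np
import pandas as pd

def toy_problem_type(y: pd.Series, regress_threshold: float = 0.05,
                     multiclass_limit: int = 1000) -> str:
    # Simplified version of the numeric-label branches above.
    y = y.dropna()
    unique_count = y.nunique()
    unique_ratio = unique_count / float(len(y))
    if unique_count == 2:
        return "binary"
    if np.issubdtype(y.dtype, np.integer):
        if unique_ratio <= regress_threshold and unique_count <= multiclass_limit:
            return "multiclass"
        return "regression"
    return "multiclass" if y.dtype.name in ("object", "category") else "regression"

print(toy_problem_type(pd.Series([0, 1, 2] * 500)))   # multiclass (unique ratio = 0.002)
print(toy_problem_type(pd.Series(np.arange(1500))))   # regression (unique ratio = 1.0)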
Example #60
0
    def test_operators_bitwise(self):
        # GH 9016: support bitwise op for integer types
        index = list('bca')

        s_tft = Series([True, False, True], index=index)
        s_fff = Series([False, False, False], index=index)
        s_tff = Series([True, False, False], index=index)
        s_empty = Series([])

        # TODO: unused
        # s_0101 = Series([0, 1, 0, 1])

        s_0123 = Series(range(4), dtype='int64')
        s_3333 = Series([3] * 4)
        s_4444 = Series([4] * 4)

        res = s_tft & s_empty
        expected = s_fff
        assert_series_equal(res, expected)

        res = s_tft | s_empty
        expected = s_tft
        assert_series_equal(res, expected)

        res = s_0123 & s_3333
        expected = Series(range(4), dtype='int64')
        assert_series_equal(res, expected)

        res = s_0123 | s_4444
        expected = Series(range(4, 8), dtype='int64')
        assert_series_equal(res, expected)

        s_a0b1c0 = Series([1], list('b'))

        res = s_tft & s_a0b1c0
        expected = s_tff.reindex(list('abc'))
        assert_series_equal(res, expected)

        res = s_tft | s_a0b1c0
        expected = s_tft.reindex(list('abc'))
        assert_series_equal(res, expected)

        n0 = 0
        res = s_tft & n0
        expected = s_fff
        assert_series_equal(res, expected)

        res = s_0123 & n0
        expected = Series([0] * 4)
        assert_series_equal(res, expected)

        n1 = 1
        res = s_tft & n1
        expected = s_tft
        assert_series_equal(res, expected)

        res = s_0123 & n1
        expected = Series([0, 1, 0, 1])
        assert_series_equal(res, expected)

        s_1111 = Series([1] * 4, dtype='int8')
        res = s_0123 & s_1111
        expected = Series([0, 1, 0, 1], dtype='int64')
        assert_series_equal(res, expected)

        res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
        expected = Series([1, 1, 3, 3], dtype='int32')
        assert_series_equal(res, expected)

        with pytest.raises(TypeError):
            s_1111 & 'a'
        with pytest.raises(TypeError):
            s_1111 & ['a', 'b', 'c', 'd']
        with pytest.raises(TypeError):
            s_0123 & np.NaN
        with pytest.raises(TypeError):
            s_0123 & 3.14
        with pytest.raises(TypeError):
            s_0123 & [0.1, 4, 3.14, 2]

        # s_0123 will be all false now because of reindexing like s_tft
        if compat.PY3:
            # unable to sort incompatible object via .union.
            exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
            with tm.assert_produces_warning(RuntimeWarning):
                assert_series_equal(s_tft & s_0123, exp)
        else:
            exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
            assert_series_equal(s_tft & s_0123, exp)

        # s_tft will be all false now because of reindexing like s_0123
        if compat.PY3:
            # unable to sort incompatible object via .union.
            exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
            with tm.assert_produces_warning(RuntimeWarning):
                assert_series_equal(s_0123 & s_tft, exp)
        else:
            exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
            assert_series_equal(s_0123 & s_tft, exp)

        assert_series_equal(s_0123 & False, Series([False] * 4))
        assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
        assert_series_equal(s_0123 & [False], Series([False] * 4))
        assert_series_equal(s_0123 & (False), Series([False] * 4))
        assert_series_equal(s_0123 & Series([False, np.NaN, False, False]),
                            Series([False] * 4))

        s_ftft = Series([False, True, False, True])
        assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft)

        s_abNd = Series(['a', 'b', np.NaN, 'd'])
        res = s_0123 & s_abNd
        expected = s_ftft
        assert_series_equal(res, expected)