def test_union_categoricals_ordered(self):
        """Check ``union_categoricals`` on ordered Categoricals.

        Mixing ordered with unordered inputs, or ordered inputs whose
        categories are listed in a different order, must raise
        ``TypeError``; ordered inputs with identical categories are
        concatenated and the result stays ordered.
        """
        ordered = Categorical([1, 2, 3], ordered=True)
        unordered = Categorical([1, 2, 3], ordered=False)

        # ordered + unordered is rejected
        with tm.assertRaisesRegexp(TypeError,
                                   'Categorical.ordered must be the same'):
            union_categoricals([ordered, unordered])

        # the same ordered categorical twice
        expected = Categorical([1, 2, 3, 1, 2, 3], ordered=True)
        tm.assert_categorical_equal(union_categoricals([ordered, ordered]),
                                    expected)

        # identical categories, one input containing a missing value
        with_nan = Categorical([1, 2, 3, np.nan], ordered=True)
        subset = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
        expected = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True)
        tm.assert_categorical_equal(union_categoricals([with_nan, subset]),
                                    expected)

        # same category values listed in a different order
        ascending = Categorical([1, 2, 3], ordered=True)
        descending = Categorical([1, 2, 3], categories=[3, 2, 1],
                                 ordered=True)
        with tm.assertRaisesRegexp(
                TypeError,
                "to union ordered Categoricals, "
                "all categories must be the same"):
            union_categoricals([ascending, descending])
Example #2
0
    def test_tz(self):
        """Localize a naive ``Timestamp`` and convert it between zones."""
        stamp = '2014-02-01 09:00'

        tokyo = Timestamp(stamp).tz_localize('Asia/Tokyo')
        self.assertEqual(tokyo.hour, 9)
        self.assertEqual(tokyo, Timestamp(stamp, tz='Asia/Tokyo'))

        eastern = tokyo.tz_convert('US/Eastern')
        expected = Timestamp('2014-01-31 19:00', tz='US/Eastern')
        self.assertEqual(eastern, expected)
        self.assertEqual(eastern.hour, 19)

        # preserves nanosecond
        tokyo = (Timestamp(stamp) + offsets.Nano(5)).tz_localize('Asia/Tokyo')
        self.assertEqual(tokyo.hour, 9)
        self.assertEqual(tokyo.nanosecond, 5)
        eastern = tokyo.tz_convert('US/Eastern')
        self.assertEqual(eastern.nanosecond, 5)
        self.assertEqual(eastern.hour, 19)

        # GH 8025
        # localizing an already tz-aware Timestamp is an error
        msg = ('Cannot localize tz-aware Timestamp, use '
               'tz_convert for conversions')
        with tm.assertRaisesRegexp(TypeError, msg):
            Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo')

        # converting a tz-naive Timestamp is an error
        msg = ('Cannot convert tz-naive Timestamp, use '
               'tz_localize to localize')
        with tm.assertRaisesRegexp(TypeError, msg):
            Timestamp('2011-01-01').tz_convert('Asia/Tokyo')
Example #3
0
    def test_tz(self):
        """Check tz_localize/tz_convert results and their misuse errors."""
        naive_text = "2014-02-01 09:00"
        eastern_expected = Timestamp("2014-01-31 19:00", tz="US/Eastern")

        localized = Timestamp(naive_text).tz_localize("Asia/Tokyo")
        self.assertEqual(localized.hour, 9)
        self.assertEqual(localized, Timestamp(naive_text, tz="Asia/Tokyo"))

        converted = localized.tz_convert("US/Eastern")
        self.assertEqual(converted, eastern_expected)
        self.assertEqual(converted.hour, 19)

        # preserves nanosecond
        with_nanos = Timestamp(naive_text) + offsets.Nano(5)
        localized = with_nanos.tz_localize("Asia/Tokyo")
        self.assertEqual(localized.hour, 9)
        self.assertEqual(localized.nanosecond, 5)

        converted = localized.tz_convert("US/Eastern")
        self.assertEqual(converted.nanosecond, 5)
        self.assertEqual(converted.hour, 19)

        # GH 8025
        localize_error = (
            "Cannot localize tz-aware Timestamp, use "
            "tz_convert for conversions"
        )
        with tm.assertRaisesRegexp(TypeError, localize_error):
            Timestamp("2011-01-01", tz="US/Eastern").tz_localize("Asia/Tokyo")

        convert_error = (
            "Cannot convert tz-naive Timestamp, use "
            "tz_localize to localize"
        )
        with tm.assertRaisesRegexp(TypeError, convert_error):
            Timestamp("2011-01-01").tz_convert("Asia/Tokyo")
Example #4
0
    def test_index_equal_metadata_message(self):

        expected = """Index are different

Attribute "names" are different
\\[left\\]:  \\[None\\]
\\[right\\]: \\[u?'x'\\]"""

        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3], name='x')
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)

        # same name, should pass
        assert_index_equal(pd.Index([1, 2, 3], name=np.nan),
                           pd.Index([1, 2, 3], name=np.nan))
        assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT),
                           pd.Index([1, 2, 3], name=pd.NaT))

        expected = """Index are different

Attribute "names" are different
\\[left\\]:  \\[nan\\]
\\[right\\]: \\[NaT\\]"""

        idx1 = pd.Index([1, 2, 3], name=np.nan)
        idx2 = pd.Index([1, 2, 3], name=pd.NaT)
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)
Example #5
0
    def test_categorical_equal_message(self):

        expected = """Categorical\\.categories are different

Categorical\\.categories values are different \\(25\\.0 %\\)
\\[left\\]:  Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)"""

        a = pd.Categorical([1, 2, 3, 4])
        b = pd.Categorical([1, 2, 3, 5])
        with assertRaisesRegexp(AssertionError, expected):
            tm.assert_categorical_equal(a, b)

        expected = """Categorical\\.codes are different

Categorical\\.codes values are different \\(50\\.0 %\\)
\\[left\\]:  \\[0, 1, 3, 2\\]
\\[right\\]: \\[0, 1, 2, 3\\]"""

        a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4])
        b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
        with assertRaisesRegexp(AssertionError, expected):
            tm.assert_categorical_equal(a, b)

        expected = """Categorical are different

Attribute "ordered" are different
\\[left\\]:  False
\\[right\\]: True"""

        a = pd.Categorical([1, 2, 3, 4], ordered=False)
        b = pd.Categorical([1, 2, 3, 4], ordered=True)
        with assertRaisesRegexp(AssertionError, expected):
            tm.assert_categorical_equal(a, b)
Example #6
0
    def test_take_fill_value(self):
        """Check ``take`` with ``fill_value`` on the ``self._holder`` index.

        Every ``fill_value=True`` call with filling enabled is expected to
        raise the "cannot contain NA" ``ValueError``.
        """
        # see gh-12631
        holder = self._holder
        idx = holder([1, 2, 3], name='xxx')

        taken = idx.take(np.array([1, 0, -1]))
        tm.assert_index_equal(taken, holder([2, 1, 3], name='xxx'))

        msg = ("Unable to fill values because "
               "{name} cannot contain NA").format(name=holder.__name__)

        # fill_value=True
        with tm.assertRaisesRegexp(ValueError, msg):
            idx.take(np.array([1, 0, -1]), fill_value=True)

        # allow_fill=False
        taken = idx.take(np.array([1, 0, -1]), allow_fill=False,
                         fill_value=True)
        tm.assert_index_equal(taken, holder([2, 1, 3], name='xxx'))

        # indices below -1 together with fill_value=True
        for negative in (-2, -5):
            with tm.assertRaisesRegexp(ValueError, msg):
                idx.take(np.array([1, 0, negative]), fill_value=True)

        # out-of-bounds index without fill_value
        with tm.assertRaises(IndexError):
            idx.take(np.array([1, -5]))
Example #7
0
    def test_tshift(self):
        """Check ``DataFrame.tshift`` on period- and datetime-indexed frames."""
        # PeriodIndex
        period_frame = tm.makePeriodFrame()
        shifted = period_frame.tshift(1)
        round_trip = shifted.tshift(-1)

        assert_frame_equal(round_trip, period_frame)

        # the same shift expressed through ``freq``
        for freq in ('B', offsets.BDay()):
            assert_frame_equal(shifted, period_frame.tshift(freq=freq))

        with assertRaisesRegexp(ValueError, 'does not match'):
            period_frame.tshift(freq='M')

        # DatetimeIndex
        ts_frame = self.tsframe
        shifted = ts_frame.tshift(1)
        round_trip = shifted.tshift(-1)

        assert_frame_equal(ts_frame, round_trip)

        by_freq = ts_frame.tshift(freq=ts_frame.index.freq)
        assert_frame_equal(shifted, by_freq)

        # same data, index rebuilt from a plain array of the original index
        inferred_ts = DataFrame(ts_frame.values,
                                Index(np.asarray(ts_frame.index)),
                                columns=ts_frame.columns)
        shifted = inferred_ts.tshift(1)
        round_trip = shifted.tshift(-1)
        assert_frame_equal(shifted, ts_frame.tshift(1))
        assert_frame_equal(round_trip, inferred_ts)

        # rows 0, 5 and 7 only: tshift without arguments must raise
        no_freq = ts_frame.iloc[[0, 5, 7], :]
        with self.assertRaises(ValueError):
            no_freq.tshift()
Example #8
0
    def test_get_loc(self):
        """Check ``get_loc`` on a ``period_range`` index.

        Covers several key types for each ``method``, then ``tolerance``
        given in several forms, then the error cases.
        """
        idx = pd.period_range('2000-01-01', periods=3)

        for method in [None, 'pad', 'backfill', 'nearest']:
            # the second element, looked up through different key types
            keys = [
                idx[1],
                idx[1].asfreq('H', how='start'),
                idx[1].to_timestamp(),
                idx[1].to_timestamp().to_pydatetime(),
                str(idx[1]),
            ]
            for key in keys:
                self.assertEqual(idx.get_loc(key, method), 1)

        idx = pd.period_range('2000-01-01', periods=5)[::2]

        # one-day tolerance given in four different forms
        one_day_forms = ['1 day', pd.Timedelta('1D'),
                         np.timedelta64(1, 'D'), timedelta(1)]
        for tolerance in one_day_forms:
            loc = idx.get_loc('2000-01-02T12', method='nearest',
                              tolerance=tolerance)
            self.assertEqual(loc, 1)

        with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
            idx.get_loc('2000-01-10', method='nearest', tolerance='foo')

        freq_msg = 'Input has different freq from PeriodIndex\\(freq=D\\)'
        with tm.assertRaisesRegexp(ValueError, freq_msg):
            idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour')

        with tm.assertRaises(KeyError):
            idx.get_loc('2000-01-10', method='nearest', tolerance='1 day')
Example #9
0
    def test_take_fill_value(self):
        """Check ``Float64Index.take`` with ``fill_value`` / ``allow_fill``."""
        # GH 12631
        idx = pd.Float64Index([1., 2., 3.], name='xxx')
        plain = pd.Float64Index([2., 1., 3.], name='xxx')

        tm.assert_index_equal(idx.take(np.array([1, 0, -1])), plain)

        # fill_value
        filled = idx.take(np.array([1, 0, -1]), fill_value=True)
        tm.assert_index_equal(
            filled, pd.Float64Index([2., 1., np.nan], name='xxx'))

        # allow_fill=False
        unfilled = idx.take(np.array([1, 0, -1]), allow_fill=False,
                            fill_value=True)
        tm.assert_index_equal(
            unfilled, pd.Float64Index([2., 1., 3.], name='xxx'))

        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        for negative in (-2, -5):
            with tm.assertRaisesRegexp(ValueError, msg):
                idx.take(np.array([1, 0, negative]), fill_value=True)

        with tm.assertRaises(IndexError):
            idx.take(np.array([1, -5]))
Example #10
0
    def test_difference_base(self):
        """Check ``difference`` for every index in ``self.indices``.

        The other operand is passed as an index slice and, per GH 10149,
        as an ndarray, a Series and a list built from its values.
        """
        for _, idx in compat.iteritems(self.indices):
            first, second = idx[2:], idx[:4]
            answer = idx[4:]
            result = first.difference(second)

            if not isinstance(idx, CategoricalIndex):
                self.assertTrue(tm.equalContents(result, answer))

            # GH 10149
            values_as = [np.array, Series, list]
            cases = [convert(second.values) for convert in values_as]
            for case in cases:
                if isinstance(idx, PeriodIndex):
                    msg = "can only call with other PeriodIndex-ed objects"
                    with tm.assertRaisesRegexp(ValueError, msg):
                        result = first.difference(case)
                elif isinstance(idx, CategoricalIndex):
                    continue
                elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
                    # NOTE(review): ``result`` here is still the value
                    # computed before the loop; ``case`` is not used in
                    # this branch - confirm whether that is intended.
                    self.assertEqual(result.__class__, answer.__class__)
                    tm.assert_numpy_array_equal(result.asi8, answer.asi8)
                else:
                    result = first.difference(case)
                    self.assertTrue(tm.equalContents(result, answer))

            if isinstance(idx, MultiIndex):
                msg = "other must be a MultiIndex or a list of tuples"
                with tm.assertRaisesRegexp(TypeError, msg):
                    result = first.difference([1, 2, 3])
Example #11
0
    def test_symmetric_difference(self):
        """Check ``symmetric_difference`` for every index in ``self.indices``.

        Also checks that the ``sym_diff`` spelling emits a
        ``FutureWarning``.
        """
        for _, idx in compat.iteritems(self.indices):
            first, second = idx[1:], idx[:-1]
            categorical = isinstance(idx, CategoricalIndex)

            if not categorical:
                answer = idx[[0, -1]]
                result = first.symmetric_difference(second)
                self.assertTrue(tm.equalContents(result, answer))

            # GH 10149
            values_as = [np.array, Series, list]
            cases = [convert(second.values) for convert in values_as]
            for case in cases:
                if isinstance(idx, PeriodIndex):
                    msg = "can only call with other PeriodIndex-ed objects"
                    with tm.assertRaisesRegexp(ValueError, msg):
                        result = first.symmetric_difference(case)
                elif not categorical:
                    result = first.symmetric_difference(case)
                    self.assertTrue(tm.equalContents(result, answer))

            if isinstance(idx, MultiIndex):
                msg = "other must be a MultiIndex or a list of tuples"
                with tm.assertRaisesRegexp(TypeError, msg):
                    result = first.symmetric_difference([1, 2, 3])

        # 12591 deprecated
        # ``first`` / ``second`` are the slices left over from the last
        # loop iteration
        with tm.assert_produces_warning(FutureWarning):
            first.sym_diff(second)
Example #12
0
    def test_take_filling_fill_value(self):
        """Check ``SparseArray.take`` with ``fill_value`` / ``allow_fill``."""
        # same tests as GH 12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)

        taken = sparse.take(np.array([1, 0, -1]))
        tm.assert_sp_array_equal(
            taken, SparseArray([0, np.nan, 4], fill_value=0))

        # fill_value
        taken = sparse.take(np.array([1, 0, -1]), fill_value=True)
        tm.assert_sp_array_equal(
            taken, SparseArray([0, np.nan, 0], fill_value=0))

        # allow_fill=False
        taken = sparse.take(np.array([1, 0, -1]),
                            allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(
            taken, SparseArray([0, np.nan, 4], fill_value=0))

        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        for negative in (-2, -5):
            with tm.assertRaisesRegexp(ValueError, msg):
                sparse.take(np.array([1, 0, negative]), fill_value=True)

        # out-of-bounds indices, without fill_value
        for out_of_bounds in (-6, 5):
            with tm.assertRaises(IndexError):
                sparse.take(np.array([1, out_of_bounds]))

        # out-of-bounds index, with fill_value
        with tm.assertRaises(IndexError):
            sparse.take(np.array([1, 5]), fill_value=True)
Example #13
0
    def test_union_base(self):
        """Check ``union`` for every index in ``self.indices``.

        The other operand is passed as an index slice and, per GH 10149,
        as an ndarray, a Series and a list built from its values.
        """
        for _, idx in compat.iteritems(self.indices):
            first, second = idx[3:], idx[:5]
            everything = idx

            combined = first.union(second)
            self.assertTrue(tm.equalContents(combined, everything))

            # GH 10149
            values_as = [np.array, Series, list]
            cases = [convert(second.values) for convert in values_as]
            for case in cases:
                if isinstance(idx, PeriodIndex):
                    msg = "can only call with other PeriodIndex-ed objects"
                    with tm.assertRaisesRegexp(ValueError, msg):
                        result = first.union(case)
                elif not isinstance(idx, CategoricalIndex):
                    result = first.union(case)
                    self.assertTrue(tm.equalContents(result, everything))

            if isinstance(idx, MultiIndex):
                msg = "other must be a MultiIndex or a list of tuples"
                with tm.assertRaisesRegexp(TypeError, msg):
                    result = first.union([1, 2, 3])
Example #14
0
    def test_append_series_dict(self):
        """Check ``DataFrame.append`` with a Series row and with a dict."""
        df = DataFrame(np.random.randn(5, 4),
                       columns=['foo', 'bar', 'baz', 'qux'])

        last_row = df.loc[4]

        # the row label already exists in the frame
        overlap_msg = 'Indexes have overlapping values'
        with assertRaisesRegexp(ValueError, overlap_msg):
            df.append(last_row, verify_integrity=True)

        # an unnamed Series without ignore_index=True is rejected
        last_row.name = None
        unnamed_msg = ('Can only append a Series if '
                       'ignore_index=True')
        with assertRaisesRegexp(TypeError, unnamed_msg):
            df.append(last_row, verify_integrity=True)

        # Series with its entries in reverse order
        appended = df.append(last_row[::-1], ignore_index=True)
        as_frame = DataFrame({0: last_row[::-1]}, index=df.columns).T
        expected = df.append(as_frame, ignore_index=True)
        assert_frame_equal(appended, expected)

        # dict
        appended = df.append(last_row.to_dict(), ignore_index=True)
        assert_frame_equal(appended, expected)

        # Series holding only three of the four columns
        appended = df.append(last_row[::-1][:3], ignore_index=True)
        as_frame = DataFrame({0: last_row[::-1][:3]}).T
        expected = df.append(as_frame, ignore_index=True)
        assert_frame_equal(appended, expected.loc[:, appended.columns])

        # can append when name set
        named_row = df.loc[4]
        named_row.name = 5
        appended = df.append(named_row)
        expected = df.append(df[-1:], ignore_index=True)
        assert_frame_equal(appended, expected)
Example #15
0
    def test_insert_index_datetime64tz(self):
        """Check ``insert`` on a US/Eastern ``DatetimeIndex``."""
        tz_dtype = 'datetime64[ns, US/Eastern]'
        obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                                '2011-01-04'], tz='US/Eastern')
        self.assertEqual(obj.dtype, tz_dtype)

        # datetime64tz + datetime64tz (same tz) => datetime64tz
        val = pd.Timestamp('2012-01-01', tz='US/Eastern')
        exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', '2011-01-02',
                                '2011-01-03', '2011-01-04'], tz='US/Eastern')
        self._assert_insert_conversion(obj, val, exp, tz_dtype)

        # ToDo: must coerce to object
        # tz-naive value, then a value in a different timezone
        msg = "Passed item and index have different timezone"
        mismatched = [pd.Timestamp('2012-01-01'),
                      pd.Timestamp('2012-01-01', tz='Asia/Tokyo')]
        for item in mismatched:
            with tm.assertRaisesRegexp(ValueError, msg):
                obj.insert(1, item)

        # ToDo: must coerce to object
        msg = "cannot insert DatetimeIndex with incompatible label"
        with tm.assertRaisesRegexp(TypeError, msg):
            obj.insert(1, 1)
Example #16
0
    def test_take_fill_value(self):
        """Check ``PeriodIndex.take`` with ``fill_value`` / ``allow_fill``."""
        # GH 12631
        idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
                             name='xxx', freq='D')

        taken = idx.take(np.array([1, 0, -1]))
        tm.assert_index_equal(
            taken,
            pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
                           name='xxx', freq='D'))

        # fill_value
        taken = idx.take(np.array([1, 0, -1]), fill_value=True)
        tm.assert_index_equal(
            taken,
            pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'],
                           name='xxx', freq='D'))

        # allow_fill=False
        taken = idx.take(np.array([1, 0, -1]), allow_fill=False,
                         fill_value=True)
        tm.assert_index_equal(
            taken,
            pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
                           name='xxx', freq='D'))

        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        for negative in (-2, -5):
            with tm.assertRaisesRegexp(ValueError, msg):
                idx.take(np.array([1, 0, negative]), fill_value=True)

        with tm.assertRaises(IndexError):
            idx.take(np.array([1, -5]))
Example #17
0
    def test_round(self):
        """Check ``round`` on a ``TimedeltaIndex`` and on one of its elements.

        Rounding to hourly frequency must give the expected values; an
        invalid frequency string and the month-end frequency ``'M'`` must
        both raise ``ValueError``.
        """
        td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min')
        elt = td[1]

        expected_rng = TimedeltaIndex([
            Timedelta('16801 days 00:00:00'),
            Timedelta('16801 days 00:00:00'),
            Timedelta('16801 days 01:00:00'),
            Timedelta('16801 days 02:00:00'),
            Timedelta('16801 days 02:00:00'),
        ])
        expected_elt = expected_rng[1]

        tm.assert_index_equal(td.round(freq='H'), expected_rng)
        self.assertEqual(elt.round(freq='H'), expected_elt)

        # Use the ``tm`` helper for every raise check: the original mixed
        # it with ``self.assertRaisesRegexp``, a deprecated unittest alias.
        msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
        with tm.assertRaisesRegexp(ValueError, msg):
            td.round(freq='foo')
        with tm.assertRaisesRegexp(ValueError, msg):
            elt.round(freq='foo')

        msg = "<MonthEnd> is a non-fixed frequency"
        with tm.assertRaisesRegexp(ValueError, msg):
            td.round(freq='M')
        with tm.assertRaisesRegexp(ValueError, msg):
            elt.round(freq='M')
Example #18
0
    def test_take_fill_value(self):
        """Check ``Int64Index.take`` with ``fill_value`` / ``allow_fill``.

        Every ``fill_value=True`` call with filling enabled is expected to
        raise the "cannot contain NA" ``ValueError``.
        """
        # GH 12631
        idx = pd.Int64Index([1, 2, 3], name='xxx')
        cannot_fill = ("Unable to fill values because "
                       "Int64Index cannot contain NA")

        taken = idx.take(np.array([1, 0, -1]))
        tm.assert_index_equal(taken, pd.Int64Index([2, 1, 3], name='xxx'))

        # fill_value
        with tm.assertRaisesRegexp(ValueError, cannot_fill):
            idx.take(np.array([1, 0, -1]), fill_value=True)

        # allow_fill=False
        taken = idx.take(np.array([1, 0, -1]), allow_fill=False,
                         fill_value=True)
        tm.assert_index_equal(taken, pd.Int64Index([2, 1, 3], name='xxx'))

        for negative in (-2, -5):
            with tm.assertRaisesRegexp(ValueError, cannot_fill):
                idx.take(np.array([1, 0, negative]), fill_value=True)

        with tm.assertRaises(IndexError):
            idx.take(np.array([1, -5]))
Example #19
0
    def test_numpy_cumsum(self):
        """Check ``np.cumsum`` applied to a ``SparseArray``.

        Covers data with and without NaN, three ``fill_value`` settings,
        and rejection of the ``dtype`` and ``out`` keywords.
        """
        non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
        non_null_expected = SparseArray(non_null_data.cumsum())

        null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
        null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))

        scenarios = [
            (null_data, null_expected),
            (non_null_data, non_null_expected),
        ]
        for data, expected in scenarios:
            sparse_inputs = [
                SparseArray(data),
                SparseArray(data, fill_value=np.nan),
                SparseArray(data, fill_value=2),
            ]
            for sparse in sparse_inputs:
                out = np.cumsum(sparse)
                tm.assert_sp_array_equal(out, expected)

            msg = "the 'dtype' parameter is not supported"
            with tm.assertRaisesRegexp(ValueError, msg):
                np.cumsum(SparseArray(data), dtype=np.int64)

            # ``out`` is the result of the last cumsum above
            msg = "the 'out' parameter is not supported"
            with tm.assertRaisesRegexp(ValueError, msg):
                np.cumsum(SparseArray(data), out=out)
Example #20
0
    def test_constructor_freq_mult(self):
        """Check period construction with multiplied frequencies.

        ``PeriodIndex`` and ``period_range`` must agree for '2M', '3D'
        and '4H'; negative and zero multiples raise ``ValueError``.
        """
        # GH #7811
        # (constructor kwargs, expected period strings, expected freq)
        cases = [
            (dict(start='2014-01', freq='2M', periods=4),
             ['2014-01', '2014-03', '2014-05', '2014-07'],
             '2M'),
            (dict(start='2014-01-02', end='2014-01-15', freq='3D'),
             ['2014-01-02', '2014-01-05', '2014-01-08', '2014-01-11',
              '2014-01-14'],
             '3D'),
            (dict(end='2014-01-01 17:00', freq='4H', periods=3),
             ['2014-01-01 09:00', '2014-01-01 13:00', '2014-01-01 17:00'],
             '4H'),
        ]
        for func in [PeriodIndex, period_range]:
            # must be the same, but for sure...
            for kwargs, periods, freq in cases:
                pidx = func(**kwargs)
                expected = PeriodIndex(periods, freq=freq)
                tm.assert_index_equal(pidx, expected)

        negative_msg = ('Frequency must be positive, because it'
                        ' represents span: -1M')
        with tm.assertRaisesRegexp(ValueError, negative_msg):
            PeriodIndex(['2011-01'], freq='-1M')

        zero_msg = ('Frequency must be positive, because it'
                    ' represents span: 0M')
        with tm.assertRaisesRegexp(ValueError, zero_msg):
            PeriodIndex(['2011-01'], freq='0M')
        with tm.assertRaisesRegexp(ValueError, zero_msg):
            period_range('2011-01', periods=3, freq='0M')
Example #21
0
    def test_numpy_transpose(self):
        """Check ``np.transpose`` on a ``SparseDataFrame``.

        Transposing twice must give back the original frame, and the
        ``axes`` keyword must be rejected.
        """
        sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a'])

        transposed = np.transpose(sdf)
        tm.assert_sp_frame_equal(np.transpose(transposed), sdf)

        msg = "the 'axes' parameter is not supported"
        with tm.assertRaisesRegexp(ValueError, msg):
            np.transpose(sdf, axes=1)
Example #22
0
    def test_constructor_cant_cast_datetime64(self):
        """Building a Series from a date range as float or int must raise."""
        msg = "Cannot cast datetime64 to "
        for target_dtype in (float, int):
            with tm.assertRaisesRegexp(TypeError, msg):
                Series(date_range('1/1/2000', periods=10),
                       dtype=target_dtype)
Example #23
0
    def test_constructor_generic_timestamp_deprecated(self):
        """Check Series construction with generic and 'ps' time dtypes.

        The unit-less ``np.timedelta64`` / ``np.datetime64`` dtypes must
        emit a ``FutureWarning`` and give a nanosecond dtype; the 'ps'
        dtypes must raise ``TypeError``.
        """
        # see gh-15524
        generic_cases = [
            (np.timedelta64, 'm8[ns]'),
            (np.datetime64, 'M8[ns]'),
        ]
        for dtype, resolved in generic_cases:
            with tm.assert_produces_warning(FutureWarning):
                s = Series([], dtype=dtype)

                assert s.empty
                assert s.dtype == resolved

        # These timestamps have the wrong frequencies,
        # so an Exception should be raised now.
        wrong_unit_cases = [
            ("cannot convert timedeltalike", 'm8[ps]'),
            ("cannot convert datetimelike", 'M8[ps]'),
        ]
        for msg, dtype in wrong_unit_cases:
            with tm.assertRaisesRegexp(TypeError, msg):
                Series([], dtype=dtype)
Example #24
0
def test_concat4_interleave_partitions():
    # dd.concat on these pairings must fail unless
    # interleave_partitions=True is given, in which case the result has to
    # agree with pandas.concat on the computed inputs.
    frame_a = pd.DataFrame(
        np.random.randn(10, 5),
        columns=list("ABCDE"),
        index=list("abcdefghij"),
    )
    frame_b = pd.DataFrame(
        np.random.randn(13, 5),
        columns=list("ABCDE"),
        index=list("fghijklmnopqr"),
    )
    frame_c = pd.DataFrame(
        np.random.randn(13, 6),
        columns=list("CDEXYZ"),
        index=list("fghijklmnopqr"),
    )

    coll_a = dd.from_pandas(frame_a, 2)
    coll_b = dd.from_pandas(frame_b, 3)
    coll_c = dd.from_pandas(frame_c, 2)

    # "cannnot" (sic): kept verbatim because this text is matched against
    # the message of the raised error.
    order_error = (
        "All inputs have known divisions which cannnot be "
        "concatenated in order. Specify "
        "interleave_partitions=True to ignore order"
    )

    # The pairings under test.
    pairings = [
        [coll_a, coll_a],
        [coll_a, coll_b],
        [coll_a, coll_c],
        [coll_b, coll_a],
        [coll_b, coll_c],
        [coll_c, coll_a],
        [coll_c, coll_b],
    ]
    for pairing in pairings:
        computed = [part.compute() for part in pairing]

        with tm.assertRaisesRegexp(ValueError, order_error):
            dd.concat(pairing)

        outer = dd.concat(pairing, interleave_partitions=True)
        assert eq(outer, pd.concat(computed))

        inner = dd.concat(pairing, join="inner", interleave_partitions=True)
        assert eq(inner, pd.concat(computed, join="inner"))

    # Only 'inner' and 'outer' are accepted as join modes.
    with tm.assertRaisesRegexp(ValueError, "'join' must be 'inner' or 'outer'"):
        dd.concat([coll_a, coll_a], join="invalid", interleave_partitions=True)
Example #25
0
    def test_no_args_with_kwargs(self):
        # A non-default value for the unsupported parameter is expected to
        # raise whether it arrives as a keyword or positionally.
        unsupported = 'bar'
        min_count = 2

        defaults = OrderedDict([('foo', -5), (unsupported, 1)])

        pattern = (r"the '{arg}' parameter is not supported "
                   r"in the pandas implementation of {func}\(\)"
                   .format(arg=unsupported, func=self.fname))

        call_shapes = [
            ((), {'foo': -5, unsupported: 2}),   # everything by keyword
            ((-5, 2), {}),                       # everything positional
        ]
        for args, kwargs in call_shapes:
            with tm.assertRaisesRegexp(ValueError, pattern):
                validate_args_and_kwargs(self.fname, args, kwargs,
                                         min_count, defaults)
Example #26
0
    def test_null_quote_char(self):
        data = 'a,b,c\n1,2,3'
        null_quotechars = (None, '')

        # sanity checks: with quoting enabled, a null quotechar must raise
        msg = 'quotechar must be set if quoting enabled'
        for quotechar in null_quotechars:
            with tm.assertRaisesRegexp(TypeError, msg):
                self.read_csv(StringIO(data), quotechar=quotechar,
                              quoting=csv.QUOTE_MINIMAL)

        # no errors should be raised if quoting is None
        expected = DataFrame([[1, 2, 3]],
                             columns=['a', 'b', 'c'])
        for quotechar in null_quotechars:
            parsed = self.read_csv(StringIO(data), quotechar=quotechar,
                                   quoting=csv.QUOTE_NONE)
            tm.assert_frame_equal(parsed, expected)
    def test_wdi_download_error_handling(self):
        # An invalid country code or indicator must raise ValueError under
        # errors='raise'; under errors='warn' a warning is expected and a
        # two-row DataFrame is still returned.
        cntry_codes = ['USA', 'XX']
        inds = 'NY.GDP.PCAP.CD'

        # No result is bound here: the call is expected to raise.
        with tm.assertRaisesRegexp(ValueError, "Invalid Country Code\\(s\\): XX"):
            download(country=cntry_codes, indicator=inds,
                     start=2003, end=2004, errors='raise')

        if PANDAS_0160:
            # assert_produces_warning doesn't exist in prior versions
            with self.assert_produces_warning():
                result = download(country=cntry_codes, indicator=inds,
                                  start=2003, end=2004, errors='warn')
                self.assertIsInstance(result, pd.DataFrame)
                self.assertEqual(len(result), 2)

        cntry_codes = ['USA']
        inds = ['NY.GDP.PCAP.CD', 'BAD_INDICATOR']

        with tm.assertRaisesRegexp(ValueError, "The provided parameter value is not valid\\. Indicator: BAD_INDICATOR"):
            download(country=cntry_codes, indicator=inds,
                     start=2003, end=2004, errors='raise')

        if PANDAS_0160:
            with self.assert_produces_warning():
                result = download(country=cntry_codes, indicator=inds,
                                  start=2003, end=2004, errors='warn')
                self.assertIsInstance(result, pd.DataFrame)
                self.assertEqual(len(result), 2)
Example #28
0
    def test_constructor_pi_nat(self):
        # NaT mixed with monthly Periods, as a list and as an ndarray,
        # must build the same index as the equivalent strings with freq='M'.
        values = [Period('2011-01', freq='M'), pd.NaT,
                  Period('2011-01', freq='M')]
        exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
        tm.assert_index_equal(PeriodIndex(values), exp)
        tm.assert_index_equal(PeriodIndex(np.array(values)), exp)

        # Same check with the NaT entries leading.
        values = [pd.NaT, pd.NaT, Period('2011-01', freq='M'),
                  Period('2011-01', freq='M')]
        exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
        tm.assert_index_equal(PeriodIndex(values), exp)
        tm.assert_index_equal(PeriodIndex(np.array(values)), exp)

        # NaT objects mixed with strings, with an explicit freq.
        idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M')
        tm.assert_index_equal(idx, exp)

        # All-NaT input without a freq is expected to raise.
        all_nat_inputs = (
            [pd.NaT, pd.NaT],
            np.array([pd.NaT, pd.NaT]),
            ['NaT', 'NaT'],
            np.array(['NaT', 'NaT']),
        )
        for data in all_nat_inputs:
            with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
                PeriodIndex(data)
Example #29
0
 def test_set_item(self):
     # Both single-item and slice assignment on self.arr are expected to
     # be refused with a TypeError.
     with assertRaisesRegexp(TypeError, "item assignment"):
         self.arr[5] = 3
     with assertRaisesRegexp(TypeError, "item assignment"):
         self.arr[1:5] = 2
Example #30
0
 def test_type_check(self):
     # GH 11776
     # A DataFrame argument is expected to raise a "1-d array" TypeError,
     # with or without an explicit `errors` mode.
     frame = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']})
     pattern = "1-d array"
     with tm.assertRaisesRegexp(TypeError, pattern):
         to_numeric(frame)
     for mode in ('ignore', 'raise', 'coerce'):
         with tm.assertRaisesRegexp(TypeError, pattern):
             to_numeric(frame, errors=mode)
Example #31
0
 def test_invalid_table_attrs(self):
     # With this `attrs` filter the read is expected to find no tables.
     source = self.banklist_data
     bogus_attrs = {'id': 'tasdfable'}
     with tm.assertRaisesRegexp(ValueError, 'No tables found'):
         self.read_html(source, 'First Federal Bank of Florida',
                        attrs=bogus_attrs)
 def check_mutable_error(self, *args, **kwargs):
     # Takes what assertRaises takes after the exception class; everything
     # is forwarded unchanged, with TypeError and self.mutable_regex
     # supplied as the expected exception and message pattern.
     pattern = self.mutable_regex
     assertRaisesRegexp(TypeError, pattern, *args, **kwargs)
Example #33
0
    def test_numeric_compat(self):
        # Multiplication, division and floor division with a scalar are
        # expected to raise TypeError on either side of the operator.
        index = self.create_index()

        with tm.assertRaisesRegexp(TypeError, "cannot perform __mul__"):
            index * 1
        with tm.assertRaisesRegexp(TypeError, "cannot perform __mul__"):
            1 * index

        # The dunder named in the division error depends on the Python
        # major version.
        div_err = ("cannot perform __truediv__" if PY3
                   else "cannot perform __div__")
        with tm.assertRaisesRegexp(TypeError, div_err):
            index / 1
        with tm.assertRaisesRegexp(TypeError, div_err):
            1 / index

        with tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__"):
            index // 1
        with tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__"):
            1 // index
Example #34
0
 def test_hash_error(self):
     # Hashing any index in self.indices is expected to raise a TypeError
     # whose message names that index's class.
     for index in self.indices.values():
         pattern = "unhashable type: %r" % type(index).__name__
         with tm.assertRaisesRegexp(TypeError, pattern):
             hash(index)
Example #35
0
    def test_wrong_number_names(self):
        # Assigning these three names is expected to fail, for every index
        # in self.indices, with a message that starts with "Length".
        for index in self.indices.values():
            with assertRaisesRegexp(ValueError, "^Length"):
                index.names = ["apple", "banana", "carrot"]
Example #36
0
 def test_logical_compat(self):
     # all() and any() are expected to raise on the index under test.
     index = self.create_index()
     with tm.assertRaisesRegexp(TypeError, 'cannot perform all'):
         index.all()
     with tm.assertRaisesRegexp(TypeError, 'cannot perform any'):
         index.any()
Example #37
0
 def test_negative_skiprows(self):
     # A negative skiprows value is expected to raise.  The pattern is a
     # raw string so the escaped parentheses reach the regex engine as
     # written, without relying on Python passing the unrecognised "\("
     # escape through.
     with tm.assertRaisesRegexp(ValueError,
                                r'\(you passed a negative value\)'):
         self.read_html(self.spam_data, 'Water', skiprows=-1)
Example #38
0
    def test_set_index2(self):
        # Exercises DataFrame.set_index with a single column, several
        # columns, drop=False, inplace=True, verify_integrity=True,
        # append=True and a Series argument, comparing each result with a
        # frame whose index was assigned by hand.
        df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
                        'B': ['one', 'two', 'three', 'one', 'two'],
                        'C': ['a', 'b', 'c', 'd', 'e'],
                        'D': np.random.randn(5),
                        'E': np.random.randn(5)})

        # new object, single-column
        result = df.set_index('C')
        result_nodrop = df.set_index('C', drop=False)

        index = Index(df['C'], name='C')

        # Expected with drop: every column except 'C', indexed by 'C'.
        expected = df.loc[:, ['A', 'B', 'D', 'E']]
        expected.index = index

        # Expected without drop: all columns kept, indexed by 'C'.
        expected_nodrop = df.copy()
        expected_nodrop.index = index

        assert_frame_equal(result, expected)
        assert_frame_equal(result_nodrop, expected_nodrop)
        self.assertEqual(result.index.name, index.name)

        # inplace, single
        df2 = df.copy()

        df2.set_index('C', inplace=True)

        assert_frame_equal(df2, expected)

        df3 = df.copy()
        df3.set_index('C', drop=False, inplace=True)

        assert_frame_equal(df3, expected_nodrop)

        # create new object, multi-column
        result = df.set_index(['A', 'B'])
        result_nodrop = df.set_index(['A', 'B'], drop=False)

        index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B'])

        expected = df.loc[:, ['C', 'D', 'E']]
        expected.index = index

        expected_nodrop = df.copy()
        expected_nodrop.index = index

        assert_frame_equal(result, expected)
        assert_frame_equal(result_nodrop, expected_nodrop)
        self.assertEqual(result.index.names, index.names)

        # inplace
        df2 = df.copy()
        df2.set_index(['A', 'B'], inplace=True)
        assert_frame_equal(df2, expected)

        df3 = df.copy()
        df3.set_index(['A', 'B'], drop=False, inplace=True)
        assert_frame_equal(df3, expected_nodrop)

        # corner case
        # Column 'A' holds repeated values ('foo' x3, 'bar' x2), so asking
        # for integrity verification is expected to raise.
        with assertRaisesRegexp(ValueError, 'Index has duplicate keys'):
            df.set_index('A', verify_integrity=True)

        # append
        result = df.set_index(['A', 'B'], append=True)
        # Build the expectation from reset_index(), then blank the name of
        # the first level ('index' after the reset).
        xp = df.reset_index().set_index(['index', 'A', 'B'])
        xp.index.names = [None, 'A', 'B']
        assert_frame_equal(result, xp)

        # append to existing multiindex
        rdf = df.set_index(['A'], append=True)
        rdf = rdf.set_index(['B', 'C'], append=True)
        expected = df.set_index(['A', 'B', 'C'], append=True)
        assert_frame_equal(rdf, expected)

        # Series
        result = df.set_index(df.C)
        self.assertEqual(result.index.name, 'C')
Example #39
0
    def test_frame_to_time_stamp(self):
        # Checks DataFrame.to_timestamp on an annual PeriodIndex for several
        # target frequencies and for both 'start' and 'end', first along
        # the index and then (after transposing) along the columns.
        K = 5
        index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
        df = DataFrame(randn(len(index), K), index=index)
        # Add a string column so the frame holds mixed dtypes.
        df['mix'] = 'a'

        exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
        result = df.to_timestamp('D', 'end')
        tm.assert_index_equal(result.index, exp_index)
        tm.assert_numpy_array_equal(result.values, df.values)

        exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')
        result = df.to_timestamp('D', 'start')
        tm.assert_index_equal(result.index, exp_index)

        # Expected index for sub-daily 'end' conversions: the year-end range
        # from 2001 through 2009 with both endpoints shifted by `delta`.
        def _get_with_delta(delta, freq='A-DEC'):
            return date_range(to_datetime('1/1/2001') + delta,
                              to_datetime('12/31/2009') + delta, freq=freq)

        delta = timedelta(hours=23)
        result = df.to_timestamp('H', 'end')
        exp_index = _get_with_delta(delta)
        tm.assert_index_equal(result.index, exp_index)

        delta = timedelta(hours=23, minutes=59)
        result = df.to_timestamp('T', 'end')
        exp_index = _get_with_delta(delta)
        tm.assert_index_equal(result.index, exp_index)

        result = df.to_timestamp('S', 'end')
        delta = timedelta(hours=23, minutes=59, seconds=59)
        exp_index = _get_with_delta(delta)
        tm.assert_index_equal(result.index, exp_index)

        # columns
        # From here on `df` is the transposed frame, so the periods sit on
        # the columns and every call passes axis=1.
        df = df.T

        exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
        result = df.to_timestamp('D', 'end', axis=1)
        tm.assert_index_equal(result.columns, exp_index)
        tm.assert_numpy_array_equal(result.values, df.values)

        exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')
        result = df.to_timestamp('D', 'start', axis=1)
        tm.assert_index_equal(result.columns, exp_index)

        delta = timedelta(hours=23)
        result = df.to_timestamp('H', 'end', axis=1)
        exp_index = _get_with_delta(delta)
        tm.assert_index_equal(result.columns, exp_index)

        delta = timedelta(hours=23, minutes=59)
        result = df.to_timestamp('T', 'end', axis=1)
        exp_index = _get_with_delta(delta)
        tm.assert_index_equal(result.columns, exp_index)

        result = df.to_timestamp('S', 'end', axis=1)
        delta = timedelta(hours=23, minutes=59, seconds=59)
        exp_index = _get_with_delta(delta)
        tm.assert_index_equal(result.columns, exp_index)

        # invalid axis
        tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2)

        # A multiplied frequency ('5t') and its base ('t') are expected to
        # give the same timestamps.
        result1 = df.to_timestamp('5t', axis=1)
        result2 = df.to_timestamp('t', axis=1)
        expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS')
        self.assertTrue(isinstance(result1.columns, DatetimeIndex))
        self.assertTrue(isinstance(result2.columns, DatetimeIndex))
        self.assert_numpy_array_equal(result1.columns.asi8, expected.asi8)
        self.assert_numpy_array_equal(result2.columns.asi8, expected.asi8)
        # PeriodIndex.to_timestamp always use 'infer'
        self.assertEqual(result1.columns.freqstr, 'AS-JAN')
        self.assertEqual(result2.columns.freqstr, 'AS-JAN')
Example #40
0
 def test_skiprows_invalid(self):
     # A string skiprows argument is expected to raise a TypeError.
     pattern = 'is not a valid type for skipping rows'
     with tm.assertRaisesRegexp(TypeError, pattern):
         self.read_html(self.spam_data, '.*Water.*', skiprows='asdf')
Example #41
0
    def test_converters_type_must_be_dict(self):
        # A non-dict `converters` value (here the integer 0) is expected to
        # raise a TypeError.
        data = "index,A,B,C,D\nfoo,2,3,4,5\n"
        with tm.assertRaisesRegexp(TypeError, 'Type converters.+'):
            self.read_csv(StringIO(data), converters=0)
Example #42
0
 def test_set_columns(self):
     # Assigning a column index of matching length works; one holding only
     # every second label is expected to fail with a length mismatch.
     n_cols = len(self.mixed_frame.columns)
     full = Index(np.arange(n_cols))
     self.mixed_frame.columns = full
     with assertRaisesRegexp(ValueError, 'Length mismatch'):
         self.mixed_frame.columns = full[::2]
Example #43
0
    def test_take_fill_value(self):
        # GH 12631
        # Checks CategoricalIndex.take for a numeric and an ordered object
        # category.  As the expectations below show, -1 selects the last
        # element by default, becomes NaN when fill_value is given, and
        # selects the last element again when allow_fill=False.

        # numeric category
        idx = pd.CategoricalIndex([1, 2, 3], name='xxx')
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.CategoricalIndex([2, 1, 3], name='xxx')
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = pd.CategoricalIndex([2, 1, np.nan],
                                       categories=[1, 2, 3],
                                       name='xxx')
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]),
                          allow_fill=False,
                          fill_value=True)
        expected = pd.CategoricalIndex([2, 1, 3], name='xxx')
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # object category
        idx = pd.CategoricalIndex(list('CBA'),
                                  categories=list('ABC'),
                                  ordered=True,
                                  name='xxx')
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.CategoricalIndex(list('BCA'),
                                       categories=list('ABC'),
                                       ordered=True,
                                       name='xxx')
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = pd.CategoricalIndex(['B', 'C', np.nan],
                                       categories=list('ABC'),
                                       ordered=True,
                                       name='xxx')
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]),
                          allow_fill=False,
                          fill_value=True)
        expected = pd.CategoricalIndex(list('BCA'),
                                       categories=list('ABC'),
                                       ordered=True,
                                       name='xxx')
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # With a fill_value, indices below -1 are expected to be rejected.
        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        with tm.assertRaisesRegexp(ValueError, msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with tm.assertRaisesRegexp(ValueError, msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        # Without a fill_value, -5 on this 3-element index is expected to
        # raise IndexError.
        with tm.assertRaises(IndexError):
            idx.take(np.array([1, -5]))
Example #44
0
    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)
        # Builds a DatetimeIndex from Timestamps that are all naive, share
        # one tz, or mix naive and aware values, then checks which
        # combinations of conflicting timezones raise.

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
        exp = DatetimeIndex(
            [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # same tz results in DatetimeIndex
        result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                                Timestamp('2011-01-02 10:00',
                                          tz='Asia/Tokyo')],
                               name='idx')
        exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                             Timestamp('2011-01-02 10:00')],
                            tz='Asia/Tokyo', name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
                                Timestamp('2011-08-01 10:00',
                                          tz='US/Eastern')],
                               name='idx')
        exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                             Timestamp('2011-08-01 10:00')],
                            tz='US/Eastern', name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # mixing a tz-naive value with a tz-aware one coerces the naive
        # value to the aware tz; the expected 05:00 US/Eastern is the naive
        # 10:00 read as UTC
        result = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                                Timestamp('2011-01-02 10:00',
                                          tz='US/Eastern')], name='idx')
        exp = DatetimeIndex([Timestamp('2011-01-01 05:00'),
                             Timestamp('2011-01-02 10:00')],
                            tz='US/Eastern', name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # tz mismatch affecting to tz-aware raises TypeError/ValueError

        # Two different timezones in the data, no tz argument.
        with tm.assertRaises(ValueError):
            DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                           Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                          name='idx')

        # The data contains a US/Eastern value but tz='Asia/Tokyo' is
        # requested.
        with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
            DatetimeIndex([Timestamp('2011-01-01 10:00'),
                           Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                          tz='Asia/Tokyo', name='idx')

        # Two different timezones in the data, with a tz argument matching
        # one of them.
        with tm.assertRaises(ValueError):
            DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                           Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                          tz='US/Eastern', name='idx')

        with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
            # passing tz should results in DatetimeIndex, then mismatch raises
            # TypeError
            Index([pd.NaT, Timestamp('2011-01-01 10:00'),
                   pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                  tz='Asia/Tokyo', name='idx')
Example #45
0
    def test_isinstance(self):
        # The integer 1 is not a Series, so tm.assertIsInstance is expected
        # to raise with its "Expected type " message.
        with assertRaisesRegexp(AssertionError, "Expected type "):
            tm.assertIsInstance(1, pd.Series)
Example #46
0
    def test_get_loc(self):
        # Checks DatetimeIndex.get_loc with exact keys, with the 'pad',
        # 'backfill' and 'nearest' methods, with tolerances given in several
        # forms, with partial date strings, and with datetime.time keys.
        idx = pd.date_range('2000-01-01', periods=3)

        # An exact hit gives the same position for every method and for
        # every spelling of the key (Timestamp, datetime, string).
        for method in [None, 'pad', 'backfill', 'nearest']:
            self.assertEqual(idx.get_loc(idx[1], method), 1)
            self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1)
            self.assertEqual(idx.get_loc(str(idx[1]), method), 1)
            if method is not None:
                self.assertEqual(
                    idx.get_loc(idx[1],
                                method,
                                tolerance=pd.Timedelta('0 days')), 1)

        self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0)
        self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1)

        # The tolerance may be a string, a Timedelta, a numpy timedelta64
        # or a datetime.timedelta.
        self.assertEqual(
            idx.get_loc('2000-01-01T12', method='nearest', tolerance='1 day'),
            1)
        self.assertEqual(
            idx.get_loc('2000-01-01T12',
                        method='nearest',
                        tolerance=pd.Timedelta('1D')), 1)
        self.assertEqual(
            idx.get_loc('2000-01-01T12',
                        method='nearest',
                        tolerance=np.timedelta64(1, 'D')), 1)
        self.assertEqual(
            idx.get_loc('2000-01-01T12',
                        method='nearest',
                        tolerance=timedelta(1)), 1)
        with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
            idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo')
        # 03:00 is three hours from the closest label, beyond the two-hour
        # tolerance.
        with tm.assertRaises(KeyError):
            idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours')

        # Partial strings covering the whole index give a slice.
        self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3))
        self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3))

        self.assertEqual(idx.get_loc('1999', method='nearest'), 0)
        self.assertEqual(idx.get_loc('2001', method='nearest'), 2)

        with tm.assertRaises(KeyError):
            idx.get_loc('1999', method='pad')
        with tm.assertRaises(KeyError):
            idx.get_loc('2001', method='backfill')

        with tm.assertRaises(KeyError):
            idx.get_loc('foobar')
        with tm.assertRaises(TypeError):
            idx.get_loc(slice(2))

        idx = pd.to_datetime(['2000-01-01', '2000-01-04'])
        self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0)
        self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1)
        self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2))

        # time indexing
        idx = pd.date_range('2000-01-01', periods=24, freq='H')
        tm.assert_numpy_array_equal(idx.get_loc(time(12)),
                                    np.array([12]),
                                    check_dtype=False)
        tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)),
                                    np.array([]),
                                    check_dtype=False)
        with tm.assertRaises(NotImplementedError):
            idx.get_loc(time(12, 30), method='pad')
Example #47
0
    def test_index_equal_message(self):
        # Pins the text of the AssertionError raised by assert_index_equal
        # for differing levels, length, class and values.  Each `expected`
        # string is used as a regular expression, hence the escaping.

        expected = """Index are different

Index levels are different
\\[left\\]:  1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\],
           labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)"""

        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3),
                                          ('B', 4)])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=False)

        expected = """MultiIndex level \\[1\\] are different

MultiIndex level \\[1\\] values are different \\(25\\.0 %\\)
\\[left\\]:  Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""

        idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3),
                                          ('B', 4)])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3),
                                          ('B', 4)])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

        expected = """Index are different

Index length are different
\\[left\\]:  3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""

        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3, 4])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

        expected = """Index are different

Index classes are different
\\[left\\]:  Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)"""

        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3.0])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=True)
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=True, check_exact=False)

        # NOTE(review): in the two float patterns below the '.' characters
        # and the ']' closing the [left] list are not escaped, so these
        # patterns are looser than the others in this test.
        expected = """Index are different

Index values are different \\(33\\.33333 %\\)
\\[left\\]:  Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\)
\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)"""

        idx1 = pd.Index([1, 2, 3.])
        idx2 = pd.Index([1, 2, 3.0000000001])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)

        # must success
        assert_index_equal(idx1, idx2, check_exact=False)

        expected = """Index are different

Index values are different \\(33\\.33333 %\\)
\\[left\\]:  Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\)
\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)"""

        idx1 = pd.Index([1, 2, 3.])
        idx2 = pd.Index([1, 2, 3.0001])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)
        # must success
        assert_index_equal(idx1,
                           idx2,
                           check_exact=False,
                           check_less_precise=True)

        expected = """Index are different

Index values are different \\(33\\.33333 %\\)
\\[left\\]:  Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)"""

        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 4])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_less_precise=True)

        # NOTE(review): this repeats the MultiIndex level [1] scenario
        # checked earlier in this test with the same inputs and pattern.
        expected = """MultiIndex level \\[1\\] are different

MultiIndex level \\[1\\] values are different \\(25\\.0 %\\)
\\[left\\]:  Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""

        idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3),
                                          ('B', 4)])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3),
                                          ('B', 4)])
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with assertRaisesRegexp(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)
Example #48
0
    def test_notisinstance(self):
        # A Series is a Series, so tm.assertNotIsInstance is expected to
        # raise with its "Input must not be type " message.
        with assertRaisesRegexp(AssertionError, "Input must not be type "):
            tm.assertNotIsInstance(pd.Series([1]), pd.Series)
Example #49
0
 def test_hash_error(self):
     # Hashing the index is expected to raise a TypeError whose message
     # names the index's class.
     dates = date_range('20010101', periods=10)
     pattern = "unhashable type: %r" % type(dates).__name__
     with tm.assertRaisesRegexp(TypeError, pattern):
         hash(dates)
Example #50
0
    def test_frame_equal_message(self):
        # Pins the text of the AssertionError raised by assert_frame_equal
        # for frames that differ in row count, column count, index labels,
        # column labels and cell values.  Each `expected` string is used as
        # a regular expression, hence the escaping.

        expected = """DataFrame are different

DataFrame shape \\(number of rows\\) are different
\\[left\\]:  3, RangeIndex\\(start=0, stop=3, step=1\\)
\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}),
                               pd.DataFrame({'A': [1, 2, 3, 4]}))

        expected = """DataFrame are different

DataFrame shape \\(number of columns\\) are different
\\[left\\]:  2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\)
\\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({
                'A': [1, 2, 3],
                'B': [4, 5, 6]
            }), pd.DataFrame({'A': [1, 2, 3]}))

        # Same data, last index label differs ('c' vs 'd').
        expected = """DataFrame\\.index are different

DataFrame\\.index values are different \\(33\\.33333 %\\)
\\[left\\]:  Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\)
\\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_frame_equal(
                pd.DataFrame({
                    'A': [1, 2, 3],
                    'B': [4, 5, 6]
                },
                             index=['a', 'b', 'c']),
                pd.DataFrame({
                    'A': [1, 2, 3],
                    'B': [4, 5, 6]
                },
                             index=['a', 'b', 'd']))

        # Same data and index, second column label differs ('B' vs 'b').
        expected = """DataFrame\\.columns are different

DataFrame\\.columns values are different \\(50\\.0 %\\)
\\[left\\]:  Index\\(\\[u?'A', u?'B'\\], dtype='object'\\)
\\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_frame_equal(
                pd.DataFrame({
                    'A': [1, 2, 3],
                    'B': [4, 5, 6]
                },
                             index=['a', 'b', 'c']),
                pd.DataFrame({
                    'A': [1, 2, 3],
                    'b': [4, 5, 6]
                },
                             index=['a', 'b', 'c']))

        # Same labels, one differing value in column 'B' (6 vs 7).
        expected = """DataFrame\\.iloc\\[:, 1\\] are different

DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
\\[left\\]:  \\[4, 5, 6\\]
\\[right\\]: \\[4, 5, 7\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({
                'A': [1, 2, 3],
                'B': [4, 5, 6]
            }), pd.DataFrame({
                'A': [1, 2, 3],
                'B': [4, 5, 7]
            }))

        # The same message is expected when comparing with by_blocks=True.
        with assertRaisesRegexp(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({
                'A': [1, 2, 3],
                'B': [4, 5, 6]
            }),
                               pd.DataFrame({
                                   'A': [1, 2, 3],
                                   'B': [4, 5, 7]
                               }),
                               by_blocks=True)
Example #51
0
    def test_add_iadd(self):
        # Exercises `+` and `+=` on a PeriodIndex with three kinds of right
        # operand: another PeriodIndex (compared against `.union`), an
        # offset/timedelta (expected to raise TypeError) and an integer.

        # union
        rng1 = pd.period_range('1/1/2000', freq='D', periods=5)
        other1 = pd.period_range('1/6/2000', freq='D', periods=5)
        expected1 = pd.period_range('1/1/2000', freq='D', periods=10)

        rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
        other2 = pd.period_range('1/4/2000', freq='D', periods=5)
        expected2 = pd.period_range('1/1/2000', freq='D', periods=8)

        rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
        other3 = pd.PeriodIndex([], freq='D')
        expected3 = pd.period_range('1/1/2000', freq='D', periods=5)

        rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
        other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
        expected4 = pd.PeriodIndex([
            '2000-01-01 09:00', '2000-01-01 10:00', '2000-01-01 11:00',
            '2000-01-01 12:00', '2000-01-01 13:00', '2000-01-02 09:00',
            '2000-01-02 10:00', '2000-01-02 11:00', '2000-01-02 12:00',
            '2000-01-02 13:00',
        ], freq='H')

        rng5 = pd.PeriodIndex(
            ['2000-01-01 09:01', '2000-01-01 09:03', '2000-01-01 09:05'],
            freq='T')
        # NOTE: the comma after '09:05' matters; without it the two adjacent
        # literals are concatenated into a single malformed timestamp string.
        other5 = pd.PeriodIndex(
            ['2000-01-01 09:01', '2000-01-01 09:05', '2000-01-01 09:08'],
            freq='T')
        expected5 = pd.PeriodIndex([
            '2000-01-01 09:01', '2000-01-01 09:03', '2000-01-01 09:05',
            '2000-01-01 09:08',
        ], freq='T')

        rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
        other6 = pd.period_range('2000-04-01', freq='M', periods=7)
        expected6 = pd.period_range('2000-01-01', freq='M', periods=10)

        rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
        other7 = pd.period_range('1998-01-01', freq='A', periods=8)
        expected7 = pd.period_range('1998-01-01', freq='A', periods=10)

        for rng, other, expected in [(rng1, other1, expected1),
                                     (rng2, other2, expected2),
                                     (rng3, other3, expected3),
                                     (rng4, other4, expected4),
                                     (rng5, other5, expected5),
                                     (rng6, other6, expected6),
                                     (rng7, other7, expected7)]:

            result_add = rng + other
            result_union = rng.union(other)

            tm.assert_index_equal(result_add, expected)
            tm.assert_index_equal(result_union, expected)
            # GH 6527
            rng += other
            tm.assert_index_equal(rng, expected)

        # offset
        # raw string: `\(` is a regex escape, not a Python string escape
        unsupported = r'unsupported operand type\(s\)'
        for delta in [pd.offsets.Hour(2), timedelta(hours=2)]:
            rng = pd.period_range('2000-01-01', '2000-02-01')
            with tm.assertRaisesRegexp(TypeError, unsupported):
                rng + delta
            with tm.assertRaisesRegexp(TypeError, unsupported):
                rng += delta

        # int
        rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
        result = rng + 1
        expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10)
        tm.assert_index_equal(result, expected)
        rng += 1
        tm.assert_index_equal(rng, expected)
Example #52
0
    def test_numpy_array_equal_message(self):

        if is_platform_windows():
            raise nose.SkipTest("windows has incomparable line-endings "
                                "and uses L on the shape")

        expected = """numpy array are different

numpy array shapes are different
\\[left\\]:  \\(2,\\)
\\[right\\]: \\(3,\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]))

        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]))

        # scalar comparison
        expected = """Expected type """
        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(1, 2)
        expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5"""
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(1, 2)

        # array / scalar array comparison
        expected = """numpy array are different

numpy array classes are different
\\[left\\]:  ndarray
\\[right\\]: int"""

        with assertRaisesRegexp(AssertionError, expected):
            # numpy_array_equal only accepts np.ndarray
            assert_numpy_array_equal(np.array([1]), 1)
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1]), 1)

        # scalar / array comparison
        expected = """numpy array are different

numpy array classes are different
\\[left\\]:  int
\\[right\\]: ndarray"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(1, np.array([1]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(1, np.array([1]))

        expected = """numpy array are different

numpy array values are different \\(66\\.66667 %\\)
\\[left\\]:  \\[nan, 2\\.0, 3\\.0\\]
\\[right\\]: \\[1\\.0, nan, 3\\.0\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([np.nan, 2, 3]),
                                     np.array([1, np.nan, 3]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([np.nan, 2, 3]),
                                np.array([1, np.nan, 3]))

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[1, 2\\]
\\[right\\]: \\[1, 3\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([1, 3]))

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[1\\.1, 2\\.000001\\]
\\[right\\]: \\[1\\.1, 2.0\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([1.1, 2.000001]),
                                     np.array([1.1, 2.0]))

        # must pass
        assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0]))

        expected = """numpy array are different

numpy array values are different \\(16\\.66667 %\\)
\\[left\\]:  \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                     np.array([[1, 3], [3, 4], [5, 6]]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                np.array([[1, 3], [3, 4], [5, 6]]))

        expected = """numpy array are different

numpy array values are different \\(25\\.0 %\\)
\\[left\\]:  \\[\\[1, 2\\], \\[3, 4\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4]]),
                                     np.array([[1, 3], [3, 4]]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4]]),
                                np.array([[1, 3], [3, 4]]))

        # allow to overwrite message
        expected = """Index are different

Index shapes are different
\\[left\\]:  \\(2,\\)
\\[right\\]: \\(3,\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]),
                                     np.array([3, 4, 5]),
                                     obj='Index')
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]),
                                np.array([3, 4, 5]),
                                obj='Index')
Example #53
0
 def test_dt_accessor_no_new_attributes(self):
     # https://github.com/pydata/pandas/issues/10673
     # Setting `xlabel` on the .dt accessor is expected to raise
     # AttributeError.
     ser = Series(date_range('20130101', periods=5, freq='D'))
     pattern = "You cannot add any new attribute"
     with tm.assertRaisesRegexp(AttributeError, pattern):
         ser.dt.xlabel = "a"
Example #54
0
def test_mut_exclusive():
    # Two non-None keyword arguments must be rejected with a TypeError.
    pattern = "mutually exclusive arguments: '[ab]' and '[ab]'"
    with tm.assertRaisesRegexp(TypeError, pattern):
        com._mut_exclusive(a=1, b=2)

    # One non-None argument: that value comes back.
    only_a = com._mut_exclusive(a=1, b=None)
    assert only_a == 1

    # Both None: the result is None.
    neither = com._mut_exclusive(major=None, major_axis=None)
    assert neither is None
Example #55
0
    def test_per_axis_per_level_getitem(self):

        # GH6134
        # example test case
        level_labels = [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4),
                        _mklbl('D', 2)]
        ix = MultiIndex.from_product(level_labels)
        df = DataFrame(np.arange(len(ix.get_values())), index=ix)

        a_wanted = ('A1', 'A2', 'A3')

        # list selection on the third level
        result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
        keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in a_wanted and c in ('C1', 'C3')]
        expected = df.loc[keys]
        tm.assert_frame_equal(result, expected)

        # slice selection on the third level
        keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in a_wanted and c in ('C1', 'C2', 'C3')]
        expected = df.loc[keys]
        result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        row_tuples = [('A', 1), ('A', 2), ('A', 3), ('B', 1)]
        col_tuples = [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')]
        index = MultiIndex.from_tuples(row_tuples, names=['one', 'two'])
        columns = MultiIndex.from_tuples(col_tuples, names=['lvl0', 'lvl1'])

        values = np.arange(16, dtype='int64').reshape(4, 4)
        df = DataFrame(values, index=index, columns=columns)
        df = df.sort_index(axis=0)
        df = df.sort_index(axis=1)

        # identity
        everything = slice(None)
        both_levels = (everything, everything)
        identity_keys = [(both_levels, everything),
                         (both_levels, both_levels),
                         (everything, both_levels)]
        for key in identity_keys:
            # df.loc[key] with a 2-tuple is the same call as df.loc[rows, cols]
            tm.assert_frame_equal(df.loc[key], df)

        # index
        for second_level in ([1], 1):
            result = df.loc[(slice(None), second_level), :]
            expected = df.iloc[[0, 3]]
            tm.assert_frame_equal(result, expected)

        # columns
        result = df.loc[:, (slice(None), ['foo'])]
        expected = df.iloc[:, [1, 3]]
        tm.assert_frame_equal(result, expected)

        # both
        result = df.loc[(slice(None), 1), (slice(None), ['foo'])]
        expected = df.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc['A', 'a']
        expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                             index=Index([1, 2, 3], name='two'),
                             columns=Index(['bar', 'foo'], name='lvl1'))
        tm.assert_frame_equal(result, expected)

        result = df.loc[(slice(None), [1, 2]), :]
        expected = df.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # multi-level series
        s = Series(np.arange(len(ix.get_values())), index=ix)
        result = s.loc['A1':'A3', :, ['C1', 'C3']]
        keys = [(a, b, c, d) for a, b, c, d in s.index.values
                if a in a_wanted and c in ('C1', 'C3')]
        expected = s.loc[keys]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(slice(None), mask), :]
        expected = df.iloc[[2, 3]]
        tm.assert_frame_equal(result, expected)

        with self.assertRaises(ValueError):
            df.loc[(slice(None), np.array([True, False])), :]

        # ambiguous cases
        # these can be multiply interpreted (e.g. in this case
        # as df.loc[slice(None),[1]] as well
        with self.assertRaises(KeyError):
            df.loc[slice(None), [1]]

        result = df.loc[(slice(None), [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # not lexsorted
        self.assertEqual(df.index.lexsort_depth, 2)
        df = df.sort_index(level=1, axis=0)
        self.assertEqual(df.index.lexsort_depth, 0)
        pattern = ('MultiIndex Slicing requires the index to be fully '
                   r'lexsorted tuple len \(2\), lexsort depth \(0\)')
        with tm.assertRaisesRegexp(UnsortedIndexError, pattern):
            df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
Example #56
0
 def test_write_explicit_bad(self, compression, get_random_path):
     # to_pickle with the given `compression` is expected to raise
     # ValueError; the write is attempted inside tm.ensure_clean.
     pattern = "Unrecognized compression type"
     with tm.assertRaisesRegexp(ValueError, pattern):
         with tm.ensure_clean(get_random_path) as target:
             frame = tm.makeDataFrame()
             frame.to_pickle(target, compression=compression)
Example #57
0
 def assert_unbounded_slice_error(slc):
     """Assert that BlockPlacement(slc) raises ValueError("unbounded slice")."""
     with tm.assertRaisesRegexp(ValueError, "unbounded slice"):
         BlockPlacement(slc)
Example #58
0
    def test_dt_namespace_accessor(self):

        # GH 7207, 11128
        # test .dt namespace accessor
        #
        # For datetime, timedelta and period Series this checks that each
        # listed property of `s.dt` agrees with the same property computed on
        # Index(s._values), that the listed methods exist, and that the
        # public names exposed by `s.dt.__dir__()` are exactly those lists.

        ok_for_base = ['year', 'month', 'day', 'hour', 'minute', 'second',
                       'weekofyear', 'week', 'dayofweek', 'weekday',
                       'dayofyear', 'quarter', 'freq', 'days_in_month',
                       'daysinmonth', 'is_leap_year']
        ok_for_period = ok_for_base + ['qyear', 'start_time', 'end_time']
        ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
        ok_for_dt = ok_for_base + ['date', 'time', 'microsecond', 'nanosecond',
                                   'is_month_start', 'is_month_end',
                                   'is_quarter_start', 'is_quarter_end',
                                   'is_year_start', 'is_year_end', 'tz',
                                   'weekday_name']
        ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize',
                             'tz_convert', 'normalize', 'strftime', 'round',
                             'floor', 'ceil', 'weekday_name']
        ok_for_td = ['days', 'seconds', 'microseconds', 'nanoseconds']
        ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds',
                             'round', 'floor', 'ceil']

        def get_expected(s, name):
            # Reference value for `s.dt.<name>`, computed on the Index built
            # from the Series' values. Uses the `name` argument rather than
            # whatever loop variable happens to be live in the enclosing
            # scope.
            result = getattr(Index(s._values), name)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype('int64')
            elif not is_list_like(result):
                # scalar attribute: returned as-is
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            # Assert that `s.dt.<name>` equals the reference value.
            a = getattr(s.dt, name)
            b = get_expected(s, name)
            if not (is_list_like(a) and is_list_like(b)):
                self.assertEqual(a, b)
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [Series(date_range('20130101', periods=5), name='xxx'),
                 Series(date_range('20130101', periods=5, freq='s'),
                        name='xxx'),
                 Series(date_range('20130101 00:00:00', periods=5, freq='ms'),
                        name='xxx')]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            self.assertIsInstance(result, np.ndarray)
            self.assertTrue(result.dtype == object)

            result = s.dt.tz_localize('US/Eastern')
            exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            self.assertEqual(str(tz_result), 'US/Eastern')
            freq_result = s.dt.freq
            self.assertEqual(freq_result, DatetimeIndex(s.values,
                                                        freq='infer').freq)

            # let's localize, then convert
            result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
            exp_values = (DatetimeIndex(s.values).tz_localize('UTC')
                                                 .tz_convert('US/Eastern'))
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

        # round
        s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                                   '2012-01-01 12:01:00',
                                   '2012-01-01 08:00:00']), name='xxx')
        result = s.dt.round('D')
        expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
                                          '2012-01-01']), name='xxx')
        tm.assert_series_equal(result, expected)

        # round with tz
        result = (s.dt.tz_localize('UTC')
                   .dt.tz_convert('US/Eastern')
                   .dt.round('D'))
        exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
                                     '2012-01-01']).tz_localize('US/Eastern')
        expected = Series(exp_values, name='xxx')
        tm.assert_series_equal(result, expected)

        # floor
        s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                                   '2012-01-01 12:01:00',
                                   '2012-01-01 08:00:00']), name='xxx')
        result = s.dt.floor('D')
        expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01',
                                          '2012-01-01']), name='xxx')
        tm.assert_series_equal(result, expected)

        # ceil
        s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                                   '2012-01-01 12:01:00',
                                   '2012-01-01 08:00:00']), name='xxx')
        result = s.dt.ceil('D')
        expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
                                          '2012-01-02']), name='xxx')
        tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
                   name='xxx')
        for prop in ok_for_dt:

            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == object)

        result = s.dt.tz_convert('CET')
        expected = Series(s._values.tz_convert('CET'),
                          index=s.index, name='xxx')
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        self.assertEqual(str(tz_result), 'CET')
        freq_result = s.dt.freq
        self.assertEqual(freq_result, DatetimeIndex(s.values,
                                                    freq='infer').freq)

        # timedeltaindex
        cases = [Series(timedelta_range('1 day', periods=5),
                        index=list('abcde'), name='xxx'),
                 Series(timedelta_range('1 day 01:23:45', periods=5,
                        freq='s'), name='xxx'),
                 Series(timedelta_range('2 days 01:23:45.012345', periods=5,
                        freq='ms'), name='xxx')]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            self.assertIsInstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            self.assertIsInstance(result, np.ndarray)
            self.assertTrue(result.dtype == object)

            result = s.dt.total_seconds()
            self.assertIsInstance(result, pd.Series)
            self.assertTrue(result.dtype == 'float64')

            freq_result = s.dt.freq
            self.assertEqual(freq_result, TimedeltaIndex(s.values,
                                                         freq='infer').freq)

        # both
        index = date_range('20130101', periods=3, freq='D')
        s = Series(date_range('20140204', periods=3, freq='s'),
                   index=index, name='xxx')
        exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
                     index=index, name='xxx')
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype='int64'),
                     index=index, name='xxx')
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype='int64'),
                     index=index, name='xxx')
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name='xxx')
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [Series(period_range('20130101', periods=5, freq='D'),
                        name='xxx')]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            self.assertEqual(freq_result, PeriodIndex(s.values).freq)

        # test limited display api
        def get_dir(s):
            # Sorted, de-duplicated public names reported by the accessor.
            results = [r for r in s.dt.__dir__() if not r.startswith('_')]
            return list(sorted(set(results)))

        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))

        s = Series(period_range('20130101', periods=5,
                                freq='D', name='xxx').asobject)
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_period + ok_for_period_methods))))

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range('2015-01-01', '2016-01-01',
                                 freq='T'), name='xxx')
        s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
        exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T',
                                   tz='UTC').tz_convert('America/Chicago')
        expected = Series(exp_values, name='xxx')
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        with tm.assertRaisesRegexp(ValueError, "modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context('chained_assignment', 'raise'):

            def f():
                s.dt.hour[0] = 5

            self.assertRaises(com.SettingWithCopyError, f)
Example #59
0
 def test_hash_error(self):
     # hash() on the index is expected to raise a TypeError whose message
     # names the index's class.
     idx = timedelta_range('1 days', periods=10)
     pattern = "unhashable type: %r" % type(idx).__name__
     with tm.assertRaisesRegexp(TypeError, pattern):
         hash(idx)
Example #60
0
    def test_shift(self):
        # naive shift, forwards then backwards: the index is unchanged and
        # column 'A' matches shifting that column on its own
        for periods in (5, -5):
            shifted_frame = self.tsframe.shift(periods)
            self.assert_index_equal(shifted_frame.index, self.tsframe.index)

            shifted_series = self.tsframe['A'].shift(periods)
            assert_series_equal(shifted_frame['A'], shifted_series)

        # shift by 0
        assert_frame_equal(self.tsframe.shift(0), self.tsframe)

        # shift by DateOffset
        by_offset = self.tsframe.shift(5, freq=offsets.BDay())
        self.assertEqual(len(by_offset), len(self.tsframe))

        by_alias = self.tsframe.shift(5, freq='B')
        assert_frame_equal(by_offset, by_alias)

        first_date = self.tsframe.index[0]
        moved_date = first_date + offsets.BDay(5)
        assert_series_equal(self.tsframe.xs(first_date),
                            by_offset.xs(moved_date),
                            check_names=False)

        # shift int frame (result is not inspected)
        self.intframe.shift(1)

        # Shifting with PeriodIndex
        ps = tm.makePeriodFrame()
        shifted = ps.shift(1)
        unshifted = shifted.shift(-1)
        self.assert_index_equal(shifted.index, ps.index)
        self.assert_index_equal(unshifted.index, ps.index)
        round_trip = unshifted.ix[:, 0].valid().values
        tm.assert_numpy_array_equal(round_trip, ps.ix[:-1, 0].values)

        by_alias = ps.shift(1, 'B')
        by_offset = ps.shift(1, offsets.BDay())
        assert_frame_equal(by_alias, by_offset)
        assert_frame_equal(ps, by_alias.shift(-1, 'B'))

        with assertRaisesRegexp(ValueError,
                                'does not match PeriodIndex freq'):
            ps.shift(freq='D')

        # shift other axis (GH 6371), then shift named axis
        for axis in (1, 'columns'):
            df = DataFrame(np.random.rand(10, 5))
            nan_column = DataFrame(np.nan, index=df.index, columns=[0])
            expected = pd.concat([nan_column, df.iloc[:, 0:-1]],
                                 ignore_index=True,
                                 axis=1)
            result = df.shift(1, axis=axis)
            assert_frame_equal(result, expected)