Beispiel #1
0
    def test_reset_index(self):
        df = tm.makeDataFrame()[:5]
        ser = df.stack()
        ser.index.names = ['hash', 'category']

        ser.name = 'value'
        df = ser.reset_index()
        self.assertIn('value', df)

        df = ser.reset_index(name='value2')
        self.assertIn('value2', df)

        # check inplace
        s = ser.reset_index(drop=True)
        s2 = ser
        s2.reset_index(drop=True, inplace=True)
        assert_series_equal(s, s2)

        # level
        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                                   [0, 1, 0, 1, 0, 1]])
        s = Series(np.random.randn(6), index=index)
        rs = s.reset_index(level=1)
        self.assertEqual(len(rs.columns), 2)

        rs = s.reset_index(level=[0, 2], drop=True)
        self.assertTrue(rs.index.equals(Index(index.get_level_values(1))))
        tm.assertIsInstance(rs, Series)
    def test_reset_index(self):
        df = tm.makeDataFrame()[:5]
        ser = df.stack()
        ser.index.names = ['hash', 'category']

        ser.name = 'value'
        df = ser.reset_index()
        assert 'value' in df

        df = ser.reset_index(name='value2')
        assert 'value2' in df

        # check inplace
        s = ser.reset_index(drop=True)
        s2 = ser
        s2.reset_index(drop=True, inplace=True)
        tm.assert_series_equal(s, s2)

        # level
        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                                  [0, 1, 0, 1, 0, 1]])
        s = Series(np.random.randn(6), index=index)
        rs = s.reset_index(level=1)
        assert len(rs.columns) == 2

        rs = s.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
        assert isinstance(rs, Series)
Beispiel #3
0
    def test_reset_index(self):
        df = tm.makeDataFrame()[:5]
        ser = df.stack()
        ser.index.names = ["hash", "category"]

        ser.name = "value"
        df = ser.reset_index()
        self.assertIn("value", df)

        df = ser.reset_index(name="value2")
        self.assertIn("value2", df)

        # check inplace
        s = ser.reset_index(drop=True)
        s2 = ser
        s2.reset_index(drop=True, inplace=True)
        assert_series_equal(s, s2)

        # level
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        s = Series(np.random.randn(6), index=index)
        rs = s.reset_index(level=1)
        self.assertEqual(len(rs.columns), 2)

        rs = s.reset_index(level=[0, 2], drop=True)
        self.assertTrue(rs.index.equals(Index(index.get_level_values(1))))
        tm.assertIsInstance(rs, Series)
Beispiel #4
0
    def test_set_index_cast_datetimeindex(self):
        df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i)
                              for i in range(1000)],
                        'B': np.random.randn(1000)})

        idf = df.set_index('A')
        assert isinstance(idf.index, pd.DatetimeIndex)

        # don't cast a DatetimeIndex WITH a tz, leave as object
        # GH 6032
        i = (pd.DatetimeIndex(
            to_datetime(['2013-1-1 13:00',
                         '2013-1-2 14:00'], errors="raise"))
             .tz_localize('US/Pacific'))
        df = DataFrame(np.random.randn(2, 1), columns=['A'])

        expected = Series(np.array([pd.Timestamp('2013-01-01 13:00:00-0800',
                                                 tz='US/Pacific'),
                                    pd.Timestamp('2013-01-02 14:00:00-0800',
                                                 tz='US/Pacific')],
                                   dtype="object"))

        # convert index to series
        result = Series(i)
        assert_series_equal(result, expected)

        # assignt to frame
        df['B'] = i
        result = df['B']
        assert_series_equal(result, expected, check_names=False)
        assert result.name == 'B'

        # keep the timezone
        result = i.to_series(keep_tz=True)
        assert_series_equal(result.reset_index(drop=True), expected)

        # convert to utc
        df['C'] = i.to_series().reset_index(drop=True)
        result = df['C']
        comp = pd.DatetimeIndex(expected.values)
        comp = comp.tz_localize(None)
        tm.assert_numpy_array_equal(result.values, comp.values)

        # list of datetimes with a tz
        df['D'] = i.to_pydatetime()
        result = df['D']
        assert_series_equal(result, expected, check_names=False)
        assert result.name == 'D'

        # GH 6785
        # set the index manually
        import pytz
        df = DataFrame(
            [{'ts': datetime(2014, 4, 1, tzinfo=pytz.utc), 'foo': 1}])
        expected = df.set_index('ts')
        df.index = df['ts']
        df.pop('ts')
        assert_frame_equal(df, expected)
 def test_reset_index_range(self):
     # GH 12071
     s = Series(range(2), name='A', dtype='int64')
     series_result = s.reset_index()
     assert isinstance(series_result.index, RangeIndex)
     series_expected = DataFrame([[0, 0], [1, 1]],
                                 columns=['index', 'A'],
                                 index=RangeIndex(stop=2))
     tm.assert_frame_equal(series_result, series_expected)
 def test_droplevel(self):
     # GH20342
     ser = Series([1, 2, 3, 4])
     ser.index = MultiIndex.from_arrays([(1, 2, 3, 4), (5, 6, 7, 8)],
                                        names=['a', 'b'])
     expected = ser.reset_index('b', drop=True)
     result = ser.droplevel('b', axis='index')
     tm.assert_series_equal(result, expected)
     # test that droplevel raises ValueError on axis != 0
     with pytest.raises(ValueError):
         ser.droplevel(1, axis='columns')
Beispiel #7
0
    def test_reset_index_right_dtype(self):
        time = np.arange(0.0, 10, np.sqrt(2) / 2)
        s1 = Series((9.81 * time ** 2) / 2,
                    index=Index(time, name='time'),
                    name='speed')
        df = DataFrame(s1)

        resetted = s1.reset_index()
        assert resetted['time'].dtype == np.float64

        resetted = df.reset_index()
        assert resetted['time'].dtype == np.float64
    def test_reset_index_drop_errors(self):
        #  GH 20925

        # KeyError raised for series index when passed level name is missing
        s = Series(range(4))
        with pytest.raises(KeyError, match='must be same as name'):
            s.reset_index('wrong', drop=True)
        with pytest.raises(KeyError, match='must be same as name'):
            s.reset_index('wrong')

        # KeyError raised for series when level to be dropped is missing
        s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
        with pytest.raises(KeyError, match='not found'):
            s.reset_index('wrong', drop=True)
    def test_factor_rank_autocorrelation(self, factor_values,
                                         sector_values, end_date,
                                         time_rule, by_sector,
                                         expected_vals):
        dr = date_range(start='2015-1-1', end=end_date)
        dr.name = 'date'
        tickers = ['A', 'B', 'C', 'D']
        factor_df = DataFrame(index=dr, columns=tickers, data=factor_values).stack()
        factor_df.index = factor_df.index.set_names(['date', 'asset'])

        factor = Series(factor_df)
        factor.name = 'factor'
        factor = factor.reset_index()
        factor['sector'] = sector_values
        factor = factor.set_index(['date', 'asset', 'sector']).factor

        fa = factor_rank_autocorrelation(factor, time_rule, by_sector)
        expected = Series(index=fa.index, data=expected_vals)

        assert_series_equal(fa, expected)
Beispiel #10
0
 def test_reset_index_name(self):
     s = Series([1, 2, 3], index=Index(range(3), name='x'))
     assert s.reset_index().index.name is None
     assert s.reset_index(drop=True).index.name is None
Beispiel #11
0
    def test_set_index_cast_datetimeindex(self):
        df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i)
                              for i in range(1000)],
                        'B': np.random.randn(1000)})

        idf = df.set_index('A')
        assert isinstance(idf.index, pd.DatetimeIndex)

        # don't cast a DatetimeIndex WITH a tz, leave as object
        # GH 6032
        i = (pd.DatetimeIndex(
            to_datetime(['2013-1-1 13:00',
                         '2013-1-2 14:00'], errors="raise"))
             .tz_localize('US/Pacific'))
        df = DataFrame(np.random.randn(2, 1), columns=['A'])

        expected = Series(np.array([pd.Timestamp('2013-01-01 13:00:00-0800',
                                                 tz='US/Pacific'),
                                    pd.Timestamp('2013-01-02 14:00:00-0800',
                                                 tz='US/Pacific')],
                                   dtype="object"))

        # convert index to series
        result = Series(i)
        assert_series_equal(result, expected)

        # assignt to frame
        df['B'] = i
        result = df['B']
        assert_series_equal(result, expected, check_names=False)
        assert result.name == 'B'

        # keep the timezone
        result = i.to_series(keep_tz=True)
        assert_series_equal(result.reset_index(drop=True), expected)

        # convert to utc
        df['C'] = i.to_series().reset_index(drop=True)
        result = df['C']
        comp = pd.DatetimeIndex(expected.values).copy()
        comp.tz = None
        tm.assert_numpy_array_equal(result.values, comp.values)

        # list of datetimes with a tz
        df['D'] = i.to_pydatetime()
        result = df['D']
        assert_series_equal(result, expected, check_names=False)
        assert result.name == 'D'

        # GH 6785
        # set the index manually
        import pytz
        df = DataFrame(
            [{'ts': datetime(2014, 4, 1, tzinfo=pytz.utc), 'foo': 1}])
        expected = df.set_index('ts')
        df.index = df['ts']
        df.pop('ts')
        assert_frame_equal(df, expected)

        # GH 3950
        # reset_index with single level
        for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']:
            idx = pd.date_range('1/1/2011', periods=5,
                                freq='D', tz=tz, name='idx')
            df = pd.DataFrame(
                {'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx)

            expected = pd.DataFrame({'idx': [datetime(2011, 1, 1),
                                             datetime(2011, 1, 2),
                                             datetime(2011, 1, 3),
                                             datetime(2011, 1, 4),
                                             datetime(2011, 1, 5)],
                                     'a': range(5),
                                     'b': ['A', 'B', 'C', 'D', 'E']},
                                    columns=['idx', 'a', 'b'])
            expected['idx'] = expected['idx'].apply(
                lambda d: pd.Timestamp(d, tz=tz))
            assert_frame_equal(df.reset_index(), expected)