Beispiel #1
0
    def test_reset_index(self):
        """Check ``Series.reset_index`` naming, ``inplace`` and ``level``."""
        stacked = tm.makeDataFrame()[:5].stack()
        stacked.index.names = ['hash', 'category']
        stacked.name = 'value'

        # The series name labels the value column by default ...
        frame = stacked.reset_index()
        self.assertIn('value', frame)

        # ... and an explicit ``name`` replaces it.
        frame = stacked.reset_index(name='value2')
        self.assertIn('value2', frame)

        # check inplace: ``alias`` is the very same object as ``stacked``
        fresh = stacked.reset_index(drop=True)
        alias = stacked
        alias.reset_index(drop=True, inplace=True)
        assert_series_equal(fresh, alias)

        # level
        level_values = [['bar'], ['one', 'two', 'three'], [0, 1]]
        level_codes = [[0, 0, 0, 0, 0, 0],
                       [0, 1, 2, 0, 1, 2],
                       [0, 1, 0, 1, 0, 1]]
        mi = MultiIndex(levels=level_values, labels=level_codes)
        values = Series(np.random.randn(6), index=mi)

        # Resetting a single level is expected to give two columns.
        moved = values.reset_index(level=1)
        self.assertEqual(len(moved.columns), 2)

        # Dropping levels 0 and 2 should leave a Series indexed by level 1.
        remaining = values.reset_index(level=[0, 2], drop=True)
        level_one = Index(mi.get_level_values(1))
        self.assert_index_equal(remaining.index, level_one)
        tm.assertIsInstance(remaining, Series)
Beispiel #2
0
    def test_reset_index(self):
        """Check ``Series.reset_index`` naming, ``inplace`` and ``level``."""
        stacked = tm.makeDataFrame()[:5].stack()
        stacked.index.names = ["hash", "category"]
        stacked.name = "value"

        # The series name labels the value column by default ...
        frame = stacked.reset_index()
        self.assertIn("value", frame)

        # ... and an explicit ``name`` replaces it.
        frame = stacked.reset_index(name="value2")
        self.assertIn("value2", frame)

        # check inplace: ``alias`` is the very same object as ``stacked``
        fresh = stacked.reset_index(drop=True)
        alias = stacked
        alias.reset_index(drop=True, inplace=True)
        assert_series_equal(fresh, alias)

        # level
        level_values = [["bar"], ["one", "two", "three"], [0, 1]]
        level_codes = [
            [0, 0, 0, 0, 0, 0],
            [0, 1, 2, 0, 1, 2],
            [0, 1, 0, 1, 0, 1],
        ]
        mi = MultiIndex(levels=level_values, labels=level_codes)
        values = Series(np.random.randn(6), index=mi)

        # Resetting a single level is expected to give two columns.
        moved = values.reset_index(level=1)
        self.assertEqual(len(moved.columns), 2)

        # Dropping levels 0 and 2 should leave a Series indexed by level 1.
        remaining = values.reset_index(level=[0, 2], drop=True)
        level_one = Index(mi.get_level_values(1))
        self.assertTrue(remaining.index.equals(level_one))
        tm.assertIsInstance(remaining, Series)
Beispiel #3
0
    def test_aggregate_item_by_item(self):
        """Aggregate every column by group size and compare to group counts."""
        # NOTE(review): ``scratch`` gains an 'E' column, yet the grouping below
        # is built from ``self.df`` -- confirm whether that is intended.
        scratch = self.df.copy()
        scratch['E'] = ['a'] * len(self.df)
        by_a = self.df.groupby('A')

        # API change in 0.11
        # def aggfun(ser):
        #     return len(ser + 'a')
        # result = grouped.agg(aggfun)
        # self.assertEqual(len(result.columns), 1)

        size_of = lambda ser: ser.size
        sizes = by_a.agg(size_of)
        n_foo = (self.df.A == 'foo').sum()
        n_bar = (self.df.A == 'bar').sum()
        n_cols = len(sizes.columns)

        # GH5782
        # odd comparisons can result here, so cast to make easy
        want_foo = pd.Series(np.array([n_foo] * n_cols),
                             index=list('BCD'),
                             dtype=np.float64,
                             name='foo')
        tm.assert_series_equal(sizes.xs('foo'), want_foo)

        want_bar = pd.Series(np.array([n_bar] * n_cols),
                             index=list('BCD'),
                             dtype=np.float64,
                             name='bar')
        tm.assert_almost_equal(sizes.xs('bar'), want_bar)

        def aggfun(ser):
            return ser.size

        # Aggregating an empty frame must still give an (empty) DataFrame.
        from_empty = DataFrame().groupby(self.df.A).agg(aggfun)
        tm.assertIsInstance(from_empty, DataFrame)
        self.assertEqual(len(from_empty), 0)
Beispiel #4
0
 def _check_rng(rng):
     """Verify ``rng.to_pydatetime()`` element by element against ``rng``."""
     as_py = rng.to_pydatetime()
     tm.assertIsInstance(as_py, np.ndarray)
     for py_value, original in zip(as_py, rng):
         tm.assertIsInstance(py_value, datetime)
         # value and timezone must both match the source timestamp
         self.assertEqual(py_value, original.to_pydatetime())
         self.assertEqual(py_value.tzinfo, original.tzinfo)
Beispiel #5
0
    def test_join_inner(self):
        """Inner-join ``self.index`` with unsorted and sorted ``Int64Index``."""
        unsorted_other = Int64Index([7, 12, 25, 1, 2, 5])
        sorted_other = Int64Index([1, 2, 5, 7, 12, 25])

        # not monotonic
        joined, left_pos, right_pos = self.index.join(
            unsorted_other, how="inner", return_indexers=True)

        # no guarantee of sortedness, so sort for comparison purposes
        order = joined.argsort()
        joined = joined.take(order)
        left_pos = left_pos.take(order)
        right_pos = right_pos.take(order)

        want_index = Int64Index([2, 12])
        want_left = np.array([1, 6], dtype=np.intp)
        want_right = np.array([4, 1], dtype=np.intp)

        tm.assertIsInstance(joined, Int64Index)
        self.assert_index_equal(joined, want_index)
        tm.assert_numpy_array_equal(left_pos, want_left)
        tm.assert_numpy_array_equal(right_pos, want_right)

        # monotonic
        joined, left_pos, right_pos = self.index.join(
            sorted_other, how="inner", return_indexers=True)

        # the inner join must agree with a plain intersection
        via_intersection = self.index.intersection(sorted_other)
        self.assert_index_equal(joined, via_intersection)

        want_left = np.array([1, 6], dtype=np.intp)
        want_right = np.array([1, 4], dtype=np.intp)
        tm.assertIsInstance(joined, Int64Index)
        self.assert_index_equal(joined, want_index)
        tm.assert_numpy_array_equal(left_pos, want_left)
        tm.assert_numpy_array_equal(right_pos, want_right)
Beispiel #6
0
    def test_groupby_groups_datetimeindex(self):
        """Check ``groups``/``get_group`` on datetime-indexed frames."""
        # #1430
        from pandas.tseries.api import DatetimeIndex
        n_rows = 1000
        stamps = DatetimeIndex(start='2012/1/1', freq='5min', periods=n_rows)
        frame = DataFrame({'high': np.arange(n_rows),
                           'low': np.arange(n_rows)},
                          index=stamps)
        by_day = frame.groupby(lambda x: datetime(x.year, x.month, x.day))

        # it works!
        day_groups = by_day.groups
        first_key = list(day_groups.keys())[0]
        tm.assertIsInstance(first_key, datetime)

        # GH 11442
        index = pd.date_range('2015/01/01', periods=5, name='date')
        df = pd.DataFrame({'A': [5, 6, 7, 8, 9],
                           'B': [1, 2, 3, 4, 5]}, index=index)
        result = df.groupby(level='date').groups
        dates = ['2015-01-05', '2015-01-04', '2015-01-03',
                 '2015-01-02', '2015-01-01']
        expected = {}
        for date in dates:
            expected[pd.Timestamp(date)] = pd.DatetimeIndex([date],
                                                            name='date')
        tm.assert_dict_equal(result, expected)

        # every date string must select its own single-row group
        by_level = df.groupby(level='date')
        for date in dates:
            result = by_level.get_group(date)
            row = [df.loc[date, 'A'], df.loc[date, 'B']]
            expected = pd.DataFrame(
                [row],
                columns=list('AB'),
                index=pd.DatetimeIndex([date], name='date'))
            tm.assert_frame_equal(result, expected)
Beispiel #7
0
    def test_applymap(self):
        """Run ``DataFrame.applymap`` over several kinds of frames."""
        doubled = self.frame.applymap(lambda x: x * 2)
        assert_frame_equal(doubled, self.frame * 2)
        # smoke call only; the value is replaced right below
        result = self.frame.applymap(type)

        # GH #465, function returning tuples
        result = self.frame.applymap(lambda x: (x, x))
        tm.assertIsInstance(result['A'][0], tuple)

        # GH 2909, object conversion to float in constructor?
        mixed_int = DataFrame(data=[1, 'a'])
        result = mixed_int.applymap(lambda x: x)
        self.assertEqual(result.dtypes[0], object)

        mixed_float = DataFrame(data=[1., 'a'])
        result = mixed_float.applymap(lambda x: x)
        self.assertEqual(result.dtypes[0], object)

        # GH2786
        df = DataFrame(np.random.random((3, 4)))
        unique_cols = df.copy()
        dup_names = ['a', 'a', 'a', 'a']
        df.columns = dup_names

        expected = unique_cols.applymap(str)
        expected.columns = dup_names
        result = df.applymap(str)
        assert_frame_equal(result, expected)

        # datetime/timedelta
        df['datetime'] = Timestamp('20130101')
        df['timedelta'] = pd.Timedelta('1 min')
        result = df.applymap(str)
        for column in ('datetime', 'timedelta'):
            want = str(df.loc[0, column])
            self.assertEqual(result.loc[0, column], want)
Beispiel #8
0
 def test_reset_index_range(self):
     """``Series.reset_index`` should produce a ``RangeIndex`` (GH 12071)."""
     # GH 12071
     source = pd.Series(range(2), name="A", dtype="int64")
     flattened = source.reset_index()
     tm.assertIsInstance(flattened.index, RangeIndex)

     wanted = pd.DataFrame(
         [[0, 0], [1, 1]],
         columns=["index", "A"],
         index=RangeIndex(stop=2),
     )
     assert_frame_equal(flattened, wanted)
Beispiel #9
0
    def test_asfreq_datetimeindex(self):
        """``asfreq`` on frame and column must keep a ``DatetimeIndex``."""
        days = [datetime(2011, 11, day) for day in (1, 2, 3)]
        frame = DataFrame({"A": [1, 2, 3]}, index=days)

        frame = frame.asfreq("B")
        tm.assertIsInstance(frame.index, DatetimeIndex)

        column = frame["A"].asfreq("B")
        tm.assertIsInstance(column.index, DatetimeIndex)
Beispiel #10
0
def assert_block_equal(left, right):
    """Assert two blocks share values, dtype and manager locations."""
    tm.assert_numpy_array_equal(left.values, right.values)
    assert left.dtype == right.dtype

    # both placements must be ``BlockPlacement`` objects before comparing
    for block in (left, right):
        tm.assertIsInstance(block.mgr_locs, lib.BlockPlacement)

    left_locs = left.mgr_locs.as_array
    right_locs = right.mgr_locs.as_array
    tm.assert_numpy_array_equal(left_locs, right_locs)
Beispiel #11
0
    def test_groupby_function_tuple_1677(self):
        """Grouping by a tuple-returning function keeps tuple group labels."""
        daily = date_range("1/1/2000", periods=100)
        frame = DataFrame(np.random.rand(100), index=daily)

        by_month = frame.groupby(lambda x: (x.year, x.month))
        means = by_month.mean()

        first_label = means.index[0]
        tm.assertIsInstance(first_label, tuple)
Beispiel #12
0
def _compare_ols_results(model1, model2):
    """Compare two OLS models using the moving or full-sample comparison.

    Models exposing a ``_window_type`` attribute go through
    ``_compare_moving_ols``; all others through ``_compare_fullsample_ols``.
    """
    tm.assertIsInstance(model1, type(model2))

    is_moving = hasattr(model1, '_window_type')
    compare = _compare_moving_ols if is_moving else _compare_fullsample_ols
    compare(model1, model2)
Beispiel #13
0
    def test_to_html(self):
        """Smoke-test ``DataFrame.to_html`` with assorted options."""
        # big mixed
        big = DataFrame({'A': np.random.randn(200),
                         'B': tm.makeStringIndex(200)},
                        index=lrange(200))

        for column in ('A', 'B'):
            big.loc[:20, column] = np.nan
        html = big.to_html()

        # writing to a buffer returns None and yields the same markup
        sink = StringIO()
        returned = big.to_html(buf=sink)
        self.assertIsNone(returned)
        self.assertEqual(sink.getvalue(), html)

        tm.assertIsInstance(html, compat.string_types)

        # the remaining calls only need to run without raising
        swapped = ['B', 'A']
        big.to_html(columns=swapped, col_space=17)
        big.to_html(columns=swapped,
                    formatters={'A': lambda x: '%.1f' % x})

        big.to_html(columns=swapped, float_format=str)
        big.to_html(columns=swapped, col_space=12, float_format=str)

        no_columns = DataFrame(index=np.arange(200))
        no_columns.to_html()
Beispiel #14
0
 def test_file_url(self):
     """``read_html`` on a file URL returns a list of DataFrames."""
     file_url = file_path_to_url(self.banklist_data)
     tables = self.read_html(file_url, 'First', attrs={'id': 'table'})
     tm.assertIsInstance(tables, list)
     for table in tables:
         tm.assertIsInstance(table, DataFrame)
Beispiel #15
0
    def test_join_outer(self):
        """Outer-join ``self.index`` with unsorted and sorted ``Int64Index``."""
        unsorted_other = Int64Index([7, 12, 25, 1, 2, 5])
        sorted_other = Int64Index([1, 2, 5, 7, 12, 25])

        # not monotonic
        # guarantee of sortedness
        joined, left_pos, right_pos = self.index.join(
            unsorted_other, how="outer", return_indexers=True)
        plain = self.index.join(unsorted_other, how="outer")
        self.assert_index_equal(joined, plain)

        want_index = Int64Index(
            [0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25])
        want_left = np.array(
            [0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp)
        want_right = np.array(
            [-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp)

        tm.assertIsInstance(joined, Int64Index)
        self.assert_index_equal(joined, want_index)
        tm.assert_numpy_array_equal(left_pos, want_left)
        tm.assert_numpy_array_equal(right_pos, want_right)

        # monotonic
        joined, left_pos, right_pos = self.index.join(
            sorted_other, how="outer", return_indexers=True)
        plain = self.index.join(sorted_other, how="outer")
        self.assert_index_equal(joined, plain)

        want_left = np.array(
            [0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp)
        want_right = np.array(
            [-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp)
        tm.assertIsInstance(joined, Int64Index)
        self.assert_index_equal(joined, want_index)
        tm.assert_numpy_array_equal(left_pos, want_left)
        tm.assert_numpy_array_equal(right_pos, want_right)
Beispiel #16
0
    def test_array_interface(self):
        """A numpy ufunc on the frame keeps its type and axis objects."""
        frame = self.frame
        rooted = np.sqrt(frame)

        tm.assertIsInstance(rooted, type(frame))
        # identity, not mere equality, of both axes
        self.assertIs(rooted.index, frame.index)
        self.assertIs(rooted.columns, frame.columns)

        assert_frame_equal(rooted, frame.apply(np.sqrt))
Beispiel #17
0
    def test_join_right(self):
        """Right-join ``self.index`` against several integer indexes.

        Covers a non-monotonic and a monotonic ``Int64Index`` on the right
        (where the right indexer is expected to be ``None``) and a non-unique
        left index (where both indexers are compared).
        """
        other = Int64Index([7, 12, 25, 1, 2, 5])
        other_mono = Int64Index([1, 2, 5, 7, 12, 25])

        # not monotonic
        res, lidx, ridx = self.index.join(other, how='right',
                                          return_indexers=True)
        eres = other
        elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp)

        # Check the type of the join *result*; asserting on the input
        # ``other`` could never fail and differs from the sibling
        # inner/outer join tests.
        tm.assertIsInstance(res, Int64Index)
        self.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        self.assertIsNone(ridx)

        # monotonic
        res, lidx, ridx = self.index.join(other_mono, how='right',
                                          return_indexers=True)
        eres = other_mono
        elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp)
        tm.assertIsInstance(res, Int64Index)
        self.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        self.assertIsNone(ridx)

        # non-unique
        idx = Index([1, 1, 2, 5])
        idx2 = Index([1, 2, 5, 7, 9])
        res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True)
        eres = Index([1, 1, 2, 5, 7, 9])  # 1 is in idx2, so it should be x2
        elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
        eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
        self.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)
    def test_parse_dates_column_list(self):
        from pandas.core.datetools import to_datetime

        data = '''date;destination;ventilationcode;unitcode;units;aux_date
01/01/2010;P;P;50;1;12/1/2011
01/01/2010;P;R;50;1;13/1/2011
15/01/2010;P;P;50;1;14/1/2011
01/05/2010;P;P;50;1;15/1/2011'''

        expected = self.read_csv(StringIO(data), sep=";", index_col=lrange(4))

        lev = expected.index.levels[0]
        levels = list(expected.index.levels)
        levels[0] = lev.to_datetime(dayfirst=True)
        # hack to get this to work - remove for final test
        levels[0].name = lev.name
        expected.index.set_levels(levels, inplace=True)
        expected['aux_date'] = to_datetime(expected['aux_date'],
                                           dayfirst=True)
        expected['aux_date'] = lmap(Timestamp, expected['aux_date'])
        tm.assertIsInstance(expected['aux_date'][0], datetime)

        df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4),
                           parse_dates=[0, 5], dayfirst=True)
        tm.assert_frame_equal(df, expected)

        df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4),
                           parse_dates=['date', 'aux_date'], dayfirst=True)
        tm.assert_frame_equal(df, expected)
Beispiel #19
0
    def test_apply_empty_infer_type(self):
        """``apply`` on empty frames returns a Series or DataFrame as fits."""
        no_cols = DataFrame(index=['a', 'b', 'c'])
        no_index = DataFrame(columns=['a', 'b', 'c'])

        def _check(df, f):
            # Probe ``f`` on an empty float array: anything that is not an
            # ndarray is treated as a reduction.
            with warnings.catch_warnings(record=True):
                probe = f(np.array([], dtype='f8'))
            is_reduction = not isinstance(probe, np.ndarray)

            def _checkit(axis=0, raw=False):
                res = df.apply(f, axis=axis, raw=raw)
                if not is_reduction:
                    tm.assertIsInstance(res, DataFrame)
                    return
                agg_axis = df._get_agg_axis(axis)
                tm.assertIsInstance(res, Series)
                self.assertIs(res.index, agg_axis)

            for options in ({}, {'axis': 1}, {'raw': True},
                            {'axis': 0, 'raw': True}):
                _checkit(**options)

        with np.errstate(all='ignore'):
            for frame in (no_cols, no_index):
                _check(frame, lambda x: x)
                _check(frame, lambda x: x.mean())

        broadcasted = no_cols.apply(lambda x: x.mean(), broadcast=True)
        tm.assertIsInstance(broadcasted, DataFrame)
Beispiel #20
0
    def test_daterange_bug_456(self):
        """Union of two custom-business-day ranges is a ``DatetimeIndex``."""
        # GH #456
        single_day = cdate_range('12/5/2011', '12/5/2011')
        several_days = cdate_range('12/2/2011', '12/5/2011')
        several_days.offset = datetools.CDay()

        combined = single_day.union(several_days)
        tm.assertIsInstance(combined, DatetimeIndex)
Beispiel #21
0
    def test_joins(self):
        """Every join kind keeps the ``PeriodIndex`` type and frequency."""
        full = period_range('1/1/2000', '1/20/2000', freq='D')
        shorter = full[:-5]

        for how in ('inner', 'outer', 'left', 'right'):
            result = full.join(shorter, how=how)

            tm.assertIsInstance(result, PeriodIndex)
            self.assertEqual(result.freq, full.freq)
Beispiel #22
0
 def _checkit(axis=0, raw=False):
     """Apply ``f`` to ``df`` and check the container type of the result."""
     res = df.apply(f, axis=axis, raw=raw)
     if not is_reduction:
         tm.assertIsInstance(res, DataFrame)
         return
     # reductions give a Series that reuses the aggregation axis object
     agg_axis = df._get_agg_axis(axis)
     tm.assertIsInstance(res, Series)
     self.assertIs(res.index, agg_axis)
Beispiel #23
0
    def test_copy(self):
        """Copying a sparse frame gives an equal ``SparseDataFrame``."""
        original = self.frame
        duplicate = original.copy()
        tm.assertIsInstance(duplicate, SparseDataFrame)
        tm.assert_sp_frame_equal(duplicate, original)

        # as of v0.15.0
        # this is now identical (but not is_a )
        same_index = duplicate.index.identical(original.index)
        self.assertTrue(same_index)
    def test_unpickle_daterange(self):
        """Load ``daterange_073.pickle`` and check element/offset types."""
        here = os.path.dirname(os.path.abspath(__file__))
        pickle_path = os.path.join(here, 'data', 'daterange_073.pickle')

        loaded = read_pickle(pickle_path)
        tm.assertIsInstance(loaded[0], datetime)
        tm.assertIsInstance(loaded.offset, offsets.BDay)
        self.assertEqual(loaded.values.dtype, object)
Beispiel #25
0
 def test_regex_idempotency(self):
     """``read_html`` accepts a pattern that was compiled twice."""
     file_url = file_path_to_url(self.banklist_data)
     # compiling an already compiled pattern is the point of this test
     pattern = re.compile(re.compile('Florida'))
     tables = self.read_html(file_url, match=pattern,
                             attrs={'id': 'table'})
     tm.assertIsInstance(tables, list)
     for table in tables:
         tm.assertIsInstance(table, DataFrame)
Beispiel #26
0
    def test_constructor_empty(self):
        """An empty ``PeriodIndex`` requires an explicit frequency."""
        empty = pd.PeriodIndex([], freq='M')
        tm.assertIsInstance(empty, PeriodIndex)
        self.assertEqual(len(empty), 0)
        self.assertEqual(empty.freq, 'M')

        # without ``freq`` the constructor is expected to raise
        with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
            pd.PeriodIndex([])
Beispiel #27
0
    def test_coerce_list(self):
        """Check the index class built from a list of integers."""
        values = [1, 2, 3, 4]

        # coerce things
        inferred = Index(values)
        tm.assertIsInstance(inferred, Int64Index)

        # but not if explicit dtype passed
        as_object = Index(values, dtype=object)
        tm.assertIsInstance(as_object, Index)
Beispiel #28
0
    def test_iterator(self):
        # See gh-6607
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True)
        df = self.read_csv(StringIO(self.data1), index_col=0)

        chunk = reader.read(3)
        tm.assert_frame_equal(chunk, df[:3])

        last_chunk = reader.read(5)
        tm.assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = self.read_csv(StringIO(self.data1), index_col=0)

        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[:2])
        tm.assert_frame_equal(chunks[1], df[2:4])
        tm.assert_frame_equal(chunks[2], df[4:])

        # pass skiprows
        parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[1:3])

        treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
                                  iterator=True)
        tm.assertIsInstance(treader, TextFileReader)

        # gh-3967: stopping iteration when chunksize is specified
        data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
        reader = self.read_csv(StringIO(data), iterator=True)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        tm.assert_frame_equal(result[0], expected)

        # chunksize = 1
        reader = self.read_csv(StringIO(data), chunksize=1)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        self.assertEqual(len(result), 3)
        tm.assert_frame_equal(pd.concat(result), expected)

        # skip_footer is not supported with the C parser yet
        if self.engine == 'python':
            # test bad parameter (skip_footer)
            reader = self.read_csv(StringIO(self.data1), index_col=0,
                                   iterator=True, skip_footer=True)
            self.assertRaises(ValueError, reader.read, 3)
Beispiel #29
0
    def test_longpanel_series_combo(self):
        """Fit a windowed panel OLS with a Series ``y`` and a frame ``x``."""
        long_frame = tm.makePanel().to_frame()

        # 'ItemA' becomes the response; the rest stay as regressors
        response = long_frame.pop('ItemA')
        model = ols(y=response, x=long_frame, entity_effects=True, window=20)

        betas = model.beta.values
        self.assertTrue(notnull(betas).all())
        tm.assertIsInstance(model, PanelOLS)
        # bare attribute access: only checks that ``summary`` does not raise
        model.summary
Beispiel #30
0
    def test_append_join_nondatetimeindex(self):
        """Append and join a timedelta range with a string ``Index``."""
        deltas = timedelta_range('1 days', periods=10)
        letters = Index(['a', 'b', 'c', 'd'])

        combined = deltas.append(letters)
        first = combined[0]
        tm.assertIsInstance(first, Timedelta)

        # it works
        deltas.join(letters, how='outer')
    def test_constructor(self):
        """Build ``SparseDataFrame`` objects from several kinds of input."""
        for _, column in compat.iteritems(self.frame):
            tm.assertIsInstance(column, SparseSeries)

        tm.assertIsInstance(self.iframe['A'].sp_index, IntIndex)

        # constructed zframe from matrix above
        zero_filled = self.zframe['A']
        self.assertEqual(zero_filled.fill_value, 0)
        tm.assert_almost_equal([0, 0, 0, 0, 1, 2, 3, 4, 5, 6],
                               zero_filled.values)

        # construct no data
        empty = SparseDataFrame(columns=np.arange(10), index=np.arange(10))
        for _, column in compat.iteritems(empty):
            tm.assertIsInstance(column, SparseSeries)

        # construct from nested dict
        nested = {name: column.to_dict()
                  for name, column in compat.iteritems(self.frame)}

        from_dict = SparseDataFrame(nested)
        tm.assert_sp_frame_equal(from_dict, self.frame)

        # TODO: test data is copied from inputs

        # init dict with different index
        head_index = self.frame.index[:5]
        rebuilt = SparseDataFrame(
            self.frame,
            index=head_index,
            columns=self.frame.columns,
            default_fill_value=self.frame.default_fill_value,
            default_kind=self.frame.default_kind,
            copy=True)
        reindexed = self.frame.reindex(head_index)
        tm.assert_sp_frame_equal(rebuilt, reindexed, exact_indices=False)

        # assert level parameter breaks reindex
        with self.assertRaises(TypeError):
            self.frame.reindex(head_index, level=0)

        repr(self.frame)
Beispiel #32
0
    def test_dense_to_sparse(self):
        """Convert dense frames with ``to_sparse`` and round-trip them."""
        with_nans = DataFrame({'A': [nan, nan, nan, 1, 2],
                               'B': [1, 2, nan, nan, nan]})

        # default conversion: NaN fill value, block sparse index
        sparse = with_nans.to_sparse()
        tm.assertIsInstance(sparse, SparseDataFrame)
        self.assertTrue(np.isnan(sparse.default_fill_value))
        tm.assertIsInstance(sparse['A'].sp_index, BlockIndex)
        tm.assert_frame_equal(sparse.to_dense(), with_nans)

        sparse = with_nans.to_sparse(kind='integer')
        tm.assertIsInstance(sparse['A'].sp_index, IntIndex)

        # explicit zero fill value
        with_zeros = DataFrame({'A': [0, 0, 0, 1, 2],
                                'B': [1, 2, 0, 0, 0]},
                               dtype=float)
        sparse = with_zeros.to_sparse(fill_value=0)
        self.assertEqual(sparse.default_fill_value, 0)
        tm.assert_frame_equal(sparse.to_dense(), with_zeros)
Beispiel #33
0
 def test_multiindex_header_skiprows_tuples(self):
     """Columns of the first table must be an ``Index`` instance."""
     tables = self._bank_data(header=[0, 1], skiprows=1, tupleize_cols=True)
     first = tables[0]
     tm.assertIsInstance(first.columns, Index)
Beispiel #34
0
 def test_multiindex_index(self):
     """Two index columns must give a ``MultiIndex`` row index."""
     tables = self._bank_data(index_col=[0, 1])
     first = tables[0]
     tm.assertIsInstance(first.index, MultiIndex)
Beispiel #35
0
 def test_multiindex_header(self):
     """Two header rows must give ``MultiIndex`` columns."""
     tables = self._bank_data(header=[0, 1])
     first = tables[0]
     tm.assertIsInstance(first.columns, MultiIndex)
Beispiel #36
0
    def test_tolist(self):
        """``tolist`` on a date range yields ``Timestamp`` elements."""
        as_list = date_range('1/1/2000', periods=10).tolist()
        first = as_list[0]
        tm.assertIsInstance(first, Timestamp)
Beispiel #37
0
    def test_objects(self):
        """``algos.unique`` on an object array returns an ``ndarray``."""
        integers = np.random.randint(0, 100, size=50)
        as_objects = integers.astype('O')

        uniques = algos.unique(as_objects)
        tm.assertIsInstance(uniques, np.ndarray)
Beispiel #38
0
 def test_subclass_iterrows(self):
     """``iterrows`` on a subclassed frame yields subclassed series."""
     # GH 13977
     frame = tm.SubclassedDataFrame({'a': [1]})
     for label, row in frame.iterrows():
         tm.assertIsInstance(row, tm.SubclassedSeries)
         tm.assert_series_equal(row, frame.loc[label])
Beispiel #39
0
    def test_isinstance(self):
        """``tm.assertIsInstance`` must fail with an 'Expected type' message."""
        message = "Expected type "
        with assertRaisesRegexp(AssertionError, message):
            tm.assertIsInstance(1, pd.Series)
Beispiel #40
0
 def test_to_frame(self):
     """``to_frame`` on a subclassed series gives a subclassed frame."""
     labels = list('abcd')
     series = tm.SubclassedSeries([1, 2, 3, 4], index=labels, name='xxx')
     framed = series.to_frame()
     wanted = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]},
                                     index=list('abcd'))
     tm.assert_frame_equal(framed, wanted)
     tm.assertIsInstance(framed, tm.SubclassedDataFrame)
Beispiel #41
0
    def test_indexing_sliced(self):
        """Row/column slices of a subclassed frame are subclassed series."""
        # GH 11559
        df = tm.SubclassedDataFrame(
            {'X': [1, 2, 3], 'Y': [4, 5, 6], 'Z': [7, 8, 9]},
            index=['a', 'b', 'c'])

        # (selector, expected values, expected index labels, expected name)
        cases = [
            (lambda frame: frame.loc[:, 'X'], [1, 2, 3], 'abc', 'X'),
            (lambda frame: frame.iloc[:, 1], [4, 5, 6], 'abc', 'Y'),
            (lambda frame: frame.ix[:, 'Z'], [7, 8, 9], 'abc', 'Z'),
            (lambda frame: frame.loc['a', :], [1, 4, 7], 'XYZ', 'a'),
            (lambda frame: frame.iloc[1, :], [2, 5, 8], 'XYZ', 'b'),
            (lambda frame: frame.ix['c', :], [3, 6, 9], 'XYZ', 'c'),
        ]

        for select, values, labels, name in cases:
            res = select(df)
            exp = tm.SubclassedSeries(values, index=list(labels), name=name)
            tm.assert_series_equal(res, exp)
            tm.assertIsInstance(res, tm.SubclassedSeries)
Beispiel #42
0
    def test_convert_array_of_periods(self):
        """A list of periods passed to ``pd.Index`` gives a ``PeriodIndex``."""
        daily = period_range('1/1/2000', periods=20, freq='D')
        as_list = list(daily)

        converted = pd.Index(as_list)
        tm.assertIsInstance(converted, PeriodIndex)
Beispiel #43
0
    def test_iteration(self):
        """Iterating a ``PeriodIndex`` yields ``Period`` objects."""
        business_days = PeriodIndex(start='1/1/10', periods=4, freq='B')

        first = list(business_days)[0]
        tm.assertIsInstance(first, Period)
        self.assertEqual(first.freq, business_days.freq)
Beispiel #44
0
 def test_make_time_series(self):
     """A ``Series`` can be built on top of an annual ``PeriodIndex``."""
     years = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
     ones = Series(1, index=years)
     tm.assertIsInstance(ones, Series)
Beispiel #45
0
 def test_applymap(self):
     """``applymap`` on the sparse frame returns a ``SparseDataFrame``."""
     # just test that it works
     doubled = self.frame.applymap(lambda value: value * 2)
     tm.assertIsInstance(doubled, SparseDataFrame)
Beispiel #46
0
 def test_multiindex_header_index_skiprows(self):
     """Both axes of the first table must be ``MultiIndex`` instances."""
     tables = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)
     first = tables[0]
     tm.assertIsInstance(first.index, MultiIndex)
     tm.assertIsInstance(first.columns, MultiIndex)
Beispiel #47
0
 def test_works_on_valid_markup(self):
     """``read_html`` parses ``valid_markup.html`` into DataFrames."""
     markup_path = os.path.join(DATA_PATH, 'valid_markup.html')
     tables = self.read_html(markup_path, index_col=0)
     tm.assertIsInstance(tables, list)
     tm.assertIsInstance(tables[0], DataFrame)
 def test_mgr_locs(self):
     """``fblock.mgr_locs`` is a ``BlockPlacement`` holding [0, 2, 4]."""
     placement = self.fblock.mgr_locs
     tm.assertIsInstance(placement, lib.BlockPlacement)

     wanted = np.array([0, 2, 4], dtype=np.int64)
     tm.assert_numpy_array_equal(placement.as_array, wanted)
Beispiel #49
0
 def test_series_box_timedelta(self):
     """Scalar access on a timedelta Series returns ``Timedelta``."""
     hourly = timedelta_range('1 day 1 s', periods=5, freq='h')
     series = Series(hourly)
     # both ``[]`` and ``.iat`` access must box the value
     tm.assertIsInstance(series[1], Timedelta)
     tm.assertIsInstance(series.iat[2], Timedelta)
Beispiel #50
0
 def test_spam_no_match(self):
     """``read_html`` without a match pattern yields only DataFrames."""
     tables = self.read_html(self.spam_data)
     for table in tables:
         tm.assertIsInstance(table, DataFrame)
Beispiel #51
0
    def test_margins(self):
        def _check_output(result,
                          values_col,
                          index=['A', 'B'],
                          columns=['C'],
                          margins_col='All'):
            col_margins = result.ix[:-1, margins_col]
            expected_col_margins = self.data.groupby(index)[values_col].mean()
            tm.assert_series_equal(col_margins,
                                   expected_col_margins,
                                   check_names=False)
            self.assertEqual(col_margins.name, margins_col)

            result = result.sortlevel()
            index_margins = result.ix[(margins_col, '')].iloc[:-1]
            expected_ix_margins = self.data.groupby(columns)[values_col].mean()
            tm.assert_series_equal(index_margins,
                                   expected_ix_margins,
                                   check_names=False)
            self.assertEqual(index_margins.name, (margins_col, ''))

            grand_total_margins = result.loc[(margins_col, ''), margins_col]
            expected_total_margins = self.data[values_col].mean()
            self.assertEqual(grand_total_margins, expected_total_margins)

        # column specified
        result = self.data.pivot_table(values='D',
                                       index=['A', 'B'],
                                       columns='C',
                                       margins=True,
                                       aggfunc=np.mean)
        _check_output(result, 'D')

        # Set a different margins_name (not 'All')
        result = self.data.pivot_table(values='D',
                                       index=['A', 'B'],
                                       columns='C',
                                       margins=True,
                                       aggfunc=np.mean,
                                       margins_name='Totals')
        _check_output(result, 'D', margins_col='Totals')

        # no column specified
        table = self.data.pivot_table(index=['A', 'B'],
                                      columns='C',
                                      margins=True,
                                      aggfunc=np.mean)
        for value_col in table.columns.levels[0]:
            _check_output(table[value_col], value_col)

        # no col

        # to help with a buglet
        self.data.columns = [k * 2 for k in self.data.columns]
        table = self.data.pivot_table(index=['AA', 'BB'],
                                      margins=True,
                                      aggfunc=np.mean)
        for value_col in table.columns:
            totals = table.loc[('All', ''), value_col]
            self.assertEqual(totals, self.data[value_col].mean())

        # no rows
        rtable = self.data.pivot_table(columns=['AA', 'BB'],
                                       margins=True,
                                       aggfunc=np.mean)
        tm.assertIsInstance(rtable, Series)

        table = self.data.pivot_table(index=['AA', 'BB'],
                                      margins=True,
                                      aggfunc='mean')
        for item in ['DD', 'EE', 'FF']:
            totals = table.loc[('All', ''), item]
            self.assertEqual(totals, self.data[item].mean())

        # issue number #8349: pivot_table with margins and dictionary aggfunc
        data = [
            {
                'JOB': 'Worker',
                'NAME': 'Bob',
                'YEAR': 2013,
                'MONTH': 12,
                'DAYS': 3,
                'SALARY': 17
            },
            {
                'JOB': 'Employ',
                'NAME': 'Mary',
                'YEAR': 2013,
                'MONTH': 12,
                'DAYS': 5,
                'SALARY': 23
            },
            {
                'JOB': 'Worker',
                'NAME': 'Bob',
                'YEAR': 2014,
                'MONTH': 1,
                'DAYS': 10,
                'SALARY': 100
            },
            {
                'JOB': 'Worker',
                'NAME': 'Bob',
                'YEAR': 2014,
                'MONTH': 1,
                'DAYS': 11,
                'SALARY': 110
            },
            {
                'JOB': 'Employ',
                'NAME': 'Mary',
                'YEAR': 2014,
                'MONTH': 1,
                'DAYS': 15,
                'SALARY': 200
            },
            {
                'JOB': 'Worker',
                'NAME': 'Bob',
                'YEAR': 2014,
                'MONTH': 2,
                'DAYS': 8,
                'SALARY': 80
            },
            {
                'JOB': 'Employ',
                'NAME': 'Mary',
                'YEAR': 2014,
                'MONTH': 2,
                'DAYS': 5,
                'SALARY': 190
            },
        ]

        df = DataFrame(data)

        df = df.set_index(['JOB', 'NAME', 'YEAR', 'MONTH'],
                          drop=False,
                          append=False)

        result = df.pivot_table(index=['JOB', 'NAME'],
                                columns=['YEAR', 'MONTH'],
                                values=['DAYS', 'SALARY'],
                                aggfunc={
                                    'DAYS': 'mean',
                                    'SALARY': 'sum'
                                },
                                margins=True)

        expected = df.pivot_table(index=['JOB', 'NAME'],
                                  columns=['YEAR', 'MONTH'],
                                  values=['DAYS'],
                                  aggfunc='mean',
                                  margins=True)

        tm.assert_frame_equal(result['DAYS'], expected['DAYS'])

        expected = df.pivot_table(index=['JOB', 'NAME'],
                                  columns=['YEAR', 'MONTH'],
                                  values=['SALARY'],
                                  aggfunc='sum',
                                  margins=True)

        tm.assert_frame_equal(result['SALARY'], expected['SALARY'])
Beispiel #52
0
 def test_banklist_no_match(self):
     """Every table read from the banklist data is a ``DataFrame``.

     Tables are selected with ``attrs={'id': 'table'}`` through
     ``self.read_html``.
     """
     parsed_tables = self.read_html(self.banklist_data,
                                    attrs={'id': 'table'})
     for table in parsed_tables:
         tm.assertIsInstance(table, DataFrame)
Beispiel #53
0
 def _validate_periodindex(self, pickled, current):
     """Assert that ``pickled`` equals ``current`` and is month-end based.

     Checks, in order: index equality, that ``pickled.freq`` is a
     ``MonthEnd`` instance equal to ``MonthEnd()``, that ``freqstr`` is
     ``'M'``, and that shifting both indexes by 2 gives equal results.
     """
     tm.assert_index_equal(pickled, current)

     freq = pickled.freq
     tm.assertIsInstance(freq, MonthEnd)
     tm.assert_equal(freq, MonthEnd())
     tm.assert_equal(pickled.freqstr, 'M')

     shifted_pickled = pickled.shift(2)
     shifted_current = current.shift(2)
     tm.assert_index_equal(shifted_pickled, shifted_current)
Beispiel #54
0
 def _check(res):
     """Assert that ``res`` is a boolean ``SparseArray``.

     ``res`` must be a ``SparseArray`` with boolean dtype and a ``bool``
     fill value. ``self`` is taken from the enclosing scope.
     """
     tm.assertIsInstance(res, SparseArray)
     # np.bool_ rather than the np.bool alias: the alias was deprecated in
     # NumPy 1.20 and removed in 1.24, while np.bool_ works on every version.
     self.assertEqual(res.dtype, np.bool_)
     self.assertIsInstance(res.fill_value, bool)
Beispiel #55
0
 def test_constructor_corner(self):
     """A ``Series`` built from a list of DataFrames is still a ``Series``."""
     frame = tm.makeTimeDataFrame()
     # The same frame is used for both elements.
     result = Series([frame, frame], index=[0, 1])
     tm.assertIsInstance(result, Series)
Beispiel #56
0
    def test_ufunc_coercions(self):
        """Float-producing operations on the index yield a ``Float64Index``.

        Covers ``np.sqrt``, ``np.divide`` and the binary operators ``+``,
        ``-``, ``*`` and ``/`` with a float operand. Each result must be a
        ``Float64Index`` named ``'x'`` with the expected values.
        """
        idx = self._holder([1, 2, 3, 4, 5], name='x')

        def check(result, values):
            # Type first, then values and name via the index comparison.
            tm.assertIsInstance(result, Float64Index)
            tm.assert_index_equal(result, Float64Index(values, name='x'))

        # ufuncs applied directly to the index
        check(np.sqrt(idx), np.sqrt(np.array([1, 2, 3, 4, 5])))
        check(np.divide(idx, 2.), [0.5, 1., 1.5, 2., 2.5])

        # _evaluate_numeric_binop
        check(idx + 2., [3., 4., 5., 6., 7.])
        check(idx - 2., [-1., 0., 1., 2., 3.])
        check(idx * 1., [1., 2., 3., 4., 5.])
        check(idx / 2., [0.5, 1., 1.5, 2., 2.5])
Beispiel #57
0
 def test_apply_multi_index(self):
     """Row-wise ``apply`` returning a ``Series`` keeps the ``MultiIndex``."""
     row_index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']])
     frame = DataFrame([[1, 2], [3, 4], [5, 6]],
                       index=row_index,
                       columns=['col1', 'col2'])

     def min_max(row):
         # Reduce one row to its smallest and largest value.
         return Series({'min': min(row), 'max': max(row)})

     res = frame.apply(min_max, axis=1)
     tm.assertIsInstance(res.index, MultiIndex)
Beispiel #58
0
    def test_operators_timedelta64(self):
        """Exercise arithmetic between datetime64 and timedelta64 Series.

        Covers, in order: invalid add/sub on ``self.objSeries``, Series minus
        Series, Series plus a timedelta Series, subtraction of a scalar
        ``Timestamp``, subtraction of ``datetime`` scalars with a round trip,
        add/sub of ``timedelta`` scalars with a round trip, and in-place
        addition on a single element.
        """

        # invalid ops
        # NOTE(review): self.objSeries is defined outside this block;
        # presumably an object-dtype Series -- confirm against the fixture.
        self.assertRaises(Exception, self.objSeries.__add__, 1)
        self.assertRaises(Exception, self.objSeries.__add__,
                          np.array(1, dtype=np.int64))
        self.assertRaises(Exception, self.objSeries.__sub__, 1)
        self.assertRaises(Exception, self.objSeries.__sub__,
                          np.array(1, dtype=np.int64))

        # series ops
        v1 = date_range('2012-1-1', periods=3, freq='D')
        v2 = date_range('2012-1-2', periods=3, freq='D')
        rs = Series(v2) - Series(v1)
        # v2 is v1 shifted by one day, so every difference is one day,
        # written here as nanoseconds (1e9 ns/s * 3600 s/h * 24 h).
        xp = Series(1e9 * 3600 * 24,
                    rs.index).astype('int64').astype('timedelta64[ns]')
        assert_series_equal(rs, xp)
        self.assertEqual(rs.dtype, 'timedelta64[ns]')

        df = DataFrame(dict(A=v1))
        td = Series([timedelta(days=i) for i in range(3)])
        self.assertEqual(td.dtype, 'timedelta64[ns]')

        # series on the rhs
        result = df['A'] - df['A'].shift()
        self.assertEqual(result.dtype, 'timedelta64[ns]')

        result = df['A'] + td
        self.assertEqual(result.dtype, 'M8[ns]')

        # scalar Timestamp on rhs
        maxa = df['A'].max()
        tm.assertIsInstance(maxa, Timestamp)

        resultb = df['A'] - df['A'].max()
        self.assertEqual(resultb.dtype, 'timedelta64[ns]')

        # timestamp on lhs
        # resultb holds offsets of -2, -1 and 0 days from the maximum, so
        # adding it back to A gives 2011-12-30, 2012-01-01 and 2012-01-03.
        result = resultb + df['A']
        values = [
            Timestamp('20111230'),
            Timestamp('20120101'),
            Timestamp('20120103')
        ]
        expected = Series(values, name='A')
        assert_series_equal(result, expected)

        # datetimes on rhs
        # 2001-01-01 to 2012-01-01 is 4017 days (11 years, two leap days).
        result = df['A'] - datetime(2001, 1, 1)
        expected = Series([timedelta(days=4017 + i) for i in range(3)],
                          name='A')
        assert_series_equal(result, expected)
        self.assertEqual(result.dtype, 'm8[ns]')

        d = datetime(2001, 1, 1, 3, 4)
        resulta = df['A'] - d
        self.assertEqual(resulta.dtype, 'm8[ns]')

        # roundtrip
        resultb = resulta + d
        assert_series_equal(df['A'], resultb)

        # timedeltas on rhs
        td = timedelta(days=1)
        resulta = df['A'] + td
        resultb = resulta - td
        assert_series_equal(resultb, df['A'])
        self.assertEqual(resultb.dtype, 'M8[ns]')

        # roundtrip
        td = timedelta(minutes=5, seconds=3)
        resulta = df['A'] + td
        resultb = resulta - td
        assert_series_equal(df['A'], resultb)
        self.assertEqual(resultb.dtype, 'M8[ns]')

        # inplace
        # Compute the expected value before mutating rs[2] in place.
        value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1))
        rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1))
        self.assertEqual(rs[2], value)
Beispiel #59
0
def compare_index_period(result, expected, typ, version):
    """Assert that ``result`` equals ``expected`` and is month-end based.

    Checks, in order: index equality, that ``result.freq`` is a
    ``MonthEnd`` instance equal to ``MonthEnd()``, that ``freqstr`` is
    ``'M'``, and that shifting both indexes by 2 gives equal results.

    ``typ`` and ``version`` are accepted but not used in this function.
    """
    tm.assert_index_equal(result, expected)

    freq = result.freq
    tm.assertIsInstance(freq, MonthEnd)
    tm.assert_equal(freq, MonthEnd())
    tm.assert_equal(result.freqstr, 'M')

    shifted_result = result.shift(2)
    shifted_expected = expected.shift(2)
    tm.assert_index_equal(shifted_result, shifted_expected)
Beispiel #60
0
 def check_result(self, result, expected, klass=None):
     """Assert that ``result`` is a ``klass`` instance equal to ``expected``.

     When ``klass`` is falsy (including the default ``None``), ``self.klass``
     is used instead.
     """
     if not klass:
         klass = self.klass
     assertIsInstance(result, klass)
     self.assertEqual(result, expected)