def test_reset_index(self): df = tm.makeDataFrame()[:5] ser = df.stack() ser.index.names = ['hash', 'category'] ser.name = 'value' df = ser.reset_index() self.assertIn('value', df) df = ser.reset_index(name='value2') self.assertIn('value2', df) # check inplace s = ser.reset_index(drop=True) s2 = ser s2.reset_index(drop=True, inplace=True) assert_series_equal(s, s2) # level index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) rs = s.reset_index(level=1) self.assertEqual(len(rs.columns), 2) rs = s.reset_index(level=[0, 2], drop=True) self.assert_index_equal(rs.index, Index(index.get_level_values(1))) tm.assertIsInstance(rs, Series)
def test_reset_index(self): df = tm.makeDataFrame()[:5] ser = df.stack() ser.index.names = ["hash", "category"] ser.name = "value" df = ser.reset_index() self.assertIn("value", df) df = ser.reset_index(name="value2") self.assertIn("value2", df) # check inplace s = ser.reset_index(drop=True) s2 = ser s2.reset_index(drop=True, inplace=True) assert_series_equal(s, s2) # level index = MultiIndex( levels=[["bar"], ["one", "two", "three"], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], ) s = Series(np.random.randn(6), index=index) rs = s.reset_index(level=1) self.assertEqual(len(rs.columns), 2) rs = s.reset_index(level=[0, 2], drop=True) self.assertTrue(rs.index.equals(Index(index.get_level_values(1)))) tm.assertIsInstance(rs, Series)
def test_aggregate_item_by_item(self): df = self.df.copy() df['E'] = ['a'] * len(self.df) grouped = self.df.groupby('A') # API change in 0.11 # def aggfun(ser): # return len(ser + 'a') # result = grouped.agg(aggfun) # self.assertEqual(len(result.columns), 1) aggfun = lambda ser: ser.size result = grouped.agg(aggfun) foo = (self.df.A == 'foo').sum() bar = (self.df.A == 'bar').sum() K = len(result.columns) # GH5782 # odd comparisons can result here, so cast to make easy exp = pd.Series(np.array([foo] * K), index=list('BCD'), dtype=np.float64, name='foo') tm.assert_series_equal(result.xs('foo'), exp) exp = pd.Series(np.array([bar] * K), index=list('BCD'), dtype=np.float64, name='bar') tm.assert_almost_equal(result.xs('bar'), exp) def aggfun(ser): return ser.size result = DataFrame().groupby(self.df.A).agg(aggfun) tm.assertIsInstance(result, DataFrame) self.assertEqual(len(result), 0)
def _check_rng(rng): converted = rng.to_pydatetime() tm.assertIsInstance(converted, np.ndarray) for x, stamp in zip(converted, rng): tm.assertIsInstance(x, datetime) self.assertEqual(x, stamp.to_pydatetime()) self.assertEqual(x.tzinfo, stamp.tzinfo)
def test_join_inner(self): other = Int64Index([7, 12, 25, 1, 2, 5]) other_mono = Int64Index([1, 2, 5, 7, 12, 25]) # not monotonic res, lidx, ridx = self.index.join(other, how="inner", return_indexers=True) # no guarantee of sortedness, so sort for comparison purposes ind = res.argsort() res = res.take(ind) lidx = lidx.take(ind) ridx = ridx.take(ind) eres = Int64Index([2, 12]) elidx = np.array([1, 6], dtype=np.intp) eridx = np.array([4, 1], dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) # monotonic res, lidx, ridx = self.index.join(other_mono, how="inner", return_indexers=True) res2 = self.index.intersection(other_mono) self.assert_index_equal(res, res2) elidx = np.array([1, 6], dtype=np.intp) eridx = np.array([1, 4], dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx)
def test_groupby_groups_datetimeindex(self): # #1430 from pandas.tseries.api import DatetimeIndex periods = 1000 ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) df = DataFrame({'high': np.arange(periods), 'low': np.arange(periods)}, index=ind) grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) # it works! groups = grouped.groups tm.assertIsInstance(list(groups.keys())[0], datetime) # GH 11442 index = pd.date_range('2015/01/01', periods=5, name='date') df = pd.DataFrame({'A': [5, 6, 7, 8, 9], 'B': [1, 2, 3, 4, 5]}, index=index) result = df.groupby(level='date').groups dates = ['2015-01-05', '2015-01-04', '2015-01-03', '2015-01-02', '2015-01-01'] expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date') for date in dates} tm.assert_dict_equal(result, expected) grouped = df.groupby(level='date') for date in dates: result = grouped.get_group(date) data = [[df.loc[date, 'A'], df.loc[date, 'B']]] expected_index = pd.DatetimeIndex([date], name='date') expected = pd.DataFrame(data, columns=list('AB'), index=expected_index) tm.assert_frame_equal(result, expected)
def test_applymap(self): applied = self.frame.applymap(lambda x: x * 2) assert_frame_equal(applied, self.frame * 2) result = self.frame.applymap(type) # GH #465, function returning tuples result = self.frame.applymap(lambda x: (x, x)) tm.assertIsInstance(result['A'][0], tuple) # GH 2909, object conversion to float in constructor? df = DataFrame(data=[1, 'a']) result = df.applymap(lambda x: x) self.assertEqual(result.dtypes[0], object) df = DataFrame(data=[1., 'a']) result = df.applymap(lambda x: x) self.assertEqual(result.dtypes[0], object) # GH2786 df = DataFrame(np.random.random((3, 4))) df2 = df.copy() cols = ['a', 'a', 'a', 'a'] df.columns = cols expected = df2.applymap(str) expected.columns = cols result = df.applymap(str) assert_frame_equal(result, expected) # datetime/timedelta df['datetime'] = Timestamp('20130101') df['timedelta'] = pd.Timedelta('1 min') result = df.applymap(str) for f in ['datetime', 'timedelta']: self.assertEqual(result.loc[0, f], str(df.loc[0, f]))
def test_reset_index_range(self): # GH 12071 s = pd.Series(range(2), name="A", dtype="int64") series_result = s.reset_index() tm.assertIsInstance(series_result.index, RangeIndex) series_expected = pd.DataFrame([[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)) assert_frame_equal(series_result, series_expected)
def test_asfreq_datetimeindex(self): df = DataFrame({"A": [1, 2, 3]}, index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)]) df = df.asfreq("B") tm.assertIsInstance(df.index, DatetimeIndex) ts = df["A"].asfreq("B") tm.assertIsInstance(ts.index, DatetimeIndex)
def assert_block_equal(left, right): tm.assert_numpy_array_equal(left.values, right.values) assert (left.dtype == right.dtype) tm.assertIsInstance(left.mgr_locs, lib.BlockPlacement) tm.assertIsInstance(right.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array)
def test_groupby_function_tuple_1677(self): df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) monthly_group = df.groupby(lambda x: (x.year, x.month)) result = monthly_group.mean() tm.assertIsInstance(result.index[0], tuple)
def _compare_ols_results(model1, model2): tm.assertIsInstance(model1, type(model2)) if hasattr(model1, '_window_type'): _compare_moving_ols(model1, model2) else: _compare_fullsample_ols(model1, model2)
def test_to_html(self): # big mixed biggie = DataFrame({'A': np.random.randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) biggie.loc[:20, 'A'] = np.nan biggie.loc[:20, 'B'] = np.nan s = biggie.to_html() buf = StringIO() retval = biggie.to_html(buf=buf) self.assertIsNone(retval) self.assertEqual(buf.getvalue(), s) tm.assertIsInstance(s, compat.string_types) biggie.to_html(columns=['B', 'A'], col_space=17) biggie.to_html(columns=['B', 'A'], formatters={'A': lambda x: '%.1f' % x}) biggie.to_html(columns=['B', 'A'], float_format=str) biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) frame = DataFrame(index=np.arange(200)) frame.to_html()
def test_file_url(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), 'First', attrs={'id': 'table'}) tm.assertIsInstance(dfs, list) for df in dfs: tm.assertIsInstance(df, DataFrame)
def test_join_outer(self): other = Int64Index([7, 12, 25, 1, 2, 5]) other_mono = Int64Index([1, 2, 5, 7, 12, 25]) # not monotonic # guarantee of sortedness res, lidx, ridx = self.index.join(other, how="outer", return_indexers=True) noidx_res = self.index.join(other, how="outer") self.assert_index_equal(res, noidx_res) eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) # monotonic res, lidx, ridx = self.index.join(other_mono, how="outer", return_indexers=True) noidx_res = self.index.join(other_mono, how="outer") self.assert_index_equal(res, noidx_res) elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx)
def test_array_interface(self): result = np.sqrt(self.frame) tm.assertIsInstance(result, type(self.frame)) self.assertIs(result.index, self.frame.index) self.assertIs(result.columns, self.frame.columns) assert_frame_equal(result, self.frame.apply(np.sqrt))
def test_join_right(self): other = Int64Index([7, 12, 25, 1, 2, 5]) other_mono = Int64Index([1, 2, 5, 7, 12, 25]) # not monotonic res, lidx, ridx = self.index.join(other, how='right', return_indexers=True) eres = other elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) tm.assertIsInstance(other, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) self.assertIsNone(ridx) # monotonic res, lidx, ridx = self.index.join(other_mono, how='right', return_indexers=True) eres = other_mono elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) tm.assertIsInstance(other, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) self.assertIsNone(ridx) # non-unique idx = Index([1, 1, 2, 5]) idx2 = Index([1, 2, 5, 7, 9]) res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx)
def test_parse_dates_column_list(self): from pandas.core.datetools import to_datetime data = '''date;destination;ventilationcode;unitcode;units;aux_date 01/01/2010;P;P;50;1;12/1/2011 01/01/2010;P;R;50;1;13/1/2011 15/01/2010;P;P;50;1;14/1/2011 01/05/2010;P;P;50;1;15/1/2011''' expected = self.read_csv(StringIO(data), sep=";", index_col=lrange(4)) lev = expected.index.levels[0] levels = list(expected.index.levels) levels[0] = lev.to_datetime(dayfirst=True) # hack to get this to work - remove for final test levels[0].name = lev.name expected.index.set_levels(levels, inplace=True) expected['aux_date'] = to_datetime(expected['aux_date'], dayfirst=True) expected['aux_date'] = lmap(Timestamp, expected['aux_date']) tm.assertIsInstance(expected['aux_date'][0], datetime) df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4), parse_dates=[0, 5], dayfirst=True) tm.assert_frame_equal(df, expected) df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4), parse_dates=['date', 'aux_date'], dayfirst=True) tm.assert_frame_equal(df, expected)
def test_apply_empty_infer_type(self): no_cols = DataFrame(index=['a', 'b', 'c']) no_index = DataFrame(columns=['a', 'b', 'c']) def _check(df, f): with warnings.catch_warnings(record=True): test_res = f(np.array([], dtype='f8')) is_reduction = not isinstance(test_res, np.ndarray) def _checkit(axis=0, raw=False): res = df.apply(f, axis=axis, raw=raw) if is_reduction: agg_axis = df._get_agg_axis(axis) tm.assertIsInstance(res, Series) self.assertIs(res.index, agg_axis) else: tm.assertIsInstance(res, DataFrame) _checkit() _checkit(axis=1) _checkit(raw=True) _checkit(axis=0, raw=True) with np.errstate(all='ignore'): _check(no_cols, lambda x: x) _check(no_cols, lambda x: x.mean()) _check(no_index, lambda x: x) _check(no_index, lambda x: x.mean()) result = no_cols.apply(lambda x: x.mean(), broadcast=True) tm.assertIsInstance(result, DataFrame)
def test_daterange_bug_456(self): # GH #456 rng1 = cdate_range('12/5/2011', '12/5/2011') rng2 = cdate_range('12/2/2011', '12/5/2011') rng2.offset = datetools.CDay() result = rng1.union(rng2) tm.assertIsInstance(result, DatetimeIndex)
def test_joins(self): index = period_range('1/1/2000', '1/20/2000', freq='D') for kind in ['inner', 'outer', 'left', 'right']: joined = index.join(index[:-5], how=kind) tm.assertIsInstance(joined, PeriodIndex) self.assertEqual(joined.freq, index.freq)
def _checkit(axis=0, raw=False): res = df.apply(f, axis=axis, raw=raw) if is_reduction: agg_axis = df._get_agg_axis(axis) tm.assertIsInstance(res, Series) self.assertIs(res.index, agg_axis) else: tm.assertIsInstance(res, DataFrame)
def test_copy(self): cp = self.frame.copy() tm.assertIsInstance(cp, SparseDataFrame) tm.assert_sp_frame_equal(cp, self.frame) # as of v0.15.0 # this is now identical (but not is_a ) self.assertTrue(cp.index.identical(self.frame.index))
def test_unpickle_daterange(self): pth, _ = os.path.split(os.path.abspath(__file__)) filepath = os.path.join(pth, 'data', 'daterange_073.pickle') rng = read_pickle(filepath) tm.assertIsInstance(rng[0], datetime) tm.assertIsInstance(rng.offset, offsets.BDay) self.assertEqual(rng.values.dtype, object)
def test_regex_idempotency(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), match=re.compile(re.compile('Florida')), attrs={'id': 'table'}) tm.assertIsInstance(dfs, list) for df in dfs: tm.assertIsInstance(df, DataFrame)
def test_constructor_empty(self): idx = pd.PeriodIndex([], freq='M') tm.assertIsInstance(idx, PeriodIndex) self.assertEqual(len(idx), 0) self.assertEqual(idx.freq, 'M') with tm.assertRaisesRegexp(ValueError, 'freq not specified'): pd.PeriodIndex([])
def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) tm.assertIsInstance(arr, Int64Index) # but not if explicit dtype passed arr = Index([1, 2, 3, 4], dtype=object) tm.assertIsInstance(arr, Index)
def test_iterator(self): # See gh-6607 reader = self.read_csv(StringIO(self.data1), index_col=0, iterator=True) df = self.read_csv(StringIO(self.data1), index_col=0) chunk = reader.read(3) tm.assert_frame_equal(chunk, df[:3]) last_chunk = reader.read(5) tm.assert_frame_equal(last_chunk, df[3:]) # pass list lines = list(csv.reader(StringIO(self.data1))) parser = TextParser(lines, index_col=0, chunksize=2) df = self.read_csv(StringIO(self.data1), index_col=0) chunks = list(parser) tm.assert_frame_equal(chunks[0], df[:2]) tm.assert_frame_equal(chunks[1], df[2:4]) tm.assert_frame_equal(chunks[2], df[4:]) # pass skiprows parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1]) chunks = list(parser) tm.assert_frame_equal(chunks[0], df[1:3]) treader = self.read_table(StringIO(self.data1), sep=',', index_col=0, iterator=True) tm.assertIsInstance(treader, TextFileReader) # gh-3967: stopping iteration when chunksize is specified data = """A,B,C foo,1,2,3 bar,4,5,6 baz,7,8,9 """ reader = self.read_csv(StringIO(data), iterator=True) result = list(reader) expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[ 3, 6, 9]), index=['foo', 'bar', 'baz']) tm.assert_frame_equal(result[0], expected) # chunksize = 1 reader = self.read_csv(StringIO(data), chunksize=1) result = list(reader) expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[ 3, 6, 9]), index=['foo', 'bar', 'baz']) self.assertEqual(len(result), 3) tm.assert_frame_equal(pd.concat(result), expected) # skip_footer is not supported with the C parser yet if self.engine == 'python': # test bad parameter (skip_footer) reader = self.read_csv(StringIO(self.data1), index_col=0, iterator=True, skip_footer=True) self.assertRaises(ValueError, reader.read, 3)
def test_longpanel_series_combo(self): wp = tm.makePanel() lp = wp.to_frame() y = lp.pop('ItemA') model = ols(y=y, x=lp, entity_effects=True, window=20) self.assertTrue(notnull(model.beta.values).all()) tm.assertIsInstance(model, PanelOLS) model.summary
def test_append_join_nondatetimeindex(self): rng = timedelta_range('1 days', periods=10) idx = Index(['a', 'b', 'c', 'd']) result = rng.append(idx) tm.assertIsInstance(result[0], Timedelta) # it works rng.join(idx, how='outer')
def test_constructor(self): for col, series in compat.iteritems(self.frame): tm.assertIsInstance(series, SparseSeries) tm.assertIsInstance(self.iframe['A'].sp_index, IntIndex) # constructed zframe from matrix above self.assertEqual(self.zframe['A'].fill_value, 0) tm.assert_almost_equal([0, 0, 0, 0, 1, 2, 3, 4, 5, 6], self.zframe['A'].values) # construct no data sdf = SparseDataFrame(columns=np.arange(10), index=np.arange(10)) for col, series in compat.iteritems(sdf): tm.assertIsInstance(series, SparseSeries) # construct from nested dict data = {} for c, s in compat.iteritems(self.frame): data[c] = s.to_dict() sdf = SparseDataFrame(data) tm.assert_sp_frame_equal(sdf, self.frame) # TODO: test data is copied from inputs # init dict with different index idx = self.frame.index[:5] cons = SparseDataFrame( self.frame, index=idx, columns=self.frame.columns, default_fill_value=self.frame.default_fill_value, default_kind=self.frame.default_kind, copy=True) reindexed = self.frame.reindex(idx) tm.assert_sp_frame_equal(cons, reindexed, exact_indices=False) # assert level parameter breaks reindex self.assertRaises(TypeError, self.frame.reindex, idx, level=0) repr(self.frame)
def test_dense_to_sparse(self): df = DataFrame({ 'A': [nan, nan, nan, 1, 2], 'B': [1, 2, nan, nan, nan] }) sdf = df.to_sparse() tm.assertIsInstance(sdf, SparseDataFrame) self.assertTrue(np.isnan(sdf.default_fill_value)) tm.assertIsInstance(sdf['A'].sp_index, BlockIndex) tm.assert_frame_equal(sdf.to_dense(), df) sdf = df.to_sparse(kind='integer') tm.assertIsInstance(sdf['A'].sp_index, IntIndex) df = DataFrame({ 'A': [0, 0, 0, 1, 2], 'B': [1, 2, 0, 0, 0] }, dtype=float) sdf = df.to_sparse(fill_value=0) self.assertEqual(sdf.default_fill_value, 0) tm.assert_frame_equal(sdf.to_dense(), df)
def test_multiindex_header_skiprows_tuples(self): df = self._bank_data(header=[0, 1], skiprows=1, tupleize_cols=True)[0] tm.assertIsInstance(df.columns, Index)
def test_multiindex_index(self): df = self._bank_data(index_col=[0, 1])[0] tm.assertIsInstance(df.index, MultiIndex)
def test_multiindex_header(self): df = self._bank_data(header=[0, 1])[0] tm.assertIsInstance(df.columns, MultiIndex)
def test_tolist(self): rng = date_range('1/1/2000', periods=10) result = rng.tolist() tm.assertIsInstance(result[0], Timestamp)
def test_objects(self): arr = np.random.randint(0, 100, size=50).astype('O') result = algos.unique(arr) tm.assertIsInstance(result, np.ndarray)
def test_subclass_iterrows(self): # GH 13977 df = tm.SubclassedDataFrame({'a': [1]}) for i, row in df.iterrows(): tm.assertIsInstance(row, tm.SubclassedSeries) tm.assert_series_equal(row, df.loc[i])
def test_isinstance(self): expected = "Expected type " with assertRaisesRegexp(AssertionError, expected): tm.assertIsInstance(1, pd.Series)
def test_to_frame(self): s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'), name='xxx') res = s.to_frame() exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd')) tm.assert_frame_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedDataFrame)
def test_indexing_sliced(self): # GH 11559 df = tm.SubclassedDataFrame( { 'X': [1, 2, 3], 'Y': [4, 5, 6], 'Z': [7, 8, 9] }, index=['a', 'b', 'c']) res = df.loc[:, 'X'] exp = tm.SubclassedSeries([1, 2, 3], index=list('abc'), name='X') tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) res = df.iloc[:, 1] exp = tm.SubclassedSeries([4, 5, 6], index=list('abc'), name='Y') tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) res = df.ix[:, 'Z'] exp = tm.SubclassedSeries([7, 8, 9], index=list('abc'), name='Z') tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) res = df.loc['a', :] exp = tm.SubclassedSeries([1, 4, 7], index=list('XYZ'), name='a') tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) res = df.iloc[1, :] exp = tm.SubclassedSeries([2, 5, 8], index=list('XYZ'), name='b') tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) res = df.ix['c', :] exp = tm.SubclassedSeries([3, 6, 9], index=list('XYZ'), name='c') tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries)
def test_convert_array_of_periods(self): rng = period_range('1/1/2000', periods=20, freq='D') periods = list(rng) result = pd.Index(periods) tm.assertIsInstance(result, PeriodIndex)
def test_iteration(self): index = PeriodIndex(start='1/1/10', periods=4, freq='B') result = list(index) tm.assertIsInstance(result[0], Period) self.assertEqual(result[0].freq, index.freq)
def test_make_time_series(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index) tm.assertIsInstance(series, Series)
def test_applymap(self): # just test that it works result = self.frame.applymap(lambda x: x * 2) tm.assertIsInstance(result, SparseDataFrame)
def test_multiindex_header_index_skiprows(self): df = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)[0] tm.assertIsInstance(df.index, MultiIndex) tm.assertIsInstance(df.columns, MultiIndex)
def test_works_on_valid_markup(self): filename = os.path.join(DATA_PATH, 'valid_markup.html') dfs = self.read_html(filename, index_col=0) tm.assertIsInstance(dfs, list) tm.assertIsInstance(dfs[0], DataFrame)
def test_mgr_locs(self): tm.assertIsInstance(self.fblock.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64))
def test_series_box_timedelta(self): rng = timedelta_range('1 day 1 s', periods=5, freq='h') s = Series(rng) tm.assertIsInstance(s[1], Timedelta) tm.assertIsInstance(s.iat[2], Timedelta)
def test_spam_no_match(self): dfs = self.read_html(self.spam_data) for df in dfs: tm.assertIsInstance(df, DataFrame)
def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], columns=['C'], margins_col='All'): col_margins = result.ix[:-1, margins_col] expected_col_margins = self.data.groupby(index)[values_col].mean() tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) self.assertEqual(col_margins.name, margins_col) result = result.sortlevel() index_margins = result.ix[(margins_col, '')].iloc[:-1] expected_ix_margins = self.data.groupby(columns)[values_col].mean() tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False) self.assertEqual(index_margins.name, (margins_col, '')) grand_total_margins = result.loc[(margins_col, ''), margins_col] expected_total_margins = self.data[values_col].mean() self.assertEqual(grand_total_margins, expected_total_margins) # column specified result = self.data.pivot_table(values='D', index=['A', 'B'], columns='C', margins=True, aggfunc=np.mean) _check_output(result, 'D') # Set a different margins_name (not 'All') result = self.data.pivot_table(values='D', index=['A', 'B'], columns='C', margins=True, aggfunc=np.mean, margins_name='Totals') _check_output(result, 'D', margins_col='Totals') # no column specified table = self.data.pivot_table(index=['A', 'B'], columns='C', margins=True, aggfunc=np.mean) for value_col in table.columns.levels[0]: _check_output(table[value_col], value_col) # no col # to help with a buglet self.data.columns = [k * 2 for k in self.data.columns] table = self.data.pivot_table(index=['AA', 'BB'], margins=True, aggfunc=np.mean) for value_col in table.columns: totals = table.loc[('All', ''), value_col] self.assertEqual(totals, self.data[value_col].mean()) # no rows rtable = self.data.pivot_table(columns=['AA', 'BB'], margins=True, aggfunc=np.mean) tm.assertIsInstance(rtable, Series) table = self.data.pivot_table(index=['AA', 'BB'], margins=True, aggfunc='mean') for item in ['DD', 'EE', 'FF']: totals = table.loc[('All', ''), item] self.assertEqual(totals, self.data[item].mean()) # issue number #8349: pivot_table with margins and dictionary aggfunc data = [ { 'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2013, 'MONTH': 12, 'DAYS': 3, 'SALARY': 17 }, { 'JOB': 'Employ', 'NAME': 'Mary', 'YEAR': 2013, 'MONTH': 12, 'DAYS': 5, 'SALARY': 23 }, { 'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2014, 'MONTH': 1, 'DAYS': 10, 'SALARY': 100 }, { 'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2014, 'MONTH': 1, 'DAYS': 11, 'SALARY': 110 }, { 'JOB': 'Employ', 'NAME': 'Mary', 'YEAR': 2014, 'MONTH': 1, 'DAYS': 15, 'SALARY': 200 }, { 'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2014, 'MONTH': 2, 'DAYS': 8, 'SALARY': 80 }, { 'JOB': 'Employ', 'NAME': 'Mary', 'YEAR': 2014, 'MONTH': 2, 'DAYS': 5, 'SALARY': 190 }, ] df = DataFrame(data) df = df.set_index(['JOB', 'NAME', 'YEAR', 'MONTH'], drop=False, append=False) result = df.pivot_table(index=['JOB', 'NAME'], columns=['YEAR', 'MONTH'], values=['DAYS', 'SALARY'], aggfunc={ 'DAYS': 'mean', 'SALARY': 'sum' }, margins=True) expected = df.pivot_table(index=['JOB', 'NAME'], columns=['YEAR', 'MONTH'], values=['DAYS'], aggfunc='mean', margins=True) tm.assert_frame_equal(result['DAYS'], expected['DAYS']) expected = df.pivot_table(index=['JOB', 'NAME'], columns=['YEAR', 'MONTH'], values=['SALARY'], aggfunc='sum', margins=True) tm.assert_frame_equal(result['SALARY'], expected['SALARY'])
def test_banklist_no_match(self): dfs = self.read_html(self.banklist_data, attrs={'id': 'table'}) for df in dfs: tm.assertIsInstance(df, DataFrame)
def _validate_periodindex(self, pickled, current): tm.assert_index_equal(pickled, current) tm.assertIsInstance(pickled.freq, MonthEnd) tm.assert_equal(pickled.freq, MonthEnd()) tm.assert_equal(pickled.freqstr, 'M') tm.assert_index_equal(pickled.shift(2), current.shift(2))
def _check(res): tm.assertIsInstance(res, SparseArray) self.assertEqual(res.dtype, np.bool) self.assertIsInstance(res.fill_value, bool)
def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] s = Series(objs, index=[0, 1]) tm.assertIsInstance(s, Series)
def test_ufunc_coercions(self): idx = self._holder([1, 2, 3, 4, 5], name='x') result = np.sqrt(idx) tm.assertIsInstance(result, Float64Index) exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') tm.assert_index_equal(result, exp) result = np.divide(idx, 2.) tm.assertIsInstance(result, Float64Index) exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') tm.assert_index_equal(result, exp) # _evaluate_numeric_binop result = idx + 2. tm.assertIsInstance(result, Float64Index) exp = Float64Index([3., 4., 5., 6., 7.], name='x') tm.assert_index_equal(result, exp) result = idx - 2. tm.assertIsInstance(result, Float64Index) exp = Float64Index([-1., 0., 1., 2., 3.], name='x') tm.assert_index_equal(result, exp) result = idx * 1. tm.assertIsInstance(result, Float64Index) exp = Float64Index([1., 2., 3., 4., 5.], name='x') tm.assert_index_equal(result, exp) result = idx / 2. tm.assertIsInstance(result, Float64Index) exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') tm.assert_index_equal(result, exp)
def test_apply_multi_index(self): s = DataFrame([[1, 2], [3, 4], [5, 6]]) s.index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']]) s.columns = ['col1', 'col2'] res = s.apply(lambda x: Series({'min': min(x), 'max': max(x)}), 1) tm.assertIsInstance(res.index, MultiIndex)
def test_operators_timedelta64(self): # invalid ops self.assertRaises(Exception, self.objSeries.__add__, 1) self.assertRaises(Exception, self.objSeries.__add__, np.array(1, dtype=np.int64)) self.assertRaises(Exception, self.objSeries.__sub__, 1) self.assertRaises(Exception, self.objSeries.__sub__, np.array(1, dtype=np.int64)) # seriese ops v1 = date_range('2012-1-1', periods=3, freq='D') v2 = date_range('2012-1-2', periods=3, freq='D') rs = Series(v2) - Series(v1) xp = Series(1e9 * 3600 * 24, rs.index).astype('int64').astype('timedelta64[ns]') assert_series_equal(rs, xp) self.assertEqual(rs.dtype, 'timedelta64[ns]') df = DataFrame(dict(A=v1)) td = Series([timedelta(days=i) for i in range(3)]) self.assertEqual(td.dtype, 'timedelta64[ns]') # series on the rhs result = df['A'] - df['A'].shift() self.assertEqual(result.dtype, 'timedelta64[ns]') result = df['A'] + td self.assertEqual(result.dtype, 'M8[ns]') # scalar Timestamp on rhs maxa = df['A'].max() tm.assertIsInstance(maxa, Timestamp) resultb = df['A'] - df['A'].max() self.assertEqual(resultb.dtype, 'timedelta64[ns]') # timestamp on lhs result = resultb + df['A'] values = [ Timestamp('20111230'), Timestamp('20120101'), Timestamp('20120103') ] expected = Series(values, name='A') assert_series_equal(result, expected) # datetimes on rhs result = df['A'] - datetime(2001, 1, 1) expected = Series([timedelta(days=4017 + i) for i in range(3)], name='A') assert_series_equal(result, expected) self.assertEqual(result.dtype, 'm8[ns]') d = datetime(2001, 1, 1, 3, 4) resulta = df['A'] - d self.assertEqual(resulta.dtype, 'm8[ns]') # roundtrip resultb = resulta + d assert_series_equal(df['A'], resultb) # timedeltas on rhs td = timedelta(days=1) resulta = df['A'] + td resultb = resulta - td assert_series_equal(resultb, df['A']) self.assertEqual(resultb.dtype, 'M8[ns]') # roundtrip td = timedelta(minutes=5, seconds=3) resulta = df['A'] + td resultb = resulta - td assert_series_equal(df['A'], resultb) self.assertEqual(resultb.dtype, 'M8[ns]') # inplace value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) self.assertEqual(rs[2], value)
def compare_index_period(result, expected, typ, version): tm.assert_index_equal(result, expected) tm.assertIsInstance(result.freq, MonthEnd) tm.assert_equal(result.freq, MonthEnd()) tm.assert_equal(result.freqstr, 'M') tm.assert_index_equal(result.shift(2), expected.shift(2))
def check_result(self, result, expected, klass=None): klass = klass or self.klass assertIsInstance(result, klass) self.assertEqual(result, expected)