class TestSparseSeries(tm.TestCase, SharedWithSparse):
    """Behavioural tests for ``SparseSeries``.

    ``setUp`` builds block- and integer-indexed fixtures, both with the
    default fill value (asserted to be NaN in ``test_constructor``) and with
    ``fill_value=0``.  Many tests run an operation on a sparse fixture and
    compare the outcome with the same operation on its ``to_dense()``
    counterpart.

    Per-test notes are ``#`` comments rather than docstrings, matching the
    existing style of this file.
    """

    _multiprocess_can_split_ = True

    def setUp(self):
        # Fixtures are rebuilt for every test because several tests mutate
        # them in place (set_value, setitem, writes through sp_values).
        arr, index = _test_data1()

        date_index = bdate_range('1/1/2011', periods=len(index))

        self.bseries = SparseSeries(arr, index=index, kind='block',
                                    name='bseries')
        # Alias of bseries; presumably consumed by the SharedWithSparse
        # mixin -- TODO confirm against the mixin.
        self.ts = self.bseries

        # Same data as bseries, but on a business-day date index.
        self.btseries = SparseSeries(arr, index=date_index, kind='block')

        self.iseries = SparseSeries(arr, index=index, kind='integer',
                                    name='iseries')

        arr, index = _test_data2()
        self.bseries2 = SparseSeries(arr, index=index, kind='block')
        self.iseries2 = SparseSeries(arr, index=index, kind='integer')

        # "z" fixtures use fill_value=0 instead of the default.
        arr, index = _test_data1_zero()
        self.zbseries = SparseSeries(arr, index=index, kind='block',
                                     fill_value=0, name='zbseries')
        self.ziseries = SparseSeries(arr, index=index, kind='integer',
                                     fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2 = SparseSeries(arr, index=index, kind='block',
                                      fill_value=0)
        self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
                                      fill_value=0)

    def test_constructor_dtype(self):
        # dtype follows the data or the explicit ``dtype``; fill_value is
        # NaN unless one is passed.
        arr = SparseSeries([np.nan, 1, 2, np.nan])
        self.assertEqual(arr.dtype, np.float64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
        self.assertEqual(arr.dtype, np.float64)
        self.assertEqual(arr.fill_value, 0)

        arr = SparseSeries([0, 1, 2, 4], dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertEqual(arr.fill_value, 0)

    def test_iteration_and_str(self):
        # Smoke test: iterating and str() must not raise.
        list(self.bseries)
        str(self.bseries)

    def test_TimeSeries_deprecation(self):
        # deprecation TimeSeries, #10890
        with tm.assert_produces_warning(FutureWarning):
            pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3))

    def test_construct_DataFrame_with_sp_series(self):
        # it works!
        df = DataFrame({'col': self.bseries})

        # printing & access (smoke checks, results intentionally discarded)
        df.iloc[:1]
        df['col']
        df.dtypes
        str(df)

        tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)

        result = df.iloc[:, 0]
        tm.assert_sp_series_equal(result, self.bseries, check_names=False)

        # blocking
        expected = Series({'col': 'float64:sparse'})
        result = df.ftypes
        tm.assert_series_equal(expected, result)

    def test_series_density(self):
        # GH2803
        ts = Series(np.random.randn(10))
        ts[2:-2] = nan
        sts = ts.to_sparse()
        density = sts.density  # don't die
        # Only the first two and last two of the ten points stay non-NaN.
        self.assertEqual(density, 4 / 10.0)

    def test_sparse_to_dense(self):
        # to_dense() must give back the array each fixture was built from.
        arr, _ = _test_data1()
        series = self.bseries.to_dense()
        assert_equal(series, arr)

        series = self.bseries.to_dense(sparse_only=True)
        assert_equal(series, arr[np.isfinite(arr)])

        series = self.iseries.to_dense()
        assert_equal(series, arr)

        arr, _ = _test_data1_zero()
        series = self.zbseries.to_dense()
        assert_equal(series, arr)

        series = self.ziseries.to_dense()
        assert_equal(series, arr)

    def test_to_dense_fill_value(self):
        # Dense round trip must reproduce the input regardless of fill_value.
        s = pd.Series([1, np.nan, np.nan, 3, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        # NOTE: the original repeated this exact construct-and-assert pair
        # twice; the verbatim duplicate has been dropped.
        s = pd.Series([1, np.nan, 0, 3, 0])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

    def test_dense_to_sparse(self):
        # dense -> sparse must rebuild the fixtures for both index kinds.
        series = self.bseries.to_dense()
        bseries = series.to_sparse(kind='block')
        iseries = series.to_sparse(kind='integer')
        tm.assert_sp_series_equal(bseries, self.bseries)
        # iseries is derived from bseries' dense form, so it carries the
        # name 'bseries', not 'iseries'; names are checked separately.
        tm.assert_sp_series_equal(iseries, self.iseries, check_names=False)
        self.assertEqual(iseries.name, self.bseries.name)

        self.assertEqual(len(series), len(bseries))
        self.assertEqual(len(series), len(iseries))
        self.assertEqual(series.shape, bseries.shape)
        self.assertEqual(series.shape, iseries.shape)

        # non-NaN fill value
        series = self.zbseries.to_dense()
        zbseries = series.to_sparse(kind='block', fill_value=0)
        ziseries = series.to_sparse(kind='integer', fill_value=0)
        tm.assert_sp_series_equal(zbseries, self.zbseries)
        tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False)
        self.assertEqual(ziseries.name, self.zbseries.name)

        self.assertEqual(len(series), len(zbseries))
        self.assertEqual(len(series), len(ziseries))
        self.assertEqual(series.shape, zbseries.shape)
        self.assertEqual(series.shape, ziseries.shape)

    def test_to_dense_preserve_name(self):
        # unittest assertion instead of a bare ``assert`` so the
        # precondition is still enforced under ``python -O``.
        self.assertIsNotNone(self.bseries.name)
        result = self.bseries.to_dense()
        self.assertEqual(result.name, self.bseries.name)

    def test_constructor(self):
        # test setup guys
        self.assertTrue(np.isnan(self.bseries.fill_value))
        tm.assertIsInstance(self.bseries.sp_index, BlockIndex)
        self.assertTrue(np.isnan(self.iseries.fill_value))
        tm.assertIsInstance(self.iseries.sp_index, IntIndex)

        self.assertEqual(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values.values,
                     self.bseries.to_dense().fillna(0).values)

        # pass SparseSeries
        def _check_const(sparse, name):
            # use passed series name
            result = SparseSeries(sparse)
            tm.assert_sp_series_equal(result, sparse)
            self.assertEqual(sparse.name, name)
            self.assertEqual(result.name, name)

            # use passed name
            result = SparseSeries(sparse, name='x')
            tm.assert_sp_series_equal(result, sparse, check_names=False)
            self.assertEqual(result.name, 'x')

        _check_const(self.bseries, 'bseries')
        _check_const(self.iseries, 'iseries')
        _check_const(self.zbseries, 'zbseries')

        # Sparse time series works
        date_index = bdate_range('1/1/2000', periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        tm.assertIsInstance(s5, SparseSeries)

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default: a write through sp_values must be
        # visible in the array that was passed in
        values = np.ones(self.bseries.npoints)
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        sp.sp_values[:5] = 97
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

        # but can make it copy! (``values`` keeps the earlier 97)
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
                          copy=True)
        sp.sp_values[:5] = 100
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

    def test_constructor_scalar(self):
        # A scalar is broadcast over the index; labels added by reindex
        # are null.
        data = 5
        sp = SparseSeries(data, np.arange(100))
        sp = sp.reindex(np.arange(200))
        self.assertTrue((sp.ix[:99] == data).all())
        self.assertTrue(isnull(sp.ix[100:]).all())

        data = np.nan
        sp = SparseSeries(data, np.arange(100))
        self.assertEqual(len(sp), 100)
        self.assertEqual(sp.shape, (100, ))

    def test_constructor_ndarray(self):
        # Placeholder: no assertions yet.
        pass

    def test_constructor_nonnan(self):
        # NaNs in the data survive when the fill value is 0.
        arr = [0, 0, 0, nan, nan]
        sp_series = SparseSeries(arr, fill_value=0)
        assert_equal(sp_series.values.values, arr)
        self.assertEqual(len(sp_series), 5)
        self.assertEqual(sp_series.shape, (5, ))

    # GH 9272
    def test_constructor_empty(self):
        sp = SparseSeries()
        self.assertEqual(len(sp.index), 0)
        self.assertEqual(sp.shape, (0, ))

    def test_copy_astype(self):
        # astype returns a new object that shares the sparse index.
        cop = self.bseries.astype(np.float64)
        self.assertIsNot(cop, self.bseries)
        self.assertIs(cop.sp_index, self.bseries.sp_index)
        self.assertEqual(cop.dtype, np.float64)

        cop2 = self.iseries.copy()

        tm.assert_sp_series_equal(cop, self.bseries)
        tm.assert_sp_series_equal(cop2, self.iseries)

        # test that data is copied
        cop[:5] = 97
        self.assertEqual(cop.sp_values[0], 97)
        self.assertNotEqual(self.bseries.sp_values[0], 97)

        # correct fill value
        zbcop = self.zbseries.copy()
        zicop = self.ziseries.copy()

        tm.assert_sp_series_equal(zbcop, self.zbseries)
        tm.assert_sp_series_equal(zicop, self.ziseries)

        # no deep copy: writes through the view reach the original
        view = self.bseries.copy(deep=False)
        view.sp_values[:5] = 5
        self.assertTrue((self.bseries.sp_values[:5] == 5).all())

    def test_shape(self):
        # GH 10452
        self.assertEqual(self.bseries.shape, (20, ))
        self.assertEqual(self.btseries.shape, (20, ))
        self.assertEqual(self.iseries.shape, (20, ))

        self.assertEqual(self.bseries2.shape, (15, ))
        self.assertEqual(self.iseries2.shape, (15, ))

        self.assertEqual(self.zbseries2.shape, (15, ))
        self.assertEqual(self.ziseries2.shape, (15, ))

    def test_astype(self):
        # Casting the NaN-filled fixture to int64 is expected to raise.
        self.assertRaises(Exception, self.bseries.astype, np.int64)

    def test_kind(self):
        self.assertEqual(self.bseries.kind, 'block')
        self.assertEqual(self.iseries.kind, 'integer')

    def test_to_frame(self):
        # GH 9850
        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x')
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(), exp)

        exp = pd.SparseDataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(name='y'), exp)

        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x',
                            fill_value=0)
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]},
                                 default_fill_value=0)

        tm.assert_sp_frame_equal(s.to_frame(), exp)
        exp = pd.DataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_frame_equal(s.to_frame(name='y').to_dense(), exp)

    def test_pickle(self):
        # A pickle round trip preserves both the sparse and the dense form.
        def _test_roundtrip(series):
            unpickled = self.round_trip_pickle(series)
            tm.assert_sp_series_equal(series, unpickled)
            tm.assert_series_equal(series.to_dense(), unpickled.to_dense())

        self._check_all(_test_roundtrip)

    def _check_all(self, check_func):
        # Run ``check_func`` against each of the four named-kind fixtures.
        check_func(self.bseries)
        check_func(self.iseries)
        check_func(self.zbseries)
        check_func(self.ziseries)

    def test_getitem(self):
        def _check_getitem(sp, dense):
            # lookup by index label
            for idx, val in compat.iteritems(dense):
                tm.assert_almost_equal(val, sp[idx])

            # lookup by integer
            for i in range(len(dense)):
                tm.assert_almost_equal(sp[i], dense[i])

            # Checks disabled historically; kept for context.
            # j = np.float64(i)
            # assert_almost_equal(sp[j], dense[j])

            # API change 1/6/2012
            # negative getitem works
            # for i in xrange(len(dense)):
            #     assert_almost_equal(sp[-i], dense[-i])

        _check_getitem(self.bseries, self.bseries.to_dense())
        _check_getitem(self.btseries, self.btseries.to_dense())

        _check_getitem(self.zbseries, self.zbseries.to_dense())
        _check_getitem(self.iseries, self.iseries.to_dense())
        _check_getitem(self.ziseries, self.ziseries.to_dense())

        # exception handling
        self.assertRaises(Exception, self.bseries.__getitem__,
                          len(self.bseries) + 1)

        # index not contained
        self.assertRaises(Exception, self.btseries.__getitem__,
                          self.btseries.index[-1] + BDay())

    def test_get_get_value(self):
        # get() mirrors __getitem__ and returns None for a missing key.
        tm.assert_almost_equal(self.bseries.get(10), self.bseries[10])
        self.assertIsNone(self.bseries.get(len(self.bseries) + 1))

        dt = self.btseries.index[10]
        result = self.btseries.get(dt)
        expected = self.btseries.to_dense()[dt]
        tm.assert_almost_equal(result, expected)

        tm.assert_almost_equal(self.bseries.get_value(10), self.bseries[10])

    def test_set_value(self):
        # Overwrite an existing label.
        idx = self.btseries.index[7]
        self.btseries.set_value(idx, 0)
        self.assertEqual(self.btseries[idx], 0)

        # Setting a label that is not in the index appends it.
        self.iseries.set_value('foobar', 0)
        self.assertEqual(self.iseries.index[-1], 'foobar')
        self.assertEqual(self.iseries['foobar'], 0)

    def test_getitem_slice(self):
        # Slicing must agree with reindexing on the sliced index.
        idx = self.bseries.index
        res = self.bseries[::2]
        tm.assertIsInstance(res, SparseSeries)

        expected = self.bseries.reindex(idx[::2])
        tm.assert_sp_series_equal(res, expected)

        res = self.bseries[:5]
        tm.assertIsInstance(res, SparseSeries)
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))

        res = self.bseries[5:]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))

        # negative indices
        res = self.bseries[:-3]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))

    def test_take(self):
        def _compare_with_dense(sp):
            dense = sp.to_dense()

            def _compare(idx):
                dense_result = dense.take(idx).values
                sparse_result = sp.take(idx)
                self.assertIsInstance(sparse_result, SparseSeries)
                tm.assert_almost_equal(dense_result,
                                       sparse_result.values.values)

            _compare([1., 2., 3., 4., 5., 0.])
            _compare([7, 2, 9, 0, 4])
            _compare([3, 6, 3, 4, 7])

        self._check_all(_compare_with_dense)

        # out-of-bounds position
        self.assertRaises(Exception, self.bseries.take,
                          [0, len(self.bseries) + 1])

        # Corner case: all-NaN series
        sp = SparseSeries(np.ones(10) * nan)
        exp = pd.Series(np.repeat(nan, 5))
        tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp)

    def test_setitem(self):
        self.bseries[5] = 7.
        self.assertEqual(self.bseries[5], 7.)

    def test_setslice(self):
        self.bseries[5:10] = 7.
        tm.assert_series_equal(
            self.bseries[5:10].to_dense(),
            Series(7., index=range(5, 10), name=self.bseries.name))

    def test_operators(self):
        def _check_op(a, b, op):
            # The sparse result, densified, must match the dense result.
            sp_result = op(a, b)
            adense = a.to_dense() if isinstance(a, SparseSeries) else a
            bdense = b.to_dense() if isinstance(b, SparseSeries) else b
            dense_result = op(adense, bdense)
            tm.assert_almost_equal(sp_result.to_dense(), dense_result)

        def check(a, b):
            _check_op(a, b, operator.add)
            _check_op(a, b, operator.sub)
            _check_op(a, b, operator.truediv)
            _check_op(a, b, operator.floordiv)
            _check_op(a, b, operator.mul)

            # same operators with the operands swapped
            _check_op(a, b, lambda x, y: operator.add(y, x))
            _check_op(a, b, lambda x, y: operator.sub(y, x))
            _check_op(a, b, lambda x, y: operator.truediv(y, x))
            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
            _check_op(a, b, lambda x, y: operator.mul(y, x))

            # NaN ** 0 = 1 in C?
            # _check_op(a, b, operator.pow)
            # _check_op(a, b, lambda x, y: operator.pow(y, x))

        check(self.bseries, self.bseries)
        check(self.iseries, self.iseries)
        check(self.bseries, self.iseries)

        check(self.bseries, self.bseries2)
        check(self.bseries, self.iseries2)
        check(self.iseries, self.iseries2)

        # scalar value
        check(self.bseries, 5)

        # zero-based
        check(self.zbseries, self.zbseries * 2)
        check(self.zbseries, self.zbseries2)
        check(self.ziseries, self.ziseries2)

        # with dense
        result = self.bseries + self.bseries.to_dense()
        tm.assert_sp_series_equal(result, self.bseries + self.bseries)

    def test_binary_operators(self):

        # skipping for now #####
        # Everything below the raise is intentionally unreachable until the
        # skip is lifted.
        raise nose.SkipTest("skipping sparse binary operators test")

        def _check_inplace_op(iop, op):
            tmp = self.bseries.copy()

            expected = op(tmp, self.bseries)
            iop(tmp, self.bseries)
            tm.assert_sp_series_equal(tmp, expected)

        inplace_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow']
        for op in inplace_ops:
            _check_inplace_op(getattr(operator, "i%s" % op),
                              getattr(operator, op))

    def test_abs(self):
        # The method, the builtin and the NumPy ufunc must agree and keep
        # the series name.
        s = SparseSeries([1, 2, -3], name='x')
        expected = SparseSeries([1, 2, 3], name='x')
        result = s.abs()
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = np.abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

    def test_reindex(self):
        def _compare_with_series(sps, new_index):
            # sparse reindex must equal dense reindex re-sparsified with
            # the same fill value
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            tm.assert_sp_series_equal(spsre, seriesre)
            tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        tm.assert_sp_series_equal(self.bseries, same_index)
        self.assertIsNot(same_index, self.bseries)

        # corner cases
        sp = SparseSeries([], index=[])
        # Smoke check only: an empty series with a non-NaN fill value must
        # construct without raising (the result was never used).
        SparseSeries([], index=[], fill_value=0)
        _compare_with_series(sp, np.arange(10))

        # copy=True: writes to the result must not reach the original
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        self.assertTrue((self.bseries.sp_values != 1.).all())

        # copy=False: the result shares sp_values with the original
        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        tm.assert_numpy_array_equal(self.bseries.sp_values,
                                    np.repeat(1., 10))

    def test_sparse_reindex(self):
        length = 10

        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values, sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            # the new sparse index object itself is adopted
            self.assertIs(reindexed.sp_index, index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            tm.assert_almost_equal(expected.values, reindexed.sp_values)
            # (A second, discarded recomputation of ``expected`` used to
            # follow here; it asserted nothing and has been removed.)

        def _check_with_fill_value(values, first, second, fill_value=nan):
            # exercise both sparse-index representations
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.)

        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

        # anything other than a SparseIndex is rejected
        first_series = SparseSeries(values1,
                                    sparse_index=IntIndex(length, index1),
                                    fill_value=nan)
        with tm.assertRaisesRegexp(TypeError,
                                   'new index must be a SparseIndex'):
            first_series.sparse_reindex(0)

    def test_repr(self):
        # Smoke test: repr() must not raise; the strings are not inspected.
        repr(self.bseries)
        repr(self.iseries)

    def test_iter(self):
        # Placeholder: no assertions yet.
        pass

    def test_truncate(self):
        # Placeholder: no assertions yet.
        pass

    def test_fillna(self):
        # Placeholder: no assertions yet.
        pass

    def test_groupby(self):
        # Placeholder: no assertions yet.
        pass

    def test_reductions(self):
        def _compare_with_dense(obj, op):
            sparse_result = getattr(obj, op)()
            series = obj.to_dense()
            dense_result = getattr(series, op)()
            self.assertEqual(sparse_result, dense_result)

        to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']

        def _compare_all(obj):
            for op in to_compare:
                _compare_with_dense(obj, op)

        # Each fixture is checked before and after NaNs are injected into
        # its stored values.  ``np.nan`` is used rather than the ``np.NaN``
        # alias, which newer NumPy no longer provides.
        _compare_all(self.bseries)

        self.bseries.sp_values[5:10] = np.nan
        _compare_all(self.bseries)

        _compare_all(self.zbseries)
        self.zbseries.sp_values[5:10] = np.nan
        _compare_all(self.zbseries)

        series = self.zbseries.copy()
        series.fill_value = 2
        _compare_all(series)

        nonna = Series(np.random.randn(20)).to_sparse()
        _compare_all(nonna)

        nonna2 = Series(np.random.randn(20)).to_sparse(fill_value=0)
        _compare_all(nonna2)

    def test_dropna(self):
        # valid() on a zero-filled series drops the NaNs and the fill values
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.valid()

        expected = sp.to_dense().valid()
        expected = expected[expected != 0]

        tm.assert_almost_equal(sp_valid.values, expected.values)
        self.assertTrue(sp_valid.index.equals(expected.index))
        self.assertEqual(len(sp_valid.sp_values), 2)

        # dropna() on the NaN-filled fixture returns a non-sparse result
        result = self.bseries.dropna()
        expected = self.bseries.to_dense().dropna()
        self.assertNotIsInstance(result, SparseSeries)
        tm.assert_series_equal(result, expected)

    def test_homogenize(self):
        def _check_matches(indices, expected):
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices,
                                       sparse_index=idx)
            homogenized = spf.homogenize(data)

            # every output series must end up on the expected sparse index
            for _, v in compat.iteritems(homogenized):
                self.assertTrue(v.sp_index.equals(expected))

        indices1 = [BlockIndex(10, [2], [7]),
                    BlockIndex(10, [1, 6], [3, 4]),
                    BlockIndex(10, [0], [10])]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {'a': SparseSeries(np.arange(7), sparse_index=expected2,
                                  fill_value=0)}
        with tm.assertRaisesRegexp(TypeError, "NaN fill value"):
            spf.homogenize(data)

    def test_fill_value_corner(self):
        # Division results are expected to carry a NaN fill value in both
        # operand combinations below.
        cop = self.zbseries.copy()
        cop.fill_value = 0
        result = self.bseries / cop

        self.assertTrue(np.isnan(result.fill_value))

        cop2 = self.zbseries.copy()
        cop2.fill_value = 1
        result = cop2 / cop
        self.assertTrue(np.isnan(result.fill_value))

    def test_fill_value_when_combine_const(self):
        # GH12723
        s = SparseSeries([0, 1, np.nan, 3, 4, 5], index=np.arange(6))

        exp = s.fillna(0).add(2)
        res = s.add(2, fill_value=0)
        self.assert_series_equal(res, exp)

    def test_shift(self):
        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=np.arange(6))

        # shift(0) returns an equal but distinct object
        shifted = series.shift(0)
        self.assertIsNot(shifted, series)
        tm.assert_sp_series_equal(shifted, series)

        _dense_series_compare(series, lambda s: s.shift(1))
        _dense_series_compare(series, lambda s: s.shift(-2))

        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=bdate_range('1/1/2000', periods=6))

        _dense_series_compare(series, lambda s: s.shift(2, freq='B'))
        _dense_series_compare(series,
                              lambda s: s.shift(2, freq=datetools.bday))

    def test_shift_nan(self):
        # GH 12908
        orig = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0])
        all_periods = (0, 1, 2, 3, -1, -2, -3, -4)

        sparse = orig.to_sparse()
        for periods in all_periods:
            tm.assert_sp_series_equal(sparse.shift(periods),
                                      orig.shift(periods).to_sparse())

        sparse = orig.to_sparse(fill_value=0)
        for periods in all_periods:
            tm.assert_sp_series_equal(
                sparse.shift(periods),
                orig.shift(periods).to_sparse(fill_value=0))

    def test_shift_dtype(self):
        # GH 12908
        orig = pd.Series([1, 2, 3, 4], dtype=np.int64)
        sparse = orig.to_sparse()

        for periods in (0, 1, 2, 3, -1, -2, -3, -4):
            tm.assert_sp_series_equal(sparse.shift(periods),
                                      orig.shift(periods).to_sparse())

    def test_shift_dtype_fill_value(self):
        # GH 12908
        orig = pd.Series([1, 0, 0, 4], dtype=np.int64)
        sparse = orig.to_sparse(fill_value=0)

        for periods in (0, 1, 2, 3, -1, -2, -3, -4):
            tm.assert_sp_series_equal(
                sparse.shift(periods),
                orig.shift(periods).to_sparse(fill_value=0))

    def test_cumsum(self):
        # NaN-filled input: the result is a SparseSeries and keeps the name
        result = self.bseries.cumsum()
        expected = self.bseries.to_dense().cumsum()
        tm.assertIsInstance(result, SparseSeries)
        self.assertEqual(result.name, self.bseries.name)
        tm.assert_series_equal(result.to_dense(), expected)

        # zero-filled input: the result is compared directly as a Series
        result = self.zbseries.cumsum()
        expected = self.zbseries.to_dense().cumsum()
        tm.assertIsInstance(result, Series)
        tm.assert_series_equal(result, expected)

    def test_combine_first(self):
        # sparse/sparse and sparse/dense must agree with the dense result
        s = self.bseries

        result = s[::2].combine_first(s)
        result2 = s[::2].combine_first(s.to_dense())

        expected = s[::2].to_dense().combine_first(s.to_dense())
        expected = expected.to_sparse(fill_value=s.fill_value)

        tm.assert_sp_series_equal(result, result2)
        tm.assert_sp_series_equal(result, expected)
class TestSparseSeries(TestCase, test_series.CheckNameIntegration): def setUp(self): arr, index = _test_data1() date_index = DateRange("1/1/2011", periods=len(index)) self.bseries = SparseSeries(arr, index=index, kind="block") self.bseries.name = "bseries" self.ts = self.bseries self.btseries = SparseSeries(arr, index=date_index, kind="block") self.iseries = SparseSeries(arr, index=index, kind="integer") arr, index = _test_data2() self.bseries2 = SparseSeries(arr, index=index, kind="block") self.iseries2 = SparseSeries(arr, index=index, kind="integer") arr, index = _test_data1_zero() self.zbseries = SparseSeries(arr, index=index, kind="block", fill_value=0) self.ziseries = SparseSeries(arr, index=index, kind="integer", fill_value=0) arr, index = _test_data2_zero() self.zbseries2 = SparseSeries(arr, index=index, kind="block", fill_value=0) self.ziseries2 = SparseSeries(arr, index=index, kind="integer", fill_value=0) def test_construct_DataFrame_with_sp_series(self): # it works! df = DataFrame({"col": self.bseries}) def test_sparse_to_dense(self): arr, index = _test_data1() series = self.bseries.to_dense() assert_equal(series, arr) series = self.bseries.to_dense(sparse_only=True) assert_equal(series, arr[np.isfinite(arr)]) series = self.iseries.to_dense() assert_equal(series, arr) arr, index = _test_data1_zero() series = self.zbseries.to_dense() assert_equal(series, arr) series = self.ziseries.to_dense() assert_equal(series, arr) def test_dense_to_sparse(self): series = self.bseries.to_dense() bseries = series.to_sparse(kind="block") iseries = series.to_sparse(kind="integer") assert_sp_series_equal(bseries, self.bseries) assert_sp_series_equal(iseries, self.iseries) # non-NaN fill value series = self.zbseries.to_dense() zbseries = series.to_sparse(kind="block", fill_value=0) ziseries = series.to_sparse(kind="integer", fill_value=0) assert_sp_series_equal(zbseries, self.zbseries) assert_sp_series_equal(ziseries, self.ziseries) def test_to_dense_preserve_name(self): 
assert self.bseries.name is not None result = self.bseries.to_dense() self.assertEquals(result.name, self.bseries.name) def test_constructor(self): # test setup guys self.assert_(np.isnan(self.bseries.fill_value)) self.assert_(isinstance(self.bseries.sp_index, BlockIndex)) self.assert_(np.isnan(self.iseries.fill_value)) self.assert_(isinstance(self.iseries.sp_index, IntIndex)) self.assertEquals(self.zbseries.fill_value, 0) assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0)) # pass SparseSeries s2 = SparseSeries(self.bseries) s3 = SparseSeries(self.iseries) s4 = SparseSeries(self.zbseries) assert_sp_series_equal(s2, self.bseries) assert_sp_series_equal(s3, self.iseries) assert_sp_series_equal(s4, self.zbseries) # Sparse time series works date_index = DateRange("1/1/2000", periods=len(self.bseries)) s5 = SparseSeries(self.bseries, index=date_index) self.assert_(isinstance(s5, SparseTimeSeries)) # pass Series bseries2 = SparseSeries(self.bseries.to_dense()) assert_equal(self.bseries.sp_values, bseries2.sp_values) # pass dict? # don't copy the data by default values = np.ones(len(self.bseries.sp_values)) sp = SparseSeries(values, sparse_index=self.bseries.sp_index) sp.sp_values[:5] = 97 self.assert_(values[0] == 97) # but can make it copy! 
sp = SparseSeries(values, sparse_index=self.bseries.sp_index, copy=True) sp.sp_values[:5] = 100 self.assert_(values[0] == 97) def test_constructor_ndarray(self): pass def test_constructor_nonnan(self): arr = [0, 0, 0, nan, nan] sp_series = SparseSeries(arr, fill_value=0) assert_equal(sp_series.values, arr) def test_copy_astype(self): cop = self.bseries.astype(np.float_) self.assert_(cop is not self.bseries) self.assert_(cop.sp_index is self.bseries.sp_index) self.assert_(cop.dtype == np.float64) cop2 = self.iseries.copy() assert_sp_series_equal(cop, self.bseries) assert_sp_series_equal(cop2, self.iseries) # test that data is copied cop.sp_values[:5] = 97 self.assert_(cop.sp_values[0] == 97) self.assert_(self.bseries.sp_values[0] != 97) # correct fill value zbcop = self.zbseries.copy() zicop = self.ziseries.copy() assert_sp_series_equal(zbcop, self.zbseries) assert_sp_series_equal(zicop, self.ziseries) # no deep copy view = self.bseries.copy(deep=False) view.sp_values[:5] = 5 self.assert_((self.bseries.sp_values[:5] == 5).all()) def test_astype(self): self.assertRaises(Exception, self.bseries.astype, np.int64) def test_kind(self): self.assertEquals(self.bseries.kind, "block") self.assertEquals(self.iseries.kind, "integer") def test_pickle(self): def _test_roundtrip(series): pickled = pickle.dumps(series, protocol=pickle.HIGHEST_PROTOCOL) unpickled = pickle.loads(pickled) assert_sp_series_equal(series, unpickled) assert_series_equal(series.to_dense(), unpickled.to_dense()) self._check_all(_test_roundtrip) def _check_all(self, check_func): check_func(self.bseries) check_func(self.iseries) check_func(self.zbseries) check_func(self.ziseries) def test_getitem(self): def _check_getitem(sp, dense): for idx, val in dense.iteritems(): assert_almost_equal(val, sp[idx]) for i in xrange(len(dense)): assert_almost_equal(sp[i], dense[i]) # j = np.float64(i) # assert_almost_equal(sp[j], dense[j]) # API change 1/6/2012 # negative getitem works # for i in xrange(len(dense)): # 
assert_almost_equal(sp[-i], dense[-i]) _check_getitem(self.bseries, self.bseries.to_dense()) _check_getitem(self.btseries, self.btseries.to_dense()) _check_getitem(self.zbseries, self.zbseries.to_dense()) _check_getitem(self.iseries, self.iseries.to_dense()) _check_getitem(self.ziseries, self.ziseries.to_dense()) # exception handling self.assertRaises(Exception, self.bseries.__getitem__, len(self.bseries) + 1) # index not contained self.assertRaises(Exception, self.btseries.__getitem__, self.btseries.index[-1] + BDay()) def test_get_get_value(self): assert_almost_equal(self.bseries.get(10), self.bseries[10]) self.assert_(self.bseries.get(len(self.bseries) + 1) is None) dt = self.btseries.index[10] result = self.btseries.get(dt) expected = self.btseries.to_dense()[dt] assert_almost_equal(result, expected) assert_almost_equal(self.bseries.get_value(10), self.bseries[10]) def test_set_value(self): idx = self.btseries.index[7] res = self.btseries.set_value(idx, 0) self.assert_(res is not self.btseries) self.assertEqual(res[idx], 0) res = self.iseries.set_value("foobar", 0) self.assert_(res is not self.iseries) self.assert_(res.index[-1] == "foobar") self.assertEqual(res["foobar"], 0) def test_getitem_slice(self): idx = self.bseries.index res = self.bseries[::2] self.assert_(isinstance(res, SparseSeries)) assert_sp_series_equal(res, self.bseries.reindex(idx[::2])) res = self.bseries[:5] self.assert_(isinstance(res, SparseSeries)) assert_sp_series_equal(res, self.bseries.reindex(idx[:5])) res = self.bseries[5:] assert_sp_series_equal(res, self.bseries.reindex(idx[5:])) # negative indices res = self.bseries[:-3] assert_sp_series_equal(res, self.bseries.reindex(idx[:-3])) def test_take(self): def _compare_with_dense(sp): dense = sp.to_dense() def _compare(idx): dense_result = dense.take(idx).values sparse_result = sp.take(idx) self.assert_(isinstance(sparse_result, SparseSeries)) assert_almost_equal(dense_result, sparse_result.values) _compare([1.0, 2.0, 3.0, 4.0, 5.0, 
0.0]) _compare([7, 2, 9, 0, 4]) _compare([3, 6, 3, 4, 7]) self._check_all(_compare_with_dense) self.assertRaises(Exception, self.bseries.take, [-1, 0]) self.assertRaises(Exception, self.bseries.take, [0, len(self.bseries) + 1]) # Corner case sp = SparseSeries(np.ones(10.0) * nan) assert_almost_equal(sp.take([0, 1, 2, 3, 4]), np.repeat(nan, 5)) def test_setitem(self): self.assertRaises(Exception, self.bseries.__setitem__, 5, 7.0) self.assertRaises(Exception, self.iseries.__setitem__, 5, 7.0) def test_setslice(self): self.assertRaises(Exception, self.bseries.__setslice__, 5, 10, 7.0) def test_operators(self): def _check_op(a, b, op): sp_result = op(a, b) adense = a.to_dense() if isinstance(a, SparseSeries) else a bdense = b.to_dense() if isinstance(b, SparseSeries) else b dense_result = op(adense, bdense) assert_almost_equal(sp_result.to_dense(), dense_result) def check(a, b): _check_op(a, b, operator.add) _check_op(a, b, operator.sub) _check_op(a, b, operator.truediv) _check_op(a, b, operator.floordiv) _check_op(a, b, operator.mul) _check_op(a, b, lambda x, y: operator.add(y, x)) _check_op(a, b, lambda x, y: operator.sub(y, x)) _check_op(a, b, lambda x, y: operator.truediv(y, x)) _check_op(a, b, lambda x, y: operator.floordiv(y, x)) _check_op(a, b, lambda x, y: operator.mul(y, x)) # NaN ** 0 = 1 in C? 
# _check_op(a, b, operator.pow) # _check_op(a, b, lambda x, y: operator.pow(y, x)) check(self.bseries, self.bseries) check(self.iseries, self.iseries) check(self.bseries, self.iseries) check(self.bseries, self.bseries2) check(self.bseries, self.iseries2) check(self.iseries, self.iseries2) # scalar value check(self.bseries, 5) # zero-based check(self.zbseries, self.zbseries * 2) check(self.zbseries, self.zbseries2) check(self.ziseries, self.ziseries2) # with dense result = self.bseries + self.bseries.to_dense() assert_sp_series_equal(result, self.bseries + self.bseries) # @dec.knownfailureif(True, 'Known NumPy failer as of 1.5.1') def test_operators_corner2(self): raise nose.SkipTest("known failer on numpy 1.5.1") # NumPy circumvents __r*__ operations val = np.float64(3.0) result = val - self.zbseries assert_sp_series_equal(result, 3 - self.zbseries) def test_binary_operators(self): def _check_inplace_op(op): tmp = self.bseries.copy() self.assertRaises(NotImplementedError, op, tmp, self.bseries) inplace_ops = ["iadd", "isub", "imul", "itruediv", "ifloordiv", "ipow"] for op in inplace_ops: _check_inplace_op(getattr(operator, op)) def test_reindex(self): def _compare_with_series(sps, new_index): spsre = sps.reindex(new_index) series = sps.to_dense() seriesre = series.reindex(new_index) seriesre = seriesre.to_sparse(fill_value=sps.fill_value) assert_sp_series_equal(spsre, seriesre) assert_series_equal(spsre.to_dense(), seriesre.to_dense()) _compare_with_series(self.bseries, self.bseries.index[::2]) _compare_with_series(self.bseries, list(self.bseries.index[::2])) _compare_with_series(self.bseries, self.bseries.index[:10]) _compare_with_series(self.bseries, self.bseries.index[5:]) _compare_with_series(self.zbseries, self.zbseries.index[::2]) _compare_with_series(self.zbseries, self.zbseries.index[:10]) _compare_with_series(self.zbseries, self.zbseries.index[5:]) # special cases same_index = self.bseries.reindex(self.bseries.index) assert_sp_series_equal(self.bseries, 
same_index) self.assert_(same_index is not self.bseries) # corner cases sp = SparseSeries([], index=[]) sp_zero = SparseSeries([], index=[], fill_value=0) _compare_with_series(sp, np.arange(10)) # with copy=False reindexed = self.bseries.reindex(self.bseries.index, copy=True) reindexed.sp_values[:] = 1.0 self.assert_((self.bseries.sp_values != 1.0).all()) reindexed = self.bseries.reindex(self.bseries.index, copy=False) reindexed.sp_values[:] = 1.0 self.assert_((self.bseries.sp_values == 1.0).all()) def test_sparse_reindex(self): length = 10 def _check(values, index1, index2, fill_value): first_series = SparseSeries(values, sparse_index=index1, fill_value=fill_value) reindexed = first_series.sparse_reindex(index2) self.assert_(reindexed.sp_index is index2) int_indices1 = index1.to_int_index().indices int_indices2 = index2.to_int_index().indices expected = Series(values, index=int_indices1) expected = expected.reindex(int_indices2).fillna(fill_value) assert_almost_equal(expected.values, reindexed.sp_values) # make sure level argument asserts expected = expected.reindex(int_indices2).fillna(fill_value) def _check_with_fill_value(values, first, second, fill_value=nan): i_index1 = IntIndex(length, first) i_index2 = IntIndex(length, second) b_index1 = i_index1.to_block_index() b_index2 = i_index2.to_block_index() _check(values, i_index1, i_index2, fill_value) _check(values, b_index1, b_index2, fill_value) def _check_all(values, first, second): _check_with_fill_value(values, first, second, fill_value=nan) _check_with_fill_value(values, first, second, fill_value=0) index1 = [2, 4, 5, 6, 8, 9] values1 = np.arange(6.0) _check_all(values1, index1, [2, 4, 5]) _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9]) _check_all(values1, index1, [0, 1]) _check_all(values1, index1, [0, 1, 7, 8, 9]) _check_all(values1, index1, []) def test_repr(self): bsrepr = repr(self.bseries) isrepr = repr(self.iseries) def test_iter(self): pass def test_truncate(self): pass def test_fillna(self): 
pass def test_groupby(self): pass def test_reductions(self): def _compare_with_dense(obj, op): sparse_result = getattr(obj, op)() series = obj.to_dense() dense_result = getattr(series, op)() self.assertEquals(sparse_result, dense_result) to_compare = ["count", "sum", "mean", "std", "var", "skew"] def _compare_all(obj): for op in to_compare: _compare_with_dense(obj, op) _compare_all(self.bseries) self.bseries.sp_values[5:10] = np.NaN _compare_all(self.bseries) _compare_all(self.zbseries) self.zbseries.sp_values[5:10] = np.NaN _compare_all(self.zbseries) series = self.zbseries.copy() series.fill_value = 2 _compare_all(series) def test_dropna(self): sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0) sp_valid = sp.valid() assert_almost_equal(sp_valid.values, sp.to_dense().valid().values) self.assert_(sp_valid.index.equals(sp.to_dense().valid().index)) self.assertEquals(len(sp_valid.sp_values), 2) result = self.bseries.dropna() expected = self.bseries.to_dense().dropna() self.assert_(not isinstance(result, SparseSeries)) tm.assert_series_equal(result, expected) def test_homogenize(self): def _check_matches(indices, expected): data = {} for i, idx in enumerate(indices): data[i] = SparseSeries(idx.to_int_index().indices, sparse_index=idx) homogenized = spf.homogenize(data) for k, v in homogenized.iteritems(): assert v.sp_index.equals(expected) indices1 = [BlockIndex(10, [2], [7]), BlockIndex(10, [1, 6], [3, 4]), BlockIndex(10, [0], [10])] expected1 = BlockIndex(10, [2, 6], [2, 3]) _check_matches(indices1, expected1) indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])] expected2 = indices2[0] _check_matches(indices2, expected2) # must have NaN fill value data = {"a": SparseSeries(np.arange(7), sparse_index=expected2, fill_value=0)} nose.tools.assert_raises(Exception, spf.homogenize, data) def test_fill_value_corner(self): cop = self.zbseries.copy() cop.fill_value = 0 result = self.bseries / cop self.assert_(np.isnan(result.fill_value)) cop2 = 
self.zbseries.copy() cop2.fill_value = 1 result = cop2 / cop self.assert_(np.isnan(result.fill_value)) def test_shift(self): series = SparseSeries([nan, 1.0, 2.0, 3.0, nan, nan], index=np.arange(6)) shifted = series.shift(0) self.assert_(shifted is not series) assert_sp_series_equal(shifted, series) f = lambda s: s.shift(1) _dense_series_compare(series, f) f = lambda s: s.shift(-2) _dense_series_compare(series, f) series = SparseSeries([nan, 1.0, 2.0, 3.0, nan, nan], index=DateRange("1/1/2000", periods=6)) f = lambda s: s.shift(2, timeRule="WEEKDAY") _dense_series_compare(series, f) f = lambda s: s.shift(2, offset=datetools.bday) _dense_series_compare(series, f) def test_cumsum(self): result = self.bseries.cumsum() expected = self.bseries.to_dense().cumsum() self.assert_(isinstance(result, SparseSeries)) self.assertEquals(result.name, self.bseries.name) assert_series_equal(result.to_dense(), expected) result = self.zbseries.cumsum() expected = self.zbseries.to_dense().cumsum() self.assert_(isinstance(result, Series)) assert_series_equal(result, expected) def test_combine_first(self): s = self.bseries result = s[::2].combine_first(s) result2 = s[::2].combine_first(s.to_dense()) expected = s[::2].to_dense().combine_first(s.to_dense()) expected = expected.to_sparse(fill_value=s.fill_value) assert_sp_series_equal(result, result2) assert_sp_series_equal(result, expected)
class TestSparseSeries(tm.TestCase, SharedWithSparse):
    """Tests for ``SparseSeries``.

    ``setUp`` builds block-kind and integer-kind fixtures, each with the
    default (NaN) fill value and with ``fill_value=0``.  Most tests compare
    the sparse result against the same operation on ``to_dense()``.
    """

    _multiprocess_can_split_ = True

    def setUp(self):
        # Fixture naming: b* = kind='block', i* = kind='integer',
        # z* = fill_value=0, *2 = built from the second data set.
        arr, index = _test_data1()

        date_index = bdate_range('1/1/2011', periods=len(index))

        self.bseries = SparseSeries(arr, index=index, kind='block',
                                    name='bseries')
        # alias of bseries under the attribute name ``ts``
        self.ts = self.bseries

        self.btseries = SparseSeries(arr, index=date_index, kind='block')

        self.iseries = SparseSeries(arr, index=index, kind='integer',
                                    name='iseries')

        arr, index = _test_data2()
        self.bseries2 = SparseSeries(arr, index=index, kind='block')
        self.iseries2 = SparseSeries(arr, index=index, kind='integer')

        arr, index = _test_data1_zero()
        self.zbseries = SparseSeries(arr, index=index, kind='block',
                                     fill_value=0, name='zbseries')
        self.ziseries = SparseSeries(arr, index=index, kind='integer',
                                     fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2 = SparseSeries(arr, index=index, kind='block',
                                      fill_value=0)
        self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
                                      fill_value=0)

    def test_constructor_dtype(self):
        # dtype and fill_value reported after construction, with and without
        # an explicit fill_value / dtype
        arr = SparseSeries([np.nan, 1, 2, np.nan])
        self.assertEqual(arr.dtype, np.float64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
        self.assertEqual(arr.dtype, np.float64)
        self.assertEqual(arr.fill_value, 0)

        arr = SparseSeries([0, 1, 2, 4], dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertEqual(arr.fill_value, 0)

    def test_iteration_and_str(self):
        # smoke test: iterating and str() must not raise
        list(self.bseries)
        str(self.bseries)

    def test_TimeSeries_deprecation(self):
        # deprecation TimeSeries, #10890
        with tm.assert_produces_warning(FutureWarning):
            pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3))

    def test_construct_DataFrame_with_sp_series(self):
        # it works!
        df = DataFrame({'col': self.bseries})

        # printing & access
        df.iloc[:1]
        df['col']
        df.dtypes
        str(df)

        tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)

        result = df.iloc[:, 0]
        tm.assert_sp_series_equal(result, self.bseries, check_names=False)

        # blocking
        expected = Series({'col': 'float64:sparse'})
        result = df.ftypes
        tm.assert_series_equal(expected, result)

    def test_series_density(self):
        # GH2803
        ts = Series(np.random.randn(10))
        ts[2:-2] = nan
        sts = ts.to_sparse()
        density = sts.density  # don't die
        self.assertEqual(density, 4 / 10.0)

    def test_sparse_to_dense(self):
        # to_dense() must give back the array each fixture was built from
        arr, index = _test_data1()
        series = self.bseries.to_dense()
        assert_equal(series, arr)

        series = self.bseries.to_dense(sparse_only=True)
        assert_equal(series, arr[np.isfinite(arr)])

        series = self.iseries.to_dense()
        assert_equal(series, arr)

        arr, index = _test_data1_zero()
        series = self.zbseries.to_dense()
        assert_equal(series, arr)

        series = self.ziseries.to_dense()
        assert_equal(series, arr)

    def test_to_dense_fill_value(self):
        # dense -> sparse -> dense round trip, for the default fill value and
        # for fill_value=0
        s = pd.Series([1, np.nan, np.nan, 3, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        # data holding both zeros and NaN.  The original repeated the
        # fill_value=0 check twice here; the first one now covers the
        # default fill value, mirroring the stanza above.
        s = pd.Series([1, np.nan, 0, 3, 0])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        # all-NaN data
        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

    def test_dense_to_sparse(self):
        # to_sparse() on the dense form must reproduce the fixtures
        series = self.bseries.to_dense()
        bseries = series.to_sparse(kind='block')
        iseries = series.to_sparse(kind='integer')
        tm.assert_sp_series_equal(bseries, self.bseries)
        tm.assert_sp_series_equal(iseries, self.iseries, check_names=False)
        # the name comes from the dense source (bseries), not from iseries
        self.assertEqual(iseries.name, self.bseries.name)

        self.assertEqual(len(series), len(bseries))
        self.assertEqual(len(series), len(iseries))
        self.assertEqual(series.shape, bseries.shape)
        self.assertEqual(series.shape, iseries.shape)

        # non-NaN fill value
        series = self.zbseries.to_dense()
        zbseries = series.to_sparse(kind='block', fill_value=0)
        ziseries = series.to_sparse(kind='integer', fill_value=0)
        tm.assert_sp_series_equal(zbseries, self.zbseries)
        tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False)
        self.assertEqual(ziseries.name, self.zbseries.name)

        self.assertEqual(len(series), len(zbseries))
        self.assertEqual(len(series), len(ziseries))
        self.assertEqual(series.shape, zbseries.shape)
        self.assertEqual(series.shape, ziseries.shape)

    def test_to_dense_preserve_name(self):
        # assertTrue rather than a bare assert, which is stripped under -O
        self.assertTrue(self.bseries.name is not None)
        result = self.bseries.to_dense()
        self.assertEqual(result.name, self.bseries.name)

    def test_constructor(self):
        # test setup guys
        self.assertTrue(np.isnan(self.bseries.fill_value))
        tm.assertIsInstance(self.bseries.sp_index, BlockIndex)
        self.assertTrue(np.isnan(self.iseries.fill_value))
        tm.assertIsInstance(self.iseries.sp_index, IntIndex)

        self.assertEqual(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values.values,
                     self.bseries.to_dense().fillna(0).values)

        # pass SparseSeries
        def _check_const(sparse, name):
            # use passed series name
            result = SparseSeries(sparse)
            tm.assert_sp_series_equal(result, sparse)
            self.assertEqual(sparse.name, name)
            self.assertEqual(result.name, name)

            # use passed name
            result = SparseSeries(sparse, name='x')
            tm.assert_sp_series_equal(result, sparse, check_names=False)
            self.assertEqual(result.name, 'x')

        _check_const(self.bseries, 'bseries')
        _check_const(self.iseries, 'iseries')
        _check_const(self.zbseries, 'zbseries')

        # Sparse time series works
        date_index = bdate_range('1/1/2000', periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        tm.assertIsInstance(s5, SparseSeries)

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default
        values = np.ones(self.bseries.npoints)
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        sp.sp_values[:5] = 97
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

        # but can make it copy!
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
                          copy=True)
        sp.sp_values[:5] = 100
        # the source array still holds the 97 written above
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

    def test_constructor_scalar(self):
        # a scalar is broadcast over the index; reindexing past the original
        # index yields nulls
        data = 5
        sp = SparseSeries(data, np.arange(100))
        sp = sp.reindex(np.arange(200))
        self.assertTrue((sp.ix[:99] == data).all())
        self.assertTrue(isnull(sp.ix[100:]).all())

        data = np.nan
        sp = SparseSeries(data, np.arange(100))
        self.assertEqual(len(sp), 100)
        self.assertEqual(sp.shape, (100, ))

    def test_constructor_ndarray(self):
        # placeholder: no assertions yet
        pass

    def test_constructor_nonnan(self):
        # construction with a non-NaN fill value keeps the NaN entries
        arr = [0, 0, 0, nan, nan]
        sp_series = SparseSeries(arr, fill_value=0)
        assert_equal(sp_series.values.values, arr)
        self.assertEqual(len(sp_series), 5)
        self.assertEqual(sp_series.shape, (5, ))

    # GH 9272
    def test_constructor_empty(self):
        sp = SparseSeries()
        self.assertEqual(len(sp.index), 0)
        self.assertEqual(sp.shape, (0, ))

    def test_copy_astype(self):
        # astype() returns a new object that shares the sparse index
        cop = self.bseries.astype(np.float64)
        self.assertIsNot(cop, self.bseries)
        self.assertIs(cop.sp_index, self.bseries.sp_index)
        self.assertEqual(cop.dtype, np.float64)

        cop2 = self.iseries.copy()

        tm.assert_sp_series_equal(cop, self.bseries)
        tm.assert_sp_series_equal(cop2, self.iseries)

        # test that data is copied
        cop[:5] = 97
        self.assertEqual(cop.sp_values[0], 97)
        self.assertNotEqual(self.bseries.sp_values[0], 97)

        # correct fill value
        zbcop = self.zbseries.copy()
        zicop = self.ziseries.copy()

        tm.assert_sp_series_equal(zbcop, self.zbseries)
        tm.assert_sp_series_equal(zicop, self.ziseries)

        # no deep copy
        view = self.bseries.copy(deep=False)
        view.sp_values[:5] = 5
        self.assertTrue((self.bseries.sp_values[:5] == 5).all())

    def test_shape(self):
        # GH 10452
        self.assertEqual(self.bseries.shape, (20, ))
        self.assertEqual(self.btseries.shape, (20, ))
        self.assertEqual(self.iseries.shape, (20, ))

        self.assertEqual(self.bseries2.shape, (15, ))
        self.assertEqual(self.iseries2.shape, (15, ))

        self.assertEqual(self.zbseries2.shape, (15, ))
        self.assertEqual(self.ziseries2.shape, (15, ))

    def test_astype(self):
        # casting the NaN-filled fixture to int64 is expected to raise
        self.assertRaises(Exception, self.bseries.astype, np.int64)

    def test_kind(self):
        self.assertEqual(self.bseries.kind, 'block')
        self.assertEqual(self.iseries.kind, 'integer')

    def test_to_frame(self):
        # GH 9850
        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x')
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(), exp)

        exp = pd.SparseDataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(name='y'), exp)

        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x', fill_value=0)
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]},
                                 default_fill_value=0)

        tm.assert_sp_frame_equal(s.to_frame(), exp)
        exp = pd.DataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_frame_equal(s.to_frame(name='y').to_dense(), exp)

    def test_pickle(self):
        # every fixture must survive a pickle round trip, both in sparse and
        # in dense form
        def _test_roundtrip(series):
            unpickled = self.round_trip_pickle(series)
            tm.assert_sp_series_equal(series, unpickled)
            tm.assert_series_equal(series.to_dense(), unpickled.to_dense())

        self._check_all(_test_roundtrip)

    def _check_all(self, check_func):
        # run ``check_func`` against the four first-data-set fixtures
        check_func(self.bseries)
        check_func(self.iseries)
        check_func(self.zbseries)
        check_func(self.ziseries)

    def test_getitem(self):
        def _check_getitem(sp, dense):
            # lookup by each index label of the dense series
            for idx, val in compat.iteritems(dense):
                tm.assert_almost_equal(val, sp[idx])

            # lookup by the integers 0..len-1
            for i in range(len(dense)):
                tm.assert_almost_equal(sp[i], dense[i])
                # j = np.float64(i)
                # assert_almost_equal(sp[j], dense[j])

                # API change 1/6/2012
                # negative getitem works
                # for i in xrange(len(dense)):
                #     assert_almost_equal(sp[-i], dense[-i])

        _check_getitem(self.bseries, self.bseries.to_dense())
        _check_getitem(self.btseries, self.btseries.to_dense())

        _check_getitem(self.zbseries, self.zbseries.to_dense())
        _check_getitem(self.iseries, self.iseries.to_dense())
        _check_getitem(self.ziseries, self.ziseries.to_dense())

        # exception handling
        self.assertRaises(Exception, self.bseries.__getitem__,
                          len(self.bseries) + 1)

        # index not contained
        self.assertRaises(Exception, self.btseries.__getitem__,
                          self.btseries.index[-1] + BDay())

    def test_get_get_value(self):
        # get() matches [] for a present key and returns None for a missing
        # one; get_value() matches [] as well
        tm.assert_almost_equal(self.bseries.get(10), self.bseries[10])
        self.assertIsNone(self.bseries.get(len(self.bseries) + 1))

        dt = self.btseries.index[10]
        result = self.btseries.get(dt)
        expected = self.btseries.to_dense()[dt]
        tm.assert_almost_equal(result, expected)

        tm.assert_almost_equal(self.bseries.get_value(10), self.bseries[10])

    def test_set_value(self):
        # set_value() updates the fixture in place for an existing label ...
        idx = self.btseries.index[7]
        self.btseries.set_value(idx, 0)
        self.assertEqual(self.btseries[idx], 0)

        # ... and appends when the label is new
        self.iseries.set_value('foobar', 0)
        self.assertEqual(self.iseries.index[-1], 'foobar')
        self.assertEqual(self.iseries['foobar'], 0)

    def test_getitem_slice(self):
        # slicing must agree with reindexing on the sliced index
        idx = self.bseries.index
        res = self.bseries[::2]
        tm.assertIsInstance(res, SparseSeries)

        expected = self.bseries.reindex(idx[::2])
        tm.assert_sp_series_equal(res, expected)

        res = self.bseries[:5]
        tm.assertIsInstance(res, SparseSeries)
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))

        res = self.bseries[5:]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))

        # negative indices
        res = self.bseries[:-3]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))

    def test_take(self):
        def _compare_with_dense(sp):
            dense = sp.to_dense()

            def _compare(idx):
                dense_result = dense.take(idx).values
                sparse_result = sp.take(idx)
                # tm.assertIsInstance, as in the rest of this class
                tm.assertIsInstance(sparse_result, SparseSeries)
                tm.assert_almost_equal(dense_result,
                                       sparse_result.values.values)

            _compare([1., 2., 3., 4., 5., 0.])
            _compare([7, 2, 9, 0, 4])
            _compare([3, 6, 3, 4, 7])

        self._check_all(_compare_with_dense)

        # out-of-bounds position
        self.assertRaises(Exception, self.bseries.take,
                          [0, len(self.bseries) + 1])

        # Corner case
        sp = SparseSeries(np.ones(10) * nan)
        exp = pd.Series(np.repeat(nan, 5))
        tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp)

    def test_setitem(self):
        # scalar assignment by key is readable back
        self.bseries[5] = 7.
        self.assertEqual(self.bseries[5], 7.)

    def test_setslice(self):
        # slice assignment is readable back through the same slice
        self.bseries[5:10] = 7.
        tm.assert_series_equal(self.bseries[5:10].to_dense(),
                               Series(7., index=range(5, 10),
                                      name=self.bseries.name))

    def test_operators(self):
        def _check_op(a, b, op):
            # apply ``op`` to the sparse operands and to their dense
            # equivalents; the results must agree
            sp_result = op(a, b)
            adense = a.to_dense() if isinstance(a, SparseSeries) else a
            bdense = b.to_dense() if isinstance(b, SparseSeries) else b
            dense_result = op(adense, bdense)
            tm.assert_almost_equal(sp_result.to_dense(), dense_result)

        def check(a, b):
            _check_op(a, b, operator.add)
            _check_op(a, b, operator.sub)
            _check_op(a, b, operator.truediv)
            _check_op(a, b, operator.floordiv)
            _check_op(a, b, operator.mul)

            # same operators with the operands swapped
            _check_op(a, b, lambda x, y: operator.add(y, x))
            _check_op(a, b, lambda x, y: operator.sub(y, x))
            _check_op(a, b, lambda x, y: operator.truediv(y, x))
            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
            _check_op(a, b, lambda x, y: operator.mul(y, x))

            # NaN ** 0 = 1 in C?
            # _check_op(a, b, operator.pow)
            # _check_op(a, b, lambda x, y: operator.pow(y, x))

        check(self.bseries, self.bseries)
        check(self.iseries, self.iseries)
        check(self.bseries, self.iseries)

        check(self.bseries, self.bseries2)
        check(self.bseries, self.iseries2)
        check(self.iseries, self.iseries2)

        # scalar value
        check(self.bseries, 5)

        # zero-based
        check(self.zbseries, self.zbseries * 2)
        check(self.zbseries, self.zbseries2)
        check(self.ziseries, self.ziseries2)

        # with dense
        result = self.bseries + self.bseries.to_dense()
        tm.assert_sp_series_equal(result, self.bseries + self.bseries)

    def test_binary_operators(self):
        # skipping for now #####
        raise nose.SkipTest("skipping sparse binary operators test")

        # Everything below is unreachable while the skip above is in place;
        # it is kept so the test can be re-enabled by removing the raise.
        def _check_inplace_op(iop, op):
            tmp = self.bseries.copy()

            expected = op(tmp, self.bseries)
            iop(tmp, self.bseries)
            tm.assert_sp_series_equal(tmp, expected)

        inplace_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow']
        for op in inplace_ops:
            _check_inplace_op(getattr(operator, "i%s" % op),
                              getattr(operator, op))

    def test_abs(self):
        # .abs(), builtin abs() and np.abs() all keep the sparse type
        # and the name
        s = SparseSeries([1, 2, -3], name='x')
        expected = SparseSeries([1, 2, 3], name='x')
        result = s.abs()
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = np.abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

    def test_reindex(self):
        def _compare_with_series(sps, new_index):
            # sparse reindex must match dense reindex converted back to
            # sparse with the same fill value
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            tm.assert_sp_series_equal(spsre, seriesre)
            tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        tm.assert_sp_series_equal(self.bseries, same_index)
        self.assertIsNot(same_index, self.bseries)

        # corner cases
        sp = SparseSeries([], index=[])
        # smoke check only: the empty fill_value=0 series is constructed
        # but not compared against anything
        SparseSeries([], index=[], fill_value=0)
        _compare_with_series(sp, np.arange(10))

        # with copy=True the original values are left untouched
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        self.assertTrue((self.bseries.sp_values != 1.).all())

        # with copy=False writes go through to the original
        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        tm.assert_numpy_array_equal(self.bseries.sp_values, np.repeat(1., 10))

    def test_sparse_reindex(self):
        length = 10

        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values, sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            # the result must carry the very index object that was passed
            self.assertIs(reindexed.sp_index, index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            tm.assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            # NOTE(review): no level argument is passed below; this call
            # only repeats the reindex above and its result is unused.
            # Confirm the intent.
            expected.reindex(int_indices2).fillna(fill_value)

        def _check_with_fill_value(values, first, second, fill_value=nan):
            # run the check for both IntIndex and BlockIndex
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.)

        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

        # a non-SparseIndex argument must be rejected
        first_series = SparseSeries(values1,
                                    sparse_index=IntIndex(length, index1),
                                    fill_value=nan)
        with tm.assertRaisesRegexp(TypeError,
                                   'new index must be a SparseIndex'):
            first_series.sparse_reindex(0)

    def test_repr(self):
        # smoke test: repr() must not raise for either kind
        repr(self.bseries)
        repr(self.iseries)

    def test_iter(self):
        # placeholder: no assertions yet
        pass

    def test_truncate(self):
        # placeholder: no assertions yet
        pass

    def test_fillna(self):
        # placeholder: no assertions yet
        pass

    def test_groupby(self):
        # placeholder: no assertions yet
        pass

    def test_reductions(self):
        def _compare_with_dense(obj, op):
            # the reduction named ``op`` must agree between sparse and dense
            sparse_result = getattr(obj, op)()
            series = obj.to_dense()
            dense_result = getattr(series, op)()
            self.assertEqual(sparse_result, dense_result)

        to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']

        def _compare_all(obj):
            for op in to_compare:
                _compare_with_dense(obj, op)

        _compare_all(self.bseries)

        # overwrite some of the stored values with NaN and compare again
        self.bseries.sp_values[5:10] = np.nan
        _compare_all(self.bseries)

        _compare_all(self.zbseries)
        self.zbseries.sp_values[5:10] = np.nan
        _compare_all(self.zbseries)

        # fill value other than NaN or 0
        series = self.zbseries.copy()
        series.fill_value = 2
        _compare_all(series)

        # random data converted to sparse, default fill and fill_value=0
        nonna = Series(np.random.randn(20)).to_sparse()
        _compare_all(nonna)

        nonna2 = Series(np.random.randn(20)).to_sparse(fill_value=0)
        _compare_all(nonna2)

    def test_dropna(self):
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.valid()

        # expected result: dense valid() with the zero entries removed too
        expected = sp.to_dense().valid()
        expected = expected[expected != 0]

        tm.assert_almost_equal(sp_valid.values, expected.values)
        self.assertTrue(sp_valid.index.equals(expected.index))
        self.assertEqual(len(sp_valid.sp_values), 2)

        # dropna() on the NaN-filled fixture returns a non-sparse result
        result = self.bseries.dropna()
        expected = self.bseries.to_dense().dropna()
        self.assertNotIsInstance(result, SparseSeries)
        tm.assert_series_equal(result, expected)

    def test_homogenize(self):
        def _check_matches(indices, expected):
            # build one series per sparse index, homogenize them and check
            # that every result carries the expected index
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices,
                                       sparse_index=idx)
            homogenized = spf.homogenize(data)

            for _, v in compat.iteritems(homogenized):
                # assertTrue rather than a bare assert (stripped under -O)
                self.assertTrue(v.sp_index.equals(expected))

        indices1 = [BlockIndex(10, [2], [7]),
                    BlockIndex(10, [1, 6], [3, 4]),
                    BlockIndex(10, [0], [10])]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        indices2 = [BlockIndex(10, [2], [7]),
                    BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {'a': SparseSeries(np.arange(7), sparse_index=expected2,
                                  fill_value=0)}
        with tm.assertRaisesRegexp(TypeError, "NaN fill value"):
            spf.homogenize(data)

    def test_fill_value_corner(self):
        # division by a series with fill_value 0 gives a NaN fill value
        cop = self.zbseries.copy()
        cop.fill_value = 0
        result = self.bseries / cop

        self.assertTrue(np.isnan(result.fill_value))

        cop2 = self.zbseries.copy()
        cop2.fill_value = 1
        result = cop2 / cop
        self.assertTrue(np.isnan(result.fill_value))

    def test_fill_value_when_combine_const(self):
        # GH12723
        s = SparseSeries([0, 1, np.nan, 3, 4, 5], index=np.arange(6))

        exp = s.fillna(0).add(2)
        res = s.add(2, fill_value=0)
        # NOTE(review): the rest of this class calls tm.assert_series_equal;
        # self.assert_series_equal is presumably provided by a base class.
        # Confirm.
        self.assert_series_equal(res, exp)

    def test_shift(self):
        series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6))

        # shift(0) returns an equal but distinct object
        shifted = series.shift(0)
        self.assertIsNot(shifted, series)
        tm.assert_sp_series_equal(shifted, series)

        _dense_series_compare(series, lambda s: s.shift(1))
        _dense_series_compare(series, lambda s: s.shift(-2))

        # shifting with a frequency on a business-day index
        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=bdate_range('1/1/2000', periods=6))
        _dense_series_compare(series, lambda s: s.shift(2, freq='B'))
        _dense_series_compare(series,
                              lambda s: s.shift(2, freq=datetools.bday))

    def test_shift_nan(self):
        # GH 12908
        orig = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0])
        shift_periods = (0, 1, 2, 3, -1, -2, -3, -4)

        sparse = orig.to_sparse()
        for periods in shift_periods:
            tm.assert_sp_series_equal(sparse.shift(periods),
                                      orig.shift(periods).to_sparse())

        sparse = orig.to_sparse(fill_value=0)
        for periods in shift_periods:
            tm.assert_sp_series_equal(
                sparse.shift(periods),
                orig.shift(periods).to_sparse(fill_value=0))

    def test_shift_dtype(self):
        # GH 12908
        orig = pd.Series([1, 2, 3, 4], dtype=np.int64)
        sparse = orig.to_sparse()

        for periods in (0, 1, 2, 3, -1, -2, -3, -4):
            tm.assert_sp_series_equal(sparse.shift(periods),
                                      orig.shift(periods).to_sparse())

    def test_shift_dtype_fill_value(self):
        # GH 12908
        orig = pd.Series([1, 0, 0, 4], dtype=np.int64)
        sparse = orig.to_sparse(fill_value=0)

        for periods in (0, 1, 2, 3, -1, -2, -3, -4):
            tm.assert_sp_series_equal(
                sparse.shift(periods),
                orig.shift(periods).to_sparse(fill_value=0))

    def test_cumsum(self):
        # NaN-filled input: the result stays sparse and keeps its name
        result = self.bseries.cumsum()
        expected = self.bseries.to_dense().cumsum()
        tm.assertIsInstance(result, SparseSeries)
        self.assertEqual(result.name, self.bseries.name)
        tm.assert_series_equal(result.to_dense(), expected)

        # zero-filled input: the result is compared directly as a Series
        result = self.zbseries.cumsum()
        expected = self.zbseries.to_dense().cumsum()
        tm.assertIsInstance(result, Series)
        tm.assert_series_equal(result, expected)

    def test_combine_first(self):
        # combining with a sparse or a dense series gives the same result
        # as the dense computation converted back to sparse
        s = self.bseries

        result = s[::2].combine_first(s)
        result2 = s[::2].combine_first(s.to_dense())

        expected = s[::2].to_dense().combine_first(s.to_dense())
        expected = expected.to_sparse(fill_value=s.fill_value)

        tm.assert_sp_series_equal(result, result2)
        tm.assert_sp_series_equal(result, expected)
class TestSparseSeries(TestCase, test_series.CheckNameIntegration):
    """Unit tests for SparseSeries.

    Most tests compare a sparse result against the same operation carried
    out on the dense equivalent (``to_dense()``).  Fixtures are rebuilt by
    ``setUp`` before every test, which matters because several tests write
    into ``sp_values`` in place.
    """

    def setUp(self):
        # Fixture naming: leading 'b' / 'i' is the sparse index kind
        # ('block' / 'integer'); a leading 'z' means fill_value=0 instead
        # of the default fill value; a trailing '2' means the second data
        # set.
        arr, index = _test_data1()

        date_index = DateRange('1/1/2011', periods=len(index))

        self.bseries = SparseSeries(arr, index=index, kind='block')
        self.bseries.name = 'bseries'

        # Alias read by the CheckNameIntegration mixin -- presumably;
        # nothing in this class uses self.ts directly (TODO confirm).
        self.ts = self.bseries

        # Same data as bseries, but on a date index.
        self.btseries = SparseSeries(arr, index=date_index, kind='block')

        self.iseries = SparseSeries(arr, index=index, kind='integer')

        arr, index = _test_data2()
        self.bseries2 = SparseSeries(arr, index=index, kind='block')
        self.iseries2 = SparseSeries(arr, index=index, kind='integer')

        arr, index = _test_data1_zero()
        self.zbseries = SparseSeries(arr, index=index, kind='block',
                                     fill_value=0)
        self.ziseries = SparseSeries(arr, index=index, kind='integer',
                                     fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2 = SparseSeries(arr, index=index, kind='block',
                                      fill_value=0)
        self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
                                      fill_value=0)

    def test_construct_DataFrame_with_sp_series(self):
        # Smoke test: building a DataFrame from a SparseSeries must not
        # raise.  The result is intentionally not inspected.
        # it works!
        df = DataFrame({'col': self.bseries})

    def test_sparse_to_dense(self):
        # to_dense() must reproduce the raw arrays the fixtures were built
        # from, for both index kinds and both fill values.
        arr, index = _test_data1()
        series = self.bseries.to_dense()
        assert_equal(series, arr)

        # sparse_only=True is expected to keep only the finite entries.
        series = self.bseries.to_dense(sparse_only=True)
        assert_equal(series, arr[np.isfinite(arr)])

        series = self.iseries.to_dense()
        assert_equal(series, arr)

        arr, index = _test_data1_zero()
        series = self.zbseries.to_dense()
        assert_equal(series, arr)

        series = self.ziseries.to_dense()
        assert_equal(series, arr)

    def test_dense_to_sparse(self):
        # Round trip: dense -> to_sparse(kind=...) must equal the fixture
        # of the matching kind.
        series = self.bseries.to_dense()
        bseries = series.to_sparse(kind='block')
        iseries = series.to_sparse(kind='integer')
        assert_sp_series_equal(bseries, self.bseries)
        assert_sp_series_equal(iseries, self.iseries)

        # non-NaN fill value
        series = self.zbseries.to_dense()
        zbseries = series.to_sparse(kind='block', fill_value=0)
        ziseries = series.to_sparse(kind='integer', fill_value=0)
        assert_sp_series_equal(zbseries, self.zbseries)
        assert_sp_series_equal(ziseries, self.ziseries)

    def test_to_dense_preserve_name(self):
        # The name set in setUp must survive to_dense().
        assert (self.bseries.name is not None)
        result = self.bseries.to_dense()
        self.assertEquals(result.name, self.bseries.name)

    def test_constructor(self):
        # Checks fixture invariants, construction from SparseSeries and
        # Series inputs, and the copy semantics of the constructor.
        # test setup guys
        self.assert_(np.isnan(self.bseries.fill_value))
        self.assert_(isinstance(self.bseries.sp_index, BlockIndex))
        self.assert_(np.isnan(self.iseries.fill_value))
        self.assert_(isinstance(self.iseries.sp_index, IntIndex))

        self.assertEquals(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0))

        # pass SparseSeries
        s2 = SparseSeries(self.bseries)
        s3 = SparseSeries(self.iseries)
        s4 = SparseSeries(self.zbseries)
        assert_sp_series_equal(s2, self.bseries)
        assert_sp_series_equal(s3, self.iseries)
        assert_sp_series_equal(s4, self.zbseries)

        # Sparse time series works
        date_index = DateRange('1/1/2000', periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        self.assert_(isinstance(s5, SparseTimeSeries))

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default: a write through sp_values must
        # be visible in the array that was passed in.
        values = np.ones(len(self.bseries.sp_values))
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        sp.sp_values[:5] = 97
        self.assert_(values[0] == 97)

        # but can make it copy!  With copy=True the write below must NOT
        # reach `values`, which therefore still holds 97.
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
                          copy=True)
        sp.sp_values[:5] = 100
        self.assert_(values[0] == 97)

    def test_constructor_ndarray(self):
        # Placeholder: no assertions yet.
        pass

    def test_constructor_nonnan(self):
        # With fill_value=0 the dense values must equal the input list,
        # NaN entries included.
        arr = [0, 0, 0, nan, nan]
        sp_series = SparseSeries(arr, fill_value=0)
        assert_equal(sp_series.values, arr)

    def test_copy_astype(self):
        # astype(float) returns a new object that shares the sparse index
        # but owns its data; copy() is deep unless deep=False is given.
        cop = self.bseries.astype(np.float_)
        self.assert_(cop is not self.bseries)
        self.assert_(cop.sp_index is self.bseries.sp_index)
        self.assert_(cop.dtype == np.float64)

        cop2 = self.iseries.copy()

        assert_sp_series_equal(cop, self.bseries)
        assert_sp_series_equal(cop2, self.iseries)

        # test that data is copied
        cop.sp_values[:5] = 97
        self.assert_(cop.sp_values[0] == 97)
        self.assert_(self.bseries.sp_values[0] != 97)

        # correct fill value
        zbcop = self.zbseries.copy()
        zicop = self.ziseries.copy()

        assert_sp_series_equal(zbcop, self.zbseries)
        assert_sp_series_equal(zicop, self.ziseries)

        # no deep copy: writes through the shallow copy must show up in
        # the original.
        view = self.bseries.copy(deep=False)
        view.sp_values[:5] = 5
        self.assert_((self.bseries.sp_values[:5] == 5).all())

    def test_astype(self):
        # Casting the bseries fixture to int64 is expected to raise.
        self.assertRaises(Exception, self.bseries.astype, np.int64)

    def test_kind(self):
        # `kind` must report the value passed to the constructor.
        self.assertEquals(self.bseries.kind, 'block')
        self.assertEquals(self.iseries.kind, 'integer')

    def test_pickle(self):
        # A pickle round trip must preserve both the sparse and the dense
        # representation of every basic fixture.
        def _test_roundtrip(series):
            pickled = pickle.dumps(series, protocol=pickle.HIGHEST_PROTOCOL)
            unpickled = pickle.loads(pickled)
            assert_sp_series_equal(series, unpickled)
            assert_series_equal(series.to_dense(), unpickled.to_dense())

        self._check_all(_test_roundtrip)

    def _check_all(self, check_func):
        # Helper: run `check_func` on the four basic fixtures (block and
        # integer kind, default and zero fill value).
        check_func(self.bseries)
        check_func(self.iseries)
        check_func(self.zbseries)
        check_func(self.ziseries)

    def test_getitem(self):
        # Scalar __getitem__ must agree with the dense series for label,
        # positional and negative positional access.
        def _check_getitem(sp, dense):
            for idx, val in dense.iteritems():
                assert_almost_equal(val, sp[idx])

            for i in xrange(len(dense)):
                assert_almost_equal(sp[i], dense[i])
                # j = np.float64(i)
                # assert_almost_equal(sp[j], dense[j])

            # negative getitem works
            # (i == 0 gives -0 == 0, i.e. it repeats the first element)
            for i in xrange(len(dense)):
                assert_almost_equal(sp[-i], dense[-i])

        _check_getitem(self.bseries, self.bseries.to_dense())
        _check_getitem(self.btseries, self.btseries.to_dense())

        _check_getitem(self.zbseries, self.zbseries.to_dense())
        _check_getitem(self.iseries, self.iseries.to_dense())
        _check_getitem(self.ziseries, self.ziseries.to_dense())

        # exception handling
        self.assertRaises(Exception, self.bseries.__getitem__,
                          len(self.bseries) + 1)

        # index not contained
        self.assertRaises(Exception, self.btseries.__getitem__,
                          self.btseries.index[-1] + BDay())

    def test_get_get_value(self):
        # get() mirrors __getitem__ but returns None for a missing key;
        # get_value() mirrors __getitem__.
        assert_almost_equal(self.bseries.get(10), self.bseries[10])
        self.assert_(self.bseries.get(len(self.bseries) + 1) is None)

        dt = self.btseries.index[10]
        result = self.btseries.get(dt)
        expected = self.btseries.to_dense()[dt]
        assert_almost_equal(result, expected)

        assert_almost_equal(self.bseries.get_value(10), self.bseries[10])

    def test_set_value(self):
        # set_value() must return a new object, both when overwriting an
        # existing label and when appending a new one.
        idx = self.btseries.index[7]
        res = self.btseries.set_value(idx, 0)
        self.assert_(res is not self.btseries)
        self.assertEqual(res[idx], 0)

        res = self.iseries.set_value('foobar', 0)
        self.assert_(res is not self.iseries)
        self.assert_(res.index[-1] == 'foobar')
        self.assertEqual(res['foobar'], 0)

    def test_getitem_slice(self):
        # Slicing must be equivalent to reindexing on the sliced index.
        idx = self.bseries.index
        res = self.bseries[::2]
        self.assert_(isinstance(res, SparseSeries))
        assert_sp_series_equal(res, self.bseries.reindex(idx[::2]))

        res = self.bseries[:5]
        self.assert_(isinstance(res, SparseSeries))
        assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))

        res = self.bseries[5:]
        assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))

        # negative indices
        res = self.bseries[:-3]
        assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))

    def test_take(self):
        # take() must match the dense take for float, unordered and
        # repeated positions, and raise for out-of-range positions.
        def _compare_with_dense(sp):
            dense = sp.to_dense()

            def _compare(idx):
                dense_result = dense.take(idx).values
                sparse_result = sp.take(idx)
                assert_almost_equal(dense_result, sparse_result)

            _compare([1., 2., 3., 4., 5., 0.])
            _compare([7, 2, 9, 0, 4])
            _compare([3, 6, 3, 4, 7])

        self._check_all(_compare_with_dense)

        self.assertRaises(Exception, self.bseries.take, [-1, 0])
        self.assertRaises(Exception, self.bseries.take,
                          [0, len(self.bseries) + 1])

        # Corner case: every value is NaN.
        # The length is an int: NumPy documents `shape` as an int or a
        # sequence of ints, and newer releases reject a float here.
        sp = SparseSeries(np.ones(10) * nan)
        assert_almost_equal(sp.take([0, 1, 2, 3, 4]), np.repeat(nan, 5))

    def test_setitem(self):
        # Item assignment is expected to raise for both index kinds.
        self.assertRaises(Exception, self.bseries.__setitem__, 5, 7.)
        self.assertRaises(Exception, self.iseries.__setitem__, 5, 7.)

    def test_setslice(self):
        # Slice assignment is expected to raise.
        self.assertRaises(Exception, self.bseries.__setslice__, 5, 10, 7.)

    def test_operators(self):
        # Arithmetic on sparse operands must match the same arithmetic on
        # their dense equivalents, in both operand orders.
        def _check_op(a, b, op):
            sp_result = op(a, b)
            # Scalars and dense operands are passed through unchanged.
            adense = a.to_dense() if isinstance(a, SparseSeries) else a
            bdense = b.to_dense() if isinstance(b, SparseSeries) else b
            dense_result = op(adense, bdense)
            assert_almost_equal(sp_result.to_dense(), dense_result)

        def check(a, b):
            _check_op(a, b, operator.add)
            _check_op(a, b, operator.sub)
            _check_op(a, b, operator.truediv)
            _check_op(a, b, operator.floordiv)
            _check_op(a, b, operator.mul)

            # Swapped operands, to exercise the reflected code path.
            _check_op(a, b, lambda x, y: operator.add(y, x))
            _check_op(a, b, lambda x, y: operator.sub(y, x))
            _check_op(a, b, lambda x, y: operator.truediv(y, x))
            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
            _check_op(a, b, lambda x, y: operator.mul(y, x))

            # NaN ** 0 = 1 in C?
            # _check_op(a, b, operator.pow)
            # _check_op(a, b, lambda x, y: operator.pow(y, x))

        check(self.bseries, self.bseries)
        check(self.iseries, self.iseries)
        check(self.bseries, self.iseries)

        check(self.bseries, self.bseries2)
        check(self.bseries, self.iseries2)
        check(self.iseries, self.iseries2)

        # scalar value
        check(self.bseries, 5)

        # zero-based
        check(self.zbseries, self.zbseries * 2)
        check(self.zbseries, self.zbseries2)
        check(self.ziseries, self.ziseries2)

        # with dense
        result = self.bseries + self.bseries.to_dense()
        assert_sp_series_equal(result, self.bseries + self.bseries)

    # @dec.knownfailureif(True, 'Known NumPy failer as of 1.5.1')
    def test_operators_corner2(self):
        # Always skipped.  The statements after the raise are unreachable
        # and kept only as a record of the intended check.
        raise nose.SkipTest('known failer on numpy 1.5.1')

        # NumPy circumvents __r*__ operations
        val = np.float64(3.0)
        result = val - self.zbseries
        assert_sp_series_equal(result, 3 - self.zbseries)

    def test_binary_operators(self):
        # Every in-place operator is expected to raise
        # NotImplementedError.
        def _check_inplace_op(op):
            tmp = self.bseries.copy()
            self.assertRaises(NotImplementedError, op, tmp, self.bseries)
        inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', 'ipow']
        for op in inplace_ops:
            _check_inplace_op(getattr(operator, op))

    def test_reindex(self):
        # reindex() on the sparse series must match reindexing the dense
        # series and converting back with the same fill value.
        def _compare_with_series(sps, new_index):
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            assert_sp_series_equal(spsre, seriesre)
            assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases: reindexing on the same index gives an equal but
        # distinct object.
        same_index = self.bseries.reindex(self.bseries.index)
        assert_sp_series_equal(self.bseries, same_index)
        self.assert_(same_index is not self.bseries)

        # corner cases: empty series
        sp = SparseSeries([], index=[])
        # NOTE(review): sp_zero is constructed but never compared; only
        # the construction itself is exercised.
        sp_zero = SparseSeries([], index=[], fill_value=0)
        _compare_with_series(sp, np.arange(10))

        # copy=True: writes to the result must not reach the original
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        self.assert_((self.bseries.sp_values != 1.).all())

        # copy=False: the result shares its data with the original
        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        self.assert_((self.bseries.sp_values == 1.).all())

    def test_sparse_reindex(self):
        # sparse_reindex() must adopt the new sparse index object itself
        # and produce the values a dense reindex + fillna would give.
        length = 10

        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values, sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            self.assert_(reindexed.sp_index is index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            # NOTE(review): the statement below passes no level argument
            # and its result is unused, so it does not test what the
            # comment above describes.
            expected = expected.reindex(int_indices2).fillna(fill_value)

        def _check_with_fill_value(values, first, second, fill_value=nan):
            # Run the same check with IntIndex and BlockIndex encodings.
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.)

        # Targets: subset, superset, disjoint, partial overlap, empty.
        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

    def test_repr(self):
        # Smoke test: repr() must not raise; the strings are not checked.
        bsrepr = repr(self.bseries)
        isrepr = repr(self.iseries)

    def test_iter(self):
        # Placeholder: no assertions yet.
        pass

    def test_truncate(self):
        # Placeholder: no assertions yet.
        pass

    def test_fillna(self):
        # Placeholder: no assertions yet.
        pass

    def test_groupby(self):
        # Placeholder: no assertions yet.
        pass

    def test_reductions(self):
        # Each reduction on the sparse series must equal the same
        # reduction on its dense equivalent.
        def _compare_with_dense(obj, op):
            sparse_result = getattr(obj, op)()
            series = obj.to_dense()
            dense_result = getattr(series, op)()
            self.assertEquals(sparse_result, dense_result)

        to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']

        def _compare_all(obj):
            for op in to_compare:
                _compare_with_dense(obj, op)

        # The fixtures are mutated in place below so that NaNs also
        # appear among the stored sparse values.
        _compare_all(self.bseries)
        self.bseries.sp_values[5:10] = np.NaN
        _compare_all(self.bseries)

        _compare_all(self.zbseries)
        self.zbseries.sp_values[5:10] = np.NaN
        _compare_all(self.zbseries)

        # A fill value that is neither NaN nor zero.
        series = self.zbseries.copy()
        series.fill_value = 2
        _compare_all(series)

    def test_valid(self):
        # valid() must match the dense valid() in values and index; the
        # result is asserted to store exactly two sparse values.
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.valid()
        assert_almost_equal(sp_valid, sp.to_dense().valid())
        self.assert_(sp_valid.index.equals(sp.to_dense().valid().index))
        self.assertEquals(len(sp_valid.sp_values), 2)

    def test_homogenize(self):
        # After spf.homogenize every series must carry the expected
        # common sparse index.
        def _check_matches(indices, expected):
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices,
                                       sparse_index=idx)
            homogenized = spf.homogenize(data)

            for k, v in homogenized.iteritems():
                assert (v.sp_index.equals(expected))

        indices1 = [BlockIndex(10, [2], [7]),
                    BlockIndex(10, [1, 6], [3, 4]),
                    BlockIndex(10, [0], [10])]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        # Identical inputs: the common index equals either input.
        indices2 = [BlockIndex(10, [2], [7]),
                    BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {'a': SparseSeries(np.arange(7), sparse_index=expected2,
                                  fill_value=0)}
        nose.tools.assert_raises(Exception, spf.homogenize, data)

    def test_fill_value_corner(self):
        # Dividing by a series whose fill value is 0 must produce a NaN
        # fill value in the result, whatever the numerator's fill value.
        cop = self.zbseries.copy()
        cop.fill_value = 0
        result = self.bseries / cop

        self.assert_(np.isnan(result.fill_value))

        cop2 = self.zbseries.copy()
        cop2.fill_value = 1
        result = cop2 / cop
        self.assert_(np.isnan(result.fill_value))

    def test_shift(self):
        # shift() must match the dense shift, on an integer index and on
        # a date index with a time rule / offset.
        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=np.arange(6))

        # A zero shift returns an equal but distinct object.
        shifted = series.shift(0)
        self.assert_(shifted is not series)
        assert_sp_series_equal(shifted, series)

        f = lambda s: s.shift(1)
        _dense_series_compare(series, f)

        f = lambda s: s.shift(-2)
        _dense_series_compare(series, f)

        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=DateRange('1/1/2000', periods=6))
        f = lambda s: s.shift(2, timeRule='WEEKDAY')
        _dense_series_compare(series, f)

        f = lambda s: s.shift(2, offset=datetools.bday)
        _dense_series_compare(series, f)

    def test_cumsum(self):
        # bseries: the result is asserted to be a SparseSeries that keeps
        # the name and agrees with the dense cumsum.
        result = self.bseries.cumsum()
        expected = self.bseries.to_dense().cumsum()
        self.assert_(isinstance(result, SparseSeries))
        self.assertEquals(result.name, self.bseries.name)
        assert_series_equal(result.to_dense(), expected)

        # zbseries (fill_value=0): the result is compared directly with
        # the dense cumsum and only required to be a Series instance.
        result = self.zbseries.cumsum()
        expected = self.zbseries.to_dense().cumsum()
        self.assert_(isinstance(result, Series))
        assert_series_equal(result, expected)