def test_sparseseries_roundtrip(self):
    """Round-trip SparseArray -> SparseSeries -> SparseArray (GH 13999).

    For both sparse index kinds and several fill values, wrapping a
    SparseArray in a SparseSeries and converting it back must compare
    equal to the original array.
    """
    # GH 13999
    for kind in ['integer', 'block']:
        for fill in [1, np.nan, 0]:
            arr = SparseArray([np.nan, 1, np.nan, 2, 3], kind=kind,
                              fill_value=fill)
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)

            arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
                              kind=kind, fill_value=fill)

            # once passing the dtype explicitly on the way back ...
            res = SparseArray(SparseSeries(arr), dtype=np.int64)
            tm.assert_sp_array_equal(arr, res)

            # ... and once without passing a dtype
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)

        for fill in [True, False, np.nan]:
            # The builtin ``bool`` is used instead of the ``np.bool`` alias,
            # which newer NumPy releases no longer provide.
            arr = SparseArray([True, False, True, True], dtype=bool,
                              kind=kind, fill_value=fill)
            # A second, verbatim-identical conversion/assert pair used to
            # follow here; it added no coverage and was dropped.
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)
def test_constructor(self):
    """Check SparseSeries construction from the fixtures and raw arrays.

    Covers the fixture invariants, construction from another SparseSeries
    (with and without an explicit name), construction with a
    ``bdate_range`` index, construction from a dense Series, and the
    effect of the ``copy`` flag on the backing values.
    """
    # test setup guys
    for fixture, index_type in ((self.bseries, BlockIndex),
                                (self.iseries, IntIndex)):
        self.assertTrue(np.isnan(fixture.fill_value))
        tm.assertIsInstance(fixture.sp_index, index_type)

    self.assertEqual(self.zbseries.fill_value, 0)
    zero_filled = self.zbseries.values.values
    dense_zero_filled = self.bseries.to_dense().fillna(0).values
    assert_equal(zero_filled, dense_zero_filled)

    # pass SparseSeries
    def _check_const(sparse, name):
        # use passed series name
        same_name = SparseSeries(sparse)
        tm.assert_sp_series_equal(same_name, sparse)
        self.assertEqual(sparse.name, name)
        self.assertEqual(same_name.name, name)

        # use passed name
        renamed = SparseSeries(sparse, name='x')
        tm.assert_sp_series_equal(renamed, sparse, check_names=False)
        self.assertEqual(renamed.name, 'x')

    # each fixture is expected to carry its own attribute name as its name
    for attr in ('bseries', 'iseries', 'zbseries'):
        _check_const(getattr(self, attr), attr)

    # Sparse time series works
    business_days = bdate_range('1/1/2000', periods=len(self.bseries))
    dated = SparseSeries(self.bseries, index=business_days)
    tm.assertIsInstance(dated, SparseSeries)

    # pass Series
    from_dense = SparseSeries(self.bseries.to_dense())
    assert_equal(self.bseries.sp_values, from_dense.sp_values)

    # pass dict?

    # don't copy the data by default
    values = np.ones(self.bseries.npoints)
    shared = SparseSeries(values, sparse_index=self.bseries.sp_index)
    shared.sp_values[:5] = 97
    self.assertEqual(values[0], 97)

    self.assertEqual(len(shared), 20)
    self.assertEqual(shared.shape, (20, ))

    # but can make it copy!
    detached = SparseSeries(values, sparse_index=self.bseries.sp_index,
                            copy=True)
    detached.sp_values[:5] = 100
    self.assertEqual(values[0], 97)

    self.assertEqual(len(detached), 20)
    self.assertEqual(detached.shape, (20, ))
def setUp(self):
    """Build the block/integer SparseSeries fixtures used by the tests.

    Four data sets are used (``_test_data1``, ``_test_data2`` and their
    ``_zero`` variants); each yields a 'block' and an 'integer' series.
    The ``_zero`` variants are built with ``fill_value=0``.
    """
    data, labels = _test_data1()
    dates = bdate_range('1/1/2011', periods=len(labels))

    self.bseries = SparseSeries(data, index=labels, kind='block',
                                name='bseries')
    self.ts = self.bseries  # alias of the same object

    self.btseries = SparseSeries(data, index=dates, kind='block')

    self.iseries = SparseSeries(data, index=labels, kind='integer',
                                name='iseries')

    data, labels = _test_data2()
    self.bseries2 = SparseSeries(data, index=labels, kind='block')
    self.iseries2 = SparseSeries(data, index=labels, kind='integer')

    data, labels = _test_data1_zero()
    self.zbseries = SparseSeries(data, index=labels, kind='block',
                                 fill_value=0, name='zbseries')
    self.ziseries = SparseSeries(data, index=labels, kind='integer',
                                 fill_value=0)

    data, labels = _test_data2_zero()
    self.zbseries2 = SparseSeries(data, index=labels, kind='block',
                                  fill_value=0)
    self.ziseries2 = SparseSeries(data, index=labels, kind='integer',
                                  fill_value=0)
def test_constructor_scalar(self):
    """Build a SparseSeries from a scalar broadcast over an index.

    After reindexing from 100 to 200 labels, the first 100 entries must
    still hold the scalar and the new ones must be null.  A NaN scalar
    must also produce a series of the index's length.
    """
    data = 5
    sp = SparseSeries(data, np.arange(100))
    sp = sp.reindex(np.arange(200))
    # ``.loc`` replaces the deprecated ``.ix``; on this integer index both
    # slice by label, so the selected ranges are unchanged.
    self.assertTrue((sp.loc[:99] == data).all())
    self.assertTrue(isnull(sp.loc[100:]).all())

    data = np.nan
    sp = SparseSeries(data, np.arange(100))
    self.assertEqual(len(sp), 100)
    self.assertEqual(sp.shape, (100, ))
def _check_const(sparse, name):
    """Check that SparseSeries(sparse) keeps or overrides the name.

    ``name`` is the name ``sparse`` is expected to carry.  ``self`` is
    taken from the enclosing scope.
    """
    # use passed series name
    same_name = SparseSeries(sparse)
    tm.assert_sp_series_equal(same_name, sparse)
    self.assertEqual(sparse.name, name)
    self.assertEqual(same_name.name, name)

    # use passed name
    renamed = SparseSeries(sparse, name='x')
    tm.assert_sp_series_equal(renamed, sparse, check_names=False)
    self.assertEqual(renamed.name, 'x')
def setUp(self):
    """Create the two named 'block' fixtures: ``bseries`` and ``zbseries``.

    ``zbseries`` is built from ``_test_data1_zero`` with ``fill_value=0``.
    """
    data, labels = _test_data1()
    self.bseries = SparseSeries(data, index=labels, kind='block',
                                name='bseries')

    zero_data, zero_labels = _test_data1_zero()
    self.zbseries = SparseSeries(zero_data, index=zero_labels,
                                 kind='block', fill_value=0,
                                 name='zbseries')
def test_abs(self):
    """``.abs()``, builtin ``abs`` and ``np.abs`` agree and keep the name."""
    series = SparseSeries([1, 2, -3], name='x')
    expected = SparseSeries([1, 2, 3], name='x')

    # the three spellings are checked in the same order as before
    for take_abs in (lambda obj: obj.abs(), abs, np.abs):
        result = take_abs(series)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')
def _check_frame_ops(self, frame):
    """Compare arithmetic on a sparse ``frame`` against its dense twin.

    Each operator in add/sub/mul/truediv/floordiv is applied to
    frame-vs-frame, frame-vs-column-series (via the named method with
    ``axis='index'``) and frame-vs-row-series operands, and the sparse
    result is compared with the sparsified dense result.
    """
    def _compare_to_dense(a, b, da, db, op):
        sparse_result = op(a, b)
        dense_result = op(da, db)

        # sparsify the dense result with the fill value the sparse op chose
        fill = sparse_result.default_fill_value
        dense_result = dense_result.to_sparse(fill_value=fill)
        tm.assert_sp_frame_equal(sparse_result, dense_result,
                                 exact_indices=False)

        if isinstance(a, DataFrame) and isinstance(db, DataFrame):
            # sparse (op) dense must also give a sparse frame
            mixed_result = op(a, db)
            tm.assertIsInstance(mixed_result, SparseDataFrame)
            tm.assert_sp_frame_equal(mixed_result, sparse_result,
                                     exact_indices=False)

    opnames = ['add', 'sub', 'mul', 'truediv', 'floordiv']
    ops = [getattr(operator, name) for name in opnames]

    fidx = frame.index

    # time series operations

    series = [
        frame['A'], frame['B'], frame['C'], frame['D'],
        frame['A'].reindex(fidx[:7]), frame['A'].reindex(fidx[::2]),
        SparseSeries([], index=[])
    ]

    for opname in opnames:
        _compare_to_dense(frame, frame[::2], frame.to_dense(),
                          frame[::2].to_dense(),
                          getattr(operator, opname))

        # 2304, no auto-broadcasting
        # Built once per operator with the name bound explicitly, rather
        # than as a late-binding lambda recreated for every series.
        def _method_on_index(a, b, opname=opname):
            return getattr(a, opname)(b, axis='index')

        for s in series:
            _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(),
                              _method_on_index)

            # rops are not implemented
            # _compare_to_dense(s, frame, s.to_dense(),
            #                   frame.to_dense(), f)

    # cross-sectional operations
    series = [
        frame.xs(fidx[0]), frame.xs(fidx[3]), frame.xs(fidx[5]),
        frame.xs(fidx[7]), frame.xs(fidx[5])[:2]
    ]

    for op in ops:
        for s in series:
            _compare_to_dense(frame, s, frame.to_dense(), s, op)
            _compare_to_dense(s, frame, s, frame.to_dense(), op)

    # it works!
    result = self.frame + self.frame.loc[:, ['A', 'B']]  # noqa
def test_fill_value_when_combine_const(self):
    """``add(2, fill_value=0)`` must equal ``fillna(0).add(2)`` (GH12723)."""
    # GH12723
    sparse = SparseSeries([0, 1, np.nan, 3, 4, 5], index=np.arange(6))

    expected = sparse.fillna(0).add(2)
    result = sparse.add(2, fill_value=0)
    self.assert_series_equal(result, expected)
def test_homogenize(self):
    """``spf.homogenize`` gives every series the same expected sparse index.

    Also checks that a series with a non-NaN fill value is rejected.
    """
    def _check_matches(indices, expected):
        data = {}
        for i, idx in enumerate(indices):
            data[i] = SparseSeries(idx.to_int_index().indices,
                                   sparse_index=idx)
        homogenized = spf.homogenize(data)

        # Only the values are needed; ``.values()`` also works on Python 3,
        # where ``dict.iteritems`` does not exist.
        for v in homogenized.values():
            assert (v.sp_index.equals(expected))

    indices1 = [
        BlockIndex(10, [2], [7]), BlockIndex(10, [1, 6], [3, 4]),
        BlockIndex(10, [0], [10])
    ]
    expected1 = BlockIndex(10, [2, 6], [2, 3])
    _check_matches(indices1, expected1)

    indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
    expected2 = indices2[0]
    _check_matches(indices2, expected2)

    # must have NaN fill value
    data = {
        'a': SparseSeries(np.arange(7), sparse_index=expected2,
                          fill_value=0)
    }
    nose.tools.assert_raises(Exception, spf.homogenize, data)
def test_valid(self):
    """``valid()`` on a zero-filled series matches the dense result.

    Values and index must agree with ``to_dense().valid()``, and exactly
    two sparse values are expected to remain.
    """
    sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

    sp_valid = sp.valid()
    assert_almost_equal(sp_valid, sp.to_dense().valid())
    # assertTrue/assertEqual replace the deprecated unittest aliases
    # ``assert_`` and ``assertEquals``.
    self.assertTrue(sp_valid.index.equals(sp.to_dense().valid().index))
    self.assertEqual(len(sp_valid.sp_values), 2)
def test_constructor_dtype(self):
    """Check the dtype and fill_value produced by the constructor.

    Float input gives float64, ``dtype=np.int64`` is honoured, and the
    fill value is NaN unless ``fill_value=0`` is passed.
    """
    # float data, default fill value
    default_fill = SparseSeries([np.nan, 1, 2, np.nan])
    self.assertEqual(default_fill.dtype, np.float64)
    self.assertTrue(np.isnan(default_fill.fill_value))

    # float data, explicit zero fill value
    zero_fill = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
    self.assertEqual(zero_fill.dtype, np.float64)
    self.assertEqual(zero_fill.fill_value, 0)

    # integer data, default fill value
    default_fill = SparseSeries([0, 1, 2, 4], dtype=np.int64)
    self.assertEqual(default_fill.dtype, np.int64)
    self.assertTrue(np.isnan(default_fill.fill_value))

    # integer data, explicit zero fill value
    zero_fill = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
    self.assertEqual(zero_fill.dtype, np.int64)
    self.assertEqual(zero_fill.fill_value, 0)
def test_homogenize(self):
    """``spf.homogenize`` gives every series the same expected sparse index.

    Also checks that a non-NaN fill value raises a TypeError mentioning
    "NaN fill value".
    """
    def _check_matches(indices, expected):
        by_position = {}
        for position, sparse_index in enumerate(indices):
            int_positions = sparse_index.to_int_index().indices
            by_position[position] = SparseSeries(
                int_positions, sparse_index=sparse_index)

        homogenized = spf.homogenize(by_position)

        for _key, series in compat.iteritems(homogenized):
            assert series.sp_index.equals(expected)

    # three partially overlapping block layouts
    overlapping = [
        BlockIndex(10, [2], [7]),
        BlockIndex(10, [1, 6], [3, 4]),
        BlockIndex(10, [0], [10]),
    ]
    _check_matches(overlapping, BlockIndex(10, [2, 6], [2, 3]))

    # two identical layouts
    identical = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
    shared_index = identical[0]
    _check_matches(identical, shared_index)

    # must have NaN fill value
    zero_filled = {
        'a': SparseSeries(np.arange(7), sparse_index=shared_index,
                          fill_value=0)
    }
    with tm.assertRaisesRegexp(TypeError, "NaN fill value"):
        spf.homogenize(zero_filled)
def _check_matches(indices, expected):
    """Homogenize one series per sparse index and compare with ``expected``.

    Each series holds the integer positions of its own sparse index as
    values; after ``spf.homogenize`` every result must carry ``expected``.
    """
    by_position = {}
    for position, sparse_index in enumerate(indices):
        int_positions = sparse_index.to_int_index().indices
        by_position[position] = SparseSeries(int_positions,
                                             sparse_index=sparse_index)

    homogenized = spf.homogenize(by_position)

    for _key, series in compat.iteritems(homogenized):
        assert series.sp_index.equals(expected)
def test_density(self):
    """``density`` is 0.7 for the series and 0.75 for the frame below."""
    # 7 of the 10 entries are non-NaN
    series = SparseSeries([nan, nan, nan, 0, 1, 2, 3, 4, 5, 6])
    self.assertEqual(series.density, 0.7)

    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    frame = SparseDataFrame(columns)
    self.assertEqual(frame.density, 0.75)
def _read_sparse_series(self, group, where=None):
    """Rebuild a SparseSeries from the pieces stored under ``group``.

    Reads the 'index', 'sp_values' and 'sp_index' entries plus the
    ``name``, ``fill_value`` and ``kind`` attributes of
    ``group._v_attrs`` (defaulting to None, None and 'block').
    ``where`` is accepted but not used in this method.
    """
    labels = self._read_index(group, 'index')
    values = _read_array(group, 'sp_values')
    sparse_index = self._read_index(group, 'sp_index')

    attrs = group._v_attrs
    series_name = getattr(attrs, 'name', None)
    fill = getattr(attrs, 'fill_value', None)
    index_kind = getattr(attrs, 'kind', 'block')

    return SparseSeries(values, index=labels, sparse_index=sparse_index,
                        kind=index_kind, fill_value=fill,
                        name=series_name)
def test_cumsum(self):
    """``cumsum`` matches the dense computation for both fixtures.

    For ``self.bseries`` the result is compared as a SparseSeries; for
    ``self.zbseries`` the result is expected not to be a SparseSeries.
    """
    sparse_cumsum = self.bseries.cumsum()
    dense_cumsum = self.bseries.to_dense().cumsum()
    tm.assert_sp_series_equal(sparse_cumsum, SparseSeries(dense_cumsum))

    # TODO: gh-12855 - return a SparseSeries here
    zero_cumsum = self.zbseries.cumsum()
    zero_expected = self.zbseries.to_dense().cumsum()
    self.assertNotIsInstance(zero_cumsum, SparseSeries)
    tm.assert_series_equal(zero_cumsum, zero_expected)
def test_shift(self):
    """``shift`` on a SparseSeries matches the dense behaviour.

    A zero shift must return a new, equal object; positive, negative and
    date-based shifts are compared via ``_dense_series_compare``.
    """
    series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6))

    shifted = series.shift(0)
    # assertTrue replaces the deprecated unittest alias ``assert_``
    self.assertTrue(shifted is not series)
    assert_sp_series_equal(shifted, series)

    f = lambda s: s.shift(1)
    _dense_series_compare(series, f)

    f = lambda s: s.shift(-2)
    _dense_series_compare(series, f)

    series = SparseSeries([nan, 1., 2., 3., nan, nan],
                          index=DateRange('1/1/2000', periods=6))
    f = lambda s: s.shift(2, timeRule='WEEKDAY')
    _dense_series_compare(series, f)

    f = lambda s: s.shift(2, offset=datetools.bday)
    _dense_series_compare(series, f)
def test_shift(self):
    """``shift`` on a SparseSeries matches the dense behaviour.

    A zero shift must return a new, equal object; positive, negative and
    frequency-based shifts are compared via ``_dense_series_compare``.
    """
    values = [nan, 1., 2., 3., nan, nan]

    int_indexed = SparseSeries(values, index=np.arange(6))

    unshifted = int_indexed.shift(0)
    self.assertIsNot(unshifted, int_indexed)
    tm.assert_sp_series_equal(unshifted, int_indexed)

    _dense_series_compare(int_indexed, lambda s: s.shift(1))
    _dense_series_compare(int_indexed, lambda s: s.shift(-2))

    date_indexed = SparseSeries(values,
                                index=bdate_range('1/1/2000', periods=6))
    _dense_series_compare(date_indexed, lambda s: s.shift(2, freq='B'))
    _dense_series_compare(date_indexed,
                          lambda s: s.shift(2, freq=datetools.bday))
def _check_frame_ops(self, frame):
    """Compare arithmetic on a sparse ``frame`` against its dense twin.

    Each operator in add/sub/mul/truediv/floordiv is applied to
    frame-vs-frame, frame-vs-column-series and frame-vs-row-series
    operands (in both orders for the series cases), and the sparse result
    is compared with the dense result sparsified using the frame's
    default fill value.
    """
    fill = frame.default_fill_value

    def _compare_to_dense(a, b, da, db, op):
        sparse_result = op(a, b)
        dense_result = op(da, db)
        dense_result = dense_result.to_sparse(fill_value=fill)
        assert_sp_frame_equal(sparse_result, dense_result,
                              exact_indices=False)

        if isinstance(a, DataFrame) and isinstance(db, DataFrame):
            # sparse (op) dense must also give a sparse frame
            mixed_result = op(a, db)
            # assertTrue replaces the deprecated unittest alias ``assert_``
            self.assertTrue(isinstance(mixed_result, SparseDataFrame))
            assert_sp_frame_equal(mixed_result, sparse_result,
                                  exact_indices=False)

    opnames = ['add', 'sub', 'mul', 'truediv', 'floordiv']
    ops = [getattr(operator, name) for name in opnames]

    fidx = frame.index

    # time series operations

    series = [
        frame['A'], frame['B'], frame['C'], frame['D'],
        frame['A'].reindex(fidx[:7]), frame['A'].reindex(fidx[::2]),
        SparseSeries([], index=[])
    ]

    for op in ops:
        _compare_to_dense(frame, frame[::2], frame.to_dense(),
                          frame[::2].to_dense(), op)
        for s in series:
            _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), op)
            _compare_to_dense(s, frame, s.to_dense(), frame.to_dense(), op)

    # cross-sectional operations
    series = [
        frame.xs(fidx[0]), frame.xs(fidx[3]), frame.xs(fidx[5]),
        frame.xs(fidx[7]), frame.xs(fidx[5])[:2]
    ]

    for op in ops:
        for s in series:
            _compare_to_dense(frame, s, frame.to_dense(), s, op)
            _compare_to_dense(s, frame, s, frame.to_dense(), op)
def test_reindex(self):
    """``reindex`` on a SparseSeries agrees with reindexing the dense form.

    Also covers reindexing to the same index, an empty series, and the
    effect of the ``copy`` flag on the shared ``sp_values`` buffer.
    """
    def _compare_with_series(sps, new_index):
        sparse_reindexed = sps.reindex(new_index)

        dense_reindexed = sps.to_dense().reindex(new_index)
        expected = dense_reindexed.to_sparse(fill_value=sps.fill_value)

        tm.assert_sp_series_equal(sparse_reindexed, expected)
        tm.assert_series_equal(sparse_reindexed.to_dense(),
                               expected.to_dense())

    b_index = self.bseries.index
    zb_index = self.zbseries.index
    cases = [
        (self.bseries, b_index[::2]),
        (self.bseries, list(b_index[::2])),
        (self.bseries, b_index[:10]),
        (self.bseries, b_index[5:]),
        (self.zbseries, zb_index[::2]),
        (self.zbseries, zb_index[:10]),
        (self.zbseries, zb_index[5:]),
    ]
    for sparse, target in cases:
        _compare_with_series(sparse, target)

    # special cases
    same_index = self.bseries.reindex(self.bseries.index)
    tm.assert_sp_series_equal(self.bseries, same_index)
    self.assertIsNot(same_index, self.bseries)

    # corner cases
    empty = SparseSeries([], index=[])
    # TODO: sp_zero is not used anywhere...remove?
    sp_zero = SparseSeries([], index=[], fill_value=0)  # noqa
    _compare_with_series(empty, np.arange(10))

    # copy=True: writing to the result must leave the original untouched
    copied = self.bseries.reindex(self.bseries.index, copy=True)
    copied.sp_values[:] = 1.
    self.assertTrue((self.bseries.sp_values != 1.).all())

    # with copy=False the write is visible through the original
    viewed = self.bseries.reindex(self.bseries.index, copy=False)
    viewed.sp_values[:] = 1.
    tm.assert_numpy_array_equal(self.bseries.sp_values,
                                np.repeat(1., 10))
def _check(values, index1, index2, fill_value):
    """Check ``sparse_reindex`` from ``index1`` to ``index2``.

    The result must reuse ``index2`` itself as its sparse index, and its
    sparse values must match a dense Series reindexed over the integer
    positions and filled with ``fill_value``.  ``self`` is taken from the
    enclosing scope.
    """
    first_series = SparseSeries(values, sparse_index=index1,
                                fill_value=fill_value)
    reindexed = first_series.sparse_reindex(index2)
    # assertTrue replaces the deprecated unittest alias ``assert_``
    self.assertTrue(reindexed.sp_index is index2)

    int_indices1 = index1.to_int_index().indices
    int_indices2 = index2.to_int_index().indices

    expected = Series(values, index=int_indices1)
    expected = expected.reindex(int_indices2).fillna(fill_value)
    assert_almost_equal(expected.values, reindexed.sp_values)
def test_sparse_reindex(self):
    """``sparse_reindex`` matches a dense reindex for Int and Block indexes.

    Every case is run with NaN and zero fill values.  Passing something
    that is not a SparseIndex must raise a TypeError.
    """
    length = 10

    def _check(values, index1, index2, fill_value):
        first_series = SparseSeries(values, sparse_index=index1,
                                    fill_value=fill_value)
        reindexed = first_series.sparse_reindex(index2)
        self.assertIs(reindexed.sp_index, index2)

        int_indices1 = index1.to_int_index().indices
        int_indices2 = index2.to_int_index().indices

        expected = Series(values, index=int_indices1)
        expected = expected.reindex(int_indices2).fillna(fill_value)
        tm.assert_almost_equal(expected.values, reindexed.sp_values)
        # A second, never-read recomputation of ``expected`` used to follow
        # here (flagged by a TODO); it was dead code and has been removed.

    def _check_with_fill_value(values, first, second, fill_value=nan):
        i_index1 = IntIndex(length, first)
        i_index2 = IntIndex(length, second)

        b_index1 = i_index1.to_block_index()
        b_index2 = i_index2.to_block_index()

        _check(values, i_index1, i_index2, fill_value)
        _check(values, b_index1, b_index2, fill_value)

    def _check_all(values, first, second):
        _check_with_fill_value(values, first, second, fill_value=nan)
        _check_with_fill_value(values, first, second, fill_value=0)

    index1 = [2, 4, 5, 6, 8, 9]
    values1 = np.arange(6.)

    _check_all(values1, index1, [2, 4, 5])
    _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
    _check_all(values1, index1, [0, 1])
    _check_all(values1, index1, [0, 1, 7, 8, 9])
    _check_all(values1, index1, [])

    # a non-SparseIndex argument must be rejected
    first_series = SparseSeries(values1,
                                sparse_index=IntIndex(length, index1),
                                fill_value=nan)
    with tm.assertRaisesRegexp(TypeError,
                               'new index must be a SparseIndex'):
        first_series.sparse_reindex(0)
def test_reindex(self):
    """``reindex`` on a SparseSeries agrees with reindexing the dense form.

    Also covers reindexing to the same index, an empty series, and the
    effect of the ``copy`` flag on the shared ``sp_values`` buffer.
    """
    def _compare_with_series(sps, new_index):
        spsre = sps.reindex(new_index)

        series = sps.to_dense()
        seriesre = series.reindex(new_index)
        seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

        assert_sp_series_equal(spsre, seriesre)
        assert_series_equal(spsre.to_dense(), seriesre.to_dense())

    _compare_with_series(self.bseries, self.bseries.index[::2])
    _compare_with_series(self.bseries, list(self.bseries.index[::2]))
    _compare_with_series(self.bseries, self.bseries.index[:10])
    _compare_with_series(self.bseries, self.bseries.index[5:])

    _compare_with_series(self.zbseries, self.zbseries.index[::2])
    _compare_with_series(self.zbseries, self.zbseries.index[:10])
    _compare_with_series(self.zbseries, self.zbseries.index[5:])

    # special cases
    same_index = self.bseries.reindex(self.bseries.index)
    assert_sp_series_equal(self.bseries, same_index)
    # assertTrue replaces the deprecated unittest alias ``assert_``
    self.assertTrue(same_index is not self.bseries)

    # corner cases
    sp = SparseSeries([], index=[])
    # The zero-filled empty series was bound to a name that was never
    # read; the constructor call is kept, the unused binding is not.
    SparseSeries([], index=[], fill_value=0)
    _compare_with_series(sp, np.arange(10))

    # copy=True: writing to the result must leave the original untouched
    reindexed = self.bseries.reindex(self.bseries.index, copy=True)
    reindexed.sp_values[:] = 1.
    self.assertTrue((self.bseries.sp_values != 1.).all())

    # with copy=False the write is visible through the original
    reindexed = self.bseries.reindex(self.bseries.index, copy=False)
    reindexed.sp_values[:] = 1.
    self.assertTrue((self.bseries.sp_values == 1.).all())
def test_dropna(self):
    """Check ``valid()`` on a zero-filled series and ``dropna()`` on bseries.

    The ``valid()`` result is compared against the dense valid values with
    zeros removed; ``dropna()`` on ``self.bseries`` is expected to return
    something that is not a SparseSeries.
    """
    zero_filled = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

    sparse_valid = zero_filled.valid()

    dense_valid = zero_filled.to_dense().valid()
    nonzero = dense_valid[dense_valid != 0]
    expected_array = pd.SparseArray(nonzero.values, fill_value=0,
                                    kind='block')
    tm.assert_sp_array_equal(sparse_valid.values, expected_array)
    self.assert_index_equal(sparse_valid.index, nonzero.index)
    self.assertEqual(len(sparse_valid.sp_values), 2)

    dropped = self.bseries.dropna()
    dense_dropped = self.bseries.to_dense().dropna()
    self.assertNotIsInstance(dropped, SparseSeries)
    tm.assert_series_equal(dropped, dense_dropped)
def _check(values, index1, index2, fill_value):
    """Check ``sparse_reindex`` from ``index1`` to ``index2``.

    The result must reuse ``index2`` itself as its sparse index, and its
    sparse values must match a dense Series reindexed over the integer
    positions and filled with ``fill_value``.  ``self`` is taken from the
    enclosing scope.
    """
    first_series = SparseSeries(values, sparse_index=index1,
                                fill_value=fill_value)
    reindexed = first_series.sparse_reindex(index2)
    self.assertIs(reindexed.sp_index, index2)

    int_indices1 = index1.to_int_index().indices
    int_indices2 = index2.to_int_index().indices

    expected = Series(values, index=int_indices1)
    expected = expected.reindex(int_indices2).fillna(fill_value)
    tm.assert_almost_equal(expected.values, reindexed.sp_values)
    # A second, never-read recomputation of ``expected`` used to follow
    # here (flagged by a TODO); it was dead code and has been removed.
def test_dropna(self):
    """Check ``valid()`` on a zero-filled series and ``dropna()`` on bseries.

    The ``valid()`` result is compared against the dense valid values with
    zeros removed; ``dropna()`` on ``self.bseries`` is expected to return
    something that is not a SparseSeries.
    """
    zero_filled = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

    sparse_valid = zero_filled.valid()

    dense_valid = zero_filled.to_dense().valid()
    nonzero = dense_valid[dense_valid != 0]
    tm.assert_almost_equal(sparse_valid.values, nonzero.values)
    self.assertTrue(sparse_valid.index.equals(nonzero.index))
    self.assertEqual(len(sparse_valid.sp_values), 2)

    dropped = self.bseries.dropna()
    dense_dropped = self.bseries.to_dense().dropna()
    self.assertNotIsInstance(dropped, SparseSeries)
    tm.assert_series_equal(dropped, dense_dropped)
def test_take(self):
    """``take`` on a SparseSeries matches ``take`` on its dense form.

    Also checks that the two invalid index lists below raise, and that
    taking from an all-NaN series yields NaNs.
    """
    def _compare_with_dense(sp):
        dense = sp.to_dense()

        def _compare(idx):
            dense_result = dense.take(idx).values
            sparse_result = sp.take(idx)
            assert_almost_equal(dense_result, sparse_result)

        _compare([1., 2., 3., 4., 5., 0.])
        _compare([7, 2, 9, 0, 4])
        _compare([3, 6, 3, 4, 7])

    self._check_all(_compare_with_dense)

    self.assertRaises(Exception, self.bseries.take, [-1, 0])
    self.assertRaises(Exception, self.bseries.take,
                      [0, len(self.bseries) + 1])

    # Corner case
    # The shape must be an integer: ``np.ones(10.)`` is rejected by NumPy
    # versions that no longer accept float shapes.
    sp = SparseSeries(np.ones(10) * nan)
    assert_almost_equal(sp.take([0, 1, 2, 3, 4]), np.repeat(nan, 5))
def test_constructor(self):
    """Check SparseSeries construction from the fixtures and raw arrays.

    Covers the fixture invariants, construction from another SparseSeries,
    construction with a DateRange index, construction from a dense Series,
    and the effect of the ``copy`` flag on the backing values.

    assertTrue/assertEqual are used throughout in place of the deprecated
    unittest aliases ``assert_`` and ``assertEquals``.
    """
    # test setup guys
    self.assertTrue(np.isnan(self.bseries.fill_value))
    self.assertTrue(isinstance(self.bseries.sp_index, BlockIndex))
    self.assertTrue(np.isnan(self.iseries.fill_value))
    self.assertTrue(isinstance(self.iseries.sp_index, IntIndex))

    self.assertEqual(self.zbseries.fill_value, 0)
    assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0))

    # pass SparseSeries
    s2 = SparseSeries(self.bseries)
    s3 = SparseSeries(self.iseries)
    s4 = SparseSeries(self.zbseries)
    assert_sp_series_equal(s2, self.bseries)
    assert_sp_series_equal(s3, self.iseries)
    assert_sp_series_equal(s4, self.zbseries)

    # Sparse time series works
    date_index = DateRange('1/1/2000', periods=len(self.bseries))
    s5 = SparseSeries(self.bseries, index=date_index)
    self.assertTrue(isinstance(s5, SparseTimeSeries))

    # pass Series
    bseries2 = SparseSeries(self.bseries.to_dense())
    assert_equal(self.bseries.sp_values, bseries2.sp_values)

    # pass dict?

    # don't copy the data by default
    values = np.ones(len(self.bseries.sp_values))
    sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
    sp.sp_values[:5] = 97
    self.assertTrue(values[0] == 97)

    # but can make it copy!
    sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
                      copy=True)
    sp.sp_values[:5] = 100
    self.assertTrue(values[0] == 97)
def test_numpy_take(self):
    """``np.take`` on a SparseSeries matches the dense result.

    The ``out`` and ``mode`` keywords must be rejected with a ValueError.
    """
    sparse = SparseSeries([1.0, 2.0, 3.0])
    positions = [1, 2]

    sparse_taken = np.take(sparse, positions, axis=0).to_dense()
    dense_taken = np.take(sparse.to_dense(), positions, axis=0)
    tm.assert_series_equal(sparse_taken, dense_taken)

    out_msg = "the 'out' parameter is not supported"
    tm.assertRaisesRegexp(ValueError, out_msg, np.take,
                          sparse, positions, out=np.empty(sparse.shape))

    mode_msg = "the 'mode' parameter is not supported"
    tm.assertRaisesRegexp(ValueError, mode_msg, np.take,
                          sparse, positions, mode='clip')