コード例 #1
0
ファイル: test_series.py プロジェクト: shahinsh/pandas
class TestSparseSeries(tm.TestCase, SharedWithSparse):
    _multiprocess_can_split_ = True

    def setUp(self):
        arr, index = _test_data1()

        date_index = bdate_range('1/1/2011', periods=len(index))

        self.bseries = SparseSeries(arr,
                                    index=index,
                                    kind='block',
                                    name='bseries')
        self.ts = self.bseries

        self.btseries = SparseSeries(arr, index=date_index, kind='block')

        self.iseries = SparseSeries(arr,
                                    index=index,
                                    kind='integer',
                                    name='iseries')

        arr, index = _test_data2()
        self.bseries2 = SparseSeries(arr, index=index, kind='block')
        self.iseries2 = SparseSeries(arr, index=index, kind='integer')

        arr, index = _test_data1_zero()
        self.zbseries = SparseSeries(arr,
                                     index=index,
                                     kind='block',
                                     fill_value=0,
                                     name='zbseries')
        self.ziseries = SparseSeries(arr,
                                     index=index,
                                     kind='integer',
                                     fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2 = SparseSeries(arr,
                                      index=index,
                                      kind='block',
                                      fill_value=0)
        self.ziseries2 = SparseSeries(arr,
                                      index=index,
                                      kind='integer',
                                      fill_value=0)

    def test_constructor_dtype(self):
        arr = SparseSeries([np.nan, 1, 2, np.nan])
        self.assertEqual(arr.dtype, np.float64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
        self.assertEqual(arr.dtype, np.float64)
        self.assertEqual(arr.fill_value, 0)

        arr = SparseSeries([0, 1, 2, 4], dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertEqual(arr.fill_value, 0)

    def test_iteration_and_str(self):
        [x for x in self.bseries]
        str(self.bseries)

    def test_TimeSeries_deprecation(self):

        # deprecation TimeSeries, #10890
        with tm.assert_produces_warning(FutureWarning):
            pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3))

    def test_construct_DataFrame_with_sp_series(self):
        # it works!
        df = DataFrame({'col': self.bseries})

        # printing & access
        df.iloc[:1]
        df['col']
        df.dtypes
        str(df)

        tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)

        result = df.iloc[:, 0]
        tm.assert_sp_series_equal(result, self.bseries, check_names=False)

        # blocking
        expected = Series({'col': 'float64:sparse'})
        result = df.ftypes
        tm.assert_series_equal(expected, result)

    def test_series_density(self):
        # GH2803
        ts = Series(np.random.randn(10))
        ts[2:-2] = nan
        sts = ts.to_sparse()
        density = sts.density  # don't die
        self.assertEqual(density, 4 / 10.0)

    def test_sparse_to_dense(self):
        arr, index = _test_data1()
        series = self.bseries.to_dense()
        assert_equal(series, arr)

        series = self.bseries.to_dense(sparse_only=True)
        assert_equal(series, arr[np.isfinite(arr)])

        series = self.iseries.to_dense()
        assert_equal(series, arr)

        arr, index = _test_data1_zero()
        series = self.zbseries.to_dense()
        assert_equal(series, arr)

        series = self.ziseries.to_dense()
        assert_equal(series, arr)

    def test_to_dense_fill_value(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([1, np.nan, 0, 3, 0])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

    def test_dense_to_sparse(self):
        series = self.bseries.to_dense()
        bseries = series.to_sparse(kind='block')
        iseries = series.to_sparse(kind='integer')
        tm.assert_sp_series_equal(bseries, self.bseries)
        tm.assert_sp_series_equal(iseries, self.iseries, check_names=False)
        self.assertEqual(iseries.name, self.bseries.name)

        self.assertEqual(len(series), len(bseries))
        self.assertEqual(len(series), len(iseries))
        self.assertEqual(series.shape, bseries.shape)
        self.assertEqual(series.shape, iseries.shape)

        # non-NaN fill value
        series = self.zbseries.to_dense()
        zbseries = series.to_sparse(kind='block', fill_value=0)
        ziseries = series.to_sparse(kind='integer', fill_value=0)
        tm.assert_sp_series_equal(zbseries, self.zbseries)
        tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False)
        self.assertEqual(ziseries.name, self.zbseries.name)

        self.assertEqual(len(series), len(zbseries))
        self.assertEqual(len(series), len(ziseries))
        self.assertEqual(series.shape, zbseries.shape)
        self.assertEqual(series.shape, ziseries.shape)

    def test_to_dense_preserve_name(self):
        assert (self.bseries.name is not None)
        result = self.bseries.to_dense()
        self.assertEqual(result.name, self.bseries.name)

    def test_constructor(self):
        # test setup guys
        self.assertTrue(np.isnan(self.bseries.fill_value))
        tm.assertIsInstance(self.bseries.sp_index, BlockIndex)
        self.assertTrue(np.isnan(self.iseries.fill_value))
        tm.assertIsInstance(self.iseries.sp_index, IntIndex)

        self.assertEqual(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values.values,
                     self.bseries.to_dense().fillna(0).values)

        # pass SparseSeries
        def _check_const(sparse, name):
            # use passed series name
            result = SparseSeries(sparse)
            tm.assert_sp_series_equal(result, sparse)
            self.assertEqual(sparse.name, name)
            self.assertEqual(result.name, name)

            # use passed name
            result = SparseSeries(sparse, name='x')
            tm.assert_sp_series_equal(result, sparse, check_names=False)
            self.assertEqual(result.name, 'x')

        _check_const(self.bseries, 'bseries')
        _check_const(self.iseries, 'iseries')
        _check_const(self.zbseries, 'zbseries')

        # Sparse time series works
        date_index = bdate_range('1/1/2000', periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        tm.assertIsInstance(s5, SparseSeries)

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default
        values = np.ones(self.bseries.npoints)
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        sp.sp_values[:5] = 97
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

        # but can make it copy!
        sp = SparseSeries(values,
                          sparse_index=self.bseries.sp_index,
                          copy=True)
        sp.sp_values[:5] = 100
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

    def test_constructor_scalar(self):
        data = 5
        sp = SparseSeries(data, np.arange(100))
        sp = sp.reindex(np.arange(200))
        self.assertTrue((sp.ix[:99] == data).all())
        self.assertTrue(isnull(sp.ix[100:]).all())

        data = np.nan
        sp = SparseSeries(data, np.arange(100))
        self.assertEqual(len(sp), 100)
        self.assertEqual(sp.shape, (100, ))

    def test_constructor_ndarray(self):
        pass

    def test_constructor_nonnan(self):
        arr = [0, 0, 0, nan, nan]
        sp_series = SparseSeries(arr, fill_value=0)
        assert_equal(sp_series.values.values, arr)
        self.assertEqual(len(sp_series), 5)
        self.assertEqual(sp_series.shape, (5, ))

    # GH 9272
    def test_constructor_empty(self):
        sp = SparseSeries()
        self.assertEqual(len(sp.index), 0)
        self.assertEqual(sp.shape, (0, ))

    def test_copy_astype(self):
        cop = self.bseries.astype(np.float64)
        self.assertIsNot(cop, self.bseries)
        self.assertIs(cop.sp_index, self.bseries.sp_index)
        self.assertEqual(cop.dtype, np.float64)

        cop2 = self.iseries.copy()

        tm.assert_sp_series_equal(cop, self.bseries)
        tm.assert_sp_series_equal(cop2, self.iseries)

        # test that data is copied
        cop[:5] = 97
        self.assertEqual(cop.sp_values[0], 97)
        self.assertNotEqual(self.bseries.sp_values[0], 97)

        # correct fill value
        zbcop = self.zbseries.copy()
        zicop = self.ziseries.copy()

        tm.assert_sp_series_equal(zbcop, self.zbseries)
        tm.assert_sp_series_equal(zicop, self.ziseries)

        # no deep copy
        view = self.bseries.copy(deep=False)
        view.sp_values[:5] = 5
        self.assertTrue((self.bseries.sp_values[:5] == 5).all())

    def test_shape(self):
        # GH 10452
        self.assertEqual(self.bseries.shape, (20, ))
        self.assertEqual(self.btseries.shape, (20, ))
        self.assertEqual(self.iseries.shape, (20, ))

        self.assertEqual(self.bseries2.shape, (15, ))
        self.assertEqual(self.iseries2.shape, (15, ))

        self.assertEqual(self.zbseries2.shape, (15, ))
        self.assertEqual(self.ziseries2.shape, (15, ))

    def test_astype(self):
        self.assertRaises(Exception, self.bseries.astype, np.int64)

    def test_kind(self):
        self.assertEqual(self.bseries.kind, 'block')
        self.assertEqual(self.iseries.kind, 'integer')

    def test_to_frame(self):
        # GH 9850
        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x')
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(), exp)

        exp = pd.SparseDataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(name='y'), exp)

        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x', fill_value=0)
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]},
                                 default_fill_value=0)

        tm.assert_sp_frame_equal(s.to_frame(), exp)
        exp = pd.DataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_frame_equal(s.to_frame(name='y').to_dense(), exp)

    def test_pickle(self):
        def _test_roundtrip(series):
            unpickled = self.round_trip_pickle(series)
            tm.assert_sp_series_equal(series, unpickled)
            tm.assert_series_equal(series.to_dense(), unpickled.to_dense())

        self._check_all(_test_roundtrip)

    def _check_all(self, check_func):
        check_func(self.bseries)
        check_func(self.iseries)
        check_func(self.zbseries)
        check_func(self.ziseries)

    def test_getitem(self):
        def _check_getitem(sp, dense):
            for idx, val in compat.iteritems(dense):
                tm.assert_almost_equal(val, sp[idx])

            for i in range(len(dense)):
                tm.assert_almost_equal(sp[i], dense[i])
                # j = np.float64(i)
                # assert_almost_equal(sp[j], dense[j])

                # API change 1/6/2012
                # negative getitem works
                # for i in xrange(len(dense)):
                #     assert_almost_equal(sp[-i], dense[-i])

        _check_getitem(self.bseries, self.bseries.to_dense())
        _check_getitem(self.btseries, self.btseries.to_dense())

        _check_getitem(self.zbseries, self.zbseries.to_dense())
        _check_getitem(self.iseries, self.iseries.to_dense())
        _check_getitem(self.ziseries, self.ziseries.to_dense())

        # exception handling
        self.assertRaises(Exception, self.bseries.__getitem__,
                          len(self.bseries) + 1)

        # index not contained
        self.assertRaises(Exception, self.btseries.__getitem__,
                          self.btseries.index[-1] + BDay())

    def test_get_get_value(self):
        tm.assert_almost_equal(self.bseries.get(10), self.bseries[10])
        self.assertIsNone(self.bseries.get(len(self.bseries) + 1))

        dt = self.btseries.index[10]
        result = self.btseries.get(dt)
        expected = self.btseries.to_dense()[dt]
        tm.assert_almost_equal(result, expected)

        tm.assert_almost_equal(self.bseries.get_value(10), self.bseries[10])

    def test_set_value(self):

        idx = self.btseries.index[7]
        self.btseries.set_value(idx, 0)
        self.assertEqual(self.btseries[idx], 0)

        self.iseries.set_value('foobar', 0)
        self.assertEqual(self.iseries.index[-1], 'foobar')
        self.assertEqual(self.iseries['foobar'], 0)

    def test_getitem_slice(self):
        idx = self.bseries.index
        res = self.bseries[::2]
        tm.assertIsInstance(res, SparseSeries)

        expected = self.bseries.reindex(idx[::2])
        tm.assert_sp_series_equal(res, expected)

        res = self.bseries[:5]
        tm.assertIsInstance(res, SparseSeries)
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))

        res = self.bseries[5:]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))

        # negative indices
        res = self.bseries[:-3]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))

    def test_take(self):
        def _compare_with_dense(sp):
            dense = sp.to_dense()

            def _compare(idx):
                dense_result = dense.take(idx).values
                sparse_result = sp.take(idx)
                self.assertIsInstance(sparse_result, SparseSeries)
                tm.assert_almost_equal(dense_result,
                                       sparse_result.values.values)

            _compare([1., 2., 3., 4., 5., 0.])
            _compare([7, 2, 9, 0, 4])
            _compare([3, 6, 3, 4, 7])

        self._check_all(_compare_with_dense)

        self.assertRaises(Exception, self.bseries.take,
                          [0, len(self.bseries) + 1])

        # Corner case
        sp = SparseSeries(np.ones(10) * nan)
        exp = pd.Series(np.repeat(nan, 5))
        tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp)

    def test_setitem(self):
        self.bseries[5] = 7.
        self.assertEqual(self.bseries[5], 7.)

    def test_setslice(self):
        self.bseries[5:10] = 7.
        tm.assert_series_equal(
            self.bseries[5:10].to_dense(),
            Series(7., index=range(5, 10), name=self.bseries.name))

    def test_operators(self):
        def _check_op(a, b, op):
            sp_result = op(a, b)
            adense = a.to_dense() if isinstance(a, SparseSeries) else a
            bdense = b.to_dense() if isinstance(b, SparseSeries) else b
            dense_result = op(adense, bdense)
            tm.assert_almost_equal(sp_result.to_dense(), dense_result)

        def check(a, b):
            _check_op(a, b, operator.add)
            _check_op(a, b, operator.sub)
            _check_op(a, b, operator.truediv)
            _check_op(a, b, operator.floordiv)
            _check_op(a, b, operator.mul)

            _check_op(a, b, lambda x, y: operator.add(y, x))
            _check_op(a, b, lambda x, y: operator.sub(y, x))
            _check_op(a, b, lambda x, y: operator.truediv(y, x))
            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
            _check_op(a, b, lambda x, y: operator.mul(y, x))

            # NaN ** 0 = 1 in C?
            # _check_op(a, b, operator.pow)
            # _check_op(a, b, lambda x, y: operator.pow(y, x))

        check(self.bseries, self.bseries)
        check(self.iseries, self.iseries)
        check(self.bseries, self.iseries)

        check(self.bseries, self.bseries2)
        check(self.bseries, self.iseries2)
        check(self.iseries, self.iseries2)

        # scalar value
        check(self.bseries, 5)

        # zero-based
        check(self.zbseries, self.zbseries * 2)
        check(self.zbseries, self.zbseries2)
        check(self.ziseries, self.ziseries2)

        # with dense
        result = self.bseries + self.bseries.to_dense()
        tm.assert_sp_series_equal(result, self.bseries + self.bseries)

    def test_binary_operators(self):

        # skipping for now #####
        raise nose.SkipTest("skipping sparse binary operators test")

        def _check_inplace_op(iop, op):
            tmp = self.bseries.copy()

            expected = op(tmp, self.bseries)
            iop(tmp, self.bseries)
            tm.assert_sp_series_equal(tmp, expected)

        inplace_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow']
        for op in inplace_ops:
            _check_inplace_op(getattr(operator, "i%s" % op),
                              getattr(operator, op))

    def test_abs(self):
        s = SparseSeries([1, 2, -3], name='x')
        expected = SparseSeries([1, 2, 3], name='x')
        result = s.abs()
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = np.abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

    def test_reindex(self):
        def _compare_with_series(sps, new_index):
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            tm.assert_sp_series_equal(spsre, seriesre)
            tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        tm.assert_sp_series_equal(self.bseries, same_index)
        self.assertIsNot(same_index, self.bseries)

        # corner cases
        sp = SparseSeries([], index=[])
        # TODO: sp_zero is not used anywhere...remove?
        sp_zero = SparseSeries([], index=[], fill_value=0)  # noqa
        _compare_with_series(sp, np.arange(10))

        # with copy=False
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        self.assertTrue((self.bseries.sp_values != 1.).all())

        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        tm.assert_numpy_array_equal(self.bseries.sp_values, np.repeat(1., 10))

    def test_sparse_reindex(self):
        length = 10

        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values,
                                        sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            self.assertIs(reindexed.sp_index, index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            tm.assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            # TODO: expected is not used anywhere...remove?
            expected = expected.reindex(int_indices2).fillna(
                fill_value)  # noqa

        def _check_with_fill_value(values, first, second, fill_value=nan):
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.)

        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

        first_series = SparseSeries(values1,
                                    sparse_index=IntIndex(length, index1),
                                    fill_value=nan)
        with tm.assertRaisesRegexp(TypeError,
                                   'new index must be a SparseIndex'):
            reindexed = first_series.sparse_reindex(0)  # noqa

    def test_repr(self):
        # TODO: These aren't used
        bsrepr = repr(self.bseries)  # noqa
        isrepr = repr(self.iseries)  # noqa

    def test_iter(self):
        pass

    def test_truncate(self):
        pass

    def test_fillna(self):
        pass

    def test_groupby(self):
        pass

    def test_reductions(self):
        def _compare_with_dense(obj, op):
            sparse_result = getattr(obj, op)()
            series = obj.to_dense()
            dense_result = getattr(series, op)()
            self.assertEqual(sparse_result, dense_result)

        to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']

        def _compare_all(obj):
            for op in to_compare:
                _compare_with_dense(obj, op)

        _compare_all(self.bseries)

        self.bseries.sp_values[5:10] = np.NaN
        _compare_all(self.bseries)

        _compare_all(self.zbseries)
        self.zbseries.sp_values[5:10] = np.NaN
        _compare_all(self.zbseries)

        series = self.zbseries.copy()
        series.fill_value = 2
        _compare_all(series)

        nonna = Series(np.random.randn(20)).to_sparse()
        _compare_all(nonna)

        nonna2 = Series(np.random.randn(20)).to_sparse(fill_value=0)
        _compare_all(nonna2)

    def test_dropna(self):
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.valid()

        expected = sp.to_dense().valid()
        expected = expected[expected != 0]

        tm.assert_almost_equal(sp_valid.values, expected.values)
        self.assertTrue(sp_valid.index.equals(expected.index))
        self.assertEqual(len(sp_valid.sp_values), 2)

        result = self.bseries.dropna()
        expected = self.bseries.to_dense().dropna()
        self.assertNotIsInstance(result, SparseSeries)
        tm.assert_series_equal(result, expected)

    def test_homogenize(self):
        def _check_matches(indices, expected):
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices,
                                       sparse_index=idx)
            homogenized = spf.homogenize(data)

            for k, v in compat.iteritems(homogenized):
                assert (v.sp_index.equals(expected))

        indices1 = [
            BlockIndex(10, [2], [7]),
            BlockIndex(10, [1, 6], [3, 4]),
            BlockIndex(10, [0], [10])
        ]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {
            'a': SparseSeries(np.arange(7),
                              sparse_index=expected2,
                              fill_value=0)
        }
        with tm.assertRaisesRegexp(TypeError, "NaN fill value"):
            spf.homogenize(data)

    def test_fill_value_corner(self):
        cop = self.zbseries.copy()
        cop.fill_value = 0
        result = self.bseries / cop

        self.assertTrue(np.isnan(result.fill_value))

        cop2 = self.zbseries.copy()
        cop2.fill_value = 1
        result = cop2 / cop
        self.assertTrue(np.isnan(result.fill_value))

    def test_fill_value_when_combine_const(self):
        # GH12723
        s = SparseSeries([0, 1, np.nan, 3, 4, 5], index=np.arange(6))

        exp = s.fillna(0).add(2)
        res = s.add(2, fill_value=0)
        self.assert_series_equal(res, exp)

    def test_shift(self):
        series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6))

        shifted = series.shift(0)
        self.assertIsNot(shifted, series)
        tm.assert_sp_series_equal(shifted, series)

        f = lambda s: s.shift(1)
        _dense_series_compare(series, f)

        f = lambda s: s.shift(-2)
        _dense_series_compare(series, f)

        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=bdate_range('1/1/2000', periods=6))
        f = lambda s: s.shift(2, freq='B')
        _dense_series_compare(series, f)

        f = lambda s: s.shift(2, freq=datetools.bday)
        _dense_series_compare(series, f)

    def test_shift_nan(self):
        # GH 12908
        orig = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0])
        sparse = orig.to_sparse()

        tm.assert_sp_series_equal(sparse.shift(0), orig.shift(0).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(1), orig.shift(1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(2), orig.shift(2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(3), orig.shift(3).to_sparse())

        tm.assert_sp_series_equal(sparse.shift(-1), orig.shift(-1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-2), orig.shift(-2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-3), orig.shift(-3).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-4), orig.shift(-4).to_sparse())

        sparse = orig.to_sparse(fill_value=0)
        tm.assert_sp_series_equal(sparse.shift(0),
                                  orig.shift(0).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(1),
                                  orig.shift(1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(2),
                                  orig.shift(2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(3),
                                  orig.shift(3).to_sparse(fill_value=0))

        tm.assert_sp_series_equal(sparse.shift(-1),
                                  orig.shift(-1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-2),
                                  orig.shift(-2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-3),
                                  orig.shift(-3).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-4),
                                  orig.shift(-4).to_sparse(fill_value=0))

    def test_shift_dtype(self):
        # GH 12908
        orig = pd.Series([1, 2, 3, 4], dtype=np.int64)
        sparse = orig.to_sparse()

        tm.assert_sp_series_equal(sparse.shift(0), orig.shift(0).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(1), orig.shift(1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(2), orig.shift(2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(3), orig.shift(3).to_sparse())

        tm.assert_sp_series_equal(sparse.shift(-1), orig.shift(-1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-2), orig.shift(-2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-3), orig.shift(-3).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-4), orig.shift(-4).to_sparse())

    def test_shift_dtype_fill_value(self):
        # GH 12908
        orig = pd.Series([1, 0, 0, 4], dtype=np.int64)
        sparse = orig.to_sparse(fill_value=0)

        tm.assert_sp_series_equal(sparse.shift(0),
                                  orig.shift(0).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(1),
                                  orig.shift(1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(2),
                                  orig.shift(2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(3),
                                  orig.shift(3).to_sparse(fill_value=0))

        tm.assert_sp_series_equal(sparse.shift(-1),
                                  orig.shift(-1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-2),
                                  orig.shift(-2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-3),
                                  orig.shift(-3).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-4),
                                  orig.shift(-4).to_sparse(fill_value=0))

    def test_cumsum(self):
        result = self.bseries.cumsum()
        expected = self.bseries.to_dense().cumsum()
        tm.assertIsInstance(result, SparseSeries)
        self.assertEqual(result.name, self.bseries.name)
        tm.assert_series_equal(result.to_dense(), expected)

        result = self.zbseries.cumsum()
        expected = self.zbseries.to_dense().cumsum()
        tm.assertIsInstance(result, Series)
        tm.assert_series_equal(result, expected)

    def test_combine_first(self):
        s = self.bseries

        result = s[::2].combine_first(s)
        result2 = s[::2].combine_first(s.to_dense())

        expected = s[::2].to_dense().combine_first(s.to_dense())
        expected = expected.to_sparse(fill_value=s.fill_value)

        tm.assert_sp_series_equal(result, result2)
        tm.assert_sp_series_equal(result, expected)
コード例 #2
0
ファイル: test_sparse.py プロジェクト: klausz/pandas
class TestSparseSeries(TestCase, test_series.CheckNameIntegration):
    def setUp(self):
        arr, index = _test_data1()

        date_index = DateRange("1/1/2011", periods=len(index))

        self.bseries = SparseSeries(arr, index=index, kind="block")
        self.bseries.name = "bseries"

        self.ts = self.bseries

        self.btseries = SparseSeries(arr, index=date_index, kind="block")

        self.iseries = SparseSeries(arr, index=index, kind="integer")

        arr, index = _test_data2()
        self.bseries2 = SparseSeries(arr, index=index, kind="block")
        self.iseries2 = SparseSeries(arr, index=index, kind="integer")

        arr, index = _test_data1_zero()
        self.zbseries = SparseSeries(arr, index=index, kind="block", fill_value=0)
        self.ziseries = SparseSeries(arr, index=index, kind="integer", fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2 = SparseSeries(arr, index=index, kind="block", fill_value=0)
        self.ziseries2 = SparseSeries(arr, index=index, kind="integer", fill_value=0)

    def test_construct_DataFrame_with_sp_series(self):
        # it works!
        df = DataFrame({"col": self.bseries})

    def test_sparse_to_dense(self):
        arr, index = _test_data1()
        series = self.bseries.to_dense()
        assert_equal(series, arr)

        series = self.bseries.to_dense(sparse_only=True)
        assert_equal(series, arr[np.isfinite(arr)])

        series = self.iseries.to_dense()
        assert_equal(series, arr)

        arr, index = _test_data1_zero()
        series = self.zbseries.to_dense()
        assert_equal(series, arr)

        series = self.ziseries.to_dense()
        assert_equal(series, arr)

    def test_dense_to_sparse(self):
        series = self.bseries.to_dense()
        bseries = series.to_sparse(kind="block")
        iseries = series.to_sparse(kind="integer")
        assert_sp_series_equal(bseries, self.bseries)
        assert_sp_series_equal(iseries, self.iseries)

        # non-NaN fill value
        series = self.zbseries.to_dense()
        zbseries = series.to_sparse(kind="block", fill_value=0)
        ziseries = series.to_sparse(kind="integer", fill_value=0)
        assert_sp_series_equal(zbseries, self.zbseries)
        assert_sp_series_equal(ziseries, self.ziseries)

    def test_to_dense_preserve_name(self):
        assert self.bseries.name is not None
        result = self.bseries.to_dense()
        self.assertEquals(result.name, self.bseries.name)

    def test_constructor(self):
        # test setup guys
        self.assert_(np.isnan(self.bseries.fill_value))
        self.assert_(isinstance(self.bseries.sp_index, BlockIndex))
        self.assert_(np.isnan(self.iseries.fill_value))
        self.assert_(isinstance(self.iseries.sp_index, IntIndex))

        self.assertEquals(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0))

        # pass SparseSeries
        s2 = SparseSeries(self.bseries)
        s3 = SparseSeries(self.iseries)
        s4 = SparseSeries(self.zbseries)
        assert_sp_series_equal(s2, self.bseries)
        assert_sp_series_equal(s3, self.iseries)
        assert_sp_series_equal(s4, self.zbseries)

        # Sparse time series works
        date_index = DateRange("1/1/2000", periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        self.assert_(isinstance(s5, SparseTimeSeries))

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default
        values = np.ones(len(self.bseries.sp_values))
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        sp.sp_values[:5] = 97
        self.assert_(values[0] == 97)

        # but can make it copy!
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index, copy=True)
        sp.sp_values[:5] = 100
        self.assert_(values[0] == 97)

    def test_constructor_ndarray(self):
        pass

    def test_constructor_nonnan(self):
        arr = [0, 0, 0, nan, nan]
        sp_series = SparseSeries(arr, fill_value=0)
        assert_equal(sp_series.values, arr)

    def test_copy_astype(self):
        cop = self.bseries.astype(np.float_)
        self.assert_(cop is not self.bseries)
        self.assert_(cop.sp_index is self.bseries.sp_index)
        self.assert_(cop.dtype == np.float64)

        cop2 = self.iseries.copy()

        assert_sp_series_equal(cop, self.bseries)
        assert_sp_series_equal(cop2, self.iseries)

        # test that data is copied
        cop.sp_values[:5] = 97
        self.assert_(cop.sp_values[0] == 97)
        self.assert_(self.bseries.sp_values[0] != 97)

        # correct fill value
        zbcop = self.zbseries.copy()
        zicop = self.ziseries.copy()

        assert_sp_series_equal(zbcop, self.zbseries)
        assert_sp_series_equal(zicop, self.ziseries)

        # no deep copy
        view = self.bseries.copy(deep=False)
        view.sp_values[:5] = 5
        self.assert_((self.bseries.sp_values[:5] == 5).all())

    def test_astype(self):
        self.assertRaises(Exception, self.bseries.astype, np.int64)

    def test_kind(self):
        self.assertEquals(self.bseries.kind, "block")
        self.assertEquals(self.iseries.kind, "integer")

    def test_pickle(self):
        def _test_roundtrip(series):
            pickled = pickle.dumps(series, protocol=pickle.HIGHEST_PROTOCOL)
            unpickled = pickle.loads(pickled)
            assert_sp_series_equal(series, unpickled)
            assert_series_equal(series.to_dense(), unpickled.to_dense())

        self._check_all(_test_roundtrip)

    def _check_all(self, check_func):
        check_func(self.bseries)
        check_func(self.iseries)
        check_func(self.zbseries)
        check_func(self.ziseries)

    def test_getitem(self):
        def _check_getitem(sp, dense):
            for idx, val in dense.iteritems():
                assert_almost_equal(val, sp[idx])

            for i in xrange(len(dense)):
                assert_almost_equal(sp[i], dense[i])
                # j = np.float64(i)
                # assert_almost_equal(sp[j], dense[j])

            # API change 1/6/2012
            # negative getitem works
            # for i in xrange(len(dense)):
            #     assert_almost_equal(sp[-i], dense[-i])

        _check_getitem(self.bseries, self.bseries.to_dense())
        _check_getitem(self.btseries, self.btseries.to_dense())

        _check_getitem(self.zbseries, self.zbseries.to_dense())
        _check_getitem(self.iseries, self.iseries.to_dense())
        _check_getitem(self.ziseries, self.ziseries.to_dense())

        # exception handling
        self.assertRaises(Exception, self.bseries.__getitem__, len(self.bseries) + 1)

        # index not contained
        self.assertRaises(Exception, self.btseries.__getitem__, self.btseries.index[-1] + BDay())

    def test_get_get_value(self):
        assert_almost_equal(self.bseries.get(10), self.bseries[10])
        self.assert_(self.bseries.get(len(self.bseries) + 1) is None)

        dt = self.btseries.index[10]
        result = self.btseries.get(dt)
        expected = self.btseries.to_dense()[dt]
        assert_almost_equal(result, expected)

        assert_almost_equal(self.bseries.get_value(10), self.bseries[10])

    def test_set_value(self):
        idx = self.btseries.index[7]
        res = self.btseries.set_value(idx, 0)
        self.assert_(res is not self.btseries)
        self.assertEqual(res[idx], 0)

        res = self.iseries.set_value("foobar", 0)
        self.assert_(res is not self.iseries)
        self.assert_(res.index[-1] == "foobar")
        self.assertEqual(res["foobar"], 0)

    def test_getitem_slice(self):
        idx = self.bseries.index
        res = self.bseries[::2]
        self.assert_(isinstance(res, SparseSeries))
        assert_sp_series_equal(res, self.bseries.reindex(idx[::2]))

        res = self.bseries[:5]
        self.assert_(isinstance(res, SparseSeries))
        assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))

        res = self.bseries[5:]
        assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))

        # negative indices
        res = self.bseries[:-3]
        assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))

    def test_take(self):
        def _compare_with_dense(sp):
            dense = sp.to_dense()

            def _compare(idx):
                dense_result = dense.take(idx).values
                sparse_result = sp.take(idx)
                self.assert_(isinstance(sparse_result, SparseSeries))
                assert_almost_equal(dense_result, sparse_result.values)

            _compare([1.0, 2.0, 3.0, 4.0, 5.0, 0.0])
            _compare([7, 2, 9, 0, 4])
            _compare([3, 6, 3, 4, 7])

        self._check_all(_compare_with_dense)

        self.assertRaises(Exception, self.bseries.take, [-1, 0])
        self.assertRaises(Exception, self.bseries.take, [0, len(self.bseries) + 1])

        # Corner case
        sp = SparseSeries(np.ones(10.0) * nan)
        assert_almost_equal(sp.take([0, 1, 2, 3, 4]), np.repeat(nan, 5))

    def test_setitem(self):
        self.assertRaises(Exception, self.bseries.__setitem__, 5, 7.0)
        self.assertRaises(Exception, self.iseries.__setitem__, 5, 7.0)

    def test_setslice(self):
        self.assertRaises(Exception, self.bseries.__setslice__, 5, 10, 7.0)

    def test_operators(self):
        def _check_op(a, b, op):
            sp_result = op(a, b)
            adense = a.to_dense() if isinstance(a, SparseSeries) else a
            bdense = b.to_dense() if isinstance(b, SparseSeries) else b
            dense_result = op(adense, bdense)
            assert_almost_equal(sp_result.to_dense(), dense_result)

        def check(a, b):
            _check_op(a, b, operator.add)
            _check_op(a, b, operator.sub)
            _check_op(a, b, operator.truediv)
            _check_op(a, b, operator.floordiv)
            _check_op(a, b, operator.mul)

            _check_op(a, b, lambda x, y: operator.add(y, x))
            _check_op(a, b, lambda x, y: operator.sub(y, x))
            _check_op(a, b, lambda x, y: operator.truediv(y, x))
            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
            _check_op(a, b, lambda x, y: operator.mul(y, x))

            # NaN ** 0 = 1 in C?
            # _check_op(a, b, operator.pow)
            # _check_op(a, b, lambda x, y: operator.pow(y, x))

        check(self.bseries, self.bseries)
        check(self.iseries, self.iseries)
        check(self.bseries, self.iseries)

        check(self.bseries, self.bseries2)
        check(self.bseries, self.iseries2)
        check(self.iseries, self.iseries2)

        # scalar value
        check(self.bseries, 5)

        # zero-based
        check(self.zbseries, self.zbseries * 2)
        check(self.zbseries, self.zbseries2)
        check(self.ziseries, self.ziseries2)

        # with dense
        result = self.bseries + self.bseries.to_dense()
        assert_sp_series_equal(result, self.bseries + self.bseries)

    # @dec.knownfailureif(True, 'Known NumPy failer as of 1.5.1')
    def test_operators_corner2(self):
        raise nose.SkipTest("known failer on numpy 1.5.1")

        # NumPy circumvents __r*__ operations
        val = np.float64(3.0)
        result = val - self.zbseries
        assert_sp_series_equal(result, 3 - self.zbseries)

    def test_binary_operators(self):
        def _check_inplace_op(op):
            tmp = self.bseries.copy()
            self.assertRaises(NotImplementedError, op, tmp, self.bseries)

        inplace_ops = ["iadd", "isub", "imul", "itruediv", "ifloordiv", "ipow"]
        for op in inplace_ops:
            _check_inplace_op(getattr(operator, op))

    def test_reindex(self):
        def _compare_with_series(sps, new_index):
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            assert_sp_series_equal(spsre, seriesre)
            assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        assert_sp_series_equal(self.bseries, same_index)
        self.assert_(same_index is not self.bseries)

        # corner cases
        sp = SparseSeries([], index=[])
        sp_zero = SparseSeries([], index=[], fill_value=0)
        _compare_with_series(sp, np.arange(10))

        # with copy=False
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.0
        self.assert_((self.bseries.sp_values != 1.0).all())

        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.0
        self.assert_((self.bseries.sp_values == 1.0).all())

    def test_sparse_reindex(self):
        length = 10

        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values, sparse_index=index1, fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            self.assert_(reindexed.sp_index is index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            expected = expected.reindex(int_indices2).fillna(fill_value)

        def _check_with_fill_value(values, first, second, fill_value=nan):
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.0)

        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

    def test_repr(self):
        bsrepr = repr(self.bseries)
        isrepr = repr(self.iseries)

    def test_iter(self):
        pass

    def test_truncate(self):
        pass

    def test_fillna(self):
        pass

    def test_groupby(self):
        pass

    def test_reductions(self):
        def _compare_with_dense(obj, op):
            sparse_result = getattr(obj, op)()
            series = obj.to_dense()
            dense_result = getattr(series, op)()
            self.assertEquals(sparse_result, dense_result)

        to_compare = ["count", "sum", "mean", "std", "var", "skew"]

        def _compare_all(obj):
            for op in to_compare:
                _compare_with_dense(obj, op)

        _compare_all(self.bseries)
        self.bseries.sp_values[5:10] = np.NaN
        _compare_all(self.bseries)

        _compare_all(self.zbseries)
        self.zbseries.sp_values[5:10] = np.NaN
        _compare_all(self.zbseries)

        series = self.zbseries.copy()
        series.fill_value = 2
        _compare_all(series)

    def test_dropna(self):
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.valid()
        assert_almost_equal(sp_valid.values, sp.to_dense().valid().values)
        self.assert_(sp_valid.index.equals(sp.to_dense().valid().index))
        self.assertEquals(len(sp_valid.sp_values), 2)

        result = self.bseries.dropna()
        expected = self.bseries.to_dense().dropna()
        self.assert_(not isinstance(result, SparseSeries))
        tm.assert_series_equal(result, expected)

    def test_homogenize(self):
        def _check_matches(indices, expected):
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices, sparse_index=idx)
            homogenized = spf.homogenize(data)

            for k, v in homogenized.iteritems():
                assert v.sp_index.equals(expected)

        indices1 = [BlockIndex(10, [2], [7]), BlockIndex(10, [1, 6], [3, 4]), BlockIndex(10, [0], [10])]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {"a": SparseSeries(np.arange(7), sparse_index=expected2, fill_value=0)}
        nose.tools.assert_raises(Exception, spf.homogenize, data)

    def test_fill_value_corner(self):
        cop = self.zbseries.copy()
        cop.fill_value = 0
        result = self.bseries / cop

        self.assert_(np.isnan(result.fill_value))

        cop2 = self.zbseries.copy()
        cop2.fill_value = 1
        result = cop2 / cop
        self.assert_(np.isnan(result.fill_value))

    def test_shift(self):
        series = SparseSeries([nan, 1.0, 2.0, 3.0, nan, nan], index=np.arange(6))

        shifted = series.shift(0)
        self.assert_(shifted is not series)
        assert_sp_series_equal(shifted, series)

        f = lambda s: s.shift(1)
        _dense_series_compare(series, f)

        f = lambda s: s.shift(-2)
        _dense_series_compare(series, f)

        series = SparseSeries([nan, 1.0, 2.0, 3.0, nan, nan], index=DateRange("1/1/2000", periods=6))
        f = lambda s: s.shift(2, timeRule="WEEKDAY")
        _dense_series_compare(series, f)

        f = lambda s: s.shift(2, offset=datetools.bday)
        _dense_series_compare(series, f)

    def test_cumsum(self):
        result = self.bseries.cumsum()
        expected = self.bseries.to_dense().cumsum()
        self.assert_(isinstance(result, SparseSeries))
        self.assertEquals(result.name, self.bseries.name)
        assert_series_equal(result.to_dense(), expected)

        result = self.zbseries.cumsum()
        expected = self.zbseries.to_dense().cumsum()
        self.assert_(isinstance(result, Series))
        assert_series_equal(result, expected)

    def test_combine_first(self):
        s = self.bseries

        result = s[::2].combine_first(s)
        result2 = s[::2].combine_first(s.to_dense())

        expected = s[::2].to_dense().combine_first(s.to_dense())
        expected = expected.to_sparse(fill_value=s.fill_value)

        assert_sp_series_equal(result, result2)
        assert_sp_series_equal(result, expected)
コード例 #3
0
ファイル: test_series.py プロジェクト: jcfr/pandas
class TestSparseSeries(tm.TestCase, SharedWithSparse):
    _multiprocess_can_split_ = True

    def setUp(self):
        arr, index = _test_data1()

        date_index = bdate_range('1/1/2011', periods=len(index))

        self.bseries = SparseSeries(arr, index=index, kind='block',
                                    name='bseries')
        self.ts = self.bseries

        self.btseries = SparseSeries(arr, index=date_index, kind='block')

        self.iseries = SparseSeries(arr, index=index, kind='integer',
                                    name='iseries')

        arr, index = _test_data2()
        self.bseries2 = SparseSeries(arr, index=index, kind='block')
        self.iseries2 = SparseSeries(arr, index=index, kind='integer')

        arr, index = _test_data1_zero()
        self.zbseries = SparseSeries(arr, index=index, kind='block',
                                     fill_value=0, name='zbseries')
        self.ziseries = SparseSeries(arr, index=index, kind='integer',
                                     fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2 = SparseSeries(arr, index=index, kind='block',
                                      fill_value=0)
        self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
                                      fill_value=0)

    def test_constructor_dtype(self):
        arr = SparseSeries([np.nan, 1, 2, np.nan])
        self.assertEqual(arr.dtype, np.float64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
        self.assertEqual(arr.dtype, np.float64)
        self.assertEqual(arr.fill_value, 0)

        arr = SparseSeries([0, 1, 2, 4], dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertTrue(np.isnan(arr.fill_value))

        arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        self.assertEqual(arr.dtype, np.int64)
        self.assertEqual(arr.fill_value, 0)

    def test_iteration_and_str(self):
        [x for x in self.bseries]
        str(self.bseries)

    def test_TimeSeries_deprecation(self):

        # deprecation TimeSeries, #10890
        with tm.assert_produces_warning(FutureWarning):
            pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3))

    def test_construct_DataFrame_with_sp_series(self):
        # it works!
        df = DataFrame({'col': self.bseries})

        # printing & access
        df.iloc[:1]
        df['col']
        df.dtypes
        str(df)

        tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)

        result = df.iloc[:, 0]
        tm.assert_sp_series_equal(result, self.bseries, check_names=False)

        # blocking
        expected = Series({'col': 'float64:sparse'})
        result = df.ftypes
        tm.assert_series_equal(expected, result)

    def test_series_density(self):
        # GH2803
        ts = Series(np.random.randn(10))
        ts[2:-2] = nan
        sts = ts.to_sparse()
        density = sts.density  # don't die
        self.assertEqual(density, 4 / 10.0)

    def test_sparse_to_dense(self):
        arr, index = _test_data1()
        series = self.bseries.to_dense()
        assert_equal(series, arr)

        series = self.bseries.to_dense(sparse_only=True)
        assert_equal(series, arr[np.isfinite(arr)])

        series = self.iseries.to_dense()
        assert_equal(series, arr)

        arr, index = _test_data1_zero()
        series = self.zbseries.to_dense()
        assert_equal(series, arr)

        series = self.ziseries.to_dense()
        assert_equal(series, arr)

    def test_to_dense_fill_value(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([1, np.nan, 0, 3, 0])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

    def test_dense_to_sparse(self):
        series = self.bseries.to_dense()
        bseries = series.to_sparse(kind='block')
        iseries = series.to_sparse(kind='integer')
        tm.assert_sp_series_equal(bseries, self.bseries)
        tm.assert_sp_series_equal(iseries, self.iseries, check_names=False)
        self.assertEqual(iseries.name, self.bseries.name)

        self.assertEqual(len(series), len(bseries))
        self.assertEqual(len(series), len(iseries))
        self.assertEqual(series.shape, bseries.shape)
        self.assertEqual(series.shape, iseries.shape)

        # non-NaN fill value
        series = self.zbseries.to_dense()
        zbseries = series.to_sparse(kind='block', fill_value=0)
        ziseries = series.to_sparse(kind='integer', fill_value=0)
        tm.assert_sp_series_equal(zbseries, self.zbseries)
        tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False)
        self.assertEqual(ziseries.name, self.zbseries.name)

        self.assertEqual(len(series), len(zbseries))
        self.assertEqual(len(series), len(ziseries))
        self.assertEqual(series.shape, zbseries.shape)
        self.assertEqual(series.shape, ziseries.shape)

    def test_to_dense_preserve_name(self):
        assert (self.bseries.name is not None)
        result = self.bseries.to_dense()
        self.assertEqual(result.name, self.bseries.name)

    def test_constructor(self):
        # test setup guys
        self.assertTrue(np.isnan(self.bseries.fill_value))
        tm.assertIsInstance(self.bseries.sp_index, BlockIndex)
        self.assertTrue(np.isnan(self.iseries.fill_value))
        tm.assertIsInstance(self.iseries.sp_index, IntIndex)

        self.assertEqual(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values.values,
                     self.bseries.to_dense().fillna(0).values)

        # pass SparseSeries
        def _check_const(sparse, name):
            # use passed series name
            result = SparseSeries(sparse)
            tm.assert_sp_series_equal(result, sparse)
            self.assertEqual(sparse.name, name)
            self.assertEqual(result.name, name)

            # use passed name
            result = SparseSeries(sparse, name='x')
            tm.assert_sp_series_equal(result, sparse, check_names=False)
            self.assertEqual(result.name, 'x')

        _check_const(self.bseries, 'bseries')
        _check_const(self.iseries, 'iseries')
        _check_const(self.zbseries, 'zbseries')

        # Sparse time series works
        date_index = bdate_range('1/1/2000', periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        tm.assertIsInstance(s5, SparseSeries)

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default
        values = np.ones(self.bseries.npoints)
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        sp.sp_values[:5] = 97
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

        # but can make it copy!
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
                          copy=True)
        sp.sp_values[:5] = 100
        self.assertEqual(values[0], 97)

        self.assertEqual(len(sp), 20)
        self.assertEqual(sp.shape, (20, ))

    def test_constructor_scalar(self):
        data = 5
        sp = SparseSeries(data, np.arange(100))
        sp = sp.reindex(np.arange(200))
        self.assertTrue((sp.ix[:99] == data).all())
        self.assertTrue(isnull(sp.ix[100:]).all())

        data = np.nan
        sp = SparseSeries(data, np.arange(100))
        self.assertEqual(len(sp), 100)
        self.assertEqual(sp.shape, (100, ))

    def test_constructor_ndarray(self):
        pass

    def test_constructor_nonnan(self):
        arr = [0, 0, 0, nan, nan]
        sp_series = SparseSeries(arr, fill_value=0)
        assert_equal(sp_series.values.values, arr)
        self.assertEqual(len(sp_series), 5)
        self.assertEqual(sp_series.shape, (5, ))

    # GH 9272
    def test_constructor_empty(self):
        sp = SparseSeries()
        self.assertEqual(len(sp.index), 0)
        self.assertEqual(sp.shape, (0, ))

    def test_copy_astype(self):
        cop = self.bseries.astype(np.float64)
        self.assertIsNot(cop, self.bseries)
        self.assertIs(cop.sp_index, self.bseries.sp_index)
        self.assertEqual(cop.dtype, np.float64)

        cop2 = self.iseries.copy()

        tm.assert_sp_series_equal(cop, self.bseries)
        tm.assert_sp_series_equal(cop2, self.iseries)

        # test that data is copied
        cop[:5] = 97
        self.assertEqual(cop.sp_values[0], 97)
        self.assertNotEqual(self.bseries.sp_values[0], 97)

        # correct fill value
        zbcop = self.zbseries.copy()
        zicop = self.ziseries.copy()

        tm.assert_sp_series_equal(zbcop, self.zbseries)
        tm.assert_sp_series_equal(zicop, self.ziseries)

        # no deep copy
        view = self.bseries.copy(deep=False)
        view.sp_values[:5] = 5
        self.assertTrue((self.bseries.sp_values[:5] == 5).all())

    def test_shape(self):
        # GH 10452
        self.assertEqual(self.bseries.shape, (20, ))
        self.assertEqual(self.btseries.shape, (20, ))
        self.assertEqual(self.iseries.shape, (20, ))

        self.assertEqual(self.bseries2.shape, (15, ))
        self.assertEqual(self.iseries2.shape, (15, ))

        self.assertEqual(self.zbseries2.shape, (15, ))
        self.assertEqual(self.ziseries2.shape, (15, ))

    def test_astype(self):
        self.assertRaises(Exception, self.bseries.astype, np.int64)

    def test_kind(self):
        self.assertEqual(self.bseries.kind, 'block')
        self.assertEqual(self.iseries.kind, 'integer')

    def test_to_frame(self):
        # GH 9850
        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x')
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(), exp)

        exp = pd.SparseDataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(s.to_frame(name='y'), exp)

        s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x', fill_value=0)
        exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]},
                                 default_fill_value=0)

        tm.assert_sp_frame_equal(s.to_frame(), exp)
        exp = pd.DataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_frame_equal(s.to_frame(name='y').to_dense(), exp)

    def test_pickle(self):
        def _test_roundtrip(series):
            unpickled = self.round_trip_pickle(series)
            tm.assert_sp_series_equal(series, unpickled)
            tm.assert_series_equal(series.to_dense(), unpickled.to_dense())

        self._check_all(_test_roundtrip)

    def _check_all(self, check_func):
        check_func(self.bseries)
        check_func(self.iseries)
        check_func(self.zbseries)
        check_func(self.ziseries)

    def test_getitem(self):
        def _check_getitem(sp, dense):
            for idx, val in compat.iteritems(dense):
                tm.assert_almost_equal(val, sp[idx])

            for i in range(len(dense)):
                tm.assert_almost_equal(sp[i], dense[i])
                # j = np.float64(i)
                # assert_almost_equal(sp[j], dense[j])

                # API change 1/6/2012
                # negative getitem works
                # for i in xrange(len(dense)):
                #     assert_almost_equal(sp[-i], dense[-i])

        _check_getitem(self.bseries, self.bseries.to_dense())
        _check_getitem(self.btseries, self.btseries.to_dense())

        _check_getitem(self.zbseries, self.zbseries.to_dense())
        _check_getitem(self.iseries, self.iseries.to_dense())
        _check_getitem(self.ziseries, self.ziseries.to_dense())

        # exception handling
        self.assertRaises(Exception, self.bseries.__getitem__,
                          len(self.bseries) + 1)

        # index not contained
        self.assertRaises(Exception, self.btseries.__getitem__,
                          self.btseries.index[-1] + BDay())

    def test_get_get_value(self):
        tm.assert_almost_equal(self.bseries.get(10), self.bseries[10])
        self.assertIsNone(self.bseries.get(len(self.bseries) + 1))

        dt = self.btseries.index[10]
        result = self.btseries.get(dt)
        expected = self.btseries.to_dense()[dt]
        tm.assert_almost_equal(result, expected)

        tm.assert_almost_equal(self.bseries.get_value(10), self.bseries[10])

    def test_set_value(self):

        idx = self.btseries.index[7]
        self.btseries.set_value(idx, 0)
        self.assertEqual(self.btseries[idx], 0)

        self.iseries.set_value('foobar', 0)
        self.assertEqual(self.iseries.index[-1], 'foobar')
        self.assertEqual(self.iseries['foobar'], 0)

    def test_getitem_slice(self):
        idx = self.bseries.index
        res = self.bseries[::2]
        tm.assertIsInstance(res, SparseSeries)

        expected = self.bseries.reindex(idx[::2])
        tm.assert_sp_series_equal(res, expected)

        res = self.bseries[:5]
        tm.assertIsInstance(res, SparseSeries)
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))

        res = self.bseries[5:]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))

        # negative indices
        res = self.bseries[:-3]
        tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))

    def test_take(self):
        def _compare_with_dense(sp):
            dense = sp.to_dense()

            def _compare(idx):
                dense_result = dense.take(idx).values
                sparse_result = sp.take(idx)
                self.assertIsInstance(sparse_result, SparseSeries)
                tm.assert_almost_equal(dense_result,
                                       sparse_result.values.values)

            _compare([1., 2., 3., 4., 5., 0.])
            _compare([7, 2, 9, 0, 4])
            _compare([3, 6, 3, 4, 7])

        self._check_all(_compare_with_dense)

        self.assertRaises(Exception, self.bseries.take,
                          [0, len(self.bseries) + 1])

        # Corner case
        sp = SparseSeries(np.ones(10) * nan)
        exp = pd.Series(np.repeat(nan, 5))
        tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp)

    def test_setitem(self):
        self.bseries[5] = 7.
        self.assertEqual(self.bseries[5], 7.)

    def test_setslice(self):
        self.bseries[5:10] = 7.
        tm.assert_series_equal(self.bseries[5:10].to_dense(),
                               Series(7., index=range(5, 10),
                                      name=self.bseries.name))

    def test_operators(self):
        def _check_op(a, b, op):
            sp_result = op(a, b)
            adense = a.to_dense() if isinstance(a, SparseSeries) else a
            bdense = b.to_dense() if isinstance(b, SparseSeries) else b
            dense_result = op(adense, bdense)
            tm.assert_almost_equal(sp_result.to_dense(), dense_result)

        def check(a, b):
            _check_op(a, b, operator.add)
            _check_op(a, b, operator.sub)
            _check_op(a, b, operator.truediv)
            _check_op(a, b, operator.floordiv)
            _check_op(a, b, operator.mul)

            _check_op(a, b, lambda x, y: operator.add(y, x))
            _check_op(a, b, lambda x, y: operator.sub(y, x))
            _check_op(a, b, lambda x, y: operator.truediv(y, x))
            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
            _check_op(a, b, lambda x, y: operator.mul(y, x))

            # NaN ** 0 = 1 in C?
            # _check_op(a, b, operator.pow)
            # _check_op(a, b, lambda x, y: operator.pow(y, x))

        check(self.bseries, self.bseries)
        check(self.iseries, self.iseries)
        check(self.bseries, self.iseries)

        check(self.bseries, self.bseries2)
        check(self.bseries, self.iseries2)
        check(self.iseries, self.iseries2)

        # scalar value
        check(self.bseries, 5)

        # zero-based
        check(self.zbseries, self.zbseries * 2)
        check(self.zbseries, self.zbseries2)
        check(self.ziseries, self.ziseries2)

        # with dense
        result = self.bseries + self.bseries.to_dense()
        tm.assert_sp_series_equal(result, self.bseries + self.bseries)

    def test_binary_operators(self):

        # skipping for now #####
        raise nose.SkipTest("skipping sparse binary operators test")

        def _check_inplace_op(iop, op):
            tmp = self.bseries.copy()

            expected = op(tmp, self.bseries)
            iop(tmp, self.bseries)
            tm.assert_sp_series_equal(tmp, expected)

        inplace_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow']
        for op in inplace_ops:
            _check_inplace_op(getattr(operator, "i%s" % op),
                              getattr(operator, op))

    def test_abs(self):
        s = SparseSeries([1, 2, -3], name='x')
        expected = SparseSeries([1, 2, 3], name='x')
        result = s.abs()
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

        result = np.abs(s)
        tm.assert_sp_series_equal(result, expected)
        self.assertEqual(result.name, 'x')

    def test_reindex(self):
        def _compare_with_series(sps, new_index):
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            tm.assert_sp_series_equal(spsre, seriesre)
            tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        tm.assert_sp_series_equal(self.bseries, same_index)
        self.assertIsNot(same_index, self.bseries)

        # corner cases
        sp = SparseSeries([], index=[])
        # TODO: sp_zero is not used anywhere...remove?
        sp_zero = SparseSeries([], index=[], fill_value=0)  # noqa
        _compare_with_series(sp, np.arange(10))

        # with copy=False
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        self.assertTrue((self.bseries.sp_values != 1.).all())

        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        tm.assert_numpy_array_equal(self.bseries.sp_values, np.repeat(1., 10))

    def test_sparse_reindex(self):
        length = 10

        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values, sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            self.assertIs(reindexed.sp_index, index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            tm.assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            # TODO: expected is not used anywhere...remove?
            expected = expected.reindex(int_indices2).fillna(fill_value)  # noqa

        def _check_with_fill_value(values, first, second, fill_value=nan):
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.)

        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

        first_series = SparseSeries(values1,
                                    sparse_index=IntIndex(length, index1),
                                    fill_value=nan)
        with tm.assertRaisesRegexp(TypeError,
                                   'new index must be a SparseIndex'):
            reindexed = first_series.sparse_reindex(0)  # noqa

    def test_repr(self):
        # TODO: These aren't used
        bsrepr = repr(self.bseries)  # noqa
        isrepr = repr(self.iseries)  # noqa

    def test_iter(self):
        pass

    def test_truncate(self):
        pass

    def test_fillna(self):
        pass

    def test_groupby(self):
        pass

    def test_reductions(self):
        def _compare_with_dense(obj, op):
            sparse_result = getattr(obj, op)()
            series = obj.to_dense()
            dense_result = getattr(series, op)()
            self.assertEqual(sparse_result, dense_result)

        to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']

        def _compare_all(obj):
            for op in to_compare:
                _compare_with_dense(obj, op)

        _compare_all(self.bseries)

        self.bseries.sp_values[5:10] = np.NaN
        _compare_all(self.bseries)

        _compare_all(self.zbseries)
        self.zbseries.sp_values[5:10] = np.NaN
        _compare_all(self.zbseries)

        series = self.zbseries.copy()
        series.fill_value = 2
        _compare_all(series)

        nonna = Series(np.random.randn(20)).to_sparse()
        _compare_all(nonna)

        nonna2 = Series(np.random.randn(20)).to_sparse(fill_value=0)
        _compare_all(nonna2)

    def test_dropna(self):
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.valid()

        expected = sp.to_dense().valid()
        expected = expected[expected != 0]

        tm.assert_almost_equal(sp_valid.values, expected.values)
        self.assertTrue(sp_valid.index.equals(expected.index))
        self.assertEqual(len(sp_valid.sp_values), 2)

        result = self.bseries.dropna()
        expected = self.bseries.to_dense().dropna()
        self.assertNotIsInstance(result, SparseSeries)
        tm.assert_series_equal(result, expected)

    def test_homogenize(self):
        def _check_matches(indices, expected):
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices,
                                       sparse_index=idx)
            homogenized = spf.homogenize(data)

            for k, v in compat.iteritems(homogenized):
                assert (v.sp_index.equals(expected))

        indices1 = [BlockIndex(10, [2], [7]), BlockIndex(10, [1, 6], [3, 4]),
                    BlockIndex(10, [0], [10])]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {'a': SparseSeries(np.arange(7), sparse_index=expected2,
                                  fill_value=0)}
        with tm.assertRaisesRegexp(TypeError, "NaN fill value"):
            spf.homogenize(data)

    def test_fill_value_corner(self):
        cop = self.zbseries.copy()
        cop.fill_value = 0
        result = self.bseries / cop

        self.assertTrue(np.isnan(result.fill_value))

        cop2 = self.zbseries.copy()
        cop2.fill_value = 1
        result = cop2 / cop
        self.assertTrue(np.isnan(result.fill_value))

    def test_fill_value_when_combine_const(self):
        # GH12723
        s = SparseSeries([0, 1, np.nan, 3, 4, 5], index=np.arange(6))

        exp = s.fillna(0).add(2)
        res = s.add(2, fill_value=0)
        self.assert_series_equal(res, exp)

    def test_shift(self):
        series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6))

        shifted = series.shift(0)
        self.assertIsNot(shifted, series)
        tm.assert_sp_series_equal(shifted, series)

        f = lambda s: s.shift(1)
        _dense_series_compare(series, f)

        f = lambda s: s.shift(-2)
        _dense_series_compare(series, f)

        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=bdate_range('1/1/2000', periods=6))
        f = lambda s: s.shift(2, freq='B')
        _dense_series_compare(series, f)

        f = lambda s: s.shift(2, freq=datetools.bday)
        _dense_series_compare(series, f)

    def test_shift_nan(self):
        # GH 12908
        orig = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0])
        sparse = orig.to_sparse()

        tm.assert_sp_series_equal(sparse.shift(0), orig.shift(0).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(1), orig.shift(1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(2), orig.shift(2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(3), orig.shift(3).to_sparse())

        tm.assert_sp_series_equal(sparse.shift(-1), orig.shift(-1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-2), orig.shift(-2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-3), orig.shift(-3).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-4), orig.shift(-4).to_sparse())

        sparse = orig.to_sparse(fill_value=0)
        tm.assert_sp_series_equal(sparse.shift(0),
                                  orig.shift(0).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(1),
                                  orig.shift(1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(2),
                                  orig.shift(2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(3),
                                  orig.shift(3).to_sparse(fill_value=0))

        tm.assert_sp_series_equal(sparse.shift(-1),
                                  orig.shift(-1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-2),
                                  orig.shift(-2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-3),
                                  orig.shift(-3).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-4),
                                  orig.shift(-4).to_sparse(fill_value=0))

    def test_shift_dtype(self):
        # GH 12908
        orig = pd.Series([1, 2, 3, 4], dtype=np.int64)
        sparse = orig.to_sparse()

        tm.assert_sp_series_equal(sparse.shift(0), orig.shift(0).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(1), orig.shift(1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(2), orig.shift(2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(3), orig.shift(3).to_sparse())

        tm.assert_sp_series_equal(sparse.shift(-1), orig.shift(-1).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-2), orig.shift(-2).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-3), orig.shift(-3).to_sparse())
        tm.assert_sp_series_equal(sparse.shift(-4), orig.shift(-4).to_sparse())

    def test_shift_dtype_fill_value(self):
        # GH 12908
        orig = pd.Series([1, 0, 0, 4], dtype=np.int64)
        sparse = orig.to_sparse(fill_value=0)

        tm.assert_sp_series_equal(sparse.shift(0),
                                  orig.shift(0).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(1),
                                  orig.shift(1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(2),
                                  orig.shift(2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(3),
                                  orig.shift(3).to_sparse(fill_value=0))

        tm.assert_sp_series_equal(sparse.shift(-1),
                                  orig.shift(-1).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-2),
                                  orig.shift(-2).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-3),
                                  orig.shift(-3).to_sparse(fill_value=0))
        tm.assert_sp_series_equal(sparse.shift(-4),
                                  orig.shift(-4).to_sparse(fill_value=0))

    def test_cumsum(self):
        result = self.bseries.cumsum()
        expected = self.bseries.to_dense().cumsum()
        tm.assertIsInstance(result, SparseSeries)
        self.assertEqual(result.name, self.bseries.name)
        tm.assert_series_equal(result.to_dense(), expected)

        result = self.zbseries.cumsum()
        expected = self.zbseries.to_dense().cumsum()
        tm.assertIsInstance(result, Series)
        tm.assert_series_equal(result, expected)

    def test_combine_first(self):
        s = self.bseries

        result = s[::2].combine_first(s)
        result2 = s[::2].combine_first(s.to_dense())

        expected = s[::2].to_dense().combine_first(s.to_dense())
        expected = expected.to_sparse(fill_value=s.fill_value)

        tm.assert_sp_series_equal(result, result2)
        tm.assert_sp_series_equal(result, expected)
コード例 #4
0
ファイル: test_sparse.py プロジェクト: theandygross/pandas
class TestSparseSeries(TestCase, test_series.CheckNameIntegration):
    def setUp(self):
        arr, index = _test_data1()

        date_index = DateRange('1/1/2011', periods=len(index))

        self.bseries = SparseSeries(arr, index=index, kind='block')
        self.bseries.name = 'bseries'

        self.ts = self.bseries

        self.btseries = SparseSeries(arr, index=date_index, kind='block')

        self.iseries = SparseSeries(arr, index=index, kind='integer')

        arr, index = _test_data2()
        self.bseries2 = SparseSeries(arr, index=index, kind='block')
        self.iseries2 = SparseSeries(arr, index=index, kind='integer')

        arr, index = _test_data1_zero()
        self.zbseries = SparseSeries(arr,
                                     index=index,
                                     kind='block',
                                     fill_value=0)
        self.ziseries = SparseSeries(arr,
                                     index=index,
                                     kind='integer',
                                     fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2 = SparseSeries(arr,
                                      index=index,
                                      kind='block',
                                      fill_value=0)
        self.ziseries2 = SparseSeries(arr,
                                      index=index,
                                      kind='integer',
                                      fill_value=0)

    def test_construct_DataFrame_with_sp_series(self):
        # it works!
        df = DataFrame({'col': self.bseries})

    def test_sparse_to_dense(self):
        arr, index = _test_data1()
        series = self.bseries.to_dense()
        assert_equal(series, arr)

        series = self.bseries.to_dense(sparse_only=True)
        assert_equal(series, arr[np.isfinite(arr)])

        series = self.iseries.to_dense()
        assert_equal(series, arr)

        arr, index = _test_data1_zero()
        series = self.zbseries.to_dense()
        assert_equal(series, arr)

        series = self.ziseries.to_dense()
        assert_equal(series, arr)

    def test_dense_to_sparse(self):
        series = self.bseries.to_dense()
        bseries = series.to_sparse(kind='block')
        iseries = series.to_sparse(kind='integer')
        assert_sp_series_equal(bseries, self.bseries)
        assert_sp_series_equal(iseries, self.iseries)

        # non-NaN fill value
        series = self.zbseries.to_dense()
        zbseries = series.to_sparse(kind='block', fill_value=0)
        ziseries = series.to_sparse(kind='integer', fill_value=0)
        assert_sp_series_equal(zbseries, self.zbseries)
        assert_sp_series_equal(ziseries, self.ziseries)

    def test_to_dense_preserve_name(self):
        assert (self.bseries.name is not None)
        result = self.bseries.to_dense()
        self.assertEquals(result.name, self.bseries.name)

    def test_constructor(self):
        # test setup guys
        self.assert_(np.isnan(self.bseries.fill_value))
        self.assert_(isinstance(self.bseries.sp_index, BlockIndex))
        self.assert_(np.isnan(self.iseries.fill_value))
        self.assert_(isinstance(self.iseries.sp_index, IntIndex))

        self.assertEquals(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0))

        # pass SparseSeries
        s2 = SparseSeries(self.bseries)
        s3 = SparseSeries(self.iseries)
        s4 = SparseSeries(self.zbseries)
        assert_sp_series_equal(s2, self.bseries)
        assert_sp_series_equal(s3, self.iseries)
        assert_sp_series_equal(s4, self.zbseries)

        # Sparse time series works
        date_index = DateRange('1/1/2000', periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        self.assert_(isinstance(s5, SparseTimeSeries))

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default
        values = np.ones(len(self.bseries.sp_values))
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        sp.sp_values[:5] = 97
        self.assert_(values[0] == 97)

        # but can make it copy!
        sp = SparseSeries(values,
                          sparse_index=self.bseries.sp_index,
                          copy=True)
        sp.sp_values[:5] = 100
        self.assert_(values[0] == 97)

    def test_constructor_ndarray(self):
        pass

    def test_constructor_nonnan(self):
        arr = [0, 0, 0, nan, nan]
        sp_series = SparseSeries(arr, fill_value=0)
        assert_equal(sp_series.values, arr)

    def test_copy_astype(self):
        cop = self.bseries.astype(np.float_)
        self.assert_(cop is not self.bseries)
        self.assert_(cop.sp_index is self.bseries.sp_index)
        self.assert_(cop.dtype == np.float64)

        cop2 = self.iseries.copy()

        assert_sp_series_equal(cop, self.bseries)
        assert_sp_series_equal(cop2, self.iseries)

        # test that data is copied
        cop.sp_values[:5] = 97
        self.assert_(cop.sp_values[0] == 97)
        self.assert_(self.bseries.sp_values[0] != 97)

        # correct fill value
        zbcop = self.zbseries.copy()
        zicop = self.ziseries.copy()

        assert_sp_series_equal(zbcop, self.zbseries)
        assert_sp_series_equal(zicop, self.ziseries)

        # no deep copy
        view = self.bseries.copy(deep=False)
        view.sp_values[:5] = 5
        self.assert_((self.bseries.sp_values[:5] == 5).all())

    def test_astype(self):
        self.assertRaises(Exception, self.bseries.astype, np.int64)

    def test_kind(self):
        self.assertEquals(self.bseries.kind, 'block')
        self.assertEquals(self.iseries.kind, 'integer')

    def test_pickle(self):
        def _test_roundtrip(series):
            pickled = pickle.dumps(series, protocol=pickle.HIGHEST_PROTOCOL)
            unpickled = pickle.loads(pickled)
            assert_sp_series_equal(series, unpickled)
            assert_series_equal(series.to_dense(), unpickled.to_dense())

        self._check_all(_test_roundtrip)

    def _check_all(self, check_func):
        check_func(self.bseries)
        check_func(self.iseries)
        check_func(self.zbseries)
        check_func(self.ziseries)

    def test_getitem(self):
        def _check_getitem(sp, dense):
            for idx, val in dense.iteritems():
                assert_almost_equal(val, sp[idx])

            for i in xrange(len(dense)):
                assert_almost_equal(sp[i], dense[i])
                # j = np.float64(i)
                # assert_almost_equal(sp[j], dense[j])

            # negative getitem works
            for i in xrange(len(dense)):
                assert_almost_equal(sp[-i], dense[-i])

        _check_getitem(self.bseries, self.bseries.to_dense())
        _check_getitem(self.btseries, self.btseries.to_dense())

        _check_getitem(self.zbseries, self.zbseries.to_dense())
        _check_getitem(self.iseries, self.iseries.to_dense())
        _check_getitem(self.ziseries, self.ziseries.to_dense())

        # exception handling
        self.assertRaises(Exception, self.bseries.__getitem__,
                          len(self.bseries) + 1)

        # index not contained
        self.assertRaises(Exception, self.btseries.__getitem__,
                          self.btseries.index[-1] + BDay())

    def test_get_get_value(self):
        assert_almost_equal(self.bseries.get(10), self.bseries[10])
        self.assert_(self.bseries.get(len(self.bseries) + 1) is None)

        dt = self.btseries.index[10]
        result = self.btseries.get(dt)
        expected = self.btseries.to_dense()[dt]
        assert_almost_equal(result, expected)

        assert_almost_equal(self.bseries.get_value(10), self.bseries[10])

    def test_set_value(self):
        idx = self.btseries.index[7]
        res = self.btseries.set_value(idx, 0)
        self.assert_(res is not self.btseries)
        self.assertEqual(res[idx], 0)

        res = self.iseries.set_value('foobar', 0)
        self.assert_(res is not self.iseries)
        self.assert_(res.index[-1] == 'foobar')
        self.assertEqual(res['foobar'], 0)

    def test_getitem_slice(self):
        idx = self.bseries.index
        res = self.bseries[::2]
        self.assert_(isinstance(res, SparseSeries))
        assert_sp_series_equal(res, self.bseries.reindex(idx[::2]))

        res = self.bseries[:5]
        self.assert_(isinstance(res, SparseSeries))
        assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))

        res = self.bseries[5:]
        assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))

        # negative indices
        res = self.bseries[:-3]
        assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))

    def test_take(self):
        def _compare_with_dense(sp):
            dense = sp.to_dense()

            def _compare(idx):
                dense_result = dense.take(idx).values
                sparse_result = sp.take(idx)
                assert_almost_equal(dense_result, sparse_result)

            _compare([1., 2., 3., 4., 5., 0.])
            _compare([7, 2, 9, 0, 4])
            _compare([3, 6, 3, 4, 7])

        self._check_all(_compare_with_dense)

        self.assertRaises(Exception, self.bseries.take, [-1, 0])
        self.assertRaises(Exception, self.bseries.take,
                          [0, len(self.bseries) + 1])

        # Corner case
        sp = SparseSeries(np.ones(10.) * nan)
        assert_almost_equal(sp.take([0, 1, 2, 3, 4]), np.repeat(nan, 5))

    def test_setitem(self):
        self.assertRaises(Exception, self.bseries.__setitem__, 5, 7.)
        self.assertRaises(Exception, self.iseries.__setitem__, 5, 7.)

    def test_setslice(self):
        self.assertRaises(Exception, self.bseries.__setslice__, 5, 10, 7.)

    def test_operators(self):
        def _check_op(a, b, op):
            sp_result = op(a, b)
            adense = a.to_dense() if isinstance(a, SparseSeries) else a
            bdense = b.to_dense() if isinstance(b, SparseSeries) else b
            dense_result = op(adense, bdense)
            assert_almost_equal(sp_result.to_dense(), dense_result)

        def check(a, b):
            _check_op(a, b, operator.add)
            _check_op(a, b, operator.sub)
            _check_op(a, b, operator.truediv)
            _check_op(a, b, operator.floordiv)
            _check_op(a, b, operator.mul)

            _check_op(a, b, lambda x, y: operator.add(y, x))
            _check_op(a, b, lambda x, y: operator.sub(y, x))
            _check_op(a, b, lambda x, y: operator.truediv(y, x))
            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
            _check_op(a, b, lambda x, y: operator.mul(y, x))

            # NaN ** 0 = 1 in C?
            # _check_op(a, b, operator.pow)
            # _check_op(a, b, lambda x, y: operator.pow(y, x))

        check(self.bseries, self.bseries)
        check(self.iseries, self.iseries)
        check(self.bseries, self.iseries)

        check(self.bseries, self.bseries2)
        check(self.bseries, self.iseries2)
        check(self.iseries, self.iseries2)

        # scalar value
        check(self.bseries, 5)

        # zero-based
        check(self.zbseries, self.zbseries * 2)
        check(self.zbseries, self.zbseries2)
        check(self.ziseries, self.ziseries2)

        # with dense
        result = self.bseries + self.bseries.to_dense()
        assert_sp_series_equal(result, self.bseries + self.bseries)

    # @dec.knownfailureif(True, 'Known NumPy failer as of 1.5.1')
    def test_operators_corner2(self):
        raise nose.SkipTest('known failer on numpy 1.5.1')

        # NumPy circumvents __r*__ operations
        val = np.float64(3.0)
        result = val - self.zbseries
        assert_sp_series_equal(result, 3 - self.zbseries)

    def test_binary_operators(self):
        def _check_inplace_op(op):
            tmp = self.bseries.copy()
            self.assertRaises(NotImplementedError, op, tmp, self.bseries)

        inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', 'ipow']
        for op in inplace_ops:
            _check_inplace_op(getattr(operator, op))

    def test_reindex(self):
        def _compare_with_series(sps, new_index):
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            assert_sp_series_equal(spsre, seriesre)
            assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        assert_sp_series_equal(self.bseries, same_index)
        self.assert_(same_index is not self.bseries)

        # corner cases
        sp = SparseSeries([], index=[])
        sp_zero = SparseSeries([], index=[], fill_value=0)
        _compare_with_series(sp, np.arange(10))

        # with copy=False
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        self.assert_((self.bseries.sp_values != 1.).all())

        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        self.assert_((self.bseries.sp_values == 1.).all())

    def test_sparse_reindex(self):
        length = 10

        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values,
                                        sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            self.assert_(reindexed.sp_index is index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            expected = expected.reindex(int_indices2).fillna(fill_value)

        def _check_with_fill_value(values, first, second, fill_value=nan):
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.)

        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

    def test_repr(self):
        bsrepr = repr(self.bseries)
        isrepr = repr(self.iseries)

    def test_iter(self):
        pass

    def test_truncate(self):
        pass

    def test_fillna(self):
        pass

    def test_groupby(self):
        pass

    def test_reductions(self):
        def _compare_with_dense(obj, op):
            sparse_result = getattr(obj, op)()
            series = obj.to_dense()
            dense_result = getattr(series, op)()
            self.assertEquals(sparse_result, dense_result)

        to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']

        def _compare_all(obj):
            for op in to_compare:
                _compare_with_dense(obj, op)

        _compare_all(self.bseries)
        self.bseries.sp_values[5:10] = np.NaN
        _compare_all(self.bseries)

        _compare_all(self.zbseries)
        self.zbseries.sp_values[5:10] = np.NaN
        _compare_all(self.zbseries)

        series = self.zbseries.copy()
        series.fill_value = 2
        _compare_all(series)

    def test_valid(self):
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.valid()
        assert_almost_equal(sp_valid, sp.to_dense().valid())
        self.assert_(sp_valid.index.equals(sp.to_dense().valid().index))
        self.assertEquals(len(sp_valid.sp_values), 2)

    def test_homogenize(self):
        def _check_matches(indices, expected):
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices,
                                       sparse_index=idx)
            homogenized = spf.homogenize(data)

            for k, v in homogenized.iteritems():
                assert (v.sp_index.equals(expected))

        indices1 = [
            BlockIndex(10, [2], [7]),
            BlockIndex(10, [1, 6], [3, 4]),
            BlockIndex(10, [0], [10])
        ]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {
            'a': SparseSeries(np.arange(7),
                              sparse_index=expected2,
                              fill_value=0)
        }
        nose.tools.assert_raises(Exception, spf.homogenize, data)

    def test_fill_value_corner(self):
        cop = self.zbseries.copy()
        cop.fill_value = 0
        result = self.bseries / cop

        self.assert_(np.isnan(result.fill_value))

        cop2 = self.zbseries.copy()
        cop2.fill_value = 1
        result = cop2 / cop
        self.assert_(np.isnan(result.fill_value))

    def test_shift(self):
        series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6))

        shifted = series.shift(0)
        self.assert_(shifted is not series)
        assert_sp_series_equal(shifted, series)

        f = lambda s: s.shift(1)
        _dense_series_compare(series, f)

        f = lambda s: s.shift(-2)
        _dense_series_compare(series, f)

        series = SparseSeries([nan, 1., 2., 3., nan, nan],
                              index=DateRange('1/1/2000', periods=6))
        f = lambda s: s.shift(2, timeRule='WEEKDAY')
        _dense_series_compare(series, f)

        f = lambda s: s.shift(2, offset=datetools.bday)
        _dense_series_compare(series, f)

    def test_cumsum(self):
        result = self.bseries.cumsum()
        expected = self.bseries.to_dense().cumsum()
        self.assert_(isinstance(result, SparseSeries))
        self.assertEquals(result.name, self.bseries.name)
        assert_series_equal(result.to_dense(), expected)

        result = self.zbseries.cumsum()
        expected = self.zbseries.to_dense().cumsum()
        self.assert_(isinstance(result, Series))
        assert_series_equal(result, expected)