Example #1
0
    def tests_indexing_with_sparse(self):
        """Boolean sparse masks select the ``True`` positions (GH 13985).

        Every sparse index kind is combined with every mask fill value.
        """
        combos = [(kind, fill)
                  for kind in ['integer', 'block']
                  for fill in [True, False, np.nan]]

        for kind, fill in combos:
            arr = pd.SparseArray([1, 2, 3], kind=kind)
            mask = pd.SparseArray([True, False, True],
                                  fill_value=fill, dtype=bool)

            # masking the raw SparseArray
            tm.assert_sp_array_equal(pd.SparseArray([1, 3], kind=kind),
                                     arr[mask])

            # masking a SparseSeries with a SparseArray mask
            series = pd.SparseSeries(arr, index=list('abc'),
                                     dtype=np.float64)
            expected = pd.SparseSeries([1, 3], index=list('ac'),
                                       dtype=np.float64, kind=kind)
            tm.assert_sp_series_equal(series[mask], expected)
            tm.assert_sp_series_equal(series.loc[mask], expected)
            tm.assert_sp_series_equal(series.iloc[mask], expected)

            # masking with a labelled SparseSeries mask
            mask = pd.SparseSeries(mask, index=list('abc'))
            tm.assert_sp_series_equal(series[mask], expected)
            tm.assert_sp_series_equal(series.loc[mask], expected)

            # ... which positional indexing is expected to reject
            msg = ("iLocation based boolean indexing cannot use an "
                   "indexable as a mask")
            with tm.assert_raises_regex(ValueError, msg):
                series.iloc[mask]
Example #2
0
 def test_series_indexing_multiple(self):
     """``.loc`` with a list of labels yields the matching SparseSeries."""
     picked = self.ss.loc[['string', 'int']]
     wanted = pd.SparseSeries(['a', 1], index=['string', 'int'])
     tm.assert_sp_series_equal(picked, wanted)

     picked = self.ss.loc[['string', 'object']]
     wanted = pd.SparseSeries(['a', []], index=['string', 'object'])
     tm.assert_sp_series_equal(picked, wanted)
Example #3
0
    def test_fill_value_reindex_coerces_float_int(self):
        """Reindexing a ``fill_value=0`` sparse series matches the dense path."""
        new_labels = ['A', 'E', 'C', 'D']
        dense = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE'))
        sparse = dense.to_sparse(fill_value=0)

        reindexed = sparse.reindex(new_labels)
        wanted = dense.reindex(new_labels).to_sparse(fill_value=0)
        tm.assert_sp_series_equal(reindexed, wanted)
Example #4
0
    def test_loc(self):
        """``.loc`` on the sparse series agrees with the dense series."""
        dense = self.orig
        sparse = self.sparse

        # scalar lookups
        assert sparse.loc[0] == 1
        assert np.isnan(sparse.loc[1])

        # list of labels
        labels = [1, 3, 4]
        result = sparse.loc[labels]
        exp = dense.loc[labels].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # labels running past the index are handled through reindex
        wider = [1, 3, 4, 5]
        result = sparse.reindex(wider)
        exp = dense.reindex(wider).to_sparse()
        tm.assert_sp_series_equal(result, exp)
        # the extra label is padded with NaN
        assert np.isnan(result[-1])

        # dense boolean mask
        result = sparse.loc[dense % 2 == 1]
        exp = dense.loc[dense % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # sparse boolean mask (actually it is coerced to a normal Series)
        result = sparse.loc[sparse % 2 == 1]
        exp = dense.loc[dense % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # explicit boolean SparseArray mask, same expectation as above
        sparse_mask = pd.SparseArray(sparse % 2 == 1, dtype=bool)
        result = sparse.loc[sparse_mask]
        tm.assert_sp_series_equal(result, exp)
Example #5
0
    def tests_indexing_with_sparse(self, kind, fill):
        """Boolean sparse masks select the ``True`` positions (see gh-13985)."""
        arr = pd.SparseArray([1, 2, 3], kind=kind)
        mask = pd.SparseArray([True, False, True], fill_value=fill,
                              dtype=bool)

        # masking the raw SparseArray
        masked_arr = arr[mask]
        kept_values = pd.SparseArray([1, 3], kind=kind)
        tm.assert_sp_array_equal(kept_values, masked_arr)

        # masking a SparseSeries with a SparseArray mask
        series = pd.SparseSeries(arr, index=list("abc"), dtype=np.float64)
        float_dtype = SparseDtype(np.float64, series.fill_value)
        wanted = pd.SparseSeries([1, 3], index=list("ac"), kind=kind,
                                 dtype=float_dtype)

        tm.assert_sp_series_equal(series[mask], wanted)
        tm.assert_sp_series_equal(series.loc[mask], wanted)
        tm.assert_sp_series_equal(series.iloc[mask], wanted)

        # masking with a labelled SparseSeries mask
        mask = pd.SparseSeries(mask, index=list("abc"))
        tm.assert_sp_series_equal(series[mask], wanted)
        tm.assert_sp_series_equal(series.loc[mask], wanted)

        # ... which positional indexing is expected to reject
        msg = ("iLocation based boolean indexing cannot "
               "use an indexable as a mask")
        with pytest.raises(ValueError, match=msg):
            series.iloc[mask]
Example #6
0
    def test_getitem_fill_value(self):
        """``[]`` selection on a frame that was sparsified with ``fill_value=0``."""
        orig = pd.DataFrame([[1, np.nan, 0],
                             [2, 3, np.nan],
                             [0, np.nan, 4],
                             [0, np.nan, 5]],
                            columns=list('xyz'))
        sparse = orig.to_sparse(fill_value=0)

        def expected_from(dense):
            # Sparsify the dense selection, then set the frame-level default
            # fill value to NaN before it is compared.
            frame = dense.to_sparse(fill_value=0)
            frame._default_fill_value = np.nan
            return frame

        # single column as a frame; the fill value is not compared here
        result = sparse[['z']]
        expected = orig[['z']].to_sparse(fill_value=0)
        tm.assert_sp_frame_equal(result, expected, check_fill_value=False)

        # single column as a series
        tm.assert_sp_series_equal(sparse['y'],
                                  orig['y'].to_sparse(fill_value=0))

        # column lists
        exp = expected_from(orig[['x']])
        tm.assert_sp_frame_equal(sparse[['x']], exp)

        exp = expected_from(orig[['z', 'x']])
        tm.assert_sp_frame_equal(sparse[['z', 'x']], exp)

        # boolean row mask
        indexer = [True, False, True, True]
        exp = expected_from(orig[indexer])
        tm.assert_sp_frame_equal(sparse[indexer], exp)

        # positional row selection
        exp = expected_from(orig.iloc[[1, 2]])
        tm.assert_sp_frame_equal(sparse.iloc[[1, 2]], exp)
Example #7
0
    def test_getitem_ellipsis(self):
        """Indexing with ``...`` gives back an equal series (GH 9467)."""
        # first with the default fill value, then with fill_value=0
        for extra in ({}, {'fill_value': 0}):
            series = pd.SparseSeries([1, np.nan, 2, 0, np.nan], **extra)
            tm.assert_sp_series_equal(series[...], series)
Example #8
0
def compare_sp_series_ts(res, exp, typ, version):
    """Assert that two sparse series are equal, relaxing the type check for old versions.

    SparseTimeSeries was integrated into SparseSeries in 0.12.0 and deprecated
    in 0.17.0, so for ``version`` <= 0.12.0 the series type is not compared.
    ``typ`` is accepted but not used.
    """
    options = {}
    if version and LooseVersion(version) <= LooseVersion("0.12.0"):
        options['check_series_type'] = False
    tm.assert_sp_series_equal(res, exp, **options)
Example #9
0
        def _compare_with_series(sps, new_index):
            """Check that sparse reindexing matches reindexing the dense form."""
            sparse_reindexed = sps.reindex(new_index)

            # same operation on the dense data, sparsified with the same fill
            dense_reindexed = sps.to_dense().reindex(new_index)
            wanted = dense_reindexed.to_sparse(fill_value=sps.fill_value)

            tm.assert_sp_series_equal(sparse_reindexed, wanted)
            tm.assert_series_equal(sparse_reindexed.to_dense(),
                                   wanted.to_dense())
Example #10
0
    def test_setitem_array(self):
        """Assigning an existing column (whole, then truncated) as a new column."""
        column_b = self.frame["B"]

        # full-length assignment
        self.frame["E"] = column_b
        tm.assert_sp_series_equal(
            self.frame["E"], self.frame["B"], check_names=False
        )

        # assignment of all but the last element; compare on the shared index
        self.frame["F"] = column_b[:-1]
        trimmed = self.frame.index[:-1]
        left = self.frame["E"].reindex(trimmed)
        right = self.frame["F"].reindex(trimmed)
        tm.assert_sp_series_equal(left, right, check_names=False)
Example #11
0
    def test_cumsum(self):
        """``cumsum`` on ``bseries`` and ``zbseries`` matches the dense cumsum."""
        sparse_total = self.bseries.cumsum()
        dense_total = self.bseries.to_dense().cumsum()
        tm.assert_sp_series_equal(sparse_total, SparseSeries(dense_total))

        # TODO: gh-12855 - return a SparseSeries here
        zero_total = self.zbseries.cumsum()
        dense_total = self.zbseries.to_dense().cumsum()
        self.assertNotIsInstance(zero_total, SparseSeries)
        tm.assert_series_equal(zero_total, dense_total)
Example #12
0
    def test_loc_slice(self):
        """Label slices through ``.loc`` match the dense series."""
        orig = self.orig
        sparse = self.sparse

        label_slices = (
            slice('A', None),   # 'A':
            slice('B', None),   # 'B':
            slice('C', None),   # 'C':
            slice('A', 'B'),    # 'A':'B'
            slice(None, 'B'),   # :'B'
        )
        for key in label_slices:
            tm.assert_sp_series_equal(sparse.loc[key],
                                      orig.loc[key].to_sparse())
Example #13
0
    def test_subclass_sparse_addition(self):
        """Adding two subclassed sparse series gives the element-wise sum."""
        cases = [
            # integer data
            ([1, 3, 5], [-2, 5, 12], [-1, 8, 17]),
            # float data
            ([4.0, 5.0, 6.0], [1.0, 2.0, 3.0], [5., 7., 9.]),
        ]
        for left, right, total in cases:
            s1 = tm.SubclassedSparseSeries(left)
            s2 = tm.SubclassedSparseSeries(right)
            exp = tm.SubclassedSparseSeries(total)
            tm.assert_sp_series_equal(s1 + s2, exp)
Example #14
0
    def test_reindex_nearest(self):
        """``reindex(method='nearest')`` with and without a tolerance."""
        sparse = pd.Series(np.arange(10, dtype='float64')).to_sparse()
        target = [0.1, 0.9, 1.5, 2.0]

        # no tolerance: every target is compared against its rounded value
        nearest = sparse.reindex(target, method='nearest')
        rounded = pd.Series(np.around(target), target).to_sparse()
        tm.assert_sp_series_equal(rounded, nearest)

        # tolerance=0.2: the 1.5 target is expected to come back as NaN
        within_tol = sparse.reindex(target, method='nearest', tolerance=0.2)
        partial = pd.Series([0, 1, np.nan, 2], target).to_sparse()
        tm.assert_sp_series_equal(partial, within_tol)
Example #15
0
    def test_iloc_fill_value(self):
        """``.iloc`` on a series sparsified with ``fill_value=0``."""
        dense = pd.Series([1, np.nan, 0, 3, 0])
        sparse = dense.to_sparse(fill_value=0)

        # scalar positions: a stored value, a NaN and a fill value
        self.assertEqual(sparse.iloc[3], 3)
        self.assertTrue(np.isnan(sparse.iloc[1]))
        self.assertEqual(sparse.iloc[4], 0)

        # list of positions
        positions = [1, 3, 4]
        picked = sparse.iloc[positions]
        wanted = dense.iloc[positions].to_sparse(fill_value=0)
        tm.assert_sp_series_equal(picked, wanted)
Example #16
0
        def _check_const(sparse, name):
            """Check name handling when building a SparseSeries from ``sparse``."""
            # without an explicit name the source series' name is kept
            rebuilt = SparseSeries(sparse)
            tm.assert_sp_series_equal(rebuilt, sparse)
            self.assertEqual(sparse.name, name)
            self.assertEqual(rebuilt.name, name)

            # an explicit name is used for the new series
            renamed = SparseSeries(sparse, name='x')
            tm.assert_sp_series_equal(renamed, sparse, check_names=False)
            self.assertEqual(renamed.name, 'x')
Example #17
0
    def test_combine_first(self):
        """``combine_first`` with a sparse or dense argument matches the dense result."""
        series = self.bseries

        from_sparse = series[::2].combine_first(series)
        from_dense = series[::2].combine_first(series.to_dense())

        # reference computed entirely on dense data, then sparsified
        dense_combined = series[::2].to_dense().combine_first(
            series.to_dense())
        wanted = dense_combined.to_sparse(fill_value=series.fill_value)

        tm.assert_sp_series_equal(from_sparse, from_dense)
        tm.assert_sp_series_equal(from_sparse, wanted)
Example #18
0
    def test_iloc_fill_value(self):
        """``.iloc`` on a series sparsified with ``fill_value=0``."""
        dense = pd.Series([1, np.nan, 0, 3, 0])
        sparse = dense.to_sparse(fill_value=0)

        # scalar positions: a stored value, a NaN and a fill value
        assert sparse.iloc[3] == 3
        assert np.isnan(sparse.iloc[1])
        assert sparse.iloc[4] == 0

        # list of positions
        positions = [1, 3, 4]
        picked = sparse.iloc[positions]
        wanted = dense.iloc[positions].to_sparse(fill_value=0)
        tm.assert_sp_series_equal(picked, wanted)
Example #19
0
    def test_getitems_slice_multi(self):
        """Positional ``[]`` slicing and label ``.loc`` slicing match dense."""
        orig = self.orig
        sparse = self.sparse

        # positional slice through []
        tm.assert_sp_series_equal(sparse[2:], orig[2:].to_sparse())

        # label slices through .loc
        label_slices = (
            slice('B', None),   # 'B':
            slice('C', None),   # 'C':
            slice('A', 'B'),    # 'A':'B'
            slice(None, 'B'),   # :'B'
        )
        for key in label_slices:
            tm.assert_sp_series_equal(sparse.loc[key],
                                      orig.loc[key].to_sparse())
Example #20
0
    def test_icol(self):
        """Positional column access returns a SparseSeries equal to the labelled one."""
        # 10711 deprecated

        # 2227
        first_col = self.frame.iloc[:, 0]
        self.assertTrue(isinstance(first_col, SparseSeries))
        tm.assert_sp_series_equal(first_col, self.frame["A"])

        # preserve sparse index type. #2251
        int_frame = SparseDataFrame({"A": [0, 1]}, default_kind="integer")
        by_label = type(int_frame["A"].sp_index)
        by_position = type(int_frame.iloc[:, 0].sp_index)
        self.assertEqual(by_label, by_position)
Example #21
0
    def test_iloc(self):
        """Positional column access returns a SparseSeries equal to the labelled one."""
        # 2227
        first_col = self.frame.iloc[:, 0]
        assert isinstance(first_col, SparseSeries)
        tm.assert_sp_series_equal(first_col, self.frame['A'])

        # preserve sparse index type. #2251
        int_frame = SparseDataFrame({'A': [0, 1]}, default_kind='integer')
        by_label = int_frame['A'].sp_index
        by_position = int_frame.iloc[:, 0].sp_index
        tm.assert_class_equal(by_label, by_position)
Example #22
0
    def test_setitem_array(self, float_frame):
        """Assigning an existing column (whole, then truncated) as a new column."""
        column_b = float_frame['B']

        # full-length assignment
        float_frame['E'] = column_b
        tm.assert_sp_series_equal(
            float_frame['E'], float_frame['B'], check_names=False)

        # assignment of all but the last element; compare on the shared index
        float_frame['F'] = column_b[:-1]
        trimmed = float_frame.index[:-1]
        left = float_frame['E'].reindex(trimmed)
        right = float_frame['F'].reindex(trimmed)
        tm.assert_sp_series_equal(left, right, check_names=False)
Example #23
0
        def _check_frame(frame):
            """Exercise ``frame[key] = value`` for several kinds of value.

            Columns 'E' through 'K' are assigned in turn (sparse series,
            dense series, ndarrays, scalars) and the stored column is checked
            after each assignment. ``frame`` is mutated in place, so the
            order of the steps below matters.
            """
            N = len(frame)

            # insert SparseSeries
            frame['E'] = frame['A']
            tm.assertIsInstance(frame['E'], SparseSeries)
            tm.assert_sp_series_equal(frame['E'], frame['A'],
                                      check_names=False)

            # insert SparseSeries differently-indexed
            # (every other row of 'A'; the expected dense column has the
            # missing rows filled with the inserted series' fill value)
            to_insert = frame['A'][::2]
            frame['E'] = to_insert
            expected = to_insert.to_dense().reindex(frame.index).fillna(
                to_insert.fill_value)
            result = frame['E'].to_dense()
            assert_series_equal(result, expected, check_names=False)
            self.assertEqual(result.name, 'E')

            # insert Series
            frame['F'] = frame['A'].to_dense()
            tm.assertIsInstance(frame['F'], SparseSeries)
            tm.assert_sp_series_equal(frame['F'], frame['A'],
                                      check_names=False)

            # insert Series differently-indexed
            # (here the missing rows are filled with the frame's default
            # fill value rather than the series' own)
            to_insert = frame['A'].to_dense()[::2]
            frame['G'] = to_insert
            expected = to_insert.reindex(frame.index).fillna(
                frame.default_fill_value)
            expected.name = 'G'
            assert_series_equal(frame['G'].to_dense(), expected)

            # insert ndarray
            frame['H'] = np.random.randn(N)
            tm.assertIsInstance(frame['H'], SparseSeries)

            # second half set to the default fill value, so only the first
            # N // 2 entries are expected to be stored as sparse values
            to_sparsify = np.random.randn(N)
            to_sparsify[N // 2:] = frame.default_fill_value
            frame['I'] = to_sparsify
            self.assertEqual(len(frame['I'].sp_values), N // 2)

            # insert ndarray wrong size
            # NOTE(review): any Exception satisfies this check; the concrete
            # exception type is not pinned down here.
            self.assertRaises(Exception, frame.__setitem__, 'foo',
                              np.random.randn(N - 1))

            # scalar value
            frame['J'] = 5
            self.assertEqual(len(frame['J'].sp_values), N)
            self.assertTrue((frame['J'].sp_values == 5).all())

            # a scalar equal to the default fill value stores no sparse values
            frame['K'] = frame.default_fill_value
            self.assertEqual(len(frame['K'].sp_values), 0)
Example #24
0
    def test_iloc(self):
        """``.iloc`` on a NaN-filled sparse series matches the dense series."""
        dense = pd.Series([1, np.nan, np.nan, 3, np.nan])
        sparse = dense.to_sparse()

        # scalar positions
        self.assertEqual(sparse.iloc[3], 3)
        self.assertTrue(np.isnan(sparse.iloc[2]))

        # list of positions
        positions = [1, 3, 4]
        picked = sparse.iloc[positions]
        wanted = dense.iloc[positions].to_sparse()
        tm.assert_sp_series_equal(picked, wanted)

        # position 5 is out of range for five elements
        with tm.assertRaises(IndexError):
            sparse.iloc[[1, 3, 5]]
Example #25
0
def test_where_with_numeric_data(data):
    """Sparse ``where`` on numeric data agrees with dense ``where`` (GH 17386)."""
    threshold = 1.5

    sparse = SparseSeries(data)
    sparse_mask = sparse > threshold
    result = sparse.where(sparse_mask)

    dense = Series(data)
    dense_mask = dense > threshold
    dense_expected = dense.where(dense_mask)
    sparse_expected = SparseSeries(dense_expected)

    # compared both as a plain series and as a sparse series
    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
Example #26
0
def test_where_with_numeric_data_and_other(data, other):
    """Sparse ``where`` with a replacement value agrees with dense (GH 17386)."""
    threshold = 1.5

    sparse = SparseSeries(data)
    sparse_mask = sparse > threshold
    result = sparse.where(sparse_mask, other)

    dense = Series(data)
    dense_mask = dense > threshold
    dense_expected = dense.where(dense_mask, other)
    # the sparse expectation uses the replacement value as its fill value
    sparse_expected = SparseSeries(dense_expected, fill_value=other)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
Example #27
0
def test_where_with_bool_data_and_other(other):
    """Sparse ``where`` on booleans with a replacement value (GH 17386)."""
    flags = [False, False, True, True, False, False]
    wanted_flag = True

    sparse = SparseSeries(flags)
    sparse_mask = sparse == wanted_flag
    result = sparse.where(sparse_mask, other)

    dense = Series(flags)
    dense_mask = dense == wanted_flag
    dense_expected = dense.where(dense_mask, other)
    # the sparse expectation uses the replacement value as its fill value
    sparse_expected = SparseSeries(dense_expected, fill_value=other)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
Example #28
0
def test_where_with_bool_data():
    """Sparse ``where`` on boolean data agrees with dense ``where`` (GH 17386)."""
    flags = [False, False, True, True, False, False]
    wanted_flag = True

    sparse = SparseSeries(flags)
    sparse_mask = sparse == wanted_flag
    result = sparse.where(sparse_mask)

    dense = Series(flags)
    dense_mask = dense == wanted_flag
    dense_expected = dense.where(dense_mask)
    sparse_expected = SparseSeries(dense_expected)

    # compared both as a plain series and as a sparse series
    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
Example #29
0
def test_quantile():
    """``SparseDataFrame.quantile`` agrees with ``DataFrame.quantile`` (GH 17386)."""
    rows = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    level = 0.1

    result = SparseDataFrame(rows).quantile(level)

    dense_expected = DataFrame(rows).quantile(level)
    sparse_expected = SparseSeries(dense_expected)

    # compared both as a plain series and as a sparse series
    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
Example #30
0
    def test_delitem(self):
        """Deleting columns removes them and leaves the other columns intact."""
        col_a = self.frame['A']
        col_c = self.frame['C']

        # after dropping 'B' the remaining columns still hold the same data
        del self.frame['B']
        self.assertNotIn('B', self.frame)
        tm.assert_sp_series_equal(self.frame['A'], col_a)
        tm.assert_sp_series_equal(self.frame['C'], col_c)

        # further deletions, one column at a time
        for column in ('D', 'A'):
            del self.frame[column]
            self.assertNotIn(column, self.frame)
Example #31
0
    def test_loc(self):
        """``.loc`` on a sparse frame agrees with the dense frame."""
        orig = pd.DataFrame(
            [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
            columns=list('xyz'))
        sparse = orig.to_sparse()
        everything = slice(None)

        # scalar lookups
        assert sparse.loc[0, 'x'] == 1
        assert np.isnan(sparse.loc[1, 'z'])
        assert sparse.loc[2, 'z'] == 4

        # Row lookups. `kind='integer'` has to be given on the expected side:
        # a new SparseArray is constructed here, whose default sparse type is
        # integer, while SparseSeries defaults to block.
        # The (2, :) lookup is performed twice.
        for key in (0, 1, (2, everything), (2, everything)):
            tm.assert_sp_series_equal(
                sparse.loc[key], orig.loc[key].to_sparse(kind='integer'))

        # column lookup, also performed twice
        for _ in range(2):
            tm.assert_sp_series_equal(sparse.loc[:, 'y'],
                                      orig.loc[:, 'y'].to_sparse())

        # selections that return frames
        frame_keys = (
            [1, 2],
            ([1, 2], everything),
            (everything, ['x', 'z']),
            ([0, 2], ['x', 'z']),
        )
        for key in frame_keys:
            result = sparse.loc[key]
            exp = orig.loc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # labels running past the index are handled through reindex
        wider = [1, 3, 4, 5]
        result = sparse.reindex(wider)
        exp = orig.reindex(wider).to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # dense boolean mask
        result = sparse.loc[orig.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # sparse boolean mask (actually it is coerced to a normal Series)
        result = sparse.loc[sparse.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # explicit boolean SparseArray mask, same expectation as above
        sparse_mask = pd.SparseArray(sparse.x % 2 == 1, dtype=bool)
        result = sparse.loc[sparse_mask]
        tm.assert_sp_frame_equal(result, exp)
Example #32
0
    def test_iloc(self):
        """``.iloc`` on a sparse frame agrees with the dense frame."""
        orig = pd.DataFrame([[1, np.nan, np.nan], [2, 3, np.nan],
                             [np.nan, np.nan, 4]])
        sparse = orig.to_sparse()
        everything = slice(None)

        # scalar positions
        assert sparse.iloc[1, 1] == 3
        assert np.isnan(sparse.iloc[2, 0])

        # single rows; the expected side for rows 0 and 1 is taken with .loc
        for row in (0, 1):
            tm.assert_sp_series_equal(
                sparse.iloc[row], orig.loc[row].to_sparse(kind='integer'))

        # row 2 with an explicit column slice, looked up twice
        for _ in range(2):
            tm.assert_sp_series_equal(
                sparse.iloc[2, :], orig.iloc[2, :].to_sparse(kind='integer'))

        # column 1, looked up twice
        for _ in range(2):
            tm.assert_sp_series_equal(sparse.iloc[:, 1],
                                      orig.iloc[:, 1].to_sparse())

        # selections that return frames
        frame_keys = (
            [1, 2],
            ([1, 2], everything),
            (everything, [1, 0]),
            ([2], [1, 0]),
        )
        for key in frame_keys:
            result = sparse.iloc[key]
            exp = orig.iloc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # positions 3 and 5 are out of range for three rows
        with pytest.raises(IndexError):
            sparse.iloc[[1, 3, 5]]
Example #33
0
 def test_reindex_fill_value(self):
     """``reindex(fill_value=0)`` fills the label that was not in the index."""
     new_index = [1, 2, 3]
     sparse_floats = pd.Series([1., 2., 3.]).to_sparse()

     reindexed = sparse_floats.reindex(new_index, fill_value=0)
     wanted = pd.Series([2., 3., 0], index=new_index).to_sparse()
     tm.assert_sp_series_equal(reindexed, wanted)
Example #34
0
    def test_loc_index(self):
        """``.loc`` with string row labels agrees with the dense frame."""
        orig = pd.DataFrame(
            [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
            index=list('abc'),
            columns=list('xyz'))
        sparse = orig.to_sparse()
        everything = slice(None)

        # scalar lookups
        self.assertEqual(sparse.loc['a', 'x'], 1)
        self.assertTrue(np.isnan(sparse.loc['b', 'z']))
        self.assertEqual(sparse.loc['c', 'z'], 4)

        # row lookups; the ('b', :) lookup is performed twice
        for key in ('a', 'b', ('b', everything), ('b', everything)):
            tm.assert_sp_series_equal(sparse.loc[key],
                                      orig.loc[key].to_sparse())

        # column lookup, also performed twice
        for _ in range(2):
            tm.assert_sp_series_equal(sparse.loc[:, 'z'],
                                      orig.loc[:, 'z'].to_sparse())

        # selections that return frames
        frame_keys = (
            ['a', 'b'],
            (['a', 'b'], everything),
            (everything, ['x', 'z']),
            (['c', 'a'], ['x', 'z']),
        )
        for key in frame_keys:
            result = sparse.loc[key]
            exp = orig.loc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # dense boolean mask
        result = sparse.loc[orig.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # sparse boolean mask (actually it is coerced to a normal Series)
        result = sparse.loc[sparse.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # explicit boolean SparseArray mask, same expectation as above
        sparse_mask = pd.SparseArray(sparse.x % 2 == 1, dtype=bool)
        result = sparse.loc[sparse_mask]
        tm.assert_sp_frame_equal(result, exp)
Example #35
0
    def test_loc_index(self):
        """``.loc`` with string row labels agrees with the dense frame."""
        orig = pd.DataFrame(
            [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
            index=list("abc"),
            columns=list("xyz"),
        )
        sparse = orig.to_sparse()
        everything = slice(None)

        # scalar lookups
        assert sparse.loc["a", "x"] == 1
        assert np.isnan(sparse.loc["b", "z"])
        assert sparse.loc["c", "z"] == 4

        # row lookups, expected side built with kind="integer";
        # the ("b", :) lookup is performed twice
        for key in ("a", "b", ("b", everything), ("b", everything)):
            tm.assert_sp_series_equal(
                sparse.loc[key], orig.loc[key].to_sparse(kind="integer")
            )

        # column lookup, also performed twice
        for _ in range(2):
            tm.assert_sp_series_equal(
                sparse.loc[:, "z"], orig.loc[:, "z"].to_sparse()
            )

        # selections that return frames
        frame_keys = (
            ["a", "b"],
            (["a", "b"], everything),
            (everything, ["x", "z"]),
            (["c", "a"], ["x", "z"]),
        )
        for key in frame_keys:
            result = sparse.loc[key]
            exp = orig.loc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # dense boolean mask
        result = sparse.loc[orig.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # sparse boolean mask (actually it is coerced to a normal Series)
        result = sparse.loc[sparse.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # explicit boolean SparseArray mask, same expectation as above
        sparse_mask = pd.SparseArray(sparse.x % 2 == 1, dtype=bool)
        result = sparse.loc[sparse_mask]
        tm.assert_sp_frame_equal(result, exp)
Example #36
0
 def _test_roundtrip(series):
     """Pickle round-trip ``series`` and compare sparse and dense forms."""
     restored = self.round_trip_pickle(series)
     tm.assert_sp_series_equal(series, restored)

     dense_before = series.to_dense()
     dense_after = restored.to_dense()
     tm.assert_series_equal(dense_before, dense_after)
Example #37
0
        def _check_inplace_op(iop, op):
            """Check that in-place ``iop`` leaves the same result as ``op`` returns."""
            working = self.bseries.copy()

            # reference value first, then apply the in-place variant
            wanted = op(working, self.bseries)
            iop(working, self.bseries)
            tm.assert_sp_series_equal(working, wanted)
Example #38
0
    def test_loc(self):
        """``.loc`` agrees with the dense series; overridden to use other labels."""
        orig = self.orig
        sparse = self.sparse

        # single labels
        for label in ('A', 'B'):
            tm.assert_sp_series_equal(sparse.loc[label],
                                      orig.loc[label].to_sparse())

        label_lists = (
            [1, 3, 4],
            [1, 3, 4, 5],   # exceeds the bounds
            ['A'],          # single element list (GH 15447)
        )
        for labels in label_lists:
            result = sparse.loc[labels]
            exp = orig.loc[labels].to_sparse()
            tm.assert_sp_series_equal(result, exp)

        # dense boolean mask
        result = sparse.loc[orig % 2 == 1]
        exp = orig.loc[orig % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # sparse boolean mask (actually it is coerced to a normal Series)
        result = sparse.loc[sparse % 2 == 1]
        exp = orig.loc[orig % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # explicit boolean SparseArray mask, same expectation as above
        sparse_mask = pd.SparseArray(sparse % 2 == 1, dtype=bool)
        result = sparse.loc[sparse_mask]
        tm.assert_sp_series_equal(result, exp)
Example #39
0
 def test_from_coo_nodense_index(self):
     """``from_coo(dense_index=False)`` equals the NaN-dropped reference."""
     built = SparseSeries.from_coo(self.coo_matrices[0], dense_index=False)
     reference = self.sparse_series[2].dropna().to_sparse()
     tm.assert_sp_series_equal(built, reference)
Example #40
0
 def test_setitem_corner(self):
     """A column copied under a new label equals its source (names aside)."""
     source = self.frame['B']
     self.frame['a'] = source
     tm.assert_sp_series_equal(
         self.frame['a'], self.frame['B'], check_names=False)
Example #41
0
 def test_from_coo_dense_index(self):
     """``from_coo(dense_index=True)`` equals the stored reference series."""
     built = SparseSeries.from_coo(self.coo_matrices[0], dense_index=True)
     reference = self.sparse_series[2]
     tm.assert_sp_series_equal(built, reference)
Example #42
0
def compare_sp_series_ts(res, exp, typ, version):
    """Assert that sparse series ``res`` equals ``exp``.

    ``typ`` and ``version`` are accepted but not used in this body.
    NOTE(review): presumably they exist so the signature matches other
    ``compare_*`` helpers that a caller dispatches to -- confirm at the
    call site.
    """
    tm.assert_sp_series_equal(res, exp)
Example #43
0
    def test_iloc(self):
        """Positional ``.iloc`` indexing on a sparse frame matches the same
        indexing on the dense frame converted with ``to_sparse()``.

        Covers scalar access, single row / single column selection (compared
        as sparse series), list-based row and column selection (compared as
        sparse frames) and an out-of-bounds list raising ``IndexError``.
        """
        orig = pd.DataFrame([[1, np.nan, np.nan], [2, 3, np.nan],
                             [np.nan, np.nan, 4]])
        sparse = orig.to_sparse()

        # scalar access
        self.assertEqual(sparse.iloc[1, 1], 3)
        self.assertTrue(np.isnan(sparse.iloc[2, 0]))

        # single row; the expected side is built positionally as well so
        # both sides exercise the same indexer
        tm.assert_sp_series_equal(sparse.iloc[0], orig.iloc[0].to_sparse())
        tm.assert_sp_series_equal(sparse.iloc[1], orig.iloc[1].to_sparse())
        tm.assert_sp_series_equal(sparse.iloc[2, :],
                                  orig.iloc[2, :].to_sparse())

        # single column
        tm.assert_sp_series_equal(sparse.iloc[:, 1],
                                  orig.iloc[:, 1].to_sparse())

        # list of rows
        result = sparse.iloc[[1, 2]]
        exp = orig.iloc[[1, 2]].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        result = sparse.iloc[[1, 2], :]
        exp = orig.iloc[[1, 2], :].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # list of columns, in reversed order
        result = sparse.iloc[:, [1, 0]]
        exp = orig.iloc[:, [1, 0]].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # lists on both axes
        result = sparse.iloc[[2], [1, 0]]
        exp = orig.iloc[[2], [1, 0]].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # positions 3 and 5 do not exist in the three-row frame
        with tm.assertRaises(IndexError):
            sparse.iloc[[1, 3, 5]]
Example #44
0
 def test_iloc_slice(self):
     """``iloc[2:]`` on a sparse series built with the default
     ``to_sparse()`` matches the sparsified dense slice."""
     dense = pd.Series([1, np.nan, np.nan, 3, np.nan])
     sp = dense.to_sparse()
     tail = sp.iloc[2:]
     expected = dense.iloc[2:].to_sparse()
     tm.assert_sp_series_equal(tail, expected)
Example #45
0
    def test_getitem_multi(self):
        """Compare ``[]`` lookups on ``self.sparse`` with the same lookups
        on ``self.orig`` converted through ``to_sparse()``."""
        dense = self.orig
        sp = self.sparse

        # integer scalar keys
        assert sp[0] == dense[0]
        assert np.isnan(sp[1])
        assert sp[3] == dense[3]

        # label keys
        for label in ("A", "B"):
            tm.assert_sp_series_equal(sp[label], dense[label].to_sparse())

        # list of integers
        picked = sp[[1, 3, 4]]
        expected = dense[[1, 3, 4]].to_sparse()
        tm.assert_sp_series_equal(picked, expected)

        # boolean mask computed from the dense series
        picked = sp[dense % 2 == 1]
        expected = dense[dense % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(picked, expected)

        # boolean mask computed from the sparse series
        # (actually it coerces to normal Series)
        picked = sp[sp % 2 == 1]
        expected = dense[dense % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(picked, expected)

        # boolean SparseArray mask; ``expected`` from the previous check
        # is reused on purpose
        sparse_mask = pd.SparseArray(sp % 2 == 1, dtype=bool)
        picked = sp[sparse_mask]
        tm.assert_sp_series_equal(picked, expected)
Example #46
0
    def test_subclass_sparse_slice(self):
        """Slicing a ``SubclassedSparseSeries`` through ``.loc``, ``.iloc``
        and ``[]`` gives the expected values and a ``SparseDtype`` of the
        underlying numpy dtype, for both int64 and float64 data."""
        cases = (
            ([1, 2, 3, 4, 5], np.int64),          # int64
            ([1., 2., 3., 4., 5.], np.float64),   # float64
        )
        for values, np_dtype in cases:
            s = tm.SubclassedSparseSeries(values)

            # .loc slice: expected result keeps labels 1, 2 and 3
            exp = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
            tm.assert_sp_series_equal(s.loc[1:3], exp)
            assert s.loc[1:3].dtype == SparseDtype(np_dtype)

            # .iloc slice: expected result keeps positions 1 and 2
            exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
            tm.assert_sp_series_equal(s.iloc[1:3], exp)
            assert s.iloc[1:3].dtype == SparseDtype(np_dtype)

            # plain [] slice: same expectation as the .iloc slice
            exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
            tm.assert_sp_series_equal(s[1:3], exp)
            assert s[1:3].dtype == SparseDtype(np_dtype)
Example #47
0
    def test_loc_slice_index_fill_value(self):
        """``.loc['C':]`` on a string-indexed sparse series created with
        ``fill_value=0`` matches the dense slice sparsified the same way."""
        dense = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE'))
        sp = dense.to_sparse(fill_value=0)

        tail = sp.loc['C':]
        expected = dense.loc['C':].to_sparse(fill_value=0)
        tm.assert_sp_series_equal(tail, expected)
Example #48
0
 def test_loc_slice(self):
     """``.loc[2:]`` on ``self.sparse`` matches the same slice of
     ``self.orig`` converted with ``to_sparse()``."""
     dense, sp = self.orig, self.sparse
     tail = sp.loc[2:]
     expected = dense.loc[2:].to_sparse()
     tm.assert_sp_series_equal(tail, expected)
Example #49
0
    def test_loc(self):
        """Label-based ``.loc`` indexing on a sparse frame matches the same
        indexing on the dense frame converted with ``to_sparse()``.

        Covers scalar access, single row / single column selection (compared
        as sparse series), list-based selection on either or both axes, a
        row list containing labels outside the index, and boolean masks
        given as a dense series, a sparse series and a ``SparseArray``
        (all compared as sparse frames).
        """
        orig = pd.DataFrame(
            [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
            columns=list('xyz'))
        sparse = orig.to_sparse()

        # scalar access
        self.assertEqual(sparse.loc[0, 'x'], 1)
        self.assertTrue(np.isnan(sparse.loc[1, 'z']))
        self.assertEqual(sparse.loc[2, 'z'], 4)

        # single row
        tm.assert_sp_series_equal(sparse.loc[0], orig.loc[0].to_sparse())
        tm.assert_sp_series_equal(sparse.loc[1], orig.loc[1].to_sparse())
        tm.assert_sp_series_equal(sparse.loc[2, :], orig.loc[2, :].to_sparse())

        # single column
        tm.assert_sp_series_equal(sparse.loc[:, 'y'],
                                  orig.loc[:, 'y'].to_sparse())

        # list of row labels
        result = sparse.loc[[1, 2]]
        exp = orig.loc[[1, 2]].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        result = sparse.loc[[1, 2], :]
        exp = orig.loc[[1, 2], :].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # list of column labels
        result = sparse.loc[:, ['x', 'z']]
        exp = orig.loc[:, ['x', 'z']].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # lists on both axes
        result = sparse.loc[[0, 2], ['x', 'z']]
        exp = orig.loc[[0, 2], ['x', 'z']].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # exceeds the bounds: labels 3, 4 and 5 are not in the row index
        result = sparse.loc[[1, 3, 4, 5]]
        exp = orig.loc[[1, 3, 4, 5]].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # dense array
        result = sparse.loc[orig.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # sparse array (actually it coerces to normal Series)
        result = sparse.loc[sparse.x % 2 == 1]
        exp = orig.loc[orig.x % 2 == 1].to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # sparse array; ``exp`` from the previous check is reused on purpose
        result = sparse.loc[pd.SparseArray(sparse.x % 2 == 1, dtype=bool)]
        tm.assert_sp_frame_equal(result, exp)
def test_sparse_frame_stack(sparse_df, multi_index3):
    """Stacking ``sparse_df`` gives a ``SparseSeries`` of three ones indexed
    by ``multi_index3``."""
    stacked = sparse_df.stack()
    ones = np.ones(3)
    expected = pd.SparseSeries(ones, index=multi_index3)
    tm.assert_sp_series_equal(stacked, expected)
Example #51
0
 def test_iloc_slice_fill_value(self):
     """``iloc[2:]`` on a sparse series created with ``fill_value=0``
     matches the dense slice sparsified the same way."""
     dense = pd.Series([1, np.nan, 0, 3, 0])
     sp = dense.to_sparse(fill_value=0)
     tail = sp.iloc[2:]
     expected = dense.iloc[2:].to_sparse(fill_value=0)
     tm.assert_sp_series_equal(tail, expected)
    def test_shift_dtype_fill_value(self):
        """``shift`` on an int64 sparse series created with ``fill_value=0``
        matches shifting the dense series and sparsifying the result with
        the same fill value."""
        # GH 12908
        dense = pd.Series([1, 0, 0, 4], dtype=np.int64)
        sp = dense.to_sparse(fill_value=0)

        # forward shifts first, then backward shifts
        for periods in (0, 1, 2, 3, -1, -2, -3, -4):
            shifted = sp.shift(periods)
            expected = dense.shift(periods).to_sparse(fill_value=0)
            tm.assert_sp_series_equal(shifted, expected)
    def test_shift_nan(self):
        """``shift`` on a sparse series containing NaNs matches shifting the
        dense series and sparsifying the result, both with the default
        ``to_sparse()`` arguments and with ``fill_value=0``."""
        # GH 12908
        dense = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0])

        # first pass: default to_sparse(); second pass: fill_value=0
        for sparse_kwargs in ({}, {'fill_value': 0}):
            sp = dense.to_sparse(**sparse_kwargs)

            # forward shifts first, then backward shifts
            for periods in (0, 1, 2, 3, -1, -2, -3, -4):
                shifted = sp.shift(periods)
                expected = dense.shift(periods).to_sparse(**sparse_kwargs)
                tm.assert_sp_series_equal(shifted, expected)
Example #54
0
    def test_getitem_multi(self):
        """Compare ``[]`` lookups on ``self.sparse`` with the same lookups
        on ``self.orig`` converted through ``to_sparse()``."""
        dense = self.orig
        sp = self.sparse

        # integer scalar keys
        self.assertEqual(sp[0], dense[0])
        self.assertTrue(np.isnan(sp[1]))
        self.assertEqual(sp[3], dense[3])

        # label keys
        for label in ('A', 'B'):
            tm.assert_sp_series_equal(sp[label], dense[label].to_sparse())

        # list of integers
        picked = sp[[1, 3, 4]]
        expected = dense[[1, 3, 4]].to_sparse()
        tm.assert_sp_series_equal(picked, expected)

        # boolean mask computed from the dense series
        picked = sp[dense % 2 == 1]
        expected = dense[dense % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(picked, expected)

        # boolean mask computed from the sparse series
        # (actually it coerces to normal Series)
        picked = sp[sp % 2 == 1]
        expected = dense[dense % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(picked, expected)

        # boolean SparseArray mask; ``expected`` from the previous check
        # is reused on purpose
        sparse_mask = pd.SparseArray(sp % 2 == 1, dtype=bool)
        picked = sp[sparse_mask]
        tm.assert_sp_series_equal(picked, expected)