def tests_indexing_with_sparse(self):
    """Index sparse arrays/series with sparse boolean masks (GH 13985).

    Runs the same checks for every combination of sparse ``kind`` and
    mask ``fill_value``.
    """
    # GH 13985
    kinds = ['integer', 'block']
    fills = [True, False, np.nan]

    for kind in kinds:
        for fill in fills:
            arr = pd.SparseArray([1, 2, 3], kind=kind)
            mask = pd.SparseArray([True, False, True], fill_value=fill,
                                  dtype=bool)

            # array-level masking
            tm.assert_sp_array_equal(pd.SparseArray([1, 3], kind=kind),
                                     arr[mask])

            # series-level masking with a SparseArray mask
            s = pd.SparseSeries(arr, index=['a', 'b', 'c'],
                                dtype=np.float64)
            exp = pd.SparseSeries([1, 3], index=['a', 'c'],
                                  dtype=np.float64, kind=kind)
            tm.assert_sp_series_equal(s[mask], exp)
            tm.assert_sp_series_equal(s.loc[mask], exp)
            tm.assert_sp_series_equal(s.iloc[mask], exp)

            # series-level masking with a labelled SparseSeries mask
            labelled_mask = pd.SparseSeries(mask, index=['a', 'b', 'c'])
            tm.assert_sp_series_equal(s[labelled_mask], exp)
            tm.assert_sp_series_equal(s.loc[labelled_mask], exp)

            # .iloc is expected to reject a labelled mask
            msg = ("iLocation based boolean indexing cannot use an "
                   "indexable as a mask")
            with tm.assert_raises_regex(ValueError, msg):
                s.iloc[labelled_mask]
def test_series_indexing_multiple(self):
    """Select label lists from ``self.ss`` via ``.loc`` and compare with hand-built series."""
    picked = self.ss.loc[['string', 'int']]
    wanted = pd.SparseSeries(['a', 1], index=['string', 'int'])
    tm.assert_sp_series_equal(picked, wanted)

    picked = self.ss.loc[['string', 'object']]
    wanted = pd.SparseSeries(['a', []], index=['string', 'object'])
    tm.assert_sp_series_equal(picked, wanted)
def test_fill_value_reindex_coerces_float_int(self):
    """Reindexing a ``fill_value=0`` sparse series must match reindexing its dense source."""
    dense = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE'))
    sp = dense.to_sparse(fill_value=0)

    actual = sp.reindex(['A', 'E', 'C', 'D'])
    wanted = dense.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0)

    tm.assert_sp_series_equal(actual, wanted)
def test_loc(self):
    """Compare ``.loc``/``reindex`` on ``self.sparse`` with the same calls on ``self.orig``."""
    dense = self.orig
    sp = self.sparse

    # scalar lookups
    assert sp.loc[0] == 1
    assert np.isnan(sp.loc[1])

    # list of labels
    got = sp.loc[[1, 3, 4]]
    want = dense.loc[[1, 3, 4]].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # exceeds the bounds
    got = sp.reindex([1, 3, 4, 5])
    want = dense.reindex([1, 3, 4, 5]).to_sparse()
    tm.assert_sp_series_equal(got, want)
    # padded with NaN
    assert np.isnan(got[-1])

    # dense array
    got = sp.loc[dense % 2 == 1]
    want = dense.loc[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp.loc[sp % 2 == 1]
    want = dense.loc[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array; same expectation as the previous check
    got = sp.loc[pd.SparseArray(sp % 2 == 1, dtype=bool)]
    tm.assert_sp_series_equal(got, want)
def tests_indexing_with_sparse(self, kind, fill):
    """Index sparse arrays/series with a sparse boolean mask (gh-13985).

    ``kind`` is forwarded to the sparse constructors and ``fill`` is the
    ``fill_value`` of the boolean mask.
    """
    # see gh-13985
    arr = pd.SparseArray([1, 2, 3], kind=kind)
    mask = pd.SparseArray([True, False, True], fill_value=fill, dtype=bool)

    # array-level masking; argument order of the assertion is kept as-is
    masked = arr[mask]
    built = pd.SparseArray([1, 3], kind=kind)
    tm.assert_sp_array_equal(built, masked)

    s = pd.SparseSeries(arr, index=["a", "b", "c"], dtype=np.float64)
    expected = pd.SparseSeries(
        [1, 3],
        index=["a", "c"],
        kind=kind,
        dtype=SparseDtype(np.float64, s.fill_value),
    )

    # SparseArray mask works with [], .loc and .iloc
    tm.assert_sp_series_equal(s[mask], expected)
    tm.assert_sp_series_equal(s.loc[mask], expected)
    tm.assert_sp_series_equal(s.iloc[mask], expected)

    # labelled SparseSeries mask works with [] and .loc ...
    labelled_mask = pd.SparseSeries(mask, index=["a", "b", "c"])
    tm.assert_sp_series_equal(s[labelled_mask], expected)
    tm.assert_sp_series_equal(s.loc[labelled_mask], expected)

    # ... but .iloc is expected to reject it
    msg = ("iLocation based boolean indexing cannot "
           "use an indexable as a mask")
    with pytest.raises(ValueError, match=msg):
        s.iloc[labelled_mask]
def test_getitem_fill_value(self):
    """Check ``[]``/``.iloc`` selections on a ``fill_value=0`` sparse frame against its dense source."""
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         columns=list('xyz'))
    sp = dense.to_sparse(fill_value=0)

    def _expected(selection):
        # Sparsify the dense selection, then set the frame-level default
        # fill value to NaN before comparing.
        out = selection.to_sparse(fill_value=0)
        out._default_fill_value = np.nan
        return out

    # single-column frame; fill value deliberately not compared
    result = sp[['z']]
    expected = dense[['z']].to_sparse(fill_value=0)
    tm.assert_sp_frame_equal(result, expected, check_fill_value=False)

    # single column as a series
    tm.assert_sp_series_equal(sp['y'],
                              dense['y'].to_sparse(fill_value=0))

    exp = _expected(dense[['x']])
    tm.assert_sp_frame_equal(sp[['x']], exp)

    exp = _expected(dense[['z', 'x']])
    tm.assert_sp_frame_equal(sp[['z', 'x']], exp)

    # boolean row mask
    indexer = [True, False, True, True]
    exp = _expected(dense[indexer])
    tm.assert_sp_frame_equal(sp[indexer], exp)

    # positional row selection
    exp = _expected(dense.iloc[[1, 2]])
    tm.assert_sp_frame_equal(sp.iloc[[1, 2]], exp)
def test_getitem_ellipsis(self):
    """``s[...]`` must compare equal to ``s``, with and without ``fill_value=0`` (GH 9467)."""
    # GH 9467
    for ctor_kwargs in ({}, {'fill_value': 0}):
        s = pd.SparseSeries([1, np.nan, 2, 0, np.nan], **ctor_kwargs)
        tm.assert_sp_series_equal(s[...], s)
def compare_sp_series_ts(res, exp, typ, version):
    """Compare two sparse series, skipping the series-type check for versions <= 0.12.0.

    ``typ`` is accepted but not used by this function.
    """
    # SparseTimeSeries integrated into SparseSeries in 0.12.0
    # and deprecated in 0.17.0
    is_legacy = bool(version) and (
        LooseVersion(version) <= LooseVersion("0.12.0"))

    if not is_legacy:
        tm.assert_sp_series_equal(res, exp)
        return

    tm.assert_sp_series_equal(res, exp, check_series_type=False)
def _compare_with_series(sps, new_index):
    """Reindex ``sps`` directly and via a dense round trip; both results must agree."""
    sparse_reindexed = sps.reindex(new_index)

    # dense path: densify, reindex, then sparsify with the same fill value
    dense_reindexed = sps.to_dense().reindex(new_index)
    via_dense = dense_reindexed.to_sparse(fill_value=sps.fill_value)

    tm.assert_sp_series_equal(sparse_reindexed, via_dense)
    tm.assert_series_equal(sparse_reindexed.to_dense(),
                           via_dense.to_dense())
def test_setitem_array(self):
    """Assign column "B" (whole, then minus its last row) to new columns of ``self.frame``."""
    column_b = self.frame["B"]

    # full-length assignment
    self.frame["E"] = column_b
    tm.assert_sp_series_equal(
        self.frame["E"], self.frame["B"], check_names=False)

    # assignment of all but the last element; compare on the shared index
    self.frame["F"] = column_b[:-1]
    shared = self.frame.index[:-1]
    tm.assert_sp_series_equal(
        self.frame["E"].reindex(shared),
        self.frame["F"].reindex(shared),
        check_names=False)
def test_cumsum(self):
    """``cumsum`` on ``self.bseries`` and ``self.zbseries`` must match the dense cumulative sums."""
    sparse_total = self.bseries.cumsum()
    wanted = SparseSeries(self.bseries.to_dense().cumsum())
    tm.assert_sp_series_equal(sparse_total, wanted)

    # TODO: gh-12855 - return a SparseSeries here
    z_total = self.zbseries.cumsum()
    z_wanted = self.zbseries.to_dense().cumsum()
    self.assertNotIsInstance(z_total, SparseSeries)
    tm.assert_series_equal(z_total, z_wanted)
def test_loc_slice(self):
    """Label slices through ``.loc`` must agree between ``self.sparse`` and ``self.orig``."""
    dense = self.orig
    sp = self.sparse

    # equivalent to 'A':, 'B':, 'C':, 'A':'B' and :'B'
    label_slices = (
        slice('A', None),
        slice('B', None),
        slice('C', None),
        slice('A', 'B'),
        slice(None, 'B'),
    )
    for window in label_slices:
        tm.assert_sp_series_equal(sp.loc[window],
                                  dense.loc[window].to_sparse())
def test_subclass_sparse_addition(self):
    """Adding two ``tm.SubclassedSparseSeries`` must equal the subclassed expected sum."""
    operand_sets = (
        # integer operands
        ([1, 3, 5], [-2, 5, 12], [-1, 8, 17]),
        # float operands
        ([4.0, 5.0, 6.0], [1.0, 2.0, 3.0], [5., 7., 9.]),
    )
    for left, right, total in operand_sets:
        s1 = tm.SubclassedSparseSeries(left)
        s2 = tm.SubclassedSparseSeries(right)
        exp = tm.SubclassedSparseSeries(total)
        tm.assert_sp_series_equal(s1 + s2, exp)
def test_reindex_nearest(self):
    """``reindex(method='nearest')`` on a sparse series, with and without a tolerance."""
    sp = pd.Series(np.arange(10, dtype='float64')).to_sparse()
    target = [0.1, 0.9, 1.5, 2.0]

    # no tolerance
    got = sp.reindex(target, method='nearest')
    want = pd.Series(np.around(target), target).to_sparse()
    tm.assert_sp_series_equal(want, got)

    # tolerance=0.2; the expected series has NaN at the 1.5 target
    got = sp.reindex(target, method='nearest', tolerance=0.2)
    want = pd.Series([0, 1, np.nan, 2], target).to_sparse()
    tm.assert_sp_series_equal(want, got)
def test_iloc_fill_value(self):
    """Scalar and list ``.iloc`` lookups on a ``fill_value=0`` sparse series."""
    dense = pd.Series([1, np.nan, 0, 3, 0])
    sp = dense.to_sparse(fill_value=0)

    # scalar positions: a stored value, a NaN, and a fill value
    self.assertEqual(sp.iloc[3], 3)
    self.assertTrue(np.isnan(sp.iloc[1]))
    self.assertEqual(sp.iloc[4], 0)

    # list of positions
    got = sp.iloc[[1, 3, 4]]
    want = dense.iloc[[1, 3, 4]].to_sparse(fill_value=0)
    tm.assert_sp_series_equal(got, want)
def _check_const(sparse, name):
    """Rebuild ``sparse`` through ``SparseSeries`` and check name handling.

    ``self`` is not a parameter; it is read from the enclosing scope.
    """
    # use passed series name
    rebuilt = SparseSeries(sparse)
    tm.assert_sp_series_equal(rebuilt, sparse)
    self.assertEqual(sparse.name, name)
    self.assertEqual(rebuilt.name, name)

    # use passed name
    renamed = SparseSeries(sparse, name='x')
    tm.assert_sp_series_equal(renamed, sparse, check_names=False)
    self.assertEqual(renamed.name, 'x')
def test_combine_first(self):
    """``combine_first`` on every other element of ``self.bseries`` vs. the dense computation."""
    s = self.bseries

    # sparse and dense "other" must give the same sparse result
    from_sparse = s[::2].combine_first(s)
    from_dense = s[::2].combine_first(s.to_dense())

    # reference result computed entirely on dense data, then sparsified
    dense_combined = s[::2].to_dense().combine_first(s.to_dense())
    wanted = dense_combined.to_sparse(fill_value=s.fill_value)

    tm.assert_sp_series_equal(from_sparse, from_dense)
    tm.assert_sp_series_equal(from_sparse, wanted)
def test_iloc_fill_value(self):
    """Scalar and list ``.iloc`` lookups on a ``fill_value=0`` sparse series."""
    dense = pd.Series([1, np.nan, 0, 3, 0])
    sp = dense.to_sparse(fill_value=0)

    # scalar positions: a stored value, a NaN, and a fill value
    assert sp.iloc[3] == 3
    assert np.isnan(sp.iloc[1])
    assert sp.iloc[4] == 0

    # list of positions
    got = sp.iloc[[1, 3, 4]]
    want = dense.iloc[[1, 3, 4]].to_sparse(fill_value=0)
    tm.assert_sp_series_equal(got, want)
def test_getitems_slice_multi(self):
    """Positional ``[2:]`` and label ``.loc`` slices must agree between sparse and dense."""
    dense = self.orig
    sp = self.sparse

    # positional slice through []
    tm.assert_sp_series_equal(sp[2:], dense[2:].to_sparse())

    # label slices through .loc: 'B':, 'C':, 'A':'B' and :'B'
    label_slices = (
        slice('B', None),
        slice('C', None),
        slice('A', 'B'),
        slice(None, 'B'),
    )
    for window in label_slices:
        tm.assert_sp_series_equal(sp.loc[window],
                                  dense.loc[window].to_sparse())
def test_icol(self):
    """First column via ``.iloc[:, 0]`` is a ``SparseSeries`` equal to column "A"."""
    # 10711 deprecated

    # 2227
    first_col = self.frame.iloc[:, 0]
    self.assertTrue(isinstance(first_col, SparseSeries))
    tm.assert_sp_series_equal(first_col, self.frame["A"])

    # preserve sparse index type. #2251
    iframe = SparseDataFrame({"A": [0, 1]}, default_kind="integer")
    by_label = iframe["A"].sp_index
    by_position = iframe.iloc[:, 0].sp_index
    self.assertEqual(type(by_label), type(by_position))
def test_iloc(self):
    """First column via ``.iloc[:, 0]`` is a ``SparseSeries`` equal to column 'A'."""
    # 2227
    first_col = self.frame.iloc[:, 0]
    assert isinstance(first_col, SparseSeries)
    tm.assert_sp_series_equal(first_col, self.frame['A'])

    # preserve sparse index type. #2251
    iframe = SparseDataFrame({'A': [0, 1]}, default_kind='integer')
    by_label = iframe['A'].sp_index
    by_position = iframe.iloc[:, 0].sp_index
    tm.assert_class_equal(by_label, by_position)
def test_setitem_array(self, float_frame):
    """Assign column 'B' (whole, then minus its last row) to new columns of ``float_frame``."""
    column_b = float_frame['B']

    # full-length assignment
    float_frame['E'] = column_b
    tm.assert_sp_series_equal(
        float_frame['E'], float_frame['B'], check_names=False)

    # assignment of all but the last element; compare on the shared index
    float_frame['F'] = column_b[:-1]
    shared = float_frame.index[:-1]
    tm.assert_sp_series_equal(
        float_frame['E'].reindex(shared),
        float_frame['F'].reindex(shared),
        check_names=False)
def _check_frame(frame):
    """Exercise ``frame[...] = value`` for several value types.

    Mutates ``frame`` by adding columns 'E' through 'K'. ``self`` and
    ``assert_series_equal`` are read from the enclosing scope.
    """
    n_rows = len(frame)

    # insert SparseSeries
    frame['E'] = frame['A']
    tm.assertIsInstance(frame['E'], SparseSeries)
    tm.assert_sp_series_equal(frame['E'], frame['A'], check_names=False)

    # insert SparseSeries differently-indexed
    every_other = frame['A'][::2]
    frame['E'] = every_other
    wanted = (every_other.to_dense()
              .reindex(frame.index)
              .fillna(every_other.fill_value))
    stored = frame['E'].to_dense()
    assert_series_equal(stored, wanted, check_names=False)
    self.assertEqual(stored.name, 'E')

    # insert Series
    frame['F'] = frame['A'].to_dense()
    tm.assertIsInstance(frame['F'], SparseSeries)
    tm.assert_sp_series_equal(frame['F'], frame['A'], check_names=False)

    # insert Series differently-indexed
    dense_every_other = frame['A'].to_dense()[::2]
    frame['G'] = dense_every_other
    wanted = dense_every_other.reindex(frame.index).fillna(
        frame.default_fill_value)
    wanted.name = 'G'
    assert_series_equal(frame['G'].to_dense(), wanted)

    # insert ndarray
    frame['H'] = np.random.randn(n_rows)
    tm.assertIsInstance(frame['H'], SparseSeries)

    # ndarray whose second half equals the frame's default fill value
    half_filled = np.random.randn(n_rows)
    half_filled[n_rows // 2:] = frame.default_fill_value
    frame['I'] = half_filled
    self.assertEqual(len(frame['I'].sp_values), n_rows // 2)

    # insert ndarray wrong size
    self.assertRaises(Exception, frame.__setitem__, 'foo',
                      np.random.randn(n_rows - 1))

    # scalar value
    frame['J'] = 5
    self.assertEqual(len(frame['J'].sp_values), n_rows)
    self.assertTrue((frame['J'].sp_values == 5).all())

    # scalar equal to the default fill value stores no sparse values
    frame['K'] = frame.default_fill_value
    self.assertEqual(len(frame['K'].sp_values), 0)
def test_iloc(self):
    """Scalar, list and out-of-range ``.iloc`` lookups on a NaN-filled sparse series."""
    dense = pd.Series([1, np.nan, np.nan, 3, np.nan])
    sp = dense.to_sparse()

    # scalar positions
    self.assertEqual(sp.iloc[3], 3)
    self.assertTrue(np.isnan(sp.iloc[2]))

    # list of positions
    got = sp.iloc[[1, 3, 4]]
    want = dense.iloc[[1, 3, 4]].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # position 5 is past the end of the five-element series
    with tm.assertRaises(IndexError):
        sp.iloc[[1, 3, 5]]
def test_where_with_numeric_data(data):
    """``SparseSeries.where`` with a numeric threshold must match ``Series.where`` (GH 17386)."""
    # GH 17386
    threshold = 1.5

    sp = SparseSeries(data)
    got = sp.where(sp > threshold)

    dense = Series(data)
    want_dense = dense.where(dense > threshold)
    want_sparse = SparseSeries(want_dense)

    tm.assert_series_equal(got, want_dense)
    tm.assert_sp_series_equal(got, want_sparse)
def test_where_with_numeric_data_and_other(data, other):
    """``SparseSeries.where(cond, other)`` must match ``Series.where`` (GH 17386)."""
    # GH 17386
    threshold = 1.5

    sp = SparseSeries(data)
    got = sp.where(sp > threshold, other)

    dense = Series(data)
    want_dense = dense.where(dense > threshold, other)
    # the sparse expectation uses ``other`` as its fill value
    want_sparse = SparseSeries(want_dense, fill_value=other)

    tm.assert_series_equal(got, want_dense)
    tm.assert_sp_series_equal(got, want_sparse)
def test_where_with_bool_data_and_other(other):
    """``SparseSeries.where(cond, other)`` on boolean data must match ``Series.where`` (GH 17386)."""
    # GH 17386
    data = [False, False, True, True, False, False]
    cond = True

    sp = SparseSeries(data)
    got = sp.where(sp == cond, other)

    dense = Series(data)
    want_dense = dense.where(dense == cond, other)
    # the sparse expectation uses ``other`` as its fill value
    want_sparse = SparseSeries(want_dense, fill_value=other)

    tm.assert_series_equal(got, want_dense)
    tm.assert_sp_series_equal(got, want_sparse)
def test_where_with_bool_data():
    """``SparseSeries.where`` on boolean data must match ``Series.where`` (GH 17386)."""
    # GH 17386
    data = [False, False, True, True, False, False]
    cond = True

    sp = SparseSeries(data)
    got = sp.where(sp == cond)

    dense = Series(data)
    want_dense = dense.where(dense == cond)
    want_sparse = SparseSeries(want_dense)

    tm.assert_series_equal(got, want_dense)
    tm.assert_sp_series_equal(got, want_sparse)
def test_quantile():
    """``SparseDataFrame.quantile`` must match ``DataFrame.quantile`` (GH 17386)."""
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = 0.1

    got = SparseDataFrame(data).quantile(q)

    want_dense = DataFrame(data).quantile(q)
    want_sparse = SparseSeries(want_dense)

    tm.assert_series_equal(got, want_dense)
    tm.assert_sp_series_equal(got, want_sparse)
def test_delitem(self):
    """Deleting columns from ``self.frame`` removes them and leaves the others intact."""
    kept_a = self.frame['A']
    kept_c = self.frame['C']

    # dropping 'B' must not disturb 'A' or 'C'
    del self.frame['B']
    self.assertNotIn('B', self.frame)
    tm.assert_sp_series_equal(self.frame['A'], kept_a)
    tm.assert_sp_series_equal(self.frame['C'], kept_c)

    # then drop 'D' and 'A', in that order
    for column in ('D', 'A'):
        del self.frame[column]
        self.assertNotIn(column, self.frame)
def test_loc(self):
    """Compare ``.loc``/``reindex`` on a NaN-filled sparse frame with its dense source.

    Covers scalar lookups, row and column selection, label lists on either
    axis, reindexing past the existing index, and boolean masks given as a
    dense series, a sparse series and a ``SparseArray``.
    """
    orig = pd.DataFrame(
        [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
        columns=list('xyz'))
    sparse = orig.to_sparse()

    # scalar lookups
    assert sparse.loc[0, 'x'] == 1
    assert np.isnan(sparse.loc[1, 'z'])
    assert sparse.loc[2, 'z'] == 4

    # have to specify `kind='integer'`, since we construct a
    # new SparseArray here, and the default sparse type is
    # integer there, but block in SparseSeries
    tm.assert_sp_series_equal(sparse.loc[0],
                              orig.loc[0].to_sparse(kind='integer'))
    tm.assert_sp_series_equal(sparse.loc[1],
                              orig.loc[1].to_sparse(kind='integer'))
    # Each of the next two checks used to be repeated verbatim; the
    # redundant copies were dropped.
    tm.assert_sp_series_equal(sparse.loc[2, :],
                              orig.loc[2, :].to_sparse(kind='integer'))
    tm.assert_sp_series_equal(sparse.loc[:, 'y'],
                              orig.loc[:, 'y'].to_sparse())

    # label lists on rows and/or columns
    result = sparse.loc[[1, 2]]
    exp = orig.loc[[1, 2]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[[1, 2], :]
    exp = orig.loc[[1, 2], :].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[:, ['x', 'z']]
    exp = orig.loc[:, ['x', 'z']].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[[0, 2], ['x', 'z']]
    exp = orig.loc[[0, 2], ['x', 'z']].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # exceeds the bounds
    result = sparse.reindex([1, 3, 4, 5])
    exp = orig.reindex([1, 3, 4, 5]).to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # dense array
    result = sparse.loc[orig.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array (actually it coerces to normal Series)
    result = sparse.loc[sparse.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array; same expectation as the previous check
    result = sparse.loc[pd.SparseArray(sparse.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(result, exp)
def test_iloc(self):
    """Compare ``.iloc`` on a NaN-filled sparse frame with its dense source.

    Covers scalar lookups, row and column selection, position lists on
    either axis, and an out-of-range position list that must raise
    ``IndexError``.
    """
    orig = pd.DataFrame([[1, np.nan, np.nan],
                         [2, 3, np.nan],
                         [np.nan, np.nan, 4]])
    sparse = orig.to_sparse()

    # scalar lookups
    assert sparse.iloc[1, 1] == 3
    assert np.isnan(sparse.iloc[2, 0])

    # Expected rows are taken with ``orig.iloc`` throughout. The first two
    # used ``orig.loc``, which selects the same rows here because the frame
    # is built without an explicit index.
    tm.assert_sp_series_equal(sparse.iloc[0],
                              orig.iloc[0].to_sparse(kind='integer'))
    tm.assert_sp_series_equal(sparse.iloc[1],
                              orig.iloc[1].to_sparse(kind='integer'))
    # Each of the next two checks used to be repeated verbatim; the
    # redundant copies were dropped.
    tm.assert_sp_series_equal(sparse.iloc[2, :],
                              orig.iloc[2, :].to_sparse(kind='integer'))
    tm.assert_sp_series_equal(sparse.iloc[:, 1],
                              orig.iloc[:, 1].to_sparse())

    # position lists on rows and/or columns
    result = sparse.iloc[[1, 2]]
    exp = orig.iloc[[1, 2]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.iloc[[1, 2], :]
    exp = orig.iloc[[1, 2], :].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.iloc[:, [1, 0]]
    exp = orig.iloc[:, [1, 0]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.iloc[[2], [1, 0]]
    exp = orig.iloc[[2], [1, 0]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # positions 3 and 5 are past the end of the three-row frame
    with pytest.raises(IndexError):
        sparse.iloc[[1, 3, 5]]
def test_reindex_fill_value(self):
    """``reindex(..., fill_value=0)`` on a sparse float series fills the new label with 0."""
    sp_floats = pd.Series([1., 2., 3.]).to_sparse()

    got = sp_floats.reindex([1, 2, 3], fill_value=0)
    want = pd.Series([2., 3., 0], index=[1, 2, 3]).to_sparse()

    tm.assert_sp_series_equal(got, want)
def test_loc_index(self):
    """Compare ``.loc`` on a string-indexed sparse frame with its dense source.

    Covers scalar lookups, row and column selection, label lists on either
    axis, and boolean masks given as a dense series, a sparse series and a
    ``SparseArray``.
    """
    orig = pd.DataFrame(
        [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
        index=list('abc'), columns=list('xyz'))
    sparse = orig.to_sparse()

    # scalar lookups
    self.assertEqual(sparse.loc['a', 'x'], 1)
    self.assertTrue(np.isnan(sparse.loc['b', 'z']))
    self.assertEqual(sparse.loc['c', 'z'], 4)

    # row / column selection
    tm.assert_sp_series_equal(sparse.loc['a'], orig.loc['a'].to_sparse())
    tm.assert_sp_series_equal(sparse.loc['b'], orig.loc['b'].to_sparse())
    # Each of the next two checks used to be repeated verbatim; the
    # redundant copies were dropped.
    tm.assert_sp_series_equal(sparse.loc['b', :],
                              orig.loc['b', :].to_sparse())
    tm.assert_sp_series_equal(sparse.loc[:, 'z'],
                              orig.loc[:, 'z'].to_sparse())

    # label lists on rows and/or columns
    result = sparse.loc[['a', 'b']]
    exp = orig.loc[['a', 'b']].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[['a', 'b'], :]
    exp = orig.loc[['a', 'b'], :].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[:, ['x', 'z']]
    exp = orig.loc[:, ['x', 'z']].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[['c', 'a'], ['x', 'z']]
    exp = orig.loc[['c', 'a'], ['x', 'z']].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # dense array
    result = sparse.loc[orig.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array (actually it coerces to normal Series)
    result = sparse.loc[sparse.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array; same expectation as the previous check
    result = sparse.loc[pd.SparseArray(sparse.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(result, exp)
def test_loc_index(self):
    """Compare ``.loc`` on a string-indexed sparse frame with its dense source.

    Covers scalar lookups, row and column selection, label lists on either
    axis, and boolean masks given as a dense series, a sparse series and a
    ``SparseArray``.
    """
    orig = pd.DataFrame(
        [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
        index=list("abc"),
        columns=list("xyz"),
    )
    sparse = orig.to_sparse()

    # scalar lookups
    assert sparse.loc["a", "x"] == 1
    assert np.isnan(sparse.loc["b", "z"])
    assert sparse.loc["c", "z"] == 4

    # row selection; expected rows are sparsified with kind="integer"
    tm.assert_sp_series_equal(
        sparse.loc["a"], orig.loc["a"].to_sparse(kind="integer")
    )
    tm.assert_sp_series_equal(
        sparse.loc["b"], orig.loc["b"].to_sparse(kind="integer")
    )
    # Each of the next two checks used to be repeated verbatim; the
    # redundant copies were dropped.
    tm.assert_sp_series_equal(
        sparse.loc["b", :], orig.loc["b", :].to_sparse(kind="integer")
    )
    tm.assert_sp_series_equal(sparse.loc[:, "z"], orig.loc[:, "z"].to_sparse())

    # label lists on rows and/or columns
    result = sparse.loc[["a", "b"]]
    exp = orig.loc[["a", "b"]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[["a", "b"], :]
    exp = orig.loc[["a", "b"], :].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[:, ["x", "z"]]
    exp = orig.loc[:, ["x", "z"]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[["c", "a"], ["x", "z"]]
    exp = orig.loc[["c", "a"], ["x", "z"]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # dense array
    result = sparse.loc[orig.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array (actually it coerces to normal Series)
    result = sparse.loc[sparse.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array; same expectation as the previous check
    result = sparse.loc[pd.SparseArray(sparse.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(result, exp)
def _test_roundtrip(series):
    """Pickle round-trip ``series`` and compare the result, both sparse and densified.

    ``self`` is not a parameter; it is read from the enclosing scope.
    """
    restored = self.round_trip_pickle(series)

    tm.assert_sp_series_equal(series, restored)
    tm.assert_series_equal(series.to_dense(), restored.to_dense())
def _check_inplace_op(iop, op):
    """Apply ``iop`` to a copy of ``self.bseries`` and compare with ``op``'s result.

    ``self`` is not a parameter; it is read from the enclosing scope.
    """
    working = self.bseries.copy()

    # compute the reference with ``op`` before ``iop`` is applied
    wanted = op(working, self.bseries)
    iop(working, self.bseries)

    tm.assert_sp_series_equal(working, wanted)
def test_loc(self):
    """Compare ``.loc`` on ``self.sparse`` with ``self.orig`` using the labels 'A' and 'B'."""
    # need to be override to use different label
    dense = self.orig
    sp = self.sparse

    # single labels
    tm.assert_sp_series_equal(sp.loc['A'], dense.loc['A'].to_sparse())
    tm.assert_sp_series_equal(sp.loc['B'], dense.loc['B'].to_sparse())

    # list of labels
    got = sp.loc[[1, 3, 4]]
    want = dense.loc[[1, 3, 4]].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # exceeds the bounds
    got = sp.loc[[1, 3, 4, 5]]
    want = dense.loc[[1, 3, 4, 5]].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # single element list (GH 15447)
    got = sp.loc[['A']]
    want = dense.loc[['A']].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # dense array
    got = sp.loc[dense % 2 == 1]
    want = dense.loc[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp.loc[sp % 2 == 1]
    want = dense.loc[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array; same expectation as the previous check
    got = sp.loc[pd.SparseArray(sp % 2 == 1, dtype=bool)]
    tm.assert_sp_series_equal(got, want)
def test_from_coo_nodense_index(self):
    """``SparseSeries.from_coo(..., dense_index=False)`` vs. ``self.sparse_series[2]`` with NaNs dropped."""
    built = SparseSeries.from_coo(self.coo_matrices[0],
                                  dense_index=False)

    # reference: drop NaNs from the fixture series, then re-sparsify
    reference = self.sparse_series[2].dropna().to_sparse()

    tm.assert_sp_series_equal(built, reference)
def test_setitem_corner(self):
    """Assigning column 'B' to a new column 'a' yields an equal series (names ignored)."""
    self.frame['a'] = self.frame['B']

    copied = self.frame['a']
    source = self.frame['B']
    tm.assert_sp_series_equal(copied, source, check_names=False)
def test_from_coo_dense_index(self):
    """``SparseSeries.from_coo(..., dense_index=True)`` must equal ``self.sparse_series[2]``."""
    built = SparseSeries.from_coo(self.coo_matrices[0], dense_index=True)
    reference = self.sparse_series[2]

    tm.assert_sp_series_equal(built, reference)
def compare_sp_series_ts(res, exp, typ, version):
    """Assert that sparse series ``res`` equals ``exp``.

    ``typ`` and ``version`` are accepted but not used by this function.
    """
    tm.assert_sp_series_equal(res, exp)
def test_iloc(self):
    """Compare ``.iloc`` on a NaN-filled sparse frame with its dense source.

    Covers scalar lookups, row and column selection, position lists on
    either axis, and an out-of-range position list that must raise
    ``IndexError``.
    """
    orig = pd.DataFrame([[1, np.nan, np.nan],
                         [2, 3, np.nan],
                         [np.nan, np.nan, 4]])
    sparse = orig.to_sparse()

    # scalar lookups
    self.assertEqual(sparse.iloc[1, 1], 3)
    self.assertTrue(np.isnan(sparse.iloc[2, 0]))

    # Expected rows are taken with ``orig.iloc`` throughout. The first two
    # used ``orig.loc``, which selects the same rows here because the frame
    # is built without an explicit index.
    tm.assert_sp_series_equal(sparse.iloc[0], orig.iloc[0].to_sparse())
    tm.assert_sp_series_equal(sparse.iloc[1], orig.iloc[1].to_sparse())
    # Each of the next two checks used to be repeated verbatim; the
    # redundant copies were dropped.
    tm.assert_sp_series_equal(sparse.iloc[2, :],
                              orig.iloc[2, :].to_sparse())
    tm.assert_sp_series_equal(sparse.iloc[:, 1],
                              orig.iloc[:, 1].to_sparse())

    # position lists on rows and/or columns
    result = sparse.iloc[[1, 2]]
    exp = orig.iloc[[1, 2]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.iloc[[1, 2], :]
    exp = orig.iloc[[1, 2], :].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.iloc[:, [1, 0]]
    exp = orig.iloc[:, [1, 0]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.iloc[[2], [1, 0]]
    exp = orig.iloc[[2], [1, 0]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # positions 3 and 5 are past the end of the three-row frame
    with tm.assertRaises(IndexError):
        sparse.iloc[[1, 3, 5]]
def test_iloc_slice(self):
    """``.iloc[2:]`` on a NaN-filled sparse series must match the dense slice."""
    dense = pd.Series([1, np.nan, np.nan, 3, np.nan])
    sp = dense.to_sparse()

    tm.assert_sp_series_equal(sp.iloc[2:], dense.iloc[2:].to_sparse())
def test_getitem_multi(self):
    """Compare ``[]`` indexing on ``self.sparse`` with the same indexing on ``self.orig``."""
    dense = self.orig
    sp = self.sparse

    # scalar positions
    assert sp[0] == dense[0]
    assert np.isnan(sp[1])
    assert sp[3] == dense[3]

    # single labels
    tm.assert_sp_series_equal(sp["A"], dense["A"].to_sparse())
    tm.assert_sp_series_equal(sp["B"], dense["B"].to_sparse())

    # list of positions
    got = sp[[1, 3, 4]]
    want = dense[[1, 3, 4]].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # dense array
    got = sp[dense % 2 == 1]
    want = dense[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp[sp % 2 == 1]
    want = dense[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array; same expectation as the previous check
    got = sp[pd.SparseArray(sp % 2 == 1, dtype=bool)]
    tm.assert_sp_series_equal(got, want)
def test_subclass_sparse_slice(self):
    """Slicing a ``tm.SubclassedSparseSeries`` keeps the values and the sparse dtype.

    The same ``.loc``, ``.iloc`` and ``[]`` checks run once for int64
    data and once for float64 data.
    """
    cases = (
        # int64
        ([1, 2, 3, 4, 5], [2, 3, 4], [2, 3], np.int64),
        # float64
        ([1., 2., 3., 4., 5.], [2., 3., 4.], [2., 3.], np.float64),
    )
    for values, label_part, position_part, np_dtype in cases:
        s = tm.SubclassedSparseSeries(values)

        # label slice 1:3 selects three elements
        exp = tm.SubclassedSparseSeries(list(label_part), index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == SparseDtype(np_dtype)

        # positional slice 1:3 selects two elements
        exp = tm.SubclassedSparseSeries(list(position_part), index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == SparseDtype(np_dtype)

        # plain [] slice 1:3 selects the same two elements
        exp = tm.SubclassedSparseSeries(list(position_part), index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == SparseDtype(np_dtype)
def test_loc_slice_index_fill_value(self):
    """``.loc['C':]`` on a ``fill_value=0`` sparse series must match the dense slice."""
    dense = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE'))
    sp = dense.to_sparse(fill_value=0)

    tm.assert_sp_series_equal(sp.loc['C':],
                              dense.loc['C':].to_sparse(fill_value=0))
def test_loc_slice(self):
    """``.loc[2:]`` on ``self.sparse`` must match the same slice of ``self.orig``."""
    dense = self.orig
    sp = self.sparse

    tm.assert_sp_series_equal(sp.loc[2:], dense.loc[2:].to_sparse())
def test_loc(self):
    """Compare ``.loc`` on a NaN-filled sparse frame with its dense source.

    Covers scalar lookups, row and column selection, label lists on either
    axis (including labels not in the index), and boolean masks given as a
    dense series, a sparse series and a ``SparseArray``.
    """
    orig = pd.DataFrame(
        [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
        columns=list('xyz'))
    sparse = orig.to_sparse()

    # scalar lookups
    self.assertEqual(sparse.loc[0, 'x'], 1)
    self.assertTrue(np.isnan(sparse.loc[1, 'z']))
    self.assertEqual(sparse.loc[2, 'z'], 4)

    # row / column selection
    tm.assert_sp_series_equal(sparse.loc[0], orig.loc[0].to_sparse())
    tm.assert_sp_series_equal(sparse.loc[1], orig.loc[1].to_sparse())
    # Each of the next two checks used to be repeated verbatim; the
    # redundant copies were dropped.
    tm.assert_sp_series_equal(sparse.loc[2, :],
                              orig.loc[2, :].to_sparse())
    tm.assert_sp_series_equal(sparse.loc[:, 'y'],
                              orig.loc[:, 'y'].to_sparse())

    # label lists on rows and/or columns
    result = sparse.loc[[1, 2]]
    exp = orig.loc[[1, 2]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[[1, 2], :]
    exp = orig.loc[[1, 2], :].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[:, ['x', 'z']]
    exp = orig.loc[:, ['x', 'z']].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    result = sparse.loc[[0, 2], ['x', 'z']]
    exp = orig.loc[[0, 2], ['x', 'z']].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # exceeds the bounds
    result = sparse.loc[[1, 3, 4, 5]]
    exp = orig.loc[[1, 3, 4, 5]].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # dense array
    result = sparse.loc[orig.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array (actually it coerces to normal Series)
    result = sparse.loc[sparse.x % 2 == 1]
    exp = orig.loc[orig.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(result, exp)

    # sparse array; same expectation as the previous check
    result = sparse.loc[pd.SparseArray(sparse.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(result, exp)
def test_sparse_frame_stack(sparse_df, multi_index3):
    """Stacking ``sparse_df`` must give a sparse series of three ones indexed by ``multi_index3``."""
    stacked = sparse_df.stack()
    wanted = pd.SparseSeries(np.ones(3), index=multi_index3)

    tm.assert_sp_series_equal(stacked, wanted)
def test_iloc_slice_fill_value(self):
    """``.iloc[2:]`` on a ``fill_value=0`` sparse series must match the dense slice."""
    dense = pd.Series([1, np.nan, 0, 3, 0])
    sp = dense.to_sparse(fill_value=0)

    tm.assert_sp_series_equal(sp.iloc[2:],
                              dense.iloc[2:].to_sparse(fill_value=0))
def test_shift_dtype_fill_value(self):
    """``shift`` on an int64 ``fill_value=0`` sparse series must match the dense shift (GH 12908)."""
    # GH 12908
    dense = pd.Series([1, 0, 0, 4], dtype=np.int64)
    sp = dense.to_sparse(fill_value=0)

    # same periods, in the same order, as the original explicit checks
    for periods in (0, 1, 2, 3, -1, -2, -3, -4):
        tm.assert_sp_series_equal(
            sp.shift(periods),
            dense.shift(periods).to_sparse(fill_value=0))
def test_shift_nan(self):
    """``shift`` on a sparse series containing NaNs must match the dense shift (GH 12908).

    Runs once with the default sparse conversion and once with
    ``fill_value=0``.
    """
    # GH 12908
    dense = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0])
    shift_periods = (0, 1, 2, 3, -1, -2, -3, -4)

    for sparse_kwargs in ({}, {'fill_value': 0}):
        sp = dense.to_sparse(**sparse_kwargs)
        for periods in shift_periods:
            tm.assert_sp_series_equal(
                sp.shift(periods),
                dense.shift(periods).to_sparse(**sparse_kwargs))
def test_getitem_multi(self):
    """Compare ``[]`` indexing on ``self.sparse`` with the same indexing on ``self.orig``."""
    dense = self.orig
    sp = self.sparse

    # scalar positions
    self.assertEqual(sp[0], dense[0])
    self.assertTrue(np.isnan(sp[1]))
    self.assertEqual(sp[3], dense[3])

    # single labels
    tm.assert_sp_series_equal(sp['A'], dense['A'].to_sparse())
    tm.assert_sp_series_equal(sp['B'], dense['B'].to_sparse())

    # list of positions
    got = sp[[1, 3, 4]]
    want = dense[[1, 3, 4]].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # dense array
    got = sp[dense % 2 == 1]
    want = dense[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp[sp % 2 == 1]
    want = dense[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(got, want)

    # sparse array; same expectation as the previous check
    got = sp[pd.SparseArray(sp % 2 == 1, dtype=bool)]
    tm.assert_sp_series_equal(got, want)