def test_getitem_fill_value(self):
    """Check ``[]`` and ``iloc`` selection on a ``fill_value=0`` sparse frame."""
    dense = pd.DataFrame(
        [[1, np.nan, 0],
         [2, 3, np.nan],
         [0, np.nan, 4],
         [0, np.nan, 5]],
        columns=list('xyz'))
    sp = dense.to_sparse(fill_value=0)

    # Single-column frame; fill values are excluded from this comparison.
    got = sp[['z']]
    want = dense[['z']].to_sparse(fill_value=0)
    tm.assert_sp_frame_equal(got, want, check_fill_value=False)

    # Single column returned as a series.
    tm.assert_sp_series_equal(sp['y'], dense['y'].to_sparse(fill_value=0))

    # Column lists and a boolean row mask.  The expected frame's default
    # fill value is overridden to NaN before each comparison.
    row_mask = [True, False, True, True]
    for key in (['x'], ['z', 'x'], row_mask):
        want = dense[key].to_sparse(fill_value=0)
        want._default_fill_value = np.nan
        tm.assert_sp_frame_equal(sp[key], want)

    # Positional row selection.
    want = dense.iloc[[1, 2]].to_sparse(fill_value=0)
    want._default_fill_value = np.nan
    tm.assert_sp_frame_equal(sp.iloc[[1, 2]], want)
def test_numeric_op_scalar(self):
    """Adding a scalar to a sparse frame equals sparsifying the dense sum."""
    dense = pd.DataFrame({
        "A": [nan, nan, 0, 1],
        "B": [0, 1, 2, nan],
        "C": [1.0, 2.0, 3.0, 4.0],
        "D": [nan, nan, nan, nan],
    })
    sp = dense.to_sparse()

    shifted_sparse = sp + 1
    shifted_dense = (dense + 1).to_sparse()
    tm.assert_sp_frame_equal(shifted_sparse, shifted_dense)
def test_reindex_fill_value(self):
    """Reindex ``self.zframe`` with ``fill_value=0`` and compare to dense."""
    dates = bdate_range('20110110', periods=20)

    reindexed = self.zframe.reindex(dates, fill_value=0)

    # Same reindex on the dense frame, then sparsified with the sparse
    # frame's own default fill value.
    expected = self.zorig.reindex(dates, fill_value=0).to_sparse(
        self.zframe.default_fill_value)

    tm.assert_sp_frame_equal(reindexed, expected)
def test_astype(self):
    """Cast int64 sparse frames to float64 and check values and dtypes."""
    columns = ('A', 'B')

    # Frame constructed without an explicit default fill value.
    frame = pd.SparseDataFrame({
        'A': SparseArray([1, 2, 3, 4], dtype=np.int64),
        'B': SparseArray([4, 5, 6, 7], dtype=np.int64),
    })
    for col in columns:
        self.assertEqual(frame[col].dtype, np.int64)

    converted = frame.astype(np.float64)
    wanted = pd.SparseDataFrame(
        {'A': SparseArray([1., 2., 3., 4.], fill_value=0.),
         'B': SparseArray([4., 5., 6., 7.], fill_value=0.)},
        default_fill_value=np.nan)
    tm.assert_sp_frame_equal(converted, wanted)
    for col in columns:
        self.assertEqual(converted[col].dtype, np.float64)

    # Frame constructed with default_fill_value=0.
    frame = pd.SparseDataFrame(
        {'A': SparseArray([0, 2, 0, 4], dtype=np.int64),
         'B': SparseArray([0, 5, 0, 7], dtype=np.int64)},
        default_fill_value=0)
    for col in columns:
        self.assertEqual(frame[col].dtype, np.int64)

    converted = frame.astype(np.float64)
    wanted = pd.SparseDataFrame(
        {'A': SparseArray([0., 2., 0., 4.], fill_value=0.),
         'B': SparseArray([0., 5., 0., 7.], fill_value=0.)},
        default_fill_value=0.)
    tm.assert_sp_frame_equal(converted, wanted)
    for col in columns:
        self.assertEqual(converted[col].dtype, np.float64)
def test_fillna(self):
    """Check ``fillna(0)`` on a sparse frame and on one sparse column.

    Covers the returning form, the in-place form on a frame copy, and the
    in-place form on column ``'A'``; each is compared with the dense result.
    """
    df = self.zframe.reindex(lrange(5))
    dense = self.zorig.reindex(lrange(5))

    # Returning form.
    result = df.fillna(0)
    expected = dense.fillna(0)
    tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                             exact_indices=False)
    tm.assert_frame_equal(result.to_dense(), expected)

    # In-place form on a copy of the frame.
    result = df.copy()
    result.fillna(0, inplace=True)
    expected = dense.fillna(0)
    tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                             exact_indices=False)
    tm.assert_frame_equal(result.to_dense(), expected)

    # In-place form on a single column.
    result = df['A']
    result.fillna(0, inplace=True)
    expected = dense['A'].fillna(0)
    # The in-place fill changes the internal SparseArray representation,
    # so only the dense values are compared here.
    tm.assert_series_equal(result.to_dense(), expected)
def test_iloc_slice(self):
    """``iloc`` slicing of a sparse frame matches slicing the dense frame."""
    rows = [
        [1, np.nan, np.nan],
        [2, 3, np.nan],
        [np.nan, np.nan, 4],
    ]
    dense = pd.DataFrame(rows, columns=list('xyz'))
    sp = dense.to_sparse()

    tail_sparse = sp.iloc[2:]
    tail_dense = dense.iloc[2:].to_sparse()
    tm.assert_sp_frame_equal(tail_sparse, tail_dense)
def test_numpy_transpose(self):
    """``np.transpose`` twice returns the original frame; ``axes`` raises."""
    sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a'])

    once = np.transpose(sdf)
    twice = np.transpose(once)
    tm.assert_sp_frame_equal(twice, sdf)

    # Passing ``axes`` is expected to raise ValueError with this message.
    msg = "the 'axes' parameter is not supported"
    with tm.assertRaisesRegexp(ValueError, msg):
        np.transpose(sdf, axes=1)
def test_subclass_sparse_transpose(self):
    """Transposing a subclassed sparse frame gives the expected subclass frame."""
    original = tm.SubclassedSparseDataFrame([[1, 2, 3],
                                             [4, 5, 6]])
    expected = tm.SubclassedSparseDataFrame([[1, 4],
                                             [2, 5],
                                             [3, 6]])
    transposed = original.T
    tm.assert_sp_frame_equal(transposed, expected)
def test_concat_different_columns_buggy(self):
    """Concat ``dense1`` and ``dense3`` as sparse frames, in both orders."""
    d1, d3 = self.dense1, self.dense3

    # Both inputs sparsified with fill_value=0.
    s1 = d1.to_sparse(fill_value=0)
    s3 = d3.to_sparse(fill_value=0)
    for sparse_pair, dense_pair in (((s1, s3), (d1, d3)),
                                    ((s3, s1), (d3, d1))):
        res = pd.concat(list(sparse_pair), sort=True)
        exp = pd.concat(list(dense_pair), sort=True).to_sparse(fill_value=0)
        exp._default_fill_value = np.nan
        tm.assert_sp_frame_equal(res, exp, check_kind=False,
                                 consolidate_block_indices=True)

    # different fill values
    s1 = d1.to_sparse()
    s3 = d3.to_sparse(fill_value=0)
    # each column keeps its own fill_value, so the comparison is done densely
    for sparse_pair, dense_pair in (((s1, s3), (d1, d3)),
                                    ((s3, s1), (d3, d1))):
        res = pd.concat(list(sparse_pair), sort=True)
        exp = pd.concat(list(dense_pair), sort=True)
        assert isinstance(res, pd.SparseDataFrame)
        tm.assert_frame_equal(res.to_dense(), exp)
def test_basic(self, sparse, dtype):
    """Check ``get_dummies`` on a list, a Series and an indexed Series.

    ``sparse`` and ``dtype`` are forwarded to ``get_dummies``.  When
    ``sparse`` is truthy the expected frame is converted with
    ``to_sparse(kind='integer', fill_value=0)`` before comparison.
    """
    s_list = list('abc')
    s_series = Series(s_list)
    s_series_index = Series(s_list, list('ABC'))

    expected = DataFrame({'a': [1, 0, 0],
                          'b': [0, 1, 0],
                          'c': [0, 0, 1]},
                         dtype=self.effective_dtype(dtype))

    # Plain list input.
    result = get_dummies(s_list, sparse=sparse, dtype=dtype)
    if sparse:
        tm.assert_sp_frame_equal(result,
                                 expected.to_sparse(kind='integer',
                                                    fill_value=0))
    else:
        assert_frame_equal(result, expected)

    # Series input; from here on ``expected`` stays sparse when requested.
    result = get_dummies(s_series, sparse=sparse, dtype=dtype)
    if sparse:
        expected = expected.to_sparse(kind='integer', fill_value=0)
    assert_frame_equal(result, expected)

    # Series with a custom index.  ``expected`` was already rebound to its
    # sparse form above, so no further conversion is needed.
    expected.index = list('ABC')
    result = get_dummies(s_series_index, sparse=sparse, dtype=dtype)
    assert_frame_equal(result, expected)
def test_from_scipy_fillna(spmatrix):
    """Build a sparse frame from ``spmatrix`` and fill its NaNs with -1.0."""
    # GH 16112
    dense = np.eye(3)
    dense[1:, 0] = np.nan

    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # Conversion to sparse failed for this spmatrix type and dtype, so
        # the combination is treated as unsupported and not tested further.
        return

    filled = SparseDataFrame(spm).fillna(-1.0)

    # Every NaN in the returned frame should have been replaced by -1.0.
    columns = {
        0: SparseSeries([1., -1, -1]),
        1: SparseSeries([np.nan, 1, np.nan]),
        2: SparseSeries([np.nan, np.nan, 1]),
    }
    expected = SparseDataFrame(columns, default_fill_value=-1)

    # fill_value should equal what fillna() was called with.  It is set
    # after construction rather than passed to SparseSeries so that the
    # "compressed" SparseArrays are obtained without building them by hand.
    for label in expected:
        expected[label].fill_value = -1

    tm.assert_sp_frame_equal(filled, expected)
def test_reindex_fill_value(self, float_frame_fill0, float_frame_fill0_dense):
    """Reindex the sparse fixture with ``fill_value=0`` and compare to dense."""
    dates = bdate_range('20110110', periods=20)

    reindexed = float_frame_fill0.reindex(dates, fill_value=0)

    # Same reindex on the dense fixture, then sparsified with the sparse
    # fixture's default fill value.
    dense_reindexed = float_frame_fill0_dense.reindex(dates, fill_value=0)
    expected = dense_reindexed.to_sparse(float_frame_fill0.default_fill_value)

    tm.assert_sp_frame_equal(reindexed, expected)
def test_copy(self):
    """``copy`` returns an equal SparseDataFrame with an identical index."""
    duplicate = self.frame.copy()

    tm.assertIsInstance(duplicate, SparseDataFrame)
    tm.assert_sp_frame_equal(duplicate, self.frame)

    # as of v0.15.0 the copied index is identical (but not is_a)
    same_index = duplicate.index.identical(self.frame.index)
    self.assertTrue(same_index)
def _check(frame, orig):
    """Verify single and double transposes of ``frame`` against ``orig``."""
    # Sparse round trip through two transposes.
    tm.assert_sp_frame_equal(frame, frame.T.T)

    # Dense view of one transpose.
    dense_t = frame.T.to_dense()
    tm.assert_frame_equal(dense_t, orig.T)

    # Dense view of two transposes.
    dense_tt = frame.T.T.to_dense()
    tm.assert_frame_equal(dense_tt, orig.T.T)

    # Sparse round trip again, this time without exact index matching.
    round_trip = frame.T.T
    tm.assert_sp_frame_equal(frame, round_trip, exact_indices=False)
def test_numeric_op_scalar(self):
    """Adding a scalar to a sparse frame equals sparsifying the dense sum."""
    columns = {
        'A': [nan, nan, 0, 1],
        'B': [0, 1, 2, nan],
        'C': [1., 2., 3., 4.],
        'D': [nan, nan, nan, nan],
    }
    dense = pd.DataFrame(columns)
    sp = dense.to_sparse()

    shifted_sparse = sp + 1
    shifted_dense = (dense + 1).to_sparse()
    tm.assert_sp_frame_equal(shifted_sparse, shifted_dense)
def test_copy(self, float_frame):
    """``copy`` returns an equal SparseDataFrame with an identical index."""
    duplicate = float_frame.copy()

    assert isinstance(duplicate, SparseDataFrame)
    tm.assert_sp_frame_equal(duplicate, float_frame)

    # as of v0.15.0 the copied index is identical (but not is_a)
    same_index = duplicate.index.identical(float_frame.index)
    assert same_index
def test_fill_value_when_combine_const(self):
    """``add(2, fill_value=0)`` equals ``fillna(0).add(2)`` on a sparse frame."""
    # GH12723
    values = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
    frame = SparseDataFrame({'foo': values}, index=range(6))

    filled_then_added = frame.fillna(0).add(2)
    added_with_fill = frame.add(2, fill_value=0)

    tm.assert_sp_frame_equal(added_with_fill, filled_then_added)
def test_combine_add(self):
    """Sparse ``add`` with ``fill_value=0`` matches the dense computation."""
    left = self.frame.to_dense()
    right = left.copy()

    # Blank out the first three 'C' values on the right operand and
    # overwrite the first three 'A' values on the left one.
    right['C'][:3] = np.nan
    left['A'][:3] = 5.7

    sparse_left = left.to_sparse()
    sparse_right = right.to_sparse()
    result = sparse_left.add(sparse_right, fill_value=0)

    dense_sum = left.add(right, fill_value=0)
    expected = dense_sum.to_sparse()

    tm.assert_sp_frame_equal(result, expected)
def test_getitem(self):
    """Selecting a column list matches ``reindex``; a missing label raises."""
    # 1585 select multiple columns
    sdf = SparseDataFrame(index=[0, 1, 2], columns=['a', 'b', 'c'])

    picked = sdf[['a', 'b']]
    expected = sdf.reindex(columns=['a', 'b'])
    tm.assert_sp_frame_equal(picked, expected)

    # 'd' is not among the frame's columns.
    with self.assertRaises(Exception):
        sdf[['a', 'd']]
def test_fancy_index_misc(self, float_frame):
    """Positional tail slices along each axis match the equivalent reindex."""
    # axis = 0
    last_rows = float_frame.iloc[-2:, :]
    want_rows = float_frame.reindex(index=float_frame.index[-2:])
    tm.assert_sp_frame_equal(last_rows, want_rows)

    # axis = 1
    last_cols = float_frame.iloc[:, -2:]
    want_cols = float_frame.reindex(columns=float_frame.columns[-2:])
    tm.assert_sp_frame_equal(last_cols, want_cols)
def test_numpy_cumsum(self):
    """``np.cumsum`` matches the dense cumsum; ``dtype`` and ``out`` raise."""
    result = np.cumsum(self.frame)
    expected = SparseDataFrame(self.frame.to_dense().cumsum())
    tm.assert_sp_frame_equal(result, expected)

    # Each unsupported keyword is expected to raise with its own message.
    unsupported = (
        ("the 'dtype' parameter is not supported", {'dtype': np.int64}),
        ("the 'out' parameter is not supported", {'out': result}),
    )
    for msg, kwargs in unsupported:
        with tm.assertRaisesRegexp(ValueError, msg):
            np.cumsum(self.frame, **kwargs)
def test_to_csv_sparse_dataframe(self, fill_value):
    """Write a sparse frame to CSV, read it back and compare after sparsifying."""
    # GH19384
    source = SparseDataFrame({'a': type(self).fill_values},
                             default_fill_value=fill_value)

    with tm.ensure_clean('sparse_df.csv') as path:
        source.to_csv(path, index=False)
        loaded = read_csv(path, skip_blank_lines=False)

        reloaded_sparse = loaded.to_sparse(fill_value=fill_value)
        tm.assert_sp_frame_equal(reloaded_sparse, source)
def test_fancy_index_misc(self):
    """Positional tail slices along each axis match the equivalent reindex.

    The expected frames are built from the last two index / column labels,
    so the slices are taken positionally with ``iloc`` rather than the
    deprecated, label-or-position ``ix`` indexer.
    """
    # axis = 0
    sliced = self.frame.iloc[-2:, :]
    expected = self.frame.reindex(index=self.frame.index[-2:])
    tm.assert_sp_frame_equal(sliced, expected)

    # axis = 1
    sliced = self.frame.iloc[:, -2:]
    expected = self.frame.reindex(columns=self.frame.columns[-2:])
    tm.assert_sp_frame_equal(sliced, expected)
def test_assign_with_sparse_frame(self):
    """``assign`` on a sparse frame matches assigning densely then sparsifying."""
    # GH 19163
    dense = pd.DataFrame({"a": [1, 2, 3]})

    assigned_sparse = dense.to_sparse(fill_value=False).assign(newcol=False)
    assigned_dense = dense.assign(newcol=False).to_sparse(fill_value=False)
    tm.assert_sp_frame_equal(assigned_sparse, assigned_dense)

    # Every resulting column must be exactly a SparseSeries.
    for name in assigned_sparse.columns:
        assert type(assigned_sparse[name]) is SparseSeries
def test_type_coercion_at_construction(self):
    """A frame-level ``dtype='uint8'`` matches per-column uint8 SparseSeries."""
    # GH 15682
    raw_columns = {
        'a': [1, 0, 0],
        'b': [0, 1, 0],
        'c': [0, 0, 1],
    }
    result = pd.SparseDataFrame(raw_columns, dtype='uint8',
                                default_fill_value=0)

    typed_columns = {
        'a': pd.SparseSeries([1, 0, 0], dtype='uint8'),
        'b': pd.SparseSeries([0, 1, 0], dtype='uint8'),
        'c': pd.SparseSeries([0, 0, 1], dtype='uint8'),
    }
    expected = pd.SparseDataFrame(typed_columns, default_fill_value=0)

    tm.assert_sp_frame_equal(result, expected)
def test_constructor_dict_order(self):
    """Column order of a dict-built frame depends on ``compat.PY36``."""
    # GH19018
    # Expected order: 'b', 'a' (as written in the dict) when compat.PY36
    # is set, otherwise 'a', 'b'.
    data = {'b': [2, 3], 'a': [0, 1]}
    frame = SparseDataFrame(data=data)

    column_order = list('ba') if compat.PY36 else list('ab')
    expected = SparseDataFrame(data=data, columns=column_order)

    tm.assert_sp_frame_equal(frame, expected)
def test_join(self):
    """Check ``join`` on column subsets of ``self.frame``.

    Columns are selected by label with ``loc`` (the ``ix`` indexer is
    deprecated).  Joining columns A, B with C, D is compared against the
    whole frame; joining frames that share column 'B' and joining an
    unnamed Series are both expected to raise.
    """
    left = self.frame.loc[:, ["A", "B"]]
    right = self.frame.loc[:, ["C", "D"]]
    joined = left.join(right)
    tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False)

    # 'B' is present on both sides.
    right = self.frame.loc[:, ["B", "D"]]
    self.assertRaises(Exception, left.join, right)

    # A Series passed to join must carry a name.
    with tm.assertRaisesRegexp(ValueError, "Other Series must have a name"):
        self.frame.join(Series(np.random.randn(len(self.frame)),
                               index=self.frame.index))
def test_concat_different_columns_sort_warns(self):
    """Concat without ``sort`` emits FutureWarning for sparse and dense input."""
    sp1 = self.dense1.to_sparse()
    sp3 = self.dense3.to_sparse()

    with tm.assert_produces_warning(FutureWarning):
        sparse_result = pd.concat([sp1, sp3])

    with tm.assert_produces_warning(FutureWarning):
        dense_result = pd.concat([self.dense1, self.dense3])

    expected = dense_result.to_sparse()
    tm.assert_sp_frame_equal(sparse_result, expected)
def test_combine_first(self):
    """``combine_first`` with a sparse or dense argument matches dense."""
    frame = self.frame

    # Every other row combined with the full frame, sparse and dense.
    with_sparse = frame[::2].combine_first(frame)
    with_dense = frame[::2].combine_first(frame.to_dense())

    # Reference result computed entirely on dense frames.
    dense_result = frame[::2].to_dense().combine_first(frame.to_dense())
    expected = dense_result.to_sparse(fill_value=frame.default_fill_value)

    tm.assert_sp_frame_equal(with_sparse, with_dense)
    tm.assert_sp_frame_equal(with_sparse, expected)
def test_sparse_repr_after_set(self):
    """A chained set followed by ``repr`` leaves the frame equal to its copy."""
    # GH 15488
    sdf = pd.SparseDataFrame([[np.nan, 1],
                              [2, np.nan]])
    snapshot = sdf.copy()

    # Ignore the warning
    with pd.option_context('mode.chained_assignment', None):
        sdf[0][1] = 2  # This line triggers the bug

    repr(sdf)
    tm.assert_sp_frame_equal(sdf, snapshot)
def test_reindex_method(self):
    """Reindex rows and columns with the default, 'bfill' and 'ffill' methods."""
    r1 = [11., 12., 14.]
    r2 = [21., 22., 24.]
    r4 = [41., 42., 44.]
    blank = [nan, nan, nan]

    sparse = SparseDataFrame(data=[r1, r2, r4], index=[1, 2, 4],
                             columns=[1, 2, 4], dtype=float)

    def rows_frame(rows):
        # Expected frame after reindexing the rows to range(6).
        return SparseDataFrame(data=rows, index=range(6),
                               columns=[1, 2, 4], dtype=float)

    # Over indices

    # default method
    result = sparse.reindex(index=range(6))
    expected = rows_frame([blank, r1, r2, blank, r4, blank])
    tm.assert_sp_frame_equal(result, expected)

    # method='bfill'
    result = sparse.reindex(index=range(6), method='bfill')
    expected = rows_frame([r1, r1, r2, r4, r4, blank])
    tm.assert_sp_frame_equal(result, expected)

    # method='ffill'
    result = sparse.reindex(index=range(6), method='ffill')
    expected = rows_frame([blank, r1, r2, r2, r4, r4])
    tm.assert_sp_frame_equal(result, expected)

    # Over columns

    # default method
    result = sparse.reindex(columns=range(6))
    expected = SparseDataFrame(
        data=[[nan, 11., 12., nan, 14., nan],
              [nan, 21., 22., nan, 24., nan],
              [nan, 41., 42., nan, 44., nan]],
        index=[1, 2, 4], columns=range(6), dtype=float)
    tm.assert_sp_frame_equal(result, expected)

    # Filling methods over columns are expected to raise.
    for method in ('bfill', 'ffill'):
        with tm.assertRaises(NotImplementedError):
            sparse.reindex(columns=range(6), method=method)
def test_loc(self):
    """Compare ``loc`` on a sparse frame with the same lookups on dense."""
    dense = pd.DataFrame(
        [[1, np.nan, np.nan],
         [2, 3, np.nan],
         [np.nan, np.nan, 4]],
        columns=list('xyz'))
    sp = dense.to_sparse()
    everything = slice(None)

    # Scalar lookups.
    assert sp.loc[0, 'x'] == 1
    assert np.isnan(sp.loc[1, 'z'])
    assert sp.loc[2, 'z'] == 4

    # Row lookups.  `kind='integer'` has to be given because a new
    # SparseArray is constructed here, whose default sparse kind is
    # integer, whereas SparseSeries defaults to block.
    for key in (0, 1, (2, everything), (2, everything)):
        tm.assert_sp_series_equal(
            sp.loc[key], dense.loc[key].to_sparse(kind='integer'))

    # Column lookups.
    for key in ((everything, 'y'), (everything, 'y')):
        tm.assert_sp_series_equal(sp.loc[key], dense.loc[key].to_sparse())

    # List-based selections returning frames.
    frame_keys = (
        [1, 2],
        ([1, 2], everything),
        (everything, ['x', 'z']),
        ([0, 2], ['x', 'z']),
    )
    for key in frame_keys:
        got = sp.loc[key]
        want = dense.loc[key].to_sparse()
        tm.assert_sp_frame_equal(got, want)

    # exceeds the bounds
    got = sp.reindex([1, 3, 4, 5])
    want = dense.reindex([1, 3, 4, 5]).to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # dense array
    got = sp.loc[dense.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp.loc[sp.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array
    got = sp.loc[pd.SparseArray(sp.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(got, want)
def test_cumsum(self):
    """``cumsum`` on the sparse frame matches the dense cumulative sum."""
    sparse_total = self.frame.cumsum()

    dense_total = self.frame.to_dense().cumsum()
    expected = SparseDataFrame(dense_total)

    tm.assert_sp_frame_equal(sparse_total, expected)
def test_concat(self):
    """Concat of sparse frames matches sparsifying the dense concat."""
    d1, d2 = self.dense1, self.dense2

    # fill_value = np.nan
    s1 = d1.to_sparse()
    s2 = d2.to_sparse()
    cases = (
        ((s1, s1), (d1, d1)),
        ((s2, s2), (d2, d2)),
        ((s1, s2), (d1, d2)),
        ((s2, s1), (d2, d1)),
    )
    for sparse_pair, dense_pair in cases:
        res = pd.concat(list(sparse_pair))
        exp = pd.concat(list(dense_pair)).to_sparse()
        tm.assert_sp_frame_equal(res, exp)

    # fill_value = 0
    s1 = d1.to_sparse(fill_value=0)
    s2 = d2.to_sparse(fill_value=0)
    cases = (
        ((s1, s1), (d1, d1)),
        ((s2, s2), (d2, d2)),
        ((s1, s2), (d1, d2)),
        ((s2, s1), (d2, d1)),
    )
    for sparse_pair, dense_pair in cases:
        res = pd.concat(list(sparse_pair))
        exp = pd.concat(list(dense_pair)).to_sparse(fill_value=0)
        # The expected default fill value is overridden to NaN here.
        exp._default_fill_value = np.nan
        tm.assert_sp_frame_equal(res, exp)
def _check(frame, orig):
    """Assert that transposing ``frame`` twice gives back ``frame``.

    ``orig`` is accepted but not used here.
    """
    round_trip = frame.T.T
    tm.assert_sp_frame_equal(frame, round_trip)
def test_loc(self):
    """Compare ``loc`` on a sparse frame with the same lookups on dense."""
    dense = pd.DataFrame(
        [[1, np.nan, np.nan],
         [2, 3, np.nan],
         [np.nan, np.nan, 4]],
        columns=list('xyz'))
    sp = dense.to_sparse()
    everything = slice(None)

    # Scalar lookups.
    self.assertEqual(sp.loc[0, 'x'], 1)
    self.assertTrue(np.isnan(sp.loc[1, 'z']))
    self.assertEqual(sp.loc[2, 'z'], 4)

    # Row and column lookups returning series.
    series_keys = (
        0,
        1,
        (2, everything),
        (2, everything),
        (everything, 'y'),
        (everything, 'y'),
    )
    for key in series_keys:
        tm.assert_sp_series_equal(sp.loc[key], dense.loc[key].to_sparse())

    # List-based selections returning frames.
    frame_keys = (
        [1, 2],
        ([1, 2], everything),
        (everything, ['x', 'z']),
        ([0, 2], ['x', 'z']),
        # exceeds the bounds
        [1, 3, 4, 5],
    )
    for key in frame_keys:
        got = sp.loc[key]
        want = dense.loc[key].to_sparse()
        tm.assert_sp_frame_equal(got, want)

    # dense array
    got = sp.loc[dense.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp.loc[sp.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array
    got = sp.loc[pd.SparseArray(sp.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(got, want)
def test_iloc_slice(self):
    """``iloc`` slicing of a sparse frame matches slicing the dense frame."""
    rows = [
        [1, np.nan, np.nan],
        [2, 3, np.nan],
        [np.nan, np.nan, 4],
    ]
    dense = pd.DataFrame(rows, columns=list('xyz'))
    sp = dense.to_sparse()

    tail_sparse = sp.iloc[2:]
    tail_dense = dense.iloc[2:].to_sparse()
    tm.assert_sp_frame_equal(tail_sparse, tail_dense)
def test_loc_index(self):
    """Compare ``loc`` with string row labels on sparse and dense frames."""
    dense = pd.DataFrame(
        [[1, np.nan, np.nan],
         [2, 3, np.nan],
         [np.nan, np.nan, 4]],
        index=list('abc'), columns=list('xyz'))
    sp = dense.to_sparse()
    everything = slice(None)

    # Scalar lookups.
    self.assertEqual(sp.loc['a', 'x'], 1)
    self.assertTrue(np.isnan(sp.loc['b', 'z']))
    self.assertEqual(sp.loc['c', 'z'], 4)

    # Row and column lookups returning series.
    series_keys = (
        'a',
        'b',
        ('b', everything),
        ('b', everything),
        (everything, 'z'),
        (everything, 'z'),
    )
    for key in series_keys:
        tm.assert_sp_series_equal(sp.loc[key], dense.loc[key].to_sparse())

    # List-based selections returning frames.
    frame_keys = (
        ['a', 'b'],
        (['a', 'b'], everything),
        (everything, ['x', 'z']),
        (['c', 'a'], ['x', 'z']),
    )
    for key in frame_keys:
        got = sp.loc[key]
        want = dense.loc[key].to_sparse()
        tm.assert_sp_frame_equal(got, want)

    # dense array
    got = sp.loc[dense.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp.loc[sp.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array
    got = sp.loc[pd.SparseArray(sp.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(got, want)
def test_constructor_dataframe(self):
    """Building a SparseDataFrame from the dense frame reproduces the original."""
    as_dense = self.frame.to_dense()
    rebuilt = SparseDataFrame(as_dense)
    tm.assert_sp_frame_equal(rebuilt, self.frame)
def _test_roundtrip(frame, orig):
    """Pickle round-trip ``frame`` and compare it with ``frame`` and ``orig``."""
    restored = tm.round_trip_pickle(frame)

    # The restored object must equal the sparse input ...
    tm.assert_sp_frame_equal(frame, restored)

    # ... and its dense form must equal ``orig``, ignoring dtype.
    restored_dense = restored.to_dense()
    tm.assert_frame_equal(restored_dense, orig, check_dtype=False)
def test_take(self):
    """``take`` along axis 1 matches reindexing to columns B, A, C."""
    positions = [1, 0, 2]
    taken = self.frame.take(positions, axis=1)

    reordered = self.frame.reindex(columns=['B', 'A', 'C'])
    tm.assert_sp_frame_equal(taken, reordered)
def _test_op(panel, op):
    """Apply ``op(panel, 1)`` and compare item 'ItemA' with the per-item result."""
    # arithmetic tests
    # The panel-level operation is expected to emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        panel_result = op(panel, 1)

    item_from_panel = panel_result['ItemA']
    item_direct = op(panel['ItemA'], 1)
    tm.assert_sp_frame_equal(item_from_panel, item_direct)
def compare_sp_frame_float(result, expected, typ, version):
    """Compare two sparse frames, loosening the checks for old versions.

    When ``version`` is 0.18.1 or earlier the comparison ignores exact
    indices and dtype; otherwise the default comparison is used.  ``typ``
    is accepted but not used here.
    """
    is_legacy = LooseVersion(version) <= '0.18.1'
    options = {'exact_indices': False, 'check_dtype': False} if is_legacy else {}
    tm.assert_sp_frame_equal(result, expected, **options)
def test_loc_index(self):
    """Compare ``loc`` with string row labels on sparse and dense frames."""
    dense = pd.DataFrame(
        [[1, np.nan, np.nan],
         [2, 3, np.nan],
         [np.nan, np.nan, 4]],
        index=list("abc"),
        columns=list("xyz"),
    )
    sp = dense.to_sparse()
    everything = slice(None)

    # Scalar lookups.
    assert sp.loc["a", "x"] == 1
    assert np.isnan(sp.loc["b", "z"])
    assert sp.loc["c", "z"] == 4

    # Row lookups; the expected series is built with kind="integer".
    for key in ("a", "b", ("b", everything), ("b", everything)):
        tm.assert_sp_series_equal(
            sp.loc[key], dense.loc[key].to_sparse(kind="integer"))

    # Column lookups.
    for key in ((everything, "z"), (everything, "z")):
        tm.assert_sp_series_equal(sp.loc[key], dense.loc[key].to_sparse())

    # List-based selections returning frames.
    frame_keys = (
        ["a", "b"],
        (["a", "b"], everything),
        (everything, ["x", "z"]),
        (["c", "a"], ["x", "z"]),
    )
    for key in frame_keys:
        got = sp.loc[key]
        want = dense.loc[key].to_sparse()
        tm.assert_sp_frame_equal(got, want)

    # dense array
    got = sp.loc[dense.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array (actually it coerces to normal Series)
    got = sp.loc[sp.x % 2 == 1]
    want = dense.loc[dense.x % 2 == 1].to_sparse()
    tm.assert_sp_frame_equal(got, want)

    # sparse array
    got = sp.loc[pd.SparseArray(sp.x % 2 == 1, dtype=bool)]
    tm.assert_sp_frame_equal(got, want)
def compare_sp_frame_float(result, expected, typ, version):
    """Assert that two sparse frames are equal.

    ``typ`` and ``version`` are accepted but not used here.
    """
    compare = tm.assert_sp_frame_equal
    compare(result, expected)