def test_lookup(self):
    """Check scalar ``lookup`` on 'integer' and 'block' sparse indexes.

    Each case builds an index of length 4 from a list of stored positions
    and probes every position from -1 through 4.  The expected value is the
    offset of the position among the stored points, or -1 when the position
    is not stored (including the out-of-range probes -1 and 4).
    """
    probes = list(range(-1, 5))
    # (stored positions, expected lookup result for each probe in ``probes``)
    cases = [
        ([2, 3], [-1, -1, -1, 0, 1, -1]),
        ([], [-1, -1, -1, -1, -1, -1]),
        ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
        ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
    ]

    for kind in ('integer', 'block'):
        for stored, expected in cases:
            idx = sparray._make_index(
                4, np.array(stored, dtype=np.int32), kind=kind)
            for position, want in zip(probes, expected):
                self.assertEqual(idx.lookup(position), want)
def test_lookup_array(self):
    """Check vectorised ``lookup_array`` on 'integer' and 'block' indexes.

    Every index has length 4.  For each queried position the expected
    result is its offset among the stored points, or -1 when the position
    is not stored or lies outside the index.
    """
    for kind in ['integer', 'block']:
        # only positions 2 and 3 are stored
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)

        res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
        exp = np.array([-1, -1, 0], dtype=np.int32)
        self.assert_numpy_array_equal(res, exp)

        res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
        exp = np.array([-1, 0, -1, 1], dtype=np.int32)
        self.assert_numpy_array_equal(res, exp)

        # nothing is stored: every query must miss
        idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)

        res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
        exp = np.array([-1, -1, -1, -1], dtype=np.int32)
        # this comparison was missing, so the empty-index case was
        # computed but never actually verified
        self.assert_numpy_array_equal(res, exp)

        # every position is stored
        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                          kind=kind)

        res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
        exp = np.array([-1, 0, 2], dtype=np.int32)
        self.assert_numpy_array_equal(res, exp)

        res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
        exp = np.array([-1, 2, 1, 3], dtype=np.int32)
        self.assert_numpy_array_equal(res, exp)

        # stored positions with a gap at 1
        idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)

        res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
        exp = np.array([1, -1, 2, 0], dtype=np.int32)
        self.assert_numpy_array_equal(res, exp)

        res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
        exp = np.array([-1, -1, 1, -1], dtype=np.int32)
        self.assert_numpy_array_equal(res, exp)
def test_block_internal(self):
    """Check the internals of indexes built with ``kind='block'``.

    For each list of stored positions (index length 4) the result must be a
    ``BlockIndex`` with the expected ``npoints``, block start locations
    (``blocs``) and block lengths (``blengths``).
    """
    # (stored positions, npoints, blocs, blengths)
    scenarios = [
        ([2, 3], 2, [2], [2]),
        ([], 0, [], []),
        ([0, 1, 2, 3], 4, [0], [4]),
        ([0, 2, 3], 3, [0, 2], [1, 2]),
    ]

    for stored, npoints, blocs, blengths in scenarios:
        idx = _make_index(4, np.array(stored, dtype=np.int32), kind='block')
        self.assertIsInstance(idx, BlockIndex)
        self.assertEqual(idx.npoints, npoints)
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array(blocs, dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array(blengths, dtype=np.int32))
def test_lookup(self):
    """Check scalar ``lookup`` for both 'integer' and 'block' indexes.

    An index of length 4 is built from each set of stored positions and
    probed at -1, 0, 1, 2, 3 and 4.  A stored position is expected to map
    to its offset among the stored points; anything else maps to -1.
    """

    def check(kind, stored, expected):
        # ``expected[n]`` is the answer for probe ``n - 1``, so the list
        # covers the probes -1 .. 4 in order.
        idx = _make_index(4, np.array(stored, dtype=np.int32), kind=kind)
        for n, want in enumerate(expected):
            self.assertEqual(idx.lookup(n - 1), want)

    for kind in ['integer', 'block']:
        check(kind, [2, 3], [-1, -1, -1, 0, 1, -1])
        check(kind, [], [-1] * 6)
        check(kind, [0, 1, 2, 3], [-1, 0, 1, 2, 3, -1])
        check(kind, [0, 2, 3], [-1, 0, -1, 1, 2, -1])
def shift(self, periods, freq=None, axis=0):
    """Return a copy of this object shifted by ``periods``.

    Parameters
    ----------
    periods : int
        Number of positions to shift by; 0 returns a plain copy.
    freq : optional
        When given (and the fill value is null) the index labels are
        shifted via ``self.index.shift(periods, freq)`` and the sparse data
        is reused unchanged.
    axis : int, default 0
        Only forwarded to the dense ``shift`` used for non-null fill values.

    Returns
    -------
    A new object built with ``self._constructor`` (or, for non-null fill
    values, the re-sparsified result of the dense shift).
    """
    if periods == 0:
        return self.copy()

    # no special handling of fill values yet: round-trip through dense
    if not isnull(self.fill_value):
        dense_shifted = self.to_dense().shift(periods, freq=freq, axis=axis)
        return dense_shifted.to_sparse(fill_value=self.fill_value,
                                       kind=self.kind)

    if freq is not None:
        relabelled = self._constructor(
            self.sp_values,
            sparse_index=self.sp_index,
            index=self.index.shift(periods, freq),
            fill_value=self.fill_value)
        return relabelled.__finalize__(self)

    # Move the stored positions by ``periods`` and keep only those that
    # still fall inside [0, length).
    # NOTE(review): searchsorted presumes the positions are sorted - confirm.
    positions = self.sp_index.to_int_index()
    moved = positions.indices + periods
    lo, hi = moved.searchsorted([0, positions.length])
    in_bounds = slice(lo, hi)

    shifted_index = _make_index(len(self), moved[in_bounds], self.sp_index)
    kept_values = self.sp_values[in_bounds].copy()
    shifted_values = self.values._simple_new(kept_values, shifted_index,
                                             fill_value=np.nan)
    return self._constructor(shifted_values,
                             index=self.index).__finalize__(self)
def test_make_block_boundary(self):
    """Check block construction when every other position is stored.

    For several index lengths, storing positions 0, 2, 4, ... is expected
    to give one block per stored position, each of length 1.
    """
    for length in (5, 10, 100, 101):
        stored = np.arange(0, length, 2, dtype=np.int32)
        idx = _make_index(length, stored, kind='block')

        expected_blocs = np.arange(0, length, 2, dtype=np.int32)
        expected_lengths = np.ones(len(expected_blocs), dtype=np.int32)

        tm.assert_numpy_array_equal(idx.blocs, expected_blocs)
        tm.assert_numpy_array_equal(idx.blengths, expected_lengths)
def test_int_internal(self):
    """Check the internals of indexes built with ``kind='integer'``.

    For each list of stored positions (index length 4) the result must be
    an ``IntIndex`` whose ``npoints`` and ``indices`` match the input.
    """
    # (stored positions, expected npoints)
    scenarios = [
        ([2, 3], 2),
        ([], 0),
        ([0, 1, 2, 3], 4),
    ]

    for stored, npoints in scenarios:
        idx = _make_index(4, np.array(stored, dtype=np.int32),
                          kind='integer')
        self.assertIsInstance(idx, IntIndex)
        self.assertEqual(idx.npoints, npoints)
        tm.assert_numpy_array_equal(idx.indices,
                                    np.array(stored, dtype=np.int32))
def test_lookup_array(self):
    """Check vectorised ``lookup_array`` on 'integer' and 'block' indexes.

    Every index has length 4.  For each queried position the expected
    result is its offset among the stored points, or -1 when the position
    is not stored or lies outside the index.

    The cases are table-driven so that every query is compared against its
    expectation; the empty-index case used to build ``res`` and ``exp``
    without ever asserting on them.
    """
    # (stored positions, [(queried positions, expected offsets), ...])
    cases = [
        ([2, 3], [
            ([-1, 0, 2], [-1, -1, 0]),
            ([4, 2, 1, 3], [-1, 0, -1, 1]),
        ]),
        ([], [
            ([-1, 0, 2, 4], [-1, -1, -1, -1]),
        ]),
        ([0, 1, 2, 3], [
            ([-1, 0, 2], [-1, 0, 2]),
            ([4, 2, 1, 3], [-1, 2, 1, 3]),
        ]),
        ([0, 2, 3], [
            ([2, 1, 3, 0], [1, -1, 2, 0]),
            ([1, 4, 2, 5], [-1, -1, 1, -1]),
        ]),
    ]

    for kind in ['integer', 'block']:
        for stored, queries in cases:
            idx = sparray._make_index(
                4, np.array(stored, dtype=np.int32), kind=kind)
            for wanted, expected in queries:
                res = idx.lookup_array(np.array(wanted, dtype=np.int32))
                exp = np.array(expected, dtype=np.int32)
                self.assert_numpy_array_equal(res, exp)
def _concat_sparse(to_concat, axis=0, typs=None):
    """
    Concatenate a sequence of sparse and/or dense arrays.

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation
    typs : set of to_concat dtypes; computed with
        ``com.get_dtype_kinds`` when omitted

    Returns
    -------
    a single array, preserving the combined dtypes: a SparseArray when the
    inputs are sparse (optionally mixed with dense 'f' / 'i' kinds),
    otherwise a dense object array
    """
    from pandas.sparse.array import SparseArray, _make_index

    def _to_dense_piece(piece):
        # coerce sparse input to its native dense values, flatten, and
        # lift to 2-D when concatenating along a non-zero axis
        if isinstance(piece, SparseArray):
            piece = piece.get_values()
        piece = piece.ravel()
        return np.atleast_2d(piece) if axis > 0 else piece

    if typs is None:
        typs = com.get_dtype_kinds(to_concat)

    if len(typs) == 1:
        # Single kind: when every input also shares one fill_value the
        # sparse pieces can be stitched together without densifying.
        distinct_fills = set(arr.fill_value for arr in to_concat)
        if len(distinct_fills) == 1:
            value_chunks = [arr.sp_values for arr in to_concat]
            int_indexes = [arr.sp_index.to_int_index() for arr in to_concat]

            # offset each piece's positions by the total length before it
            total_length = 0
            position_chunks = []
            for int_index in int_indexes:
                position_chunks.append(int_index.indices + total_length)
                total_length += int_index.length

            first = to_concat[0]
            combined_index = _make_index(total_length,
                                         np.concatenate(position_chunks),
                                         kind=first.sp_index)
            return SparseArray(np.concatenate(value_chunks),
                               sparse_index=combined_index,
                               fill_value=first.fill_value)

    # input may be sparse / dense mixed and may have different fill_value
    # input must contain sparse at least 1
    sparse_inputs = [arr for arr in to_concat if com.is_sparse(arr)]
    sparse_fills = [arr.fill_value for arr in sparse_inputs]
    sparse_indexes = [arr.sp_index for arr in sparse_inputs]

    # densify and regular concat
    dense_pieces = [_to_dense_piece(arr) for arr in to_concat]
    result = np.concatenate(dense_pieces, axis=axis)

    unsupported_kinds = typs - set(['sparse', 'f', 'i'])
    if len(unsupported_kinds) != 0:
        # coerce to object if needed
        return result.astype('object')

    # sparsify if inputs are sparse and dense numerics
    # first sparse input's fill_value and SparseIndex is used
    return SparseArray(result.ravel(), fill_value=sparse_fills[0],
                       kind=sparse_indexes[0])