def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Compare ``sparse_op`` on block vs. integer indexes, then vs. Series.

    ``xloc``/``xlen`` and ``yloc``/``ylen`` are the block locations and
    lengths used to build the two operand indexes.  ``eloc`` and ``elen``
    are accepted but not used by this check.

    ``self``, ``sparse_op``, ``python_op``, ``TEST_LENGTH`` and
    ``assert_equal`` are free names resolved outside this function.
    """
    xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
    yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

    xdindex = xindex.to_int_index()
    ydindex = yindex.to_int_index()

    # Distinct value scales so x and y contributions are distinguishable.
    x = np.arange(xindex.npoints) * 10. + 1
    y = np.arange(yindex.npoints) * 100. + 1

    xfill = 0
    yfill = 2

    result_block_vals, rb_index = sparse_op(x, xindex, xfill,
                                            y, yindex, yfill)
    result_int_vals, ri_index = sparse_op(x, xdindex, xfill,
                                          y, ydindex, yfill)

    # assertTrue replaces the ``assert_`` alias removed in Python 3.12.
    self.assertTrue(rb_index.to_int_index().equals(ri_index))
    assert_equal(result_block_vals, result_int_vals)

    # check versus Series...
    xseries = Series(x, xdindex.indices)
    xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

    yseries = Series(y, ydindex.indices)
    yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

    series_result = python_op(xseries, yseries)
    series_result = series_result.reindex(ri_index.indices)

    assert_equal(result_block_vals, series_result.values)
    assert_equal(result_int_vals, series_result.values)
def test_to_int_index(self):
    """A two-block index converts to the explicit list of positions."""
    block_starts, block_sizes = [0, 10], [4, 6]
    expected_positions = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

    int_index = BlockIndex(20, block_starts, block_sizes).to_int_index()

    assert_equal(int_index.indices, expected_positions)
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Check that block -> integer -> block conversion is lossless.

    ``eloc`` and ``elen`` are accepted but not used by this check.
    ``self``, ``tm`` and ``TEST_LENGTH`` are free names resolved outside
    this function.
    """
    xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
    yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

    # see if survive the round trip
    xbindex = xindex.to_int_index().to_block_index()
    ybindex = yindex.to_int_index().to_block_index()
    tm.assert_isinstance(xbindex, BlockIndex)

    # assertTrue replaces the ``assert_`` alias removed in Python 3.12.
    self.assertTrue(xbindex.equals(xindex))
    self.assertTrue(ybindex.equals(yindex))
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Union of two block indexes matches ``eloc``/``elen`` and the int union."""
    left = BlockIndex(TEST_LENGTH, xloc, xlen)
    right = BlockIndex(TEST_LENGTH, yloc, ylen)

    # Block-level union must yield the expected block layout.
    block_union = left.make_union(right)
    assert isinstance(block_union, BlockIndex)
    assert_equal(block_union.blocs, eloc)
    assert_equal(block_union.blengths, elen)

    # Integer-level union must cover the same positions.
    left_int = left.to_int_index()
    right_int = right.to_int_index()
    int_union = left_int.make_union(right_int)
    assert isinstance(int_union, IntIndex)
    assert_equal(int_union.indices, block_union.to_int_index().indices)
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Run the correctness and length-mismatch checks for one case.

    Each check is run once on block indexes and once on their integer
    equivalents.  ``_check_correct`` and ``_check_length_exc`` are helpers
    resolved outside this function.
    """
    left = BlockIndex(TEST_LENGTH, xloc, xlen)
    right = BlockIndex(TEST_LENGTH, yloc, ylen)
    want = BlockIndex(TEST_LENGTH, eloc, elen)
    # Same blocks as ``right`` but declared one element longer.
    oversized = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

    _check_correct(left, right, want)
    _check_correct(
        left.to_int_index(),
        right.to_int_index(),
        want.to_int_index(),
    )

    _check_length_exc(left, oversized)
    _check_length_exc(
        left.to_int_index(),
        oversized.to_int_index(),
    )
def test_check_integrity(self):
    """Valid empty indexes construct; invalid block layouts raise."""
    locs = []
    lengths = []

    # 0-length OK -- constructing without an exception is the check, so
    # the result is deliberately not bound to a name.
    BlockIndex(0, locs, lengths)

    # also OK even though empty
    BlockIndex(1, locs, lengths)

    # block extend beyond end
    self.assertRaises(Exception, BlockIndex, 10, [5], [10])

    # block overlap
    self.assertRaises(Exception, BlockIndex, 10, [2, 5], [5, 3])
def test_check_integrity(self):
    """Valid empty indexes construct; invalid block layouts raise."""
    locs = []
    lengths = []

    # 0-length OK -- the constructor not raising is the whole check, so
    # the instance is intentionally discarded rather than assigned.
    BlockIndex(0, locs, lengths)

    # also OK even though empty
    BlockIndex(1, locs, lengths)

    # block extend beyond end
    self.assertRaises(Exception, BlockIndex, 10, [5], [10])

    # block overlap
    self.assertRaises(Exception, BlockIndex, 10, [2, 5], [5, 3])
def make_sparse(arr, kind='block', fill_value=nan):
    """
    Convert ndarray to sparse format

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value

    Returns
    -------
    (sparse_values, index) : (ndarray, SparseIndex)

    Raises
    ------
    ValueError
        If ``kind`` is neither 'block' nor 'integer'.
    """
    arr = np.asarray(arr)
    length = len(arr)

    # Mask of the entries that are *not* the fill value and must be stored.
    if np.isnan(fill_value):
        # NaN never compares equal to itself, so test with isnan.  Boolean
        # arrays are inverted with ``~``; unary ``-`` raises TypeError on
        # current NumPy.
        mask = ~np.isnan(arr)
    else:
        mask = arr != fill_value

    indices = np.arange(length, dtype=np.int32)[mask]

    if kind == 'block':
        locs, lens = splib.get_blocks(indices)
        index = BlockIndex(length, locs, lens)
    elif kind == 'integer':
        index = IntIndex(length, indices)
    else:  # pragma: no cover
        raise ValueError('must be block or integer type')

    sparsified_values = arr[mask]
    return sparsified_values, index
def test_lookup():
    """``lookup`` gives the storage offset of a position, or -1 if absent.

    The same expectations are applied to a block index and to its integer
    equivalent.
    """
    # (position, expected lookup result)
    cases = [
        (0, -1),
        (5, 0),
        (7, 2),
        (8, -1),
        (9, -1),
        (10, -1),
        (11, -1),
        (12, 3),
        (17, 8),
        (18, -1),
    ]

    def _check(index):
        for position, expected in cases:
            assert index.lookup(position) == expected

    block_index = BlockIndex(20, [5, 12], [3, 6])
    # The tuple is built first, so the conversion happens before any check.
    for candidate in (block_index, block_index.to_int_index()):
        _check(candidate)
def test_lookup():
    """Check ``lookup`` on a block index and on its integer counterpart."""
    positions = [0, 5, 7, 8, 9, 10, 11, 12, 17, 18]
    offsets = [-1, 0, 2, -1, -1, -1, -1, 3, 8, -1]

    def _check(index):
        # Walk the positions in order; -1 marks a position not stored.
        for position, want in zip(positions, offsets):
            assert (index.lookup(position) == want)

    bindex = BlockIndex(20, [5, 12], [3, 6])
    iindex = bindex.to_int_index()

    _check(bindex)
    _check(iindex)
def _make_index(length, indices, kind):
    """Build a sparse index of the requested kind over ``indices``.

    ``kind`` may be the string 'block' or 'integer', or an existing
    ``BlockIndex`` / ``IntIndex`` instance whose type selects the kind.
    Raises ValueError for anything else.
    """
    if kind == 'block' or isinstance(kind, BlockIndex):
        block_starts, block_sizes = splib.get_blocks(indices)
        return BlockIndex(length, block_starts, block_sizes)

    if kind == 'integer' or isinstance(kind, IntIndex):
        return IntIndex(length, indices)

    raise ValueError('must be block or integer type')  # pragma: no cover
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Verify ``sparse_op`` agrees across index kinds and with dense Series.

    The operation is run with block indexes and again with the equivalent
    integer indexes; both results must match each other and the result of
    ``python_op`` on fill-expanded Series.  ``eloc`` and ``elen`` are
    accepted but not used here.

    ``self``, ``sparse_op``, ``python_op``, ``TEST_LENGTH`` and
    ``assert_equal`` are free names resolved outside this function.
    """
    xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
    yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

    xdindex = xindex.to_int_index()
    ydindex = yindex.to_int_index()

    x = np.arange(xindex.npoints) * 10. + 1
    y = np.arange(yindex.npoints) * 100. + 1

    xfill = 0
    yfill = 2

    result_block_vals, rb_index = sparse_op(x, xindex, xfill,
                                            y, yindex, yfill)
    result_int_vals, ri_index = sparse_op(x, xdindex, xfill,
                                          y, ydindex, yfill)

    # ``assert_`` was removed from unittest in Python 3.12; use assertTrue.
    self.assertTrue(rb_index.to_int_index().equals(ri_index))
    assert_equal(result_block_vals, result_int_vals)

    # check versus Series...
    xseries = Series(x, xdindex.indices)
    xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

    yseries = Series(y, ydindex.indices)
    yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

    series_result = python_op(xseries, yseries)
    # Keep only the positions present in the sparse result.
    series_result = series_result.reindex(ri_index.indices)

    assert_equal(result_block_vals, series_result.values)
    assert_equal(result_int_vals, series_result.values)
def test_homogenize(self):
    """``spf.homogenize`` gives every series the same sparse index.

    Also checks that a series with a non-NaN fill value is rejected.
    """

    def _check_matches(indices, expected):
        # One SparseSeries per index, keyed by its position in ``indices``.
        data = dict(
            (pos, SparseSeries(sp_idx.to_int_index().indices,
                               sparse_index=sp_idx))
            for pos, sp_idx in enumerate(indices)
        )
        homogenized = spf.homogenize(data)

        for _, series in compat.iteritems(homogenized):
            assert (series.sp_index.equals(expected))

    indices1 = [
        BlockIndex(10, [2], [7]),
        BlockIndex(10, [1, 6], [3, 4]),
        BlockIndex(10, [0], [10]),
    ]
    expected1 = BlockIndex(10, [2, 6], [2, 3])
    _check_matches(indices1, expected1)

    # Identical inputs: the expected index is simply the first one.
    indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
    expected2 = indices2[0]
    _check_matches(indices2, expected2)

    # must have NaN fill value
    bad_fill = SparseSeries(np.arange(7), sparse_index=expected2,
                            fill_value=0)
    data = {'a': bad_fill}
    with tm.assertRaisesRegexp(TypeError, "NaN fill value"):
        spf.homogenize(data)
def test_homogenize(self):
    """``spf.homogenize`` gives every series the same sparse index.

    Also checks that a series with a non-NaN fill value is rejected.
    """

    def _check_matches(indices, expected):
        data = {}
        for i, idx in enumerate(indices):
            data[i] = SparseSeries(idx.to_int_index().indices,
                                   sparse_index=idx)
        homogenized = spf.homogenize(data)

        # ``iteritems`` only exists on Python 2 dicts; ``items`` works on
        # both Python 2 and 3.  (``homogenized`` is presumably a dict --
        # confirm against ``spf.homogenize``.)
        for _, v in homogenized.items():
            assert (v.sp_index.equals(expected))

    indices1 = [
        BlockIndex(10, [2], [7]),
        BlockIndex(10, [1, 6], [3, 4]),
        BlockIndex(10, [0], [10])
    ]
    expected1 = BlockIndex(10, [2, 6], [2, 3])
    _check_matches(indices1, expected1)

    indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
    expected2 = indices2[0]
    _check_matches(indices2, expected2)

    # must have NaN fill value
    data = {
        'a': SparseSeries(np.arange(7), sparse_index=expected2,
                          fill_value=0)
    }
    nose.tools.assert_raises(Exception, spf.homogenize, data)
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Compare ``sparse_op`` on block vs. integer indexes, then vs. Series.

    ``eloc`` and ``elen`` are accepted but not used by this check.
    ``self``, ``sparse_op``, ``python_op``, ``TEST_LENGTH`` and
    ``assert_equal`` are free names resolved outside this function.
    """
    xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
    yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

    xdindex = xindex.to_int_index()
    ydindex = yindex.to_int_index()

    x = np.arange(xindex.npoints) * 10. + 1
    y = np.arange(yindex.npoints) * 100. + 1

    result_block_vals, rb_index = sparse_op(x, xindex, y, yindex)
    result_int_vals, ri_index = sparse_op(x, xdindex, y, ydindex)

    self.assertTrue(rb_index.to_int_index().equals(ri_index))
    assert_equal(result_block_vals, result_int_vals)

    # check versus Series...
    xseries = Series(x, xdindex.indices)
    yseries = Series(y, ydindex.indices)
    # ``Series.valid()`` was removed in pandas 1.0; ``dropna()`` is the
    # equivalent call and drops the NaN entries of the result.
    series_result = python_op(xseries, yseries).dropna()

    assert_equal(result_block_vals, series_result.values)
    assert_equal(result_int_vals, series_result.values)
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Check ``make_union`` on block indexes and on their int equivalents.

    The block union must have locations ``eloc`` and lengths ``elen``; the
    integer union must contain exactly the same positions.
    """
    x_blocks = BlockIndex(TEST_LENGTH, xloc, xlen)
    y_blocks = BlockIndex(TEST_LENGTH, yloc, ylen)

    merged_blocks = x_blocks.make_union(y_blocks)
    is_block = isinstance(merged_blocks, BlockIndex)
    assert is_block
    assert_equal(merged_blocks.blocs, eloc)
    assert_equal(merged_blocks.blengths, elen)

    x_ints = x_blocks.to_int_index()
    y_ints = y_blocks.to_int_index()
    merged_ints = x_ints.make_union(y_ints)
    is_int = isinstance(merged_ints, IntIndex)
    assert is_int

    expected_positions = merged_blocks.to_int_index().indices
    assert_equal(merged_ints.indices, expected_positions)
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Check that block -> integer -> block conversion is lossless.

    ``eloc`` and ``elen`` are accepted but not used by this check.
    ``self`` and ``TEST_LENGTH`` are free names resolved outside this
    function.
    """
    xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
    yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

    # see if survive the round trip
    xbindex = xindex.to_int_index().to_block_index()
    ybindex = yindex.to_int_index().to_block_index()

    # assertTrue replaces the ``assert_`` alias removed in Python 3.12.
    self.assertTrue(isinstance(xbindex, BlockIndex))
    self.assertTrue(xbindex.equals(xindex))
    self.assertTrue(ybindex.equals(yindex))
def test_to_block_index(self):
    """``to_block_index`` on a BlockIndex returns the very same object."""
    index = BlockIndex(10, [0, 5], [4, 5])
    # assertIs reports both operands on failure and avoids the ``assert_``
    # alias that was removed in Python 3.12.
    self.assertIs(index.to_block_index(), index)
def test_equals(self):
    """An index equals itself but not one whose last block is longer."""
    base = BlockIndex(10, [0, 4], [2, 5])
    self.assertTrue(base.equals(base))

    # Same block starts, but the second block is one element longer.
    different = BlockIndex(10, [0, 4], [2, 6])
    self.assertFalse(base.equals(different))
def test_equals(self):
    """An index equals itself but not one with a different block length."""
    index = BlockIndex(10, [0, 4], [2, 5])

    # assertTrue/assertFalse replace the ``assert_`` alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(index.equals(index))
    self.assertFalse(index.equals(BlockIndex(10, [0, 4], [2, 6])))
def __new__(cls, data, index=None, sparse_index=None, kind='block',
            fill_value=None, name=None, copy=False):
    """Build the sparse series as a float64 ndarray view of type ``cls``.

    Parameters
    ----------
    data : SparseArray, Series, dict, tuple, list, ndarray or scalar
        A scalar is broadcast over ``index`` (which is then required).
    index : optional
        Labels; when omitted a default index of the sparse length is used.
    sparse_index : optional
        Used as-is for array-like ``data``; otherwise computed.
    kind : {'block', 'integer'}
    fill_value : optional
        Defaults to ``data.fill_value`` for a SparseArray, else NaN.
    name : optional
    copy : bool, default False

    Raises
    ------
    Exception
        If ``data`` is a scalar and no ``index`` is given.
    """
    is_sparse_array = isinstance(data, SparseArray)
    if fill_value is None:
        if is_sparse_array:
            fill_value = data.fill_value
        else:
            fill_value = nan

    if is_sparse_array:
        if isinstance(data, SparseSeries) and index is None:
            index = data.index
        elif index is not None:
            assert(len(index) == len(data))

        sparse_index = data.sp_index
        values = np.asarray(data)
    elif isinstance(data, (Series, dict)):
        # Convert first: a plain dict has no ``.index`` attribute, so
        # reading it before the conversion raised AttributeError.
        data = Series(data)
        if index is None:
            index = data.index

        values, sparse_index = make_sparse(data, kind=kind,
                                           fill_value=fill_value)
    elif isinstance(data, (tuple, list, np.ndarray)):
        # array-like
        if sparse_index is None:
            values, sparse_index = make_sparse(data, kind=kind,
                                               fill_value=fill_value)
        else:
            # Caller supplied already-sparsified values.
            values = data
            assert(len(values) == sparse_index.npoints)
    else:
        # Scalar: broadcast over the supplied index.
        if index is None:
            raise Exception('must pass index!')

        length = len(index)

        if data == fill_value or (isnull(data) and isnull(fill_value)):
            # Every entry is the fill value: nothing is stored.
            if kind == 'block':
                sparse_index = BlockIndex(length, [], [])
            else:
                sparse_index = IntIndex(length, [])
            values = np.array([])
        else:
            # Every entry is stored.
            if kind == 'block':
                locs, lens = ([0], [length]) if length else ([], [])
                sparse_index = BlockIndex(length, locs, lens)
            else:
                # NOTE(review): passes the labels in ``index`` as the
                # integer positions -- confirm this is intended.
                sparse_index = IntIndex(length, index)
            values = np.empty(length)
            values.fill(data)

    if index is None:
        index = com._default_index(sparse_index.length)
    index = _ensure_index(index)

    # Create array, do *not* copy data by default
    if copy:
        subarr = np.array(values, dtype=np.float64, copy=True)
    else:
        subarr = np.asarray(values, dtype=np.float64)

    if index.is_all_dates:
        cls = SparseTimeSeries

    # Change the class of the array to be the subclass type.
    output = subarr.view(cls)
    output.sp_index = sparse_index
    output.fill_value = np.float64(fill_value)
    output.index = index
    output.name = name
    return output
def test_to_block_index(self):
    """Converting a BlockIndex to a block index returns the same object."""
    original = BlockIndex(10, [0, 5], [4, 5])
    converted = original.to_block_index()
    self.assertIs(converted, original)
def __init__(self, data=None, index=None, sparse_index=None, kind='block',
             fill_value=None, name=None, dtype=None, copy=False,
             fastpath=False):
    """Initialise the sparse series from any of the supported inputs.

    Parameters
    ----------
    data : SparseArray, SparseSeries, Series, dict, tuple, list, ndarray,
        SingleBlockManager or scalar, optional
        ``None`` is treated as an empty list.  A scalar is broadcast over
        ``index``.
    index : optional
        Labels; when omitted a default index of the sparse length is used.
    sparse_index : optional
        Used as-is for array-like ``data``; otherwise computed.
    kind : {'block', 'integer'}
    fill_value : optional
        Defaults to ``data.fill_value`` for a SparseArray, else NaN.
    name : optional
        Defaults to ``data.name`` when ``data`` is a Series.
    dtype : optional
    copy : bool, default False
    fastpath : bool, default False
        Internal short-circuit that wraps ``data`` in a
        SingleBlockManager without the input handling below.
    """
    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()
    else:

        if data is None:
            data = []

        if isinstance(data, Series) and name is None:
            name = data.name

        is_sparse_array = isinstance(data, SparseArray)
        if fill_value is None:
            if is_sparse_array:
                fill_value = data.fill_value
            else:
                fill_value = np.nan

        if is_sparse_array:
            if isinstance(data, SparseSeries) and index is None:
                index = data.index.view()
            elif index is not None:
                assert (len(index) == len(data))

            sparse_index = data.sp_index
            data = np.asarray(data)

        elif isinstance(data, SparseSeries):
            if index is None:
                index = data.index.view()

            # extract the SingleBlockManager
            data = data._data

        elif isinstance(data, (Series, dict)):
            # Convert first: a plain dict has no ``.index`` attribute, so
            # reading it before the conversion raised AttributeError.
            data = Series(data)
            if index is None:
                index = data.index.view()

            data, sparse_index = make_sparse(data, kind=kind,
                                             fill_value=fill_value)

        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                data, sparse_index = make_sparse(data, kind=kind,
                                                 fill_value=fill_value)
            else:
                assert (len(data) == sparse_index.npoints)

        elif isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype)
            if index is None:
                index = data.index.view()
            else:
                data = data.reindex(index, copy=False)

        else:
            # Scalar: broadcast over the supplied index.
            # NOTE(review): ``len(index)`` raises TypeError when no index
            # was passed -- confirm whether an explicit error is wanted.
            length = len(index)

            if data == fill_value or (isnull(data) and
                                      isnull(fill_value)):
                # Every entry is the fill value: nothing is stored.
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                data = np.array([])

            else:
                # Every entry is stored.
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    # NOTE(review): passes the labels in ``index`` as the
                    # integer positions -- confirm this is intended.
                    sparse_index = IntIndex(length, index)
                v = data
                data = np.empty(length)
                data.fill(v)

        if index is None:
            index = com._default_index(sparse_index.length)
        index = _ensure_index(index)

        # create/copy the manager
        if isinstance(data, SingleBlockManager):

            if copy:
                data = data.copy()
        else:

            # create a sparse array
            if not isinstance(data, SparseArray):
                data = SparseArray(data, sparse_index=sparse_index,
                                   fill_value=fill_value, dtype=dtype,
                                   copy=copy)

            data = SingleBlockManager(data, index)

    generic.NDFrame.__init__(self, data)

    self.index = index
    self.name = name