Example #1
0
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            """Compare ``sparse_op`` on block vs. integer indexes, with fills.

            Two block indexes are built from (xloc, xlen) and (yloc, ylen);
            the sparse operation is run once with the block indexes and once
            with their integer equivalents, and both results are compared
            with ``python_op`` applied to the equivalent dense Series.

            ``eloc`` and ``elen`` are accepted but not used here.
            """
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            xdindex = xindex.to_int_index()
            ydindex = yindex.to_int_index()

            # one distinct value per stored point on each side
            x = np.arange(xindex.npoints) * 10. + 1
            y = np.arange(yindex.npoints) * 100. + 1

            xfill = 0
            yfill = 2

            result_block_vals, rb_index = sparse_op(
                x, xindex, xfill, y, yindex, yfill)
            result_int_vals, ri_index = sparse_op(x, xdindex, xfill,
                                                  y, ydindex, yfill)

            # assertTrue replaces the deprecated ``assert_`` alias
            self.assertTrue(rb_index.to_int_index().equals(ri_index))
            assert_equal(result_block_vals, result_int_vals)

            # check versus Series...
            # densify each operand to TEST_LENGTH, filling gaps with its
            # fill value, then apply the plain Python operator
            xseries = Series(x, xdindex.indices)
            xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

            yseries = Series(y, ydindex.indices)
            yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

            series_result = python_op(xseries, yseries)
            # keep only the positions present in the sparse result index
            series_result = series_result.reindex(ri_index.indices)

            assert_equal(result_block_vals, series_result.values)
            assert_equal(result_int_vals, series_result.values)
Example #2
0
    def test_to_int_index(self):
        """A two-block BlockIndex converts to the matching integer positions.

        Blocks start at 0 and 10 with lengths 4 and 6, so the integer index
        must list 0-3 followed by 10-15.
        """
        block_starts, block_sizes = [0, 10], [4, 6]
        expected_positions = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        int_index = BlockIndex(20, block_starts, block_sizes).to_int_index()

        assert_equal(int_index.indices, expected_positions)
Example #3
0
    def test_to_int_index(self):
        """Check ``BlockIndex.to_int_index`` against hand-written positions."""
        # two blocks: 4 points starting at 0 and 6 points starting at 10
        source_index = BlockIndex(20, [0, 10], [4, 6])
        wanted = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        converted = source_index.to_int_index()
        assert_equal(converted.indices, wanted)
Example #4
0
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            """Check that block -> integer -> block conversion is lossless.

            Builds a block index from (xloc, xlen) and another from
            (yloc, ylen), converts each to an integer index and back, and
            requires the result to equal the starting index.

            ``eloc`` and ``elen`` are accepted but not used here.
            """
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            # see if survive the round trip
            xbindex = xindex.to_int_index().to_block_index()
            ybindex = yindex.to_int_index().to_block_index()
            tm.assert_isinstance(xbindex, BlockIndex)
            # assertTrue replaces the deprecated ``assert_`` alias
            self.assertTrue(xbindex.equals(xindex))
            self.assertTrue(ybindex.equals(yindex))
Example #5
0
    def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
        """Check ``make_union`` for one pair of block layouts.

        The union of the two block indexes must be a BlockIndex whose block
        locations are ``eloc`` and block lengths are ``elen``.  The union of
        their integer counterparts must be an IntIndex with the same
        positions as the block union.
        """
        left = BlockIndex(TEST_LENGTH, xloc, xlen)
        right = BlockIndex(TEST_LENGTH, yloc, ylen)

        # block-based union
        block_union = left.make_union(right)
        assert isinstance(block_union, BlockIndex)
        assert_equal(block_union.blocs, eloc)
        assert_equal(block_union.blengths, elen)

        # integer-based union must agree with the block-based one
        int_union = left.to_int_index().make_union(right.to_int_index())
        assert isinstance(int_union, IntIndex)
        assert_equal(int_union.indices, block_union.to_int_index().indices)
Example #6
0
    def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
        """Feed one block layout to ``_check_correct`` and ``_check_length_exc``.

        Both helpers come from the enclosing scope and are called twice:
        once with block indexes and once with their integer equivalents.
        """
        block_x = BlockIndex(TEST_LENGTH, xloc, xlen)
        block_y = BlockIndex(TEST_LENGTH, yloc, ylen)
        block_expected = BlockIndex(TEST_LENGTH, eloc, elen)
        # same blocks as y, but declared one element longer than x
        block_longer = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

        _check_correct(block_x, block_y, block_expected)

        int_x = block_x.to_int_index()
        int_y = block_y.to_int_index()
        int_expected = block_expected.to_int_index()
        _check_correct(int_x, int_y, int_expected)

        # presumably the helper expects a length mismatch to raise
        _check_length_exc(block_x, block_longer)
        _check_length_exc(block_x.to_int_index(),
                          block_longer.to_int_index())
Example #7
0
    def test_check_integrity(self):
        """Exercise BlockIndex construction with valid and invalid layouts."""
        no_locs = []
        no_lengths = []

        # an empty layout is accepted both for 0-length and for a
        # non-zero length; only successful construction is checked
        for total_length in (0, 1):
            BlockIndex(total_length, no_locs, no_lengths)

        invalid_layouts = (
            ([5], [10]),       # block extend beyond end
            ([2, 5], [5, 3]),  # block overlap
        )
        for bad_locs, bad_lengths in invalid_layouts:
            self.assertRaises(Exception, BlockIndex, 10, bad_locs,
                              bad_lengths)
    def test_check_integrity(self):
        """BlockIndex accepts empty layouts and rejects inconsistent ones."""
        empty_locs, empty_lengths = [], []

        # 0-length OK; the constructed object itself is not inspected
        BlockIndex(0, empty_locs, empty_lengths)

        # also OK even though empty
        BlockIndex(1, empty_locs, empty_lengths)

        # block extend beyond end
        with self.assertRaises(Exception):
            BlockIndex(10, [5], [10])

        # block overlap
        with self.assertRaises(Exception):
            BlockIndex(10, [2, 5], [5, 3])
Example #9
0
def make_sparse(arr, kind='block', fill_value=nan):
    """
    Convert ndarray to sparse format

    Parameters
    ----------
    arr : ndarray
        Dense values; anything accepted by ``np.asarray``.
    kind : {'block', 'integer'}
        Type of sparse index to build.
    fill_value : NaN or another value
        Entries equal to this value (or NaN entries, when it is NaN) are
        left out of the returned sparse values.

    Returns
    -------
    (sparse_values, index) : (ndarray, SparseIndex)

    Raises
    ------
    ValueError
        If ``kind`` is neither 'block' nor 'integer'.
    """
    arr = np.asarray(arr)
    length = len(arr)

    if np.isnan(fill_value):
        # NaN never compares equal to itself, so it needs an explicit
        # isnan test.  Invert with ``~``: unary minus on a boolean array
        # is rejected by NumPy with a TypeError.
        mask = ~np.isnan(arr)
    else:
        mask = arr != fill_value

    # positions of the entries that are kept
    indices = np.arange(length, dtype=np.int32)[mask]

    if kind == 'block':
        locs, lens = splib.get_blocks(indices)
        index = BlockIndex(length, locs, lens)
    elif kind == 'integer':
        index = IntIndex(length, indices)
    else:  # pragma: no cover
        raise ValueError('must be block or integer type')

    sparsified_values = arr[mask]
    return sparsified_values, index
Example #10
0
def test_lookup():
    """Check ``lookup`` on a block index and on its integer equivalent.

    The index has length 20 with blocks starting at 5 (3 points) and at 12
    (6 points).  Each pair below is (position, expected lookup result).
    """
    expectations = [
        (0, -1),
        (5, 0),
        (7, 2),
        (8, -1),
        (9, -1),
        (10, -1),
        (11, -1),
        (12, 3),
        (17, 8),
        (18, -1),
    ]

    def _check(index):
        for position, expected in expectations:
            assert index.lookup(position) == expected

    block_index = BlockIndex(20, [5, 12], [3, 6])
    int_index = block_index.to_int_index()

    for candidate in (block_index, int_index):
        _check(candidate)
Example #11
0
def test_lookup():
    """``lookup`` must agree between a BlockIndex and its IntIndex form."""
    # positions for which lookup is expected to return -1
    absent = (0, 8, 9, 10, 11, 18)
    # positions with a stored point, paired with the expected result
    present = ((5, 0), (7, 2), (12, 3), (17, 8))

    def _check(index):
        for position in absent:
            assert index.lookup(position) == -1
        for position, slot in present:
            assert index.lookup(position) == slot

    as_blocks = BlockIndex(20, [5, 12], [3, 6])
    as_ints = as_blocks.to_int_index()

    _check(as_blocks)
    _check(as_ints)
Example #12
0
def _make_index(length, indices, kind):
    """Build a sparse index of the requested kind over ``indices``.

    ``kind`` is either the string 'block' / 'integer' or an existing
    BlockIndex / IntIndex instance, whose type then selects the kind.
    Anything else raises ValueError.
    """
    wants_block = kind == 'block' or isinstance(kind, BlockIndex)
    if wants_block:
        block_locs, block_lens = splib.get_blocks(indices)
        return BlockIndex(length, block_locs, block_lens)

    wants_integer = kind == 'integer' or isinstance(kind, IntIndex)
    if wants_integer:
        return IntIndex(length, indices)

    raise ValueError('must be block or integer type')  # pragma: no cover
Example #13
0
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            """Run ``sparse_op`` with fill values on both index kinds.

            The same operands are passed once with block indexes and once
            with integer indexes.  The two results must match each other
            and must match ``python_op`` applied to dense Series in which
            the gaps were filled with the respective fill values.

            ``eloc`` and ``elen`` are accepted but not used here.
            """
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            xdindex = xindex.to_int_index()
            ydindex = yindex.to_int_index()

            # one distinct value per stored point on each side
            x = np.arange(xindex.npoints) * 10. + 1
            y = np.arange(yindex.npoints) * 100. + 1

            xfill = 0
            yfill = 2

            result_block_vals, rb_index = sparse_op(x, xindex, xfill, y,
                                                    yindex, yfill)
            result_int_vals, ri_index = sparse_op(x, xdindex, xfill, y,
                                                  ydindex, yfill)

            # assertTrue replaces the deprecated ``assert_`` alias
            self.assertTrue(rb_index.to_int_index().equals(ri_index))
            assert_equal(result_block_vals, result_int_vals)

            # check versus Series...
            # expand each operand to TEST_LENGTH, filling the gaps
            xseries = Series(x, xdindex.indices)
            xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

            yseries = Series(y, ydindex.indices)
            yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

            series_result = python_op(xseries, yseries)
            # restrict to the positions present in the sparse result
            series_result = series_result.reindex(ri_index.indices)

            assert_equal(result_block_vals, series_result.values)
            assert_equal(result_int_vals, series_result.values)
Example #14
0
    def test_homogenize(self):
        """``spf.homogenize`` gives every series the same sparse index.

        Also checks that a series with a non-NaN fill value is rejected
        with a TypeError mentioning "NaN fill value".
        """
        def _check_matches(indices, expected):
            # one SparseSeries per index, keyed by its position in the list
            data = dict(
                (pos, SparseSeries(idx.to_int_index().indices,
                                   sparse_index=idx))
                for pos, idx in enumerate(indices))
            homogenized = spf.homogenize(data)

            for _, series in compat.iteritems(homogenized):
                assert series.sp_index.equals(expected)

        # expected1 holds the positions present in all three indexes
        indices1 = [
            BlockIndex(10, [2], [7]),
            BlockIndex(10, [1, 6], [3, 4]),
            BlockIndex(10, [0], [10]),
        ]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        # two equal indexes: the result is that same layout
        indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        zero_filled = SparseSeries(np.arange(7),
                                   sparse_index=expected2,
                                   fill_value=0)
        data = {'a': zero_filled}
        with tm.assertRaisesRegexp(TypeError, "NaN fill value"):
            spf.homogenize(data)
Example #15
0
    def test_homogenize(self):
        """``spf.homogenize`` gives every series the same sparse index.

        Also checks that homogenizing a series with a non-NaN fill value
        raises.
        """
        def _check_matches(indices, expected):
            data = {}
            for i, idx in enumerate(indices):
                data[i] = SparseSeries(idx.to_int_index().indices,
                                       sparse_index=idx)
            homogenized = spf.homogenize(data)

            # .values() exists on both Python 2 and 3 dicts, unlike
            # .iteritems(); the keys were never used here.
            for v in homogenized.values():
                assert (v.sp_index.equals(expected))

        # expected1 holds the positions present in all three indexes
        indices1 = [
            BlockIndex(10, [2], [7]),
            BlockIndex(10, [1, 6], [3, 4]),
            BlockIndex(10, [0], [10])
        ]
        expected1 = BlockIndex(10, [2, 6], [2, 3])
        _check_matches(indices1, expected1)

        indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
        expected2 = indices2[0]
        _check_matches(indices2, expected2)

        # must have NaN fill value
        data = {
            'a': SparseSeries(np.arange(7),
                              sparse_index=expected2,
                              fill_value=0)
        }
        nose.tools.assert_raises(Exception, spf.homogenize, data)
Example #16
0
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            """Compare ``sparse_op`` on block vs. integer indexes.

            The operation is run with block indexes and with the equivalent
            integer indexes; the two results must agree with each other and
            with ``python_op`` applied to Series built from the same data.

            ``eloc`` and ``elen`` are accepted but not used here.
            """
            block_x = BlockIndex(TEST_LENGTH, xloc, xlen)
            block_y = BlockIndex(TEST_LENGTH, yloc, ylen)
            int_x = block_x.to_int_index()
            int_y = block_y.to_int_index()

            # one distinct value per stored point on each side
            x_vals = np.arange(block_x.npoints) * 10. + 1
            y_vals = np.arange(block_y.npoints) * 100. + 1

            from_blocks, block_res_index = sparse_op(x_vals, block_x,
                                                     y_vals, block_y)
            from_ints, int_res_index = sparse_op(x_vals, int_x,
                                                 y_vals, int_y)

            indexes_agree = block_res_index.to_int_index().equals(
                int_res_index)
            self.assertTrue(indexes_agree)
            assert_equal(from_blocks, from_ints)

            # check versus Series...
            dense_x = Series(x_vals, int_x.indices)
            dense_y = Series(y_vals, int_y.indices)
            dense_result = python_op(dense_x, dense_y).valid()
            for sparse_vals in (from_blocks, from_ints):
                assert_equal(sparse_vals, dense_result.values)
Example #17
0
    def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
        """Verify ``make_union`` on block indexes and on integer indexes.

        ``eloc`` / ``elen`` are the block locations and lengths the block
        union is expected to have.
        """
        first = BlockIndex(TEST_LENGTH, xloc, xlen)
        second = BlockIndex(TEST_LENGTH, yloc, ylen)

        merged_blocks = first.make_union(second)
        assert isinstance(merged_blocks, BlockIndex)
        for actual, wanted in ((merged_blocks.blocs, eloc),
                               (merged_blocks.blengths, elen)):
            assert_equal(actual, wanted)

        # repeat with integer indexes; positions must match the block union
        first_int = first.to_int_index()
        second_int = second.to_int_index()
        merged_ints = first_int.make_union(second_int)
        assert isinstance(merged_ints, IntIndex)
        assert_equal(merged_ints.indices,
                     merged_blocks.to_int_index().indices)
Example #18
0
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            """Check that block -> integer -> block conversion is lossless.

            Each block index is converted to an integer index and back; the
            result must equal the index it started from.

            ``eloc`` and ``elen`` are accepted but not used here.
            """
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            # see if survive the round trip
            xbindex = xindex.to_int_index().to_block_index()
            ybindex = yindex.to_int_index().to_block_index()
            # assertTrue replaces the deprecated ``assert_`` alias
            self.assertTrue(isinstance(xbindex, BlockIndex))
            self.assertTrue(xbindex.equals(xindex))
            self.assertTrue(ybindex.equals(yindex))
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            """Check ``sparse_op`` gives the same answer for both index kinds.

            Results from block indexes and from integer indexes must match
            each other and the ``python_op`` result on equivalent Series.

            ``eloc`` and ``elen`` are accepted but not used here.
            """
            x_blocks = BlockIndex(TEST_LENGTH, xloc, xlen)
            y_blocks = BlockIndex(TEST_LENGTH, yloc, ylen)

            x_ints = x_blocks.to_int_index()
            y_ints = y_blocks.to_int_index()

            # operand values: 1, 11, 21, ... and 1, 101, 201, ...
            left = np.arange(x_blocks.npoints) * 10. + 1
            right = np.arange(y_blocks.npoints) * 100. + 1

            block_vals, block_out = sparse_op(left, x_blocks, right, y_blocks)
            int_vals, int_out = sparse_op(left, x_ints, right, y_ints)

            self.assertTrue(block_out.to_int_index().equals(int_out))
            assert_equal(block_vals, int_vals)

            # check versus Series...
            reference = python_op(Series(left, x_ints.indices),
                                  Series(right, y_ints.indices)).valid()
            assert_equal(block_vals, reference.values)
            assert_equal(int_vals, reference.values)
Example #20
0
 def test_to_block_index(self):
     """``to_block_index`` on a BlockIndex returns the very same object."""
     index = BlockIndex(10, [0, 5], [4, 5])
     # assertTrue replaces the deprecated ``assert_`` alias
     self.assertTrue(index.to_block_index() is index)
Example #21
0
    def test_equals(self):
        """A BlockIndex equals itself but not one with a longer last block."""
        reference = BlockIndex(10, [0, 4], [2, 5])
        self.assertTrue(reference.equals(reference))

        # same block starts, second block one point longer
        longer_second_block = BlockIndex(10, [0, 4], [2, 6])
        self.assertFalse(reference.equals(longer_second_block))
Example #22
0
    def test_equals(self):
        """A BlockIndex equals itself but not one with a longer last block."""
        index = BlockIndex(10, [0, 4], [2, 5])

        # assertTrue / assertFalse replace the deprecated ``assert_`` alias
        self.assertTrue(index.equals(index))
        self.assertFalse(index.equals(BlockIndex(10, [0, 4], [2, 6])))
Example #23
0
    def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
        """Run the enclosing scope's checks for one block layout.

        ``_check_correct`` receives (x, y, expected) and
        ``_check_length_exc`` receives x with an index one element longer;
        each is called with block indexes and with integer indexes.
        """
        block_triple = (
            BlockIndex(TEST_LENGTH, xloc, xlen),
            BlockIndex(TEST_LENGTH, yloc, ylen),
            BlockIndex(TEST_LENGTH, eloc, elen),
        )
        # y's blocks again, declared with a total length of TEST_LENGTH + 1
        oversized = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

        _check_correct(*block_triple)
        _check_correct(*[idx.to_int_index() for idx in block_triple])

        x_blocks = block_triple[0]
        _check_length_exc(x_blocks, oversized)
        _check_length_exc(x_blocks.to_int_index(), oversized.to_int_index())
    def __new__(cls, data, index=None, sparse_index=None, kind='block',
                fill_value=None, name=None, copy=False):
        """
        Build the sparse series array from ``data``.

        Parameters
        ----------
        data : SparseArray, Series, dict, tuple, list, ndarray or scalar
            Anything that is none of the listed containers is treated as a
            scalar to be repeated ``len(index)`` times.
        index : index-like, optional
            Labels for the result.  Required when ``data`` is a scalar;
            otherwise taken from ``data`` or generated as a default index.
        sparse_index : BlockIndex or IntIndex, optional
            Only consulted for tuple / list / ndarray input, in which case
            ``data`` is taken to be the already-sparsified values.
        kind : {'block', 'integer'}
            Sparse index type used when a sparse index has to be built.
        fill_value : float, optional
            Defaults to ``data.fill_value`` for SparseArray input and to
            NaN otherwise.
        name : object, optional
        copy : bool, default False
            Whether to copy the values instead of viewing them.

        Raises
        ------
        Exception
            If ``data`` is a scalar and no ``index`` is given.
        """
        is_sparse_array = isinstance(data, SparseArray)
        if fill_value is None:
            if is_sparse_array:
                fill_value = data.fill_value
            else:
                fill_value = nan

        if is_sparse_array:
            if isinstance(data, SparseSeries) and index is None:
                index = data.index
            elif index is not None:
                assert(len(index) == len(data))

            sparse_index = data.sp_index
            values = np.asarray(data)
        elif isinstance(data, (Series, dict)):
            # Convert first: a plain dict has no ``.index`` attribute, so
            # the labels have to be read from the resulting Series.
            data = Series(data)
            if index is None:
                index = data.index

            values, sparse_index = make_sparse(data, kind=kind,
                                               fill_value=fill_value)
        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                values, sparse_index = make_sparse(data, kind=kind,
                                                   fill_value=fill_value)
            else:
                values = data
                assert(len(values) == sparse_index.npoints)
        else:
            # scalar: broadcast over the given index
            if index is None:
                raise Exception('must pass index!')

            length = len(index)

            if data == fill_value or (isnull(data)
                    and isnull(fill_value)):
                # the scalar is the fill value, so nothing is stored
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                values = np.array([])
            else:
                # every position is stored
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                values = np.empty(length)
                values.fill(data)

        if index is None:
            index = com._default_index(sparse_index.length)
        index = _ensure_index(index)

        # Create array, do *not* copy data by default
        if copy:
            subarr = np.array(values, dtype=np.float64, copy=True)
        else:
            subarr = np.asarray(values, dtype=np.float64)

        if index.is_all_dates:
            cls = SparseTimeSeries

        # Change the class of the array to be the subclass type.
        output = subarr.view(cls)
        output.sp_index = sparse_index
        output.fill_value = np.float64(fill_value)
        output.index = index
        output.name = name
        return output
Example #25
0
    def test_equals(self):
        """``equals`` is true for the same index, false for a different one."""
        index = BlockIndex(10, [0, 4], [2, 5])

        # assertTrue / assertFalse replace the deprecated ``assert_`` alias
        self.assertTrue(index.equals(index))
        # second block is one point longer, so the indexes must differ
        self.assertFalse(index.equals(BlockIndex(10, [0, 4], [2, 6])))
Example #26
0
 def test_to_block_index(self):
     """``to_block_index`` on a BlockIndex hands back the same object."""
     original = BlockIndex(10, [0, 5], [4, 5])
     converted = original.to_block_index()
     self.assertIs(converted, original)
Example #27
0
    def __init__(self,
                 data=None,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):
        """
        Initialise the sparse series from ``data``.

        Parameters
        ----------
        data : SparseArray, SparseSeries, Series, dict, tuple, list,
               ndarray, SingleBlockManager or scalar, optional
            ``None`` is treated as an empty list.  Anything that is none of
            the listed containers is treated as a scalar to be repeated
            ``len(index)`` times.
        index : index-like, optional
            Labels for the result; taken from ``data`` or generated as a
            default index when omitted.
        sparse_index : BlockIndex or IntIndex, optional
            Only consulted for tuple / list / ndarray input, in which case
            ``data`` is taken to be the already-sparsified values.
        kind : {'block', 'integer'}
            Sparse index type used when a sparse index has to be built.
        fill_value : float, optional
            Defaults to ``data.fill_value`` for SparseArray input and to
            NaN otherwise.
        name : object, optional
            Defaults to ``data.name`` when ``data`` is a Series.
        dtype : optional
            Applied to SingleBlockManager input and passed on to
            SparseArray.
        copy : bool, default False
        fastpath : bool, default False
            Internal short-circuit: ``data`` is wrapped in a
            SingleBlockManager (unless it already is one) and none of the
            inference below is performed.
        """

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            is_sparse_array = isinstance(data, SparseArray)
            if fill_value is None:
                if is_sparse_array:
                    fill_value = data.fill_value
                else:
                    fill_value = np.nan

            if is_sparse_array:
                if isinstance(data, SparseSeries) and index is None:
                    index = data.index.view()
                elif index is not None:
                    assert (len(index) == len(data))

                sparse_index = data.sp_index
                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()

                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                # Convert first: a plain dict has no ``.index`` attribute,
                # so the labels have to be read from the resulting Series.
                data = Series(data)
                if index is None:
                    index = data.index.view()

                data, sparse_index = make_sparse(data,
                                                 kind=kind,
                                                 fill_value=fill_value)

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    data, sparse_index = make_sparse(data,
                                                     kind=kind,
                                                     fill_value=fill_value)
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                # scalar: broadcast over the given index
                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    # the scalar is the fill value, so nothing is stored
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    # every position is stored
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
    def test_equals(self):
        """``BlockIndex.equals``: reflexive, and sensitive to block lengths."""
        base = BlockIndex(10, [0, 4], [2, 5])
        same_as_self = base.equals(base)
        self.assertTrue(same_as_self)

        # identical block starts, but the second block has 6 points, not 5
        other = BlockIndex(10, [0, 4], [2, 6])
        self.assertFalse(base.equals(other))