Beispiel #1
0
class TestSparseDataFrame(TestCase, test_frame.SafeForSparse):
    klass = SparseDataFrame

    def setUp(self):
        self.data = {
            "A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            "B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            "C": np.arange(10),
            "D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
        }

        self.dates = DateRange("1/1/2011", periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)
        self.iframe = SparseDataFrame(self.data, index=self.dates, default_kind="integer")

        values = self.frame.values.copy()
        values[np.isnan(values)] = 0

        self.zframe = SparseDataFrame(values, columns=["A", "B", "C", "D"], default_fill_value=0, index=self.dates)

        values = self.frame.values.copy()
        values[np.isnan(values)] = 2
        self.fill_frame = SparseDataFrame(values, columns=["A", "B", "C", "D"], default_fill_value=2, index=self.dates)

        self.empty = SparseDataFrame()

    def test_as_matrix(self):
        empty = self.empty.as_matrix()
        self.assert_(empty.shape == (0, 0))

        no_cols = SparseDataFrame(index=np.arange(10))
        mat = no_cols.as_matrix()
        self.assert_(mat.shape == (10, 0))

        no_index = SparseDataFrame(columns=np.arange(10))
        mat = no_index.as_matrix()
        self.assert_(mat.shape == (0, 10))

    def test_copy(self):
        cp = self.frame.copy()
        self.assert_(isinstance(cp, SparseDataFrame))
        assert_sp_frame_equal(cp, self.frame)
        self.assert_(cp.index is self.frame.index)

    def test_constructor(self):
        for col, series in self.frame.iteritems():
            self.assert_(isinstance(series, SparseSeries))

        self.assert_(isinstance(self.iframe["A"].sp_index, IntIndex))

        # constructed zframe from matrix above
        self.assertEquals(self.zframe["A"].fill_value, 0)
        assert_almost_equal([0, 0, 0, 0, 1, 2, 3, 4, 5, 6], self.zframe["A"].values)

        # construct from nested dict
        data = {}
        for c, s in self.frame.iteritems():
            data[c] = s.to_dict()

        sdf = SparseDataFrame(data)
        assert_sp_frame_equal(sdf, self.frame)

        # TODO: test data is copied from inputs

        # init dict with different index
        idx = self.frame.index[:5]
        cons = SparseDataFrame(
            self.frame._series,
            index=idx,
            columns=self.frame.columns,
            default_fill_value=self.frame.default_fill_value,
            default_kind=self.frame.default_kind,
        )
        reindexed = self.frame.reindex(idx)
        assert_sp_frame_equal(cons, reindexed)

        # assert level parameter breaks reindex
        self.assertRaises(Exception, self.frame.reindex, idx, level=0)

    def test_constructor_ndarray(self):
        # no index or columns
        sp = SparseDataFrame(self.frame.values)

        # 1d
        sp = SparseDataFrame(self.data["A"], index=self.dates, columns=["A"])
        assert_sp_frame_equal(sp, self.frame.reindex(columns=["A"]))

        # raise on level argument
        self.assertRaises(Exception, self.frame.reindex, columns=["A"], level=1)

        # wrong length index / columns
        self.assertRaises(Exception, SparseDataFrame, self.frame.values, index=self.frame.index[:-1])
        self.assertRaises(Exception, SparseDataFrame, self.frame.values, columns=self.frame.columns[:-1])

    def test_constructor_empty(self):
        sp = SparseDataFrame()
        self.assert_(len(sp.index) == 0)
        self.assert_(len(sp.columns) == 0)

    def test_constructor_dataframe(self):
        dense = self.frame.to_dense()
        sp = SparseDataFrame(dense)
        assert_sp_frame_equal(sp, self.frame)

    def test_array_interface(self):
        res = np.sqrt(self.frame)
        dres = np.sqrt(self.frame.to_dense())
        assert_frame_equal(res.to_dense(), dres)

    def test_pickle(self):
        def _test_roundtrip(frame):
            pickled = pickle.dumps(frame, protocol=pickle.HIGHEST_PROTOCOL)
            unpickled = pickle.loads(pickled)
            assert_sp_frame_equal(frame, unpickled)

        _test_roundtrip(SparseDataFrame())
        self._check_all(_test_roundtrip)

    def test_dense_to_sparse(self):
        df = DataFrame({"A": [nan, nan, nan, 1, 2], "B": [1, 2, nan, nan, nan]})
        sdf = df.to_sparse()
        self.assert_(isinstance(sdf, SparseDataFrame))
        self.assert_(np.isnan(sdf.default_fill_value))
        self.assert_(isinstance(sdf["A"].sp_index, BlockIndex))
        tm.assert_frame_equal(sdf.to_dense(), df)

        sdf = df.to_sparse(kind="integer")
        self.assert_(isinstance(sdf["A"].sp_index, IntIndex))

        df = DataFrame({"A": [0, 0, 0, 1, 2], "B": [1, 2, 0, 0, 0]}, dtype=float)
        sdf = df.to_sparse(fill_value=0)
        self.assertEquals(sdf.default_fill_value, 0)
        tm.assert_frame_equal(sdf.to_dense(), df)

    def test_sparse_to_dense(self):
        pass

    def test_sparse_series_ops(self):
        self._check_all(self._check_frame_ops)

    def _check_frame_ops(self, frame):
        fill = frame.default_fill_value

        def _compare_to_dense(a, b, da, db, op):
            sparse_result = op(a, b)
            dense_result = op(da, db)

            dense_result = dense_result.to_sparse(fill_value=fill)
            assert_sp_frame_equal(sparse_result, dense_result, exact_indices=False)

            if isinstance(a, DataFrame) and isinstance(db, DataFrame):
                mixed_result = op(a, db)
                self.assert_(isinstance(mixed_result, SparseDataFrame))
                assert_sp_frame_equal(mixed_result, sparse_result, exact_indices=False)

        opnames = ["add", "sub", "mul", "truediv", "floordiv"]
        ops = [getattr(operator, name) for name in opnames]

        fidx = frame.index

        # time series operations

        series = [
            frame["A"],
            frame["B"],
            frame["C"],
            frame["D"],
            frame["A"].reindex(fidx[:7]),
            frame["A"].reindex(fidx[::2]),
            SparseSeries([], index=[]),
        ]

        for op in ops:
            _compare_to_dense(frame, frame[::2], frame.to_dense(), frame[::2].to_dense(), op)
            for s in series:
                _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), op)
                _compare_to_dense(s, frame, s.to_dense(), frame.to_dense(), op)

        # cross-sectional operations
        series = [frame.xs(fidx[0]), frame.xs(fidx[3]), frame.xs(fidx[5]), frame.xs(fidx[7]), frame.xs(fidx[5])[:2]]

        for op in ops:
            for s in series:
                _compare_to_dense(frame, s, frame.to_dense(), s, op)
                _compare_to_dense(s, frame, s, frame.to_dense(), op)

        # it works!
        result = self.frame + self.frame.ix[:, ["A", "B"]]

    def test_op_corners(self):
        empty = self.empty + self.empty
        self.assert_(not empty)

        foo = self.frame + self.empty
        assert_frame_equal(foo, self.frame * np.nan)

        foo = self.empty + self.frame
        assert_frame_equal(foo, self.frame * np.nan)

    def test_scalar_ops(self):
        pass

    def test_getitem(self):
        pass

    def test_set_value(self):
        res = self.frame.set_value("foobar", "B", 1.5)
        self.assert_(res is not self.frame)
        self.assert_(res.index[-1] == "foobar")
        self.assertEqual(res.get_value("foobar", "B"), 1.5)

        res2 = res.set_value("foobar", "qux", 1.5)
        self.assert_(res2 is not res)
        self.assert_(np.array_equal(res2.columns, list(self.frame.columns) + ["qux"]))
        self.assertEqual(res2.get_value("foobar", "qux"), 1.5)

    def test_fancy_index_misc(self):
        # axis = 0
        sliced = self.frame.ix[-2:, :]
        expected = self.frame.reindex(index=self.frame.index[-2:])
        assert_sp_frame_equal(sliced, expected)

        # axis = 1
        sliced = self.frame.ix[:, -2:]
        expected = self.frame.reindex(columns=self.frame.columns[-2:])
        assert_sp_frame_equal(sliced, expected)

    def test_getitem_overload(self):
        # slicing
        sl = self.frame[:20]
        assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20]))

        # boolean indexing
        d = self.frame.index[5]
        indexer = self.frame.index > d

        subindex = self.frame.index[indexer]
        subframe = self.frame[indexer]

        self.assert_(np.array_equal(subindex, subframe.index))
        self.assertRaises(Exception, self.frame.__getitem__, indexer[:-1])

    def test_setitem(self):
        def _check_frame(frame):
            N = len(frame)

            # insert SparseSeries
            frame["E"] = frame["A"]
            self.assert_(isinstance(frame["E"], SparseSeries))
            assert_sp_series_equal(frame["E"], frame["A"])

            # insert SparseSeries differently-indexed
            to_insert = frame["A"][::2]
            frame["E"] = to_insert
            assert_series_equal(frame["E"].to_dense(), to_insert.to_dense().reindex(frame.index))

            # insert Series
            frame["F"] = frame["A"].to_dense()
            self.assert_(isinstance(frame["F"], SparseSeries))
            assert_sp_series_equal(frame["F"], frame["A"])

            # insert Series differently-indexed
            to_insert = frame["A"].to_dense()[::2]
            frame["G"] = to_insert
            assert_series_equal(frame["G"].to_dense(), to_insert.reindex(frame.index))

            # insert ndarray
            frame["H"] = np.random.randn(N)
            self.assert_(isinstance(frame["H"], SparseSeries))

            to_sparsify = np.random.randn(N)
            to_sparsify[N // 2 :] = frame.default_fill_value
            frame["I"] = to_sparsify
            self.assertEquals(len(frame["I"].sp_values), N // 2)

            # insert ndarray wrong size
            self.assertRaises(Exception, frame.__setitem__, "foo", np.random.randn(N - 1))

            # scalar value
            frame["J"] = 5
            self.assertEquals(len(frame["J"].sp_values), N)
            self.assert_((frame["J"].sp_values == 5).all())

            frame["K"] = frame.default_fill_value
            self.assertEquals(len(frame["K"].sp_values), 0)

        self._check_all(_check_frame)

    def test_setitem_corner(self):
        self.frame["a"] = self.frame["B"]
        assert_sp_series_equal(self.frame["a"], self.frame["B"])

    def test_setitem_array(self):
        arr = self.frame["B"].view(SparseArray)

        self.frame["E"] = arr
        assert_sp_series_equal(self.frame["E"], self.frame["B"])
        self.assertRaises(Exception, self.frame.__setitem__, "F", arr[:-1])

    def test_delitem(self):
        A = self.frame["A"]
        C = self.frame["C"]

        del self.frame["B"]
        self.assert_("B" not in self.frame)
        assert_sp_series_equal(self.frame["A"], A)
        assert_sp_series_equal(self.frame["C"], C)

        del self.frame["D"]
        self.assert_("D" not in self.frame)

        del self.frame["A"]
        self.assert_("A" not in self.frame)

    def test_set_columns(self):
        self.frame.columns = self.frame.columns
        self.assertRaises(Exception, setattr, self.frame, "columns", self.frame.columns[:-1])

    def test_set_index(self):
        self.frame.index = self.frame.index
        self.assertRaises(Exception, setattr, self.frame, "index", self.frame.index[:-1])

    def test_append(self):
        a = self.frame[:5]
        b = self.frame[5:]

        appended = a.append(b)
        assert_sp_frame_equal(appended, self.frame)

        a = self.frame.ix[:5, :3]
        b = self.frame.ix[5:]
        appended = a.append(b)
        assert_sp_frame_equal(appended.ix[:, :3], self.frame.ix[:, :3])

    def test_apply(self):
        applied = self.frame.apply(np.sqrt)
        self.assert_(isinstance(applied, SparseDataFrame))
        assert_almost_equal(applied.values, np.sqrt(self.frame.values))

        applied = self.fill_frame.apply(np.sqrt)
        self.assert_(applied["A"].fill_value == np.sqrt(2))

        # agg / broadcast
        applied = self.frame.apply(np.sum)
        assert_series_equal(applied, self.frame.to_dense().apply(np.sum))

        broadcasted = self.frame.apply(np.sum, broadcast=True)
        self.assert_(isinstance(broadcasted, SparseDataFrame))
        assert_frame_equal(broadcasted.to_dense(), self.frame.to_dense().apply(np.sum, broadcast=True))

        self.assert_(self.empty.apply(np.sqrt) is self.empty)

    def test_applymap(self):
        # just test that it works
        result = self.frame.applymap(lambda x: x * 2)
        self.assert_(isinstance(result, SparseDataFrame))

    def test_astype(self):
        self.assertRaises(Exception, self.frame.astype, np.int64)

    def test_fillna(self):
        self.assertRaises(NotImplementedError, self.frame.fillna, 0)

    def test_rename(self):
        # just check this works
        renamed = self.frame.rename(index=str)
        renamed = self.frame.rename(columns=lambda x: "%s%d" % (x, len(x)))

    def test_corr(self):
        res = self.frame.corr()
        assert_frame_equal(res, self.frame.to_dense().corr())

    def test_describe(self):
        self.frame["foo"] = np.nan
        desc = self.frame.describe()

    def test_join(self):
        left = self.frame.ix[:, ["A", "B"]]
        right = self.frame.ix[:, ["C", "D"]]
        joined = left.join(right)
        assert_sp_frame_equal(joined, self.frame)

        right = self.frame.ix[:, ["B", "D"]]
        self.assertRaises(Exception, left.join, right)

    def test_reindex(self):
        def _check_frame(frame):
            index = frame.index
            sidx = index[::2]
            sidx2 = index[:5]

            sparse_result = frame.reindex(sidx)
            dense_result = frame.to_dense().reindex(sidx)
            assert_frame_equal(sparse_result.to_dense(), dense_result)

            assert_frame_equal(frame.reindex(list(sidx)).to_dense(), dense_result)

            sparse_result2 = sparse_result.reindex(index)
            dense_result2 = dense_result.reindex(index)
            assert_frame_equal(sparse_result2.to_dense(), dense_result2)

            # propagate CORRECT fill value
            assert_almost_equal(sparse_result.default_fill_value, frame.default_fill_value)
            assert_almost_equal(sparse_result["A"].fill_value, frame["A"].fill_value)

            # length zero
            length_zero = frame.reindex([])
            self.assertEquals(len(length_zero), 0)
            self.assertEquals(len(length_zero.columns), len(frame.columns))
            self.assertEquals(len(length_zero["A"]), 0)

            # frame being reindexed has length zero
            length_n = length_zero.reindex(index)
            self.assertEquals(len(length_n), len(frame))
            self.assertEquals(len(length_n.columns), len(frame.columns))
            self.assertEquals(len(length_n["A"]), len(frame))

            # reindex columns
            reindexed = frame.reindex(columns=["A", "B", "Z"])
            self.assertEquals(len(reindexed.columns), 3)
            assert_almost_equal(reindexed["Z"].fill_value, frame.default_fill_value)
            self.assert_(np.isnan(reindexed["Z"].sp_values).all())

        _check_frame(self.frame)
        _check_frame(self.iframe)
        _check_frame(self.zframe)
        _check_frame(self.fill_frame)

        # with copy=False
        reindexed = self.frame.reindex(self.frame.index, copy=False)
        reindexed["F"] = reindexed["A"]
        self.assert_("F" in self.frame)

        reindexed = self.frame.reindex(self.frame.index)
        reindexed["G"] = reindexed["A"]
        self.assert_("G" not in self.frame)

    def test_take(self):
        result = self.frame.take([1, 0, 2], axis=1)
        expected = self.frame.reindex(columns=["B", "A", "C"])
        assert_sp_frame_equal(result, expected)

    def test_density(self):
        df = SparseDataFrame(
            {
                "A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                "B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
                "C": np.arange(10),
                "D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
            }
        )

        self.assertEquals(df.density, 0.75)

    def test_to_dense(self):
        def _check(frame):
            dense_dm = frame.to_dense()
            assert_frame_equal(frame, dense_dm)

        self._check_all(_check)

    def test_stack_sparse_frame(self):
        def _check(frame):
            dense_frame = frame.to_dense()

            wp = Panel.from_dict({"foo": frame})
            from_dense_lp = wp.to_frame()

            from_sparse_lp = spf.stack_sparse_frame(frame)

            self.assert_(np.array_equal(from_dense_lp.values, from_sparse_lp.values))

        _check(self.frame)
        _check(self.iframe)

        # for now
        self.assertRaises(Exception, _check, self.zframe)
        self.assertRaises(Exception, _check, self.fill_frame)

    def test_transpose(self):
        def _check(frame):
            transposed = frame.T
            untransposed = transposed.T
            assert_sp_frame_equal(frame, untransposed)

        self._check_all(_check)

    def test_shift(self):
        def _check(frame):
            shifted = frame.shift(0)
            self.assert_(shifted is not frame)
            assert_sp_frame_equal(shifted, frame)

            f = lambda s: s.shift(1)
            _dense_frame_compare(frame, f)

            f = lambda s: s.shift(-2)
            _dense_frame_compare(frame, f)

            f = lambda s: s.shift(2, timeRule="WEEKDAY")
            _dense_frame_compare(frame, f)

            f = lambda s: s.shift(2, offset=datetools.bday)
            _dense_frame_compare(frame, f)

        self._check_all(_check)

    def test_count(self):
        result = self.frame.count()
        dense_result = self.frame.to_dense().count()
        assert_series_equal(result, dense_result)

        result = self.frame.count(1)
        dense_result = self.frame.to_dense().count(1)

        # win32 don't check dtype
        assert_series_equal(result, dense_result, check_dtype=False)

    def test_cumsum(self):
        result = self.frame.cumsum()
        expected = self.frame.to_dense().cumsum()
        self.assert_(isinstance(result, SparseDataFrame))
        assert_frame_equal(result.to_dense(), expected)

    def _check_all(self, check_func):
        check_func(self.frame)
        check_func(self.iframe)
        check_func(self.zframe)
        check_func(self.fill_frame)

    def test_combine_first(self):
        df = self.frame

        result = df[::2].combine_first(df)
        result2 = df[::2].combine_first(df.to_dense())

        expected = df[::2].to_dense().combine_first(df.to_dense())
        expected = expected.to_sparse(fill_value=df.default_fill_value)

        assert_sp_frame_equal(result, result2)
        assert_sp_frame_equal(result, expected)
Beispiel #2
0
class TestSparseDataFrame(TestCase, test_frame.SafeForSparse):
    klass = SparseDataFrame

    def setUp(self):
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]
        }

        self.dates = DateRange('1/1/2011', periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)
        self.iframe = SparseDataFrame(self.data,
                                      index=self.dates,
                                      default_kind='integer')

        values = self.frame.values.copy()
        values[np.isnan(values)] = 0

        self.zframe = SparseDataFrame(values,
                                      columns=['A', 'B', 'C', 'D'],
                                      default_fill_value=0,
                                      index=self.dates)

        values = self.frame.values.copy()
        values[np.isnan(values)] = 2
        self.fill_frame = SparseDataFrame(values,
                                          columns=['A', 'B', 'C', 'D'],
                                          default_fill_value=2,
                                          index=self.dates)

        self.empty = SparseDataFrame()

    def test_as_matrix(self):
        empty = self.empty.as_matrix()
        self.assert_(empty.shape == (0, 0))

        no_cols = SparseDataFrame(index=np.arange(10))
        mat = no_cols.as_matrix()
        self.assert_(mat.shape == (10, 0))

        no_index = SparseDataFrame(columns=np.arange(10))
        mat = no_index.as_matrix()
        self.assert_(mat.shape == (0, 10))

    def test_copy(self):
        cp = self.frame.copy()
        self.assert_(isinstance(cp, SparseDataFrame))
        assert_sp_frame_equal(cp, self.frame)
        self.assert_(cp.index is self.frame.index)

    def test_constructor(self):
        for col, series in self.frame.iteritems():
            self.assert_(isinstance(series, SparseSeries))

        self.assert_(isinstance(self.iframe['A'].sp_index, IntIndex))

        # constructed zframe from matrix above
        self.assertEquals(self.zframe['A'].fill_value, 0)
        assert_almost_equal([0, 0, 0, 0, 1, 2, 3, 4, 5, 6],
                            self.zframe['A'].values)

        # construct from nested dict
        data = {}
        for c, s in self.frame.iteritems():
            data[c] = s.to_dict()

        sdf = SparseDataFrame(data)
        assert_sp_frame_equal(sdf, self.frame)

        # TODO: test data is copied from inputs

        # init dict with different index
        idx = self.frame.index[:5]
        cons = SparseDataFrame(
            self.frame._series,
            index=idx,
            columns=self.frame.columns,
            default_fill_value=self.frame.default_fill_value,
            default_kind=self.frame.default_kind)
        reindexed = self.frame.reindex(idx)
        assert_sp_frame_equal(cons, reindexed)

        # assert level parameter breaks reindex
        self.assertRaises(Exception, self.frame.reindex, idx, level=0)

    def test_constructor_ndarray(self):
        # no index or columns
        sp = SparseDataFrame(self.frame.values)

        # 1d
        sp = SparseDataFrame(self.data['A'], index=self.dates, columns=['A'])
        assert_sp_frame_equal(sp, self.frame.reindex(columns=['A']))

        # raise on level argument
        self.assertRaises(Exception,
                          self.frame.reindex,
                          columns=['A'],
                          level=1)

        # wrong length index / columns
        self.assertRaises(Exception,
                          SparseDataFrame,
                          self.frame.values,
                          index=self.frame.index[:-1])
        self.assertRaises(Exception,
                          SparseDataFrame,
                          self.frame.values,
                          columns=self.frame.columns[:-1])

    def test_constructor_empty(self):
        sp = SparseDataFrame()
        self.assert_(len(sp.index) == 0)
        self.assert_(len(sp.columns) == 0)

    def test_constructor_dataframe(self):
        dense = self.frame.to_dense()
        sp = SparseDataFrame(dense)
        assert_sp_frame_equal(sp, self.frame)

    def test_array_interface(self):
        res = np.sqrt(self.frame)
        dres = np.sqrt(self.frame.to_dense())
        assert_frame_equal(res.to_dense(), dres)

    def test_pickle(self):
        def _test_roundtrip(frame):
            pickled = pickle.dumps(frame, protocol=pickle.HIGHEST_PROTOCOL)
            unpickled = pickle.loads(pickled)
            assert_sp_frame_equal(frame, unpickled)

        self._check_all(_test_roundtrip)

    def test_dense_to_sparse(self):
        df = DataFrame({
            'A': [nan, nan, nan, 1, 2],
            'B': [1, 2, nan, nan, nan]
        })
        sdf = df.to_sparse()
        self.assert_(isinstance(sdf, SparseDataFrame))
        self.assert_(np.isnan(sdf.default_fill_value))
        self.assert_(isinstance(sdf['A'].sp_index, BlockIndex))
        testing.assert_frame_equal(sdf.to_dense(), df)

        sdf = df.to_sparse(kind='integer')
        self.assert_(isinstance(sdf['A'].sp_index, IntIndex))

        df = DataFrame({
            'A': [0, 0, 0, 1, 2],
            'B': [1, 2, 0, 0, 0]
        },
                       dtype=float)
        sdf = df.to_sparse(fill_value=0)
        self.assertEquals(sdf.default_fill_value, 0)
        testing.assert_frame_equal(sdf.to_dense(), df)

    def test_sparse_to_dense(self):
        pass

    def test_sparse_series_ops(self):
        self._check_all(self._check_frame_ops)

    def _check_frame_ops(self, frame):
        fill = frame.default_fill_value

        def _compare_to_dense(a, b, da, db, op):
            sparse_result = op(a, b)
            dense_result = op(da, db)

            dense_result = dense_result.to_sparse(fill_value=fill)
            assert_sp_frame_equal(sparse_result,
                                  dense_result,
                                  exact_indices=False)

            if isinstance(a, DataFrame) and isinstance(db, DataFrame):
                mixed_result = op(a, db)
                self.assert_(isinstance(mixed_result, SparseDataFrame))
                assert_sp_frame_equal(mixed_result,
                                      sparse_result,
                                      exact_indices=False)

        opnames = ['add', 'sub', 'mul', 'truediv', 'floordiv']
        ops = [getattr(operator, name) for name in opnames]

        fidx = frame.index

        # time series operations

        series = [
            frame['A'], frame['B'], frame['C'], frame['D'],
            frame['A'].reindex(fidx[:7]), frame['A'].reindex(fidx[::2]),
            SparseSeries([], index=[])
        ]

        for op in ops:
            _compare_to_dense(frame, frame[::2], frame.to_dense(),
                              frame[::2].to_dense(), op)
            for s in series:
                _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), op)
                _compare_to_dense(s, frame, s.to_dense(), frame.to_dense(), op)

        # cross-sectional operations
        series = [
            frame.xs(fidx[0]),
            frame.xs(fidx[3]),
            frame.xs(fidx[5]),
            frame.xs(fidx[7]),
            frame.xs(fidx[5])[:2]
        ]

        for op in ops:
            for s in series:
                _compare_to_dense(frame, s, frame.to_dense(), s, op)
                _compare_to_dense(s, frame, s, frame.to_dense(), op)

    def test_op_corners(self):
        empty = self.empty + self.empty
        self.assert_(not empty)

        foo = self.frame + self.empty
        assert_sp_frame_equal(foo, self.frame * np.nan)

        foo = self.empty + self.frame
        assert_sp_frame_equal(foo, self.frame * np.nan)

    def test_scalar_ops(self):
        pass

    def test_getitem(self):
        pass

    def test_set_value(self):
        res = self.frame.set_value('foobar', 'B', 1.5)
        self.assert_(res is not self.frame)
        self.assert_(res.index[-1] == 'foobar')
        self.assertEqual(res.get_value('foobar', 'B'), 1.5)

        res2 = res.set_value('foobar', 'qux', 1.5)
        self.assert_(res2 is not res)
        self.assert_(
            np.array_equal(res2.columns,
                           list(self.frame.columns) + ['qux']))
        self.assertEqual(res2.get_value('foobar', 'qux'), 1.5)

    def test_fancy_index_misc(self):
        # axis = 0
        sliced = self.frame.ix[-2:, :]
        expected = self.frame.reindex(index=self.frame.index[-2:])
        assert_sp_frame_equal(sliced, expected)

        # axis = 1
        sliced = self.frame.ix[:, -2:]
        expected = self.frame.reindex(columns=self.frame.columns[-2:])
        assert_sp_frame_equal(sliced, expected)

    def test_getitem_overload(self):
        # slicing
        sl = self.frame[:20]
        assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20]))

        # boolean indexing
        d = self.frame.index[5]
        indexer = self.frame.index > d

        subindex = self.frame.index[indexer]
        subframe = self.frame[indexer]

        self.assert_(np.array_equal(subindex, subframe.index))
        self.assertRaises(Exception, self.frame.__getitem__, indexer[:-1])

    def test_setitem(self):
        def _check_frame(frame):
            N = len(frame)

            # insert SparseSeries
            frame['E'] = frame['A']
            self.assert_(isinstance(frame['E'], SparseSeries))
            assert_sp_series_equal(frame['E'], frame['A'])

            # insert SparseSeries differently-indexed
            to_insert = frame['A'][::2]
            frame['E'] = to_insert
            assert_series_equal(frame['E'].to_dense(),
                                to_insert.to_dense().reindex(frame.index))

            # insert Series
            frame['F'] = frame['A'].to_dense()
            self.assert_(isinstance(frame['F'], SparseSeries))
            assert_sp_series_equal(frame['F'], frame['A'])

            # insert Series differently-indexed
            to_insert = frame['A'].to_dense()[::2]
            frame['G'] = to_insert
            assert_series_equal(frame['G'].to_dense(),
                                to_insert.reindex(frame.index))

            # insert ndarray
            frame['H'] = np.random.randn(N)
            self.assert_(isinstance(frame['H'], SparseSeries))

            to_sparsify = np.random.randn(N)
            to_sparsify[N // 2:] = frame.default_fill_value
            frame['I'] = to_sparsify
            self.assertEquals(len(frame['I'].sp_values), N // 2)

            # insert ndarray wrong size
            self.assertRaises(Exception, frame.__setitem__, 'foo',
                              np.random.randn(N - 1))

            # scalar value
            frame['J'] = 5
            self.assertEquals(len(frame['J'].sp_values), N)
            self.assert_((frame['J'].sp_values == 5).all())

            frame['K'] = frame.default_fill_value
            self.assertEquals(len(frame['K'].sp_values), 0)

        self._check_all(_check_frame)

    def test_setitem_corner(self):
        self.frame['a'] = self.frame['B']
        assert_sp_series_equal(self.frame['a'], self.frame['B'])

    def test_setitem_array(self):
        arr = self.frame['B'].view(SparseArray)

        self.frame['E'] = arr
        assert_sp_series_equal(self.frame['E'], self.frame['B'])
        self.assertRaises(Exception, self.frame.__setitem__, 'F', arr[:-1])

    def test_delitem(self):
        A = self.frame['A']
        C = self.frame['C']

        del self.frame['B']
        self.assert_('B' not in self.frame)
        assert_sp_series_equal(self.frame['A'], A)
        assert_sp_series_equal(self.frame['C'], C)

        del self.frame['D']
        self.assert_('D' not in self.frame)

        del self.frame['A']
        self.assert_('A' not in self.frame)

    def test_set_columns(self):
        self.frame.columns = self.frame.columns
        self.assertRaises(Exception, setattr, self.frame, 'columns',
                          self.frame.columns[:-1])

    def test_set_index(self):
        self.frame.index = self.frame.index
        self.assertRaises(Exception, setattr, self.frame, 'index',
                          self.frame.index[:-1])

    def test_append(self):
        a = self.frame[:5]
        b = self.frame[5:]

        appended = a.append(b)
        assert_sp_frame_equal(appended, self.frame)

        a = self.frame.ix[:5, :3]
        b = self.frame.ix[5:]
        appended = a.append(b)
        assert_sp_frame_equal(appended.ix[:, :3], self.frame.ix[:, :3])

    def test_apply(self):
        applied = self.frame.apply(np.sqrt)
        self.assert_(isinstance(applied, SparseDataFrame))
        assert_almost_equal(applied.values, np.sqrt(self.frame.values))

        applied = self.fill_frame.apply(np.sqrt)
        self.assert_(applied['A'].fill_value == np.sqrt(2))

        # agg / broadcast
        applied = self.frame.apply(np.sum)
        assert_series_equal(applied, self.frame.to_dense().apply(np.sum))

        broadcasted = self.frame.apply(np.sum, broadcast=True)
        self.assert_(isinstance(broadcasted, SparseDataFrame))
        assert_frame_equal(broadcasted.to_dense(),
                           self.frame.to_dense().apply(np.sum, broadcast=True))

        self.assert_(self.empty.apply(np.sqrt) is self.empty)

    def test_applymap(self):
        # just test that it works
        result = self.frame.applymap(lambda x: x * 2)
        self.assert_(isinstance(result, SparseDataFrame))

    def test_astype(self):
        self.assertRaises(Exception, self.frame.astype, np.int64)

    def test_fillna(self):
        self.assertRaises(NotImplementedError, self.frame.fillna, 0)

    def test_rename(self):
        # just check this works
        renamed = self.frame.rename(index=str)
        renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x)))

    def test_corr(self):
        res = self.frame.corr()
        assert_frame_equal(res, self.frame.to_dense().corr())

    def test_describe(self):
        self.frame['foo'] = np.nan
        desc = self.frame.describe()

    def test_join(self):
        left = self.frame.ix[:, ['A', 'B']]
        right = self.frame.ix[:, ['C', 'D']]
        joined = left.join(right)
        assert_sp_frame_equal(joined, self.frame)

        right = self.frame.ix[:, ['B', 'D']]
        self.assertRaises(Exception, left.join, right)

    def test_reindex(self):
        def _check_frame(frame):
            index = frame.index
            sidx = index[::2]
            sidx2 = index[:5]

            sparse_result = frame.reindex(sidx)
            dense_result = frame.to_dense().reindex(sidx)
            assert_frame_equal(sparse_result.to_dense(), dense_result)

            assert_frame_equal(
                frame.reindex(list(sidx)).to_dense(), dense_result)

            sparse_result2 = sparse_result.reindex(index)
            dense_result2 = dense_result.reindex(index)
            assert_frame_equal(sparse_result2.to_dense(), dense_result2)

            # propagate CORRECT fill value
            assert_almost_equal(sparse_result.default_fill_value,
                                frame.default_fill_value)
            assert_almost_equal(sparse_result['A'].fill_value,
                                frame['A'].fill_value)

            # length zero
            length_zero = frame.reindex([])
            self.assertEquals(len(length_zero), 0)
            self.assertEquals(len(length_zero.columns), len(frame.columns))
            self.assertEquals(len(length_zero['A']), 0)

            # frame being reindexed has length zero
            length_n = length_zero.reindex(index)
            self.assertEquals(len(length_n), len(frame))
            self.assertEquals(len(length_n.columns), len(frame.columns))
            self.assertEquals(len(length_n['A']), len(frame))

            # reindex columns
            reindexed = frame.reindex(columns=['A', 'B', 'Z'])
            self.assertEquals(len(reindexed.columns), 3)
            assert_almost_equal(reindexed['Z'].fill_value,
                                frame.default_fill_value)
            self.assert_(np.isnan(reindexed['Z'].sp_values).all())

        _check_frame(self.frame)
        _check_frame(self.iframe)
        _check_frame(self.zframe)
        _check_frame(self.fill_frame)

        # with copy=False
        reindexed = self.frame.reindex(self.frame.index, copy=False)
        reindexed['F'] = reindexed['A']
        self.assert_('F' in self.frame)

        reindexed = self.frame.reindex(self.frame.index)
        reindexed['G'] = reindexed['A']
        self.assert_('G' not in self.frame)

    def test_density(self):
        df = SparseDataFrame({
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]
        })

        self.assertEquals(df.density, 0.75)

    def test_to_dense(self):
        def _check(frame):
            dense_dm = frame.to_dense()
            assert_frame_equal(frame, dense_dm)

        self._check_all(_check)

    def test_stack_sparse_frame(self):
        def _check(frame):
            dense_frame = frame.to_dense()

            wp = Panel.from_dict({'foo': frame})
            from_dense_lp = wp.to_frame()

            from_sparse_lp = spf.stack_sparse_frame(frame)

            self.assert_(
                np.array_equal(from_dense_lp.values, from_sparse_lp.values))

        _check(self.frame)
        _check(self.iframe)

        # for now
        self.assertRaises(Exception, _check, self.zframe)
        self.assertRaises(Exception, _check, self.fill_frame)

    def test_transpose(self):
        def _check(frame):
            transposed = frame.T
            untransposed = transposed.T
            assert_sp_frame_equal(frame, untransposed)

        self._check_all(_check)

    def test_shift(self):
        def _check(frame):
            shifted = frame.shift(0)
            self.assert_(shifted is not frame)
            assert_sp_frame_equal(shifted, frame)

            f = lambda s: s.shift(1)
            _dense_frame_compare(frame, f)

            f = lambda s: s.shift(-2)
            _dense_frame_compare(frame, f)

            f = lambda s: s.shift(2, timeRule='WEEKDAY')
            _dense_frame_compare(frame, f)

            f = lambda s: s.shift(2, offset=datetools.bday)
            _dense_frame_compare(frame, f)

        self._check_all(_check)

    def test_count(self):
        result = self.frame.count()
        dense_result = self.frame.to_dense().count()
        assert_series_equal(result, dense_result)

        result = self.frame.count(1)
        dense_result = self.frame.to_dense().count(1)

        # win32 don't check dtype
        assert_series_equal(result, dense_result, check_dtype=False)

    def test_cumsum(self):
        result = self.frame.cumsum()
        expected = self.frame.to_dense().cumsum()
        self.assert_(isinstance(result, SparseDataFrame))
        assert_frame_equal(result.to_dense(), expected)

    def _check_all(self, check_func):
        check_func(self.frame)
        check_func(self.iframe)
        check_func(self.zframe)
        check_func(self.fill_frame)