Example #1
0
    def test_getitem_fill_value(self):
        """Selections from a ``fill_value=0`` sparse frame match dense ones.

        Every selection taken from the sparse frame is compared with the
        same selection taken from the dense frame and then converted to
        sparse with ``fill_value=0``.
        """
        dense = pd.DataFrame([[1, np.nan, 0],
                              [2, 3, np.nan],
                              [0, np.nan, 4],
                              [0, np.nan, 5]],
                             columns=list('xyz'))
        sparse = dense.to_sparse(fill_value=0)

        def _to_expected(dense_part):
            # Converted frame whose frame-level default fill value is
            # overridden to NaN before the comparison.
            converted = dense_part.to_sparse(fill_value=0)
            converted._default_fill_value = np.nan
            return converted

        # one-column frame; the fill value itself is not compared here
        z_frame = sparse[['z']]
        z_expected = dense[['z']].to_sparse(fill_value=0)
        tm.assert_sp_frame_equal(z_frame, z_expected, check_fill_value=False)

        # a single column is compared as a sparse series
        tm.assert_sp_series_equal(sparse['y'],
                                  dense['y'].to_sparse(fill_value=0))

        # column lists and a boolean row mask all go through ``[]``
        row_mask = [True, False, True, True]
        for key in (['x'], ['z', 'x'], row_mask):
            expected = _to_expected(dense[key])
            tm.assert_sp_frame_equal(sparse[key], expected)

        # positional row selection
        positions = [1, 2]
        expected = _to_expected(dense.iloc[positions])
        tm.assert_sp_frame_equal(sparse.iloc[positions], expected)
Example #2
0
    def test_numeric_op_scalar(self):
        """Adding 1 to a sparse frame equals the dense sum converted to sparse."""
        column_data = {
            "A": [nan, nan, 0, 1],
            "B": [0, 1, 2, nan],
            "C": [1.0, 2.0, 3.0, 4.0],
            "D": [nan, nan, nan, nan],
        }
        dense = pd.DataFrame(column_data)
        sparse = dense.to_sparse()

        shifted_sparse = sparse + 1
        shifted_dense = dense + 1
        tm.assert_sp_frame_equal(shifted_sparse, shifted_dense.to_sparse())
Example #3
0
    def test_reindex_fill_value(self):
        """Reindexing ``self.zframe`` with ``fill_value=0`` matches the dense path.

        The dense counterpart ``self.zorig`` is reindexed the same way and
        converted to sparse using the sparse frame's default fill value.
        """
        new_index = bdate_range('20110110', periods=20)

        result = self.zframe.reindex(new_index, fill_value=0)

        dense_reindexed = self.zorig.reindex(new_index, fill_value=0)
        expected = dense_reindexed.to_sparse(self.zframe.default_fill_value)
        tm.assert_sp_frame_equal(result, expected)
Example #4
0
    def test_astype(self):
        """``astype(np.float64)`` turns int64 sparse columns into float64 ones.

        Exercised twice: once with the frame's default fill value left
        unspecified and once with ``default_fill_value=0``.
        """
        def _assert_column_dtypes(frame, dtype):
            # both columns are checked, 'A' first
            for name in ('A', 'B'):
                self.assertEqual(frame[name].dtype, dtype)

        # default fill value not specified on the input frame
        int_frame = pd.SparseDataFrame(
            {'A': SparseArray([1, 2, 3, 4], dtype=np.int64),
             'B': SparseArray([4, 5, 6, 7], dtype=np.int64)})
        _assert_column_dtypes(int_frame, np.int64)

        converted = int_frame.astype(np.float64)
        expected = pd.SparseDataFrame(
            {'A': SparseArray([1., 2., 3., 4.], fill_value=0.),
             'B': SparseArray([4., 5., 6., 7.], fill_value=0.)},
            default_fill_value=np.nan)
        tm.assert_sp_frame_equal(converted, expected)
        _assert_column_dtypes(converted, np.float64)

        # default fill value of 0 on the input frame
        int_frame = pd.SparseDataFrame(
            {'A': SparseArray([0, 2, 0, 4], dtype=np.int64),
             'B': SparseArray([0, 5, 0, 7], dtype=np.int64)},
            default_fill_value=0)
        _assert_column_dtypes(int_frame, np.int64)

        converted = int_frame.astype(np.float64)
        expected = pd.SparseDataFrame(
            {'A': SparseArray([0., 2., 0., 4.], fill_value=0.),
             'B': SparseArray([0., 5., 0., 7.], fill_value=0.)},
            default_fill_value=0.)
        tm.assert_sp_frame_equal(converted, expected)
        _assert_column_dtypes(converted, np.float64)
Example #5
0
    def test_fillna(self):
        """``fillna(0)`` on sparse data agrees with the dense result.

        Covers the value-returning form, the ``inplace=True`` form on a
        copy of the frame, and the ``inplace=True`` form on one column.
        """
        df = self.zframe.reindex(lrange(5))
        dense = self.zorig.reindex(lrange(5))

        # value-returning form
        result = df.fillna(0)
        expected = dense.fillna(0)
        tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                                 exact_indices=False)
        tm.assert_frame_equal(result.to_dense(), expected)

        # in-place form on a copy of the frame
        result = df.copy()
        result.fillna(0, inplace=True)
        expected = dense.fillna(0)

        tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                                 exact_indices=False)
        tm.assert_frame_equal(result.to_dense(), expected)

        # in-place form on a single column selected from ``df``
        result = df['A']
        result.fillna(0, inplace=True)

        expected = dense['A'].fillna(0)
        # Filling changes the internal SparseArray representation, so no
        # sparse-level comparison is made; only dense values are compared.
        tm.assert_series_equal(result.to_dense(), expected)
Example #6
0
 def test_iloc_slice(self):
     """``iloc[2:]`` on the sparse frame equals the dense slice made sparse."""
     dense = pd.DataFrame([[1, np.nan, np.nan],
                           [2, 3, np.nan],
                           [np.nan, np.nan, 4]],
                          columns=list('xyz'))
     sparse = dense.to_sparse()

     tail = slice(2, None)
     expected = dense.iloc[tail].to_sparse()
     tm.assert_sp_frame_equal(sparse.iloc[tail], expected)
Example #7
0
    def test_numpy_transpose(self):
        """``np.transpose`` applied twice round-trips; ``axes`` is rejected."""
        sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a'])

        transposed_once = np.transpose(sdf)
        transposed_twice = np.transpose(transposed_once)
        tm.assert_sp_frame_equal(transposed_twice, sdf)

        # passing ``axes`` is expected to raise ValueError with this message
        msg = "the 'axes' parameter is not supported"
        with tm.assertRaisesRegexp(ValueError, msg):
            np.transpose(sdf, axes=1)
Example #8
0
 def test_subclass_sparse_transpose(self):
     """``.T`` of a 2x3 subclassed sparse frame equals the 3x2 frame."""
     original = tm.SubclassedSparseDataFrame([[1, 2, 3],
                                              [4, 5, 6]])
     expected = tm.SubclassedSparseDataFrame([[1, 4],
                                              [2, 5],
                                              [3, 6]])
     transposed = original.T
     tm.assert_sp_frame_equal(transposed, expected)
    def test_concat_different_columns_buggy(self):
        """Concatenate sparse versions of ``dense1`` and ``dense3`` both ways.

        When both inputs use ``fill_value=0`` the result is compared as a
        sparse frame; when the fill values differ the comparison is made
        on the dense values.
        """
        dense_orders = ((self.dense1, self.dense3),
                        (self.dense3, self.dense1))

        # same fill value on both inputs
        sparse = self.dense1.to_sparse(fill_value=0)
        sparse3 = self.dense3.to_sparse(fill_value=0)
        sparse_orders = ((sparse, sparse3), (sparse3, sparse))

        for sp_pair, dense_pair in zip(sparse_orders, dense_orders):
            res = pd.concat(list(sp_pair), sort=True)
            exp = pd.concat(list(dense_pair), sort=True)
            exp = exp.to_sparse(fill_value=0)
            exp._default_fill_value = np.nan
            tm.assert_sp_frame_equal(res, exp, check_kind=False,
                                     consolidate_block_indices=True)

        # different fill values
        sparse = self.dense1.to_sparse()
        sparse3 = self.dense3.to_sparse(fill_value=0)
        sparse_orders = ((sparse, sparse3), (sparse3, sparse))

        # each column keeps its fill_value, thus compare in dense
        for sp_pair, dense_pair in zip(sparse_orders, dense_orders):
            res = pd.concat(list(sp_pair), sort=True)
            exp = pd.concat(list(dense_pair), sort=True)
            assert isinstance(res, pd.SparseDataFrame)
            tm.assert_frame_equal(res.to_dense(), exp)
Example #10
0
    def test_basic(self, sparse, dtype):
        """``get_dummies`` on a list, a Series and an indexed Series.

        The inputs all hold the values 'a', 'b', 'c'. The expected frame
        is the 3x3 indicator frame with the dtype returned by
        ``self.effective_dtype(dtype)``; it is converted to sparse when
        ``sparse`` is true.
        """
        s_list = list('abc')
        s_series = Series(s_list)
        s_series_index = Series(s_list, list('ABC'))

        expected = DataFrame({'a': [1, 0, 0],
                              'b': [0, 1, 0],
                              'c': [0, 0, 1]},
                             dtype=self.effective_dtype(dtype))

        # plain list input
        result = get_dummies(s_list, sparse=sparse, dtype=dtype)
        if sparse:
            tm.assert_sp_frame_equal(result,
                                     expected.to_sparse(kind='integer',
                                                        fill_value=0))
        else:
            assert_frame_equal(result, expected)

        # Series input; from here on ``expected`` stays sparse if requested
        result = get_dummies(s_series, sparse=sparse, dtype=dtype)
        if sparse:
            expected = expected.to_sparse(kind='integer', fill_value=0)
        assert_frame_equal(result, expected)

        # Series with a custom index; ``expected`` was already converted
        # above when ``sparse`` is true, so only its index is updated.
        expected.index = list('ABC')
        result = get_dummies(s_series_index, sparse=sparse, dtype=dtype)
        assert_frame_equal(result, expected)
def test_from_scipy_fillna(spmatrix):
    """``fillna(-1.0)`` on a frame built from a scipy sparse matrix.

    Returns early, without asserting anything, when ``spmatrix`` cannot
    be built from the input array or does not keep its dtype.
    """
    # GH 16112
    dense = np.eye(3)
    dense[1:, 0] = np.nan

    try:
        sp_mat = spmatrix(dense)
        assert sp_mat.dtype == dense.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and dtype,
        # the combination is treated as unsupported and is not tested
        # any further.
        return

    filled = SparseDataFrame(sp_mat).fillna(-1.0)

    # Returning frame should fill all nan values with -1.0
    expected_columns = {
        0: SparseSeries([1., -1, -1]),
        1: SparseSeries([np.nan, 1, np.nan]),
        2: SparseSeries([np.nan, np.nan, 1]),
    }
    expected = SparseDataFrame(expected_columns, default_fill_value=-1)

    # fill_value is expected to be what .fillna() above was called with.
    # -1 is not used as the initial fill_value of the expected
    # SparseSeries so that "compressed" SparseArrays are obtained without
    # having to construct them by hand.
    for label in expected:
        expected[label].fill_value = -1

    tm.assert_sp_frame_equal(filled, expected)
Example #12
0
    def test_reindex_fill_value(self, float_frame_fill0,
                                float_frame_fill0_dense):
        """Reindexing with ``fill_value=0`` matches the dense path.

        The dense fixture is reindexed the same way and converted to
        sparse using the sparse fixture's default fill value.
        """
        new_index = bdate_range('20110110', periods=20)

        result = float_frame_fill0.reindex(new_index, fill_value=0)

        dense_reindexed = float_frame_fill0_dense.reindex(new_index,
                                                          fill_value=0)
        expected = dense_reindexed.to_sparse(
            float_frame_fill0.default_fill_value)
        tm.assert_sp_frame_equal(result, expected)
Example #13
0
    def test_copy(self):
        """A copy of ``self.frame`` is an equal SparseDataFrame."""
        duplicate = self.frame.copy()
        tm.assertIsInstance(duplicate, SparseDataFrame)
        tm.assert_sp_frame_equal(duplicate, self.frame)

        # as of v0.15.0
        # this is now identical (but not is_a )
        same_index = duplicate.index.identical(self.frame.index)
        self.assertTrue(same_index)
Example #14
0
        def _check(frame, orig):
            """Check single and double transposes of ``frame``.

            ``orig`` is the dense frame whose transposes the densified
            results are compared against.
            """
            round_tripped = frame.T.T
            tm.assert_sp_frame_equal(frame, round_tripped)

            dense_once = frame.T.to_dense()
            tm.assert_frame_equal(dense_once, orig.T)

            dense_twice = frame.T.T.to_dense()
            tm.assert_frame_equal(dense_twice, orig.T.T)

            # second round-trip check, this time without exact indices
            tm.assert_sp_frame_equal(frame, frame.T.T, exact_indices=False)
Example #15
0
    def test_numeric_op_scalar(self):
        """``sparse + 1`` equals ``(dense + 1)`` converted to sparse."""
        dense = pd.DataFrame({'A': [nan, nan, 0, 1],
                              'B': [0, 1, 2, nan],
                              'C': [1., 2., 3., 4.],
                              'D': [nan, nan, nan, nan]})
        sparse = dense.to_sparse()

        result = sparse + 1
        expected = (dense + 1).to_sparse()
        tm.assert_sp_frame_equal(result, expected)
Example #16
0
    def test_copy(self, float_frame):
        """A copy of ``float_frame`` is an equal SparseDataFrame."""
        duplicate = float_frame.copy()
        assert isinstance(duplicate, SparseDataFrame)
        tm.assert_sp_frame_equal(duplicate, float_frame)

        # as of v0.15.0
        # this is now identical (but not is_a )
        same_index = duplicate.index.identical(float_frame.index)
        assert same_index
Example #17
0
    def test_fill_value_when_combine_const(self):
        """``add(2, fill_value=0)`` equals ``fillna(0).add(2)``."""
        # GH12723
        values = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
        sdf = SparseDataFrame({'foo': values}, index=range(6))

        filled_first = sdf.fillna(0)
        expected = filled_first.add(2)
        result = sdf.add(2, fill_value=0)
        tm.assert_sp_frame_equal(result, expected)
Example #18
0
    def test_combine_add(self):
        """Sparse ``add(..., fill_value=0)`` matches the dense ``add``."""
        left = self.frame.to_dense()
        # the copy is taken before ``left`` is modified below
        right = left.copy()
        right['C'][:3] = np.nan
        left['A'][:3] = 5.7

        sparse_left = left.to_sparse()
        sparse_right = right.to_sparse()
        result = sparse_left.add(sparse_right, fill_value=0)

        dense_sum = left.add(right, fill_value=0)
        tm.assert_sp_frame_equal(result, dense_sum.to_sparse())
Example #19
0
    def test_getitem(self):
        """Selecting several columns equals ``reindex(columns=...)``."""
        # 1585 select multiple columns
        sdf = SparseDataFrame(index=[0, 1, 2], columns=['a', 'b', 'c'])

        wanted = ['a', 'b']
        selected = sdf[wanted]
        reindexed = sdf.reindex(columns=wanted)
        tm.assert_sp_frame_equal(selected, reindexed)

        # 'd' is not among the columns the frame was built with
        with self.assertRaises(Exception):
            sdf[['a', 'd']]
Example #20
0
    def test_fancy_index_misc(self, float_frame):
        """Positional ``iloc`` slices equal the matching ``reindex``."""
        last_two = slice(-2, None)
        everything = slice(None)

        # axis = 0
        by_rows = float_frame.iloc[last_two, everything]
        expected = float_frame.reindex(index=float_frame.index[last_two])
        tm.assert_sp_frame_equal(by_rows, expected)

        # axis = 1
        by_columns = float_frame.iloc[everything, last_two]
        expected = float_frame.reindex(
            columns=float_frame.columns[last_two])
        tm.assert_sp_frame_equal(by_columns, expected)
Example #21
0
    def test_numpy_cumsum(self):
        """``np.cumsum`` on the sparse frame; ``dtype``/``out`` are rejected."""
        result = np.cumsum(self.frame)
        dense_cumsum = self.frame.to_dense().cumsum()
        tm.assert_sp_frame_equal(result, SparseDataFrame(dense_cumsum))

        # each unsupported keyword is expected to raise ValueError with
        # its own message
        rejected = (
            ("the 'dtype' parameter is not supported", {'dtype': np.int64}),
            ("the 'out' parameter is not supported", {'out': result}),
        )
        for msg, kwargs in rejected:
            tm.assertRaisesRegexp(ValueError, msg, np.cumsum, self.frame,
                                  **kwargs)
Example #22
0
    def test_to_csv_sparse_dataframe(self, fill_value):
        """A sparse frame written to CSV and read back equals the original."""
        # GH19384
        column_values = type(self).fill_values
        sdf = SparseDataFrame({'a': column_values},
                              default_fill_value=fill_value)

        with tm.ensure_clean('sparse_df.csv') as path:
            sdf.to_csv(path, index=False)
            reloaded = read_csv(path, skip_blank_lines=False)

            round_tripped = reloaded.to_sparse(fill_value=fill_value)
            tm.assert_sp_frame_equal(round_tripped, sdf)
Example #23
0
    def test_fancy_index_misc(self):
        """Positional slices on either axis equal the matching ``reindex``.

        The last two rows / columns are selected by position with
        ``.iloc`` and compared with a ``reindex`` on the last two index /
        column labels.
        """
        # axis = 0
        sliced = self.frame.iloc[-2:, :]
        expected = self.frame.reindex(index=self.frame.index[-2:])
        tm.assert_sp_frame_equal(sliced, expected)

        # axis = 1
        sliced = self.frame.iloc[:, -2:]
        expected = self.frame.reindex(columns=self.frame.columns[-2:])
        tm.assert_sp_frame_equal(sliced, expected)
Example #24
0
    def test_assign_with_sparse_frame(self):
        """``assign`` on a sparse frame keeps every column a SparseSeries."""
        # GH 19163
        dense = pd.DataFrame({"a": [1, 2, 3]})

        sparse = dense.to_sparse(fill_value=False)
        result = sparse.assign(newcol=False)

        dense_assigned = dense.assign(newcol=False)
        expected = dense_assigned.to_sparse(fill_value=False)

        tm.assert_sp_frame_equal(result, expected)

        for name in result.columns:
            assert type(result[name]) is SparseSeries
Example #25
0
 def test_type_coercion_at_construction(self):
     """Frame-level ``dtype='uint8'`` equals building from uint8 series."""
     # GH 15682
     raw = {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}

     result = pd.SparseDataFrame(raw, dtype='uint8', default_fill_value=0)

     typed_columns = {name: pd.SparseSeries(values, dtype='uint8')
                      for name, values in raw.items()}
     expected = pd.SparseDataFrame(typed_columns, default_fill_value=0)

     tm.assert_sp_frame_equal(result, expected)
Example #26
0
 def test_constructor_dict_order(self):
     """Column order of a frame built from a dict depends on ``compat.PY36``."""
     # GH19018
     # initialization ordering: by insertion order if python>= 3.6, else
     # order by value
     data = {'b': [2, 3], 'a': [0, 1]}
     frame = SparseDataFrame(data=data)

     column_order = list('ba') if compat.PY36 else list('ab')
     expected = SparseDataFrame(data=data, columns=column_order)
     tm.assert_sp_frame_equal(frame, expected)
Example #27
0
    def test_join(self):
        """Join column subsets of ``self.frame`` and check the error cases.

        Columns are selected by label with ``.loc``. Joining the "A"/"B"
        and "C"/"D" halves must give back the full frame; joining two
        frames that both contain "B", or joining an unnamed Series, is
        expected to raise.
        """
        left = self.frame.loc[:, ["A", "B"]]
        right = self.frame.loc[:, ["C", "D"]]
        joined = left.join(right)
        tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False)

        # both sides now contain column "B"
        right = self.frame.loc[:, ["B", "D"]]
        self.assertRaises(Exception, left.join, right)

        # built outside the ``with`` so only the join itself is guarded
        unnamed = Series(np.random.randn(len(self.frame)),
                         index=self.frame.index)
        with tm.assertRaisesRegexp(ValueError,
                                   "Other Series must have a name"):
            self.frame.join(unnamed)
Example #28
0
    def test_concat_different_columns_sort_warns(self):
        """``pd.concat`` without ``sort`` emits FutureWarning for both kinds.

        The sparse result must equal the dense result converted to sparse.
        """
        sparse_frames = [self.dense1.to_sparse(), self.dense3.to_sparse()]

        with tm.assert_produces_warning(FutureWarning):
            result = pd.concat(sparse_frames)
        with tm.assert_produces_warning(FutureWarning):
            dense_result = pd.concat([self.dense1, self.dense3])

        expected = dense_result.to_sparse()
        tm.assert_sp_frame_equal(result, expected)
Example #29
0
    def test_combine_first(self):
        """``combine_first`` gives one result for sparse and dense ``other``.

        Both results are also compared with the all-dense computation
        converted back to sparse with the frame's default fill value.
        """
        frame = self.frame

        with_sparse_other = frame[::2].combine_first(frame)
        with_dense_other = frame[::2].combine_first(frame.to_dense())

        dense_result = frame[::2].to_dense().combine_first(frame.to_dense())
        expected = dense_result.to_sparse(
            fill_value=frame.default_fill_value)

        tm.assert_sp_frame_equal(with_sparse_other, with_dense_other)
        tm.assert_sp_frame_equal(with_sparse_other, expected)
Example #30
0
    def test_sparse_repr_after_set(self):
        """``repr`` works after a chained set and the frame equals its copy."""
        # GH 15488
        sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
        snapshot = sdf.copy()

        # Ignore the warning
        with pd.option_context('mode.chained_assignment', None):
            sdf[0][1] = 2  # This line triggers the bug

        # building the repr must not raise
        repr(sdf)
        tm.assert_sp_frame_equal(sdf, snapshot)
Example #31
0
    def test_reindex_method(self):
        """``reindex`` over rows and columns with and without ``method``.

        Row reindexing is checked for the default, ``'bfill'`` and
        ``'ffill'``. Column reindexing is checked for the default, and
        both fill methods are expected to raise NotImplementedError.
        """
        labels = [1, 2, 4]
        row1 = [11., 12., 14.]
        row2 = [21., 22., 24.]
        row4 = [41., 42., 44.]
        missing = [nan, nan, nan]

        def _frame(rows, index, columns):
            # every frame in this test is built with dtype=float
            return SparseDataFrame(data=rows, index=index, columns=columns,
                                   dtype=float)

        sparse = _frame([row1, row2, row4], labels, labels)

        # Over indices
        index_cases = (
            # default method
            ({}, [missing, row1, row2, missing, row4, missing]),
            # method='bfill'
            ({'method': 'bfill'}, [row1, row1, row2, row4, row4, missing]),
            # method='ffill'
            ({'method': 'ffill'}, [missing, row1, row2, row2, row4, row4]),
        )
        for kwargs, expected_rows in index_cases:
            result = sparse.reindex(index=range(6), **kwargs)
            expected = _frame(expected_rows, range(6), labels)
            tm.assert_sp_frame_equal(result, expected)

        # Over columns

        # default method
        result = sparse.reindex(columns=range(6))
        expected = _frame([[nan, 11., 12., nan, 14., nan],
                           [nan, 21., 22., nan, 24., nan],
                           [nan, 41., 42., nan, 44., nan]],
                          labels, range(6))
        tm.assert_sp_frame_equal(result, expected)

        # method='bfill', then method='ffill'
        for method in ('bfill', 'ffill'):
            with tm.assertRaises(NotImplementedError):
                sparse.reindex(columns=range(6), method=method)
    def test_loc(self):
        """``.loc`` on a sparse frame mirrors ``.loc`` on the dense frame.

        Scalars, rows, a column, sub-frames and boolean masks are looked
        up on both frames; dense results are converted to sparse before
        the comparison.
        """
        orig = pd.DataFrame(
            [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
            columns=list('xyz'))
        sparse = orig.to_sparse()
        every = slice(None)

        # scalar lookups
        assert sparse.loc[0, 'x'] == 1
        assert np.isnan(sparse.loc[1, 'z'])
        assert sparse.loc[2, 'z'] == 4

        # have to specify `kind='integer'`, since we construct a
        # new SparseArray here, and the default sparse type is
        # integer there, but block in SparseSeries
        # (the (2, :) key is looked up twice)
        for row_key in (0, 1, (2, every), (2, every)):
            tm.assert_sp_series_equal(
                sparse.loc[row_key],
                orig.loc[row_key].to_sparse(kind='integer'))

        # column lookup, also performed twice
        for _ in range(2):
            tm.assert_sp_series_equal(sparse.loc[:, 'y'],
                                      orig.loc[:, 'y'].to_sparse())

        # sub-frame lookups
        frame_keys = ([1, 2],
                      ([1, 2], every),
                      (every, ['x', 'z']),
                      ([0, 2], ['x', 'z']))
        for key in frame_keys:
            result = sparse.loc[key]
            exp = orig.loc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # exceeds the bounds
        beyond = [1, 3, 4, 5]
        result = sparse.reindex(beyond)
        exp = orig.reindex(beyond).to_sparse()
        tm.assert_sp_frame_equal(result, exp)

        # dense array
        dense_mask = orig.x % 2 == 1
        exp = orig.loc[dense_mask].to_sparse()
        tm.assert_sp_frame_equal(sparse.loc[dense_mask], exp)

        # sparse array (actually it coerces to normal Series)
        sparse_mask = sparse.x % 2 == 1
        tm.assert_sp_frame_equal(sparse.loc[sparse_mask], exp)

        # sparse array
        result = sparse.loc[pd.SparseArray(sparse_mask, dtype=bool)]
        tm.assert_sp_frame_equal(result, exp)
 def test_cumsum(self):
     """Sparse ``cumsum`` equals the dense ``cumsum`` wrapped as sparse."""
     sparse_cumsum = self.frame.cumsum()
     dense_cumsum = self.frame.to_dense().cumsum()
     tm.assert_sp_frame_equal(sparse_cumsum, SparseDataFrame(dense_cumsum))
    def test_concat(self):
        """``pd.concat`` of sparse frames matches the dense concatenation.

        All four pairings of the two frames are tried, first with the
        default fill value and then with ``fill_value=0``.
        """
        def _pairings(first, second):
            # same order for the sparse and the dense inputs
            return [[first, first], [second, second],
                    [first, second], [second, first]]

        dense_pairs = _pairings(self.dense1, self.dense2)

        # fill_value = np.nan
        sparse_pairs = _pairings(self.dense1.to_sparse(),
                                 self.dense2.to_sparse())
        for sp_frames, dense_frames in zip(sparse_pairs, dense_pairs):
            res = pd.concat(sp_frames)
            exp = pd.concat(dense_frames).to_sparse()
            tm.assert_sp_frame_equal(res, exp)

        # fill_value = 0
        sparse_pairs = _pairings(self.dense1.to_sparse(fill_value=0),
                                 self.dense2.to_sparse(fill_value=0))
        for sp_frames, dense_frames in zip(sparse_pairs, dense_pairs):
            res = pd.concat(sp_frames)
            exp = pd.concat(dense_frames).to_sparse(fill_value=0)
            exp._default_fill_value = np.nan
            tm.assert_sp_frame_equal(res, exp)
Example #35
0
 def _check(frame, orig):
     """Check that transposing ``frame`` twice gives ``frame`` back.

     ``orig`` is accepted but not used in this body.
     """
     round_tripped = frame.T.T
     tm.assert_sp_frame_equal(frame, round_tripped)
    def test_loc(self):
        """``.loc`` on a sparse frame mirrors ``.loc`` on the dense frame.

        Scalars, rows, a column, sub-frames (including labels that are
        not in the index) and boolean masks are looked up on both
        frames; dense results are converted to sparse before comparing.
        """
        orig = pd.DataFrame([[1, np.nan, np.nan],
                             [2, 3, np.nan],
                             [np.nan, np.nan, 4]],
                            columns=list('xyz'))
        sparse = orig.to_sparse()
        every = slice(None)

        # scalar lookups
        self.assertEqual(sparse.loc[0, 'x'], 1)
        self.assertTrue(np.isnan(sparse.loc[1, 'z']))
        self.assertEqual(sparse.loc[2, 'z'], 4)

        # series lookups; the (2, :) and (:, 'y') keys are each used twice
        series_keys = (0, 1,
                       (2, every), (2, every),
                       (every, 'y'), (every, 'y'))
        for key in series_keys:
            tm.assert_sp_series_equal(sparse.loc[key],
                                      orig.loc[key].to_sparse())

        # sub-frame lookups; the last key exceeds the bounds
        frame_keys = ([1, 2],
                      ([1, 2], every),
                      (every, ['x', 'z']),
                      ([0, 2], ['x', 'z']),
                      [1, 3, 4, 5])
        for key in frame_keys:
            result = sparse.loc[key]
            exp = orig.loc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # dense array
        dense_mask = orig.x % 2 == 1
        exp = orig.loc[dense_mask].to_sparse()
        tm.assert_sp_frame_equal(sparse.loc[dense_mask], exp)

        # sparse array (actually it coerces to normal Series)
        sparse_mask = sparse.x % 2 == 1
        tm.assert_sp_frame_equal(sparse.loc[sparse_mask], exp)

        # sparse array
        result = sparse.loc[pd.SparseArray(sparse_mask, dtype=bool)]
        tm.assert_sp_frame_equal(result, exp)
 def test_iloc_slice(self):
     """``iloc[2:]`` on the sparse frame equals the dense slice made sparse."""
     rows = [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]]
     dense = pd.DataFrame(rows, columns=list('xyz'))
     sparse = dense.to_sparse()

     result = sparse.iloc[2:]
     expected = dense.iloc[2:].to_sparse()
     tm.assert_sp_frame_equal(result, expected)
Example #38
0
 def test_subclass_sparse_transpose(self):
     """``.T`` of a 2x3 subclassed sparse frame equals the 3x2 frame."""
     rows = [[1, 2, 3], [4, 5, 6]]
     rows_transposed = [[1, 4], [2, 5], [3, 6]]

     frame = tm.SubclassedSparseDataFrame(rows)
     expected = tm.SubclassedSparseDataFrame(rows_transposed)
     tm.assert_sp_frame_equal(frame.T, expected)
    def test_loc_index(self):
        """``.loc`` with string row labels mirrors the dense frame.

        Scalars, rows, a column, sub-frames and boolean masks are looked
        up on both frames; dense results are converted to sparse before
        the comparison.
        """
        orig = pd.DataFrame([[1, np.nan, np.nan],
                             [2, 3, np.nan],
                             [np.nan, np.nan, 4]],
                            index=list('abc'), columns=list('xyz'))
        sparse = orig.to_sparse()
        every = slice(None)

        # scalar lookups
        self.assertEqual(sparse.loc['a', 'x'], 1)
        self.assertTrue(np.isnan(sparse.loc['b', 'z']))
        self.assertEqual(sparse.loc['c', 'z'], 4)

        # series lookups; the ('b', :) and (:, 'z') keys are each used twice
        series_keys = ('a', 'b',
                       ('b', every), ('b', every),
                       (every, 'z'), (every, 'z'))
        for key in series_keys:
            tm.assert_sp_series_equal(sparse.loc[key],
                                      orig.loc[key].to_sparse())

        # sub-frame lookups
        frame_keys = (['a', 'b'],
                      (['a', 'b'], every),
                      (every, ['x', 'z']),
                      (['c', 'a'], ['x', 'z']))
        for key in frame_keys:
            result = sparse.loc[key]
            exp = orig.loc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # dense array
        dense_mask = orig.x % 2 == 1
        exp = orig.loc[dense_mask].to_sparse()
        tm.assert_sp_frame_equal(sparse.loc[dense_mask], exp)

        # sparse array (actually it coerces to normal Series)
        sparse_mask = sparse.x % 2 == 1
        tm.assert_sp_frame_equal(sparse.loc[sparse_mask], exp)

        # sparse array
        result = sparse.loc[pd.SparseArray(sparse_mask, dtype=bool)]
        tm.assert_sp_frame_equal(result, exp)
Example #40
0
 def test_constructor_dataframe(self):
     """Building a SparseDataFrame from the dense form gives the frame back."""
     rebuilt = SparseDataFrame(self.frame.to_dense())
     tm.assert_sp_frame_equal(rebuilt, self.frame)
Example #41
0
 def _test_roundtrip(frame, orig):
     """Pickle round-trip ``frame`` and compare it with ``frame`` and ``orig``.

     The unpickled frame must equal ``frame`` as a sparse frame, and its
     dense form must equal ``orig`` (dtype is not checked).
     """
     unpickled = tm.round_trip_pickle(frame)
     tm.assert_sp_frame_equal(frame, unpickled)

     dense = unpickled.to_dense()
     tm.assert_frame_equal(dense, orig, check_dtype=False)
Example #42
0
 def test_take(self):
     """``take([1, 0, 2], axis=1)`` equals reindexing to columns B, A, C."""
     positions = [1, 0, 2]
     taken = self.frame.take(positions, axis=1)
     reordered = self.frame.reindex(columns=['B', 'A', 'C'])
     tm.assert_sp_frame_equal(taken, reordered)
Example #43
0
 def _test_op(panel, op):
     """Apply ``op(panel, 1)`` and compare its 'ItemA' with ``op`` on that item.

     The panel-level call is expected to emit a FutureWarning.
     """
     # arithmetic tests
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         panel_result = op(panel, 1)

     item_result = panel_result['ItemA']
     item_expected = op(panel['ItemA'], 1)
     tm.assert_sp_frame_equal(item_result, item_expected)
Example #44
0
def compare_sp_frame_float(result, expected, typ, version):
    """Compare two sparse frames, relaxing the checks for old versions.

    For ``version`` up to and including '0.18.1' the comparison is made
    with ``exact_indices=False`` and ``check_dtype=False``; otherwise the
    defaults of ``tm.assert_sp_frame_equal`` apply. ``typ`` is not used.
    """
    relaxed = {}
    if LooseVersion(version) <= '0.18.1':
        relaxed = {'exact_indices': False, 'check_dtype': False}
    tm.assert_sp_frame_equal(result, expected, **relaxed)
Example #45
0
    def test_loc_index(self):
        """``.loc`` with string row labels mirrors the dense frame.

        Scalars, rows, a column, sub-frames and boolean masks are looked
        up on both frames; dense results are converted to sparse before
        the comparison (rows with ``kind="integer"``).
        """
        orig = pd.DataFrame(
            [[1, np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, 4]],
            index=list("abc"),
            columns=list("xyz"),
        )
        sparse = orig.to_sparse()
        every = slice(None)

        # scalar lookups
        assert sparse.loc["a", "x"] == 1
        assert np.isnan(sparse.loc["b", "z"])
        assert sparse.loc["c", "z"] == 4

        # row lookups; the ("b", :) key is used twice
        for row_key in ("a", "b", ("b", every), ("b", every)):
            tm.assert_sp_series_equal(
                sparse.loc[row_key],
                orig.loc[row_key].to_sparse(kind="integer"))

        # column lookup, also performed twice
        for _ in range(2):
            tm.assert_sp_series_equal(sparse.loc[:, "z"],
                                      orig.loc[:, "z"].to_sparse())

        # sub-frame lookups
        frame_keys = (["a", "b"],
                      (["a", "b"], every),
                      (every, ["x", "z"]),
                      (["c", "a"], ["x", "z"]))
        for key in frame_keys:
            result = sparse.loc[key]
            exp = orig.loc[key].to_sparse()
            tm.assert_sp_frame_equal(result, exp)

        # dense array
        dense_mask = orig.x % 2 == 1
        exp = orig.loc[dense_mask].to_sparse()
        tm.assert_sp_frame_equal(sparse.loc[dense_mask], exp)

        # sparse array (actually it coerces to normal Series)
        sparse_mask = sparse.x % 2 == 1
        tm.assert_sp_frame_equal(sparse.loc[sparse_mask], exp)

        # sparse array
        result = sparse.loc[pd.SparseArray(sparse_mask, dtype=bool)]
        tm.assert_sp_frame_equal(result, exp)
Example #46
0
def compare_sp_frame_float(result, expected, typ, version):
    """Compare two sparse frames with ``tm.assert_sp_frame_equal``.

    ``typ`` and ``version`` are accepted but not used in this body.
    """
    tm.assert_sp_frame_equal(result, expected)