Python SparseDataFrame Examples, pandas.SparseDataFrame Python Examples

Example #1

0

Show file

def test_from_scipy_fillna(spmatrix):
    # GH 16112
    arr = np.eye(3)
    arr[1:, 0] = np.nan

    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm).fillna(-1.0)

    # Returning frame should fill all nan values with -1.0
    expected = SparseDataFrame(
        {
            0: SparseSeries([1., -1, -1]),
            1: SparseSeries([np.nan, 1, np.nan]),
            2: SparseSeries([np.nan, np.nan, 1]),
        },
        default_fill_value=-1)

    # fill_value is expected to be what .fillna() above was called with
    # We don't use -1 as initial fill_value in expected SparseSeries
    # construction because this way we obtain "compressed" SparseArrays,
    # avoiding having to construct them ourselves
    for col in expected:
        expected[col].fill_value = -1

    tm.assert_sp_frame_equal(sdf, expected)

Example #2

0

Show file

def test_apply_keep_sparse_dtype():
    # GH 23744
    sdf = SparseDataFrame(np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]]),
                          columns=['b', 'a', 'c'], default_fill_value=1)
    df = DataFrame(sdf)

    expected = sdf.apply(np.exp)
    result = df.apply(np.exp)
    tm.assert_frame_equal(expected, result)

Example #3

0

Show file

File: test_apply.py Project: forking-repos/pandas

def test_apply_keep_sparse_dtype():
    # GH 23744
    sdf = SparseDataFrame(np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]]),
                          columns=['b', 'a', 'c'], default_fill_value=1)
    df = DataFrame(sdf)

    expected = sdf.apply(np.exp)
    result = df.apply(np.exp)
    tm.assert_frame_equal(expected, result)

Example #4

0

Show file

    def test_to_csv_sparse_dataframe(self, fill_value):
        # GH19384
        sdf = SparseDataFrame({'a': type(self).fill_values},
                              default_fill_value=fill_value)

        with tm.ensure_clean('sparse_df.csv') as path:
            sdf.to_csv(path, index=False)
            df = read_csv(path, skip_blank_lines=False)

            tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf)

Example #5

0

Show file

File: test_to_csv.py Project: pydata/pandas

    def test_to_csv_sparse_dataframe(self, fill_value):
        # GH19384
        sdf = SparseDataFrame({'a': type(self).fill_values},
                              default_fill_value=fill_value)

        with tm.ensure_clean('sparse_df.csv') as path:
            sdf.to_csv(path, index=False)
            df = read_csv(path, skip_blank_lines=False)

            tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf)

Example #6

0

Show file

def float_frame_fill0():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0
    """
    values = SparseDataFrame(data).values
    values[np.isnan(values)] = 0
    return SparseDataFrame(
        values, columns=["A", "B", "C", "D"], default_fill_value=0, index=dates
    )

Example #7

0

Show file

File: test_indexing.py Project: Michael-E-Rose/pandas

def test_where_with_numeric_data(data):
    # GH 17386
    lower_bound = 1.5

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse > lower_bound)

    dense = DataFrame(data)
    dense_expected = dense.where(dense > lower_bound)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #8

0

Show file

def test_where_with_numeric_data(data):
    # GH 17386
    lower_bound = 1.5

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse > lower_bound)

    dense = DataFrame(data)
    dense_expected = dense.where(dense > lower_bound)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #9

0

Show file

def test_where_with_numeric_data_and_other(data, other):
    # GH 17386
    lower_bound = 1.5

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse > lower_bound, other)

    dense = DataFrame(data)
    dense_expected = dense.where(dense > lower_bound, other)
    sparse_expected = SparseDataFrame(dense_expected, default_fill_value=other)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #10

0

Show file

File: test_indexing.py Project: Michael-E-Rose/pandas

def test_where_with_numeric_data_and_other(data, other):
    # GH 17386
    lower_bound = 1.5

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse > lower_bound, other)

    dense = DataFrame(data)
    dense_expected = dense.where(dense > lower_bound, other)
    sparse_expected = SparseDataFrame(dense_expected,
                                      default_fill_value=other)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #11

0

Show file

File: test_analytics.py Project: chaudharypoojabcg/PandaClone

def test_quantile():
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = 0.1

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseSeries(dense_expected)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)

Example #12

0

Show file

def test_where_with_bool_data():
    # GH 17386
    data = [[False, False], [True, True], [False, False]]
    cond = True

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse == cond)

    dense = DataFrame(data)
    dense_expected = dense.where(dense == cond)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #13

0

Show file

def test_where_with_bool_data_and_other(other):
    # GH 17386
    data = [[False, False], [True, True], [False, False]]
    cond = True

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse == cond, other)

    dense = DataFrame(data)
    dense_expected = dense.where(dense == cond, other)
    sparse_expected = SparseDataFrame(dense_expected, default_fill_value=other)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #14

0

Show file

File: test_analytics.py Project: chaudharypoojabcg/PandaClone

def test_quantile_multi():
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = [0.1, 0.5]

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #15

0

Show file

File: test_indexing.py Project: Michael-E-Rose/pandas

def test_where_with_bool_data():
    # GH 17386
    data = [[False, False], [True, True], [False, False]]
    cond = True

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse == cond)

    dense = DataFrame(data)
    dense_expected = dense.where(dense == cond)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #16

0

Show file

File: test_indexing.py Project: Michael-E-Rose/pandas

def test_where_with_bool_data_and_other(other):
    # GH 17386
    data = [[False, False], [True, True], [False, False]]
    cond = True

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse == cond, other)

    dense = DataFrame(data)
    dense_expected = dense.where(dense == cond, other)
    sparse_expected = SparseDataFrame(dense_expected,
                                      default_fill_value=other)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

Example #17

0

Show file

def frame(dates):
    data = {'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
            'C': np.arange(10, dtype=np.float64),
            'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan]}

    return SparseDataFrame(data, index=dates)

Example #18

0

Show file

def fill_frame(frame):
    values = frame.values.copy()
    values[np.isnan(values)] = 2

    return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
                           default_fill_value=2,
                           index=frame.index)

Example #19

0

Show file

File: test_apply.py Project: 09acp/Dash-Examples

def fill_frame(frame):
    values = frame.values.copy()
    values[np.isnan(values)] = 2

    return SparseDataFrame(
        values, columns=["A", "B", "C", "D"], default_fill_value=2, index=frame.index
    )

Example #20

0

Show file

File: test_to_from_scipy.py Project: TomAugspurger/pandas

def test_from_scipy_correct_ordering(spmatrix):
    # GH 16179
    arr = np.arange(1, 5).reshape(2, 2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm)
    expected = SparseDataFrame(arr)
    tm.assert_sp_frame_equal(sdf, expected)
    tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())

Example #21

0

Show file

def float_frame():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    # default_kind='block' is the default
    return SparseDataFrame(data, index=dates, default_kind='block')

Example #22

0

Show file

def float_frame_int_kind():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D'] and default_kind='integer'.
    Some entries are missing.
    """
    return SparseDataFrame(data, index=dates, default_kind='integer')

Example #23

0

Show file

File: gfa.py Project: nvrivera/genome_runner

def coo_to_df(triplets):
    """
    Create a SparseDataFrame from a sequence of (row,col,value) triplets.
    """
    data = defaultdict(dict)
    for row, col, val in triplets:
        data[col][row] = val
    return SparseDataFrame(data)

Example #24

0

Show file

def float_frame_fill2_dense():
    """
    Fixture for dense DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2
    """
    values = SparseDataFrame(data).values
    values[np.isnan(values)] = 2
    return DataFrame(values, columns=['A', 'B', 'C', 'D'], index=dates)

Example #25

0

Show file

def float_string_frame():
    """
    Fixture for sparse DataFrame of floats and strings with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing
    """
    sdf = SparseDataFrame(data, index=dates)
    sdf['foo'] = SparseArray(['bar'] * len(dates))
    return sdf

Example #26

0

Show file

File: test_apply.py Project: 09acp/Dash-Examples

def frame(dates):
    data = {
        "A": [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
        "B": [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
        "C": np.arange(10, dtype=np.float64),
        "D": [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
    }

    return SparseDataFrame(data, index=dates)

Example #27

0

Show file

File: generate_legacy_storage_files.py Project: lichunhong2010/-python-

def _create_sp_frame():
    nan = np.nan

    data = {u'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            u'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            u'C': np.arange(10).astype(np.int64),
            u'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

    dates = bdate_range('1/1/2011', periods=10)
    return SparseDataFrame(data, index=dates)

Example #28

0

Show file

def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    # GH 4343
    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179
    arr[0, 1] = dtype(2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm,
                          index=index,
                          columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    was_upcast = ((fill_value is None or is_float(fill_value))
                  and not is_object_dtype(dtype) and not is_float_dtype(dtype))
    res_dtype = (bool
                 if is_bool_dtype(dtype) else float if was_upcast else dtype)
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_

Example #29

0

Show file

File: generate_legacy_storage_files.py Project: 09acp/Dash-Examples

def _create_sp_frame():
    nan = np.nan

    data = {
        "A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        "B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        "C": np.arange(10).astype(np.int64),
        "D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }

    dates = bdate_range("1/1/2011", periods=10)
    return SparseDataFrame(data, index=dates)

Example #30

0

Show file

def _create_sp_frame():
    import numpy as np
    from pandas import bdate_range, SparseDataFrame

    nan = np.nan

    data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10).astype(np.int64),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

    dates = bdate_range('1/1/2011', periods=10)
    return SparseDataFrame(data, index=dates)

Example #31

0

Show file

def test_from_to_scipy_object(spmatrix, fill_value):
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    if (spmatrix is scipy.sparse.dok_matrix
            and LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm,
                          index=index,
                          columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    with catch_warnings(record=True):
        assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

Example #32

0

Show file

File: test_to_from_scipy.py Project: glyg/pandas

def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    # GH 4343
    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179
    arr[0, 1] = dtype(2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    was_upcast = ((fill_value is None or is_float(fill_value)) and
                  not is_object_dtype(dtype) and
                  not is_float_dtype(dtype))
    res_dtype = (bool if is_bool_dtype(dtype) else
                 float if was_upcast else
                 dtype)
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_

Example #33

0

Show file

def test_from_scipy_correct_ordering(spmatrix):
    # GH 16179
    arr = np.arange(1, 5).reshape(2, 2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm)
    expected = SparseDataFrame(arr)
    tm.assert_sp_frame_equal(sdf, expected)
    tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())

Example #34

0

Show file

File: test_to_from_scipy.py Project: TomAugspurger/pandas

def test_from_to_scipy_object(spmatrix, fill_value):
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    if (spmatrix is scipy.sparse.dok_matrix and LooseVersion(
            scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(SparseDtype(object, fill_value))
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes.apply(lambda dtype: dtype.subtype),
                           {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

Example #35

0

Show file

 def time_from_dict(self):
     SparseDataFrame(self.dict)

Example #36

0

Show file

 def time_from_scipy(self):
     SparseDataFrame(self.sparse)

Example #37

0

Show file

 def time_constructor(self):
     SparseDataFrame(columns=self.arr, index=self.arr)

Example #38

0

Show file

 def time_series_to_frame(self):
     SparseDataFrame(self.series)

Example #39

0

Show file

File: test_apply.py Project: zhengpingwan/pandas

def empty():
    return SparseDataFrame()