Example #1
0
def test_from_scipy_fillna(spmatrix):
    """Check ``fillna(-1.0)`` on a SparseDataFrame built from a sparse matrix.

    ``spmatrix`` is called with a dense array to build the sparse input
    (presumably a fixture supplying scipy.sparse matrix classes -- it is
    defined outside this view).
    """
    # GH 16112
    dense = np.eye(3)
    dense[1:, 0] = np.nan

    try:
        sparse_matrix = spmatrix(dense)
        assert sparse_matrix.dtype == dense.dtype
    except (TypeError, AssertionError):
        # Either the conversion itself failed or it did not keep the dtype:
        # this matrix type / dtype pairing is treated as unsupported and the
        # test quietly stops here instead of failing.
        return

    filled = SparseDataFrame(sparse_matrix).fillna(-1.0)

    # Every NaN in the resulting frame is expected to have become -1.0.
    # The columns are deliberately built with the default fill value so that
    # the SparseArrays come out "compressed" without building them by hand.
    columns = {
        0: SparseSeries([1., -1, -1]),
        1: SparseSeries([np.nan, 1, np.nan]),
        2: SparseSeries([np.nan, np.nan, 1]),
    }
    expected = SparseDataFrame(columns, default_fill_value=-1)

    # ...and only afterwards is fill_value switched to the value that was
    # passed to .fillna() above.
    for label in expected:
        expected[label].fill_value = -1

    tm.assert_sp_frame_equal(filled, expected)
def _create_sp_series():
    """Return a block-kind SparseSeries named ``bseries``.

    The data are the floats 0..14 with NaN at positions 7-11 and at the
    last position.
    """
    # nan-based
    values = np.arange(15, dtype=np.float64)
    values[[7, 8, 9, 10, 11, -1]] = np.nan

    sparse = SparseSeries(values, kind='block')
    sparse.name = u'bseries'
    return sparse
Example #3
0
def _create_sp_series():
    """Build the ``bseries`` fixture: a block-kind SparseSeries with NaN gaps.

    Values are 0.0..14.0; positions 7 through 11 and the final position
    are replaced by NaN before the series is created.
    """
    missing = np.nan

    # nan-based
    data = np.arange(15, dtype=np.float64)
    for gap in (slice(7, 12), slice(-1, None)):
        data[gap] = missing

    result = SparseSeries(data, kind='block')
    result.name = 'bseries'
    return result
def _create_sp_tsseries():
    """Return a block-kind SparseSeries named ``btsseries`` on a date index.

    The data are the floats 0..14 with NaN at positions 7-11 and at the
    last position; the index is ``bdate_range('1/1/2011', ...)`` with one
    entry per value.
    """
    # nan-based
    values = np.arange(15, dtype=np.float64)
    values[[7, 8, 9, 10, 11, -1]] = np.nan

    sparse = SparseSeries(
        values,
        index=bdate_range('1/1/2011', periods=len(values)),
        kind='block'
    )
    sparse.name = u'btsseries'
    return sparse
Example #5
0
def _create_sp_tsseries():
    """Build the ``btsseries`` fixture: a date-indexed, block-kind SparseSeries.

    Values are 0.0..14.0 with positions 7 through 11 and the final position
    set to NaN. The index comes from ``bdate_range`` starting at '1/1/2011'
    and has as many periods as there are values.
    """
    missing = np.nan

    # nan-based
    data = np.arange(15, dtype=np.float64)
    for gap in (slice(7, 12), slice(-1, None)):
        data[gap] = missing

    dates = bdate_range('1/1/2011', periods=len(data))
    result = SparseSeries(data, index=dates, kind='block')
    result.name = 'btsseries'
    return result
def test_where_with_numeric_data_and_other(data, other):
    """Check ``SparseSeries.where(cond, other)`` against the dense result.

    ``data`` and ``other`` are supplied by the caller (presumably pytest
    parametrization defined outside this view). Entries not greater than
    1.5 are replaced by ``other``.
    """
    # GH 17386
    threshold = 1.5

    sparse_input = SparseSeries(data)
    keep_sparse = sparse_input > threshold
    result = sparse_input.where(keep_sparse, other)

    dense_input = Series(data)
    keep_dense = dense_input > threshold
    expected_dense = dense_input.where(keep_dense, other)
    expected_sparse = SparseSeries(expected_dense, fill_value=other)

    # The result must match both the dense outcome and its sparse form.
    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def test_where_with_numeric_data(data):
    """Check ``SparseSeries.where(cond)`` against the dense result.

    ``data`` is supplied by the caller (presumably pytest parametrization
    defined outside this view). The condition keeps entries greater than 1.5.
    """
    # GH 17386
    threshold = 1.5

    sparse_input = SparseSeries(data)
    keep_sparse = sparse_input > threshold
    result = sparse_input.where(keep_sparse)

    dense_input = Series(data)
    keep_dense = dense_input > threshold
    expected_dense = dense_input.where(keep_dense)
    expected_sparse = SparseSeries(expected_dense)

    # The result must match both the dense outcome and its sparse form.
    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def test_where_with_bool_data():
    """Check ``SparseSeries.where(cond)`` on boolean data against dense.

    The condition keeps the entries that compare equal to ``True``.
    """
    # GH 17386
    values = [False, False, True, True, False, False]
    wanted = True

    sparse_input = SparseSeries(values)
    keep_sparse = sparse_input == wanted
    result = sparse_input.where(keep_sparse)

    dense_input = Series(values)
    keep_dense = dense_input == wanted
    expected_dense = dense_input.where(keep_dense)
    expected_sparse = SparseSeries(expected_dense)

    # The result must match both the dense outcome and its sparse form.
    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def test_where_with_bool_data_and_other(other):
    """Check ``SparseSeries.where(cond, other)`` on boolean data against dense.

    ``other`` is supplied by the caller (presumably pytest parametrization
    defined outside this view). Entries not equal to ``True`` are replaced
    by ``other``.
    """
    # GH 17386
    values = [False, False, True, True, False, False]
    wanted = True

    sparse_input = SparseSeries(values)
    keep_sparse = sparse_input == wanted
    result = sparse_input.where(keep_sparse, other)

    dense_input = Series(values)
    keep_dense = dense_input == wanted
    expected_dense = dense_input.where(keep_dense, other)
    expected_sparse = SparseSeries(expected_dense, fill_value=other)

    # The result must match both the dense outcome and its sparse form.
    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
Example #10
0
def _create_sp_series():
    """Return a block-kind SparseSeries named ``bseries``.

    The data are the floats 0..14 with NaN at positions 7-11 and at the
    last position. numpy and SparseSeries are imported inside the function.
    """

    import numpy as np
    from pandas import SparseSeries

    # nan-based
    values = np.arange(15, dtype=np.float64)
    values[[7, 8, 9, 10, 11, -1]] = np.nan

    sparse = SparseSeries(values, kind='block')
    sparse.name = 'bseries'
    return sparse
def test_type_of_target():
    """Exercise ``type_of_target`` on accepted and rejected inputs.

    Checks, in order:
    * every example in ``EXAMPLES`` is classified as its group key;
    * every entry of ``NON_ARRAY_LIKE_EXAMPLES`` raises ``ValueError``;
    * every entry of ``MULTILABEL_SEQUENCES`` raises ``ValueError`` with the
      legacy multi-label message;
    * a pandas ``SparseSeries`` raises ``ValueError``.

    Raises ``SkipTest`` if ``SparseSeries`` cannot be imported from pandas;
    the earlier checks have already run by then.
    """
    for group, group_examples in EXAMPLES.items():
        for example in group_examples:
            # Classify once and reuse the value in the failure message
            # rather than calling type_of_target a second time.
            actual = type_of_target(example)
            assert_equal(actual,
                         group,
                         msg=('type_of_target(%r) should be %r, got %r' %
                              (example, group, actual)))

    # The expected-message patterns do not depend on the example, so they
    # are built once outside their loops.
    msg_regex = r'Expected array-like \(array or non-string sequence\).*'
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises_regex(ValueError, msg_regex, type_of_target, example)

    msg = ('You appear to be using a legacy multi-label data '
           'representation. Sequence of sequences are no longer supported;'
           ' use a binary array or sparse matrix instead.')
    for example in MULTILABEL_SEQUENCES:
        assert_raises_regex(ValueError, msg, type_of_target, example)

    try:
        from pandas import SparseSeries
    except ImportError:
        raise SkipTest("Pandas not found")

    y = SparseSeries([1, 0, 0, 1, 0])
    msg = "y cannot be class 'SparseSeries'."
    assert_raises_regex(ValueError, msg, type_of_target, y)
def _create_sp_series():
    """Return a block-kind SparseSeries named ``bseries`` on an integer index.

    The data are the floats 0..14 with NaN at positions 7-11 and at the
    last position; the index is ``np.arange(15)``.
    """

    import numpy as np
    from pandas import SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=float)
    index = np.arange(15)
    arr[7:12] = nan
    arr[-1:] = nan

    # The series is indexed by the integer ``index``; no date index is
    # needed, so none is built.
    bseries = SparseSeries(arr, index=index, kind='block')
    bseries.name = 'bseries'
    return bseries
def _create_sp_series():
    """Build the ``bseries`` fixture: an integer-indexed, block-kind SparseSeries.

    Values are 0.0..14.0 with positions 7 through 11 and the final position
    set to NaN; the index is ``np.arange(15)``.
    """

    import numpy as np
    from pandas import SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=float)
    index = np.arange(15)
    arr[7:12] = nan
    arr[-1:] = nan

    # Only the integer ``index`` is used for the series, so no business-day
    # range is computed here.
    bseries = SparseSeries(arr, index=index, kind='block')
    bseries.name = 'bseries'
    return bseries
Example #14
0
 def setup(self):
     """Populate ``self.series`` with SparseSeries of decreasing length.

     For each key ``i`` in 1..49 a fresh random array of 50001 values is
     drawn, its last ``i`` entries are dropped, everything from position
     100 onwards is set to NaN, and the result is stored as a SparseSeries
     on the correspondingly shortened date index.
     """
     series_stop, n_points = 50, 50001
     full_index = date_range('1/1/2000', periods=n_points, freq='T')
     self.series = {}
     for trim in range(1, series_stop):
         values = np.random.randn(n_points)[:-trim]
         # Keep only the first 100 values as real data.
         values[100:] = np.nan
         self.series[trim] = SparseSeries(values, index=full_index[:-trim])
Example #15
0
 def setup(self):
     """Populate ``self.series`` with SparseSeries of decreasing length.

     Stores the sizes on ``self.K`` / ``self.N`` and the full date range
     (as an array) on ``self.rng``. For each key ``i`` in 1..K a fresh random
     array of N values is drawn, its last ``i`` entries are dropped and
     everything from position 100 onwards is set to NaN. ``self.data`` and
     ``self.this_rng`` end up holding the values and index of the last
     series built.
     """
     self.K, self.N = 50, 50000
     full_range = date_range('1/1/2000', periods=self.N, freq='T')
     self.rng = np.asarray(full_range)
     self.series = {}
     for trim in range(1, self.K + 1):
         values = np.random.randn(self.N)[:-trim]
         # Keep only the first 100 values as real data.
         values[100:] = np.nan
         index = self.rng[:-trim]
         self.data, self.this_rng = values, index
         self.series[trim] = SparseSeries(values, index=index)
def test_quantile():
    """Check ``SparseDataFrame.quantile(0.1)`` against the dense result.

    The frame has two numeric columns and a trailing all-NaN row.
    """
    # GH 17386
    rows = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    level = 0.1

    result = SparseDataFrame(rows).quantile(level)

    expected_dense = DataFrame(rows).quantile(level)
    expected_sparse = SparseSeries(expected_dense)

    # The result must match both the dense outcome and its sparse form.
    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
Example #17
0
 def time_sparse_series_from_coo(self):
     """Call ``SparseSeries.from_coo`` on ``self.A`` and keep the result.

     The result is stored on ``self.ss``. Presumably an asv-style timing
     method, given the ``time_`` prefix; ``self.A`` is prepared elsewhere.
     """
     from_coo = SparseSeries.from_coo
     self.ss = from_coo(self.A)
Example #18
0
 def time_sparse_series_from_coo(self):
     """Call ``SparseSeries.from_coo`` on ``self.matrix``; discard the result.

     Presumably an asv-style timing method, given the ``time_`` prefix;
     ``self.matrix`` is prepared elsewhere.
     """
     from_coo = SparseSeries.from_coo
     from_coo(self.matrix)
Example #19
0
 def time_sparse_series_from_coo(self):
     """Run ``SparseSeries.from_coo`` over ``self.matrix`` without keeping it.

     Presumably an asv-style timing method, given the ``time_`` prefix;
     ``self.matrix`` is set up outside this method.
     """
     build_from_coo = SparseSeries.from_coo
     build_from_coo(self.matrix)
Example #20
0
 def time_sparse_series_from_coo(self):
     """Run ``SparseSeries.from_coo`` over ``self.A`` and store it on ``self.ss``.

     Presumably an asv-style timing method, given the ``time_`` prefix;
     ``self.A`` is set up outside this method.
     """
     build_from_coo = SparseSeries.from_coo
     self.ss = build_from_coo(self.A)