def _simple_new(cls, data, sp_index, fill_value):
    """
    Build a ``cls`` instance as a view on ``data`` and attach the sparse
    index and fill value to it.

    Parameters
    ----------
    data : object exposing ``dtype``, ``astype`` and ``view``
        Presumably the ndarray of stored (non-fill) values -- confirm
        against callers.
    sp_index : SparseIndex
        Must already be a SparseIndex; no conversion is attempted.
    fill_value : scalar or None
        When None, NaN is used if ``sp_index.ngaps > 0``; otherwise the
        value returned by ``na_value_for_dtype(data.dtype)`` is used.

    Returns
    -------
    result : cls
        ``data.view(cls)`` with ``sp_index`` and ``_fill_value`` set.

    Raises
    ------
    ValueError
        If ``sp_index`` is not a SparseIndex.
    """
    if not isinstance(sp_index, SparseIndex):
        # caller must pass SparseIndex
        raise ValueError('sp_index must be a SparseIndex')

    if fill_value is None:
        if sp_index.ngaps > 0:
            # has missing hole
            fill_value = np.nan
        else:
            fill_value = na_value_for_dtype(data.dtype)

    if (is_integer_dtype(data) and is_float(fill_value) and
            sp_index.ngaps > 0):
        # if float fill_value is being included in dense repr,
        # convert values to float
        data = data.astype(float)

    # sp_index was validated on entry and is never rebound, so no second
    # type check is needed before attaching it.
    result = data.view(cls)
    result.sp_index = sp_index
    result._fill_value = fill_value
    return result
def test_na_value_for_dtype():
    """Check the value ``na_value_for_dtype`` returns per dtype family."""
    # datetime-like dtypes (naive, timedelta, tz-aware) must yield NaT
    datetimelike = (
        np.dtype('M8[ns]'),
        np.dtype('m8[ns]'),
        DatetimeTZDtype('datetime64[ns, US/Eastern]'),
    )
    for dtype in datetimelike:
        assert na_value_for_dtype(dtype) is NaT

    # unsigned and signed integers compare equal to 0
    integer_codes = ('u1', 'u2', 'u4', 'u8', 'i1', 'i2', 'i4', 'i8')
    for code in integer_codes:
        assert na_value_for_dtype(np.dtype(code)) == 0

    # bool must yield the False singleton itself
    assert na_value_for_dtype(np.dtype('bool')) is False

    # floats, then object, must yield NaN
    for code in ('f2', 'f4', 'f8', 'O'):
        assert np.isnan(na_value_for_dtype(np.dtype(code)))
def make_sparse(arr, kind='block', fill_value=None):
    """
    Split a one-dimensional array into its stored values and the index
    of the positions they occupy.

    Parameters
    ----------
    arr : ndarray
        Passed through ``_sanitize_values`` before use.
    kind : {'block', 'integer'}
        Forwarded to ``_make_index``.
    fill_value : NaN or another value
        Entries equal to this value (or null entries, when it is null)
        are dropped. Defaults to ``na_value_for_dtype(arr.dtype)``.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, scalar)

    Raises
    ------
    TypeError
        If the sanitized input has more than one dimension.
    """
    values = _sanitize_values(arr)
    if values.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(values.dtype)

    if isnull(fill_value):
        keep = notnull(values)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(values):
            values = values.astype(object)
        keep = values != fill_value

    n_total = len(values)
    if n_total == keep.size:
        positions = keep.nonzero()[0].astype(np.int32)
    else:
        # the arr is a SparseArray
        positions = keep.sp_index.indices

    sparse_index = _make_index(n_total, positions, kind)
    stored = values[keep]
    return stored, sparse_index, fill_value
def make_sparse(arr, kind='block', fill_value=None):
    """
    Convert ndarray to sparse format

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value
        Defaults to ``na_value_for_dtype(arr.dtype)`` when None.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, scalar)

    Raises
    ------
    TypeError
        If the sanitized input has more than one dimension.
    """
    arr = _sanitize_values(arr)

    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isnull(fill_value):
        mask = notnull(arr)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)
        mask = arr != fill_value

    length = len(arr)
    if length != mask.size:
        # the arr is a SparseArray
        indices = mask.sp_index.indices
    else:
        # positions of the kept entries, without building a full-length
        # arange temporary first
        indices = mask.nonzero()[0].astype(np.int32)

    index = _make_index(length, indices, kind)
    sparsified_values = arr[mask]
    return sparsified_values, index, fill_value