def test_isnull_datetime(self):
    """Check isnull/notnull on datetimes, DatetimeIndex and PeriodIndex."""
    # a plain datetime scalar is expected to be non-null
    self.assertFalse(isnull(datetime.now()))
    self.assertTrue(notnull(datetime.now()))

    # a freshly generated range has no missing entries
    dates = date_range('1/1/1990', periods=20)
    tm.assert_numpy_array_equal(notnull(dates),
                                np.ones(len(dates), dtype=bool))

    # overwrite the first entry with the iNaT sentinel and rebuild the index
    raw = np.asarray(dates)
    raw[0] = iNaT
    dates = DatetimeIndex(raw)
    null_mask = isnull(dates)
    self.assertTrue(null_mask[0])
    first_only = np.array([True] + [False] * (len(dates) - 1), dtype=bool)
    tm.assert_numpy_array_equal(null_mask, first_only)

    # GH 9129
    periods = dates.to_period(freq='M')
    null_mask = isnull(periods)
    self.assertTrue(null_mask[0])
    first_only = np.array([True] + [False] * (len(dates) - 1), dtype=bool)
    tm.assert_numpy_array_equal(null_mask, first_only)

    # without the leading entry nothing should be flagged
    null_mask = isnull(periods[1:])
    tm.assert_numpy_array_equal(null_mask,
                                np.zeros(len(null_mask), dtype=bool))
def test_datetime_other_units(self):
    """isnull/notnull are checked for each listed datetime64 unit."""
    idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02'])
    null_arr = np.array([False, True, False])
    # the index itself first, then its raw ndarray
    for container_of in (lambda: idx, lambda: idx.values):
        tm.assert_numpy_array_equal(isnull(container_of()), null_arr)
        tm.assert_numpy_array_equal(notnull(container_of()), ~null_arr)

    for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']:
        values = idx.values.astype('datetime64[{}]'.format(unit))

        null_arr = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(values), null_arr)
        tm.assert_numpy_array_equal(notnull(values), ~null_arr)

        null_ser = pd.Series([False, True, False])
        # default-dtype Series first, then the object-dtype variant
        for ctor_kwargs in ({}, {'dtype': object}):
            ser = pd.Series(values, **ctor_kwargs)
            tm.assert_series_equal(isnull(ser), null_ser)
            tm.assert_series_equal(notnull(ser), ~null_ser)
def _combine_match_index(self, other, func, level=None, fill_value=None):
    """
    Apply ``func`` column by column against ``other`` after aligning both
    operands on the union of their indexes.

    Parameters
    ----------
    other : object exposing ``index``, ``values``, ``fill_value`` and
        ``reindex`` (presumably a sparse series -- confirm with callers)
    func : callable taking two value arrays
    level, fill_value : must be None; anything else raises

    Returns
    -------
    New object built by ``self._constructor`` on the union index

    Raises
    ------
    NotImplementedError
        If ``fill_value`` or ``level`` is given.
    """
    if fill_value is not None:
        raise NotImplementedError("'fill_value' argument is not supported")
    if level is not None:
        raise NotImplementedError("'level' argument is not supported")

    union_index = self.index.union(other.index)

    # reindex only the operands whose index is not already the union object
    aligned = self
    if self.index is not union_index:
        aligned = self.reindex(union_index)
    if other.index is not union_index:
        other = other.reindex(union_index)

    combined = {}
    for col, series in compat.iteritems(aligned):
        combined[col] = func(series.values, other.values)

    # fill_value is a function of our operator
    if isnull(other.fill_value) or isnull(self.default_fill_value):
        result_fill = np.nan
    else:
        result_fill = func(np.float64(self.default_fill_value),
                           np.float64(other.fill_value))

    result = self._constructor(combined, index=union_index,
                               columns=self.columns,
                               default_fill_value=result_fill)
    return result.__finalize__(self)
def test_timedelta_other_units(self):
    """isnull/notnull are checked for each listed timedelta64 unit."""
    idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
    null_arr = np.array([False, True, False])
    # the index itself first, then its raw ndarray
    for container_of in (lambda: idx, lambda: idx.values):
        tm.assert_numpy_array_equal(isnull(container_of()), null_arr)
        tm.assert_numpy_array_equal(notnull(container_of()), ~null_arr)

    for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']:
        values = idx.values.astype('timedelta64[{}]'.format(unit))

        null_arr = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(values), null_arr)
        tm.assert_numpy_array_equal(notnull(values), ~null_arr)

        null_ser = pd.Series([False, True, False])
        # default-dtype Series first, then the object-dtype variant
        for ctor_kwargs in ({}, {'dtype': object}):
            ser = pd.Series(values, **ctor_kwargs)
            tm.assert_series_equal(isnull(ser), null_ser)
            tm.assert_series_equal(notnull(ser), ~null_ser)
def test_isnull_nat(self):
    """NaT held in a list or in an object ndarray must be flagged null."""
    builders = (
        lambda: [NaT],
        lambda: np.array([NaT], dtype=object),
    )
    for build in builders:
        outcome = isnull(build())
        tm.assert_numpy_array_equal(outcome, np.array([True]))
def test_isnull(self):
    """
    Exercise ``isnull`` on scalars and on Series, DataFrame, Panel and
    Panel4D inputs.

    For the containers the result is compared with applying ``isnull``
    through the container's own ``apply``.
    """
    self.assertFalse(isnull(1.))
    self.assertTrue(isnull(None))
    self.assertTrue(isnull(np.NaN))
    # float('nan') is truthy on its own, so asserting it directly proves
    # nothing; it has to be routed through isnull
    self.assertTrue(isnull(float('nan')))
    self.assertFalse(isnull(np.inf))
    self.assertFalse(isnull(-np.inf))

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isnull(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isnull(df)
        expected = df.apply(isnull)
        tm.assert_frame_equal(result, expected)

    # panel
    # warnings raised while building/using panels are recorded, not shown
    with catch_warnings(record=True):
        for p in [tm.makePanel(), tm.makePeriodPanel(),
                  tm.add_nans(tm.makePanel())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel_equal(result, expected)

    # panel 4d
    with catch_warnings(record=True):
        for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel4d_equal(result, expected)
def test_isnull(self):
    """
    Exercise ``isnull`` on scalars and on Series, DataFrame, Panel and
    Panel4D inputs.

    For the containers the result is compared with applying ``isnull``
    through the container's own ``apply``.
    """
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    # float('nan') is truthy on its own, so asserting it directly proves
    # nothing; it has to be routed through isnull
    assert isnull(float('nan'))
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isnull(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isnull(df)
        expected = df.apply(isnull)
        tm.assert_frame_equal(result, expected)

    # panel
    # warnings raised while building/using panels are recorded, not shown
    with catch_warnings(record=True):
        for p in [tm.makePanel(), tm.makePeriodPanel(),
                  tm.add_nans(tm.makePanel())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel_equal(result, expected)

    # panel 4d
    with catch_warnings(record=True):
        for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel4d_equal(result, expected)
def test_period(self):
    """isnull/notnull on a PeriodIndex and on Series built from it."""
    pidx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M')
    null_arr = np.array([False, True, False])
    tm.assert_numpy_array_equal(isnull(pidx), null_arr)
    tm.assert_numpy_array_equal(notnull(pidx), ~null_arr)

    null_ser = pd.Series([False, True, False])
    # default-dtype Series first, then the object-dtype variant
    for ctor_kwargs in ({}, {'dtype': object}):
        ser = pd.Series(pidx, **ctor_kwargs)
        tm.assert_series_equal(isnull(ser), null_ser)
        tm.assert_series_equal(notnull(ser), ~null_ser)
def f(x, y):
    """
    Apply ``op`` to ``x`` and ``y`` and write NaN wherever either input is
    null.

    ``op`` is not defined in this function; presumably it is bound by the
    enclosing scope -- confirm there.
    """
    left_null = isnull(x)
    right_null = isnull(y)
    either_null = left_null | right_null

    # numpy floating-point warnings from the operator are suppressed
    with np.errstate(all='ignore'):
        out = op(x, y)

    if not either_null.any():
        return out

    # a boolean result is switched to object dtype before NaN is written
    if is_bool_dtype(out):
        out = out.astype('O')
    np.putmask(out, either_null, np.nan)
    return out
def test_empty_object(self):
    """isnull on np.empty object arrays is expected to be all True."""
    shapes = [(4, 0), (4, )]
    for shape in shapes:
        blank = np.empty(shape=shape, dtype=object)
        flagged = isnull(blank)
        all_true = np.ones(shape=shape, dtype=bool)
        tm.assert_numpy_array_equal(flagged, all_true)
def _hash_scalar(val, encoding='utf8', hash_key=None):
    """
    Hash a single scalar value.

    Parameters
    ----------
    val : scalar
    encoding : string, default 'utf8'
        Forwarded to ``hash_array``.
    hash_key : forwarded to ``hash_array``

    Returns
    -------
    1d uint64 numpy array of hash value, of length 1
    """
    if isnull(val):
        # this is to be consistent with the _hash_categorical implementation
        sentinel = np.iinfo(np.uint64).max
        return np.array([sentinel], dtype='u8')

    if getattr(val, 'tzinfo', None) is not None:
        # for tz-aware datetimes, we need the underlying naive UTC value and
        # not the tz aware object or pd extension type (as
        # infer_dtype_from_scalar would do)
        if isinstance(val, tslib.Timestamp):
            stamp = val
        else:
            stamp = tslib.Timestamp(val)
        val = stamp.tz_convert(None)

    inferred_dtype, val = infer_dtype_from_scalar(val)
    boxed = np.array([val], dtype=inferred_dtype)
    return hash_array(boxed, hash_key=hash_key, encoding=encoding,
                      categorize=False)
def backfill_2d(values, limit=None, mask=None, dtype=None):
    """
    Run the dtype-specific in-place 2-D backfill routine on ``values``.

    Parameters
    ----------
    values : ndarray
    limit : forwarded to the fill routine
    mask : optional null mask; computed with ``isnull`` when omitted
    dtype : optional dtype used for dispatch; defaults to ``values.dtype``

    Returns
    -------
    values (converted through ``_ensure_float64`` for integer input)

    Raises
    ------
    ValueError
        If no fill routine is available for the dtype.
    """
    if dtype is None:
        dtype = values.dtype

    # pick the fill routine; the order of these checks is significant
    if is_float_dtype(values):
        filler = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        filler = _backfill_2d_datetime
    elif is_integer_dtype(values):
        values = _ensure_float64(values)
        filler = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        filler = algos.backfill_2d_inplace_object
    else:
        filler = None

    if filler is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = isnull(values)
    # the fill routines are handed the mask viewed as uint8
    mask = mask.view(np.uint8)

    # skip the call entirely when any dimension has length zero
    if np.all(values.shape):
        filler(values, mask, limit=limit)
    return values
def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
    """
    This is intended to be a drop-in replacement for np.argsort which
    handles NaNs. It adds ascending and na_position parameters.
    GH #6399, #5231

    Parameters
    ----------
    items : array-like or Categorical
    kind : sorting algorithm name forwarded to ``argsort``
    ascending : bool, default True
    na_position : {'last', 'first'}
        Where the positions of null entries go in the result. Not applied
        on the Categorical path, which delegates to ``items.argsort``.

    Returns
    -------
    ndarray of positions that sort ``items``

    Raises
    ------
    ValueError
        If ``na_position`` is neither 'last' nor 'first'.
    """
    # specially handle Categorical
    if is_categorical_dtype(items):
        # forward ``kind`` too, so the caller's algorithm choice is honoured
        return items.argsort(ascending=ascending, kind=kind)

    items = np.asanyarray(items)
    idx = np.arange(len(items))
    mask = isnull(items)
    non_nans = items[~mask]
    non_nan_idx = idx[~mask]
    nan_idx = np.nonzero(mask)[0]

    # for descending order the input is reversed before sorting and the
    # resulting indexer is reversed again afterwards
    if not ascending:
        non_nans = non_nans[::-1]
        non_nan_idx = non_nan_idx[::-1]
    indexer = non_nan_idx[non_nans.argsort(kind=kind)]
    if not ascending:
        indexer = indexer[::-1]

    # Finally, place the NaNs at the end or the beginning according to
    # na_position
    if na_position == 'last':
        indexer = np.concatenate([indexer, nan_idx])
    elif na_position == 'first':
        indexer = np.concatenate([nan_idx, indexer])
    else:
        raise ValueError('invalid na_position: {!r}'.format(na_position))
    return indexer
def test_notnull():
    """
    Exercise ``notnull`` on scalars, on a float ndarray with infinities
    under both settings of ``mode.use_inf_as_null``, and on Series.
    """
    assert notnull(1.)
    assert not notnull(None)
    assert not notnull(np.NaN)

    # option off: the infinities are asserted to be not-null
    with cf.option_context("mode.use_inf_as_null", False):
        assert notnull(np.inf)
        assert notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.all()

    # option on: only the two finite entries are asserted to be not-null
    with cf.option_context("mode.use_inf_as_null", True):
        assert not notnull(np.inf)
        assert not notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.sum() == 2

    with cf.option_context("mode.use_inf_as_null", False):
        for s in [
                tm.makeFloatSeries(), tm.makeStringSeries(),
                tm.makeObjectSeries(), tm.makeTimeSeries(),
                tm.makePeriodSeries()
        ]:
            # this test covers notnull, so its return type is what is checked
            assert isinstance(notnull(s), Series)
def _isfinite(values):
    """
    Build a boolean mask for ``values``.

    Datetime/timedelta input returns ``isnull(values)``. Everything else
    returns the negation of ``np.isfinite``, so True marks the entries
    that are NOT finite. Dtypes outside complex/float/integer/bool are
    cast to float64 before the check.
    """
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    dtype_checks = (is_complex_dtype, is_float_dtype,
                    is_integer_dtype, is_bool_dtype)
    if not any(check(values) for check in dtype_checks):
        values = values.astype('float64')
    return ~np.isfinite(values)
def wrapper(self, other):
    """
    Compare this TimedeltaIndex with ``other`` through the parent class
    method named by ``opname``, writing ``nat_result`` at NaT positions.

    ``opname`` and ``nat_result`` are not defined in this function;
    presumably they are bound by the enclosing factory -- confirm there.

    Raises
    ------
    TypeError
        If ``other`` is not timedelta-convertible and not list-like, or
        fails to parse as a timedelta.
    """
    msg = "cannot compare a TimedeltaIndex with type {0}"
    compare = getattr(super(TimedeltaIndex, self), opname)

    if _is_convertible_to_td(other) or other is NaT:
        try:
            other = _to_m8(other)
        except ValueError:
            # failed to parse as timedelta
            raise TypeError(msg.format(type(other)))
        result = compare(other)
        if isnull(other):
            result.fill(nat_result)
    elif not is_list_like(other):
        raise TypeError(msg.format(type(other)))
    else:
        other = TimedeltaIndex(other).values
        result = _values_from_object(compare(other))

        # locate NaT entries on the other side through the i8 view
        raw_other = other.values if isinstance(other, Index) else other
        o_mask = raw_other.view('i8') == iNaT
        if o_mask.any():
            result[o_mask] = nat_result

    if self.hasnans:
        result[self._isnan] = nat_result

    # support of bool dtype indexers
    return result if is_bool_dtype(result) else Index(result)
def test_notnull():
    """
    Exercise ``notnull`` on scalars, on a float ndarray with infinities
    under both settings of ``mode.use_inf_as_null``, and on Series.
    """
    assert notnull(1.)
    assert not notnull(None)
    assert not notnull(np.NaN)

    # option off: the infinities are asserted to be not-null
    with cf.option_context("mode.use_inf_as_null", False):
        assert notnull(np.inf)
        assert notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.all()

    # option on: only the two finite entries are asserted to be not-null
    with cf.option_context("mode.use_inf_as_null", True):
        assert not notnull(np.inf)
        assert not notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.sum() == 2

    with cf.option_context("mode.use_inf_as_null", False):
        for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]:
            # this test covers notnull, so its return type is what is checked
            assert isinstance(notnull(s), Series)
def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
    """
    NaN-aware stand-in for np.argsort with extra ``ascending`` and
    ``na_position`` parameters.
    GH #6399, #5231

    Categorical input is delegated to its own ``argsort``. For anything
    else the non-null entries are sorted and the positions of the null
    entries are put at the end ('last') or the start ('first').

    Raises
    ------
    ValueError
        If ``na_position`` is neither 'last' nor 'first'.
    """
    # specially handle Categorical
    if is_categorical_dtype(items):
        return items.argsort(ascending=ascending, kind=kind)

    items = np.asanyarray(items)
    positions = np.arange(len(items))
    null_mask = isnull(items)
    valid_values = items[~null_mask]
    valid_positions = positions[~null_mask]
    null_positions = np.nonzero(null_mask)[0]

    if ascending:
        order = valid_positions[valid_values.argsort(kind=kind)]
    else:
        # descending: reverse the input, sort, then reverse the indexer
        flipped_values = valid_values[::-1]
        flipped_positions = valid_positions[::-1]
        order = flipped_positions[flipped_values.argsort(kind=kind)][::-1]

    # Finally, place the NaNs at the end or the beginning according to
    # na_position
    if na_position == 'last':
        pieces = [order, null_positions]
    elif na_position == 'first':
        pieces = [null_positions, order]
    else:
        raise ValueError('invalid na_position: {!r}'.format(na_position))
    return np.concatenate(pieces)
def nanprod(values, axis=None, skipna=True):
    """
    Product of ``values`` along ``axis``.

    With ``skipna`` set and a non-integer dtype, null entries are replaced
    by 1 in a copy before multiplying. The raw product is then passed
    through ``_maybe_null_out`` together with the null mask.
    """
    null_mask = isnull(values)
    replace_nulls = skipna and not is_any_int_dtype(values)
    if replace_nulls:
        # 1 is the multiplicative identity, so replaced entries drop out
        values = values.copy()
        values[null_mask] = 1
    product = values.prod(axis)
    return _maybe_null_out(product, axis, null_mask)
def test_empty_object(self):
    """Compare isnull on np.empty object arrays with an all-True mask."""
    for dims in ((4, 0), (4,)):
        uninitialised = np.empty(shape=dims, dtype=object)
        tm.assert_numpy_array_equal(isnull(uninitialised),
                                    np.ones(shape=dims, dtype=bool))
def shift(self, periods, freq=None, axis=0):
    """
    Shift this sparse object by ``periods``.

    A zero shift returns a copy. A non-null fill value goes through a
    dense round trip. With ``freq`` only the index is shifted. Otherwise
    the stored positions are moved and those falling outside the object
    are dropped.
    """
    if periods == 0:
        return self.copy()

    # no special handling of fill values yet
    if not isnull(self.fill_value):
        dense_shifted = self.to_dense().shift(periods, freq=freq, axis=axis)
        return dense_shifted.to_sparse(fill_value=self.fill_value,
                                       kind=self.kind)

    if freq is not None:
        relabelled = self._constructor(
            self.sp_values, sparse_index=self.sp_index,
            index=self.index.shift(periods, freq),
            fill_value=self.fill_value)
        return relabelled.__finalize__(self)

    int_index = self.sp_index.to_int_index()
    moved = int_index.indices + periods
    # keep only the moved positions that still fall inside [0, length)
    lo, hi = moved.searchsorted([0, int_index.length])
    kept_index = _make_index(len(self), moved[lo:hi], self.sp_index)

    kept_values = self.sp_values[lo:hi].copy()
    arr = self.values._simple_new(kept_values, kept_index,
                                  fill_value=np.nan)
    return self._constructor(arr, index=self.index).__finalize__(self)
def test_isnull_lists(self):
    """isnull on plain (nested) lists is compared with all-False arrays."""
    cases = [
        (lambda: [[False]], lambda: np.array([[False]])),
        (lambda: [[1], [2]], lambda: np.array([[False], [False]])),
        # list of strings / unicode
        (lambda: ['foo', 'bar'], lambda: np.array([False, False])),
        (lambda: [u('foo'), u('bar')], lambda: np.array([False, False])),
    ]
    for make_input, make_expected in cases:
        outcome = isnull(make_input())
        tm.assert_numpy_array_equal(outcome, make_expected())
def mask_missing(arr, values_to_mask):
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ndarray-like exposing ``shape``
    values_to_mask : scalar or list-like of values to look for; null
        entries match the null entries of ``arr``

    Returns
    -------
    boolean mask; all False when ``values_to_mask`` holds nothing to match
    """
    dtype, values_to_mask = infer_dtype_from_array(values_to_mask)

    # fall back to an object array when the inferred dtype cannot be used
    try:
        values_to_mask = np.array(values_to_mask, dtype=dtype)
    except Exception:
        values_to_mask = np.array(values_to_mask, dtype=object)

    na_mask = isnull(values_to_mask)
    nonna = values_to_mask[~na_mask]

    mask = None
    for x in nonna:
        if mask is None:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask = False
            else:
                mask = arr == x

            # if x is a string and arr is not, then we get False and we must
            # expand the mask to size arr.shape
            if is_scalar(mask):
                mask = np.zeros(arr.shape, dtype=bool)
        else:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask |= False
            else:
                mask |= arr == x

    if na_mask.any():
        if mask is None:
            mask = isnull(arr)
        else:
            mask |= isnull(arr)

    # nothing to match at all (e.g. empty values_to_mask): still return a
    # boolean array rather than None
    if mask is None:
        mask = np.zeros(arr.shape, dtype=bool)

    return mask
def nansem(values, axis=None, skipna=True, ddof=1):
    """
    Compute ``sqrt(variance) / sqrt(count)`` of ``values`` along ``axis``.

    The variance comes from ``nanvar`` and the count from
    ``_get_counts_nanvar``. Non-float input is cast to 'f8' first.
    """
    # NOTE(review): the result of this first call is discarded; it looks
    # like a validation pass on the un-cast values -- confirm before
    # removing
    nanvar(values, axis, skipna, ddof=ddof)

    null_mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
    n_obs, _ = _get_counts_nanvar(null_mask, axis, ddof, values.dtype)
    variance = nanvar(values, axis, skipna, ddof=ddof)

    return np.sqrt(variance) / np.sqrt(n_obs)
def test_isnull_numpy_nat(self):
    """pandas NaT and numpy NaT scalars in one array must all be null."""
    nat_flavours = [
        NaT,
        np.datetime64('NaT'),
        np.timedelta64('NaT'),
        np.datetime64('NaT', 's'),
    ]
    flagged = isnull(np.array(nat_flavours))
    tm.assert_numpy_array_equal(flagged, np.array([True] * 4))
def _bins_to_cuts(x, bins, right=True, labels=None,
                  precision=3, include_lowest=False,
                  dtype=None, duplicates='raise'):
    """
    Assign every element of ``x`` to one of ``bins``.

    Returns
    -------
    (result, bins)
        ``result`` holds labels taken from a Categorical, or integer bin
        codes when ``labels`` is False (cast to float with NaN if any
        element is null or outside the bins). ``bins`` is returned with
        duplicate edges removed when ``duplicates='drop'`` applied.

    Raises
    ------
    ValueError
        For an invalid ``duplicates`` value, for duplicate edges with
        ``duplicates='raise'``, or for a wrong number of labels.
    """
    if duplicates not in ('raise', 'drop'):
        raise ValueError("invalid value for 'duplicates' parameter, "
                         "valid options are: raise, drop")

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        positions = bins.get_indexer(x)
        taken = algos.take_nd(bins, positions)
        return Categorical(taken, categories=bins, ordered=True), bins

    deduped = algos.unique(bins)
    has_duplicates = len(deduped) < len(bins) and len(bins) != 2
    if has_duplicates:
        if duplicates == 'raise':
            raise ValueError("Bin edges must be unique: {}.\nYou "
                             "can drop duplicate edges by setting "
                             "the 'duplicates' kwarg".format(repr(bins)))
        bins = deduped

    side = 'left' if right else 'right'
    codes = _ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        codes[x == bins[0]] = 1

    # null input, or a position before the first / after the last edge
    unbinned = isnull(x) | (codes == len(bins)) | (codes == 0)
    any_unbinned = unbinned.any()

    if labels is False:
        result = codes - 1
        if any_unbinned:
            result = result.astype(np.float64)
            np.putmask(result, unbinned, np.nan)
        return result, bins

    if labels is None:
        labels = _format_labels(bins, precision, right=right,
                                include_lowest=include_lowest,
                                dtype=dtype)
    elif len(labels) != len(bins) - 1:
        raise ValueError('Bin labels must be one fewer than '
                         'the number of bin edges')
    if not is_categorical_dtype(labels):
        labels = Categorical(labels, categories=labels, ordered=True)

    # unbinned entries get code 0, so the take below sees -1 for them
    np.putmask(codes, unbinned, 0)
    result = algos.take_nd(labels, codes - 1)
    return result, bins
def _bins_to_cuts(x, bins, right=True, labels=None,
                  precision=3, include_lowest=False,
                  dtype=None, duplicates='raise'):
    """
    Assign every element of ``x`` to one of ``bins``.

    Parameters
    ----------
    x : array-like of values to bin
    bins : bin edges, or an IntervalIndex (fast path)
    right : bool, selects the ``searchsorted`` side ('left' when True)
    labels : None (generated with ``_format_labels``), False (return
        integer codes), or one label per bin
    precision, include_lowest, dtype : forwarded to ``_format_labels``;
        ``include_lowest`` also puts values equal to the first edge into
        the first bin
    duplicates : {'raise', 'drop'}

    Returns
    -------
    (result, bins)

    Raises
    ------
    ValueError
        For an invalid ``duplicates`` value, for duplicate edges with
        ``duplicates='raise'``, or for a wrong number of labels.
    """
    if duplicates not in ['raise', 'drop']:
        raise ValueError("invalid value for 'duplicates' parameter, "
                         "valid options are: raise, drop")

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = algos.take_nd(bins, ids)
        result = Categorical(result, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == 'raise':
            raise ValueError("Bin edges must be unique: {}.\nYou "
                             "can drop duplicate edges by setting "
                             "the 'duplicates' kwarg".format(repr(bins)))
        else:
            bins = unique_bins

    side = 'left' if right else 'right'
    ids = _ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        ids[x == bins[0]] = 1

    # null input, or a position before the first / after the last edge
    na_mask = isnull(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if labels is None:
            labels = _format_labels(bins, precision, right=right,
                                    include_lowest=include_lowest,
                                    dtype=dtype)
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
        if not is_categorical_dtype(labels):
            # pass the labels as the categories too, so the category order
            # follows the bin order instead of being inferred from values
            labels = Categorical(labels, categories=labels, ordered=True)

        # unbinned entries get code 0, so the take below sees -1 for them
        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
def fillna(self, value, downcast=None):
    """
    Replace null stored values with ``value``.

    The returned object keeps this object's sparse index. Its fill value
    becomes ``value`` when the current fill value is null
    (``_null_fill_value``) and is left unchanged otherwise.

    Raises
    ------
    NotImplementedError
        If ``downcast`` is given.
    """
    if downcast is not None:
        raise NotImplementedError

    # float-typed data gets a float replacement value
    if issubclass(self.dtype.type, np.floating):
        value = float(value)

    stored = self.sp_values
    filled = np.where(isnull(stored), value, stored)
    if self._null_fill_value:
        new_fill = value
    else:
        new_fill = self.fill_value
    return self._simple_new(filled, self.sp_index, fill_value=new_fill)
def nanskew(values, axis=None, skipna=True):
    """
    Compute the sample skewness.

    The statistic is the adjusted Fisher-Pearson standardized moment
    coefficient G1, computed directly from the second and third central
    moments. Results with fewer than three counted observations are NaN;
    a zero second moment gives 0.
    """
    values = _values_from_object(values)
    null_mask = isnull(values)
    if is_float_dtype(values.dtype):
        count = _get_counts(null_mask, axis, dtype=values.dtype)
    else:
        values = values.astype('f8')
        count = _get_counts(null_mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, null_mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    centered = values - mean
    if skipna:
        # zero the null slots again so they add nothing to the moments
        np.putmask(centered, null_mask, 0)
    centered_sq = centered ** 2
    centered_cube = centered_sq * centered

    # floating point error
    m2 = _zero_out_fperr(centered_sq.sum(axis, dtype=np.float64))
    m3 = _zero_out_fperr(centered_cube.sum(axis, dtype=np.float64))

    with np.errstate(invalid='ignore', divide='ignore'):
        scale = count * (count - 1) ** 0.5 / (count - 2)
        ratio = m3 / m2 ** 1.5
        result = scale * ratio

    out_dtype = values.dtype
    if is_float_dtype(out_dtype):
        result = result.astype(out_dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result

    if count < 3:
        return np.nan
    return 0 if m2 == 0 else result
def test_0d_array(self):
    """isnull on zero-dimensional arrays, native dtype then object dtype."""
    # the second pass is the "test object dtype" variant
    for array_kwargs in ({}, {'dtype': object}):
        self.assertTrue(isnull(np.array(np.nan, **array_kwargs)))
        self.assertFalse(isnull(np.array(0.0, **array_kwargs)))
        self.assertFalse(isnull(np.array(0, **array_kwargs)))
def test_0d_array(self):
    """isnull on zero-dimensional arrays, native dtype then object dtype."""
    # the second pass is the "test object dtype" variant
    for array_kwargs in ({}, {'dtype': object}):
        nan_0d = np.array(np.nan, **array_kwargs)
        assert isnull(nan_0d)
        float_0d = np.array(0.0, **array_kwargs)
        assert not isnull(float_0d)
        int_0d = np.array(0, **array_kwargs)
        assert not isnull(int_0d)
def fillna(self, value, downcast=None):
    """
    Replace null stored values with ``value``.

    Parameters
    ----------
    value : scalar replacement; converted with ``float`` for float data
    downcast : must be None

    Returns
    -------
    New object from ``_simple_new`` sharing this object's sparse index.
    Its fill value is ``value`` when the current fill value is null
    (``_null_fill_value``) and is left unchanged otherwise.

    Raises
    ------
    NotImplementedError
        If ``downcast`` is given.
    """
    if downcast is not None:
        raise NotImplementedError

    if issubclass(self.dtype.type, np.floating):
        value = float(value)

    # always fill the stored values: nulls can sit in sp_values even when
    # the fill value itself is null, and swapping only the fill value
    # would leave them behind
    new_values = np.where(isnull(self.sp_values), value, self.sp_values)
    fill_value = value if self._null_fill_value else self.fill_value

    return self._simple_new(new_values, self.sp_index,
                            fill_value=fill_value)
def test_timedelta_other_units(self):
    """Run the isnull/notnull checks over several timedelta64 units."""

    def check(make_obj, expected, assert_equal):
        # isnull must match ``expected`` and notnull its negation
        assert_equal(isnull(make_obj()), expected)
        assert_equal(notnull(make_obj()), ~expected)

    idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
    exp = np.array([False, True, False])
    check(lambda: idx, exp, tm.assert_numpy_array_equal)
    check(lambda: idx.values, exp, tm.assert_numpy_array_equal)

    dtypes = ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
              'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
              'timedelta64[ns]']
    for dtype in dtypes:
        values = idx.values.astype(dtype)

        exp = np.array([False, True, False])
        check(lambda: values, exp, tm.assert_numpy_array_equal)

        exp = pd.Series([False, True, False])
        native = pd.Series(values)
        check(lambda: native, exp, tm.assert_series_equal)

        boxed = pd.Series(values, dtype=object)
        check(lambda: boxed, exp, tm.assert_series_equal)
def test_datetime_other_units(self):
    """Run the isnull/notnull checks over several datetime64 units."""

    def check(make_obj, expected, assert_equal):
        # isnull must match ``expected`` and notnull its negation
        assert_equal(isnull(make_obj()), expected)
        assert_equal(notnull(make_obj()), ~expected)

    idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02'])
    exp = np.array([False, True, False])
    check(lambda: idx, exp, tm.assert_numpy_array_equal)
    check(lambda: idx.values, exp, tm.assert_numpy_array_equal)

    dtypes = ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
              'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
              'datetime64[ns]']
    for dtype in dtypes:
        values = idx.values.astype(dtype)

        exp = np.array([False, True, False])
        check(lambda: values, exp, tm.assert_numpy_array_equal)

        exp = pd.Series([False, True, False])
        native = pd.Series(values)
        check(lambda: native, exp, tm.assert_series_equal)

        boxed = pd.Series(values, dtype=object)
        check(lambda: boxed, exp, tm.assert_series_equal)
def dropna(self, axis=0, inplace=False, **kwargs):
    """
    Analogous to Series.dropna. If fill_value=NaN, returns a dense Series

    Parameters
    ----------
    axis : passed to ``_get_axis_number`` (0 is used for falsy input)
    inplace : must be False
    **kwargs : accepted and ignored

    Returns
    -------
    The dense valid values when the fill value is null; otherwise those
    values with entries equal to the fill value removed, converted back
    to sparse with the same fill value.

    Raises
    ------
    NotImplementedError
        If ``inplace`` is True.
    """
    # the resolved number is not used below; the call is kept because it
    # presumably rejects an invalid axis -- confirm
    self._get_axis_number(axis or 0)

    # reject the unsupported mode before paying for the dense conversion
    if inplace:
        raise NotImplementedError("Cannot perform inplace dropna"
                                  " operations on a SparseSeries")

    # TODO: make more efficient
    dense_valid = self.to_dense().valid()
    if isnull(self.fill_value):
        return dense_valid

    dense_valid = dense_valid[dense_valid != self.fill_value]
    return dense_valid.to_sparse(fill_value=self.fill_value)
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """
    Utility returning ``(values, mask, dtype, dtype_max)``.

    ``mask`` comes from ``_isfinite`` when ``isfinite`` is set and from
    ``isnull`` otherwise. With ``skipna`` the masked entries are replaced
    by the resolved fill value. ``copy=True`` forces a copy of the values
    first. ``dtype`` is the original dtype and ``dtype_max`` its
    platform-independent wide counterpart.
    """
    values = _values_from_object(values)
    mask = _isfinite(values) if isfinite else isnull(values)

    dtype = values.dtype
    can_hold_fill = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    # the copy is taken whenever requested, whether or not we fill
    if copy:
        values = values.copy()
    if skipna:
        if can_hold_fill:
            np.putmask(values, mask, fill_value)
        else:
            # promote if needed
            values, _ = maybe_upcast_putmask(values, mask, fill_value)

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max
def _ensure_datetimelike_to_i8(other):
    """
    Helper coercing a scalar or array-like to its i8 representation.

    A null scalar becomes ``iNaT``; an index contributes its ``asi8``
    (after ``tz_localize(None)`` when it carries a tz); anything else is
    viewed as 'i8', falling back to ``Index(other).asi8``.
    """
    if lib.isscalar(other) and isnull(other):
        return iNaT

    if isinstance(other, ABCIndexClass):
        # convert tz if needed
        if getattr(other, 'tz', None) is not None:
            other = other.tz_localize(None)
        return other.asi8

    try:
        return np.array(other, copy=False).view('i8')
    except TypeError:
        # period arrays cannot be coerced to int this way
        return Index(other).asi8
def make_sparse(arr, kind='block', fill_value=None):
    """
    Convert ndarray to sparse format

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value
        When None, ``na_value_for_dtype(arr.dtype)`` is used.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, scalar)

    Raises
    ------
    TypeError
        If the sanitized input has more than one dimension.
    """
    arr = _sanitize_values(arr)
    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isnull(fill_value):
        keep = notnull(arr)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)
        keep = arr != fill_value

    n_items = len(arr)
    if n_items == keep.size:
        positions = keep.nonzero()[0].astype(np.int32)
    else:
        # the arr is a SparseArray
        positions = keep.sp_index.indices

    index = _make_index(n_items, positions, kind)
    kept_values = arr[keep]
    return kept_values, index, fill_value
def test_nan_to_nat_conversions():
    """NaN assigned into a Timestamp column must be stored as NaT."""
    frame = DataFrame(dict({
        'A': np.asarray(lrange(10), dtype='float64'),
        'B': Timestamp('20010101'),
    }))
    frame.iloc[3:6, :] = np.nan
    stored = frame.loc[4, 'B'].value
    assert (stored == tslib.iNaT)

    # the same through a direct setitem on the Series' internal data
    col = frame['B'].copy()
    row_slice = tuple([slice(8, 9)])
    col._data = col._data.setitem(indexer=row_slice, value=np.nan)
    assert (isnull(col[8]))

    # numpy < 1.7.0 is wrong
    from distutils.version import LooseVersion
    if LooseVersion(np.__version__) >= '1.7.0':
        nat_as_int = np.datetime64('NaT').astype(np.int64)
        assert (col[8].value == nat_as_int)