def backfill_2d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None if is_float_dtype(values): _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_2d_datetime elif is_integer_dtype(values): values = _ensure_float64(values) _method = algos.backfill_2d_inplace_float64 elif values.dtype == np.object_: _method = algos.backfill_2d_inplace_object if _method is None: raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) if mask is None: mask = isnull(values) mask = mask.view(np.uint8) if np.all(values.shape): _method(values, mask, limit=limit) else: # for test coverage pass return values
def unique1d(values): """ Hash table-based unique """ if np.issubdtype(values.dtype, np.floating): table = htable.Float64HashTable(len(values)) uniques = np.array(table.unique(_ensure_float64(values)), dtype=np.float64) elif np.issubdtype(values.dtype, np.datetime64): table = htable.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) uniques = uniques.view('M8[ns]') elif np.issubdtype(values.dtype, np.timedelta64): table = htable.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) uniques = uniques.view('m8[ns]') elif np.issubdtype(values.dtype, np.signedinteger): table = htable.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) elif np.issubdtype(values.dtype, np.unsignedinteger): table = htable.UInt64HashTable(len(values)) uniques = table.unique(_ensure_uint64(values)) else: # its cheaper to use a String Hash Table than Object if lib.infer_dtype(values) in ['string']: table = htable.StringHashTable(len(values)) else: table = htable.PyObjectHashTable(len(values)) uniques = table.unique(_ensure_object(values)) return uniques
def _get_data_algo(values, func_map): f = None if is_float_dtype(values): f = func_map['float64'] values = _ensure_float64(values) elif needs_i8_conversion(values): f = func_map['int64'] values = values.view('i8') elif is_signed_integer_dtype(values): f = func_map['int64'] values = _ensure_int64(values) elif is_unsigned_integer_dtype(values): f = func_map['uint64'] values = _ensure_uint64(values) else: values = _ensure_object(values) # its cheaper to use a String Hash Table than Object if lib.infer_dtype(values) in ['string']: try: f = func_map['string'] except KeyError: pass if f is None: f = func_map['object'] return f, values
def _value_counts_arraylike(values, dropna=True): is_datetimetz_type = is_datetimetz(values) is_period_type = (is_period_dtype(values) or is_period_arraylike(values)) orig = values from pandas.core.series import Series values = Series(values).values dtype = values.dtype if needs_i8_conversion(dtype) or is_period_type: from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex if is_period_type: # values may be an object values = PeriodIndex(values) freq = values.freq values = values.view(np.int64) keys, counts = htable.value_count_int64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] # convert the keys back to the dtype we came in keys = keys.astype(dtype) # dtype handling if is_datetimetz_type: keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz) if is_period_type: keys = PeriodIndex._simple_new(keys, freq=freq) elif is_signed_integer_dtype(dtype): values = _ensure_int64(values) keys, counts = htable.value_count_int64(values, dropna) elif is_unsigned_integer_dtype(dtype): values = _ensure_uint64(values) keys, counts = htable.value_count_uint64(values, dropna) elif is_float_dtype(dtype): values = _ensure_float64(values) keys, counts = htable.value_count_float64(values, dropna) else: values = _ensure_object(values) keys, counts = htable.value_count_object(values, dropna) mask = isnull(values) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) return keys, counts
def _value_counts_arraylike(values, dropna=True): is_datetimetz_type = is_datetimetz(values) is_period = (isinstance(values, ABCPeriodIndex) or is_period_arraylike(values)) orig = values from pandas.core.series import Series values = Series(values).values dtype = values.dtype if is_datetime_or_timedelta_dtype(dtype) or is_period: from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex if is_period: values = PeriodIndex(values) freq = values.freq values = values.view(np.int64) keys, counts = htable.value_count_scalar64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] # convert the keys back to the dtype we came in keys = keys.astype(dtype) # dtype handling if is_datetimetz_type: if isinstance(orig, ABCDatetimeIndex): tz = orig.tz else: tz = orig.dt.tz keys = DatetimeIndex._simple_new(keys, tz=tz) if is_period: keys = PeriodIndex._simple_new(keys, freq=freq) elif is_integer_dtype(dtype): values = _ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) elif is_float_dtype(dtype): values = _ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: values = _ensure_object(values) mask = isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) return keys, counts
def duplicated(values, keep='first'): """ Return boolean ndarray denoting duplicate values. .. versionadded:: 0.19.0 Parameters ---------- values : ndarray-like Array over which to check for duplicate values. keep : {'first', 'last', False}, default 'first' - ``first`` : Mark duplicates as ``True`` except for the first occurrence. - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. Returns ------- duplicated : ndarray """ dtype = values.dtype # no need to revert to original type if needs_i8_conversion(dtype): values = values.view(np.int64) elif is_period_arraylike(values): from pandas.tseries.period import PeriodIndex values = PeriodIndex(values).asi8 elif is_categorical_dtype(dtype): values = values.values.codes elif isinstance(values, (ABCSeries, ABCIndex)): values = values.values if is_signed_integer_dtype(dtype): values = _ensure_int64(values) duplicated = htable.duplicated_int64(values, keep=keep) elif is_unsigned_integer_dtype(dtype): values = _ensure_uint64(values) duplicated = htable.duplicated_uint64(values, keep=keep) elif is_float_dtype(dtype): values = _ensure_float64(values) duplicated = htable.duplicated_float64(values, keep=keep) else: values = _ensure_object(values) duplicated = htable.duplicated_object(values, keep=keep) return duplicated
def _get_data_algo(values, func_map): if is_float_dtype(values): f = func_map['float64'] values = _ensure_float64(values) elif needs_i8_conversion(values): f = func_map['int64'] values = values.view('i8') elif is_integer_dtype(values): f = func_map['int64'] values = _ensure_int64(values) else: f = func_map['generic'] values = _ensure_object(values) return f, values
def pad_1d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None if is_float_dtype(values): _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_1d_datetime elif is_integer_dtype(values): values = _ensure_float64(values) _method = algos.pad_inplace_float64 elif values.dtype == np.object_: _method = algos.pad_inplace_object if _method is None: raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) if mask is None: mask = isnull(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) return values
def unique1d(values): """ Hash table-based unique """ if np.issubdtype(values.dtype, np.floating): table = _hash.Float64HashTable(len(values)) uniques = np.array(table.unique(_ensure_float64(values)), dtype=np.float64) elif np.issubdtype(values.dtype, np.datetime64): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) uniques = uniques.view("M8[ns]") elif np.issubdtype(values.dtype, np.timedelta64): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) uniques = uniques.view("m8[ns]") elif np.issubdtype(values.dtype, np.integer): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) else: table = _hash.PyObjectHashTable(len(values)) uniques = table.unique(_ensure_object(values)) return uniques
def unique1d(values): """ Hash table-based unique """ if np.issubdtype(values.dtype, np.floating): table = _hash.Float64HashTable(len(values)) uniques = np.array(table.unique(_ensure_float64(values)), dtype=np.float64) elif np.issubdtype(values.dtype, np.datetime64): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) uniques = uniques.view('M8[ns]') elif np.issubdtype(values.dtype, np.timedelta64): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) uniques = uniques.view('m8[ns]') elif np.issubdtype(values.dtype, np.integer): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) else: table = _hash.PyObjectHashTable(len(values)) uniques = table.unique(_ensure_object(values)) return uniques