def _factorize_keys(lk, rk, sort=True):
    """Factorize left/right join keys into shared integer labels.

    Returns (llab, rlab, count): label arrays for both sides drawn from
    one common factorization, plus the number of distinct groups
    (including a trailing NA group when either side had -1 labels).
    """
    # Fast int64 factorizer when both sides are integral; otherwise the
    # generic object-dtype factorizer.
    if com.is_integer_dtype(lk) and com.is_integer_dtype(rk):
        klass = lib.Int64Factorizer
        lk = com._ensure_int64(lk)
        rk = com._ensure_int64(rk)
    else:
        klass = lib.Factorizer
        lk = com._ensure_object(lk)
        rk = com._ensure_object(rk)

    rizer = klass(max(len(lk), len(rk)))

    # Factorize both sides with the SAME rizer so labels are comparable.
    llab, _ = rizer.factorize(lk)
    rlab, _ = rizer.factorize(rk)

    count = rizer.get_count()

    if sort:
        llab, rlab = _sort_labels(rizer.uniques, llab, rlab)

    # NA group: -1 labels mark missing keys; remap them to a trailing
    # group id of their own so NAs on either side share one group.
    lmask = llab == -1
    lany = lmask.any()
    rmask = rlab == -1
    rany = rmask.any()

    if lany or rany:
        if lany:
            np.putmask(llab, lmask, count)
        if rany:
            np.putmask(rlab, rmask, count)
        count += 1

    return llab, rlab, count
def wrapper(self, other):
    """Wrap a logical ``na_op`` for Series, filling NAs per operand dtypes.

    Int & int operands keep integer semantics (NA -> 0); any other mix is
    coerced to bool (NA -> False) before/after applying ``na_op``.
    """
    is_self_int_dtype = com.is_integer_dtype(self.dtype)

    fill_int = lambda x: x.fillna(0)
    fill_bool = lambda x: x.fillna(False).astype(bool)

    if isinstance(other, pd.Series):
        name = _maybe_match_name(self, other)
        # align the right-hand operand to self's index before operating
        other = other.reindex_like(self)
        is_other_int_dtype = com.is_integer_dtype(other.dtype)
        other = fill_int(other) if is_other_int_dtype else fill_bool(other)

        # only int & int keeps integer dtype; otherwise result is boolean
        filler = (fill_int if is_self_int_dtype and is_other_int_dtype
                  else fill_bool)
        return filler(self._constructor(na_op(self.values, other.values),
                                        index=self.index, name=name))

    elif isinstance(other, pd.DataFrame):
        # defer to the DataFrame implementation
        return NotImplemented

    else:
        # scalars, list, tuple, np.array
        filler = (fill_int if is_self_int_dtype and
                  com.is_integer_dtype(np.asarray(other)) else fill_bool)
        return filler(self._constructor(na_op(self.values, other),
                                        index=self.index)).__finalize__(self)
def wrapper(self, other):
    """Wrap a logical ``na_op`` for Series operands.

    Int & int operands keep integer semantics (NA -> 0); any other mix is
    coerced to bool (NA -> False) before/after applying ``na_op``.
    """
    is_self_int_dtype = is_integer_dtype(self.dtype)

    fill_int = lambda x: x.fillna(0)
    fill_bool = lambda x: x.fillna(False).astype(bool)

    if isinstance(other, pd.Series):
        name = _maybe_match_name(self, other)
        # align the right-hand operand to self's index first
        other = other.reindex_like(self)
        is_other_int_dtype = is_integer_dtype(other.dtype)
        other = fill_int(other) if is_other_int_dtype else fill_bool(other)

        # only int & int keeps integer dtype; everything else is boolean
        filler = (fill_int if is_self_int_dtype and is_other_int_dtype
                  else fill_bool)
        return filler(self._constructor(na_op(self.values, other.values),
                                        index=self.index, name=name))

    elif isinstance(other, pd.DataFrame):
        # defer to the DataFrame implementation
        return NotImplemented

    else:
        # scalars, list, tuple, np.array
        filler = (fill_int if is_self_int_dtype and
                  is_integer_dtype(np.asarray(other)) else fill_bool)
        return filler(self._constructor(na_op(self.values, other),
                                        index=self.index)).__finalize__(self)
def test_delevel_infer_dtype(self):
    # mixed-type MultiIndex levels should keep their dtypes when moved
    # back into columns by reset_index()
    combos = cart_product(["foo", "bar"], [10, 20], [1.0, 1.1])
    index = MultiIndex.from_tuples(list(combos),
                                   names=["prm0", "prm1", "prm2"])
    df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"],
                   index=index)
    deleveled = df.reset_index()
    self.assert_(com.is_integer_dtype(deleveled["prm1"]))
    self.assert_(com.is_float_dtype(deleveled["prm2"]))
def _prep_values(self, values=None, kill_inf=True, how=None):
    """Coerce input to a float64 ndarray suitable for rolling ops.

    Parameters
    ----------
    values : ndarray, optional
        Defaults to the selected object's values.
    kill_inf : bool, default True
        Replace +/-inf with NaN (on a copy).
    how : unused; kept for interface compatibility.

    Raises
    ------
    NotImplementedError : for datetimelike (i8-convertible) dtypes
    TypeError : for dtypes that cannot be coerced to float64
    """
    if values is None:
        values = getattr(self._selected_obj, 'values', self._selected_obj)

    # GH #12373 : rolling functions error on float32 data
    # make sure the data is coerced to float64
    # NOTE: the float and integer branches were identical; merged.
    if (com.is_float_dtype(values.dtype) or
            com.is_integer_dtype(values.dtype)):
        values = com._ensure_float64(values)
    elif com.needs_i8_conversion(values.dtype):
        raise NotImplementedError("ops for {action} for this "
                                  "dtype {dtype} are not "
                                  "implemented".format(
                                      action=self._window_type,
                                      dtype=values.dtype))
    else:
        try:
            values = com._ensure_float64(values)
        except (ValueError, TypeError):
            raise TypeError("cannot handle this type -> {0}"
                            "".format(values.dtype))

    if kill_inf:
        # copy first so the caller's data is never mutated
        values = values.copy()
        values[np.isinf(values)] = np.NaN

    return values
def _get_data_algo(values, func_map):
    """Pick the dtype-specialized function from ``func_map`` and coerce
    ``values`` to the matching representation.

    Returns (func, coerced_values).
    """
    mask = None
    if com.is_float_dtype(values):
        f = func_map['float64']
        values = com._ensure_float64(values)
    elif com.needs_i8_conversion(values):
        # if we have NaT, punt to object dtype
        mask = com.isnull(values)
        if mask.ravel().any():
            f = func_map['generic']
            values = com._ensure_object(values)
            values[mask] = np.nan
        else:
            # no missing values: the i8 view is safe to hash directly
            f = func_map['int64']
            values = values.view('i8')
    elif com.is_integer_dtype(values):
        f = func_map['int64']
        values = com._ensure_int64(values)
    else:
        f = func_map['generic']
        values = com._ensure_object(values)
    return f, values
def _maybe_convert_timedelta(self, other):
    """Convert a timedelta-like ``other`` into an integer count of
    periods of this index's frequency.

    Raises ValueError when ``other`` cannot be expressed in units of
    ``self.freq``.
    """
    if isinstance(other, (timedelta, np.timedelta64,
                          offsets.Tick, Timedelta)):
        offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(offset, offsets.Tick):
            nanos = tslib._delta_to_nanoseconds(other)
            offset_nanos = tslib._delta_to_nanoseconds(offset)
            if nanos % offset_nanos == 0:
                return nanos // offset_nanos
    elif isinstance(other, offsets.DateOffset):
        freqstr = frequencies.get_standard_freq(other)
        base = frequencies.get_base_alias(freqstr)
        if base == self.freq.rule_code:
            return other.n
    elif isinstance(other, np.ndarray):
        if com.is_integer_dtype(other):
            # already a count of periods
            return other
        elif com.is_timedelta64_dtype(other):
            offset = frequencies.to_offset(self.freq)
            if isinstance(offset, offsets.Tick):
                nanos = tslib._delta_to_nanoseconds(other)
                offset_nanos = tslib._delta_to_nanoseconds(offset)
                # BUG FIX: compare elementwise BEFORE reducing with
                # .all(); the old ``(a % b).all() == 0`` reduced first
                # and then compared the boolean to 0, testing the
                # opposite of the intended condition.
                if (nanos % offset_nanos == 0).all():
                    return nanos // offset_nanos
    msg = "Input has different freq from PeriodIndex(freq={0})"
    raise ValueError(msg.format(self.freqstr))
def value_counts(values, sort=True, ascending=False):
    """
    Compute a histogram of the counts of non-null values

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order

    Returns
    -------
    value_counts : Series
    """
    from pandas.core.series import Series
    from collections import defaultdict

    if com.is_integer_dtype(values.dtype):
        # cython fast path for int64 data
        values = com._ensure_int64(values)
        keys, counts = lib.value_count_int64(values)
        result = Series(counts, index=keys)
    else:
        # generic pure-python path over the non-null entries
        # (defaultdict(int) is the idiomatic spelling of "default 0")
        counter = defaultdict(int)
        values = values[com.notnull(values)]
        for value in values:
            counter[value] += 1
        result = Series(counter)

    if sort:
        result.sort()
        if not ascending:
            result = result[::-1]

    return result
def _sqlalchemy_type(self, arr_or_dtype):
    """Map a pandas array/dtype (or the ``date`` type) to a SQLAlchemy
    column type, defaulting to Text.
    """
    # unused Interval import removed
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date)

    if arr_or_dtype is date:
        return Date
    if com.is_datetime64_dtype(arr_or_dtype):
        try:
            # presence of a tzinfo attribute => timezone-aware column
            arr_or_dtype.tzinfo
            return DateTime(timezone=True)
        except AttributeError:
            # narrowed from a bare except: only the attribute access
            # above is expected to fail here
            return DateTime
    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def _isfinite(values):
    """Boolean mask of entries that are NOT finite (NaN/NaT/inf)."""
    if is_datetime_or_timedelta_dtype(values):
        # datetimelikes have no inf; only NaT counts as non-finite
        return isnull(values)

    numeric = (is_complex_dtype(values) or is_float_dtype(values) or
               is_integer_dtype(values) or is_bool_dtype(values))
    if numeric:
        return ~np.isfinite(values)

    # other dtypes must be coerced before np.isfinite can be applied
    return ~np.isfinite(values.astype('float64'))
def convert(values, unit, axis):
    """Convert datetime-like values to matplotlib float date ordinals.

    Scalars, strings, and list-likes are handled; inputs that cannot be
    interpreted as datetimes are returned unchanged.
    """
    def try_parse(values):
        # best-effort string -> datetime; return input unchanged on failure
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (com.is_integer(values) or com.is_float(values)):
        # numeric input is presumably already an ordinal — TODO confirm
        return values
    elif isinstance(values, compat.string_types):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            # leave unparseable array input as-is
            pass

    return values
def _convert_listlike(arg, box, unit):
    """Convert a list-like of timedelta-like values to timedelta64[ns].

    Fast paths: already-timedelta64 data and integer data (interpreted
    in ``unit``). Everything else goes through the cython converter with
    string-parsing fallbacks. If ``box``, wrap the result in a
    TimedeltaIndex.
    """
    if isinstance(arg, (list, tuple)) or ((hasattr(arg, '__iter__') and
                                           not hasattr(arg, 'dtype'))):
        # generic iterables without a dtype become an object ndarray
        arg = np.array(list(arg), dtype='O')

    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        # these are shortcutable
        value = arg.astype(
            'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
    else:
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit)
        except:
            # try to process strings fast; may need to fallback
            try:
                value = np.array(
                    [_get_string_converter(r, unit=unit)() for r in arg],
                    dtype='m8[ns]')
            except:
                # slowest path: coerce element-by-element
                value = np.array([
                    _coerce_scalar_to_timedelta_type(r, unit=unit)
                    for r in arg
                ])

    if box:
        from pandas import TimedeltaIndex
        value = TimedeltaIndex(value, unit='ns')
    return value
def test_nanmean_overflow(self):
    # GH 10155
    # In the previous implementation mean can overflow for int dtypes, it
    # is now consistent with numpy
    from pandas import Series
    from distutils.version import LooseVersion

    # numpy < 1.9.0 is not computing this correctly
    if LooseVersion(np.__version__) >= '1.9.0':
        for big in (2**55, -2**55, 20150515061816532):
            s = Series(big, index=range(500), dtype=np.int64)
            result = s.mean()
            self.assertEqual(result, big)
            self.assertEqual(result, s.values.mean())
            self.assertTrue(result.dtype == np.float64)

    # check returned dtype: integer inputs promote to float64, float
    # inputs keep their dtype
    dtypes = (np.int16, np.int32, np.int64,
              np.float16, np.float32, np.float64)
    for dtype in dtypes:
        s = Series(range(10), dtype=dtype)
        result = s.mean()
        expected = np.float64 if is_integer_dtype(dtype) else dtype
        self.assertTrue(result.dtype == expected)
def test_nanmean_overflow(self):
    # GH 10155
    # In the previous implementation mean can overflow for int dtypes, it
    # is now consistent with numpy
    from pandas import Series
    from distutils.version import LooseVersion

    if LooseVersion(np.__version__) >= '1.9.0':
        # numpy < 1.9.0 is not computing this correctly
        for value in [2 ** 55, -2 ** 55, 20150515061816532]:
            ser = Series(value, index=range(500), dtype=np.int64)
            mean = ser.mean()
            self.assertEqual(mean, value)
            self.assertEqual(mean, ser.values.mean())
            self.assertTrue(mean.dtype == np.float64)

    # check returned dtype
    for dtype in [np.int16, np.int32, np.int64,
                  np.float16, np.float32, np.float64]:
        ser = Series(range(10), dtype=dtype)
        mean = ser.mean()
        if is_integer_dtype(dtype):
            # integer means are always reported as float64
            self.assertTrue(mean.dtype == np.float64)
        else:
            self.assertTrue(mean.dtype == dtype)
def value_counts(values, sort=True, ascending=False):
    """
    Compute a histogram of the counts of non-null values

    Returns
    -------
    value_counts : Series
    """
    from collections import defaultdict

    if com.is_integer_dtype(values.dtype):
        # cython fast path for int64 data
        keys, counts = lib.value_count_int64(com._ensure_int64(values))
        result = Series(counts, index=keys)
    else:
        # pure-python fallback over the non-null entries
        counter = defaultdict(lambda: 0)
        for value in values[com.notnull(values)]:
            counter[value] += 1
        result = Series(counter)

    if sort:
        result.sort()
        if not ascending:
            result = result[::-1]

    return result
def _sqlalchemy_type(self, col):
    """Map a pandas column to a SQLAlchemy column type.

    Object columns fall back to dtype inference (date/time detection)
    and finally Text.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time, Interval)

    if com.is_datetime64_dtype(col):
        try:
            # a tzinfo attribute marks the column timezone-aware
            tz = col.tzinfo
            return DateTime(timezone=True)
        except:
            return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    # object dtype: inspect the actual python values
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    """Format an ndarray for display, choosing a formatter class by dtype
    and filling unspecified options from the global print options."""
    dtype = values.dtype
    if com.is_float_dtype(dtype):
        klass = FloatArrayFormatter
    elif com.is_integer_dtype(dtype):
        klass = IntArrayFormatter
    elif com.is_datetime64_dtype(dtype):
        klass = Datetime64Formatter
    else:
        klass = GenericArrayFormatter

    # fall back to the configured print options for anything unspecified
    space = get_option("print.column_space") if space is None else space
    if float_format is None:
        float_format = get_option("print.float_format")
    digits = get_option("print.precision") if digits is None else digits

    return klass(values, digits, na_rep=na_rep, float_format=float_format,
                 formatter=formatter, space=space,
                 justify=justify).get_result()
def _sqlalchemy_type(self, col):
    """Map a pandas column to a SQLAlchemy type, inferring date/time for
    object columns and defaulting to Text."""
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time)

    if com.is_datetime64_dtype(col):
        try:
            # a tzinfo attribute marks timezone-aware data
            tz = col.tzinfo
            return DateTime(timezone=True)
        except:
            return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    # object dtype: inspect the python values themselves
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def mode(values): """Returns the mode or mode(s) of the passed Series or ndarray (sorted)""" # must sort because hash order isn't necessarily defined. from pandas.core.series import Series if isinstance(values, Series): constructor = values._constructor values = values.values else: values = np.asanyarray(values) constructor = Series dtype = values.dtype if com.is_integer_dtype(values.dtype): values = com._ensure_int64(values) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): dtype = values.dtype values = values.view(np.int64) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) else: mask = com.isnull(values) values = com._ensure_object(values) res = htable.mode_object(values, mask) try: res = sorted(res) except TypeError as e: warn("Unable to sort modes: %s" % e) result = constructor(res, dtype=dtype) return result
def _maybe_convert_timedelta(self, other):
    """Convert a timedelta-like ``other`` to an integer count of periods
    of this index's frequency.

    Raises IncompatibleFrequency when ``other`` cannot be expressed in
    units of ``self.freq``.
    """
    if isinstance(other, (timedelta, np.timedelta64,
                          offsets.Tick, Timedelta)):
        offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(offset, offsets.Tick):
            nanos = tslib._delta_to_nanoseconds(other)
            offset_nanos = tslib._delta_to_nanoseconds(offset)
            if nanos % offset_nanos == 0:
                return nanos // offset_nanos
    elif isinstance(other, offsets.DateOffset):
        freqstr = frequencies.get_standard_freq(other)
        base = frequencies.get_base_alias(freqstr)
        if base == self.freq.rule_code:
            return other.n
        msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
        raise IncompatibleFrequency(msg)
    elif isinstance(other, np.ndarray):
        if com.is_integer_dtype(other):
            # already counts of periods
            return other
        elif com.is_timedelta64_dtype(other):
            offset = frequencies.to_offset(self.freq)
            if isinstance(offset, offsets.Tick):
                nanos = tslib._delta_to_nanoseconds(other)
                offset_nanos = tslib._delta_to_nanoseconds(offset)
                # BUG FIX: compare elementwise BEFORE reducing with
                # .all(); the old ``(a % b).all() == 0`` reduced first
                # and then compared the boolean to 0, testing the
                # opposite of the intended condition.
                if (nanos % offset_nanos == 0).all():
                    return nanos // offset_nanos
    # raise when input doesn't have freq
    msg = "Input has different freq from PeriodIndex(freq={0})"
    raise IncompatibleFrequency(msg.format(self.freqstr))
def backfill_2d(values, limit=None, mask=None, dtype=None):
    """Backfill (bfill) missing values in a 2-D array, in place.

    Integer data is upcast to float64 so NaN can be represented. Raises
    ValueError for dtypes without a backfill implementation.
    """
    if dtype is None:
        dtype = values.dtype

    _method = None
    if com.is_float_dtype(values):
        # dtype-specialized cython routine, e.g. backfill_2d_inplace_float64
        _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name,
                          None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        _method = _backfill_2d_datetime
    elif com.is_integer_dtype(values):
        # ints cannot hold NaN; promote to float64 first
        values = com._ensure_float64(values)
        _method = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_2d_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    mask = mask.view(np.uint8)

    # only operate when both dimensions are non-zero
    if np.all(values.shape):
        _method(values, mask, limit=limit)
    else:
        # for test coverage
        pass

    return values
def _convert_listlike(arg, box, unit):
    """Convert list-like input to timedelta64[ns] values.

    NOTE(review): reads ``coerce`` from the enclosing scope. If ``box``,
    the result is wrapped in a TimedeltaIndex.
    """
    if isinstance(arg, (list,tuple)) or ((hasattr(arg,'__iter__') and
                                          not hasattr(arg,'dtype'))):
        # generic iterables without a dtype become an object ndarray
        arg = np.array(list(arg), dtype='O')

    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        # these are shortcutable
        value = arg.astype('timedelta64[{0}]'.format(unit)).astype(
            'timedelta64[ns]')
    else:
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit, coerce=coerce)
        except:
            # try to process strings fast; may need to fallback
            try:
                value = np.array([ _get_string_converter(r, unit=unit)()
                                   for r in arg ],dtype='m8[ns]')
            except:
                value = np.array([ _coerce_scalar_to_timedelta_type(
                    r, unit=unit, coerce=coerce) for r in arg ])
        # normalize whatever the fallbacks produced to m8[ns]
        value = value.astype('timedelta64[ns]', copy=False)

    if box:
        from pandas import TimedeltaIndex
        value = TimedeltaIndex(value,unit='ns')
    return value
def _isfinite(values):
    """Return a boolean mask marking non-finite entries (NaN/NaT/inf)."""
    # datetimelikes: only NaT is "non-finite"
    if _is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    if not (is_complex_dtype(values) or is_float_dtype(values) or
            is_integer_dtype(values) or is_bool_dtype(values)):
        # non-numeric dtypes must be coerced before np.isfinite works
        values = values.astype('float64')
    return ~np.isfinite(values)
def _delegate_property_get(self, name):
    """Fetch property ``name`` from the underlying values and wrap it.

    Integer ndarray results are upcast to int64; array-like results come
    back as a new Series flagged so that in-place modification warns.
    """
    from pandas import Series

    result = getattr(self.values, name)

    # maybe need to upcast (ints)
    if isinstance(result, np.ndarray):
        if is_integer_dtype(result):
            result = result.astype('int64')
    elif not is_list_like(result):
        # scalar result: return unwrapped
        return result

    # blow up if we operate on categories
    if self.orig is not None:
        result = take_1d(result, self.orig.cat.codes)

    # return the result as a Series, which is by definition a copy
    result = Series(result, index=self.index, name=self.name)

    # setting this object will show a SettingWithCopyWarning/Error
    result.is_copy = ("modifications to a property of a datetimelike "
                      "object are not supported and are discarded. "
                      "Change values on the original.")

    return result
def _convert_listlike(arg, box, unit):
    """Convert a list-like of timedelta-like scalars to timedelta64[ns].

    If ``box``, the result is wrapped in a Series of dtype m8[ns].
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        unit = _validate_timedelta_unit(unit)
        # these are shortcutable
        value = arg.astype(
            'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
    else:
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit)
        except:
            # fall back to element-wise coercion
            value = np.array([
                _coerce_scalar_to_timedelta_type(r, unit=unit)
                for r in arg
            ])

    if box:
        from pandas import Series
        value = Series(value, dtype='m8[ns]')
    return value
def restore_type(self, dtype, sample=None):
    """Restore type from Pandas
    """
    # Pandas dtype checks — order matters: bool before integer before
    # the broader numeric check
    if pdc.is_bool_dtype(dtype):
        return 'boolean'
    if pdc.is_datetime64_any_dtype(dtype):
        return 'datetime'
    if pdc.is_integer_dtype(dtype):
        return 'integer'
    if pdc.is_numeric_dtype(dtype):
        return 'number'

    # fall back to inspecting a sample python value
    if sample is not None:
        if isinstance(sample, (list, tuple)):
            return 'array'
        if isinstance(sample, datetime.date):
            return 'date'
        if isinstance(sample, isodate.Duration):
            return 'duration'
        if isinstance(sample, dict):
            return 'object'
        if isinstance(sample, six.string_types):
            return 'string'
        if isinstance(sample, datetime.time):
            return 'time'

    return 'string'
def mode(values): """Returns the mode or mode(s) of the passed Series or ndarray (sorted)""" # must sort because hash order isn't necessarily defined. from pandas.core.series import Series if isinstance(values, Series): constructor = values._constructor values = values.values else: values = np.asanyarray(values) constructor = Series dtype = values.dtype if com.is_integer_dtype(values): values = com._ensure_int64(values) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): dtype = values.dtype values = values.view(np.int64) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) elif com.is_categorical_dtype(values): result = constructor(values.mode()) else: mask = com.isnull(values) values = com._ensure_object(values) res = htable.mode_object(values, mask) try: res = sorted(res) except TypeError as e: warn("Unable to sort modes: %s" % e) result = constructor(res, dtype=dtype) return result
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    """Format an ndarray for display, choosing a formatter class by dtype.

    Unspecified options default to the global ``print_config`` settings.
    """
    if com.is_float_dtype(values.dtype):
        fmt_klass = FloatArrayFormatter
    elif com.is_integer_dtype(values.dtype):
        fmt_klass = IntArrayFormatter
    elif com.is_datetime64_dtype(values.dtype):
        fmt_klass = Datetime64Formatter
    else:
        fmt_klass = GenericArrayFormatter

    if space is None:
        space = print_config.column_space
    if float_format is None:
        float_format = print_config.float_format
    if digits is None:
        digits = print_config.precision

    fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
                        float_format=float_format, formatter=formatter,
                        space=space, justify=justify)
    return fmt_obj.get_result()
def na_value_for_dtype(dtype):
    """
    Return a dtype compat na value

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    dtype compat na value
    """
    from pandas.core import common as com
    from pandas import NaT

    dtype = pandas_dtype(dtype)

    # datetimelike (incl. tz-aware) and timedelta dtypes use NaT
    datetimelike = (com.is_datetime64_dtype(dtype) or
                    com.is_datetime64tz_dtype(dtype) or
                    com.is_timedelta64_dtype(dtype))
    if datetimelike:
        return NaT
    if com.is_float_dtype(dtype):
        return np.nan
    if com.is_integer_dtype(dtype):
        # integer dtypes cannot hold NaN
        return 0
    if com.is_bool_dtype(dtype):
        return False
    return np.nan
def convert(values, unit, axis):
    """Convert datetime-like values to matplotlib float date ordinals.

    Scalars, strings, and list-likes are handled; inputs that cannot be
    interpreted as datetimes are returned unchanged.
    """
    # (unused local import of DatetimeIndex removed)

    def try_parse(values):
        # best-effort string -> datetime; return input unchanged on failure
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (com.is_integer(values) or com.is_float(values)):
        return values
    elif isinstance(values, basestring):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            # BUG FIX: _asarray_tuplesafe lives in pandas.core.common,
            # not numpy; ``np._asarray_tuplesafe`` raised AttributeError.
            values = com._asarray_tuplesafe(values)

        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            # leave unparseable array input as-is
            pass

    return values
def _sqlalchemy_type(self, arr_or_dtype):
    """Map a pandas array/dtype (or the ``date`` type) to a SQLAlchemy
    column type, defaulting to Text."""
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Interval)

    if arr_or_dtype is date:
        return Date
    if com.is_datetime64_dtype(arr_or_dtype):
        try:
            # a tzinfo attribute marks timezone-aware data
            tz = arr_or_dtype.tzinfo
            return DateTime(timezone=True)
        except:
            return DateTime
    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def astype(self, dtype, copy=True):
    """Cast the PeriodIndex to ``dtype``.

    Only object (boxed Period objects) and integer (the underlying i8
    ordinals) targets are supported; anything else raises ValueError.
    """
    dtype = np.dtype(dtype)
    if is_object_dtype(dtype):
        return self.asobject
    elif is_integer_dtype(dtype):
        # expose the underlying ordinals as an int64 Index
        return Index(self.values.astype('i8', copy=copy), name=self.name,
                     dtype='i8')
    raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
def coerce(values):
    """Coerce ``values`` to numeric (honouring the ``errors`` policy from
    the enclosing scope) and widen small integer dtypes to int64."""
    # we allow coercion to if errors allows
    values = to_numeric(values, errors=errors)

    # prevent overflow in case of int8 or int16
    if com.is_integer_dtype(values):
        values = values.astype('int64', copy=False)

    return values
def get_expected(s, name):
    """Compute the expected accessor result for Series ``s`` by going
    through the equivalent Index property ``prop`` (enclosing scope).
    """
    result = getattr(Index(s._values), prop)
    if isinstance(result, np.ndarray):
        # integer ndarray results are upcast to int64 by the accessor
        if com.is_integer_dtype(result):
            result = result.astype('int64')
    elif not com.is_list_like(result):
        # scalar results are returned unwrapped
        return result
    return Series(result, index=s.index, name=s.name)
def test_delevel_infer_dtype(self):
    # mixed-type MultiIndex levels should round-trip their dtypes
    # through reset_index()
    levels = cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1])
    index = MultiIndex.from_tuples(list(levels),
                                   names=['prm0', 'prm1', 'prm2'])
    df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                   index=index)
    deleveled = df.reset_index()
    self.assert_(com.is_integer_dtype(deleveled['prm1']))
    self.assert_(com.is_float_dtype(deleveled['prm2']))
def test_delevel_infer_dtype(self):
    # mixed-type MultiIndex levels should keep their dtypes when the
    # index is moved back into columns via delevel()
    tuples = [tuple for tuple in cart_product(['foo', 'bar'],
                                              [10, 20],
                                              [1.0, 1.1])]
    index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2'])
    df = DataFrame(np.random.randn(8,3), columns=['A', 'B', 'C'],
                   index=index)
    deleveled = df.delevel()
    self.assert_(com.is_integer_dtype(deleveled['prm1']))
    self.assert_(com.is_float_dtype(deleveled['prm2']))
def count(values, uniques=None):
    """Count occurrences in ``values`` with a dtype-matched hash table.

    ``uniques`` is not supported yet and raises NotImplementedError.
    """
    if uniques is not None:
        raise NotImplementedError

    # pick the hash table class and coercion function by dtype
    if com.is_float_dtype(values):
        table, caster = lib.Float64HashTable, _ensure_float64
    elif com.is_integer_dtype(values):
        table, caster = lib.Int64HashTable, _ensure_int64
    else:
        table, caster = lib.PyObjectHashTable, _ensure_object
    return _count_generic(values, table, caster)
def _hashtable_algo(f, dtype):
    """
    f(HashTable, type_caster) -> result
    """
    # choose the (hash table class, caster) pair matching the dtype
    if com.is_float_dtype(dtype):
        pair = (htable.Float64HashTable, com._ensure_float64)
    elif com.is_integer_dtype(dtype):
        pair = (htable.Int64HashTable, com._ensure_int64)
    else:
        pair = (htable.PyObjectHashTable, com._ensure_object)
    return f(*pair)
def _wrap_access_object(self, obj):
    """Coerce accessor output (int -> int64) and re-wrap it into a
    Series when ``self`` is one."""
    # we may need to coerce the input as we don't want non int64 if
    # we have an integer result
    if hasattr(obj, 'dtype') and com.is_integer_dtype(obj):
        obj = obj.astype(np.int64)

    if not isinstance(self, com.ABCSeries):
        return obj
    return self._constructor(obj, index=self.index).__finalize__(self)
def _wrap_access_object(self, obj):
    """Coerce accessor output (int -> int64) and wrap it back into a
    Series when ``self`` is one."""
    # we may need to coerce the input as we don't want non int64 if
    # we have an integer result
    if hasattr(obj,'dtype') and com.is_integer_dtype(obj):
        obj = obj.astype(np.int64)

    if isinstance(self, com.ABCSeries):
        return self._constructor(obj,index=self.index).__finalize__(self)

    return obj
def _value_counts_arraylike(values, dropna=True):
    """Core value_counts: return (keys, counts) for an array-like.

    Handles datetimelike/period data via i8 views, integers and floats
    via cython hash tables, and everything else via the object table.
    """
    is_datetimetz = com.is_datetimetz(values)
    is_period = (isinstance(values, gt.ABCPeriodIndex) or
                 com.is_period_arraylike(values))

    # keep the original around to recover tz/freq metadata afterwards
    orig = values

    from pandas.core.series import Series
    values = Series(values).values
    dtype = values.dtype

    if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        # count on the underlying i8 representation
        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            # iNaT marks missing datetimelike values
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz:
            if isinstance(orig, gt.ABCDatetimeIndex):
                tz = orig.tz
            else:
                tz = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=tz)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)

    elif com.is_integer_dtype(dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    elif com.is_float_dtype(dtype):
        values = com._ensure_float64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    else:
        values = com._ensure_object(values)
        mask = com.isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna and mask.any():
            # prepend a NaN bucket with the count of missing entries
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    return keys, counts
def _get_hash_table_and_cast(values):
    """Return (hash table class, values coerced to the matching dtype)."""
    if com.is_float_dtype(values):
        return lib.Float64HashTable, com._ensure_float64(values)
    if com.is_integer_dtype(values):
        return lib.Int64HashTable, com._ensure_int64(values)
    return lib.PyObjectHashTable, com._ensure_object(values)
def _get_data_algo(values, func_map):
    """Select the dtype-specific routine from ``func_map`` and coerce
    ``values`` to the matching representation."""
    if com.is_float_dtype(values):
        key, cast = 'float64', com._ensure_float64
    elif com.is_integer_dtype(values):
        key, cast = 'int64', com._ensure_int64
    else:
        key, cast = 'generic', com._ensure_object
    return func_map[key], cast(values)
def astype(self, dtype):
    """Cast the index to ``dtype``.

    Float/integer targets cast the underlying values; object passes them
    through unchanged; anything else raises TypeError.
    """
    dtype = pandas_dtype(dtype)
    if is_float_dtype(dtype) or is_integer_dtype(dtype):
        values = self._values.astype(dtype)
    elif is_object_dtype(dtype):
        values = self._values
    else:
        # NOTE(review): the message says "float64 or object" but integer
        # targets are accepted above — consider updating the wording.
        raise TypeError('Setting %s dtype to anything other than '
                        'float64 or object is not supported' %
                        self.__class__)
    return Index(values, name=self.name, dtype=dtype)