def maybe_to_datetimelike(data, copy=False): """ return a DelegatedClass of a Series that is datetimelike (e.g. datetime64[ns] dtype or a Series of Periods) raise TypeError if this is not possible. Parameters ---------- data : Series copy : boolean, default False copy the input data Returns ------- DelegatedClass """ if not isinstance(data, Series): raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data))) index = data.index if issubclass(data.dtype.type, np.datetime64): return DatetimeProperties(DatetimeIndex(data, copy=copy), index) else: if com.is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index) raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
def maybe_to_datetimelike(data, copy=False): """ return a DelegatedClass of a Series that is datetimelike (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) raise TypeError if this is not possible. Parameters ---------- data : Series copy : boolean, default False copy the input data Returns ------- DelegatedClass """ from pandas import Series if not isinstance(data, Series): raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data))) index = data.index if is_datetime64_dtype(data.dtype) or is_datetime64tz_dtype(data.dtype): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name) elif is_timedelta64_dtype(data.dtype): return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index, name=data.name) else: if is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index, name=data.name) if is_datetime_arraylike(data): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name) raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
def maybe_to_datetimelike(data, copy=False): """ return a DelegatedClass of a Series that is datetimelike (e.g. datetime64[ns] dtype or a Series of Periods) raise TypeError if this is not possible. Parameters ---------- data : Series copy : boolean, default False copy the input data Returns ------- DelegatedClass """ if not isinstance(data, Series): raise TypeError( "cannot convert an object of type {0} to a datetimelike index". format(type(data))) index = data.index if issubclass(data.dtype.type, np.datetime64): return DatetimeProperties(DatetimeIndex(data, copy=copy), index) else: if com.is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index) raise TypeError( "cannot convert an object of type {0} to a datetimelike index".format( type(data)))
def maybe_to_datetimelike(data, copy=False): """ return a DelegatedClass of a Series that is datetimelike (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) raise TypeError if this is not possible. Parameters ---------- data : Series copy : boolean, default False copy the input data Returns ------- DelegatedClass """ from pandas import Series if not isinstance(data, Series): raise TypeError( "cannot convert an object of type {0} to a datetimelike index". format(type(data))) index = data.index if is_datetime64_dtype(data.dtype): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name) elif is_datetime64tz_dtype(data.dtype): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'), index, name=data.name) elif is_timedelta64_dtype(data.dtype): return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index, name=data.name) else: if is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index, name=data.name) if is_datetime_arraylike(data): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name) raise TypeError( "cannot convert an object of type {0} to a datetimelike index".format( type(data)))
def _value_counts_arraylike(values, dropna=True): is_datetimetz = com.is_datetimetz(values) is_period = (isinstance(values, gt.ABCPeriodIndex) or com.is_period_arraylike(values)) orig = values from pandas.core.series import Series values = Series(values).values dtype = values.dtype if com.is_datetime_or_timedelta_dtype(dtype) or is_period: from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex if is_period: values = PeriodIndex(values) freq = values.freq values = values.view(np.int64) keys, counts = htable.value_count_scalar64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] # convert the keys back to the dtype we came in keys = keys.astype(dtype) # dtype handling if is_datetimetz: if isinstance(orig, gt.ABCDatetimeIndex): tz = orig.tz else: tz = orig.dt.tz keys = DatetimeIndex._simple_new(keys, tz=tz) if is_period: keys = PeriodIndex._simple_new(keys, freq=freq) elif com.is_integer_dtype(dtype): values = com._ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) elif com.is_float_dtype(dtype): values = com._ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: values = com._ensure_object(values) mask = com.isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) return keys, counts
def infer_freq(index, warn=True): """ Infer the most likely frequency given the input index. If the frequency is uncertain, a warning will be printed. Parameters ---------- index : DatetimeIndex or TimedeltaIndex if passed a Series will use the values of the series (NOT THE INDEX) warn : boolean, default True Returns ------- freq : string or None None if no discernible frequency TypeError if the index is not datetime-like ValueError if there are less than three values. """ import pandas as pd if isinstance(index, com.ABCSeries): values = index._values if not (com.is_datetime64_dtype(values) or com.is_timedelta64_dtype(values) or values.dtype == object): raise TypeError( "cannot infer freq from a non-convertible dtype on a Series of {0}" .format(index.dtype)) index = values if com.is_period_arraylike(index): raise TypeError("PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq.") elif isinstance(index, pd.TimedeltaIndex): inferer = _TimedeltaFrequencyInferer(index, warn=warn) return inferer.get_freq() if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError( "cannot infer freq from a non-convertible index type {0}". format(type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): try: index = pd.DatetimeIndex(index) except AmbiguousTimeError: index = pd.DatetimeIndex(index.asi8) inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq()
def infer_freq(index, warn=True): """ Infer the most likely frequency given the input index. If the frequency is uncertain, a warning will be printed. Parameters ---------- index : DatetimeIndex or TimedeltaIndex if passed a Series will use the values of the series (NOT THE INDEX) warn : boolean, default True Returns ------- freq : string or None None if no discernible frequency TypeError if the index is not datetime-like ValueError if there are less than three values. """ import pandas as pd if isinstance(index, com.ABCSeries): values = index._values if not (com.is_datetime64_dtype(values) or com.is_timedelta64_dtype(values) or values.dtype == object): raise TypeError("cannot infer freq from a non-convertible " "dtype on a Series of {0}".format(index.dtype)) index = values if com.is_period_arraylike(index): raise TypeError("PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq.") elif isinstance(index, pd.TimedeltaIndex): inferer = _TimedeltaFrequencyInferer(index, warn=warn) return inferer.get_freq() if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index " "type {0}".format(type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): try: index = pd.DatetimeIndex(index) except AmbiguousTimeError: index = pd.DatetimeIndex(index.asi8) inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq()
def convert(values, units, axis): if not hasattr(axis, "freq"): raise TypeError("Axis must have `freq` set to convert to Periods") valid_types = (compat.string_types, datetime, Period, pydt.date, pydt.time) if isinstance(values, valid_types) or com.is_integer(values) or com.is_float(values): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): return values.asfreq(axis.freq).values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if com.is_period_arraylike(values): return PeriodIndex(values, freq=axis.freq).values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values
def convert(values, units, axis): if not hasattr(axis, 'freq'): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (compat.string_types, datetime, Period, pydt.date, pydt.time) if (isinstance(values, valid_types) or com.is_integer(values) or com.is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): return values.asfreq(axis.freq).values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if com.is_period_arraylike(values): return PeriodIndex(values, freq=axis.freq).values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values
def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True): """ Compute a histogram of the counts of non-null values. Parameters ---------- values : ndarray (1-d) sort : boolean, default True Sort by values ascending : boolean, default False Sort in ascending order normalize: boolean, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data dropna : boolean, default True Don't include counts of NaN Returns ------- value_counts : Series """ from pandas.core.series import Series from pandas.tools.tile import cut from pandas import Index, PeriodIndex, DatetimeIndex name = getattr(values, 'name', None) values = Series(values).values if bins is not None: try: cat, bins = cut(values, bins, retbins=True) except TypeError: raise TypeError("bins argument only works with numeric data.") values = cat.codes if com.is_categorical_dtype(values.dtype): result = values.value_counts(dropna) else: dtype = values.dtype is_period = com.is_period_arraylike(values) is_datetimetz = com.is_datetimetz(values) if com.is_datetime_or_timedelta_dtype( dtype) or is_period or is_datetimetz: if is_period: values = PeriodIndex(values) elif is_datetimetz: tz = getattr(values, 'tz', None) values = DatetimeIndex(values).tz_localize(None) values = values.view(np.int64) keys, counts = htable.value_count_scalar64(values, dropna) if dropna: from pandas.tslib import iNaT msk = keys != iNaT keys, counts = keys[msk], counts[msk] # localize to the original tz if necessary if is_datetimetz: keys = DatetimeIndex(keys).tz_localize(tz) # convert the keys back to the dtype we came in else: keys = keys.astype(dtype) elif com.is_integer_dtype(dtype): values = com._ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) elif com.is_float_dtype(dtype): values = com._ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: values = com._ensure_object(values) mask = com.isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) if not isinstance(keys, Index): keys = Index(keys) result = Series(counts, index=keys, name=name) if bins is not None: # TODO: This next line should be more efficient result = result.reindex(np.arange(len(cat.categories)), fill_value=0) result.index = bins[:-1] if sort: result = result.sort_values(ascending=ascending) if normalize: result = result / float(values.size) return result
def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True): """ Compute a histogram of the counts of non-null values. Parameters ---------- values : ndarray (1-d) sort : boolean, default True Sort by values ascending : boolean, default False Sort in ascending order normalize: boolean, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data dropna : boolean, default True Don't include counts of NaN Returns ------- value_counts : Series """ from pandas.core.series import Series from pandas.tools.tile import cut from pandas.tseries.period import PeriodIndex is_period = com.is_period_arraylike(values) values = Series(values).values is_category = com.is_categorical_dtype(values.dtype) if bins is not None: try: cat, bins = cut(values, bins, retbins=True) except TypeError: raise TypeError("bins argument only works with numeric data.") values = cat.codes elif is_category: bins = values.categories cat = values values = cat.codes dtype = values.dtype if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)) or is_period: if is_period: values = PeriodIndex(values) values = values.view(np.int64) keys, counts = htable.value_count_int64(values) if dropna: from pandas.tslib import iNaT msk = keys != iNaT keys, counts = keys[msk], counts[msk] # convert the keys back to the dtype we came in keys = keys.astype(dtype) elif com.is_integer_dtype(dtype): values = com._ensure_int64(values) keys, counts = htable.value_count_int64(values) else: values = com._ensure_object(values) mask = com.isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna: keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) result = Series(counts, index=com._values_from_object(keys)) if bins is not None: # TODO: This next line should be more efficient result = result.reindex(np.arange(len(cat.categories)), fill_value=0) if not is_category: result.index = bins[:-1] else: result.index = cat.categories if sort: result.sort() if not ascending: result = result[::-1] if normalize: result = result / float(values.size) return result
def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True): """ Compute a histogram of the counts of non-null values. Parameters ---------- values : ndarray (1-d) sort : boolean, default True Sort by values ascending : boolean, default False Sort in ascending order normalize: boolean, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data dropna : boolean, default True Don't include counts of NaN Returns ------- value_counts : Series """ from pandas.core.series import Series from pandas.tools.tile import cut from pandas import Index, PeriodIndex, DatetimeIndex name = getattr(values, 'name', None) values = Series(values).values if bins is not None: try: cat, bins = cut(values, bins, retbins=True) except TypeError: raise TypeError("bins argument only works with numeric data.") values = cat.codes if com.is_categorical_dtype(values.dtype): result = values.value_counts(dropna) else: dtype = values.dtype is_period = com.is_period_arraylike(values) is_datetimetz = com.is_datetimetz(values) if com.is_datetime_or_timedelta_dtype(dtype) or is_period or \ is_datetimetz: if is_period: values = PeriodIndex(values) elif is_datetimetz: tz = getattr(values, 'tz', None) values = DatetimeIndex(values).tz_localize(None) values = values.view(np.int64) keys, counts = htable.value_count_scalar64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] # localize to the original tz if necessary if is_datetimetz: keys = DatetimeIndex(keys).tz_localize(tz) # convert the keys back to the dtype we came in else: keys = keys.astype(dtype) elif com.is_integer_dtype(dtype): values = com._ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) elif com.is_float_dtype(dtype): values = com._ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: values = com._ensure_object(values) mask = com.isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) if not isinstance(keys, Index): keys = Index(keys) result = Series(counts, index=keys, name=name) if bins is not None: # TODO: This next line should be more efficient result = result.reindex(np.arange(len(cat.categories)), fill_value=0) result.index = bins[:-1] if sort: result = result.sort_values(ascending=ascending) if normalize: result = result / float(values.size) return result