def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series in the matching properties delegate.

    Series with a datetime64 (naive or tz-aware) dtype, a timedelta64 dtype,
    or holding period-like / datetime-like values are supported. Anything
    else is rejected with a TypeError.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series, or is a Series that is not datetimelike.
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError(
            "cannot convert an object of type {0} to a datetimelike "
            "index".format(type(data))
        )

    owner_index = data.index
    dtype = data.dtype

    # Native datetime64 data, with or without a timezone.
    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        converted = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(converted, owner_index, name=data.name)

    # Native timedelta64 data.
    if is_timedelta64_dtype(dtype):
        converted = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(converted, owner_index, name=data.name)

    # Fall back to inspecting the values themselves.
    if is_period_arraylike(data):
        converted = PeriodIndex(data, copy=copy)
        return PeriodProperties(converted, owner_index, name=data.name)

    if is_datetime_arraylike(data):
        converted = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(converted, owner_index, name=data.name)

    raise TypeError(
        "cannot convert an object of type {0} to a datetimelike "
        "index".format(type(data))
    )
def maybe_to_datetimelike(data, copy=False):
    """
    Build the properties delegate for a datetimelike Series.

    Handles Series whose dtype scalar type derives from ``np.datetime64`` or
    ``np.timedelta64``, as well as Series holding period-like or
    datetime-like values. Any other input raises a TypeError.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series, or is a Series that is not datetimelike.
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError(
            "cannot convert an object of type {0} to a datetimelike "
            "index".format(type(data))
        )

    owner_index = data.index
    scalar_type = data.dtype.type

    # Dispatch on the numpy scalar type first.
    if issubclass(scalar_type, np.datetime64):
        converted = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(converted, owner_index)

    if issubclass(scalar_type, np.timedelta64):
        converted = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(converted, owner_index)

    # Otherwise decide from the values themselves.
    if is_period_arraylike(data):
        converted = PeriodIndex(data, copy=copy)
        return PeriodProperties(converted, owner_index)

    if is_datetime_arraylike(data):
        converted = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(converted, owner_index)

    raise TypeError(
        "cannot convert an object of type {0} to a datetimelike "
        "index".format(type(data))
    )
def maybe_to_datetimelike(data, copy=False):
    """
    return a DelegatedClass of a Series that is datetimelike
      (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    For a categorical Series the dispatch is done on its categories, and the
    original Series is handed to the delegate through ``orig``.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series, or its (category) values are not
        datetimelike.
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    # Capture index and name before ``data`` may be rebound to the categories.
    index = data.index
    name = data.name
    orig = data if is_categorical_dtype(data) else None
    if orig is not None:
        data = orig.values.categories

    if is_datetime64_dtype(data.dtype):
        return DatetimeProperties(DatetimeIndex(data, copy=copy,
                                                freq='infer'),
                                  index, name=name, orig=orig)
    elif is_datetime64tz_dtype(data.dtype):
        # Use the captured Series name: for categorical input ``data`` is now
        # the categories, so ``data.name`` is no longer the Series name.
        return DatetimeProperties(DatetimeIndex(data, copy=copy,
                                                freq='infer',
                                                ambiguous='infer'),
                                  index, name=name, orig=orig)
    elif is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(TimedeltaIndex(data, copy=copy,
                                                  freq='infer'),
                                   index, name=name, orig=orig)
    else:
        if is_period_arraylike(data):
            return PeriodProperties(PeriodIndex(data, copy=copy), index,
                                    name=name, orig=orig)
        if is_datetime_arraylike(data):
            return DatetimeProperties(DatetimeIndex(data, copy=copy,
                                                    freq='infer'),
                                      index, name=name, orig=orig)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))
def guess_formatter(values, precision=1, commas=True, parens=True, nan='nan', prefix=None, pcts=0,
                    trunc_dot_zeros=0):
    """Based on the values, return the most suitable formatter

    The choice is driven by the order of magnitude of the smallest absolute
    value: trillions, billions, millions, thousands, optionally percent,
    otherwise int or float.

    Parameters
    ----------
    values : Series, DataFrame, scalar, list, tuple, or ndarray
        Values used to determine which formatter is the best fit
    precision, commas, parens, nan, prefix, trunc_dot_zeros
        Passed through as keyword arguments to the formatter factory that is
        selected (``precision`` is dropped for the int formatter).
    pcts : truthy or falsy, default 0
        When truthy, a percent formatter is chosen if the smallest magnitude
        is below 1 and the largest value is below 1.

    Returns
    -------
    callable
        The selected formatter. If anything goes wrong while inspecting
        ``values`` (unsupported type, empty container, ...), the identity
        function is returned as a best-effort fallback.
    """
    formatter_args = dict(precision=precision, commas=commas, parens=parens, nan=nan, prefix=prefix,
                          trunc_dot_zeros=trunc_dot_zeros)

    try:
        if isinstance(values, pd.Series):
            # Timestamps can be annoying when printed, so date-only series
            # (no hour or minute component) get the basic date formatter.
            if is_datetime_arraylike(values):
                if (values.dt.hour == 0).all() and (values.dt.minute == 0).all():
                    return new_datetime_formatter()
            aval = values.abs()
            vmax, vmin = aval.max(), aval.min()
        elif isinstance(values, np.ndarray):
            if values.ndim == 2:
                avalues = pd.DataFrame(values).abs()
                vmax = avalues.max().max()
                vmin = avalues.min().min()
            elif values.ndim == 1:
                aval = pd.Series(values).abs()
                vmax, vmin = aval.max(), aval.min()
            else:
                raise ValueError('cannot accept frame with more than 2-dimensions')
        elif isinstance(values, pd.DataFrame):
            avalues = values.abs()
            vmax = avalues.max().max()
            vmin = avalues.min().min()
        elif isinstance(values, (list, tuple)):
            # Compare magnitudes, as the Series/ndarray/DataFrame branches do;
            # otherwise a negative entry breaks the log10 below.
            magnitudes = [abs(v) for v in values]
            vmax = max(magnitudes)
            vmin = min(magnitudes)
        else:
            vmax = vmin = abs(values)

        if np.isnan(vmin):
            return new_float_formatter(**formatter_args)

        min_digits = 0 if vmin == 0 else math.floor(math.log10(vmin))
        if min_digits >= 12:
            return new_trillions_formatter(**formatter_args)
        elif min_digits >= 9:
            return new_billions_formatter(**formatter_args)
        elif min_digits >= 6:
            return new_millions_formatter(**formatter_args)
        elif min_digits >= 3:
            return new_thousands_formatter(**formatter_args)
        elif pcts and min_digits < 0 and vmax < 1:
            return new_percent_formatter(**formatter_args)
        elif isinstance(vmax, int):
            # The int formatter does not take a precision argument.
            formatter_args.pop('precision')
            return new_int_formatter(**formatter_args)
        else:
            return new_float_formatter(**formatter_args)
    except Exception:
        # Best effort: fall back to the identity formatter, but let
        # KeyboardInterrupt / SystemExit propagate.
        return lambda x: x
def censor(x, range=(0, 1), only_finite=True):
    """
    Convert any values outside of range to a **NULL** type object.

    Parameters
    ----------
    x : array_like
        Values to manipulate
    range : tuple
        (min, max) giving desired output range
    only_finite : bool
        If True (the default), will only modify
        finite values.

    Returns
    -------
    x : array_like
        Censored array. Empty input is returned unchanged. Input that has
        a ``dtype`` attribute is copied before being modified; other input
        yields a new list.

    Raises
    ------
    ValueError
        If the type of the values is not one that can be censored.

    Examples
    --------
    >>> a = [1, 2, np.inf, 3, 4, -np.inf, 5]
    >>> censor(a, (0, 10))
    [1, 2, inf, 3, 4, -inf, 5]
    >>> censor(a, (0, 10), False)
    [1, 2, nan, 3, 4, nan, 5]
    >>> censor(a, (2, 4))
    [nan, 2, inf, 3, 4, -inf, nan]

    Notes
    -----
    All values in ``x`` should be of the same type. ``only_finite`` parameter
    is not considered for Datetime and Timedelta types.

    The **NULL** type object depends on the type of values in **x**.

    - :class:`float` - :py:`float('nan')`
    - :class:`int` - :py:`float('nan')`
    - :class:`datetime.datetime` : :py:`np.datetime64(NaT)`
    - :class:`datetime.timedelta` : :py:`np.timedelta64(NaT)`

    Integer data with a numpy dtype is promoted to float when a value has to
    be censored, because an integer array cannot hold ``nan``.
    """
    if not len(x):
        return x

    py_time_types = (datetime.datetime, datetime.timedelta)
    np_pd_time_types = (pd.Timestamp, pd.Timedelta,
                        np.datetime64, np.timedelta64)
    x0 = first_element(x)

    # Yes, we want type not isinstance
    if type(x0) in py_time_types:
        return _censor_with(x, range, 'NaT')

    if not hasattr(x, 'dtype') and isinstance(x0, np_pd_time_types):
        return _censor_with(x, range, type(x0)('NaT'))

    # Choose the null value that matches the type of the data
    x_array = np.asarray(x)
    if pdtypes.is_number(x0) and not isinstance(x0, np.timedelta64):
        null = float('nan')
    elif com.is_datetime_arraylike(x_array):
        null = pd.Timestamp('NaT')
    elif pdtypes.is_datetime64_dtype(x_array):
        null = np.datetime64('NaT')
    elif isinstance(x0, pd.Timedelta):
        null = pd.Timedelta('NaT')
    elif pdtypes.is_timedelta64_dtype(x_array):
        null = np.timedelta64('NaT')
    else:
        raise ValueError("Do not know how to censor values of type "
                         "{}".format(type(x0)))

    if only_finite:
        try:
            finite = np.isfinite(x)
        except TypeError:
            # isfinite is not defined for this type; treat all as finite
            finite = np.repeat(True, len(x))
    else:
        finite = np.repeat(True, len(x))

    if hasattr(x, 'dtype'):
        outside = (x < range[0]) | (x > range[1])
        bool_idx = finite & outside
        x = x.copy()
        if bool_idx.any():
            # An integer array cannot store nan; promote to float first
            # instead of failing on the assignment.
            if (isinstance(null, float) and
                    isinstance(x.dtype, np.dtype) and
                    x.dtype.kind in 'iu'):
                x = x.astype(float)
            x[bool_idx] = null
    else:
        x = [
            null if not range[0] <= val <= range[1] and f else val
            for val, f in zip(x, finite)
        ]

    return x