def __init__(self, left, right, name):
    """Set up the state for an operation between ``left`` and ``right``.

    Parameters
    ----------
    left, right : operands; when both are ``pd.Series`` they are aligned
        (``copy=False``) before being stored
    name : operation name, stored on ``self.name`` and forwarded to
        ``_convert_to_array``

    After the dtype flags are recorded, ``self._validate()`` is called and
    the converted values are handed to ``self._convert_for_datetime``.
    """
    self.name = name

    # need to make sure that we are aligning the data
    both_series = (isinstance(left, pd.Series) and
                   isinstance(right, pd.Series))
    if both_series:
        left, right = left.align(right, copy=False)

    self.left, self.right = left, right

    self.is_offset_lhs = self._is_offset(left)
    self.is_offset_rhs = self._is_offset(right)

    integer_kinds = ('i', 'u')

    # left operand: the flags inspect ``left`` itself
    lvalues = self._convert_to_array(left, name=name)
    self.is_timedelta_lhs = is_timedelta64_dtype(left)
    self.is_datetime_lhs = is_datetime64_dtype(left)
    self.is_integer_lhs = left.dtype.kind in integer_kinds

    # right operand: the flags inspect the converted values
    rvalues = self._convert_to_array(right, name=name, other=lvalues)
    self.is_datetime_rhs = is_datetime64_dtype(rvalues)
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    self.is_integer_rhs = rvalues.dtype.kind in integer_kinds

    self._validate()

    self._convert_for_datetime(lvalues, rvalues)
def __init__(self, left, right, name, na_op):
    """Prepare the operands and dtype flags for an operation.

    Parameters
    ----------
    left, right : operands; when both are ``pd.Series`` they are aligned
        (``copy=False``) before anything else happens
    name : operation name, passed on to ``_convert_to_array`` and
        ``_validate``
    na_op : stored unchanged on ``self.na_op``
    """
    # need to make sure that we are aligning the data
    if isinstance(left, pd.Series) and isinstance(right, pd.Series):
        left, right = left.align(right, copy=False)

    # the right operand is converted with the left values as ``other``
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    self.name, self.na_op = name, na_op
    int_kinds = ('i', 'u')

    # left
    self.left = left
    self.is_offset_lhs = self._is_offset(left)
    self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
    lhs_dt64 = is_datetime64_dtype(lvalues)
    lhs_dt64tz = is_datetime64tz_dtype(lvalues)
    self.is_datetime64_lhs = lhs_dt64
    self.is_datetime64tz_lhs = lhs_dt64tz
    self.is_datetime_lhs = lhs_dt64 or lhs_dt64tz
    # integer/float flags look at the operand's own dtype, not ``lvalues``
    self.is_integer_lhs = left.dtype.kind in int_kinds
    self.is_floating_lhs = left.dtype.kind == 'f'

    # right
    self.right = right
    self.is_offset_rhs = self._is_offset(right)
    rhs_dt64 = is_datetime64_dtype(rvalues)
    rhs_dt64tz = is_datetime64tz_dtype(rvalues)
    self.is_datetime64_rhs = rhs_dt64
    self.is_datetime64tz_rhs = rhs_dt64tz
    self.is_datetime_rhs = rhs_dt64 or rhs_dt64tz
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    self.is_integer_rhs = rvalues.dtype.kind in int_kinds
    self.is_floating_rhs = rvalues.dtype.kind == 'f'

    self._validate(lvalues, rvalues, name)

    converted = self._convert_for_datetime(lvalues, rvalues)
    self.lvalues, self.rvalues = converted
def __init__(self, left, right, name, na_op):
    """Initialise the base op, then record datetime-related dtype flags.

    Parameters
    ----------
    left, right : operands, forwarded to the parent initialiser
    name : operation name, also passed to ``_convert_to_array`` and
        ``_validate``
    na_op : forwarded to the parent initialiser
    """
    super(_TimeOp, self).__init__(left, right, name, na_op)

    # the right operand is converted with the left values as ``other``
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    int_kinds = ('i', 'u')

    # left
    self.is_offset_lhs = self._is_offset(left)
    self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
    lhs_dt64 = is_datetime64_dtype(lvalues)
    lhs_dt64tz = is_datetime64tz_dtype(lvalues)
    self.is_datetime64_lhs = lhs_dt64
    self.is_datetime64tz_lhs = lhs_dt64tz
    self.is_datetime_lhs = lhs_dt64 or lhs_dt64tz
    # integer/float flags look at ``left.dtype``, not the converted values
    self.is_integer_lhs = left.dtype.kind in int_kinds
    self.is_floating_lhs = left.dtype.kind == 'f'

    # right
    self.is_offset_rhs = self._is_offset(right)
    rhs_dt64 = is_datetime64_dtype(rvalues)
    rhs_dt64tz = is_datetime64tz_dtype(rvalues)
    self.is_datetime64_rhs = rhs_dt64
    self.is_datetime64tz_rhs = rhs_dt64tz
    self.is_datetime_rhs = rhs_dt64 or rhs_dt64tz
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    self.is_integer_rhs = rvalues.dtype.kind in int_kinds
    self.is_floating_rhs = rvalues.dtype.kind == 'f'

    self._validate(lvalues, rvalues, name)

    converted = self._convert_for_datetime(lvalues, rvalues)
    self.lvalues, self.rvalues = converted
def test_is_timedelta():
    """Check the timedelta64 dtype predicates on strings and an index."""
    # dtype strings: only the explicit [ns] spelling counts as "ns"
    assert com.is_timedelta64_dtype('timedelta64')
    assert com.is_timedelta64_dtype('timedelta64[ns]')
    assert not com.is_timedelta64_ns_dtype('timedelta64')
    assert com.is_timedelta64_ns_dtype('timedelta64[ns]')

    tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64')
    assert com.is_timedelta64_dtype(tdi)
    assert com.is_timedelta64_ns_dtype(tdi)

    as_ns = tdi.astype('timedelta64[ns]')
    assert com.is_timedelta64_ns_dtype(as_ns)

    # Conversion to Int64Index:
    as_generic = tdi.astype('timedelta64')
    assert not com.is_timedelta64_ns_dtype(as_generic)
    as_hours = tdi.astype('timedelta64[h]')
    assert not com.is_timedelta64_ns_dtype(as_hours)
def _sqlalchemy_type(self, arr_or_dtype):
    """Pick the SQLAlchemy column type for ``arr_or_dtype``.

    Parameters
    ----------
    arr_or_dtype : the ``date`` class itself, or an array/dtype understood
        by the ``com.is_*_dtype`` predicates

    Returns
    -------
    A SQLAlchemy type: ``Date``, ``DateTime`` (an instance with
    ``timezone=True`` when the input exposes a ``tzinfo`` attribute),
    ``BigInteger``, ``Float``, ``Boolean``, or ``Text`` as the fallback.

    Warns
    -----
    UserWarning
        For timedelta64 input, which is written as integer nanoseconds.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date)

    if arr_or_dtype is date:
        return Date
    if com.is_datetime64_dtype(arr_or_dtype):
        # Presence of a ``tzinfo`` attribute (whatever its value) selects
        # the timezone-aware column. Only a missing attribute falls back
        # to plain DateTime; unrelated errors are no longer swallowed.
        if hasattr(arr_or_dtype, 'tzinfo'):
            return DateTime(timezone=True)
        return DateTime
    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def get_dtype_kinds(l):
    """
    Collect the distinct "kinds" found in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    def _kind_of(arr):
        # The checks run in priority order; the first match wins and the
        # raw numpy ``dtype.kind`` is the fallback.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return "category"
        if com.is_sparse(arr):
            return "sparse"
        if com.is_datetimetz(arr):
            return "datetimetz"
        if com.is_datetime64_dtype(dtype):
            return "datetime"
        if com.is_timedelta64_dtype(dtype):
            return "timedelta"
        if com.is_object_dtype(dtype):
            return "object"
        if com.is_bool_dtype(dtype):
            return "bool"
        return dtype.kind

    return set(_kind_of(arr) for arr in l)
def _wrap_results(result, dtype):
    """Wrap a reduction result so it matches the datetime-like ``dtype``.

    Results for any other dtype are returned untouched. ndarray results
    are re-viewed as ``dtype``; a scalar datetime result becomes a
    ``lib.Timestamp`` and a scalar timedelta result becomes a one-element
    ``timedelta64[ns]`` Series.
    """
    is_array = isinstance(result, np.ndarray)

    if is_datetime64_dtype(dtype):
        if is_array:
            return result.view(dtype)
        return lib.Timestamp(result)

    if is_timedelta64_dtype(dtype):
        if is_array:
            return result.view(dtype)

        # this is a scalar timedelta result!
        # we have series convert then take the element (scalar)
        # as series will do the right thing in py3 (and deal with numpy
        # 1.6.2 bug in that it results dtype of timedelta64[us])
        from pandas import Series

        # coerce float to results
        if is_float(result):
            result = int(result)
        return Series([result], dtype='timedelta64[ns]')

    return result
def _sqlalchemy_type(self, arr_or_dtype):
    """Pick the SQLAlchemy column type for ``arr_or_dtype``.

    Parameters
    ----------
    arr_or_dtype : the ``date`` class itself, or an array/dtype understood
        by the ``com.is_*_dtype`` predicates

    Returns
    -------
    A SQLAlchemy type: ``Date``, ``DateTime`` (an instance with
    ``timezone=True`` when the input exposes a ``tzinfo`` attribute),
    ``BigInteger``, ``Float``, ``Boolean``, or ``Text`` as the fallback.

    Warns
    -----
    UserWarning
        For timedelta64 input, which is written as integer nanoseconds.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date)

    if arr_or_dtype is date:
        return Date
    if com.is_datetime64_dtype(arr_or_dtype):
        # Presence of a ``tzinfo`` attribute (whatever its value) selects
        # the timezone-aware column. Only a missing attribute falls back
        # to plain DateTime; unrelated errors are no longer swallowed.
        if hasattr(arr_or_dtype, 'tzinfo'):
            return DateTime(timezone=True)
        return DateTime
    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def _maybe_convert_timedelta(self, other):
    """Express ``other`` as a count of ``self.freq`` units.

    Parameters
    ----------
    other : timedelta-like scalar, ``offsets.DateOffset``, or ndarray of
        integer / timedelta64 dtype

    Returns
    -------
    int or ndarray
        ``nanos // offset_nanos`` for timedelta-like input that is an
        exact multiple of a Tick frequency, ``other.n`` for a DateOffset
        whose base alias equals ``self.freq.rule_code``, or ``other``
        itself for an integer ndarray.

    Raises
    ------
    IncompatibleFrequency
        When none of the conversions above applies.
    """
    if isinstance(other, (timedelta, np.timedelta64,
                          offsets.Tick, Timedelta)):
        offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(offset, offsets.Tick):
            nanos = tslib._delta_to_nanoseconds(other)
            offset_nanos = tslib._delta_to_nanoseconds(offset)
            if nanos % offset_nanos == 0:
                return nanos // offset_nanos
    elif isinstance(other, offsets.DateOffset):
        freqstr = frequencies.get_standard_freq(other)
        base = frequencies.get_base_alias(freqstr)
        if base == self.freq.rule_code:
            return other.n
        msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
        raise IncompatibleFrequency(msg)
    elif isinstance(other, np.ndarray):
        if com.is_integer_dtype(other):
            return other
        elif com.is_timedelta64_dtype(other):
            offset = frequencies.to_offset(self.freq)
            if isinstance(offset, offsets.Tick):
                nanos = tslib._delta_to_nanoseconds(other)
                offset_nanos = tslib._delta_to_nanoseconds(offset)
                # Every element must be an exact multiple of the
                # frequency. The previous ``(...).all() == 0`` passed as
                # soon as any single remainder was zero and then
                # floor-divided the non-multiples silently.
                if (nanos % offset_nanos == 0).all():
                    return nanos // offset_nanos

    # raise when input doesn't have freq
    msg = "Input has different freq from PeriodIndex(freq={0})"
    raise IncompatibleFrequency(msg.format(self.freqstr))
def maybe_to_datetimelike(data, copy=False):
    """
    Build the delegated accessor class for a datetimelike Series
    (e.g. datetime64[ns], timedelta64[ns] dtype or a Series of Periods).

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        if ``data`` is not a Series or is not datetimelike
    """
    from pandas import Series

    failure = "cannot convert an object of type {0} to a datetimelike index"

    if not isinstance(data, Series):
        raise TypeError(failure.format(type(data)))

    index = data.index
    dtype = data.dtype

    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    if is_timedelta64_dtype(dtype):
        values = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(values, index, name=data.name)

    # remaining dtypes: decide from the contents
    if is_period_arraylike(data):
        values = PeriodIndex(data, copy=copy)
        return PeriodProperties(values, index, name=data.name)

    if is_datetime_arraylike(data):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    raise TypeError(failure.format(type(data)))
def get_dtype_kinds(l):
    """
    Collect the distinct "kinds" found in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """
    # (predicate, takes the array instead of the dtype, label); evaluated
    # in order, first match wins
    checks = (
        (com.is_categorical_dtype, False, 'category'),
        (com.is_sparse, True, 'sparse'),
        (com.is_datetimetz, True, 'datetimetz'),
        (com.is_datetime64_dtype, False, 'datetime'),
        (com.is_timedelta64_dtype, False, 'timedelta'),
        (com.is_object_dtype, False, 'object'),
        (com.is_bool_dtype, False, 'bool'),
    )

    kinds = set()
    for arr in l:
        dtype = arr.dtype
        for predicate, wants_array, label in checks:
            if predicate(arr if wants_array else dtype):
                kinds.add(label)
                break
        else:
            # nothing special matched: fall back to the numpy kind code
            kinds.add(dtype.kind)
    return kinds
def f(values, axis=None, skipna=True, **kwds):
    """Run the reduction on ``values``, using the accelerated function
    when allowed.

    ``self``, ``alt``, ``bn_func`` and ``bn_name`` are not parameters; they
    are resolved from the enclosing scope. Any exception raised on the main
    path results in a plain ``alt`` call.
    """
    # keywords stored on ``self.kwargs`` only fill in missing entries
    if len(self.kwargs) > 0:
        for key, default in compat.iteritems(self.kwargs):
            kwds.setdefault(key, default)

    try:
        if self.zero_value is not None and values.size == 0:
            if values.ndim != 1:
                # zero-filled result with ``axis`` removed from the shape
                result_shape = (values.shape[:axis] +
                                values.shape[axis + 1:])
                result = np.empty(result_shape)
                result.fill(0)
                return result

            # wrap the 0's if needed
            if is_timedelta64_dtype(values):
                return lib.Timedelta(0)
            return 0

        accelerated = (_USE_BOTTLENECK and skipna and
                       _bn_ok_dtype(values.dtype, bn_name))
        if not accelerated:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
            result = bn_func(values, axis=axis, **kwds)

            # prefer to treat inf/-inf as NA, but must compute the func
            # twice :(
            if _has_infs(result):
                result = alt(values, axis=axis, skipna=skipna, **kwds)
    except Exception:
        result = alt(values, axis=axis, skipna=skipna, **kwds)

    return result
def f(values, axis=None, skipna=True, **kwds):
    """Run the reduction on ``values``, using the accelerated function
    when allowed.

    ``self``, ``alt``, ``bn_func`` and ``bn_name`` are not parameters; they
    are resolved from the enclosing scope. Any exception raised on the main
    path results in a plain ``alt`` call.
    """

    def _fallback():
        # the non-accelerated implementation, always given ``skipna``
        return alt(values, axis=axis, skipna=skipna, **kwds)

    # keywords stored on ``self.kwargs`` only fill in missing entries
    if len(self.kwargs) > 0:
        for key, default in compat.iteritems(self.kwargs):
            if key not in kwds:
                kwds[key] = default

    try:
        empty_with_zero = (self.zero_value is not None and
                           values.size == 0)
        if empty_with_zero and values.ndim == 1:
            # wrap the 0's if needed
            return lib.Timedelta(0) if is_timedelta64_dtype(values) else 0
        if empty_with_zero:
            # zero-filled result with ``axis`` removed from the shape
            result_shape = (values.shape[:axis] + values.shape[axis + 1:])
            result = np.empty(result_shape)
            result.fill(0)
            return result

        if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
                                                       bn_name):
            result = bn_func(values, axis=axis, **kwds)

            # prefer to treat inf/-inf as NA, but must compute the func
            # twice :(
            if _has_infs(result):
                result = _fallback()
        else:
            result = _fallback()
    except Exception:
        result = _fallback()

    return result
def _sqlalchemy_type(self, col):
    """Pick the SQLAlchemy column type for ``col``.

    Parameters
    ----------
    col : column data understood by the ``com.is_*_dtype`` predicates

    Returns
    -------
    A SQLAlchemy type: ``DateTime`` (an instance with ``timezone=True``
    when ``col`` exposes a ``tzinfo`` attribute), ``BigInteger``,
    ``Float``, ``Boolean``, ``Date`` / ``Time`` when
    ``lib.infer_dtype`` reports ``'date'`` / ``'time'``, otherwise
    ``Text``.

    Warns
    -----
    UserWarning
        For timedelta64 input, which is written as integer nanoseconds.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time)

    if com.is_datetime64_dtype(col):
        # Presence of a ``tzinfo`` attribute (whatever its value) selects
        # the timezone-aware column. Only a missing attribute falls back
        # to plain DateTime; unrelated errors are no longer swallowed.
        if hasattr(col, 'tzinfo'):
            return DateTime(timezone=True)
        return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    # no dtype predicate matched: look at the values themselves
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def _sql_type_name(self, col):
    """Return the flavor-specific SQL type name for ``col``.

    The scalar type of ``col.dtype`` is mapped to one of the keys of
    ``_SQL_TYPES`` (``"text"`` by default) and the entry for
    ``self.pd_sql.flavor`` is returned.

    Warns
    -----
    UserWarning
        For timedelta64 columns, which are written as integer nanoseconds.
    """
    pytype = col.dtype.type
    pytype_name = "text"
    if issubclass(pytype, np.floating):
        pytype_name = "float"
    elif com.is_timedelta64_dtype(pytype):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        pytype_name = "int"
    elif issubclass(pytype, np.integer):
        pytype_name = "int"
    elif issubclass(pytype, np.datetime64) or pytype is datetime:
        # Caution: np.datetime64 is also a subclass of np.number.
        pytype_name = "datetime"
    elif issubclass(pytype, np.bool_):
        pytype_name = "bool"
    else:
        # This branch used to be ``issubclass(pytype, np.object)``.
        # ``np.object`` was an alias of the builtin ``object`` (every type
        # is a subclass, so the test was always true) and has been removed
        # from NumPy. A plain ``else`` keeps the same behaviour.
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == "date":
            pytype_name = "date"
        elif inferred == "time":
            pytype_name = "time"

    return _SQL_TYPES[pytype_name][self.pd_sql.flavor]
def _convert_listlike(arg, box, unit):
    """Convert a list-like ``arg`` to timedelta64[ns] values.

    Parameters
    ----------
    arg : list, tuple or array
    box : bool
        when true the result is wrapped in a ``Series`` of dtype
        ``m8[ns]``
    unit : unit used for integer input and for the element-wise fallback

    Returns
    -------
    ndarray, or Series when ``box`` is true
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        unit = _validate_timedelta_unit(unit)

        # these are shortcutable
        value = arg.astype(
            'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
    else:
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit)
        except Exception:
            # Best-effort element-wise fallback. ``Exception`` rather than
            # a bare ``except`` so KeyboardInterrupt / SystemExit are not
            # swallowed.
            value = np.array([
                _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg
            ])

    if box:
        from pandas import Series
        value = Series(value, dtype='m8[ns]')
    return value
def _sql_type_name(self, col):
    """Return the flavor-specific SQL type name for ``col``.

    The scalar type of ``col.dtype`` is mapped to one of the keys of
    ``_SQL_TYPES`` (``"text"`` by default) and the entry for
    ``self.pd_sql.flavor`` is returned.

    Warns
    -----
    UserWarning
        For timedelta64 columns, which are written as integer nanoseconds.
    """
    pytype = col.dtype.type
    pytype_name = "text"
    if issubclass(pytype, np.floating):
        pytype_name = "float"
    elif com.is_timedelta64_dtype(pytype):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        pytype_name = "int"
    elif issubclass(pytype, np.integer):
        pytype_name = "int"
    elif issubclass(pytype, np.datetime64) or pytype is datetime:
        # Caution: np.datetime64 is also a subclass of np.number.
        pytype_name = "datetime"
    elif issubclass(pytype, np.bool_):
        pytype_name = "bool"
    else:
        # This branch used to be ``issubclass(pytype, np.object)``.
        # ``np.object`` was an alias of the builtin ``object`` (every type
        # is a subclass, so the test was always true) and has been removed
        # from NumPy. A plain ``else`` keeps the same behaviour.
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == "date":
            pytype_name = "date"
        elif inferred == "time":
            pytype_name = "time"

    return _SQL_TYPES[pytype_name][self.pd_sql.flavor]
def get_op(cls, left, right, name, na_op):
    """
    Get op dispatcher, returns _Op or _TimeOp.

    A ``_TimeOp`` is returned when ``left`` has a datetime64,
    datetime64tz or timedelta64 dtype; every other case gets a plain
    ``_Op``. Two Series with unequal indexes are aligned first.
    """
    # dtype checks are made on ``left`` as passed in, before any alignment
    is_timedelta_lhs = is_timedelta64_dtype(left)
    is_datetime_lhs = (is_datetime64_dtype(left) or
                       is_datetime64tz_dtype(left))

    both_series = (isinstance(left, ABCSeries) and
                   isinstance(right, ABCSeries))

    # avoid repeated alignment
    if both_series and not left.index.equals(right.index):
        left, right = left.align(right, copy=False)

        index, _, _ = left.index.join(right.index, how='outer',
                                      return_indexers=True)
        # if DatetimeIndex have different tz, convert to UTC
        left.index = index
        right.index = index

    if is_datetime_lhs or is_timedelta_lhs:
        return _TimeOp(left, right, name, na_op)
    return _Op(left, right, name, na_op)
def _sqlalchemy_type(self, col):
    """Pick the SQLAlchemy column type for ``col``.

    Parameters
    ----------
    col : column data understood by the ``com.is_*_dtype`` predicates

    Returns
    -------
    A SQLAlchemy type: ``DateTime`` (an instance with ``timezone=True``
    when ``col`` exposes a ``tzinfo`` attribute), ``BigInteger``,
    ``Float``, ``Boolean``, ``Date`` / ``Time`` when
    ``lib.infer_dtype`` reports ``'date'`` / ``'time'``, otherwise
    ``Text``.

    Warns
    -----
    UserWarning
        For timedelta64 input, which is written as integer nanoseconds.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time)

    if com.is_datetime64_dtype(col):
        # Presence of a ``tzinfo`` attribute (whatever its value) selects
        # the timezone-aware column. Only a missing attribute falls back
        # to plain DateTime; unrelated errors are no longer swallowed.
        if hasattr(col, 'tzinfo'):
            return DateTime(timezone=True)
        return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    # no dtype predicate matched: look at the values themselves
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def _maybe_convert_timedelta(self, other):
    """Express ``other`` as a count of ``self.freq`` units.

    Parameters
    ----------
    other : timedelta-like scalar, ``offsets.DateOffset``, or ndarray of
        integer / timedelta64 dtype

    Returns
    -------
    int or ndarray
        ``nanos // offset_nanos`` for timedelta-like input that is an
        exact multiple of a Tick frequency, ``other.n`` for a DateOffset
        whose base alias equals ``self.freq.rule_code``, or ``other``
        itself for an integer ndarray.

    Raises
    ------
    ValueError
        When none of the conversions above applies.
    """
    if isinstance(other, (timedelta, np.timedelta64,
                          offsets.Tick, Timedelta)):
        offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(offset, offsets.Tick):
            nanos = tslib._delta_to_nanoseconds(other)
            offset_nanos = tslib._delta_to_nanoseconds(offset)
            if nanos % offset_nanos == 0:
                return nanos // offset_nanos
    elif isinstance(other, offsets.DateOffset):
        freqstr = frequencies.get_standard_freq(other)
        base = frequencies.get_base_alias(freqstr)
        if base == self.freq.rule_code:
            return other.n
    elif isinstance(other, np.ndarray):
        if com.is_integer_dtype(other):
            return other
        elif com.is_timedelta64_dtype(other):
            offset = frequencies.to_offset(self.freq)
            if isinstance(offset, offsets.Tick):
                nanos = tslib._delta_to_nanoseconds(other)
                offset_nanos = tslib._delta_to_nanoseconds(offset)
                # Every element must be an exact multiple of the
                # frequency. The previous ``(...).all() == 0`` passed as
                # soon as any single remainder was zero and then
                # floor-divided the non-multiples silently.
                if (nanos % offset_nanos == 0).all():
                    return nanos // offset_nanos

    msg = "Input has different freq from PeriodIndex(freq={0})"
    raise ValueError(msg.format(self.freqstr))
def _convert_listlike(arg, box, unit):
    """Convert a list-like ``arg`` to timedelta64[ns] values.

    Parameters
    ----------
    arg : list, tuple, array, or any iterable without a ``dtype``
        attribute (materialised into an object array first)
    box : bool
        when true the result is wrapped in a ``TimedeltaIndex``
    unit : unit used for integer input and by the conversion helpers

    Returns
    -------
    ndarray, or TimedeltaIndex when ``box`` is true
    """
    plain_iterable = hasattr(arg, '__iter__') and not hasattr(arg, 'dtype')
    if isinstance(arg, (list, tuple)) or plain_iterable:
        arg = np.array(list(arg), dtype='O')

    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        # these are shortcutable
        value = arg.astype(
            'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
    else:
        # Three attempts, fastest first. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit are not swallowed
        # by the fallbacks.
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit)
        except Exception:
            # try to process strings fast; may need to fallback
            try:
                value = np.array(
                    [_get_string_converter(r, unit=unit)() for r in arg],
                    dtype='m8[ns]')
            except Exception:
                value = np.array([
                    _coerce_scalar_to_timedelta_type(r, unit=unit)
                    for r in arg
                ])

    if box:
        from pandas import TimedeltaIndex
        value = TimedeltaIndex(value, unit='ns')
    return value
def _convert_listlike(arg, box, unit):
    """Convert a list-like ``arg`` to timedelta64[ns] values.

    ``coerce`` is not a parameter; it is resolved from the enclosing
    scope and forwarded to the conversion helpers.

    Parameters
    ----------
    arg : list, tuple, array, or any iterable without a ``dtype``
        attribute (materialised into an object array first)
    box : bool
        when true the result is wrapped in a ``TimedeltaIndex``
    unit : unit used for integer input and by the conversion helpers

    Returns
    -------
    ndarray, or TimedeltaIndex when ``box`` is true
    """
    plain_iterable = hasattr(arg, '__iter__') and not hasattr(arg, 'dtype')
    if isinstance(arg, (list, tuple)) or plain_iterable:
        arg = np.array(list(arg), dtype='O')

    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        # these are shortcutable
        value = arg.astype(
            'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
    else:
        # Three attempts, fastest first. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit are not swallowed
        # by the fallbacks.
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit, coerce=coerce)
        except Exception:
            # try to process strings fast; may need to fallback
            try:
                value = np.array(
                    [_get_string_converter(r, unit=unit)() for r in arg],
                    dtype='m8[ns]')
            except Exception:
                value = np.array([
                    _coerce_scalar_to_timedelta_type(r, unit=unit,
                                                     coerce=coerce)
                    for r in arg
                ])
        # The other two branches already produce timedelta64[ns], so this
        # cast (no copy when the dtype already matches) is only needed
        # here.
        value = value.astype('timedelta64[ns]', copy=False)

    if box:
        from pandas import TimedeltaIndex
        value = TimedeltaIndex(value, unit='ns')
    return value
def na_value_for_dtype(dtype):
    """
    Return a dtype compat na value

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    dtype compat na value: ``NaT`` for datetime64 / datetime64tz /
    timedelta64, ``np.nan`` for float, ``0`` for integer, ``False`` for
    bool and ``np.nan`` for anything else
    """
    from pandas.core import common as com
    from pandas import NaT

    dtype = pandas_dtype(dtype)

    is_datetimelike = (com.is_datetime64_dtype(dtype) or
                       com.is_datetime64tz_dtype(dtype) or
                       com.is_timedelta64_dtype(dtype))
    if is_datetimelike:
        return NaT
    if com.is_float_dtype(dtype):
        return np.nan
    if com.is_integer_dtype(dtype):
        return 0
    if com.is_bool_dtype(dtype):
        return False
    return np.nan
def __init__(self, left, right, name):
    """Record the operation name and the dtype flags of both operands.

    Parameters
    ----------
    left, right : operands of the operation
    name : operation name, stored on ``self.name`` and forwarded to
        ``_convert_to_array``
    """
    self.name = name

    # the right operand is converted with the left values as ``other``
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    integer_kinds = ('i', 'u')

    # left-hand flags inspect ``left`` itself
    self.is_timedelta_lhs = com.is_timedelta64_dtype(left)
    self.is_datetime_lhs = com.is_datetime64_dtype(left)
    self.is_integer_lhs = left.dtype.kind in integer_kinds

    # right-hand flags inspect the converted values
    rhs_is_datetime = com.is_datetime64_dtype(rvalues)
    self.is_datetime_rhs = rhs_is_datetime
    # when ``pd._np_version_under1p7`` is set, any non-datetime right
    # operand is flagged as timedelta
    self.is_timedelta_rhs = (com.is_timedelta64_dtype(rvalues) or
                             (not rhs_is_datetime and
                              pd._np_version_under1p7))
    self.is_integer_rhs = rvalues.dtype.kind in integer_kinds

    self._validate()

    self._convert_for_datetime(lvalues, rvalues)
def maybe_to_datetimelike(data, copy=False):
    """
    Build the delegated accessor class for a datetimelike Series
    (e.g. datetime64[ns], timedelta64[ns] dtype or a Series of Periods).

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        if ``data`` is not a Series or is not datetimelike
    """
    from pandas import Series

    failure = "cannot convert an object of type {0} to a datetimelike index"

    if not isinstance(data, Series):
        raise TypeError(failure.format(type(data)))

    index = data.index
    dtype = data.dtype

    if is_datetime64_dtype(dtype):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    if is_datetime64tz_dtype(dtype):
        # tz-aware data additionally passes ``ambiguous='infer'``
        values = DatetimeIndex(data, copy=copy, freq='infer',
                               ambiguous='infer')
        return DatetimeProperties(values, index, name=data.name)

    if is_timedelta64_dtype(dtype):
        values = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(values, index, name=data.name)

    # remaining dtypes: decide from the contents
    if is_period_arraylike(data):
        values = PeriodIndex(data, copy=copy)
        return PeriodProperties(values, index, name=data.name)

    if is_datetime_arraylike(data):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    raise TypeError(failure.format(type(data)))
def __init__(self, left, right, name):
    """Record the operation name and the dtype flags of both operands.

    Parameters
    ----------
    left, right : operands of the operation
    name : operation name, stored on ``self.name`` and forwarded to
        ``_convert_to_array``
    """
    self.name = name

    # both operands are converted independently of each other
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name)

    integer_kinds = ('i', 'u')

    # left-hand flags inspect ``left`` itself
    self.is_timedelta_lhs = com.is_timedelta64_dtype(left)
    self.is_datetime_lhs = com.is_datetime64_dtype(left)
    self.is_integer_lhs = left.dtype.kind in integer_kinds

    # right-hand flags inspect the converted values
    rhs_is_datetime = com.is_datetime64_dtype(rvalues)
    self.is_datetime_rhs = rhs_is_datetime
    # when ``pd._np_version_under1p7`` is set, any non-datetime right
    # operand is flagged as timedelta
    self.is_timedelta_rhs = (com.is_timedelta64_dtype(rvalues) or
                             (not rhs_is_datetime and
                              pd._np_version_under1p7))
    self.is_integer_rhs = rvalues.dtype.kind in integer_kinds

    self._validate()

    self._convert_for_datetime(lvalues, rvalues)
def nansum(values, axis=None, skipna=True):
    """Sum ``values`` along ``axis``.

    The values and mask come from ``_get_values`` (called with fill value
    0); the total is passed through ``_maybe_null_out`` and then
    ``_wrap_results`` with the original dtype.
    """
    values, mask, dtype, dtype_max = _get_values(values, skipna, 0)

    # accumulator dtype: floats keep their own dtype, timedeltas are summed
    # as float64, everything else uses ``dtype_max``
    if is_float_dtype(dtype):
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        dtype_sum = np.float64
    else:
        dtype_sum = dtype_max

    total = values.sum(axis, dtype=dtype_sum)
    total = _maybe_null_out(total, axis, mask)

    return _wrap_results(total, dtype)
def pandas_col_to_ibis_type(col):
    """Return the ibis type (a name or a ``dt.Category``) for a pandas
    column.

    Raises
    ------
    com.IbisTypeError
        for datetime64 columns that are not nanosecond based, for uint64
        columns, and for any dtype not handled below
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if not pdcom.is_datetime64_ns_dtype(dty):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units"
                                    .format(col.name, dty))
        return 'timestamp'

    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types, checked in this order; each unsigned type is
    # mapped to the next wider signed name
    numeric_names = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_name in numeric_names:
        if issubclass(dty.type, numpy_type):
            return ibis_name

    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64"
                                .format(col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}"
                            .format(col.name, dty))
def pandas_col_to_ibis_type(col):
    """Return the ibis type (a name or a ``dt.Category``) for a pandas
    column.

    Raises
    ------
    com.IbisTypeError
        for datetime64 columns that are not nanosecond based, for uint64
        columns, and for any dtype not handled below
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype
    scalar_type = dty.type

    def _is(numpy_type):
        # subclass test against the column's scalar type
        return issubclass(scalar_type, numpy_type)

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if not pdcom.is_datetime64_ns_dtype(dty):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units".format(
                                        col.name, dty))
        return 'timestamp'
    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types: signed integers keep their width
    if _is(np.int8):
        return 'int8'
    if _is(np.int16):
        return 'int16'
    if _is(np.int32):
        return 'int32'
    if _is(np.int64):
        return 'int64'

    if _is(np.float32):
        return 'float'
    if _is(np.float64):
        return 'double'

    # unsigned integers are mapped to the next wider signed name
    if _is(np.uint8):
        return 'int16'
    if _is(np.uint16):
        return 'int32'
    if _is(np.uint32):
        return 'int64'
    if _is(np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64".format(
            col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}".format(col.name, dty))
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        if either argument is not list-like
    """
    not_list_like = ("only list-like objects are allowed to be passed"
                     " to isin(), you passed a "
                     "[{0}]")

    if not com.is_list_like(comps):
        raise TypeError(not_list_like.format(type(comps).__name__))
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError(not_list_like.format(type(values).__name__))

    def _via_in1d(x, y):
        return np.in1d(x, np.asarray(list(y)))

    def _via_int64_table(x, y):
        return lib.ismember_int64(x, set(y))

    def _via_object_table(x, y):
        return lib.ismember(x, set(y))

    # GH11232
    # work-around for numpy < 1.8 and comparisions on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        member = _via_in1d
    else:
        member = _via_int64_table

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view("i8")
        comps = comps.view("i8")
    elif not com.is_int64_dtype(comps):
        # anything that is not int64-like is tested as objects
        member = _via_object_table

    return member(comps, values)
def __init__(self, left, right, name):
    """Set up the state for an operation between ``left`` and ``right``.

    Parameters
    ----------
    left, right : operands; when both are ``pd.Series`` they are aligned
        (``copy=False``) before being stored
    name : operation name, stored on ``self.name`` and forwarded to
        ``_convert_to_array``
    """
    self.name = name

    # need to make sure that we are aligning the data
    both_series = (isinstance(left, pd.Series) and
                   isinstance(right, pd.Series))
    if both_series:
        left, right = left.align(right, copy=False)

    self.left, self.right = left, right

    # the right operand is converted with the left values as ``other``
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    integer_kinds = ('i', 'u')

    # left-hand flags inspect ``left`` itself
    self.is_timedelta_lhs = is_timedelta64_dtype(left)
    self.is_datetime_lhs = is_datetime64_dtype(left)
    self.is_integer_lhs = left.dtype.kind in integer_kinds

    # right-hand flags inspect the converted values
    self.is_datetime_rhs = is_datetime64_dtype(rvalues)
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    self.is_integer_rhs = rvalues.dtype.kind in integer_kinds

    self._validate()

    self._convert_for_datetime(lvalues, rvalues)
def infer_freq(index, warn=True):
    """
    Infer the most likely frequency given the input index. If the frequency
    is uncertain, a warning will be printed.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
            if passed a Series will use the values of the series (NOT THE
            INDEX)
    warn : boolean, default True

    Returns
    -------
    freq : string or None
        None if no discernible frequency
        TypeError if the index is not datetime-like
        ValueError if there are less than three values.
    """
    import pandas as pd

    if isinstance(index, com.ABCSeries):
        values = index._values
        convertible = (com.is_datetime64_dtype(values) or
                       com.is_timedelta64_dtype(values) or
                       values.dtype == object)
        if not convertible:
            raise TypeError(
                "cannot infer freq from a non-convertible dtype on a Series of {0}"
                .format(index.dtype))
        index = values

    if com.is_period_arraylike(index):
        raise TypeError("PeriodIndex given. Check the `freq` attribute "
                        "instead of using infer_freq.")

    if isinstance(index, pd.TimedeltaIndex):
        return _TimedeltaFrequencyInferer(index, warn=warn).get_freq()

    # other Index flavours are reduced to their raw values, except the
    # numeric ones, which are rejected
    is_other_index = (isinstance(index, pd.Index) and
                      not isinstance(index, pd.DatetimeIndex))
    if is_other_index:
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError(
                "cannot infer freq from a non-convertible index type {0}".
                format(type(index)))
        index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            index = pd.DatetimeIndex(index.asi8)

    return _FrequencyInferer(index, warn=warn).get_freq()
def infer_freq(index, warn=True):
    """
    Infer the most likely frequency given the input index. If the frequency
    is uncertain, a warning will be printed.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
            if passed a Series will use the values of the series (NOT THE
            INDEX)
    warn : boolean, default True

    Returns
    -------
    freq : string or None
        None if no discernible frequency
        TypeError if the index is not datetime-like
        ValueError if there are less than three values.
    """
    import pandas as pd

    if isinstance(index, com.ABCSeries):
        values = index._values
        convertible = (com.is_datetime64_dtype(values) or
                       com.is_timedelta64_dtype(values) or
                       values.dtype == object)
        if not convertible:
            raise TypeError("cannot infer freq from a non-convertible "
                            "dtype on a Series of {0}".format(index.dtype))
        index = values

    if com.is_period_arraylike(index):
        raise TypeError("PeriodIndex given. Check the `freq` attribute "
                        "instead of using infer_freq.")

    if isinstance(index, pd.TimedeltaIndex):
        td_inferer = _TimedeltaFrequencyInferer(index, warn=warn)
        return td_inferer.get_freq()

    # other Index flavours are reduced to their raw values, except the
    # numeric ones, which are rejected
    if isinstance(index, pd.Index):
        if not isinstance(index, pd.DatetimeIndex):
            if isinstance(index, (pd.Int64Index, pd.Float64Index)):
                raise TypeError("cannot infer freq from a non-convertible index "
                                "type {0}".format(type(index)))
            index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            index = pd.DatetimeIndex(index.asi8)

    dt_inferer = _FrequencyInferer(index, warn=warn)
    return dt_inferer.get_freq()
def __init__(self, left, right, name, na_op):
    """Prepare the operands and dtype flags for an operation.

    Parameters
    ----------
    left, right : operands; when both are ``ABCSeries`` they are aligned
        (``copy=False``) before anything else happens
    name : operation name, passed on to ``_convert_to_array`` and
        ``_validate``
    na_op : stored unchanged on ``self.na_op``
    """
    # need to make sure that we are aligning the data
    both_series = (isinstance(left, ABCSeries) and
                   isinstance(right, ABCSeries))
    if both_series:
        left, right = left.align(right, copy=False)

    # the right operand is converted with the left values as ``other``
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    self.name, self.na_op = name, na_op
    int_kinds = ('i', 'u')

    # left
    self.left = left
    self.is_offset_lhs = self._is_offset(left)
    self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
    lhs_dt64 = is_datetime64_dtype(lvalues)
    lhs_dt64tz = is_datetime64tz_dtype(lvalues)
    self.is_datetime64_lhs = lhs_dt64
    self.is_datetime64tz_lhs = lhs_dt64tz
    self.is_datetime_lhs = lhs_dt64 or lhs_dt64tz
    # integer/float flags look at the operand's own dtype, not ``lvalues``
    self.is_integer_lhs = left.dtype.kind in int_kinds
    self.is_floating_lhs = left.dtype.kind == 'f'

    # right
    self.right = right
    self.is_offset_rhs = self._is_offset(right)
    rhs_dt64 = is_datetime64_dtype(rvalues)
    rhs_dt64tz = is_datetime64tz_dtype(rvalues)
    self.is_datetime64_rhs = rhs_dt64
    self.is_datetime64tz_rhs = rhs_dt64tz
    self.is_datetime_rhs = rhs_dt64 or rhs_dt64tz
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    self.is_integer_rhs = rvalues.dtype.kind in int_kinds
    self.is_floating_rhs = rvalues.dtype.kind == 'f'

    self._validate(lvalues, rvalues, name)

    converted = self._convert_for_datetime(lvalues, rvalues)
    self.lvalues, self.rvalues = converted
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        if either argument is not list-like
    """
    not_list_like = ("only list-like objects are allowed to be passed"
                     " to isin(), you passed a "
                     "[{0}]")

    if not com.is_list_like(comps):
        raise TypeError(not_list_like.format(type(comps).__name__))
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError(not_list_like.format(type(values).__name__))

    # anything that is not already an ndarray is materialised as a list
    if not isinstance(values, np.ndarray):
        values = list(values)

    def _via_in1d(x, y):
        return np.in1d(x, np.asarray(list(y)))

    def _via_int64_table(x, y):
        return lib.ismember_int64(x, set(y))

    def _via_object_table(x, y):
        return lib.ismember(x, set(y))

    # GH11232
    # work-around for numpy < 1.8 and comparisions on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        member = _via_in1d
    else:
        member = _via_int64_table

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif not com.is_int64_dtype(comps):
        # anything that is not int64-like is tested as objects
        member = _via_object_table

    return member(comps, values)
def __init__(self, left, right, name):
    """
    Record both operands of a time-like operation and classify their dtypes.

    Parameters
    ----------
    left, right : operands; when both are Series they are aligned first
    name : operation name, forwarded to ``_convert_to_array``
    """
    self.name = name

    # two Series have to be aligned before their values can be combined
    both_series = isinstance(left, pd.Series) and isinstance(right, pd.Series)
    if both_series:
        left, right = left.align(right)

    self.left = left
    self.right = right

    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    # left-hand flags are derived from ``left`` itself
    self.is_timedelta_lhs = com.is_timedelta64_dtype(left)
    self.is_datetime_lhs = com.is_datetime64_dtype(left)
    self.is_integer_lhs = left.dtype.kind in ('i', 'u')

    # right-hand flags are derived from the converted values
    self.is_datetime_rhs = com.is_datetime64_dtype(rvalues)
    # when ``pd._np_version_under1p7`` is set, every non-datetime right
    # operand is flagged as a timedelta
    self.is_timedelta_rhs = (
        com.is_timedelta64_dtype(rvalues) or
        (not self.is_datetime_rhs and pd._np_version_under1p7)
    )
    self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')

    self._validate()
    self._convert_for_datetime(lvalues, rvalues)
def _wrap_results(result, dtype):
    """
    Re-wrap ``result`` so that it matches a datetime-like ``dtype``.

    Scalars become ``lib.Timestamp`` / ``lib.Timedelta``; ndarrays are
    viewed as ``dtype``.  For any other ``dtype`` the result is returned
    untouched.
    """
    is_array = isinstance(result, np.ndarray)

    if is_datetime64_dtype(dtype):
        if is_array:
            return result.view(dtype)
        return lib.Timestamp(result)

    if is_timedelta64_dtype(dtype):
        if is_array:
            # cast to i8 first, then reinterpret as the timedelta dtype
            return result.astype('i8').view(dtype)
        return lib.Timedelta(result)

    return result
def _convert_listlike(arg, box):
    """
    Convert a list-like ``arg`` to timedelta values.

    Returns ``arg`` itself when it is already timedelta64, otherwise an
    array built by coercing every element.  With ``box`` set the result
    is wrapped in a ``Series`` of dtype ``m8[ns]``.

    ``unit`` is not a parameter; it is taken from the enclosing scope.
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    if is_timedelta64_dtype(arg):
        # already timedelta64: nothing to coerce
        converted = arg
    else:
        converted = np.array([
            _coerce_scalar_to_timedelta_type(element, unit=unit)
            for element in arg
        ])

    if not box:
        return converted

    from pandas import Series
    return Series(converted, dtype='m8[ns]')
def _hashtable_algo(f, dtype, return_dtype=None):
    """
    Dispatch ``f`` to the hash table matching ``dtype``.

    f(HashTable, type_caster) -> result

    For datetime64 / timedelta64 input the int64 table is used and the
    result is viewed as ``return_dtype`` (default ``'M8[ns]'`` or
    ``'m8[ns]'`` respectively).
    """
    if com.is_float_dtype(dtype):
        return f(htable.Float64HashTable, com._ensure_float64)
    if com.is_integer_dtype(dtype):
        return f(htable.Int64HashTable, com._ensure_int64)

    if com.is_datetime64_dtype(dtype):
        fallback_view = 'M8[ns]'
    elif com.is_timedelta64_dtype(dtype):
        fallback_view = 'm8[ns]'
    else:
        # everything else is hashed as python objects
        return f(htable.PyObjectHashTable, com._ensure_object)

    # datetime-like: hash the i8 values, then view them as the target dtype
    return_dtype = return_dtype or fallback_view
    hashed = f(htable.Int64HashTable, com._ensure_int64)
    return hashed.view(return_dtype)
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding Table Schema type.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data type

    Notes
    -----
    The checks are applied in the order shown; the first match wins and
    anything unmatched maps to ``'any'``.

    ================================ =================
    Check                            Table Schema type
    ================================ =================
    integer                          integer
    bool                             boolean
    other numeric                    number
    datetime64 (naive or tz-aware)   datetime
    timedelta64                      duration
    categorical                      any
    string                           string
    ================================ =================
    """
    # ordered (predicate, schema type) pairs; order matters because bool
    # must be tested before the general numeric check
    rules = (
        (is_integer_dtype, 'integer'),
        (is_bool_dtype, 'boolean'),
        (is_numeric_dtype, 'number'),
        (is_datetime64_dtype, 'datetime'),
        (is_datetime64tz_dtype, 'datetime'),
        (is_timedelta64_dtype, 'duration'),
        (is_categorical_dtype, 'any'),
        (is_string_dtype, 'string'),
    )
    for matches, schema_type in rules:
        if matches(x):
            return schema_type
    return 'any'
def maybe_convert_for_time_op(cls, left, right, name, na_op):
    """
    Build a ``cls`` instance when ``left`` is datetime-like.

    Returns ``cls(left, right, name, na_op)`` if ``left`` has a
    timedelta64, datetime64 or tz-aware datetime64 dtype, and ``None``
    otherwise, signalling that the data is not the right type for time
    ops.  Only ``left`` is inspected here; any further checks happen
    inside ``cls``.
    """
    # decide if we can do it
    lhs_is_timelike = (is_timedelta64_dtype(left) or
                       is_datetime64_dtype(left) or
                       is_datetime64tz_dtype(left))
    if lhs_is_timelike:
        return cls(left, right, name, na_op)
    return None
def _convert_listlike(arg, box, unit, name=None):
    """
    Convert a list-like ``arg`` to ``timedelta64[ns]`` values.

    Parameters
    ----------
    arg : list, tuple, other iterable without a ``dtype``, or array
    box : when true, wrap the result in a ``TimedeltaIndex``
    unit : unit applied to integer input and passed to the parser
    name : name given to the ``TimedeltaIndex`` when boxing

    ``errors`` is not a parameter; it is taken from the enclosing scope.
    """
    is_plain_sequence = isinstance(arg, (list, tuple))
    is_bare_iterable = hasattr(arg, '__iter__') and not hasattr(arg, 'dtype')
    if is_plain_sequence or is_bare_iterable:
        arg = np.array(list(arg), dtype='O')

    # these are shortcutable
    if is_timedelta64_dtype(arg):
        converted = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        in_unit = arg.astype('timedelta64[{0}]'.format(unit))
        converted = in_unit.astype('timedelta64[ns]', copy=False)
    else:
        parsed = tslib.array_to_timedelta64(_ensure_object(arg),
                                            unit=unit, errors=errors)
        converted = parsed.astype('timedelta64[ns]', copy=False)

    if not box:
        return converted

    from pandas import TimedeltaIndex
    return TimedeltaIndex(converted, unit='ns', name=name)
def _convert_listlike(arg, box, unit, name=None):
    """
    Convert a list-like ``arg`` to ``timedelta64[ns]`` values.

    Parameters
    ----------
    arg : list, tuple, any object without a ``dtype`` attribute, or array
    box : when true, wrap the result in a ``TimedeltaIndex``
    unit : unit applied to integer input and passed to the parser
    name : name given to the ``TimedeltaIndex`` when boxing

    ``errors`` is not a parameter; it is taken from the enclosing scope.
    """
    # anything that is not already array-like is materialised as an
    # object array first
    lacks_dtype = isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype')
    if lacks_dtype:
        arg = np.array(list(arg), dtype='O')

    # these are shortcutable
    if is_timedelta64_dtype(arg):
        result = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        unit_dtype = 'timedelta64[{0}]'.format(unit)
        result = arg.astype(unit_dtype).astype('timedelta64[ns]', copy=False)
    else:
        result = tslib.array_to_timedelta64(_ensure_object(arg),
                                            unit=unit, errors=errors)
        result = result.astype('timedelta64[ns]', copy=False)

    if box:
        from pandas import TimedeltaIndex
        return TimedeltaIndex(result, unit='ns', name=name)
    return result
def maybe_convert_for_time_op(cls, left, right, name, na_op):
    """
    Return ``cls(left, right, name, na_op)`` for datetime-like ``left``.

    ``left`` qualifies when its dtype is timedelta64, datetime64 or
    tz-aware datetime64.  For any other ``left`` the function returns
    ``None``, indicating that the data is not the right type for time
    ops.  ``right``, ``name`` and ``na_op`` are passed through untouched.
    """
    # decide if we can do it
    dtype_checks = (is_timedelta64_dtype,
                    is_datetime64_dtype,
                    is_datetime64tz_dtype)
    if not any(check(left) for check in dtype_checks):
        return None

    return cls(left, right, name, na_op)
def f(values, axis=None, skipna=True, **kwds):
    """
    Reduce ``values`` with ``bn_func`` when allowed, else with ``alt``.

    Parameters
    ----------
    values : array exposing ``size``, ``ndim``, ``shape`` and ``dtype``
    axis : axis forwarded to the implementation
    skipna : forwarded to ``alt``; the ``bn_func`` path is only taken
        when this is true
    **kwds : extra keyword arguments; entries of ``self.kwargs`` are
        added for keys the caller did not supply

    Raises
    ------
    TypeError
        when the fallback raises ``ValueError`` for object-dtype values

    ``self``, ``alt``, ``bn_func`` and ``bn_name`` are not defined in this
    block -- presumably closure variables of the enclosing scope.
    """
    # caller-supplied keywords win over the stored defaults
    if len(self.kwargs) > 0:
        for k, v in compat.iteritems(self.kwargs):
            if k not in kwds:
                kwds[k] = v
    try:
        # empty input: answer directly without calling an implementation
        if self.zero_value is not None and values.size == 0:
            if values.ndim == 1:

                # wrap the 0's if needed
                if is_timedelta64_dtype(values):
                    return lib.Timedelta(0)
                return 0
            else:
                # shape of ``values`` with ``axis`` removed
                # NOTE(review): with axis=None the ``axis + 1`` below
                # raises and lands in the fallback -- confirm intended
                result_shape = (values.shape[:axis] +
                                values.shape[axis + 1:])
                result = np.empty(result_shape)
                result.fill(0)
                return result

        if (_USE_BOTTLENECK and skipna and
                _bn_ok_dtype(values.dtype, bn_name)):
            result = bn_func(values, axis=axis, **kwds)

            # prefer to treat inf/-inf as NA, but must compute the func
            # twice :(
            if _has_infs(result):
                result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
    except Exception:
        # any failure inside the block above is retried once with ``alt``
        try:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
        except ValueError as e:
            # we want to transform an object array
            # ValueError message to the more typical TypeError
            # e.g. this is normally a disallowed function on
            # object arrays that contain strings

            if is_object_dtype(values):
                raise TypeError(e)
            raise

    return result
def _wrap_result(self, result, block=None, obj=None):
    """
    Wrap a single result in the container type of ``obj``.

    Parameters
    ----------
    result : value to wrap; anything that is not an ndarray is returned
        unchanged
    block : optional object providing ``values.dtype`` (used to restore
        timedelta values) and ``columns`` (used for non-1-d results)
    obj : object supplying index and name; defaults to
        ``self._selected_obj``
    """
    if obj is None:
        obj = self._selected_obj

    if not isinstance(result, np.ndarray):
        return result

    # coerce if necessary: timedelta blocks get their values converted
    # back to timedeltas, keeping the original shape
    restore_timedelta = (block is not None and
                         com.is_timedelta64_dtype(block.values.dtype))
    if restore_timedelta:
        flat = pd.to_timedelta(result.ravel(), unit='ns')
        result = flat.values.reshape(result.shape)

    if result.ndim == 1:
        from pandas import Series
        return Series(result, obj.index, name=obj.name)

    return type(obj)(result, index=obj.index, columns=block.columns)
def maybe_convert_for_time_op(cls, left, right, name):
    """
    Build a ``cls`` instance when ``left`` is datetime-like.

    Returns ``None`` unless ``left`` has a timedelta64 or datetime64
    dtype.  Otherwise returns ``cls(left, right, name)``, where a
    reflected name such as ``__rsub__`` has first been rewritten to its
    plain form (``__sub__``).
    """
    # decide if we can do it
    lhs_is_timelike = (com.is_timedelta64_dtype(left) or
                       com.is_datetime64_dtype(left))
    if not lhs_is_timelike:
        return None

    # rops are allowed.  No need for special checks, just strip off
    # r part.
    reflected_prefix = '__r'
    if name.startswith(reflected_prefix):
        name = "__" + name[len(reflected_prefix):]

    return cls(left, right, name)