def na_op(x, y):
    """
    Apply the enclosing ``op`` to ``x`` and ``y``, with fallbacks.

    The fast path is a plain ``op(x, y)``.  When that raises ``TypeError``
    the right-hand operand decides which fallback is used:

    * a ``list`` is first converted with ``lib.list_to_object_array``;
    * an ``np.ndarray`` / ``pd.Series`` is combined directly when both
      operands are boolean, otherwise both operands go through
      ``com._ensure_object`` and are combined with ``lib.vec_binop``;
    * anything else is treated as a scalar: a non-null ``y`` is coerced
      with ``bool`` and combined via ``lib.scalar_binop``.

    Raises
    ------
    TypeError
        If the scalar fallback fails.
    """
    try:
        result = op(x, y)
    except TypeError:
        if isinstance(y, list):
            y = lib.list_to_object_array(y)

        if isinstance(y, (np.ndarray, pd.Series)):
            if is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype):
                result = op(x, y)  # when would this be hit?
            else:
                x = com._ensure_object(x)
                y = com._ensure_object(y)
                result = lib.vec_binop(x, y, op)
        else:
            try:
                # let null fall thru
                if not isnull(y):
                    y = bool(y)
                result = lib.scalar_binop(x, y, op)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit are not rewritten into
                # a TypeError.
                raise TypeError("cannot compare a dtyped [{0}] array with "
                                "a scalar of type [{1}]".format(
                                    x.dtype, type(y).__name__))

    return result
def get_dtype_kinds(l):
    """
    Collect the distinct kind labels found in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays

    Notes
    -----
    Each array is labelled by the first check that matches, tried in this
    order: categorical, sparse, datetimetz, datetime, timedelta, object,
    bool.  Anything else is labelled with its ``dtype.kind``.
    """

    def _kind_of(arr):
        # Label a single array; the order of the checks is significant.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return "category"
        if com.is_sparse(arr):
            return "sparse"
        if com.is_datetimetz(arr):
            return "datetimetz"
        if com.is_datetime64_dtype(dtype):
            return "datetime"
        if com.is_timedelta64_dtype(dtype):
            return "timedelta"
        if com.is_object_dtype(dtype):
            return "object"
        if com.is_bool_dtype(dtype):
            return "bool"
        return dtype.kind

    return set(_kind_of(arr) for arr in l)
def _wrap_result_expand(self, result, expand=False):
    """
    Wrap ``result`` in a container matching ``self.series``.

    Objects without an ``ndim`` attribute are returned untouched.  When
    ``self.series`` is an ``Index`` the result becomes an ``Index`` (after
    being turned into a list if ``expand`` is true), except that boolean
    results are returned as-is.  Otherwise the result is rebuilt with the
    constructors of ``self.series``: ``_constructor_expanddim`` over one
    ``_constructor`` row per element when ``expand`` is true, else a single
    ``_constructor`` call.
    """
    from pandas.core.index import Index

    if not hasattr(result, 'ndim'):
        return result

    source = self.series

    if isinstance(source, Index):
        result_name = getattr(result, 'name', None)
        # if result is a boolean np.array, return the np.array
        # instead of wrapping it into a boolean Index (GH 8875)
        if hasattr(result, 'dtype') and is_bool_dtype(result):
            return result
        values = list(result) if expand else result
        return Index(values, name=result_name)

    row_index = source.index

    if not expand:
        result_name = getattr(result, 'name', None)
        build = source._constructor
        return build(result, name=result_name, index=row_index)

    build_row = source._constructor
    build_expanded = source._constructor_expanddim
    rows = [build_row(item) for item in result]
    return build_expanded(rows, index=row_index)
def _sqlalchemy_type(self, arr_or_dtype):
    """
    Pick the SQLAlchemy column type for an array or dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        Also accepts the ``date`` class itself, which maps to ``Date``.

    Returns
    -------
    A SQLAlchemy type.  Every branch returns the type class, except the
    datetime branch for inputs exposing ``tzinfo``, which returns a
    ``DateTime(timezone=True)`` instance.  Unmatched inputs map to ``Text``.

    Notes
    -----
    timedelta64 input emits a ``UserWarning`` and maps to ``BigInteger``.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date)

    if arr_or_dtype is date:
        return Date

    if com.is_datetime64_dtype(arr_or_dtype):
        # Probe for ``tzinfo`` without binding an unused local.  Catching
        # ``Exception`` instead of using a bare ``except`` keeps the
        # best-effort fallback but lets KeyboardInterrupt / SystemExit
        # propagate.
        try:
            arr_or_dtype.tzinfo
        except Exception:
            return DateTime
        return DateTime(timezone=True)

    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def restore_type(self, dtype, sample=None):
    """Restore a type name from a Pandas dtype, optionally using a sample.

    The dtype is checked first (boolean, datetime, integer, number, in that
    order).  If none of those match and ``sample`` is not ``None``, the
    Python type of ``sample`` decides.  ``'string'`` is the fallback.
    """
    # Pandas types -- order matters, the first matching predicate wins
    pandas_checks = (
        (pdc.is_bool_dtype, 'boolean'),
        (pdc.is_datetime64_any_dtype, 'datetime'),
        (pdc.is_integer_dtype, 'integer'),
        (pdc.is_numeric_dtype, 'number'),
    )
    for matches, type_name in pandas_checks:
        if matches(dtype):
            return type_name

    if sample is None:
        return 'string'

    # Python types
    if isinstance(sample, (list, tuple)):
        return 'array'
    if isinstance(sample, datetime.date):
        return 'date'
    if isinstance(sample, isodate.Duration):
        return 'duration'
    if isinstance(sample, dict):
        return 'object'
    if isinstance(sample, six.string_types):
        return 'string'
    if isinstance(sample, datetime.time):
        return 'time'

    return 'string'
def _wrap_result(self, result, use_codes=True, name=None):
    """
    Wrap ``result`` as an Index, Series or DataFrame based on ``self._orig``.

    With ``use_codes`` true and categorical data, ``result`` is first
    expanded via ``take_1d`` with the codes of ``self._orig``; callers that
    already worked on the full values pass ``use_codes=False`` to skip this.

    The name is the first truthy value among the ``name`` argument,
    ``result.name`` and ``self._orig.name``.  Objects without ``ndim`` are
    returned unchanged, and so are 1-d boolean results when ``self._orig``
    is an Index.
    """
    # for category, we do the stuff on the categories, so blow it up
    # to the full series again
    if use_codes and self._is_categorical:
        result = take_1d(result, self._orig.cat.codes)

    # leave as it is to keep extract and get_dummies results
    # can be merged to _wrap_result_expand in v0.17
    from pandas.core.series import Series
    from pandas.core.frame import DataFrame
    from pandas.core.index import Index

    if not hasattr(result, 'ndim'):
        return result

    resolved_name = name or getattr(result, 'name', None) or self._orig.name

    if result.ndim != 1:
        assert result.ndim < 3
        return DataFrame(result, index=self._orig.index)

    if not isinstance(self._orig, Index):
        return Series(result, index=self._orig.index, name=resolved_name)

    # if result is a boolean np.array, return the np.array
    # instead of wrapping it into a boolean Index (GH 8875)
    if is_bool_dtype(result):
        return result
    return Index(result, name=resolved_name)
def _evaluate_compare(self, other, op):
    """
    Compare ``self`` with ``other`` through their ``asi8`` values.

    ``other`` is coerced to ``type(self)`` when it is not already an
    instance; scalars are wrapped in a one-element list first.  ``op`` is
    the name of the comparison method looked up on ``self.asi8``.

    Positions that are null in either operand are set to ``False`` for
    boolean results, which are returned directly.  For other results those
    positions are set to ``tslib.iNaT`` and the result is wrapped in an
    ``Index``; if that raises ``TypeError`` the raw result is returned.

    numpy >= 1.11 will now warn about NaT comparisons
    """
    own_type = type(self)

    # coerce to a similar object
    if not isinstance(other, own_type):
        if not com.is_list_like(other):
            # scalar
            other = [other]
        elif lib.isscalar(lib.item_from_zerodim(other)):
            # ndarray scalar
            other = [other.item()]
        other = own_type(other)

    # compare
    compare = getattr(self.asi8, op)
    result = compare(other.asi8)

    null_mask = self._isnan | other._isnan

    # technically we could support bool dtyped Index
    # for now just return the indexing array directly
    if is_bool_dtype(result):
        result[null_mask] = False
        return result

    try:
        result[null_mask] = tslib.iNaT
        return Index(result)
    except TypeError:
        return result
def wrapper(self, other):
    """
    Compare a TimedeltaIndex with ``other`` using the parent's ``opname``.

    Values convertible to a timedelta are converted with ``_to_m8``; a null
    converted value fills the whole result with ``nat_result``.  Any other
    input must be list-like and is compared as ``TimedeltaIndex(other)``
    values, with ``nat_result`` written wherever ``other`` holds ``iNaT``.
    Positions where ``self`` is NaN also receive ``nat_result``.

    Boolean results are returned as-is; anything else is wrapped in an
    ``Index``.

    Raises
    ------
    TypeError
        If ``other`` is neither convertible to a timedelta nor list-like.
    """
    compare = getattr(super(TimedeltaIndex, self), opname)

    if _is_convertible_to_td(other):
        other = _to_m8(other)
        result = compare(other)
        if com.isnull(other):
            result.fill(nat_result)
    elif not com.is_list_like(other):
        raise TypeError("cannot compare a TimedeltaIndex with type "
                        "{0}".format(type(other)))
    else:
        other = TimedeltaIndex(other).values
        result = _values_from_object(compare(other))

        raw_other = other.values if isinstance(other, Index) else other
        other_nat = raw_other.view('i8') == tslib.iNaT
        if other_nat.any():
            result[other_nat] = nat_result

    if self.hasnans:
        result[self._isnan] = nat_result

    # support of bool dtype indexers
    if com.is_bool_dtype(result):
        return result
    return Index(result)
def na_value_for_dtype(dtype):
    """
    Return a dtype compat na value

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    dtype compat na value

    Notes
    -----
    datetime64, tz-aware datetime64 and timedelta64 give ``NaT``, float
    gives ``np.nan``, integer gives ``0``, bool gives ``False``, and
    everything else gives ``np.nan``.
    """
    from pandas.core import common as com
    from pandas import NaT

    dtype = pandas_dtype(dtype)

    is_datetimelike = (com.is_datetime64_dtype(dtype) or
                       com.is_datetime64tz_dtype(dtype) or
                       com.is_timedelta64_dtype(dtype))
    if is_datetimelike:
        return NaT
    if com.is_float_dtype(dtype):
        return np.nan
    if com.is_integer_dtype(dtype):
        return 0
    if com.is_bool_dtype(dtype):
        return False
    return np.nan
def get_dtype_kinds(l):
    """
    Collect the distinct kind labels found in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays

    Notes
    -----
    Each array is labelled by the first check that matches, tried in this
    order: categorical, sparse, datetimetz, datetime, timedelta, object,
    bool.  Anything else is labelled with its ``dtype.kind``.
    """

    def _label(arr):
        # Label a single array; the order of the checks is significant.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return 'category'
        if com.is_sparse(arr):
            return 'sparse'
        if com.is_datetimetz(arr):
            return 'datetimetz'
        if com.is_datetime64_dtype(dtype):
            return 'datetime'
        if com.is_timedelta64_dtype(dtype):
            return 'timedelta'
        if com.is_object_dtype(dtype):
            return 'object'
        if com.is_bool_dtype(dtype):
            return 'bool'
        return dtype.kind

    kinds = set()
    kinds.update(_label(arr) for arr in l)
    return kinds
def _sqlalchemy_type(self, arr_or_dtype):
    """
    Pick the SQLAlchemy column type for an array or dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        Also accepts the ``date`` class itself, which maps to ``Date``.

    Returns
    -------
    A SQLAlchemy type.  Every branch returns the type class, except the
    datetime branch for inputs exposing ``tzinfo``, which returns a
    ``DateTime(timezone=True)`` instance.  Unmatched inputs map to ``Text``.

    Notes
    -----
    timedelta64 input emits a ``UserWarning`` and maps to ``BigInteger``.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date)

    if arr_or_dtype is date:
        return Date

    if com.is_datetime64_dtype(arr_or_dtype):
        # Probe for ``tzinfo`` without binding an unused local.  Catching
        # ``Exception`` instead of using a bare ``except`` keeps the
        # best-effort fallback but lets KeyboardInterrupt / SystemExit
        # propagate.
        try:
            arr_or_dtype.tzinfo
        except Exception:
            return DateTime
        return DateTime(timezone=True)

    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def _wrap_result_expand(self, result, expand=False):
    """
    Wrap ``result`` in a container matching ``self.series``.

    Objects without an ``ndim`` attribute are returned untouched.  When
    ``self.series`` is an ``Index``, boolean results are returned as-is;
    otherwise the result becomes a ``MultiIndex`` built from tuples when
    ``expand`` is true, or an ``Index`` when it is false.  For any other
    ``self.series`` the result is rebuilt with its constructors:
    ``_constructor_expanddim`` over list-like rows when ``expand`` is true
    (non-list-like elements become one-element lists), else ``_constructor``.

    Raises
    ------
    ValueError
        If ``expand`` is not a ``bool``.
    """
    if not isinstance(expand, bool):
        raise ValueError("expand must be True or False")

    from pandas.core.index import Index, MultiIndex

    if not hasattr(result, 'ndim'):
        return result

    source = self.series

    if isinstance(source, Index):
        result_name = getattr(result, 'name', None)
        # if result is a boolean np.array, return the np.array
        # instead of wrapping it into a boolean Index (GH 8875)
        if hasattr(result, 'dtype') and is_bool_dtype(result):
            return result
        if not expand:
            return Index(result, name=result_name)
        return MultiIndex.from_tuples(list(result), names=result_name)

    row_index = source.index

    if not expand:
        result_name = getattr(result, 'name', None)
        build = source._constructor
        return build(result, name=result_name, index=row_index)

    build_expanded = source._constructor_expanddim
    # every row must be list-like; wrap bare values in a one-element list
    rows = [item if is_list_like(item) else [item] for item in result]
    return build_expanded(rows, index=row_index)
def _sqlalchemy_type(self, col):
    """
    Pick the SQLAlchemy column type for the column ``col``.

    The dtype-based checks are tried first (datetime64, timedelta64, float,
    integer, bool).  If none match, the type is inferred with
    ``lib.infer_dtype`` on the object-converted column: ``'date'`` maps to
    ``Date``, ``'time'`` maps to ``Time``, and anything else to ``Text``.

    Returns
    -------
    A SQLAlchemy type.  Every branch returns the type class, except the
    datetime branch for columns exposing ``tzinfo``, which returns a
    ``DateTime(timezone=True)`` instance.

    Notes
    -----
    timedelta64 input emits a ``UserWarning`` and maps to ``BigInteger``.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time)

    if com.is_datetime64_dtype(col):
        # Probe for ``tzinfo`` without binding an unused local.  Catching
        # ``Exception`` instead of using a bare ``except`` keeps the
        # best-effort fallback but lets KeyboardInterrupt / SystemExit
        # propagate.
        try:
            col.tzinfo
        except Exception:
            return DateTime
        return DateTime(timezone=True)

    if com.is_timedelta64_dtype(col):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def _isfinite(values):
    """
    Return a mask that is True where ``values`` is NOT finite.

    Despite the name, the result is the negation of ``np.isfinite``.
    Datetime/timedelta input is delegated to ``isnull``.  Complex, float,
    integer and bool input is checked directly; anything else is cast to
    ``float64`` first.
    """
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    checkable_as_is = (is_complex_dtype(values) or is_float_dtype(values) or
                       is_integer_dtype(values) or is_bool_dtype(values))
    candidate = values if checkable_as_is else values.astype('float64')
    return ~np.isfinite(candidate)
def _sqlalchemy_type(self, col):
    """
    Pick the SQLAlchemy column type for the column ``col``.

    The dtype-based checks are tried first (datetime64, timedelta64, float,
    integer, bool).  If none match, the type is inferred with
    ``lib.infer_dtype`` on the object-converted column: ``'date'`` maps to
    ``Date``, ``'time'`` maps to ``Time``, and anything else to ``Text``.

    Returns
    -------
    A SQLAlchemy type.  Every branch returns the type class, except the
    datetime branch for columns exposing ``tzinfo``, which returns a
    ``DateTime(timezone=True)`` instance.

    Notes
    -----
    timedelta64 input emits a ``UserWarning`` and maps to ``BigInteger``.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time)

    if com.is_datetime64_dtype(col):
        # Probe for ``tzinfo`` without binding an unused local.  Catching
        # ``Exception`` instead of using a bare ``except`` keeps the
        # best-effort fallback but lets KeyboardInterrupt / SystemExit
        # propagate.
        try:
            col.tzinfo
        except Exception:
            return DateTime
        return DateTime(timezone=True)

    if com.is_timedelta64_dtype(col):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def _isfinite(values):
    """
    Return a mask that is True where ``values`` is NOT finite.

    Despite the name, the result is the negation of ``np.isfinite``.
    Datetime/timedelta input is delegated to ``isnull``.  Complex, float,
    integer and bool input is checked directly; anything else is cast to
    ``float64`` first.
    """
    if _is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    checkable_as_is = (is_complex_dtype(values) or is_float_dtype(values) or
                       is_integer_dtype(values) or is_bool_dtype(values))
    if not checkable_as_is:
        values = values.astype('float64')
    return ~np.isfinite(values)
def pandas_col_to_ibis_type(col):
    """
    Map the dtype of a pandas column to an ibis type.

    Returns a type-name string, or ``dt.Category`` (sized by the number of
    categories) for categorical columns.  Unsigned integers up to 32 bits
    map to the next wider signed type.  A timedelta64 column prints a
    warning and is reported as ``'int64'``.

    Raises ``com.IbisTypeError`` for datetime64 columns that are not
    nanosecond-based, for uint64 columns, and for any unrecognised dtype.
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if not pdcom.is_datetime64_ns_dtype(dty):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units"
                                    .format(col.name, dty))
        return 'timestamp'

    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types, checked in this order
    scalar_type = dty.type
    numeric_names = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_name in numeric_names:
        if issubclass(scalar_type, numpy_type):
            return ibis_name

    if issubclass(scalar_type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64"
                                .format(col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}"
                            .format(col.name, dty))
def dtype_to_jtstype(dtype):
    """Convert a pandas dtype to a type name.

    Checks run in order: boolean, integer, number, datetime.  Anything
    that matches none of them is reported as ``'string'``.
    """
    # Order matters: the first matching predicate wins.
    conversions = (
        (pdc.is_bool_dtype, 'boolean'),
        (pdc.is_integer_dtype, 'integer'),
        (pdc.is_numeric_dtype, 'number'),
        (pdc.is_datetime64_any_dtype, 'datetime'),
    )
    for matches, type_name in conversions:
        if matches(dtype):
            return type_name
    return 'string'
def pandas_col_to_ibis_type(col):
    """
    Map the dtype of a pandas column to an ibis type.

    Returns a type-name string, or ``dt.Category`` (sized by the number of
    categories) for categorical columns.  Unsigned integers up to 32 bits
    map to the next wider signed type.  A timedelta64 column prints a
    warning and is reported as ``'int64'``.

    Raises ``com.IbisTypeError`` for datetime64 columns that are not
    nanosecond-based, for uint64 columns, and for any unrecognised dtype.
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if not pdcom.is_datetime64_ns_dtype(dty):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units".format(
                                        col.name, dty))
        return 'timestamp'

    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types, checked in this order
    scalar_type = dty.type
    numeric_names = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_name in numeric_names:
        if issubclass(scalar_type, numpy_type):
            return ibis_name

    if issubclass(scalar_type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64".format(
            col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}".format(col.name, dty))
def f(x, y):
    """
    Apply the enclosing ``op`` to ``x`` and ``y``, propagating nulls.

    Every position that is null in either operand is set to ``np.nan`` in
    the result.  A boolean result is cast to object dtype first so that it
    can hold ``np.nan``.
    """
    either_null = isnull(x) | isnull(y)

    result = op(x, y)

    if not either_null.any():
        return result

    if is_bool_dtype(result):
        result = result.astype('O')
    np.putmask(result, either_null, np.nan)
    return result
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """
    Prepare ``values`` for a reduction.

    Returns the (possibly copied and filled) values, the mask, the original
    dtype, and a platform independent precision dtype (``np.int64`` for
    integer/bool input, ``np.float64`` for float input, otherwise the
    original dtype).

    The mask comes from ``_isfinite`` when ``isfinite`` is true, else from
    ``isnull``.  With ``skipna`` the masked positions are overwritten with
    the fill value, upcasting when the dtype cannot hold it.

    copy = True will force the copy
    """
    values = _values_from_object(values)
    mask = _isfinite(values) if isfinite else isnull(values)

    dtype = values.dtype
    holds_na = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if copy:
        values = values.copy()

    if skipna:
        if holds_na:
            np.putmask(values, mask, fill_value)
        else:
            # promote if needed
            values, _ = _maybe_upcast_putmask(values, mask, fill_value)

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data types

    Notes
    -----
    The checks run in the order below and the first match wins.

    ===================  =================
    Check on ``x``       Table Schema type
    ===================  =================
    integer              integer
    bool                 boolean
    other numeric        number
    datetime64 (+ tz)    datetime
    timedelta64          duration
    categorical          any
    string               string
    anything else        any
    ===================  =================
    """
    if is_integer_dtype(x):
        return 'integer'
    if is_bool_dtype(x):
        return 'boolean'
    if is_numeric_dtype(x):
        return 'number'
    if is_datetime64_dtype(x) or is_datetime64tz_dtype(x):
        return 'datetime'
    if is_timedelta64_dtype(x):
        return 'duration'
    if is_categorical_dtype(x):
        return 'any'
    if is_string_dtype(x):
        return 'string'
    return 'any'
def _wrap_result_expand(self, result, expand=False):
    """
    Wrap ``result`` in a container matching ``self._orig``.

    For categorical data the result is first expanded via ``take_1d`` with
    the codes of ``self._orig``.  Objects without an ``ndim`` attribute are
    returned untouched.  When ``self._orig`` is an ``Index``, boolean
    results are returned as-is; otherwise the result becomes a
    ``MultiIndex`` built from tuples when ``expand`` is true, or an
    ``Index`` when it is false.  For any other ``self._orig`` the result is
    rebuilt with its constructors: ``_constructor_expanddim`` over list-like
    rows when ``expand`` is true (non-list-like elements become one-element
    lists), else ``_constructor``.

    Raises
    ------
    ValueError
        If ``expand`` is not a ``bool``.
    """
    if not isinstance(expand, bool):
        raise ValueError("expand must be True or False")

    # for category, we do the stuff on the categories, so blow it up
    # to the full series again
    if self._is_categorical:
        result = take_1d(result, self._orig.cat.codes)

    from pandas.core.index import Index, MultiIndex

    if not hasattr(result, 'ndim'):
        return result

    source = self._orig

    if isinstance(source, Index):
        result_name = getattr(result, 'name', None)
        # if result is a boolean np.array, return the np.array
        # instead of wrapping it into a boolean Index (GH 8875)
        if hasattr(result, 'dtype') and is_bool_dtype(result):
            return result
        if not expand:
            return Index(result, name=result_name)
        return MultiIndex.from_tuples(list(result), names=result_name)

    row_index = source.index

    if not expand:
        result_name = getattr(result, 'name', None)
        build = source._constructor
        return build(result, name=result_name, index=row_index)

    build_expanded = source._constructor_expanddim
    # every row must be list-like; wrap bare values in a one-element list
    rows = [item if is_list_like(item) else [item] for item in result]
    return build_expanded(rows, index=row_index)
def _wrap_result(self, result):
    """
    Wrap ``result`` as an Index, Series or DataFrame based on ``self.series``.

    Objects without ``ndim`` are returned unchanged.  A 1-d result becomes
    an ``Index`` when ``self.series`` is an Index (boolean results are
    returned as-is) and a ``Series`` otherwise, named after ``result.name``
    or, failing that, ``self.series.name``.  Any other result must have
    fewer than 3 dimensions and becomes a ``DataFrame``.
    """
    from pandas.core.series import Series
    from pandas.core.frame import DataFrame
    from pandas.core.index import Index

    if not hasattr(result, 'ndim'):
        return result

    if result.ndim != 1:
        assert result.ndim < 3
        return DataFrame(result, index=self.series.index)

    result_name = getattr(result, 'name', None)

    if not isinstance(self.series, Index):
        return Series(result, index=self.series.index,
                      name=result_name or self.series.name)

    # if result is a boolean np.array, return the np.array
    # instead of wrapping it into a boolean Index (GH 8875)
    if is_bool_dtype(result):
        return result
    return Index(result, name=result_name or self.series.name)
def _ensure_numeric(x): if isinstance(x, np.ndarray): if is_integer_dtype(x) or is_bool_dtype(x): x = x.astype(np.float64) elif is_object_dtype(x): try: x = x.astype(np.complex128) except: x = x.astype(np.float64) else: if not np.any(x.imag): x = x.real elif not (is_float(x) or is_integer(x) or is_complex(x)): try: x = float(x) except Exception: try: x = complex(x) except Exception: raise TypeError('Could not convert %s to numeric' % str(x)) return x
def _wrap_result(self, result, **kwargs):
    """
    Wrap ``result`` as an Index, Series or DataFrame based on ``self.series``.

    The name is the first truthy value among ``kwargs['name']``,
    ``result.name`` and ``self.series.name``.  Objects without ``ndim`` are
    returned unchanged, and so are 1-d boolean results when ``self.series``
    is an Index.  A result that is not 1-d must have fewer than 3
    dimensions and becomes a ``DataFrame``.
    """
    # leave as it is to keep extract and get_dummies results
    # can be merged to _wrap_result_expand in v0.17
    from pandas.core.series import Series
    from pandas.core.frame import DataFrame
    from pandas.core.index import Index

    if not hasattr(result, 'ndim'):
        return result

    name = (kwargs.get('name') or
            getattr(result, 'name', None) or
            self.series.name)

    if result.ndim != 1:
        assert result.ndim < 3
        return DataFrame(result, index=self.series.index)

    if not isinstance(self.series, Index):
        return Series(result, index=self.series.index, name=name)

    # if result is a boolean np.array, return the np.array
    # instead of wrapping it into a boolean Index (GH 8875)
    if is_bool_dtype(result):
        return result
    return Index(result, name=name)
def __array_wrap__(self, result, context=None): """ Gets called after a ufunc. Needs additional handling as PeriodIndex stores internal data as int dtype Replace this to __numpy_ufunc__ in future version """ if isinstance(context, tuple) and len(context) > 0: func = context[0] if (func is np.add): return self._add_delta(context[1][1]) elif (func is np.subtract): return self._add_delta(-context[1][1]) elif isinstance(func, np.ufunc): if 'M->M' not in func.types: msg = "ufunc '{0}' not supported for the PeriodIndex" # This should be TypeError, but TypeError cannot be raised # from here because numpy catches. raise ValueError(msg.format(func.__name__)) if com.is_bool_dtype(result): return result return PeriodIndex(result, freq=self.freq, name=self.name)