def test_infer_dtype_bytes(self):
    """Byte-string data infers as 'bytes' on py3 ('string' on py2)."""
    expected = 'string' if PY2 else 'bytes'

    # fixed-width byte-string array
    arr = np.array(list('abc'), dtype='S1')
    self.assertEqual(lib.infer_dtype(arr), expected)

    # the same bytes boxed into an object array
    arr = arr.astype(object)
    self.assertEqual(lib.infer_dtype(arr), expected)
def test_integers(self):
    """infer_dtype recognises python/numpy ints and mixed-integer data."""
    cases = [
        (np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O'),
         'integer'),
        (np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O'),
         'mixed-integer'),
        (np.array([1, 2, 3, 4, 5], dtype='i4'), 'integer'),
    ]
    for arr, expected in cases:
        self.assertEqual(lib.infer_dtype(arr), expected)
def test_integers(self):
    """infer_dtype on object and native integer arrays."""
    # plain + numpy scalars in an object array
    obj_ints = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype="O")
    self.assertEqual(lib.infer_dtype(obj_ints), "integer")

    # a string in the mix downgrades the inference
    with_str = np.array([1, 2, 3, np.int64(4), np.int32(5), "foo"],
                        dtype="O")
    self.assertEqual(lib.infer_dtype(with_str), "mixed-integer")

    # native integer dtype
    native = np.array([1, 2, 3, 4, 5], dtype="i4")
    self.assertEqual(lib.infer_dtype(native), "integer")
def _convert_to_array(self, values, name=None, other=None):
    """converts values to ndarray

    Coerce ``values`` to an ndarray suitable for a datetime/timedelta
    op, dispatching on ``lib.infer_dtype``.  ``name`` is the dunder op
    being performed; ``other`` is the already-converted other operand
    (used to pick a dtype when ``values`` is all-NaN/NaT).
    Raises TypeError for incompatible inputs.
    """
    from pandas.tseries.timedeltas import to_timedelta

    coerce = True
    if not is_list_like(values):
        values = np.array([values])
    inferred_type = lib.infer_dtype(values)

    if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
        # if we have a other of timedelta, but use pd.NaT here we
        # we are in the wrong path
        if (other is not None and other.dtype == 'timedelta64[ns]' and
                all(isnull(v) for v in values)):
            # all-null: take the timedelta dtype from the other operand
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = tslib.iNaT

        # a datelike
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
        elif not (isinstance(values, (np.ndarray, pd.Series)) and
                  com.is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
    elif inferred_type in ('timedelta', 'timedelta64'):
        # have a timedelta, convert to to ns here
        values = to_timedelta(values, coerce=coerce)
    elif inferred_type == 'integer':
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == 'm':
            values = values.astype('timedelta64[ns]')
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ('__truediv__', '__div__', '__mul__'):
            # only scaling ops are allowed with plain integers
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif isinstance(values[0], pd.DateOffset):
        # handle DateOffsets: only absolute offsets (with a .delta) work
        os = np.array([getattr(v, 'delta', None) for v in values])
        mask = isnull(os)
        if mask.any():
            raise TypeError(
                "cannot use a non-absolute DateOffset in "
                "datetime/timedelta operations [{0}]".format(', '.join(
                    [com.pprint_thing(v) for v in values[mask]])))
        values = to_timedelta(os, coerce=coerce)
    elif inferred_type == 'floating':
        # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
        if isnull(values).all():
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = tslib.iNaT
        else:
            raise TypeError(
                'incompatible type [{0}] for a datetime/timedelta '
                'operation'.format(np.array(values).dtype))
    else:
        raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                        " operation".format(np.array(values).dtype))

    return values
def _convert_to_array(self, values, name=None, other=None):
    """converts values to ndarray

    Near-duplicate of the other ``_convert_to_array`` variant in this
    file, but uses the bare ``iNaT`` / ``is_datetime64_dtype`` names.
    ``name`` is the dunder op; ``other`` supplies the dtype for
    all-null inputs.  Raises TypeError for incompatible inputs.
    """
    from pandas.tseries.timedeltas import to_timedelta

    coerce = True
    if not is_list_like(values):
        values = np.array([values])
    inferred_type = lib.infer_dtype(values)

    if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
        # if we have a other of timedelta, but use pd.NaT here we
        # we are in the wrong path
        if (other is not None and other.dtype == 'timedelta64[ns]' and
                all(isnull(v) for v in values)):
            # all-null: borrow the timedelta dtype from the other operand
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = iNaT

        # a datelike
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
        elif not (isinstance(values, (np.ndarray, pd.Series)) and
                  is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
    elif inferred_type in ('timedelta', 'timedelta64'):
        # have a timedelta, convert to to ns here
        values = to_timedelta(values, coerce=coerce)
    elif inferred_type == 'integer':
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == 'm':
            values = values.astype('timedelta64[ns]')
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ('__truediv__', '__div__', '__mul__'):
            # only scaling ops make sense for bare integers
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif isinstance(values[0], pd.DateOffset):
        # handle DateOffsets: only absolute offsets (with .delta) allowed
        os = np.array([getattr(v, 'delta', None) for v in values])
        mask = isnull(os)
        if mask.any():
            raise TypeError("cannot use a non-absolute DateOffset in "
                            "datetime/timedelta operations [{0}]".format(
                                ', '.join([com.pprint_thing(v)
                                           for v in values[mask]])))
        values = to_timedelta(os, coerce=coerce)
    elif inferred_type == 'floating':
        # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
        if isnull(values).all():
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = iNaT
        else:
            raise TypeError(
                'incompatible type [{0}] for a datetime/timedelta '
                'operation'.format(np.array(values).dtype))
    else:
        raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                        " operation".format(np.array(values).dtype))

    return values
def maybe_to_datetimelike(data, copy=False):
    """
    return a DelegatedClass of a Series that is datetimelike (e.g.
    datetime64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass
    """

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))

    index = data.index
    if issubclass(data.dtype.type, np.datetime64):
        return DatetimeProperties(DatetimeIndex(data, copy=copy), index)
    else:
        # NOTE(review): data is already known to be a Series here, so this
        # isinstance(PeriodIndex) check looks unreachable — confirm intent
        if isinstance(data, PeriodIndex):
            return PeriodProperties(PeriodIndex(data, copy=copy), index)

        # object dtype: fall back to element-wise inference for Periods
        data = com._values_from_object(data)
        inferred = lib.infer_dtype(data)
        if inferred == 'period':
            return PeriodProperties(PeriodIndex(data), index)

    raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
def _from_arraylike(cls, data, freq, tz):
    """Coerce array-like ``data`` into (int64 ordinals, freq) for a
    PeriodIndex.  ``freq`` may be None, in which case it is inferred or
    extracted from the data; raises ValueError when it cannot be."""
    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    if not isinstance(
            data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
        if is_scalar(data) or isinstance(data, Period):
            raise ValueError('PeriodIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        try:
            # fast path: data already holds integer ordinals
            data = _ensure_int64(data)
            if freq is None:
                raise ValueError('freq not specified')
            data = np.array([Period(x, freq=freq) for x in data],
                            dtype=np.int64)
        except (TypeError, ValueError):
            # slow path: treat as objects, pull freq/ordinals from them
            data = _ensure_object(data)
            if freq is None:
                freq = period.extract_freq(data)
            data = period.extract_ordinals(data, freq)
    else:
        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:
                freq = data.freq
                data = data._values
            else:
                # frequencies differ: resample the ordinals
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data._values,
                                                base1, base2, 1)
        else:
            if is_object_dtype(data):
                inferred = infer_dtype(data)
                if inferred == 'integer':
                    data = data.astype(np.int64)

            if freq is None and is_object_dtype(data):
                # must contain Period instance and thus extract ordinals
                freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)

            if freq is None:
                msg = 'freq not specified and cannot be inferred'
                raise ValueError(msg)

            if data.dtype != np.int64:
                if np.issubdtype(data.dtype, np.datetime64):
                    data = dt64arr_to_periodarr(data, freq, tz)
                else:
                    data = _ensure_object(data)
                    data = period.extract_ordinals(data, freq)

    return data, freq
def test_bools(self):
    """Boolean inference for object arrays, numpy bools, and bool dtype."""
    cases = [
        (np.array([True, False, True, True, True], dtype="O"), "boolean"),
        (np.array([np.bool_(True), np.bool_(False)], dtype="O"),
         "boolean"),
        # a non-bool element demotes the inference to 'mixed'
        (np.array([True, False, True, "foo"], dtype="O"), "mixed"),
        (np.array([True, False, True], dtype=bool), "boolean"),
    ]
    for arr, expected in cases:
        self.assertEqual(lib.infer_dtype(arr), expected)
def _sqlalchemy_type(self, col):
    """Return the SQLAlchemy column type matching ``col``'s dtype.

    Object columns are further inspected with ``lib.infer_dtype`` so
    that pure date / time columns map to DATE / TIME instead of TEXT.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time)

    if com.is_datetime64_dtype(col):
        try:
            # probe for tz-awareness; a naive column has no tzinfo attr.
            # Was a bare ``except:``, which also swallowed SystemExit /
            # KeyboardInterrupt — narrowed to the exception the probe raises.
            tz = col.tzinfo  # noqa: F841 - existence check only
            return DateTime(timezone=True)
        except AttributeError:
            return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    # object dtype: infer date/time from the values themselves
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def _sql_type_name(self, col):
    """Map ``col``'s dtype to the flavor-specific SQL type name.

    Falls back to "text"; object columns are inspected element-wise to
    detect pure date / time columns.
    """
    pytype = col.dtype.type
    pytype_name = "text"
    if issubclass(pytype, np.floating):
        pytype_name = "float"
    elif com.is_timedelta64_dtype(pytype):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        pytype_name = "int"
    elif issubclass(pytype, np.integer):
        pytype_name = "int"
    elif issubclass(pytype, np.datetime64) or pytype is datetime:
        # Caution: np.datetime64 is also a subclass of np.number.
        pytype_name = "datetime"
    elif issubclass(pytype, np.bool_):
        pytype_name = "bool"
    elif issubclass(pytype, np.object):
        # was rebinding ``pytype`` (a numpy type) to the inference
        # *string* — renamed for clarity; behavior unchanged
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == "date":
            pytype_name = "date"
        elif inferred == "time":
            pytype_name = "time"
    return _SQL_TYPES[pytype_name][self.pd_sql.flavor]
def _sqlalchemy_type(self, col):
    """Return the SQLAlchemy column type matching ``col``'s dtype.

    Object columns are further inspected with ``lib.infer_dtype`` so
    pure date / time columns map to DATE / TIME instead of TEXT.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time, Interval)

    if com.is_datetime64_dtype(col):
        try:
            # probe for tz-awareness; naive columns lack a tzinfo attr.
            # Was a bare ``except:`` (swallowed even KeyboardInterrupt);
            # narrowed to the exception the attribute probe raises.
            tz = col.tzinfo  # noqa: F841 - existence check only
            return DateTime(timezone=True)
        except AttributeError:
            return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean

    # object dtype: infer date/time from the values themselves
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def is_datetime_arraylike(arr):
    """Return True if ``arr`` is datetime array-like / a DatetimeIndex."""
    if isinstance(arr, ABCDatetimeIndex):
        return True
    if isinstance(arr, (np.ndarray, ABCSeries)):
        # object-dtyped containers need element-wise inference
        holds_objects = arr.dtype == object
        return holds_objects and lib.infer_dtype(arr) == 'datetime'
    # anything else: trust its own inferred_type, if present
    return getattr(arr, 'inferred_type', None) == 'datetime'
def is_period_arraylike(arr):
    """Return True if ``arr`` is period array-like / a PeriodIndex."""
    if isinstance(arr, ABCPeriodIndex):
        return True
    if isinstance(arr, (np.ndarray, ABCSeries)):
        # object-dtyped containers need element-wise inference
        holds_objects = arr.dtype == object
        return holds_objects and lib.infer_dtype(arr) == 'period'
    # anything else: trust its own inferred_type, if present
    return getattr(arr, 'inferred_type', None) == 'period'
def _hashtable_algo(f, values, return_dtype=None):
    """
    f(HashTable, type_caster) -> result

    Dispatch ``f`` to the hash-table class and caster matching the dtype
    of ``values``; datetimelike results are viewed back to their dtype.
    """
    dtype = values.dtype
    if is_float_dtype(dtype):
        return f(htable.Float64HashTable, _ensure_float64)
    if is_signed_integer_dtype(dtype):
        return f(htable.Int64HashTable, _ensure_int64)
    if is_unsigned_integer_dtype(dtype):
        return f(htable.UInt64HashTable, _ensure_uint64)
    if is_datetime64_dtype(dtype):
        # hash via i8, then view the result back as datetime64
        return f(htable.Int64HashTable,
                 _ensure_int64).view(return_dtype or 'M8[ns]')
    if is_timedelta64_dtype(dtype):
        return f(htable.Int64HashTable,
                 _ensure_int64).view(return_dtype or 'm8[ns]')

    # its cheaper to use a String Hash Table than Object
    if lib.infer_dtype(values) in ['string']:
        return f(htable.StringHashTable, _ensure_object)

    # use Object
    return f(htable.PyObjectHashTable, _ensure_object)
def make_block(values, items, ref_items):
    """Pick the proper Block subclass for ``values`` and construct it,
    falling back to datetime inference for object arrays."""
    dtype = values.dtype
    vtype = dtype.type
    klass = None

    if issubclass(vtype, np.floating):
        klass = FloatBlock
    elif issubclass(vtype, np.complexfloating):
        klass = ComplexBlock
    elif issubclass(vtype, np.datetime64):
        klass = DatetimeBlock
    elif issubclass(vtype, np.integer):
        # integer blocks are normalized to 64-bit
        if vtype != np.int64:
            values = values.astype("i8")
        klass = IntBlock
    elif dtype == np.bool_:
        klass = BoolBlock

    # try to infer a datetimeblock
    if klass is None and np.prod(values.shape):
        flat = values.flatten()
        inferred_type = lib.infer_dtype(flat)
        if inferred_type == "datetime":
            # we have an object array that has been inferred as datetime,
            # so convert it
            try:
                values = tslib.array_to_datetime(flat).reshape(values.shape)
                klass = DatetimeBlock
            except:
                # best-effort conversion; on any failure keep the object
                # array as-is (it already object, so leave it)
                pass

    if klass is None:
        klass = ObjectBlock

    return klass(values, items, ref_items, ndim=values.ndim)
def test_floats(self):
    """Float inference across object and native float dtypes."""
    cases = [
        (np.array([1.0, 2.0, 3.0, np.float64(4), np.float32(5)],
                  dtype="O"),
         "floating"),
        # ints + floats + a string infer as 'mixed-integer'
        (np.array([1, 2, 3, np.float64(4), np.float32(5), "foo"],
                  dtype="O"),
         "mixed-integer"),
        (np.array([1, 2, 3, 4, 5], dtype="f4"), "floating"),
        (np.array([1, 2, 3, 4, 5], dtype="f8"), "floating"),
    ]
    for arr, expected in cases:
        self.assertEqual(lib.infer_dtype(arr), expected)
def _sql_type_name(self, col):
    """Map ``col``'s dtype to the flavor-specific SQL type name.

    Falls back to "text"; object columns are inspected element-wise to
    detect pure date / time columns.
    """
    pytype = col.dtype.type
    pytype_name = "text"
    if issubclass(pytype, np.floating):
        pytype_name = "float"
    elif com.is_timedelta64_dtype(pytype):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        pytype_name = "int"
    elif issubclass(pytype, np.integer):
        pytype_name = "int"
    elif issubclass(pytype, np.datetime64) or pytype is datetime:
        # Caution: np.datetime64 is also a subclass of np.number.
        pytype_name = "datetime"
    elif issubclass(pytype, np.bool_):
        pytype_name = "bool"
    elif issubclass(pytype, np.object):
        # was rebinding ``pytype`` (a numpy type) to the inference
        # *string* — renamed for clarity; behavior unchanged
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == "date":
            pytype_name = "date"
        elif inferred == "time":
            pytype_name = "time"
    return _SQL_TYPES[pytype_name][self.pd_sql.flavor]
def _from_arraylike(cls, data, freq, tz):
    """Coerce array-like ``data`` into (int64 ordinals, freq) for a
    PeriodIndex.  ``freq`` may be None, in which case it is inferred or
    extracted from the data; raises ValueError when it cannot be."""
    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    if not isinstance(data, (np.ndarray, PeriodIndex,
                             DatetimeIndex, Int64Index)):
        if is_scalar(data) or isinstance(data, Period):
            raise ValueError('PeriodIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        try:
            # fast path: data already holds integer ordinals
            data = _ensure_int64(data)
            if freq is None:
                raise ValueError('freq not specified')
            data = np.array([Period(x, freq=freq) for x in data],
                            dtype=np.int64)
        except (TypeError, ValueError):
            # slow path: treat as objects, pull freq/ordinals from them
            data = _ensure_object(data)
            if freq is None:
                freq = period.extract_freq(data)
            data = period.extract_ordinals(data, freq)
    else:
        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:
                freq = data.freq
                data = data._values
            else:
                # frequencies differ: resample the ordinals
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data._values,
                                                base1, base2, 1)
        else:
            if is_object_dtype(data):
                inferred = infer_dtype(data)
                if inferred == 'integer':
                    data = data.astype(np.int64)

            if freq is None and is_object_dtype(data):
                # must contain Period instance and thus extract ordinals
                freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)

            if freq is None:
                msg = 'freq not specified and cannot be inferred'
                raise ValueError(msg)

            if data.dtype != np.int64:
                if np.issubdtype(data.dtype, np.datetime64):
                    data = dt64arr_to_periodarr(data, freq, tz)
                else:
                    data = _ensure_object(data)
                    data = period.extract_ordinals(data, freq)

    return data, freq
def test_bools(self):
    """Boolean inference: python bools, numpy bools, mixed, native dtype."""
    pure_bools = np.array([True, False, True, True, True], dtype='O')
    self.assertEqual(lib.infer_dtype(pure_bools), 'boolean')

    numpy_bools = np.array([np.bool_(True), np.bool_(False)], dtype='O')
    self.assertEqual(lib.infer_dtype(numpy_bools), 'boolean')

    # a stray string demotes the result to 'mixed'
    with_str = np.array([True, False, True, 'foo'], dtype='O')
    self.assertEqual(lib.infer_dtype(with_str), 'mixed')

    native = np.array([True, False, True], dtype=bool)
    self.assertEqual(lib.infer_dtype(native), 'boolean')
def test_object(self):
    """GH 7431: a lone None can only be inferred as 'mixed'."""
    # cannot infer more than this as only a single element
    lone_none = np.array([None], dtype='O')
    self.assertEqual(lib.infer_dtype(lone_none), 'mixed')
def _get_data_algo(values, func_map):
    """Select the implementation from ``func_map`` matching the dtype of
    ``values`` and coerce ``values`` accordingly; returns (func, values).

    Check order matters: ``needs_i8_conversion`` (datetimelike) must be
    tested before the plain integer branches."""
    f = None
    if is_float_dtype(values):
        f = func_map['float64']
        values = _ensure_float64(values)

    elif needs_i8_conversion(values):
        # datetimelike: hash via the underlying i8 view
        f = func_map['int64']
        values = values.view('i8')

    elif is_signed_integer_dtype(values):
        f = func_map['int64']
        values = _ensure_int64(values)

    elif is_unsigned_integer_dtype(values):
        f = func_map['uint64']
        values = _ensure_uint64(values)

    else:
        values = _ensure_object(values)

        # its cheaper to use a String Hash Table than Object
        if lib.infer_dtype(values) in ['string']:
            try:
                f = func_map['string']
            except KeyError:
                # no string specialization registered; fall back to object
                pass

    if f is None:
        f = func_map['object']

    return f, values
def unique1d(values):
    """
    Hash table-based unique

    Picks the hash table matching ``values.dtype``; datetimelike dtypes
    are hashed via their i8 view and the result viewed back.
    """
    if np.issubdtype(values.dtype, np.floating):
        table = htable.Float64HashTable(len(values))
        uniques = np.array(table.unique(_ensure_float64(values)),
                           dtype=np.float64)
    # NOTE: datetime64/timedelta64 are checked before the integer
    # branches so datetimelike dtypes keep their dtype in the result
    elif np.issubdtype(values.dtype, np.datetime64):
        table = htable.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
        uniques = uniques.view('M8[ns]')
    elif np.issubdtype(values.dtype, np.timedelta64):
        table = htable.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
        uniques = uniques.view('m8[ns]')
    elif np.issubdtype(values.dtype, np.signedinteger):
        table = htable.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
    elif np.issubdtype(values.dtype, np.unsignedinteger):
        table = htable.UInt64HashTable(len(values))
        uniques = table.unique(_ensure_uint64(values))
    else:
        # its cheaper to use a String Hash Table than Object
        if lib.infer_dtype(values) in ['string']:
            table = htable.StringHashTable(len(values))
        else:
            table = htable.PyObjectHashTable(len(values))
        uniques = table.unique(_ensure_object(values))
    return uniques
def _convert_to_array(self, values, name=None, other=None):
    """converts values to ndarray

    tz-aware-capable variant: also handles scalar tz-aware datetimes
    and tz-aware datetime arrays.  ``name`` is the dunder op being
    performed; ``other`` supplies the dtype for all-null inputs.
    Raises TypeError for incompatible inputs.
    """
    from pandas.tseries.timedeltas import to_timedelta

    ovalues = values  # keep the original (pre-wrap) object for tz checks
    if not is_list_like(values):
        values = np.array([values])

    inferred_type = lib.infer_dtype(values)

    if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
        # if we have a other of timedelta, but use pd.NaT here we
        # we are in the wrong path
        if (other is not None and other.dtype == 'timedelta64[ns]' and
                all(isnull(v) for v in values)):
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = iNaT

        # a datelike
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
        # datetime with tz
        elif isinstance(ovalues, datetime.datetime) and hasattr(ovalues,
                                                                'tz'):
            values = pd.DatetimeIndex(values)
        # datetime array with tz
        elif com.is_datetimetz(values):
            if isinstance(values, pd.Series):
                values = values._values
        elif not (isinstance(values, (np.ndarray, pd.Series)) and
                  is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
    elif inferred_type in ('timedelta', 'timedelta64'):
        # have a timedelta, convert to to ns here
        values = to_timedelta(values, errors='coerce')
    elif inferred_type == 'integer':
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == 'm':
            values = values.astype('timedelta64[ns]')
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ('__truediv__', '__div__', '__mul__'):
            # only scaling ops are allowed with plain integers
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif inferred_type == 'floating':
        # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
        if isnull(values).all():
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = iNaT
        else:
            raise TypeError(
                'incompatible type [{0}] for a datetime/timedelta '
                'operation'.format(np.array(values).dtype))
    elif self._is_offset(values):
        # DateOffset-likes are handled by the caller; pass through
        return values
    else:
        raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                        " operation".format(np.array(values).dtype))

    return values
def test_floats(self):
    """Float inference for object arrays and native float dtypes."""
    obj_floats = np.array([1., 2., 3., np.float64(4), np.float32(5)],
                          dtype='O')
    self.assertEqual(lib.infer_dtype(obj_floats), 'floating')

    # ints + floats + a string infer as 'mixed-integer'
    mixed = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
                     dtype='O')
    self.assertEqual(lib.infer_dtype(mixed), 'mixed-integer')

    for code in ('f4', 'f8'):
        native = np.array([1, 2, 3, 4, 5], dtype=code)
        self.assertEqual(lib.infer_dtype(native), 'floating')
def test_categorical(self):
    """GH 8974: Categoricals infer as 'categorical', also inside Series."""
    from pandas import Categorical, Series

    plain = Categorical(list('abc'))
    ordered = Categorical(list('abc'), categories=['cegfab'], ordered=True)

    for cat in (plain, ordered):
        self.assertEqual(lib.infer_dtype(cat), 'categorical')
        # wrapping in a Series must not change the inference
        self.assertEqual(lib.infer_dtype(Series(cat)), 'categorical')
def _convert_to_array(self, values, name=None, other=None):
    """converts values to ndarray

    Older (numpy<1.7-aware) variant using ``pa.Array`` and
    ``_possibly_cast_to_timedelta``.  ``name`` is the dunder op;
    ``other`` supplies the dtype for all-null inputs.  Raises
    TypeError for incompatible inputs.
    """
    from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

    # numpy < 1.7 needs the 'compat' coercion mode
    coerce = "compat" if pd._np_version_under1p7 else True
    if not is_list_like(values):
        values = np.array([values])

    inferred_type = lib.infer_dtype(values)
    if inferred_type in ("datetime64", "datetime", "date", "time"):
        # if we have a other of timedelta, but use pd.NaT here we
        # we are in the wrong path
        if other is not None and other.dtype == "timedelta64[ns]" and \
                all(isnull(v) for v in values):
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = tslib.iNaT

        # a datetlike
        elif not (isinstance(values, (pa.Array, pd.Series)) and
                  com.is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
    elif inferred_type in ("timedelta", "timedelta64"):
        # have a timedelta, convert to to ns here
        values = _possibly_cast_to_timedelta(values, coerce=coerce)
    elif inferred_type == "integer":
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == "m":
            values = values.astype("timedelta64[ns]")
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ("__truediv__", "__div__", "__mul__"):
            # only scaling ops are allowed with plain integers
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif isinstance(values[0], pd.DateOffset):
        # handle DateOffsets: only absolute offsets (with .delta) allowed
        os = pa.array([getattr(v, "delta", None) for v in values])
        mask = isnull(os)
        if mask.any():
            raise TypeError(
                "cannot use a non-absolute DateOffset in "
                "datetime/timedelta operations [{0}]".format(", ".join([com.pprint_thing(v) for v in values[mask]]))
            )
        values = _possibly_cast_to_timedelta(os, coerce=coerce)
    elif inferred_type == "floating":
        # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
        if isnull(values).all():
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = tslib.iNaT
        else:
            raise TypeError(
                "incompatible type [{0}] for a datetime/timedelta "
                "operation".format(pa.array(values).dtype)
            )
    else:
        raise TypeError(
            "incompatible type [{0}] for a datetime/timedelta"
            " operation".format(pa.array(values).dtype)
        )
    return values
def test_categorical(self):
    """GH 8974: categorical inference, bare and wrapped in a Series."""
    from pandas import Categorical, Series

    # default (unordered) categorical
    cat = Categorical(list("abc"))
    self.assertEqual(lib.infer_dtype(cat), "categorical")
    self.assertEqual(lib.infer_dtype(Series(cat)), "categorical")

    # explicit categories + ordered flag must not change the inference
    cat = Categorical(list("abc"), categories=["cegfab"], ordered=True)
    self.assertEqual(lib.infer_dtype(cat), "categorical")
    self.assertEqual(lib.infer_dtype(Series(cat)), "categorical")
def is_datetime_arraylike(arr):
    """Return True when ``arr`` holds datetime-like values.

    DataFrames are checked column-wise; otherwise delegate to pandas'
    own predicate when available, falling back to dtype inference.
    """
    if isinstance(arr, pd.DataFrame):
        # every column must itself be datetime array-like
        return arr.apply(pd_is_datetime_arraylike).all()
    if pd_is_datetime_arraylike is not None:
        return pd_is_datetime_arraylike(arr)
    if isinstance(arr, pd.DatetimeIndex):
        return True
    # last resort: element-wise inference ('datetime', 'datetime64', ...)
    inferred = lib.infer_dtype(arr)
    return 'datetime' in inferred
def _convert_bin_to_numeric_type(x):
    """
    if the passed bin is of datetime/timedelta type,
    this method converts it to integer (i8 view); anything else is
    returned unchanged
    """
    inferred = infer_dtype(x)
    if inferred in ('timedelta', 'timedelta64'):
        x = to_timedelta(x).view(np.int64)
    elif inferred in ('datetime', 'datetime64'):
        x = to_datetime(x).view(np.int64)
    return x
def _convert_obj(obj):
    """
    Convert a series to pytables values and Atom

    Returns a (converted values, kind string, pytables Atom) triple.
    Raises Exception for unsupported inferred types.

    Fixes: removed a duplicate (unreachable) ``isinstance(obj,
    pd.PeriodIndex)`` branch, and dead statements after the ``raise``
    in the 'unicode' branch.
    """
    if isinstance(obj, pd.DatetimeIndex):
        converted = obj.asi8
        return converted, 'datetime64', tb.Int64Atom()
    elif isinstance(obj, pd.PeriodIndex):
        converted = obj.values
        return converted, 'periodindex', tb.Int64Atom()

    inferred_type = lib.infer_dtype(obj)
    values = np.asarray(obj)

    if inferred_type == 'datetime64':
        converted = values.view('i8')
        return converted, inferred_type, tb.Int64Atom()
    if inferred_type == 'string':
        # TODO, am I doing this right?
        converted = np.array(list(values), dtype=np.bytes_)
        itemsize = converted.dtype.itemsize
        # for OBT, can't assume value will be right for future
        # frame keys
        if itemsize < MIN_ITEMSIZE:
            itemsize = MIN_ITEMSIZE
            converted = converted.astype("S{0}".format(itemsize))
        return converted, inferred_type, tb.StringAtom(itemsize)
    elif inferred_type == 'unicode':
        # table's don't seem to support objects
        raise Exception("Unsupported inferred_type {0}".format(
            inferred_type))
    elif inferred_type == 'datetime':
        # store python datetimes as float seconds-since-epoch
        converted = np.array([(time.mktime(v.timetuple()) +
                               v.microsecond / 1E6)
                              for v in values], dtype=np.float64)
        return converted, inferred_type, tb.Time64Atom()
    elif inferred_type == 'integer':
        converted = np.asarray(values, dtype=np.int64)
        return converted, inferred_type, tb.Int64Atom()
    elif inferred_type == 'floating':
        converted = np.asarray(values, dtype=np.float64)
        return converted, inferred_type, tb.Float64Atom()
    raise Exception("Unsupported inferred_type {0} {1}".format(
        inferred_type, str(values[-5:])))
def _convert_index(index):
    """Convert an Index to (values, kind string, pytables Col) for
    storage; raises Exception for MultiIndex."""
    if isinstance(index, DatetimeIndex):
        converted = index.asi8
        return converted, 'datetime64', _tables().Int64Col()
    elif isinstance(index, (Int64Index, PeriodIndex)):
        atom = _tables().Int64Col()
        return index.values, 'integer', atom

    if isinstance(index, MultiIndex):
        raise Exception('MultiIndex not supported here!')

    inferred_type = lib.infer_dtype(index)
    values = np.asarray(index)

    if inferred_type == 'datetime64':
        converted = values.view('i8')
        return converted, 'datetime64', _tables().Int64Col()
    elif inferred_type == 'datetime':
        # python datetimes: store as float seconds-since-epoch
        converted = np.array([(time.mktime(v.timetuple()) +
                               v.microsecond / 1E6) for v in values],
                             dtype=np.float64)
        return converted, 'datetime', _tables().Time64Col()
    elif inferred_type == 'date':
        converted = np.array([time.mktime(v.timetuple()) for v in values],
                             dtype=np.int32)
        return converted, 'date', _tables().Time32Col()
    elif inferred_type == 'string':
        # atom = _tables().ObjectAtom()
        # return np.asarray(values, dtype='O'), 'object', atom
        converted = np.array(list(values), dtype=np.str_)
        itemsize = converted.dtype.itemsize
        return converted, 'string', _tables().StringCol(itemsize)
    elif inferred_type == 'unicode':
        # tables can't store unicode directly; fall back to object
        atom = _tables().ObjectAtom()
        return np.asarray(values, dtype='O'), 'object', atom
    elif inferred_type == 'integer':
        # take a guess for now, hope the values fit
        atom = _tables().Int64Col()
        return np.asarray(values, dtype=np.int64), 'integer', atom
    elif inferred_type == 'floating':
        atom = _tables().Float64Col()
        return np.asarray(values, dtype=np.float64), 'float', atom
    else:  # pragma: no cover
        atom = _tables().ObjectAtom()
        return np.asarray(values, dtype='O'), 'object', atom
def _convert_to_array(self, values, name=None):
    """converts values to ndarray

    Oldest variant (no ``other`` parameter, ``pa.Array``); ``name`` is
    the dunder op being performed.  Raises TypeError for incompatible
    inputs.
    """
    from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

    # numpy < 1.7 needs the 'compat' coercion mode
    coerce = 'compat' if pd._np_version_under1p7 else True
    if not is_list_like(values):
        values = np.array([values])

    inferred_type = lib.infer_dtype(values)
    if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
        # a datetlike
        if not (isinstance(values, (pa.Array, pd.Series)) and
                com.is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
    elif inferred_type in ('timedelta', 'timedelta64'):
        # have a timedelta, convert to to ns here
        values = _possibly_cast_to_timedelta(values, coerce=coerce)
    elif inferred_type == 'integer':
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == 'm':
            values = values.astype('timedelta64[ns]')
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ('__truediv__', '__div__', '__mul__'):
            # only scaling ops are allowed with plain integers
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif isinstance(values[0], pd.DateOffset):
        # handle DateOffsets: only absolute offsets (with .delta) allowed
        os = pa.array([getattr(v, 'delta', None) for v in values])
        mask = isnull(os)
        if mask.any():
            raise TypeError(
                "cannot use a non-absolute DateOffset in "
                "datetime/timedelta operations [{0}]".format(','.join(
                    [com.pprint_thing(v) for v in values[mask]])))
        values = _possibly_cast_to_timedelta(os, coerce=coerce)
    else:
        raise TypeError(
            "incompatible type [{0}] for a datetime/timedelta operation".
            format(pa.array(values).dtype))

    return values
def _infer_fill_value(val):
    """
    infer the fill value for the nan/NaT from the provided
    scalar/ndarray/list-like if we are a NaT, return the correct dtyped
    element to provide proper block construction
    """
    arr = np.array(val if is_list_like(val) else [val], copy=False)

    if is_datetimelike(arr):
        # keep the datetimelike dtype of the input
        return np.array('NaT', dtype=arr.dtype)

    if is_object_dtype(arr.dtype):
        inferred = lib.infer_dtype(_ensure_object(arr))
        if inferred in ('datetime', 'datetime64'):
            return np.array('NaT', dtype=_NS_DTYPE)
        if inferred in ('timedelta', 'timedelta64'):
            return np.array('NaT', dtype=_TD_DTYPE)

    return np.nan
def _convert_to_array(self, values, name=None):
    """converts values to ndarray

    Oldest variant (no ``other`` parameter, ``pa.Array``), with a
    slightly different error-message layout than its sibling in this
    file.  ``name`` is the dunder op being performed.  Raises
    TypeError for incompatible inputs.
    """
    from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

    # numpy < 1.7 needs the 'compat' coercion mode
    coerce = 'compat' if pd._np_version_under1p7 else True
    if not is_list_like(values):
        values = np.array([values])

    inferred_type = lib.infer_dtype(values)
    if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
        # a datetlike
        if not (isinstance(values, (pa.Array, pd.Series)) and
                com.is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
    elif inferred_type in ('timedelta', 'timedelta64'):
        # have a timedelta, convert to to ns here
        values = _possibly_cast_to_timedelta(values, coerce=coerce)
    elif inferred_type == 'integer':
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == 'm':
            values = values.astype('timedelta64[ns]')
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ('__truediv__', '__div__', '__mul__'):
            # only scaling ops are allowed with plain integers
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif isinstance(values[0], pd.DateOffset):
        # handle DateOffsets: only absolute offsets (with .delta) allowed
        os = pa.array([getattr(v, 'delta', None) for v in values])
        mask = isnull(os)
        if mask.any():
            raise TypeError("cannot use a non-absolute DateOffset in "
                            "datetime/timedelta operations [{0}]".format(
                                ', '.join([com.pprint_thing(v)
                                           for v in values[mask]])))
        values = _possibly_cast_to_timedelta(os, coerce=coerce)
    else:
        raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                        " operation".format(pa.array(values).dtype))

    return values
def fill_hdf(self, table = None, dataframe = None):
    """Write ``dataframe`` into the HDF5 store under key ``table``.

    If pytables rejects the frame (TypeError, typically unicode object
    columns), those columns are coerced to ``str`` and the write is
    retried.

    Fix: mandatory-argument checks were ``assert`` statements, which
    are stripped under ``python -O``; replaced with explicit raises.
    """
    if table is None:
        raise ValueError(u"The mandatory keyword argument 'table' is not provided")
    if dataframe is None:
        raise ValueError(u"The mandatory keyword argument 'dataframe' is not provided")
    if table not in self.tables:
        self.tables[table] = {}
    log.info("Inserting table {} in HDF file {}".format(
        table,
        self.hdf5_file_path,
        )
    )
    store_path = table
    try:
        dataframe.to_hdf(self.hdf5_file_path, store_path, format = 'table', append = False)
    except TypeError:
        # coerce unicode object columns to str, then retry
        types = dataframe.apply(lambda x: infer_dtype(x.values))
        log.info("The following types are converted to strings \n {}".format(types[types=='unicode']))
        for column in types[types=='unicode'].index:
            dataframe[column] = dataframe[column].astype(str)
        # NOTE(review): the retry drops format='table'/append=False —
        # confirm that falling back to the default format is intended
        dataframe.to_hdf(self.hdf5_file_path, store_path)
def _possibly_cast_to_datetime(value, dtype, coerce = False):
    """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT

    Parameters
    ----------
    value : scalar or array-like
    dtype : np.dtype, dtype string, or None
        when None, only converts when ``value`` is inferred as 'datetime'
    coerce : bool, default False
        passed through to ``tslib.array_to_datetime``
    """
    if isinstance(dtype, basestring):
        dtype = np.dtype(dtype)

    if dtype is not None and is_datetime64_dtype(dtype):
        if np.isscalar(value):
            # scalar path: missing values become iNaT
            if value == tslib.iNaT or isnull(value):
                value = tslib.iNaT
        else:
            value = np.array(value)

            # have a scalar array-like (e.g. NaT)
            if value.ndim == 0:
                value = tslib.iNaT

            # we have an array of datetime & nulls
            elif np.prod(value.shape):
                try:
                    value = tslib.array_to_datetime(value, coerce = coerce)
                except:
                    # unparseable: leave the input unchanged (best effort)
                    pass

    elif dtype is None:
        # we might have a array (or single object) that is datetime like,
        # and no dtype is passed; don't change the value unless we find a
        # datetime set
        v = value
        if not (is_list_like(v) or hasattr(v,'len')):
            v = [ v ]
        if len(v):
            inferred_type = lib.infer_dtype(v)
            if inferred_type == 'datetime':
                try:
                    value = tslib.array_to_datetime(np.array(v))
                except:
                    # best effort only — keep the original on failure
                    pass

    return value
def maybe_to_datetimelike(data, copy=False):
    """ return a DelegatedClass of a Series that is datetimelike
    (e.g. datetime64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass
    """
    if not isinstance(data, Series):
        raise TypeError(
            "cannot convert an object of type {0} to a datetimelike index".
            format(type(data)))

    orig_index = data.index

    # datetime64[ns] dtype -> datetime accessor
    if issubclass(data.dtype.type, np.datetime64):
        return DatetimeProperties(DatetimeIndex(data, copy=copy), orig_index)

    # an actual PeriodIndex -> period accessor
    if isinstance(data, PeriodIndex):
        return PeriodProperties(PeriodIndex(data, copy=copy), orig_index)

    # object dtype that infers as periods also gets the period accessor
    data = com._values_from_object(data)
    if lib.infer_dtype(data) == 'period':
        return PeriodProperties(PeriodIndex(data), orig_index)

    raise TypeError(
        "cannot convert an object of type {0} to a datetimelike index".format(
            type(data)))
def make_block(values, items, ref_items):
    """Construct the Block subclass appropriate for ``values.dtype``.

    Object arrays that actually hold datetimes are upgraded to a
    DatetimeBlock; anything unrecognized falls back to ObjectBlock.
    """
    element_type = values.dtype.type
    block_cls = None

    if issubclass(element_type, np.floating):
        block_cls = FloatBlock
    elif issubclass(element_type, np.complexfloating):
        block_cls = ComplexBlock
    elif issubclass(element_type, np.datetime64):
        block_cls = DatetimeBlock
    elif issubclass(element_type, np.integer):
        # integer blocks are always stored as 64-bit
        if element_type != np.int64:
            values = values.astype('i8')
        block_cls = IntBlock
    elif values.dtype == np.bool_:
        block_cls = BoolBlock

    # nothing matched: a non-empty object array may still hold datetimes
    if block_cls is None and np.prod(values.shape):
        flattened = values.flatten()
        if lib.infer_dtype(flattened) == 'datetime':
            try:
                values = tslib.array_to_datetime(flattened).reshape(
                    values.shape)
                block_cls = DatetimeBlock
            except:
                # conversion failed — keep it as a plain object block
                pass

    if block_cls is None:
        block_cls = ObjectBlock

    return block_cls(values, items, ref_items, ndim=values.ndim)
def test_constructor_with_datetime_tz(self):
    """Series construction, indexing, concat, astype, repr and inference
    for tz-aware datetime64 data (GH 8260)."""

    # 8260
    # support datetime64 with tz
    dr = date_range('20130101', periods=3, tz='US/Eastern')
    s = Series(dr)
    self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
    self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
    self.assertTrue(is_datetime64tz_dtype(s.dtype))
    self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

    # export: .values drops the tz and yields naive UTC datetime64[ns]
    result = s.values
    self.assertIsInstance(result, np.ndarray)
    self.assertTrue(result.dtype == 'datetime64[ns]')

    exp = pd.DatetimeIndex(result)
    exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz)
    self.assert_index_equal(dr, exp)

    # indexing
    result = s.iloc[0]
    self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                       tz='US/Eastern', freq='D'))
    result = s[0]
    self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                       tz='US/Eastern', freq='D'))

    result = s[Series([True, True, False], index=s.index)]
    assert_series_equal(result, s[0:2])

    result = s.iloc[0:1]
    assert_series_equal(result, Series(dr[0:1]))

    # concat
    result = pd.concat([s.iloc[0:1], s.iloc[1:]])
    assert_series_equal(result, s)

    # astype
    result = s.astype(object)
    expected = Series(DatetimeIndex(s._values).asobject)
    assert_series_equal(result, expected)

    result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
    assert_series_equal(result, s)

    # astype - datetime64[ns, tz]
    result = Series(s.values).astype('datetime64[ns, US/Eastern]')
    assert_series_equal(result, s)

    result = Series(s.values).astype(s.dtype)
    assert_series_equal(result, s)

    # astype to a different tz converts rather than relocalizes
    result = s.astype('datetime64[ns, CET]')
    expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
    assert_series_equal(result, expected)

    # short str
    self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

    # formatting with NaT
    result = s.shift()
    self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
    self.assertTrue('NaT' in str(result))

    # long str (truncated repr keeps the dtype line)
    t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
    self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

    result = pd.DatetimeIndex(s, freq='infer')
    tm.assert_index_equal(result, dr)

    # inference: uniform tz infers 'datetime64', mixed tzs stay object
    s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')])
    self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
    self.assertTrue(lib.infer_dtype(s) == 'datetime64')

    s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')])
    self.assertTrue(s.dtype == 'object')
    self.assertTrue(lib.infer_dtype(s) == 'datetime')

    # with all NaT
    s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
    expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
    assert_series_equal(s, expected)
def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
    """
    Sort ``values`` and reorder corresponding ``labels``.
    ``values`` should be unique if ``labels`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``labels`` is not None.
    labels : list_like
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``na_sentinel``.
    na_sentinel : int, default -1
        Value in ``labels`` to mark "not found".
        Ignored when ``labels`` is None.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``labels`` is None.

    Returns
    -------
    ordered : ndarray
        Sorted ``values``
    new_labels : ndarray
        Reordered ``labels``; returned when ``labels`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``labels`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``labels`` is not None and ``values`` contain duplicates.
    """
    if not is_list_like(values):
        # BUG FIX: the message was built from adjacent literals missing a
        # joining space ("passed tosafe_sort")
        raise TypeError("Only list-like objects are allowed to be passed "
                        "to safe_sort as values")
    values = np.array(values, copy=False)

    def sort_mixed(values):
        # order ints before strings, safe in py3
        str_pos = np.array([isinstance(x, string_types) for x in values],
                           dtype=bool)
        nums = np.sort(values[~str_pos])
        strs = np.sort(values[str_pos])
        return _ensure_object(np.concatenate([nums, strs]))

    sorter = None
    if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer':
        # unorderable in py3 if mixed str/int
        ordered = sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except TypeError:
            # try this anyway
            ordered = sort_mixed(values)

    # labels:
    if labels is None:
        return ordered

    if not is_list_like(labels):
        # BUG FIX: same missing-space defect ("to bepassed")
        raise TypeError("Only list-like objects or None are allowed to be "
                        "passed to safe_sort as labels")
    labels = _ensure_platform_int(np.asarray(labels))

    from pandas import Index
    if not assume_unique and not Index(values).is_unique:
        raise ValueError("values should be unique if labels is not None")

    if sorter is None:
        # mixed types: derive the sorter via a hash table lookup of the
        # already-ordered values
        (hash_klass, _), values = _get_data_algo(values, _hashtables)
        t = hash_klass(len(values))
        t.map_locations(values)
        sorter = _ensure_platform_int(t.lookup(ordered))

    reverse_indexer = np.empty(len(sorter), dtype=np.int_)
    reverse_indexer.put(sorter, np.arange(len(sorter)))

    mask = (labels < -len(values)) | (labels >= len(values)) | \
        (labels == na_sentinel)

    # (Out of bound indices will be masked with `na_sentinel` next, so we may
    # deal with them here without performance loss using `mode='wrap'`.)
    new_labels = reverse_indexer.take(labels, mode='wrap')
    np.putmask(new_labels, mask, na_sentinel)

    return ordered, _ensure_platform_int(new_labels)
def _possibly_infer_to_datetimelike(value, convert_dates=False):
    """
    we might have a array (or single object) that is datetime like,
    and no dtype is passed don't change the value unless we find a
    datetime/timedelta set

    this is pretty strict in that a datetime/timedelta is REQUIRED
    in addition to possible nulls/string likes

    ONLY strings are NOT datetimelike

    Parameters
    ----------
    value : np.array / Series / Index / list-like
    convert_dates : boolean, default False
       if True try really hard to convert dates (such as datetime.date), other
       leave inferred dtype 'date' alone

    """
    # already-datetimelike containers pass through untouched
    if isinstance(value, (ABCDatetimeIndex, ABCPeriodIndex)):
        return value
    elif isinstance(value, ABCSeries):
        if isinstance(value._values, ABCDatetimeIndex):
            return value._values

    v = value
    if not is_list_like(v):
        v = [v]
    v = np.array(v, copy=False)

    # work on a 1-d view; remember the shape so results can be restored
    shape = v.shape
    if not v.ndim == 1:
        v = v.ravel()

    if len(v):

        def _try_datetime(v):
            # safe coerce to datetime64
            try:
                v = tslib.array_to_datetime(v, errors='raise')
            except ValueError:

                # we might have a sequence of the same-datetimes with tz's
                # if so coerce to a DatetimeIndex; if they are not the same,
                # then these stay as object dtype
                try:
                    from pandas import to_datetime
                    return to_datetime(v)
                except:
                    pass

            except:
                pass

            return v.reshape(shape)

        def _try_timedelta(v):
            # safe coerce to timedelta64

            # will try first with a string & object conversion
            from pandas import to_timedelta
            try:
                return to_timedelta(v)._values.reshape(shape)
            except:
                return v

        # do a quick inference for perf: only the first few elements are
        # sampled, so the decision is made cheaply
        sample = v[:min(3, len(v))]
        inferred_type = lib.infer_dtype(sample)

        if (inferred_type in ['datetime', 'datetime64'] or
                (convert_dates and inferred_type in ['date'])):
            value = _try_datetime(v)
        elif inferred_type in ['timedelta', 'timedelta64']:
            value = _try_timedelta(v)

        # It's possible to have nulls intermixed within the datetime or
        # timedelta.  These will in general have an inferred_type of 'mixed',
        # so have to try both datetime and timedelta.

        # try timedelta first to avoid spurious datetime conversions
        # e.g. '00:00:01' is a timedelta but technically is also a datetime
        elif inferred_type in ['mixed']:
            if lib.is_possible_datetimelike_array(_ensure_object(v)):
                value = _try_timedelta(v)
                # still mixed after the timedelta attempt: fall back to
                # datetime conversion
                if lib.infer_dtype(value) in ['mixed']:
                    value = _try_datetime(v)

    return value
def _possibly_downcast_to_dtype(result, dtype):
    """ try to cast to the specified dtype (e.g. convert back to bool/int
    or could be an astype of float64->float32

    Best-effort: any failure along the way returns ``result`` unchanged.
    """
    if is_scalar(result):
        return result

    # identity transform by default; may be swapped for rounding below
    def trans(x):
        return x

    if isinstance(dtype, string_types):
        if dtype == 'infer':
            # derive the target dtype from the data itself
            inferred_type = lib.infer_dtype(_ensure_object(result.ravel()))
            if inferred_type == 'boolean':
                dtype = 'bool'
            elif inferred_type == 'integer':
                dtype = 'int64'
            elif inferred_type == 'datetime64':
                dtype = 'datetime64[ns]'
            elif inferred_type == 'timedelta64':
                dtype = 'timedelta64[ns]'

            # try to upcast here
            elif inferred_type == 'floating':
                dtype = 'int64'
                if issubclass(result.dtype.type, np.number):

                    # round before the int cast so .5 values don't truncate
                    def trans(x):  # noqa
                        return x.round()
            else:
                dtype = 'object'

    if isinstance(dtype, string_types):
        dtype = np.dtype(dtype)

    try:

        # don't allow upcasts here (except if empty)
        if dtype.kind == result.dtype.kind:
            if (result.dtype.itemsize <= dtype.itemsize and
                    np.prod(result.shape)):
                return result

        if issubclass(dtype.type, np.floating):
            return result.astype(dtype)
        elif is_bool_dtype(dtype) or is_integer_dtype(dtype):

            # if we don't have any elements, just astype it
            if not np.prod(result.shape):
                return trans(result).astype(dtype)

            # do a test on the first element, if it fails then we are done
            r = result.ravel()
            arr = np.array([r[0]])

            # if we have any nulls, then we are done
            if isnull(arr).any() or not np.allclose(
                    arr, trans(arr).astype(dtype)):
                return result

            # a comparable, e.g. a Decimal may slip in here
            elif not isinstance(r[0], (np.integer, np.floating, np.bool,
                                       int, float, bool)):
                return result

            if (issubclass(result.dtype.type, (np.object_, np.number)) and
                    notnull(result).all()):
                new_result = trans(result).astype(dtype)
                try:
                    if np.allclose(new_result, result):
                        return new_result
                except:

                    # comparison of an object dtype with a number type could
                    # hit here
                    if (new_result == result).all():
                        return new_result

        # a datetimelike
        # GH12821, iNaT is casted to float
        elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']:
            try:
                result = result.astype(dtype)
            except:
                if dtype.tz:
                    # convert to datetime and change timezone
                    from pandas import to_datetime
                    result = to_datetime(result).tz_localize(dtype.tz)

    except:
        # any unexpected failure: hand back the input untouched
        pass

    return result
def test_infer_dtype_datetime(self):
    """lib.infer_dtype classification of datetime-like object arrays."""
    arr = np.array([Timestamp('2011-01-01'), Timestamp('2011-01-02')])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    arr = np.array([np.datetime64('2011-01-01'),
                    np.datetime64('2011-01-01')], dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'datetime64')

    arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    # starts with nan: leading/trailing missing values don't change
    # the inference
    for n in [pd.NaT, np.nan]:
        arr = np.array([n, pd.Timestamp('2011-01-02')])
        self.assertEqual(lib.infer_dtype(arr), 'datetime')

        arr = np.array([n, np.datetime64('2011-01-02')])
        self.assertEqual(lib.infer_dtype(arr), 'datetime64')

        arr = np.array([n, datetime(2011, 1, 1)])
        self.assertEqual(lib.infer_dtype(arr), 'datetime')

        arr = np.array([n, pd.Timestamp('2011-01-02'), n])
        self.assertEqual(lib.infer_dtype(arr), 'datetime')

        arr = np.array([n, np.datetime64('2011-01-02'), n])
        self.assertEqual(lib.infer_dtype(arr), 'datetime64')

        arr = np.array([n, datetime(2011, 1, 1), n])
        self.assertEqual(lib.infer_dtype(arr), 'datetime')

    # different type of nat: a timedelta NaT poisons a datetime array
    arr = np.array([np.timedelta64('nat'),
                    np.datetime64('2011-01-02')], dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    arr = np.array([np.datetime64('2011-01-02'),
                    np.timedelta64('nat')], dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    # mixed datetime: Timestamp and datetime still infer as 'datetime'
    arr = np.array([datetime(2011, 1, 1), pd.Timestamp('2011-01-02')])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    # should be datetime?
    arr = np.array([np.datetime64('2011-01-01'),
                    pd.Timestamp('2011-01-02')])
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    arr = np.array([pd.Timestamp('2011-01-02'),
                    np.datetime64('2011-01-01')])
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    # datetimes mixed with plain numbers/strings
    arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1])
    self.assertEqual(lib.infer_dtype(arr), 'mixed-integer')

    arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1])
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')])
    self.assertEqual(lib.infer_dtype(arr), 'mixed')
def test_infer_dtype_timedelta(self):
    """lib.infer_dtype classification of timedelta-like object arrays."""
    # homogeneous arrays of each timedelta flavour
    arr = np.array([pd.Timedelta('1 days'), pd.Timedelta('2 days')])
    self.assertEqual(lib.infer_dtype(arr), 'timedelta')

    arr = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D')],
                   dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'timedelta')

    arr = np.array([timedelta(1), timedelta(2)])
    self.assertEqual(lib.infer_dtype(arr), 'timedelta')

    # starts with nan: leading/trailing missing values don't change
    # the inference, for every flavour of timedelta
    for missing in [pd.NaT, np.nan]:
        for td in [Timedelta('1 days'), np.timedelta64(1, 'D'),
                   timedelta(1)]:
            self.assertEqual(
                lib.infer_dtype(np.array([missing, td])), 'timedelta')
            self.assertEqual(
                lib.infer_dtype(np.array([missing, td, missing])),
                'timedelta')

    # different type of nat: a datetime NaT poisons a timedelta array
    arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')],
                   dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')],
                   dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'mixed')
def test_infer_dtype_all_nan_nat_like(self):
    """lib.infer_dtype on arrays that contain only missing values:
    the flavour of NaT/nan determines the inferred dtype."""
    arr = np.array([np.nan, np.nan])
    self.assertEqual(lib.infer_dtype(arr), 'floating')

    # nan and None mix results in mixed
    arr = np.array([np.nan, np.nan, None])
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    arr = np.array([None, np.nan, np.nan])
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    # pd.NaT (alone or with nan/None) infers as datetime
    arr = np.array([pd.NaT])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    arr = np.array([pd.NaT, np.nan])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    arr = np.array([np.nan, pd.NaT])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    arr = np.array([np.nan, pd.NaT, np.nan])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    arr = np.array([None, pd.NaT, None])
    self.assertEqual(lib.infer_dtype(arr), 'datetime')

    # np.datetime64(nat) dominates: infers as datetime64
    arr = np.array([np.datetime64('nat')])
    self.assertEqual(lib.infer_dtype(arr), 'datetime64')

    for n in [np.nan, pd.NaT, None]:
        arr = np.array([n, np.datetime64('nat'), n])
        self.assertEqual(lib.infer_dtype(arr), 'datetime64')

        arr = np.array([pd.NaT, n, np.datetime64('nat'), n])
        self.assertEqual(lib.infer_dtype(arr), 'datetime64')

    # np.timedelta64(nat) dominates: infers as timedelta
    arr = np.array([np.timedelta64('nat')], dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'timedelta')

    for n in [np.nan, pd.NaT, None]:
        arr = np.array([n, np.timedelta64('nat'), n])
        self.assertEqual(lib.infer_dtype(arr), 'timedelta')

        arr = np.array([pd.NaT, n, np.timedelta64('nat'), n])
        self.assertEqual(lib.infer_dtype(arr), 'timedelta')

    # datetime / timedelta mixed: conflicting NaT flavours give 'mixed'
    arr = np.array([pd.NaT, np.datetime64('nat'), np.timedelta64('nat'),
                    np.nan])
    self.assertEqual(lib.infer_dtype(arr), 'mixed')

    arr = np.array([np.timedelta64('nat'), np.datetime64('nat')],
                   dtype=object)
    self.assertEqual(lib.infer_dtype(arr), 'mixed')