def execute_cast_series_timestamp(op, data, type, **kwargs):
    """Cast a series to the timestamp type ``type``.

    Parameters
    ----------
    op
        Cast operation; the source type is read from ``op.arg.type()``.
    data
        Series to cast. It must support ``astype`` and, for string/integer
        sources, ``map_partitions`` and ``head`` (presumably a dask series --
        confirm against the caller).
    type
        Target timestamp type; its ``timezone`` attribute selects the
        timezone of the result (``None`` means timezone-naive).
    **kwargs
        Accepted and ignored.

    Returns
    -------
    ``data`` unchanged when the source type already equals ``type``,
    otherwise the converted series.

    Raises
    ------
    TypeError
        If the source type is not a timestamp, date, string or integer.
    """
    from_type = op.arg.type()

    # Source and target types already agree: nothing to do.
    if from_type.equals(type):
        return data

    zone = type.timezone

    if isinstance(from_type, (dt.Timestamp, dt.Date)):
        if zone is None:
            target_dtype = 'M8[ns]'
        else:
            target_dtype = DatetimeTZDtype('ns', zone)
        return data.astype(target_dtype)

    if not isinstance(from_type, (dt.String, dt.Integer)):
        raise TypeError(f"Don't know how to cast {from_type} to {type}")

    parsed = data.map_partitions(
        to_datetime,
        infer_datetime_format=True,
        meta=(data.name, 'datetime64[ns]'),
    )
    # The ``meta`` dtype above is declared up front; re-cast to the dtype
    # observed on the first row so the tz check below sees the real dtype.
    # TODO - is there a better way to do this
    observed_dtype = parsed.head(1).dtype
    parsed = parsed.astype(observed_dtype)

    # tz-aware values are converted to the target zone, naive ones localized.
    already_aware = getattr(parsed.dtype, "tz", None) is not None
    if already_aware:
        return parsed.dt.tz_convert(zone)
    return parsed.dt.tz_localize(zone)
def execute_cast_series_timestamp(op, data, type, **kwargs):
    """Cast a pandas series to the timestamp type ``type``.

    Parameters
    ----------
    op
        Cast operation; the source type is read from ``op.arg.type()``.
    data
        Series to cast. Its ``values``, ``index`` and ``name`` are used when
        the source type is a string or integer.
    type
        Target timestamp type; its ``timezone`` attribute selects the
        timezone of the result (``None`` means timezone-naive).
    **kwargs
        Accepted and ignored.

    Returns
    -------
    ``data`` unchanged when the source type already equals ``type``,
    otherwise a series holding the converted values.

    Raises
    ------
    TypeError
        If the source type is not a timestamp, date, string or integer.
    """
    from_type = op.arg.type()

    # Source and target types already agree: nothing to do.
    if from_type.equals(type):
        return data

    zone = type.timezone

    if isinstance(from_type, (dt.Timestamp, dt.Date)):
        if zone is None:
            target_dtype = 'M8[ns]'
        else:
            target_dtype = DatetimeTZDtype('ns', zone)
        return data.astype(target_dtype)

    if not isinstance(from_type, (dt.String, dt.Integer)):
        raise TypeError(
            "Don't know how to cast {} to {}".format(from_type, type)
        )

    parsed = pd.to_datetime(data.values, infer_datetime_format=True)

    # Naive results are localized to the target zone; tz-aware results are
    # converted into it.
    if getattr(parsed.dtype, "tz", None) is None:
        parsed = parsed.tz_localize(zone)
    else:
        parsed = parsed.tz_convert(zone)

    # Parsing worked on the raw values, so re-attach the index and name.
    return pd.Series(parsed, index=data.index, name=data.name)
def ibis_dtype_to_pandas(ibis_dtype):
    """Map an ibis data type onto the matching pandas / numpy dtype.

    Parameters
    ----------
    ibis_dtype
        The ibis type to translate; must be a ``dt.DataType`` instance.

    Returns
    -------
    A ``DatetimeTZDtype`` for timestamps that carry a timezone, a
    ``timedelta64`` dtype in the interval's unit for intervals, a
    ``CategoricalDtype`` for categories, the ``_ibis_dtypes`` entry for the
    exact class of ``ibis_dtype`` when there is one, and the numpy object
    dtype otherwise.
    """
    assert isinstance(ibis_dtype, dt.DataType)

    # Timestamps without a timezone fall through to the table lookup below.
    if isinstance(ibis_dtype, dt.Timestamp) and ibis_dtype.timezone:
        return DatetimeTZDtype('ns', ibis_dtype.timezone)

    if isinstance(ibis_dtype, dt.Interval):
        unit = ibis_dtype.unit
        return np.dtype('timedelta64[{}]'.format(unit))

    if isinstance(ibis_dtype, dt.Category):
        return CategoricalDtype()

    # The table is keyed by the exact class, so subclasses do not match.
    exact_class = type(ibis_dtype)
    if exact_class in _ibis_dtypes:
        return _ibis_dtypes[exact_class]

    return np.dtype(np.object_)
def convert_datetime64_to_timestamp(in_dtype, out_dtype, column):
    """Convert ``column`` to the timestamp type described by ``out_dtype``.

    Parameters
    ----------
    in_dtype
        Dtype of the incoming column; its ``type`` attribute is compared
        against ``np.datetime64``.
    out_dtype
        Target type. ``to_pandas()`` is used on the datetime64 fast path and
        ``timezone`` on every other path.
    column
        The series to convert.

    Returns
    -------
    The converted series.

    Raises
    ------
    TypeError
        If parsing overflows the datetime bounds and the inferred dtype of
        ``column`` is neither a date type nor a string type.
    """
    # Fast path: the column is already datetime64, so a plain cast suffices.
    if in_dtype.type == np.datetime64:
        pandas_dtype = out_dtype.to_pandas()
        return column.astype(pandas_dtype, errors='ignore')

    try:
        parsed = pd.to_datetime(column, utc=True)
    except pd.errors.OutOfBoundsDatetime:
        # Values do not fit the datetime bounds: convert element by element.
        kind = infer_pandas_dtype(column, skipna=True)

        if kind in PANDAS_DATE_TYPES:
            # not great, but not really any other option
            to_target_zone = partial(
                convert_timezone, timezone=out_dtype.timezone
            )
            return column.map(to_target_zone)

        if kind in PANDAS_STRING_TYPES:
            return column.map(dateutil.parser.parse)

        raise TypeError(
            ('Conversion to timestamp not supported for Series of type '
             '{!r}').format(kind))

    # Parsing succeeded in UTC; pin the dtype, then shift to the target zone.
    as_utc = parsed.astype(DatetimeTZDtype('ns', 'UTC'))
    return as_utc.dt.tz_convert(out_dtype.timezone)
(np.float64, dt.float64), (np.double, dt.double), (np.str_, dt.string), (np.datetime64, dt.timestamp), (np.timedelta64, dt.interval), ], ) def test_numpy_dtype(numpy_dtype, ibis_dtype): assert dt.dtype(np.dtype(numpy_dtype)) == ibis_dtype @pytest.mark.parametrize( ('dask_dtype', 'ibis_dtype'), [ ( DatetimeTZDtype(tz='US/Eastern', unit='ns'), dt.Timestamp('US/Eastern'), ), (CategoricalDtype(), dt.Category()), ], ) def test_dask_dtype(dask_dtype, ibis_dtype): assert dt.dtype(dask_dtype) == ibis_dtype def test_series_to_ibis_literal(core_client): values = [1, 2, 3, 4] s = dd.from_pandas(pd.Series(values), npartitions=1) expr = ibis.array(s)