예제 #1
0
def execute_cast_series_timestamp(op, data, type, **kwargs):
    arg = op.arg
    from_type = arg.type()

    if from_type.equals(type):  # noop cast
        return data

    tz = type.timezone

    if isinstance(from_type, (dt.Timestamp, dt.Date)):
        return data.astype(
            'M8[ns]' if tz is None else DatetimeTZDtype('ns', tz))

    if isinstance(from_type, (dt.String, dt.Integer)):
        timestamps = data.map_partitions(
            to_datetime,
            infer_datetime_format=True,
            meta=(data.name, 'datetime64[ns]'),
        )
        # TODO - is there a better way to do this
        timestamps = timestamps.astype(timestamps.head(1).dtype)
        if getattr(timestamps.dtype, "tz", None) is not None:
            return timestamps.dt.tz_convert(tz)
        else:
            return timestamps.dt.tz_localize(tz)

    raise TypeError(f"Don't know how to cast {from_type} to {type}")
예제 #2
0
def execute_cast_series_timestamp(op, data, type, **kwargs):
    arg = op.arg
    from_type = arg.type()

    if from_type.equals(type):  # noop cast
        return data

    tz = type.timezone

    if isinstance(from_type, (dt.Timestamp, dt.Date)):
        return data.astype(
            'M8[ns]' if tz is None else DatetimeTZDtype('ns', tz)
        )

    if isinstance(from_type, (dt.String, dt.Integer)):
        timestamps = pd.to_datetime(data.values, infer_datetime_format=True)
        if getattr(timestamps.dtype, "tz", None) is not None:
            method_name = "tz_convert"
        else:
            method_name = "tz_localize"
        method = getattr(timestamps, method_name)
        timestamps = method(tz)
        return pd.Series(timestamps, index=data.index, name=data.name)

    raise TypeError("Don't know how to cast {} to {}".format(from_type, type))
예제 #3
0
def ibis_dtype_to_pandas(ibis_dtype):
    """Convert ibis dtype to the pandas / numpy alternative"""
    assert isinstance(ibis_dtype, dt.DataType)

    if isinstance(ibis_dtype, dt.Timestamp) and ibis_dtype.timezone:
        return DatetimeTZDtype('ns', ibis_dtype.timezone)
    elif isinstance(ibis_dtype, dt.Interval):
        return np.dtype('timedelta64[{}]'.format(ibis_dtype.unit))
    elif isinstance(ibis_dtype, dt.Category):
        return CategoricalDtype()
    elif type(ibis_dtype) in _ibis_dtypes:
        return _ibis_dtypes[type(ibis_dtype)]
    else:
        return np.dtype(np.object_)
예제 #4
0
def convert_datetime64_to_timestamp(in_dtype, out_dtype, column):
    if in_dtype.type == np.datetime64:
        return column.astype(out_dtype.to_pandas(), errors='ignore')
    try:
        series = pd.to_datetime(column, utc=True)
    except pd.errors.OutOfBoundsDatetime:
        inferred_dtype = infer_pandas_dtype(column, skipna=True)
        if inferred_dtype in PANDAS_DATE_TYPES:
            # not great, but not really any other option
            return column.map(
                partial(convert_timezone, timezone=out_dtype.timezone))
        if inferred_dtype not in PANDAS_STRING_TYPES:
            raise TypeError(
                ('Conversion to timestamp not supported for Series of type '
                 '{!r}').format(inferred_dtype))
        return column.map(dateutil.parser.parse)
    else:
        utc_dtype = DatetimeTZDtype('ns', 'UTC')
        return series.astype(utc_dtype).dt.tz_convert(out_dtype.timezone)
예제 #5
0
        (np.float64, dt.float64),
        (np.double, dt.double),
        (np.str_, dt.string),
        (np.datetime64, dt.timestamp),
        (np.timedelta64, dt.interval),
    ],
)
def test_numpy_dtype(numpy_dtype, ibis_dtype):
    assert dt.dtype(np.dtype(numpy_dtype)) == ibis_dtype


@pytest.mark.parametrize(
    ('dask_dtype', 'ibis_dtype'),
    [
        (
            DatetimeTZDtype(tz='US/Eastern', unit='ns'),
            dt.Timestamp('US/Eastern'),
        ),
        (CategoricalDtype(), dt.Category()),
    ],
)
def test_dask_dtype(dask_dtype, ibis_dtype):
    assert dt.dtype(dask_dtype) == ibis_dtype


def test_series_to_ibis_literal(core_client):
    values = [1, 2, 3, 4]
    s = dd.from_pandas(pd.Series(values), npartitions=1)

    expr = ibis.array(s)