Exemple #1
0
def _normalized_dshape(input_dshape, utc=False):
    """
    Build a ``var * Record`` dshape from the fields of `input_dshape`.

    Fields named `AD_FIELD_NAME` or `TS_FIELD_NAME` are retyped as
    ``DateTime(tz='UTC')`` when `utc` is true and as a plain ``DateTime()``
    otherwise. Every other field keeps its type exactly as given; Option
    wrappers are not unwrapped here (``option[int]`` stays ``option[int]``).
    """
    # An empty kwargs dict yields the same call as ``DateTime()``.
    tz_kwargs = {'tz': 'UTC'} if utc else {}
    source_fields = OrderedDict(input_dshape.measure.fields)
    record_fields = [
        [
            column,
            DateTime(**tz_kwargs)
            if column in (AD_FIELD_NAME, TS_FIELD_NAME)
            else column_type,
        ]
        for column, column_type in source_fields.items()
    ]
    return var * Record(record_fields)
Exemple #2
0
def gen_odo_kwargs(expr, utc=False):
    """Build the keyword arguments for an odo conversion.

    Arguments:
        expr {Expr} -- blaze expression whose ``dshape.measure.fields``
            are used to build the target dshape

    Keyword Arguments:
        utc {bool} -- type the date fields as ``DateTime(tz='UTC')``
            instead of a plain ``DateTime()`` (default: {False})

    Returns:
        dict -- a single-entry mapping ``{'dshape': var * Record(...)}``
    """
    source_fields = OrderedDict(expr.dshape.measure.fields)
    record_fields = []
    for column, column_type in source_fields.items():
        if column in (AD_FIELD_NAME, TS_FIELD_NAME):
            # Date columns are always retyped, whatever their input type.
            column_type = DateTime(tz='UTC') if utc else DateTime()
        elif isinstance(column_type, Option):
            # Unwrap Option so the field carries its underlying type.
            column_type = column_type.ty
        record_fields.append([column, column_type])
    return {'dshape': var * Record(record_fields)}
Exemple #3
0
def dshape_from_pandas(col):
    """Return the datashape measure describing a pandas column.

    Categorical columns map to ``Categorical`` over their category list.
    Datetime columns (dtype kind ``'M'``) map to ``Option(DateTime(tz=...))``,
    with the timezone passed as a string or ``None`` when the dtype has no
    ``tz``. Any other dtype goes through ``CType.from_numpy_dtype``; a result
    equal to ``object_`` is replaced by ``string``, and the result is wrapped
    in ``Option`` when it is a member of ``possibly_missing``.
    """
    dtype = col.dtype

    if isinstance(dtype, categorical):
        return Categorical(col.cat.categories.tolist())

    if dtype.kind == 'M':
        zone = getattr(dtype, 'tz', None)
        # The dtype carries a tzinfo object; DateTime is given its str() form.
        zone_name = None if zone is None else str(zone)
        return Option(DateTime(tz=zone_name))

    measure = datashape.CType.from_numpy_dtype(dtype)
    if measure == object_:
        measure = string
    if measure in possibly_missing:
        return Option(measure)
    return measure
Exemple #4
0
def _check_datetime_field(name, measure):
    """Ensure that field ``name`` of ``measure`` is a date or datetime.

    Parameters
    ----------
    name : str
        Name of the field to look up in ``measure``.
    measure : Record
        Record whose field is validated.

    Raises
    ------
    TypeError
        If ``measure[name]`` is neither a ``Date`` nor a ``DateTime``.
    """
    field_type = measure[name]
    if isinstance(field_type, (Date, DateTime)):
        return

    message = "'{name}' field must be a '{dt}', not: '{dshape}'".format(
        name=name,
        dt=DateTime(),
        dshape=field_type,
    )
    raise TypeError(message)
Exemple #5
0
def test_datetimetz_pandas():
    """discover() reports the timezone of naive and tz-aware datetime columns.

    Checks both the whole frame and each column on its own; every column is
    expected to be an ``Option(DateTime)`` carrying that column's timezone.
    """
    zones = ('Europe/Moscow', 'UTC', 'US/Eastern')

    # One naive column plus one column per timezone, named after the zone.
    columns = OrderedDict()
    columns['naive'] = pd.date_range('2014', periods=5)
    for zone in zones:
        columns[zone] = pd.date_range('2014', periods=5, tz=zone)
    df = pd.DataFrame(columns)

    expected_frame = 5 * Record[
        'naive':Option(DateTime(tz=None)),
        'Europe/Moscow':Option(DateTime(tz='Europe/Moscow')),
        'UTC':Option(DateTime(tz='UTC')),
        'US/Eastern':Option(DateTime(tz='US/Eastern')),
    ]
    assert_dshape_equal(discover(df), expected_frame)

    assert_dshape_equal(discover(df.naive), 5 * Option(DateTime(tz=None)))
    for zone in zones:
        assert_dshape_equal(
            discover(df[zone]),
            5 * Option(DateTime(tz=zone)),
        )
Exemple #6
0
def test_datetime_index(tz):
    """discover() on a three-element DatetimeIndex keeps the index timezone.

    ``tz`` is supplied from outside this function and is passed both to the
    index constructor and to the expected ``DateTime``.
    """
    dates = ['2014-01-01', '2014-01-02', '2014-01-03']
    index = pd.DatetimeIndex(dates, tz=tz)

    assert_dshape_equal(discover(index), 3 * Option(DateTime(tz=tz)))