Exemple #1
0
def test_empty_tz():
    """Building a UTC tz-aware column with ``empty`` must emit no warnings.

    Warnings are captured inside a ``warnings.catch_warnings`` scope so that
    the filter changes made here are undone on exit and cannot affect other
    tests in the session.
    """
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones that would normally be shown
        # only once per location or be ignored by default filters.
        warnings.simplefilter("always")
        empty([DatetimeTZDtype(unit="ns", tz="UTC")], 10, cols=['a'],
              timezones={'a': 'UTC'})

    assert len(caught) == 0, [str(w.message) for w in caught]
Exemple #2
0
def test_timestamps():
    """Check timezone handling of datetime columns created by ``empty``.

    Covers a single datetime column with and without a timezone, a datetime
    column next to an integer column, a ``timezones`` entry for a
    non-datetime column, and two datetime columns with equal, missing and
    differing timezones.
    """
    z = 'US/Eastern'

    # single column
    df, views = empty('M8', 100, cols=['t'])
    assert df.t.dt.tz is None
    assert views['t'].dtype.kind == "M"

    df, views = empty('M8', 100, cols=['t'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"

    # one time column, one normal
    df, views = empty('M8,i', 100, cols=['t', 'i'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # no effect of timezones= on non-time column
    df, views = empty('M8,i', 100, cols=['t', 'i'], timezones={'t': z, 'i': z})
    assert df.t.dt.tz.zone == z
    assert df.i.dtype.kind == 'i'
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # multi-timezones
    z2 = 'US/Central'
    df, views = empty('M8,M8',
                      100,
                      cols=['t1', 't2'],
                      timezones={
                          't1': z,
                          't2': z
                      })
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z

    # only one of the two datetime columns is given a timezone
    df, views = empty('M8,M8', 100, cols=['t1', 't2'], timezones={'t1': z})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz is None

    df, views = empty('M8,M8',
                      100,
                      cols=['t1', 't2'],
                      timezones={
                          't1': z,
                          't2': 'UTC'
                      })
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == 'UTC'

    df, views = empty('M8,M8',
                      100,
                      cols=['t1', 't2'],
                      timezones={
                          't1': z,
                          't2': z2
                      })
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z2
Exemple #3
0
def read_avro_bytes(URL, open_with, start_byte, length, header, nrows=None):
    """Pass a specific file/bytechunk and convert to dataframe with cyavro

    Both a python dict version of the header, and the original bytes that
    define it, are required. The bytes are prepended to the data, so that the
    C avro reader can interpret them.

    Parameters
    ----------
    URL:
        Location handed to ``open_with``.
    open_with: callable
        Called as ``open_with(URL, 'rb')``; must return a context manager
        yielding a seekable, readable binary file object.
    start_byte: int
        Offset to seek to before reading. When it is 0 the header is re-read
        from the file (replacing the ``header`` argument) and reading starts
        at ``header['header_size']``.
    length: int
        Number of bytes to read after the seek.
    header: dict
        Must provide ``'head_bytes'`` and ``'dtypes'``; ``'schema'`` is
        consulted when data columns are copied. When ``nrows`` is None,
        its ``'blocks'`` entry is reset and refilled by ``scan_blocks``.
    nrows: int or None
        Number of rows in the chunk. If None, it is computed by scanning the
        blocks of the data that was read.

    Returns
    -------
    The dataframe created by ``empty`` with the decoded values written in.
    """
    # Used both as the reader buffer size and as the row stride when copying
    # each decoded chunk into the output frame.
    chunk_rows = 10000

    with open_with(URL, 'rb') as infile:
        infile.seek(start_byte)
        if start_byte == 0:
            header = read_header(infile)
            infile.seek(header['header_size'])
        data = header['head_bytes'] + infile.read(length)

    if nrows is None:
        header['blocks'] = []
        scan_blocks(io.BytesIO(data), header, len(data))
        nrows = sum(block['nrows'] for block in header['blocks'])

    reader = cyavro.AvroReader()
    reader.init_bytes(data)
    df, _ = empty(header['dtypes'].values(), nrows, cols=header['dtypes'])
    reader.init_reader()
    reader.init_buffers(chunk_rows)

    # Schema field per column, resolved on first use so the linear search of
    # the schema is not repeated for every chunk.
    field_by_col = {}
    for start in range(0, nrows, chunk_rows):
        chunk = reader.read_chunk()
        for col in chunk:
            if col not in field_by_col:
                # First matching field wins; IndexError if the column is
                # absent from the schema.
                field_by_col[col] = [fld for fld in header['schema']['fields']
                                     if fld['name'] == col][0]
            field = field_by_col[col]
            if 'logicalType' in field:
                df[col].values[start:start + chunk_rows] = time_convert(
                    chunk[col], field)
            else:
                df[col].values[start:start + chunk_rows] = chunk[col]
    return df
def test_empty():
    """Exercise ``empty`` for categorical columns and a mixed-dtype frame."""
    n_rows = 100

    # Categorical column without explicit categories.
    frame, arrays = empty('category', size=n_rows, cols=['c'])
    assert frame.shape == (n_rows, 1)
    assert frame.dtypes.tolist() == ['category']
    assert arrays['c'].dtype == 'int16'

    # A large category count is requested through ``cats``.
    n_cats = 2 ** 20
    frame, arrays = empty('category', size=n_rows, cols=['c'],
                          cats={'c': n_cats})
    assert frame.shape == (n_rows, 1)
    assert frame.dtypes.tolist() == ['category']
    assert arrays['c'].dtype == 'int32'

    # Explicit labels: writing a code into the view shows up in the frame.
    labels = ['one', 'two']
    frame, arrays = empty('category', size=n_rows, cols=['c'],
                          cats={'c': labels})
    arrays['c'][0] = 1
    assert frame['c'][:2].tolist() == ['two', 'one']

    # Several plain dtypes given as one comma-separated string.
    names = ['i4', 'i8', 'f8_1', 'f8_2', 'O']
    frame, arrays = empty('i4,i8,f8,f8,O', size=n_rows, cols=names)
    assert frame.shape == (n_rows, 5)
    assert len(arrays) == 5
Exemple #5
0
def test_empty():
    """Exercise ``empty`` for categorical columns and a mixed-dtype frame."""
    n_rows = 100

    # Categorical column without explicit categories.
    frame, arrays = empty("category", size=n_rows, cols=["c"])
    assert frame.shape == (n_rows, 1)
    assert frame.dtypes.tolist() == ["category"]
    assert arrays["c"].dtype == "int16"

    # A large category count is requested through ``cats``.
    n_cats = 2 ** 20
    frame, arrays = empty("category", size=n_rows, cols=["c"],
                          cats={"c": n_cats})
    assert frame.shape == (n_rows, 1)
    assert frame.dtypes.tolist() == ["category"]
    assert arrays["c"].dtype == "int32"

    # Explicit labels: writing a code into the view shows up in the frame.
    labels = ["one", "two"]
    frame, arrays = empty("category", size=n_rows, cols=["c"],
                          cats={"c": labels})
    arrays["c"][0] = 1
    assert frame["c"][:2].tolist() == ["two", "one"]

    # Several plain dtypes given as one comma-separated string.
    names = ["i4", "i8", "f8_1", "f8_2", "O"]
    frame, arrays = empty("i4,i8,f8,f8,O", size=n_rows, cols=names)
    assert frame.shape == (n_rows, 5)
    assert len(arrays) == 5
def test_empty_tz_nonutc():
    """A non-UTC zone must be applied to both the index and a data column."""
    zone = "CET"
    tz_by_name = {'a': zone, 'index': zone}

    frame, _ = empty(
        types=[DatetimeTZDtype(unit="ns", tz=zone)],
        size=8784,
        cols=['a'],
        timezones=tz_by_name,
        index_types=["datetime64[ns]"],
        index_names=["index"],
    )

    assert frame.index.tz.zone == zone
    assert frame.a.dtype.tz.zone == zone
def test_empty():
    """Exercise ``empty`` for categorical columns and a mixed-dtype frame."""
    size = 100
    cat_col = ["c"]

    # No categories given.
    out, codes = empty("category", size=size, cols=cat_col)
    assert out.shape == (size, 1)
    assert out.dtypes.tolist() == ["category"]
    assert codes["c"].dtype == "int16"

    # Category count given as an integer.
    out, codes = empty("category", size=size, cols=cat_col,
                       cats={"c": 2 ** 20})
    assert out.shape == (size, 1)
    assert out.dtypes.tolist() == ["category"]
    assert codes["c"].dtype == "int32"

    # Category labels given explicitly; set the first code and read it back.
    out, codes = empty("category", size=size, cols=cat_col,
                       cats={"c": ["one", "two"]})
    codes["c"][0] = 1
    first_two = out["c"][:2].tolist()
    assert first_two == ["two", "one"]

    # Five columns of plain dtypes.
    out, codes = empty("i4,i8,f8,f8,O", size=size,
                       cols=["i4", "i8", "f8_1", "f8_2", "O"])
    assert out.shape == (size, 5)
    assert len(codes) == 5
def test_timestamps():
    """Check timezone handling of datetime columns created by ``empty``.

    Covers a single datetime column with and without a timezone, a datetime
    column next to an integer column, a ``timezones`` entry for a
    non-datetime column, and two datetime columns with equal, missing and
    differing timezones.
    """
    z = 'US/Eastern'

    # single column
    df, views = empty('M8', 100, cols=['t'])
    assert df.t.dt.tz is None
    assert views['t'].dtype.kind == "M"

    df, views = empty('M8', 100, cols=['t'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"

    # one time column, one normal
    df, views = empty('M8,i', 100, cols=['t', 'i'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # no effect of timezones= on non-time column
    df, views = empty('M8,i', 100, cols=['t', 'i'], timezones={'t': z, 'i': z})
    assert df.t.dt.tz.zone == z
    assert df.i.dtype.kind == 'i'
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # multi-timezones
    z2 = 'US/Central'
    df, views = empty('M8,M8', 100, cols=['t1', 't2'], timezones={'t1': z,
                                                                  't2': z})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z

    # only one of the two datetime columns is given a timezone
    df, views = empty('M8,M8', 100, cols=['t1', 't2'], timezones={'t1': z})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz is None

    df, views = empty('M8,M8', 100, cols=['t1', 't2'], timezones={'t1': z,
                                                                  't2': 'UTC'})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == 'UTC'

    df, views = empty('M8,M8', 100, cols=['t1', 't2'], timezones={'t1': z,
                                                                  't2': z2})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z2