Beispiel #1
0
def test_temp_ssh_files():
    with filetext('name,balance\nAlice,100\nBob,200', extension='csv') as fn:
        csv = CSV(fn)
        scsv = into(Temp(SSH(CSV)), csv, hostname='localhost')
        assert discover(csv) == discover(scsv)

        assert isinstance(scsv, _Temp)
Beispiel #2
0
def test_copy_remote_csv():
    with tmpfile('csv') as target:
        with filetext('name,balance\nAlice,100\nBob,200',
                      extension='csv') as fn:
            csv = resource(fn)
            scsv = into('ssh://localhost:foo.csv', csv)
            assert isinstance(scsv, SSH(CSV))
            assert discover(scsv) == discover(csv)

            # Round trip
            csv2 = into(target, scsv)
            assert into(list, csv) == into(list, csv2)
Beispiel #3
0
def test_slicing_with_lists():
    nx = np.arange(20).reshape((4, 5))
    dx = convert(Array, nx, blockshape=(2, 2))
    sx = symbol('x', discover(dx))

    expr = sx[[2, 0, 3]]
    assert eq(np.array(compute(expr, dx)), compute(expr, nx))

    expr = sx[::2, [2, 0, 3]]
    assert eq(np.array(compute(expr, dx)), compute(expr, nx))

    expr = sx[1, [2, 0, 3]]
    assert eq(np.array(compute(expr, dx)), compute(expr, nx))

    expr = sx[[2, 0, 3], -2]
    assert eq(np.array(compute(expr, dx)), compute(expr, nx))

    expr = sx[:, :]
    assert compute(expr, dx).dask == dx.dask

    expr = sx[0]
    assert eq(np.array(compute(expr, dx)), compute(expr, nx))

    expr = sx[0, [3, 1, 4]]
    assert eq(np.array(compute(expr, dx)), compute(expr, nx))
Beispiel #4
0
def test_pandas_discover_on_gzipped_files():
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02',
                  open=gzip.open,
                  extension='.csv.gz') as fn:
        csv = CSV(fn, has_header=True)
        ds = datashape.dshape('var * {name: string, when: datetime}')
        assert discover(csv) == ds
Beispiel #5
0
def test_pandas_loads_in_datetimes_naively():
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02') as fn:
        csv = CSV(fn, has_header=True)
        ds = datashape.dshape('var * {name: ?string, when: ?datetime}')
        assert discover(csv) == ds

        df = convert(pd.DataFrame, csv)
        assert df.dtypes['when'] == 'M8[ns]'
Beispiel #6
0
def test_pandas_loads_in_datetimes_naively():
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02') as fn:
        csv = CSV(fn, has_header=True)
        ds = datashape.dshape('var * {name: string, when: datetime}')
        assert discover(csv) == ds

        df = convert(pd.DataFrame, csv)
        assert df.dtypes['when'] == 'M8[ns]'
Beispiel #7
0
def test_slicing_on_boundary_lines():
    nx = np.arange(100).reshape((10, 10))
    dx = convert(Array, nx, blockshape=(3, 3))
    sx = symbol('x', discover(dx))
    expr = sx[0, [1, 3, 9, 3]]

    result = compute(expr, dx)
    assert eq(result, nx[0, [1, 3, 9, 3]])
Beispiel #8
0
def test_multiple_object_ids():
    data = [{
        'x': 1,
        'y': 2,
        'other': ObjectId('1' * 24)
    }, {
        'x': 3,
        'y': 4,
        'other': ObjectId('2' * 24)
    }]
    with coll(data) as c:
        assert discover(c) == dshape('2 * {x: int64, y: int64}')

        assert convert(list, c) == [(1, 2), (3, 4)]
Beispiel #9
0
def test_slicing_with_newaxis():
    nx = np.arange(20).reshape((4, 5))
    dx = convert(Array, nx, blockshape=(2, 2))
    sx = symbol('x', discover(dx))

    expr = sx[:, None, :]
    result = compute(expr, dx)

    assert result.shape == (4, 1, 5)
    assert result.blockdims == ((2, 2), (1, ), (2, 2, 1))
    assert eq(np.array(result), compute(expr, nx))

    expr = sx[None, [2, 1, 3], None, None, :, None]
    result = compute(expr, dx)

    assert result.shape == (1, 3, 1, 1, 5, 1)
    assert result.blockdims == ((1, ), (3, ), (1, ), (1, ), (2, 2, 1), (1, ))
    assert eq(np.array(result), compute(expr, nx))
Beispiel #10
0
def test_discover_csv_yields_string_on_totally_empty_columns():
    expected = dshape('var * {a: int64, b: string, c: int64}')
    with filetext('a,b,c\n1,,3\n4,,6\n7,,9') as fn:
        csv = CSV(fn, has_header=True)
        assert discover(csv) == expected
Beispiel #11
0
def test_discover_csv_without_columns():
    with filetext('Alice,100\nBob,200', extension='csv') as fn:
        csv = CSV(fn)
        ds = discover(csv)
        assert '100' not in str(ds)
Beispiel #12
0

def eq(a, b):
    if isinstance(a, Array):
        a = a.compute()
    if isinstance(b, Array):
        b = b.compute()
    c = a == b
    if isinstance(c, np.ndarray):
        c = c.all()
    return c


nx = np.arange(600).reshape((20, 30))
dx = convert(Array, nx, blockshape=(4, 5))
sx = symbol('x', discover(dx))

ny = np.arange(600).reshape((30, 20))
dy = convert(Array, ny, blockshape=(5, 4))
sy = symbol('y', discover(dy))

na = np.arange(20)
da = convert(Array, na, blockshape=(4, ))
sa = symbol('a', discover(da))

nb = np.arange(30).reshape((30, 1))
db = convert(Array, nb, blockshape=(5, 1))
sb = symbol('b', discover(db))

dask_ns = {sx: dx, sy: dy, sa: da, sb: db}
numpy_ns = {sx: nx, sy: ny, sa: na, sb: nb}
Beispiel #13
0
def test_discover_csv_with_spaces_in_header():
    with filetext(' name,  val\nAlice,100\nBob,200', extension='csv') as fn:
        ds = discover(CSV(fn, has_header=True))
        assert ds.measure.names == ['name', 'val']
Beispiel #14
0
def test_pandas_discover_on_gzipped_files():
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02',
            open=gzip.open, extension='.csv.gz') as fn:
        csv = CSV(fn, has_header=True)
        ds = datashape.dshape('var * {name: string, when: datetime}')
        assert discover(csv) == ds
Beispiel #15
0
def test_discover_csv_with_spaces_in_header():
    with filetext(' name,  val\nAlice,100\nBob,200', extension='csv') as fn:
        ds = discover(CSV(fn, has_header=True))
        assert ds.measure.names == ['name', 'val']
Beispiel #16
0
def test_discover_csv_yields_string_on_totally_empty_columns():
    expected = dshape('var * {a: int64, b: ?string, c: int64}')
    with filetext('a,b,c\n1,,3\n4,,6\n7,,9') as fn:
        csv = CSV(fn, has_header=True)
        assert discover(csv) == expected
Beispiel #17
0
def discover_dask_array(a, **kwargs):
    block = a._get_block(*([0] * a.ndim))
    return DataShape(*(a.shape + (discover(block).measure,)))
Beispiel #18
0
def test_discover():
    with coll(bank) as c:
        assert discover(bank) == discover(c)
Beispiel #19
0
def discover_dask_array(a, **kwargs):
    block = a._get_block(*([0] * a.ndim))
    return DataShape(*(a.shape + (discover(block).measure, )))
Beispiel #20
0
def test_discover_csv_without_columns():
    with filetext('Alice,100\nBob,200', extension='csv') as fn:
        csv = CSV(fn)
        ds = discover(csv)
        assert '100' not in str(ds)
Beispiel #21
0
def test_discover():
    with filetext('name,balance\nAlice,100\nBob,200') as fn:
        local = CSV(fn)
        remote = SSH(CSV)(fn, hostname='localhost')

        assert discover(local) == discover(remote)
Beispiel #22
0
def test_csv_missing_values():
    with filetext('name,val\nAlice,100\nNA,200', extension='csv') as fn:
        csv = CSV(fn)
        assert discover(csv).measure.dict['name'] == Option(string)
Beispiel #23
0
def test_discover_from_resource():
    with filetext('name,balance\nAlice,100\nBob,200', extension='csv') as fn:
        local = CSV(fn)
        remote = resource('ssh://localhost:' + fn)

        assert discover(local) == discover(remote)