Example #1
def test_add_data_to_empty_server(empty_server, serial):
    # add data
    with temp_server() as test:
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = empty_server.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        response2 = empty_server.get('/datashape')
        expected2 = str(discover({'iris': resource(iris_path)}))
        assert response2.data.decode('utf-8') == expected2

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()

        response3 = empty_server.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial)
        )

        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3
Example #2
def test_csv_join():
    d = {"a.csv": "a,b,c\n0,1,2\n3,4,5", "b.csv": "c,d,e\n2,3,4\n5,6,7"}

    with filetexts(d):
        resource_a = resource("a.csv")
        resource_b = resource("b.csv")
        a = symbol("a", discover(resource_a))
        b = symbol("b", discover(resource_b))
        tm.assert_frame_equal(
            odo(compute(join(a, b, "c"), {a: resource_a, b: resource_b}), pd.DataFrame),
            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.array([[2, 0, 1, 3, 4], [5, 3, 4, 6, 7]], dtype="int64"), columns=list("cabde")),
        )
Example #3
def test_concat():
    d = {"a.csv": "a,b\n1,2\n3,4", "b.csv": "a,b\n5,6\n7,8"}

    with filetexts(d):
        a_rsc = resource("a.csv")
        b_rsc = resource("b.csv")

        a = symbol("a", discover(a_rsc))
        b = symbol("b", discover(b_rsc))

        tm.assert_frame_equal(
            odo(compute(concat(a, b), {a: a_rsc, b: b_rsc}), pd.DataFrame),
            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.arange(1, 9, dtype="int64").reshape(4, 2), columns=list("ab")),
        )
Example #4
def sql():
    data = [(1, 2), (10, 20), (100, 200)]
    sql = resource('sqlite:///:memory:',
                   'foo',
                   dshape='var * {x: int, y: int}')
    into(sql, data)
    return sql
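
The fixture above relies on resource() accepting the table name either as a second positional argument or folded into the URI. A minimal sketch of the two equivalent forms, assuming blaze 0.x with its SQL backend installed:

from blaze import resource, into

# Table name as a positional argument ...
t1 = resource('sqlite:///:memory:', 'foo', dshape='var * {x: int, y: int}')
# ... or appended to the URI with the '::' separator (as in later examples).
t2 = resource('sqlite:///:memory:::foo', dshape='var * {x: int, y: int}')

into(t1, [(1, 2), (10, 20), (100, 200)])  # load rows through into()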
Example #5
def test_into_empty_sql():
    """ Test all sources into empty SQL database """
    sources = [v for k, v in data if k not in [list]]
    for a in sources:
        sql_empty = resource('sqlite:///:memory:::accounts',
                             dshape='var * ' + sql_schema)
        assert normalize(into(sql_empty, a)) == normalize(sql)
Example #6
def engine():
    tbl = 'testtable'
    with tmpfile('db') as filename:
        engine = sqlalchemy.create_engine('sqlite:///' + filename)
        t = resource('sqlite:///' + filename + '::' + tbl,
                     dshape='var * {a: int32, b: int32}')
        yield engine, t
Example #7
def sql():
    data = [(1, 2), (10, 20), (100, 200)]
    sql = resource(
        'sqlite:///:memory:', 'foo',
        dshape='var * {x: int, y: int}',
    )
    into(sql, data)
    return sql
Example #8
def test_map_called_on_resource_star():
    r = resource(example('accounts*.csv'))
    s = symbol('s', discover(r))
    flag[0] = False
    a = compute(s.count(), r)
    b = compute(s.count(), r, map=mymap)
    assert a == b
    assert flag[0]
Example #9
def test_failing_argument():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace", skipinitialspace="alpha") # failing call
Example #10
def test_map_called_on_resource_star():
    r = resource(example('accounts_*.csv'))
    s = symbol('s', discover(r))
    flag[0] = False
    a = compute(s.count(), r)
    b = compute(s.count(), r, map=mymap)
    assert a == b
    assert flag[0]
Example #11
def test_no_header_no_columns():
    tbl = 'testtable_into_2'

    csv = CSV(file_name)
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace")

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #12
def test_simple_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace")

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
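
The tests above all exercise one round trip: build a SQL table whose dshape is discovered from the CSV, load it with into(..., if_exists="replace"), and read the rows back as a list. A condensed sketch of that pattern; 'accounts.csv' and the column names stand in for the module-level fixtures (file_name, url) these tests assume:

from blaze import CSV, resource, into

csv = CSV('accounts.csv', columns=['a', 'b'])        # hypothetical file
sql = resource('sqlite:///:memory:::accounts', dshape=csv.dshape)

into(sql, csv, if_exists="replace")                  # reload is idempotent
assert into(list, sql) == into(list, csv)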
Example #13
def test_simple_float_into():
    tbl = 'testtable_into_float'

    csv = CSV(file_name_floats, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql,csv, if_exists="replace")

    assert into(list, sql) == \
            [(1.02, 2.02), (102.02, 202.02), (1002.02, 2002.02)]
Example #14
def test_tryexcept_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace", QUOTE="alpha", FORMAT="csv") # uses multi-byte character and
                                                      # fails over to using sql.extend()

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #15
    def test_csv_hdf5(self):
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = resource(hdf5_fn + '::/data', dshape='var * {a: int32, b: int32}')

                into(hdf5, csv)

                self.assertEquals(hdf5[:].tolist(),
                                  [(1, 1), (2, 2)])
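
The same '::' convention addresses a dataset inside an HDF5 file, with the in-file dataset path after the separator. A small sketch under the same assumptions as the test above (blaze 0.x with an HDF5 backend); 'out.h5' is a hypothetical filename:

from blaze import resource, into

hdf5 = resource('out.h5::/data', dshape='var * {a: int32, b: int32}')
into(hdf5, [(1, 1), (2, 2)])    # append rows to the /data dataset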
Example #16
def _make_blaze_resource(target, table=None, schema=None, config=None):
    if config is not None:
        conn_str = getattr(config, target)
    else:
        conn_str = target

    if table is not None:
        conn_str = conn_str + "::" + table

    t = bz.resource(conn_str, schema=schema)

    return t
Example #17
def test_csv_join():
    d = {'a.csv': 'a,b,c\n0,1,2\n3,4,5', 'b.csv': 'c,d,e\n2,3,4\n5,6,7'}

    with filetexts(d):
        resource_a = resource('a.csv')
        resource_b = resource('b.csv')
        a = symbol('a', discover(resource_a))
        b = symbol('b', discover(resource_b))
        tm.assert_frame_equal(
            odo(
                compute(join(a, b, 'c'), {
                    a: resource_a,
                    b: resource_b
                }),
                pd.DataFrame,
            ),

            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.array([[2, 0, 1, 3, 4], [5, 3, 4, 6, 7]],
                                  dtype='int64'),
                         columns=list('cabde')))
Example #18
def _make_blaze_resource(target, table=None, schema=None,
                         config=None):
    if config is not None:
        conn_str = getattr(config, target)
    else:
        conn_str = target

    if table is not None:
        conn_str = conn_str + "::" + table

    t = bz.resource(conn_str, schema=schema)

    return t
Example #19
def test_add_data_to_server(serial):
    with temp_server(data) as test:
        # add data
        initial_datashape = test.get('/datashape').data.decode('utf-8')
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = test.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        new_datashape = test.get('/datashape').data.decode('utf-8')
        data2 = data.copy()
        data2.update({'iris': resource(iris_path)})
        expected2 = str(discover(data2))
        assert new_datashape == expected2
        assert new_datashape != initial_datashape

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()

        response3 = test.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial)
        )

        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3
Example #20
def test_concat():
    d = {'a.csv': 'a,b\n1,2\n3,4', 'b.csv': 'a,b\n5,6\n7,8'}

    with filetexts(d):
        a_rsc = resource('a.csv')
        b_rsc = resource('b.csv')

        a = symbol('a', discover(a_rsc))
        b = symbol('b', discover(b_rsc))

        tm.assert_frame_equal(
            odo(
                compute(concat(a, b), {
                    a: a_rsc,
                    b: b_rsc
                }),
                pd.DataFrame,
            ),

            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.arange(1, 9, dtype='int64').reshape(4, 2),
                         columns=list('ab')),
        )
Example #21
def test_multiple_csv_files():
    d = {"mult1.csv": "name,val\nAlice,1\nBob,2", "mult2.csv": "name,val\nAlice,3\nCharlie,4"}

    data = [("Alice", 1), ("Bob", 2), ("Alice", 3), ("Charlie", 4)]
    with filetexts(d) as fns:
        r = resource("mult*.csv")
        s = symbol("s", discover(r))

        for e in [s, s.name, s.name.nunique(), s.name.count_values(), s.val.mean()]:
            a = compute(e, {s: r})
            b = compute(e, {s: data})
            if iscollection(e.dshape):
                a, b = into(set, a), into(set, b)
            assert a == b
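
A glob pattern makes resource() return a chunked collection covering every matching file, with a single datashape discovered across all of them. A minimal sketch of that behaviour, reusing the names from the test above:

from blaze import resource, discover, symbol, compute

r = resource('mult*.csv')        # both mult1.csv and mult2.csv
s = symbol('s', discover(r))     # one schema spanning all chunks
total = compute(s.val.sum(), {s: r})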
Example #22
def test_add_data_to_server(serial):
    with temp_server(data) as test:
        # add data
        initial_datashape = datashape.dshape(test.get('/datashape').data.decode('utf-8'))
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = test.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        new_datashape = datashape.dshape(test.get('/datashape').data.decode('utf-8'))
        data2 = data.copy()
        data2.update({'iris': resource(iris_path)})
        expected2 = datashape.dshape(discover(data2))
        assert_dshape_equal(new_datashape, expected2)
        assert new_datashape.measure.fields != initial_datashape.measure.fields

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()

        response3 = test.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial)
        )

        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3
Example #23
def before_request():
    ds = bz.Data(bz.resource('mongodb://localhost/scrapy::flat'), dshape="""var * {
            open: bool,
            price: real,
            price_period: ?string,
            area: real,
            url: string,
            city: string,
            district: string,
            project: string,
            rooms: ?int
        }""")

    g.ds = ds[(ds.open == True)
              & (ds.price_period == None)]
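
The MongoDB URI follows the same pattern: database name in the path, collection after '::'. The == True and == None comparisons are deliberate; blaze expressions overload == to build filters, so `is` cannot be used. A hedged sketch, assuming a local mongod and the blaze Mongo backend (the handler above passes an explicit dshape; discovery may also work):

import blaze as bz

coll = bz.resource('mongodb://localhost/scrapy::flat')
ds = bz.Data(coll)                       # dshape discovered from the collection
open_rows = ds[(ds.open == True) & (ds.price_period == None)]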
Example #24
def addserver(payload, serial):
    """Add a data resource to the server.

    The request should contain a serialized MutableMapping (dictionary-like)
    object, and the server should already be hosting a MutableMapping resource.
    """

    data = _get_data.cache[flask.current_app]

    if not isinstance(data, collections.MutableMapping):
        data_not_mm_msg = ("Cannot update blaze server data since its current "
                           "data is a %s and not a mutable mapping (dictionary "
                           "like).")
        return (data_not_mm_msg % type(data), RC.UNPROCESSABLE_ENTITY)

    if not isinstance(payload, collections.Mapping):
        payload_not_mm_msg = ("Need a dictionary-like payload; instead was "
                              "given %s of type %s.")
        return (payload_not_mm_msg % (payload, type(payload)),
                RC.UNPROCESSABLE_ENTITY)

    if len(payload) > 1:
        error_msg = "Given more than one resource to add: %s"
        return (error_msg % list(payload.keys()),
                RC.UNPROCESSABLE_ENTITY)

    [(name, resource_uri)] = payload.items()

    if name in data:
        msg = "Cannot add dataset named %s, already exists on server."
        return (msg % name, RC.CONFLICT)

    try:
        data.update({name: resource(resource_uri)})
        # Force discovery of new dataset to check that the data is loadable.
        ds = discover(data)
        if name not in ds.dict:
            raise ValueError("%s not added." % name)
    except NotImplementedError as e:
        error_msg = "Addition not supported:\n%s: %s"
        return (error_msg % (type(e).__name__, e),
                RC.UNPROCESSABLE_ENTITY)
    except Exception as e:
        error_msg = "Addition failed with message:\n%s: %s"
        return (error_msg % (type(e).__name__, e),
                RC.UNPROCESSABLE_ENTITY)

    return ('OK', RC.CREATED)
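
From the client side, the handler above expects a POST whose body is a serialized one-entry mapping of {name: uri}. A hypothetical call with the JSON format; the port, path, and content type mirror what the blaze server tests use, but are assumptions here:

import json
import requests

resp = requests.post(
    'http://localhost:6363/add',
    data=json.dumps({'iris': 'iris.csv'}),
    headers={'Content-Type': 'application/vnd.blaze+json'},
)
assert resp.status_code == 201    # RC.CREATED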
Example #25
def test_complex_into():
    # data from: http://dummydata.me/generate

    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')

    tbl = 'testtable_into_complex'

    csv = CSV(file_name, schema='{Name: string, RegistrationDate: date, ZipCode: int64, Consts: float64}')

    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    into(sql, csv, if_exists="replace")

    assert into(list, sql) == into(list, csv)
Example #26
def test_join_count():
    ds = '{t1: var * {x: int, y: int}, t2: var * {a: int, b: int}}'
    engine = resource('sqlite:///:memory:')
    engine = create_from_datashape(engine, ds)
    db = symbol('db', ds)

    expr = join(db.t1[db.t1.x > -1], db.t2, 'x', 'a').count()

    result = compute(expr, {db: engine}, post_compute=False)

    assert normalize(str(result)) == normalize("""
    SELECT count(alias.x) as count
    FROM (SELECT t1.x AS x, t1.y AS y, t2.b AS b
          FROM t1 JOIN t2 ON t1.x = t2.a
          WHERE t1.x > ?) as alias
          """)
Example #27
def test_multiple_csv_files():
    d = {'mult1.csv': 'name,val\nAlice,1\nBob,2',
         'mult2.csv': 'name,val\nAlice,3\nCharlie,4'}

    data = [('Alice', 1), ('Bob', 2), ('Alice', 3), ('Charlie', 4)]
    with filetexts(d) as fns:
        r = resource('mult*.csv')
        s = symbol('s', discover(r))

        for e in [s, s.name, s.name.nunique(), s.name.count_values(),
                s.val.mean()]:
            a = compute(e, {s: r})
            b = compute(e, {s: data})
            if iscollection(e.dshape):
                a, b = into(set, a), into(set, b)
            assert a == b
Example #28
def test_simple_into(csv):
    tbl = 'testtable'

    sql = resource('sqlite:///:memory:', tbl, dshape=csv.dshape)
    engine = sql.bind

    into(sql, csv, if_exists="replace")
    conn = engine.raw_connection()
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' and name='{0}';".format(tbl))

    sqlite_tbl_names = cursor.fetchall()
    assert sqlite_tbl_names[0][0] == tbl


    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #29
    def test_hdf5_csv(self):
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3,),
                                         dtype=np.dtype([(c, 'i4')
                                                         for c in 'abc']))
                    d[:] = np.array(1)

                csv = CSV(csv_fn, mode='r+', schema='{a: int32, b: int32, c: int32}')
                hdf5 = resource(hdf5_fn + '::/data')

                into(csv, hdf5)

                self.assertEquals(tuple(map(tuple, csv)),
                                  ((1, 1, 1), (1, 1, 1), (1, 1, 1)))
Example #30
def addserver(payload, serial):
    """Add a data resource to the server.

    The request should contain a serialized MutableMapping (dictionary-like)
    object, and the server should already be hosting a MutableMapping resource.
    """

    data = _get_data.cache[flask.current_app]

    if not isinstance(data, collections.MutableMapping):
        data_not_mm_msg = (
            "Cannot update blaze server data since its current "
            "data is a %s and not a mutable mapping (dictionary "
            "like).")
        return (data_not_mm_msg % type(data), RC.UNPROCESSABLE_ENTITY)

    if not isinstance(payload, collections.Mapping):
        payload_not_mm_msg = ("Need a dictionary-like payload; instead was "
                              "given %s of type %s.")
        return (payload_not_mm_msg % (payload, type(payload)),
                RC.UNPROCESSABLE_ENTITY)

    if len(payload) > 1:
        error_msg = "Given more than one resource to add: %s"
        return (error_msg % list(payload.keys()), RC.UNPROCESSABLE_ENTITY)

    [(name, resource_uri)] = payload.items()

    if name in data:
        msg = "Cannot add dataset named %s, already exists on server."
        return (msg % name, RC.CONFLICT)

    try:
        data.update({name: resource(resource_uri)})
        # Force discovery of new dataset to check that the data is loadable.
        ds = discover(data)
        if name not in ds.dict:
            raise ValueError("%s not added." % name)
    except NotImplementedError as e:
        error_msg = "Addition not supported:\n%s: %s"
        return (error_msg % (type(e).__name__, e), RC.UNPROCESSABLE_ENTITY)
    except Exception as e:
        error_msg = "Addition failed with message:\n%s: %s"
        return (error_msg % (type(e).__name__, e), RC.UNPROCESSABLE_ENTITY)

    return ('OK', RC.CREATED)
Example #31
def before_request():
    ds = bz.Data(bz.resource('mongodb://localhost/scrapy::car'),
                 dshape="""var * {
            open: bool,
            production_year: int,
            mileage: ?int,
            price: real,
            price_period: ?string,
            url: string,
            brand: string,
            color: string
        }""")

    g.ds = ds[(ds.open == True)
              & (ds.price_period == None)
              & (ds.mileage > 0)
              & (ds.mileage < 1e+6)
              & (ds.production_year > (dt.date.today().year - 20))]
Example #32
    def all_datasets(self):
        storage = self.settings.storage
        last_storage_time = self._storage_time
        last_change = storage.get('_update_time', 0)
        result = {}
        if self._all_datasets is None or last_storage_time < last_change:
            result = copy.copy(self.settings.data)
            for k, v in self.settings.data.items():
                result[k] = v
            for k, v in storage.items():
                if k == '_update_time':
                    continue
                try:
                    result[k] = resource(self.resolve_resource(k), **v)
                except Exception as e:
                    logger.exception(e)
                    raise
            self._all_datasets = result
            self._storage_time = last_change
        return self._all_datasets
Example #33
    def test_csv_sql_json(self):
        data = [('Alice', 100), ('Bob', 200)]
        text = '\n'.join(','.join(map(str, row)) for row in data)
        schema = '{name: string, amount: int}'
        with filetext(text) as csv_fn:
            with filetext('') as json_fn:
                with tmpfile('db') as sqldb:

                    csv = CSV(csv_fn, mode='r', schema=schema)
                    sql = resource('sqlite:///' + sqldb + '::testtable',
                                    dshape='var * ' + schema)
                    json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                    into(sql, csv)

                    self.assertEqual(into(list, sql), data)

                    into(json, sql)

                    with open(json_fn) as f:
                        assert 'Alice' in f.read()
Example #34
def test_multiple_csv_files():
    d = {
        'mult1.csv': 'name,val\nAlice,1\nBob,2',
        'mult2.csv': 'name,val\nAlice,3\nCharlie,4'
    }

    data = [('Alice', 1), ('Bob', 2), ('Alice', 3), ('Charlie', 4)]
    with filetexts(d) as fns:
        r = resource('mult*.csv')
        s = symbol('s', discover(r))

        for e in [
                s, s.name,
                s.name.nunique(),
                s.name.count_values(),
                s.val.mean()
        ]:
            a = compute(e, {s: r})
            b = compute(e, {s: data})
            if iscollection(e.dshape):
                a, b = into(set, a), into(set, b)
            assert a == b
Example #35
def test_csv_postgres_load():
    tbl = 'testtable'

    engine = sqlalchemy.create_engine(url)

    if engine.has_table(tbl):
        metadata = sqlalchemy.MetaData()
        metadata.reflect(engine)
        t = metadata.tables[tbl]
        t.drop(engine)

    csv = CSV(file_name)

    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    engine = sql.bind
    conn = engine.raw_connection()

    cursor = conn.cursor()
    full_path = os.path.abspath(file_name)
    load = '''LOAD DATA INFILE '{0}' INTO TABLE {1} FIELDS TERMINATED BY ','
        lines terminated by '\n'
        '''.format(full_path, tbl)
    cursor.execute(load)
    conn.commit()
Example #36
import pytest
from datetime import datetime
from pandas import DataFrame

from blaze.utils import example
from blaze import discover, symbol, by, CSV, compute, join, into, resource
from blaze.server.client import mimetype
from blaze.server.server import Server, to_tree, from_tree
from blaze.server.serialization import all_formats

accounts = DataFrame([['Alice', 100], ['Bob', 200]],
                     columns=['name', 'amount'])

cities = DataFrame([['Alice', 'NYC'], ['Bob', 'LA']], columns=['name', 'city'])

events = DataFrame([[1, datetime(2000, 1, 1, 12, 0, 0)],
                    [2, datetime(2000, 1, 2, 12, 0, 0)]],
                   columns=['value', 'when'])

db = resource('sqlite:///' + example('iris.db'))

data = {'accounts': accounts, 'cities': cities, 'events': events, 'db': db}


@pytest.fixture(scope='module')
def server():
    s = Server(data, all_formats)
    s.app.testing = True
    return s


@pytest.yield_fixture
def test(server):
    with server.app.test_client() as c:
        yield c
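
A test that receives the `test` fixture above gets a Flask test client wired to the server's routes. A hypothetical usage, relying on discover and data from this module's top level:

def test_datashape_endpoint(test):
    response = test.get('/datashape')
    assert response.status_code == 200
    # the body is the datashape of the served `data` mapping
    assert response.data.decode('utf-8') == str(discover(data))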
Example #37
import pytest
from datetime import datetime
from pandas import DataFrame

from blaze import resource
from blaze.utils import example
from blaze.server.client import mimetype
from blaze.server.server import Server, to_tree, from_tree
from blaze.server.serialization import all_formats


accounts = DataFrame([['Alice', 100], ['Bob', 200]],
                     columns=['name', 'amount'])

cities = DataFrame([['Alice', 'NYC'], ['Bob', 'LA']],
                   columns=['name', 'city'])

events = DataFrame([[1, datetime(2000, 1, 1, 12, 0, 0)],
                    [2, datetime(2000, 1, 2, 12, 0, 0)]],
                   columns=['value', 'when'])

db = resource('sqlite:///' + example('iris.db'))

data = {'accounts': accounts,
          'cities': cities,
          'events': events,
              'db': db}


@pytest.fixture(scope='module')
def server():
    s = Server(data, all_formats)
    s.app.testing = True
    return s


@pytest.yield_fixture
Example #38
def test_into_xls_file():
    pytest.importorskip('xlrd')
    fn = os.path.join(dirname, 'accounts.xls')
    assert isinstance(resource(fn), Excel)
Example #39
def test_resource():
    with tmpfile('hdf5') as filename:
        h = HDF5(filename, '/test', schema=schema)
        assert resource(filename, '/test').schema == h.schema
        assert resource(filename + '::/test').schema == h.schema
Example #40
def addserver(payload, serial):
    """Add a data resource to the server.

    The request should contain a serialized MutableMapping (dictionary-like)
    object, and the server should already be hosting a MutableMapping resource.
    """

    data = _get_data.cache[flask.current_app]

    if not isinstance(data, collections.MutableMapping):
        data_not_mm_msg = ("Cannot update blaze server data since its current "
                           "data is a %s and not a mutable mapping (dictionary "
                           "like).")
        return (data_not_mm_msg % type(data), RC.UNPROCESSABLE_ENTITY)

    if not isinstance(payload, collections.Mapping):
        payload_not_mm_msg = ("Need a dictionary-like payload; instead was "
                              "given %s of type %s.")
        return (payload_not_mm_msg % (payload, type(payload)),
                RC.UNPROCESSABLE_ENTITY)

    if len(payload) > 1:
        error_msg = "Given more than one resource to add: %s"
        return (error_msg % list(payload.keys()),
                RC.UNPROCESSABLE_ENTITY)

    [(name, resource_info)] = payload.items()

    if name in data:
        msg = "Cannot add dataset named %s, already exists on server."
        return (msg % name, RC.CONFLICT)

    try:
        imports = []
        if isinstance(resource_info, dict):
            # Extract resource creation arguments
            source = resource_info['source']
            imports = resource_info.get('imports', [])
            args = resource_info.get('args', [])
            kwargs = resource_info.get('kwargs', {})
        else:
            # Just a URI
            source, args, kwargs = resource_info, [], {}
        # If we've been given libraries to import, we need to do so
        # before we can create the resource.
        for mod in imports:
            importlib.import_module(mod)
        # Make a new resource and try to discover it.
        new_resource = {name: resource(source, *args, **kwargs)}
        # Discovery is a minimal consistency check to determine if the new
        # resource is valid.
        ds = discover(new_resource)
        if name not in ds.dict:
            raise ValueError("%s not added." % name)
    except NotImplementedError as e:
        error_msg = "Addition not supported:\n%s: %s"
        return (error_msg % (type(e).__name__, e),
                RC.UNPROCESSABLE_ENTITY)
    except Exception as e:
        error_msg = "Addition failed with message:\n%s: %s"
        return (error_msg % (type(e).__name__, e),
                RC.UNPROCESSABLE_ENTITY)
    else:
        # Now that we've established that the new resource is discoverable--and
        # thus exists and is accessible--we add the resource to the server.
        data.update(new_resource)

    return ('OK', RC.CREATED)
Example #41
def test_chunks_json():
    r = resource(example('accounts-streaming*.json'))
    assert isinstance(r, Chunks)
    assert compute(s.amount.sum(), r) == 200
Example #42
def addserver(payload, serial):
    """Add a data resource to the server.

    The request should contain a serialized MutableMapping (dictionary-like)
    object, and the server should already be hosting a MutableMapping resource.
    """

    data = _get_data.cache[flask.current_app]

    if not isinstance(data, collections.MutableMapping):
        data_not_mm_msg = ("Cannot update blaze server data since its current "
                           "data is a %s and not a mutable mapping (dictionary "
                           "like).")
        return (data_not_mm_msg % type(data), RC.UNPROCESSABLE_ENTITY)

    if not isinstance(payload, collections.Mapping):
        payload_not_mm_msg = ("Need a dictionary-like payload; instead was "
                              "given %s of type %s.")
        return (payload_not_mm_msg % (payload, type(payload)),
                RC.UNPROCESSABLE_ENTITY)

    if len(payload) > 1:
        error_msg = "Given more than one resource to add: %s"
        return (error_msg % list(payload.keys()),
                RC.UNPROCESSABLE_ENTITY)

    [(name, resource_info)] = payload.items()
    flask.current_app.logger.debug("Attempting to add dataset '%s'" % name)

    if name in data:
        msg = "Cannot add dataset named %s, already exists on server."
        return (msg % name, RC.CONFLICT)

    try:
        imports = []
        if isinstance(resource_info, dict):
            # Extract resource creation arguments
            source = resource_info['source']
            imports = resource_info.get('imports', [])
            args = resource_info.get('args', [])
            kwargs = resource_info.get('kwargs', {})
        else:
            # Just a URI
            source, args, kwargs = resource_info, [], {}
        # If we've been given libraries to import, we need to do so
        # before we can create the resource.
        for mod in imports:
            importlib.import_module(mod)
        # Make a new resource and try to discover it.
        new_resource = {name: resource(source, *args, **kwargs)}
        # Discovery is a minimal consistency check to determine if the new
        # resource is valid.
        ds = discover(new_resource)
        if name not in ds.dict:
            raise ValueError("%s not added." % name)
    except NotImplementedError as e:
        error_msg = "Addition not supported:\n%s: %s"
        return (error_msg % (type(e).__name__, e),
                RC.UNPROCESSABLE_ENTITY)
    except Exception as e:
        error_msg = "Addition failed with message:\n%s: %s"
        return (error_msg % (type(e).__name__, e),
                RC.UNPROCESSABLE_ENTITY)
    else:
        # Now that we've established that the new resource is discoverable--and
        # thus exists and is accessible--we add the resource to the server.
        data.update(new_resource)

    return ('OK', RC.CREATED)
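
Besides a bare URI, the handler above accepts a richer payload per dataset: a dict carrying the source plus optional imports, args, and kwargs. A hypothetical payload exercising that branch (URI and dshape are placeholders):

payload = {
    'accounts': {
        'source': 'sqlite:///db.db::accounts',   # hypothetical URI
        'imports': ['sqlalchemy'],               # imported before resource()
        'args': [],
        'kwargs': {'dshape': 'var * {name: string, amount: int64}'},
    }
}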
Example #43
def test_resource_specifying_database_name():
    with existing_schema('myschema'):
        sql = resource(url + '::myschema.accounts', schema='{name: string, value: int}')
        assert isinstance(sql, SQL)
        assert sql.table.schema == 'myschema'
Example #44
                     np.random.randn(qty / 2) + 2]),
    'A3': np.hstack([np.random.randn(qty / 2),
                     np.random.randn(qty / 2) + 3]),
    'A4': np.hstack([np.random.randn(qty / 2),
                     np.random.randn(qty / 2) + 4]),
    'A5': np.hstack([np.random.randn(qty / 2),
                     np.random.randn(qty / 2) + 5]),
    'B': np.random.randn(qty),
    'C': np.hstack([np.zeros(qty / 2), np.ones(qty / 2)])
}
bivariate = pd.DataFrame(bivariate)
import bokeh.server.tests
path = join(dirname(bokeh.server.tests.__file__), 'data', 'AAPL.hdf5')

try:
    aapl = resource("hdfstore://%s::__data__" % path)
except Exception as e:
    aapl = None
    log.error(e)
    warnings.warn(
        "Error loading hdfstore for AAPL. Your version of Blaze is too old, or incompatible"
    )

path = join(dirname(bokeh.server.tests.__file__), 'data', 'array.hdf5')
try:
    arr = resource(path + "::" + "array")
except Exception as e:
    arr = None
    log.error(e)
    warnings.warn(
        "Error loading hdfstore for array. Your version of Blaze is too old, or incompatible"
Example #45
                    ('name', 'U7'), ('timestamp', 'M8[us]')])

schema = '{amount: int64, id: int64, name: string, timestamp: datetime}'
sql_schema = '{amount: int64, id: int64, name: string, timestamp: datetime[tz="UTC"]}'

arr = nd.array(L, dtype=schema)

bc = bcolz.ctable([np.array([100, 200, 300], dtype=np.int64),
                   np.array([1, 2, 3], dtype=np.int64),
                   np.array(['Alice', 'Bob', 'Charlie'], dtype='U7'),
                   np.array([datetime(2000, 12, 25, 0, 0, 1),
                             datetime(2001, 12, 25, 0, 0, 1),
                             datetime(2002, 12, 25, 0, 0, 1)], dtype='M8[us]')],
                  names=['amount', 'id', 'name', 'timestamp'])

sql = resource('sqlite:///:memory:::accounts', dshape='var * ' + schema)
into(sql, L)

data = [(list, L),
        (Data, Data(L, 'var * {amount: int64, id: int64, name: string[7], timestamp: datetime}')),
        (DataFrame, df),
        (np.ndarray, x),
        (nd.array, arr),
        (bcolz.ctable, bc),
        (CSV, csv),
        (sqlalchemy.Table, sql)]

schema_no_date = '{amount: int64, id: int64, name: string[7]}'
sql_no_date = resource('sqlite:///:memory:::accounts_no_date',
                       dshape='var * ' + schema_no_date)
Example #46
def test_outer_join():
    L = symbol('L', 'var * {id: int, name: string, amount: real}')
    R = symbol('R', 'var * {city: string, id: int}')

    with tmpfile('db') as fn:
        uri = 'sqlite:///' + fn
        engine = resource(uri)

        _left = [(1, 'Alice', 100),
                 (2, 'Bob', 200),
                 (4, 'Dennis', 400)]

        left = resource(uri, 'left', dshape=L.dshape)
        into(left, _left)

        _right = [('NYC', 1),
                  ('Boston', 1),
                  ('LA', 3),
                  ('Moscow', 4)]
        right = resource(uri, 'right', dshape=R.dshape)
        into(right, _right)

        conn = engine.connect()

        query = compute(join(L, R, how='inner'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))

        assert set(result) == set(
                [(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (4, 'Dennis', 400, 'Moscow')])

        query = compute(join(L, R, how='left'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))

        assert set(result) == set(
                [(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (2, 'Bob', 200, None),
                 (4, 'Dennis', 400, 'Moscow')])

        query = compute(join(L, R, how='right'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))

        assert set(result) == set(
                [(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (3, None, None, 'LA'),
                 (4, 'Dennis', 400, 'Moscow')])

        # SQLAlchemy doesn't support full outer join
        """
        query = compute(join(L, R, how='outer'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))

        assert set(result) == set(
                [(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (2, 'Bob', 200, None),
                 (3, None, None, 'LA'),
                 (4, 'Dennis', 400, 'Moscow')])
        """

        conn.close()