def test_add_data_to_empty_server(empty_server, serial):
    # add data
    with temp_server() as test:
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = empty_server.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        response2 = empty_server.get('/datashape')
        expected2 = str(discover({'iris': resource(iris_path)}))
        assert response2.data.decode('utf-8') == expected2

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()
        response3 = empty_server.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial),
        )
        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3

def test_csv_join():
    d = {"a.csv": "a,b,c\n0,1,2\n3,4,5",
         "b.csv": "c,d,e\n2,3,4\n5,6,7"}
    with filetexts(d):
        resource_a = resource("a.csv")
        resource_b = resource("b.csv")
        a = symbol("a", discover(resource_a))
        b = symbol("b", discover(resource_b))
        tm.assert_frame_equal(
            odo(compute(join(a, b, "c"), {a: resource_a, b: resource_b}),
                pd.DataFrame),
            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.array([[2, 0, 1, 3, 4], [5, 3, 4, 6, 7]],
                                  dtype="int64"),
                         columns=list("cabde")),
        )

def test_concat():
    d = {"a.csv": "a,b\n1,2\n3,4",
         "b.csv": "a,b\n5,6\n7,8"}
    with filetexts(d):
        a_rsc = resource("a.csv")
        b_rsc = resource("b.csv")
        a = symbol("a", discover(a_rsc))
        b = symbol("b", discover(b_rsc))
        tm.assert_frame_equal(
            odo(compute(concat(a, b), {a: a_rsc, b: b_rsc}), pd.DataFrame),
            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.arange(1, 9, dtype="int64").reshape(4, 2),
                         columns=list("ab")),
        )

def sql():
    data = [(1, 2), (10, 20), (100, 200)]
    sql = resource('sqlite:///:memory:', 'foo',
                   dshape='var * {x: int, y: int}')
    into(sql, data)
    return sql

def test_into_empty_sql():
    """Test loading every source into an empty SQL database."""
    sources = [v for k, v in data if k not in [list]]
    for a in sources:
        sql_empty = resource('sqlite:///:memory:::accounts',
                             dshape='var * ' + sql_schema)
        assert normalize(into(sql_empty, a)) == normalize(sql)

def engine():
    tbl = 'testtable'
    with tmpfile('db') as filename:
        engine = sqlalchemy.create_engine('sqlite:///' + filename)
        t = resource('sqlite:///' + filename + '::' + tbl,
                     dshape='var * {a: int32, b: int32}')
        yield engine, t

def test_failing_argument():
    tbl = 'testtable_into_2'
    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    into(sql, csv, if_exists="replace", skipinitialspace="alpha")  # failing call

def test_map_called_on_resource_star():
    r = resource(example('accounts_*.csv'))
    s = symbol('s', discover(r))
    flag[0] = False
    a = compute(s.count(), r)
    b = compute(s.count(), r, map=mymap)
    assert a == b
    assert flag[0]

def test_no_header_no_columns():
    tbl = 'testtable_into_2'
    csv = CSV(file_name)
    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    into(sql, csv, if_exists="replace")
    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]

def test_simple_into():
    tbl = 'testtable_into_2'
    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    into(sql, csv, if_exists="replace")
    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]

def test_simple_float_into():
    tbl = 'testtable_into_float'
    csv = CSV(file_name_floats, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    into(sql, csv, if_exists="replace")
    assert into(list, sql) == [(1.02, 2.02), (102.02, 202.02),
                               (1002.02, 2002.02)]

def test_tryexcept_into():
    tbl = 'testtable_into_2'
    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    # uses a multi-byte character and fails over to using sql.extend()
    into(sql, csv, if_exists="replace", QUOTE="alpha", FORMAT="csv")
    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]

def test_csv_hdf5(self):
    from dynd import nd
    with tmpfile('hdf5') as hdf5_fn:
        with filetext('1,1\n2,2\n') as csv_fn:
            csv = CSV(csv_fn, schema='{a: int32, b: int32}')
            hdf5 = resource(hdf5_fn + '::/data',
                            dshape='var * {a: int32, b: int32}')
            into(hdf5, csv)
            self.assertEquals(hdf5[:].tolist(), [(1, 1), (2, 2)])

def _make_blaze_resource(target, table=None, schema=None, config=None):
    if config is not None:
        conn_str = getattr(config, target)
    else:
        conn_str = target
    if table is not None:
        conn_str = conn_str + "::" + table
    t = bz.resource(conn_str, schema=schema)
    return t

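# Hypothetical usage of _make_blaze_resource() above, assuming a config
# object whose attributes hold connection strings. Both the config class
# and the URI below are illustrative, not part of this module:
class _ExampleConfig(object):
    warehouse = 'postgresql://user@localhost/db'

t = _make_blaze_resource('warehouse', table='accounts',
                         schema='{name: string, amount: int64}',
                         config=_ExampleConfig())
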
def test_add_data_to_server(serial):
    with temp_server(data) as test:
        # add data
        initial_datashape = test.get('/datashape').data.decode('utf-8')
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = test.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        new_datashape = test.get('/datashape').data.decode('utf-8')
        data2 = data.copy()
        data2.update({'iris': resource(iris_path)})
        expected2 = str(discover(data2))
        assert new_datashape == expected2
        assert new_datashape != initial_datashape

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()
        response3 = test.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial),
        )
        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3

def test_multiple_csv_files():
    d = {"mult1.csv": "name,val\nAlice,1\nBob,2",
         "mult2.csv": "name,val\nAlice,3\nCharlie,4"}
    data = [("Alice", 1), ("Bob", 2), ("Alice", 3), ("Charlie", 4)]
    with filetexts(d) as fns:
        r = resource("mult*.csv")
        s = symbol("s", discover(r))
        for e in [s, s.name, s.name.nunique(), s.name.count_values(),
                  s.val.mean()]:
            a = compute(e, {s: r})
            b = compute(e, {s: data})
            if iscollection(e.dshape):
                a, b = into(set, a), into(set, b)
            assert a == b

def test_add_data_to_server(serial):
    with temp_server(data) as test:
        # add data
        initial_datashape = datashape.dshape(
            test.get('/datashape').data.decode('utf-8'))
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = test.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        new_datashape = datashape.dshape(
            test.get('/datashape').data.decode('utf-8'))
        data2 = data.copy()
        data2.update({'iris': resource(iris_path)})
        expected2 = datashape.dshape(discover(data2))
        assert_dshape_equal(new_datashape, expected2)
        assert new_datashape.measure.fields != initial_datashape.measure.fields

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()
        response3 = test.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial),
        )
        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3

def before_request():
    ds = bz.Data(bz.resource('mongodb://localhost/scrapy::flat'),
                 dshape="""var * {
                     open: bool,
                     price: real,
                     price_period: ?string,
                     area: real,
                     url: string,
                     city: string,
                     district: string,
                     project: string,
                     rooms: ?int
                 }""")
    g.ds = ds[(ds.open == True) & (ds.price_period == None)]

def addserver(payload, serial):
    """Add a data resource to the server.

    The request should contain a serialized MutableMapping (dictionary-like)
    object, and the server should already be hosting a MutableMapping
    resource.
    """
    data = _get_data.cache[flask.current_app]
    if not isinstance(data, collections.MutableMapping):
        data_not_mm_msg = ("Cannot update blaze server data since its current "
                           "data is a %s and not a mutable mapping "
                           "(dictionary-like).")
        return (data_not_mm_msg % type(data), RC.UNPROCESSABLE_ENTITY)
    if not isinstance(payload, collections.Mapping):
        payload_not_mm_msg = ("Need a dictionary-like payload; instead was "
                              "given %s of type %s.")
        return (payload_not_mm_msg % (payload, type(payload)),
                RC.UNPROCESSABLE_ENTITY)
    if len(payload) > 1:
        error_msg = "Given more than one resource to add: %s"
        return (error_msg % list(payload.keys()), RC.UNPROCESSABLE_ENTITY)

    [(name, resource_uri)] = payload.items()
    if name in data:
        msg = "Cannot add dataset named %s, already exists on server."
        return (msg % name, RC.CONFLICT)

    try:
        data.update({name: resource(resource_uri)})
        # Force discovery of the new dataset to check that the data is
        # loadable.
        ds = discover(data)
        if name not in ds.dict:
            raise ValueError("%s not added." % name)
    except NotImplementedError as e:
        error_msg = "Addition not supported:\n%s: %s"
        return (error_msg % (type(e).__name__, e), RC.UNPROCESSABLE_ENTITY)
    except Exception as e:
        error_msg = "Addition failed with message:\n%s: %s"
        return (error_msg % (type(e).__name__, e), RC.UNPROCESSABLE_ENTITY)

    return ('OK', RC.CREATED)

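# Hedged sketch of the contract addserver() above implements, with
# hypothetical values; routing and (de)serialization live elsewhere in
# blaze.server. The endpoint accepts exactly one {name: uri} pair and
# returns a (message, status) pair:
#
#     addserver({'iris': 'iris.csv'}, serial)   # -> ('OK', RC.CREATED)
#     addserver({'iris': 'iris.csv'}, serial)   # -> RC.CONFLICT (name exists)
#     addserver({'a': 1, 'b': 2}, serial)       # -> RC.UNPROCESSABLE_ENTITY
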
def test_complex_into():
    # data from: http://dummydata.me/generate
    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')
    tbl = 'testtable_into_complex'
    csv = CSV(file_name,
              schema='{Name: string, RegistrationDate: date, '
                     'ZipCode: int64, Consts: float64}')
    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    into(sql, csv, if_exists="replace")
    df = pd.read_csv(file_name, parse_dates=['RegistrationDate'])
    assert into(list, sql) == into(list, csv)

def test_join_count():
    ds = '{t1: var * {x: int, y: int}, t2: var * {a: int, b: int}}'
    engine = resource('sqlite:///:memory:')
    engine = create_from_datashape(engine, ds)
    db = symbol('db', ds)

    expr = join(db.t1[db.t1.x > -1], db.t2, 'x', 'a').count()
    result = compute(expr, {db: engine}, post_compute=False)

    assert normalize(str(result)) == normalize("""
    SELECT count(alias.x) as count
    FROM (SELECT t1.x AS x, t1.y AS y, t2.b AS b
          FROM t1 JOIN t2 ON t1.x = t2.a
          WHERE t1.x > ?) as alias
    """)

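# `normalize` comes from the test utilities; a minimal stand-in that would
# make the comparison above whitespace-insensitive might look like this
# (the real helper may also canonicalize generated alias names):
import re

def normalize(query):
    # collapse runs of whitespace and compare case-insensitively
    return re.sub(r'\s+', ' ', str(query)).strip().lower()
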
def test_simple_into(csv):
    tbl = 'testtable'
    sql = resource('sqlite:///:memory:', tbl, dshape=csv.dshape)
    engine = sql.bind
    into(sql, csv, if_exists="replace")

    conn = engine.raw_connection()
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master "
                   "WHERE type='table' and name='{0}';".format(tbl))
    sqlite_tbl_names = cursor.fetchall()
    assert sqlite_tbl_names[0][0] == tbl
    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]

def test_hdf5_csv(self):
    import h5py
    with tmpfile('hdf5') as hdf5_fn:
        with filetext('') as csv_fn:
            with h5py.File(hdf5_fn, 'w') as f:
                d = f.create_dataset('data', (3,),
                                     dtype=np.dtype([(c, 'i4')
                                                     for c in 'abc']))
                d[:] = np.array(1)
            csv = CSV(csv_fn, mode='r+',
                      schema='{a: int32, b: int32, c: int32}')
            hdf5 = resource(hdf5_fn + '::/data')
            into(csv, hdf5)
            self.assertEquals(tuple(map(tuple, csv)),
                              ((1, 1, 1), (1, 1, 1), (1, 1, 1)))

def before_request():
    ds = bz.Data(bz.resource('mongodb://localhost/scrapy::car'),
                 dshape="""var * {
                     open: bool,
                     production_year: int,
                     mileage: ?int,
                     price: real,
                     price_period: ?string,
                     url: string,
                     brand: string,
                     color: string
                 }""")
    g.ds = ds[(ds.open == True) &
              (ds.price_period == None) &
              (ds.mileage > 0) &
              (ds.mileage < 1e+6) &
              (ds.production_year > (dt.date.today().year - 20))]

def all_datasets(self):
    storage = self.settings.storage
    last_storage_time = self._storage_time
    last_change = storage.get('_update_time', 0)
    # Rebuild the cache only when it is missing or storage has changed
    # since the last build.
    if self._all_datasets is None or last_storage_time < last_change:
        result = copy.copy(self.settings.data)
        for k, v in storage.items():
            if k == '_update_time':
                continue
            try:
                result[k] = resource(self.resolve_resource(k), **v)
            except Exception as e:
                logger.exception(e)
                raise
        self._all_datasets = result
        self._storage_time = last_change
    return self._all_datasets

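# Hypothetical shape of the `storage` mapping iterated above, inferred from
# the code: a '_update_time' timestamp plus one kwargs dict per dataset
# name, where resolve_resource(name) yields the resource URI. The values
# below are illustrative only:
storage = {
    '_update_time': 1400000000.0,  # compared against self._storage_time
    'iris': {'dshape': 'var * {petal_length: float64, species: string}'},
}
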
def test_csv_sql_json(self):
    data = [('Alice', 100), ('Bob', 200)]
    text = '\n'.join(','.join(map(str, row)) for row in data)
    schema = '{name: string, amount: int}'
    with filetext(text) as csv_fn:
        with filetext('') as json_fn:
            with tmpfile('db') as sqldb:
                csv = CSV(csv_fn, mode='r', schema=schema)
                sql = resource('sqlite:///' + sqldb + '::testtable',
                               dshape='var * ' + schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                into(sql, csv)
                self.assertEqual(into(list, sql), data)

                into(json, sql)
                with open(json_fn) as f:
                    assert 'Alice' in f.read()

def test_csv_postgres_load():
    tbl = 'testtable'
    engine = sqlalchemy.create_engine(url)
    if engine.has_table(tbl):
        metadata = sqlalchemy.MetaData()
        metadata.reflect(engine)
        t = metadata.tables[tbl]
        t.drop(engine)

    csv = CSV(file_name)
    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    engine = sql.bind
    conn = engine.raw_connection()
    cursor = conn.cursor()
    full_path = os.path.abspath(file_name)

    # note: LOAD DATA INFILE is MySQL syntax; a Postgres backend would use
    # COPY ... FROM instead
    load = '''LOAD DATA INFILE '{0}' INTO TABLE {1}
              FIELDS TERMINATED BY ','
              LINES TERMINATED BY '\n' '''.format(full_path, tbl)
    cursor.execute(load)
    conn.commit()

from blaze.utils import example
from blaze import discover, symbol, by, CSV, compute, join, into, resource
from blaze.server.client import mimetype
from blaze.server.server import Server, to_tree, from_tree
from blaze.server.serialization import all_formats


accounts = DataFrame([['Alice', 100], ['Bob', 200]],
                     columns=['name', 'amount'])

cities = DataFrame([['Alice', 'NYC'], ['Bob', 'LA']],
                   columns=['name', 'city'])

events = DataFrame([[1, datetime(2000, 1, 1, 12, 0, 0)],
                    [2, datetime(2000, 1, 2, 12, 0, 0)]],
                   columns=['value', 'when'])

db = resource('sqlite:///' + example('iris.db'))

data = {'accounts': accounts, 'cities': cities, 'events': events, 'db': db}


@pytest.fixture(scope='module')
def server():
    s = Server(data, all_formats)
    s.app.testing = True
    return s


@pytest.yield_fixture
def test(server):
    with server.app.test_client() as c:
        yield c

def test_into_xls_file():
    pytest.importorskip('xlrd')
    fn = os.path.join(dirname, 'accounts.xls')
    assert isinstance(resource(fn), Excel)

def test_resource():
    with tmpfile('hdf5') as filename:
        h = HDF5(filename, '/test', schema=schema)
        assert resource(filename, '/test').schema == h.schema
        assert resource(filename + '::/test').schema == h.schema

def test_chunks_json():
    r = resource(example('accounts-streaming*.json'))
    assert isinstance(r, Chunks)
    assert compute(s.amount.sum(), r) == 200

def addserver(payload, serial):
    """Add a data resource to the server.

    The request should contain a serialized MutableMapping (dictionary-like)
    object, and the server should already be hosting a MutableMapping
    resource.
    """
    data = _get_data.cache[flask.current_app]
    if not isinstance(data, collections.MutableMapping):
        data_not_mm_msg = ("Cannot update blaze server data since its current "
                           "data is a %s and not a mutable mapping "
                           "(dictionary-like).")
        return (data_not_mm_msg % type(data), RC.UNPROCESSABLE_ENTITY)
    if not isinstance(payload, collections.Mapping):
        payload_not_mm_msg = ("Need a dictionary-like payload; instead was "
                              "given %s of type %s.")
        return (payload_not_mm_msg % (payload, type(payload)),
                RC.UNPROCESSABLE_ENTITY)
    if len(payload) > 1:
        error_msg = "Given more than one resource to add: %s"
        return (error_msg % list(payload.keys()), RC.UNPROCESSABLE_ENTITY)

    [(name, resource_info)] = payload.items()
    flask.current_app.logger.debug("Attempting to add dataset '%s'" % name)
    if name in data:
        msg = "Cannot add dataset named %s, already exists on server."
        return (msg % name, RC.CONFLICT)

    try:
        imports = []
        if isinstance(resource_info, dict):
            # Extract resource creation arguments
            source = resource_info['source']
            imports = resource_info.get('imports', [])
            args = resource_info.get('args', [])
            kwargs = resource_info.get('kwargs', {})
        else:
            # Just a URI
            source, args, kwargs = resource_info, [], {}
        # If we've been given libraries to import, we need to do so before
        # we can create the resource.
        for mod in imports:
            importlib.import_module(mod)
        # Make a new resource and try to discover it.
        new_resource = {name: resource(source, *args, **kwargs)}
        # Discovery is a minimal consistency check to determine if the new
        # resource is valid.
        ds = discover(new_resource)
        if name not in ds.dict:
            raise ValueError("%s not added." % name)
    except NotImplementedError as e:
        error_msg = "Addition not supported:\n%s: %s"
        return (error_msg % (type(e).__name__, e), RC.UNPROCESSABLE_ENTITY)
    except Exception as e:
        error_msg = "Addition failed with message:\n%s: %s"
        return (error_msg % (type(e).__name__, e), RC.UNPROCESSABLE_ENTITY)
    else:
        # Now that we've established that the new resource is
        # discoverable--and thus exists and is accessible--we add the
        # resource to the server.
        data.update(new_resource)
        return ('OK', RC.CREATED)

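# Hedged sketch of the two payload shapes the extended addserver() above
# accepts; the names, URIs, and dshapes here are hypothetical examples,
# not fixtures from this module.

# Simple form: the value is just a URI string.
simple_payload = {'iris': 'iris.csv'}

# Extended form: a dict of resource() arguments, plus modules to import
# first so the relevant resource backends get registered.
extended_payload = {
    'accounts': {
        'source': 'sqlite:///path/to/data.db::accounts',
        'imports': ['sqlalchemy'],  # imported before resource() is called
        'args': [],                 # positional arguments for resource()
        'kwargs': {'dshape': 'var * {name: string, amount: int64}'},
    },
}
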
def test_resource_specifying_database_name():
    with existing_schema('myschema'):
        sql = resource(url + '::myschema.accounts',
                       schema='{name: string, value: int}')
        assert isinstance(sql, SQL)
        assert sql.table.schema == 'myschema'

                     np.random.randn(qty / 2) + 2]),
    'A3': np.hstack([np.random.randn(qty / 2),
                     np.random.randn(qty / 2) + 3]),
    'A4': np.hstack([np.random.randn(qty / 2),
                     np.random.randn(qty / 2) + 4]),
    'A5': np.hstack([np.random.randn(qty / 2),
                     np.random.randn(qty / 2) + 5]),
    'B': np.random.randn(qty),
    'C': np.hstack([np.zeros(qty / 2), np.ones(qty / 2)]),
}
bivariate = pd.DataFrame(bivariate)

import bokeh.server.tests

path = join(dirname(bokeh.server.tests.__file__), 'data', 'AAPL.hdf5')
try:
    aapl = resource("hdfstore://%s::__data__" % path)
except Exception as e:
    aapl = None
    log.error(e)
    warnings.warn(
        "Error loading hdfstore for AAPL. "
        "Your version of Blaze is too old, or incompatible")

path = join(dirname(bokeh.server.tests.__file__), 'data', 'array.hdf5')
try:
    arr = resource(path + "::" + "array")
except Exception as e:
    arr = None
    log.error(e)
    warnings.warn(
        "Error loading hdfstore for array. "
        "Your version of Blaze is too old, or incompatible")

              ('name', 'U7'), ('timestamp', 'M8[us]')])

schema = '{amount: int64, id: int64, name: string, timestamp: datetime}'
sql_schema = ('{amount: int64, id: int64, name: string, '
              'timestamp: datetime[tz="UTC"]}')

arr = nd.array(L, dtype=schema)

bc = bcolz.ctable([np.array([100, 200, 300], dtype=np.int64),
                   np.array([1, 2, 3], dtype=np.int64),
                   np.array(['Alice', 'Bob', 'Charlie'], dtype='U7'),
                   np.array([datetime(2000, 12, 25, 0, 0, 1),
                             datetime(2001, 12, 25, 0, 0, 1),
                             datetime(2002, 12, 25, 0, 0, 1)],
                            dtype='M8[us]')],
                  names=['amount', 'id', 'name', 'timestamp'])

sql = resource('sqlite:///:memory:::accounts', dshape='var * ' + schema)
into(sql, L)

data = [(list, L),
        (Data, Data(L, 'var * {amount: int64, id: int64, name: string[7], '
                       'timestamp: datetime}')),
        (DataFrame, df),
        (np.ndarray, x),
        (nd.array, arr),
        (bcolz.ctable, bc),
        (CSV, csv),
        (sqlalchemy.Table, sql)]

schema_no_date = '{amount: int64, id: int64, name: string[7]}'
sql_no_date = resource('sqlite:///:memory:::accounts_no_date',
                       dshape='var * ' + schema_no_date)

def test_outer_join():
    L = symbol('L', 'var * {id: int, name: string, amount: real}')
    R = symbol('R', 'var * {city: string, id: int}')

    with tmpfile('db') as fn:
        uri = 'sqlite:///' + fn
        engine = resource(uri)

        _left = [(1, 'Alice', 100), (2, 'Bob', 200), (4, 'Dennis', 400)]
        left = resource(uri, 'left', dshape=L.dshape)
        into(left, _left)

        _right = [('NYC', 1), ('Boston', 1), ('LA', 3), ('Moscow', 4)]
        right = resource(uri, 'right', dshape=R.dshape)
        into(right, _right)

        conn = engine.connect()

        query = compute(join(L, R, how='inner'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))
        assert set(result) == set([(1, 'Alice', 100, 'NYC'),
                                   (1, 'Alice', 100, 'Boston'),
                                   (4, 'Dennis', 400, 'Moscow')])

        query = compute(join(L, R, how='left'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))
        assert set(result) == set([(1, 'Alice', 100, 'NYC'),
                                   (1, 'Alice', 100, 'Boston'),
                                   (2, 'Bob', 200, None),
                                   (4, 'Dennis', 400, 'Moscow')])

        query = compute(join(L, R, how='right'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))
        assert set(result) == set([(1, 'Alice', 100, 'NYC'),
                                   (1, 'Alice', 100, 'Boston'),
                                   (3, None, None, 'LA'),
                                   (4, 'Dennis', 400, 'Moscow')])

        # SQLAlchemy doesn't support full outer join
        """
        query = compute(join(L, R, how='outer'),
                        {L: left, R: right},
                        post_compute=False)
        result = list(map(tuple, conn.execute(query).fetchall()))
        assert set(result) == set([(1, 'Alice', 100, 'NYC'),
                                   (1, 'Alice', 100, 'Boston'),
                                   (2, 'Bob', 200, None),
                                   (3, None, None, 'LA'),
                                   (4, 'Dennis', 400, 'Moscow')])
        """

        conn.close()

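# Hedged sketch of the workaround the commented-out block above alludes to:
# SQLite has no FULL OUTER JOIN, but it can be emulated as the UNION of a
# LEFT JOIN with the reversed LEFT JOIN (UNION deduplicates the matched
# rows). The raw SQL below is illustrative only; it reuses the "left" and
# "right" tables created in the test, quoting the identifiers because LEFT
# and RIGHT are SQL keywords.
full_outer_sql = '''
SELECT "left".id, "left".name, "left".amount, "right".city
FROM "left" LEFT JOIN "right" ON "left".id = "right".id
UNION
SELECT "right".id, "left".name, "left".amount, "right".city
FROM "right" LEFT JOIN "left" ON "right".id = "left".id
'''
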