def test_data_routing_mulitple_sources():
    """Data routing, first call: set up a large data source on url1 and a
    smaller source on url2, then execute 3 calls.  url1 should have first
    priority, url2 second, url3 third.
    """
    setup_client(integration.url1)
    c = client()
    # delete=False keeps the temp files on disk; the previous code took
    # .name off a default (delete=True) NamedTemporaryFile, which relies on
    # immediate GC to unlink the file before we reopen the same path --
    # race-prone and CPython-specific
    name1 = tempfile.NamedTemporaryFile(prefix='ks-test', delete=False).name
    name2 = tempfile.NamedTemporaryFile(prefix='ks-test', delete=False).name
    len1 = 1000000
    len2 = 100000
    # 'wb' suffices -- the '+' (read access) was never used
    with open(name1, 'wb') as f:
        f.write(len1 * b'0')
    with open(name2, 'wb') as f:
        f.write(len2 * b'0')
    remote1 = dp(name1)
    remote1.rpc_url = integration.url1
    remote2 = dp(name2)
    remote2.rpc_url = integration.url2
    remote1.save()
    remote2.save()
    c.bc(routing_func, remote1, remote2)
    c.bc(routing_func, remote1, remote2)
    c.bc(routing_func, remote1, remote2)
    c.execute()
    results = c.br()
    assert results == [integration.url1, integration.url2, integration.url3]
def test_remote_data_source_conversions():
    """Remote data sources can be accessed as an object, a local path, or
    raw data -- exercise every conversion between the three forms.
    """
    frame = pd.DataFrame({'a' : np.arange(100000)})
    expected = frame.shape

    def unpickle_file(path):
        # read a pickled frame back off disk
        with open(path, 'rb') as f:
            return pickle.load(f)

    # start with a python object -- we should be able to convert to a
    # local path and to raw bytes
    source = do(frame)
    assert unpickle_file(source.local_path()).shape == expected
    assert pickle.loads(source.raw()).shape == expected

    # start with raw data -- should be able to convert to an object and
    # to a local path
    source = dr(source.raw())
    assert source.obj().shape == expected
    assert unpickle_file(source.local_path()).shape == expected

    # start with a file -- should be able to convert to an object and to
    # raw bytes
    source = dp(source.local_path())
    assert source.obj().shape == expected
    assert pickle.loads(source.raw()).shape == expected
def test_remote_data_source_round_trip():
    """Saved remote data sources can be referenced by data url and used in
    remote calls, both as a pickled object and as a file payload.

    NOTE(review): this was previously a second definition of
    test_remote_data_source_conversions, which silently shadowed the earlier
    test of that name so it never ran under pytest; renamed to fix the
    collision.
    """
    setup_client(integration.url1)
    c = client()
    df1 = pd.DataFrame({'a' : np.arange(100)})
    # object-backed source: save to the server, call remotely via data url
    obj = do(df1)
    obj.save()
    c.bc(remote_obj_func, du(obj.data_url))
    c.execute()
    result = c.br()[0]
    assert result.shape == df1.shape
    # file-backed source: same round trip, result comes back pickled
    obj = dp(obj.local_path())
    obj.save()
    c.bc(remote_file, du(obj.data_url))
    c.execute()
    result = c.br()[0]
    result = pickle.loads(result)
    assert result.shape == df1.shape
# Demo script: round-trip a DataFrame through the remote data API.
setup_client("http://localhost:6323/")
c = client()

# build a largish frame and stage it in a local HDF5 file
df = pd.DataFrame({'a' : np.arange(2000000)})
store = pd.HDFStore('test.hdf5')
store['df'] = df
store.close()

"""dp is a convenience function, equivalent to RemoteData(local_path=<path>)
We construct a remote data object, and save the data to the server (which
generates a url).  Then we create a new RemoteData pointer with du (short for
data url, equivalent to RemoteData(data_url=<data_url>) and we use that in a
function call
"""
remote = dp("test.hdf5")
remote.save(prefix="testdata/test")
# single-argument print() is valid on both python 2 and 3; the old
# `print x` statement form is a syntax error under python 3
print(remote.data_url)
new_remote = du(remote.data_url)

def head(obj, name):
    # open the HDF5 payload locally and return the first 10 rows of `name`;
    # close the store even if select() raises (the old version leaked it)
    store = pd.HDFStore(obj.local_path())
    try:
        return store.select(name).head(10)
    finally:
        store.close()

c.bc(head, new_remote, 'df')
c.execute()
result = c.br()[0]
print(result)

"""do is short for dataobject, equivalent to RemoteData(obj=<obj>)
"""