def test_data_routing_mulitple_sources():
    # Data routing with multiple data sources.
    # A large data source is attached to url1 and a smaller one to url2,
    # then three calls referencing both sources are submitted and executed.
    # Expected placement: url1 has first priority, url2 second, url3 third.

    setup_client(integration.url1)
    c = client()
    len1 = 1000000
    len2 = 100000
    # delete=False keeps each file on disk after its handle is closed.
    # The earlier NamedTemporaryFile(...).name idiom discarded the temporary
    # object right away, so the file was removed and the same path was then
    # re-created with open().  That is a name-reuse race, and on interpreters
    # without reference counting the late finalizer could unlink the data
    # file after it had been written.
    # As before, the files are not removed by this test.
    with tempfile.NamedTemporaryFile(prefix='ks-test', delete=False) as f:
        f.write(len1 * b'0')
        name1 = f.name
    with tempfile.NamedTemporaryFile(prefix='ks-test', delete=False) as f:
        f.write(len2 * b'0')
        name2 = f.name
    remote1 = dp(name1)
    remote1.rpc_url = integration.url1
    remote2 = dp(name2)
    remote2.rpc_url = integration.url2
    remote1.save()
    remote2.save()
    # Three identical calls; each one receives both data sources.
    c.bc(routing_func, remote1, remote2)
    c.bc(routing_func, remote1, remote2)
    c.bc(routing_func, remote1, remote2)
    c.execute()
    results = c.br()
    assert results == [integration.url1, integration.url2, integration.url3]
def test_remote_data_source_local_conversions():
    # Remote data sources can be accessed as an object, a local path, or raw
    # data; this test checks the conversions between all three.
    #
    # NOTE: this function previously shared the name
    # test_remote_data_source_conversions with a later definition in this
    # module.  The later definition rebound the name, so this test was never
    # collected or run.  It now has its own name; the later test keeps the
    # original one.

    df1 = pd.DataFrame({'a' : np.arange(100000)})
    shape = df1.shape

    # Start with a python object - we should be able to convert to raw data
    # and to a local path.
    obj = do(df1)
    path = obj.local_path()
    with open(path, "rb") as f:
        df = pickle.load(f)
    assert df.shape == shape
    df = pickle.loads(obj.raw())
    assert df.shape == shape

    # Start with raw data - we should be able to convert to an object and to
    # a local path.
    obj = dr(obj.raw())
    assert obj.obj().shape == shape
    path = obj.local_path()
    with open(path, 'rb') as f:
        df = pickle.load(f)
    assert df.shape == shape

    # Start with a file - we should be able to convert to an object and to
    # raw data.
    obj = dp(obj.local_path())
    assert obj.obj().shape == shape
    df = pickle.loads(obj.raw())
    assert df.shape == shape
def test_remote_data_source_conversions():
    # Save a data source, pass it to calls submitted through the client by
    # its data url, and check that the DataFrame shape comes back unchanged.
    setup_client(integration.url1)
    c = client()
    frame = pd.DataFrame({'a' : np.arange(100)})
    expected_shape = frame.shape

    # Source built from the in-memory object, handed to remote_obj_func.
    source = do(frame)
    source.save()
    c.bc(remote_obj_func, du(source.data_url))
    c.execute()
    fetched = c.br()[0]
    assert fetched.shape == expected_shape

    # Source rebuilt from the local path of the previous one, handed to
    # remote_file; the returned value is unpickled before comparing.
    source = dp(source.local_path())
    source.save()
    c.bc(remote_file, du(source.data_url))
    c.execute()
    payload = c.br()[0]
    fetched = pickle.loads(payload)
    assert fetched.shape == expected_shape
# Example #4
# 0
setup_client("http://localhost:6323/")
c = client()
df = pd.DataFrame({'a' : np.arange(2000000)})
# Write the frame to test.hdf5 under the key 'df'.  try/finally makes sure
# the store handle is closed even when the write raises.
store = pd.HDFStore('test.hdf5')
try:
    store['df'] = df
finally:
    store.close()

"""dp is a convenience function, equivalent to RemoteData(local_path=<path>)
We construct a remote data object, and save the data to the server
(which  generates a url).  Then we create a new RemoteData pointer with du
(short for data url, equivalent to RemoteData(data_url=<data_url>)
and we use that in a function call
"""

remote = dp("test.hdf5")
remote.save(prefix="testdata/test")
# Single-argument print(...) prints the same text on Python 2 and is also
# valid Python 3, unlike the bare print statement.
print(remote.data_url)

new_remote = du(remote.data_url)
def head(obj, name):
    """Return the first 10 rows of the table stored under `name`.

    `obj` must provide ``local_path()``; the returned path is opened with
    ``pd.HDFStore`` and `name` is the key selected from that store.
    """
    store = pd.HDFStore(obj.local_path())
    try:
        return store.select(name).head(10)
    finally:
        # Release the file handle; previously the store was left open after
        # the rows had been read.
        store.close()

# Submit head(new_remote, 'df') through the client, execute it, and print the
# first returned result.
c.bc(head, new_remote, 'df')
c.execute()
result = c.br()[0]
# Single-argument print(...) prints the same text on Python 2 and is also
# valid Python 3, unlike the bare print statement.
print(result)

"""do is short for dataobject, equivalent to RemoteData(obj=<obj>)
"""