# Memoization demo: submit the same call several times to a kitchensink
# server listening on localhost:6323.
import logging
import pandas as pd
import numpy as np
from kitchensink.clients.http import Client
from kitchensink.data import RemoteData
from kitchensink import settings

settings.setup_client("http://localhost:6323/")
c = Client(settings.rpc_url)
# NOTE(review): presumably clears previously stored entries matching
# "memoize*" so the demo starts from a clean state -- confirm against
# Client.reducetree.
c.reducetree("memoize*")


def add(x, y):
    """Return ``x + y``."""
    return x + y


# Flag read by kitchensink; presumably opts this function into result
# memoization -- TODO confirm.
add.ks_memoize = True

# Queue, run and collect the identical call three times in a row; the
# collected results are intentionally discarded.
for _ in range(3):
    c.bc(add, 1, 2)
    c.execute()
    c.br()
# Multi-node demo: defines a helper that adds two remote objects and stores
# the sum as a new remote object.
import logging
import time
import pandas as pd
import numpy as np
from kitchensink.clients.http import Client
from kitchensink.data import RemoteData
from kitchensink import settings

settings.setup_client("http://localhost:6323/")
c = Client(settings.rpc_url)

"""Assuming you have 3 hosts, the first setup with the command:
python -m kitchensink.scripts.start --datadir /tmp/data1 --num-workers 3
And the second 2:
python -m kitchensink.scripts.start --datadir /tmp/data2 --no-redis --node-url=http://localhost:6324/ --num-workers 2
python -m kitchensink.scripts.start --datadir /tmp/data3 --no-redis --node-url=http://localhost:6325/ --num-workers 2
"""


def test_func(a, b, desired_host=None):
    """Add the payloads of ``a`` and ``b`` and save the sum as RemoteData.

    Parameters
    ----------
    a, b
        Objects exposing ``obj()``; the two returned values are combined
        with ``+``.
    desired_host
        Only printed next to ``settings.host_url`` so the requested host can
        be compared by eye with the host that actually ran the function.

    Returns
    -------
    RemoteData
        The newly created object wrapping the sum, after ``save()``.
    """
    # Re-imported inside the body; presumably so the name resolves in the
    # process that executes the function rather than the submitting client
    # -- TODO confirm.
    from kitchensink import settings

    # Single-argument form: prints the same "<desired> <actual>" text as the
    # Python 2 statement `print a, b`, and also parses on Python 3.
    print("%s %s" % (desired_host, settings.host_url))

    total = a.obj() + b.obj()
    result = RemoteData(obj=total)
    result.save()
    return result
# @remote decorator demo: call the same function directly and through the
# kitchensink client, printing each result.
import logging
import pandas as pd
import numpy as np
from kitchensink.clients.http import Client
from kitchensink.data import du, do
from kitchensink import settings
from kitchensink.utils.decorators import remote

settings.setup_client("http://localhost:6323/")
c = Client(settings.rpc_url)

# A small frame, plus a saved kitchensink wrapper around it.
df = pd.DataFrame({'a' : np.arange(3)})
obj = do(df)
obj.save()


@remote
def mult(x):
    """Return ``2 * x`` wrapped with ``do``."""
    return do(2 * x)


# Every print below has a single operand, so the parenthesized form emits
# the same output on Python 2 while also being valid Python 3 syntax.
print("**LOCAL")
print(mult(df).obj())  # executes locally
print(mult(obj).obj())  # executes locally

# Queue the call on the client, run it, then read the first result.
c.bc(mult, df)
c.execute()
print("**REMOTE")
print(c.br()[0].obj())  # executed remote
# Pipeline demo: push a 100000-row DataFrame through RemoteData.pipeline and
# print what the server reports about it.
import logging
import time
import pandas as pd
import numpy as np
from kitchensink.clients.http import Client
from kitchensink.data import RemoteData
from kitchensink import settings

settings.setup_client("http://localhost:6323/")
c = Client(settings.rpc_url)

"""follow multi node instructions from README.md """

df = pd.DataFrame({'a' : np.arange(100000)})
remote = RemoteData(obj=df)
# NOTE(review): presumably uploads/distributes the object under URLs
# starting with the given prefix -- confirm against RemoteData.pipeline.
retval = remote.pipeline(prefix='pipeline_test')

# Single-operand prints: identical output on Python 2, valid on Python 3.
print(retval)
print(c.data_info([remote.data_url]))