Example no. 1
import numpy as np
from dask.distributed import Client, LocalCluster, as_completed

def main():
    # Build a large random dataset and a derived target vector.
    x = np.random.normal(size=(1000000, 5))
    y = x.mean(axis=1)
    # Start a local cluster with four single-threaded workers, 1 GB each.
    cluster = LocalCluster(n_workers=4, threads_per_worker=1, memory_limit='1G')
    client = Client(cluster)
    print(client)
    print("scattering")
    # Broadcast both arrays so every worker holds a local copy.
    [x_ref, y_ref] = client.scatter([x, y], broadcast=True)
    jobs = []
    for e in range(1, 30):
        print(e)
        # train_rf is assumed to be defined elsewhere; see the sketch below.
        jobs.append(client.submit(train_rf, e, x_ref, y_ref))
    # Gather results as they complete; note that del only drops the loop
    # variable, while the jobs list still holds references to the futures.
    for job in as_completed(jobs):
        print(job.result())
        del job
        client.rebalance()
    client.close()
    cluster.close()

if __name__ == "__main__":
    main()
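The snippet never defines train_rf, so here is a minimal sketch of what the submitted task could look like, assuming a scikit-learn random forest whose tree count is the submitted parameter; the model choice, signature, and return value are assumptions, not part of the original:

from sklearn.ensemble import RandomForestRegressor

def train_rf(n_estimators, x, y):
    # Hypothetical task body: fit a random forest with the given number
    # of trees on the scattered data and return its training score.
    model = RandomForestRegressor(n_estimators=n_estimators, n_jobs=1)
    model.fit(x, y)
    return n_estimators, model.score(x, y)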
Example no. 2

import pandas as pd
import dask.dataframe as dd
from dask.distributed import Client

client = Client("10.110.122.238:8888")

df = pd.read_csv('trainingData.csv')
future = client.scatter(df)  # send the pandas dataframe to one worker
ddf = dd.from_delayed([future], meta=df)  # build a dask.dataframe around the remote data
ddf = ddf.repartition(npartitions=20).persist()  # split into smaller partitions
client.rebalance(ddf)  # spread the partitions across all of your workers
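To check that the partitions really did end up spread across the workers, their placement can be inspected; futures_of is part of dask.distributed, but this verification step is an addition to the original example, not part of it:

from dask.distributed import futures_of

# Map each partition's key to the workers currently holding it.
placement = client.who_has(futures_of(ddf))
for key, workers in placement.items():
    print(key, "->", workers)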