from dask.distributed import Client


def square(x):
    """Return x squared."""
    return x ** 2


if __name__ == "__main__":
    # Create the Client under the main guard: distributed spawns worker
    # processes that re-import this module, and an unguarded Client() would
    # recurse on spawn-based platforms (Windows/macOS).
    client = Client()  # connect to a local Dask cluster

    data = [1, 2, 3, 4, 5]
    # client.map returns a list of Future objects immediately (lazy).
    futures = client.map(square, data)
    # gather() blocks until all futures resolve and returns their values.
    # (Printing the futures themselves, or calling .compute() on the list,
    # does not yield the results — a list of Futures has no .compute().)
    results = client.gather(futures)
    print(results)  # [1, 4, 9, 16, 25]

    client.close()
import dask.dataframe as dd
from dask.distributed import Client

if __name__ == "__main__":
    # Client under the main guard so spawned workers can safely re-import
    # this module.
    client = Client()  # connect to a Dask cluster

    # Lazily read a potentially larger-than-memory CSV of customer
    # transactions; Dask partitions it across the cluster's workers.
    df = dd.read_csv('customer_transactions.csv')

    # Total amount spent per customer. This only builds a lazy task graph;
    # .compute() below executes it in parallel on the workers.
    # NOTE: the aggregation itself is already distributed — there is no need
    # (and no valid way) to groupby the aggregated Series again or to
    # client.map over it; a Dask Series cannot be grouped by a column name.
    total = df.groupby('customer_id').amount.sum()

    results = total.compute()
    print(results)  # per-customer totals, e.g. [100.55, 155.75, 50.0, ...]

    client.close()