import coiled
from dask.distributed import Client


def spin_up_cluster(n_workers):
    """Set up the Coiled cluster before executing the run."""
    env_name = "cmip6_derived_cloud_datasets"
    # Use the Pangeo container for the software environment.
    coiled.create_software_environment(
        name=env_name,
        conda="environment.yml",  # building from the conda file takes longer, but that's okay for now
        # Couldn't get the container option to work yet:
        # container='pangeo/pangeo-notebook:latest',  # matches Pangeo Cloud AWS production cluster
    )

    # Create a Dask cluster which uses the software environment.
    cluster = coiled.Cluster(
        software=env_name,
        n_workers=n_workers,
        backend_options={"region": "us-west-2"},
        shutdown_on_close=True,
    )
    client = Client(cluster)
    print("Cluster Name:", cluster.name)
    print("Dashboard:", client.dashboard_link)
    print("\n\n\n----------------------------")
    return client, cluster
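
# Hypothetical driver (a sketch, not part of the original file): run a
# workload against the returned client, then tear the cluster down.
if __name__ == "__main__":
    client, cluster = spin_up_cluster(n_workers=4)
    try:
        # ... submit work against `client` here ...
        pass
    finally:
        client.close()
        cluster.close()  # shutdown_on_close=True also stops the cloud VMs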
def __init__(self):
    config_file = os.path.join(os.getcwd(), "config", "config.yaml")
    if not os.path.isfile(config_file):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), config_file)
    with open(config_file) as file:
        self.config_values = yaml.full_load(file)

    # Fill in defaults for any settings missing from config.yaml.
    defaults = {
        "cluster_name": "chess-cluster",
        "software_environment_name": "chess-env",
        "n_workers": 50,
        "worker_cpu": 1,
        "worker_memory": 8,
        "scheduler_memory": 16,
        "scheduler_cpu": 4,
        "game_batch_size": 30,
        "timeout_per_game": 60,
        "debug": False,
    }
    for key, value in defaults.items():
        self.config_values.setdefault(key, value)

    # Note: "use_local_cluster" has no default and must be set in config.yaml.
    if self.config_values["use_local_cluster"]:
        cluster = LocalCluster(
            n_workers=self.config_values["n_workers"], threads_per_worker=1
        )
    else:
        coiled.create_software_environment(
            name=self.config_values["software_environment_name"],
            pip="requirements.txt",
        )
        cluster = coiled.Cluster(
            name=self.config_values["cluster_name"],
            n_workers=self.config_values["n_workers"],
            worker_cpu=self.config_values["worker_cpu"],
            worker_memory=str(self.config_values["worker_memory"]) + "GiB",
            scheduler_memory=str(self.config_values["scheduler_memory"]) + "GiB",
            scheduler_cpu=self.config_values["scheduler_cpu"],
            software=self.config_values["software_environment_name"],
        )
    self.client = Client(cluster)
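
# Illustrative example (not from the original file) of a config.yaml this
# constructor accepts; "use_local_cluster" is the only key without a default.
import yaml

example_config = """
use_local_cluster: true
n_workers: 8
worker_memory: 8       # GiB, converted to "8GiB" for coiled.Cluster
debug: false
"""
print(yaml.full_load(example_config))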
param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True) param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True) bst = xgb.train(param, dtrain) preds = bst.predict(dtest) pred_labels = np.rint(preds) accuracy = sklearn.metrics.accuracy_score(y_test, pred_labels) return accuracy if __name__ == "__main__": with coiled.Cluster(n_workers=5, configuration="jrbourbeau/optuna") as cluster: with Client(cluster) as client: print(f"Dask dashboard is available at {client.dashboard_link}") client.wait_for_workers(5) storage = dask_optuna.DaskStorage("sqlite:///coiled-example.db") study = optuna.create_study(storage=storage, direction="maximize") with joblib.parallel_backend("dask"): study.optimize(objective, n_trials=100, n_jobs=-1) print("Best params:") pprint(study.best_params)
# from https://examples.dask.org/machine-learning/torch-prediction.html

####################
# Set up Coiled/Dask

import coiled

cluster = coiled.Cluster(
    n_workers=2,  # 10
    software="examples/hyperband-optimization",
)

import dask.distributed

client = dask.distributed.Client(cluster)
# Local alternative:
# from distributed import Client
# client = Client(n_workers=2, threads_per_worker=2)
print(client)

####################
# Download data

import urllib.request
import zipfile

filename, _ = urllib.request.urlretrieve(
    "https://download.pytorch.org/tutorial/hymenoptera_data.zip", "data.zip"
)
zipfile.ZipFile(filename).extractall()
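
# Sanity-check sketch (not from the original file): the zip extracts to a
# hymenoptera_data/ directory with train/ and val/ image folders.
import os

for root, dirs, files in os.walk("hymenoptera_data"):
    print(root, len(files))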
if __name__ == "__main__": import sys if sys.argv[-1] == "coiled": import coiled software = dask.config.get("benchmark.software", "gjoseph92/scheduler-benchmark") print(f"Using software environment {software!r} for cluster.") start = time.perf_counter() cluster = coiled.Cluster( n_workers=10, worker_memory="54 GiB", worker_cpu=1, # ^ NOTE: Coiled VM backend required to get these resources worker_options={"nthreads": 1}, scheduler_cpu=1, scheduler_memory="8 GiB", software=software, shutdown_on_close=True, ) elapsed = time.perf_counter() - start print(f"Created Coiled cluster in {elapsed / 60:.1f} min") client = Client(cluster) filename_suffix = "-coiled" else: client = Client(n_workers=10, threads_per_worker=1) filename_suffix = "" print(client) print(f"Distributed Version: {distributed.__version__}")
    start = time.perf_counter()
    df2 = shuffled.persist()
    distributed.wait(df2)
    elapsed = time.perf_counter() - start
    return elapsed


if __name__ == "__main__":
    n_workers = 100
    cluster = coiled.Cluster(
        software="gjoseph92/profiling",
        n_workers=1,
        worker_cpu=1,
        worker_memory="4 GiB",
        scheduler_cpu=4,
        scheduler_memory="8 GiB",
        shutdown_on_close=True,
        scheduler_options={"idle_timeout": "1 hour"},
        environ=dict(
            # everything else may cause GC cycles
            DASK_DISTRIBUTED__SCHEDULER__HTTP__ROUTES="['distributed.http.health']",
        ),
    )
    client = distributed.Client(cluster)
    # if not client.run_on_scheduler(lambda: distributed.scheduler.COMPILED):
    #     print("Scheduler is not compiled!")
    #     client.shutdown()
    #     client.close()
    #     sys.exit(1)
    print(f"Waiting for {n_workers} workers...")
    try:
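        # Hypothetical continuation (the original snippet is truncated at the
        # try above): block until the requested workers have arrived.
        client.wait_for_workers(n_workers)
    except Exception:
        # Hypothetical cleanup; the original handler is not shown.
        client.shutdown()
        raise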
""" Coiled quickstart to run on pull requests as a basic test for the soundness of the `coiled` default environments. """ import os import coiled import dask.dataframe as dd from dask.distributed import Client SOFTWARE = os.environ["SOFTWARE_ENV"] cluster = coiled.Cluster( software=SOFTWARE, n_workers=10, backend_options={"spot": False}, ) client = Client(cluster) df = dd.read_csv( "s3://nyc-tlc/trip data/yellow_tripdata_2019-*.csv", dtype={ "payment_type": "UInt8", "VendorID": "UInt8", "passenger_count": "UInt8", "RatecodeID": "UInt8", }, storage_options={"anon": True}, blocksize="16 MiB",