def spin_up_cluster(n_workers, *, env_name="cmip6_derived_cloud_datasets",
                    conda_file="environment.yml", region="us-west-2"):
    """Create a Coiled software environment and cluster, and connect to it.

    The software environment is built from a conda environment file and
    registered under ``env_name``; the cluster is then started with that
    environment. The cluster name and dashboard link are printed so the run
    can be monitored.

    Args:
        n_workers: Number of workers to request for the cluster.
        env_name: Name under which the Coiled software environment is
            created and which the cluster uses.
        conda_file: Path to the conda environment file describing the
            software environment.
        region: Value passed as ``backend_options["region"]`` to the cluster.

    Returns:
        A ``(client, cluster)`` tuple: the ``Client`` connected to the new
        cluster, and the ``coiled.Cluster`` itself.
    """
    # Build the software environment from the conda file. This is slower than
    # reusing a prebuilt image, but it is the variant that currently works.
    # TODO: switch to the Pangeo container once it can be made to work:
    #     container='pangeo/pangeo-notebook:latest'
    #     (matches the Pangeo Cloud AWS production cluster)
    coiled.create_software_environment(name=env_name, conda=conda_file)

    # Start a Dask cluster that runs on the software environment created above.
    cluster = coiled.Cluster(
        software=env_name,
        n_workers=n_workers,
        backend_options={"region": region},
        shutdown_on_close=True,
    )
    client = Client(cluster)
    print("Cluster Name:", cluster.name)
    print("Dashboard:", client.dashboard_link)
    print('\n\n\n----------------------------')
    return client, cluster
Esempio n. 2
0
    def __init__(self):
        """Load the run configuration and connect a Dask client.

        Reads ``config/config.yaml`` relative to the current working
        directory, fills in defaults for every optional key that is missing,
        and stores the resulting mapping on ``self.config_values``. Depending
        on the required ``use_local_cluster`` key, either a ``LocalCluster``
        or a Coiled cluster is started, and a ``Client`` connected to it is
        stored on ``self.client``.

        Raises:
            FileNotFoundError: If ``config/config.yaml`` does not exist.
            KeyError: If the configuration does not define
                ``use_local_cluster`` (it has no default).
        """
        config_file = os.path.join(os.getcwd(), "config", "config.yaml")
        if not os.path.isfile(config_file):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                    config_file)
        with open(config_file) as file:
            # An empty YAML document parses to None; fall back to an empty
            # mapping so the defaults below can still be applied.
            # NOTE(review): full_load accepts more YAML tags than safe_load;
            # prefer yaml.safe_load if this file could ever be untrusted.
            self.config_values = yaml.full_load(file) or {}

        # Optional settings and the values used when the file omits them.
        # Memory sizes are plain numbers; "GiB" is appended when they are
        # passed to Coiled below.
        defaults = {
            "cluster_name": "chess-cluster",
            "software_environment_name": "chess-env",
            "n_workers": 50,
            "worker_cpu": 1,
            "worker_memory": 8,
            "scheduler_memory": 16,
            "scheduler_cpu": 4,
            "game_batch_size": 30,
            "timeout_per_game": 60,
            "debug": False,
        }
        for key, value in defaults.items():
            self.config_values.setdefault(key, value)

        config = self.config_values
        if config["use_local_cluster"]:
            cluster = LocalCluster(n_workers=config["n_workers"],
                                   threads_per_worker=1)
        else:
            # Register the software environment from requirements.txt, then
            # start a Coiled cluster that uses it.
            coiled.create_software_environment(
                name=config["software_environment_name"],
                pip="requirements.txt")
            cluster = coiled.Cluster(
                name=config["cluster_name"],
                n_workers=config["n_workers"],
                worker_cpu=config["worker_cpu"],
                worker_memory="{}GiB".format(config["worker_memory"]),
                scheduler_memory="{}GiB".format(config["scheduler_memory"]),
                scheduler_cpu=config["scheduler_cpu"],
                software=config["software_environment_name"])

        self.client = Client(cluster)
Esempio n. 3
0
        param["rate_drop"] = trial.suggest_float("rate_drop",
                                                 1e-8,
                                                 1.0,
                                                 log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop",
                                                 1e-8,
                                                 1.0,
                                                 log=True)

    bst = xgb.train(param, dtrain)
    preds = bst.predict(dtest)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(y_test, pred_labels)
    return accuracy


if __name__ == "__main__":
    # Start a five-worker Coiled cluster and attach a client to it. Both are
    # context managers, so they are released (client first, then cluster)
    # when the block exits.
    with coiled.Cluster(
            n_workers=5, configuration="jrbourbeau/optuna"
    ) as cluster, Client(cluster) as client:
        print(f"Dask dashboard is available at {client.dashboard_link}")
        # Do not start optimizing until all five workers are available.
        client.wait_for_workers(5)

        # Study whose storage is wrapped by dask_optuna.DaskStorage and
        # backed by a local SQLite database file.
        storage = dask_optuna.DaskStorage("sqlite:///coiled-example.db")
        study = optuna.create_study(storage=storage, direction="maximize")

        # Run the trials through joblib's Dask backend.
        with joblib.parallel_backend("dask"):
            study.optimize(objective, n_trials=100, n_jobs=-1)

        print("Best params:")
        pprint(study.best_params)
Esempio n. 4
0
# from
# https://examples.dask.org/machine-learning/torch-prediction.html

####################
# Setup coiled/ dask

import coiled

# Coiled cluster running the "examples/hyperband-optimization" software
# environment.
cluster = coiled.Cluster(
    software="examples/hyperband-optimization",
    n_workers=2,  # alternative size noted by the author: 10
)

import dask.distributed

# Connect a Dask client to the Coiled cluster.
client = dask.distributed.Client(cluster)

# Local alternative that does not use Coiled:
#   from distributed import Client
#   client = Client(n_workers=2, threads_per_worker=2)

print(client)

####################
# Download data

import urllib.request
import zipfile

# Fetch the example archive to ./data.zip and unpack it into the current
# working directory.
filename, _ = urllib.request.urlretrieve(
    "https://download.pytorch.org/tutorial/hymenoptera_data.zip", "data.zip")
# Open the archive in a context manager so its file handle is closed
# deterministically as soon as extraction finishes.
with zipfile.ZipFile(filename) as archive:
    archive.extractall()
if __name__ == "__main__":
    import sys

    # The last command-line argument selects the backend: "coiled" runs on a
    # Coiled cluster, anything else runs on a local cluster.
    if sys.argv[-1] != "coiled":
        client = Client(n_workers=10, threads_per_worker=1)
        filename_suffix = ""
    else:
        # Imported lazily so local runs do not need the coiled package.
        import coiled

        # The software environment can be overridden via the
        # "benchmark.software" Dask config key.
        software = dask.config.get(
            "benchmark.software", "gjoseph92/scheduler-benchmark"
        )
        print(f"Using software environment {software!r} for cluster.")

        # Time how long cluster creation takes.
        start = time.perf_counter()
        cluster = coiled.Cluster(
            software=software,
            n_workers=10,
            # NOTE: Coiled VM backend required to get these worker resources
            worker_cpu=1,
            worker_memory="54 GiB",
            worker_options={"nthreads": 1},
            scheduler_cpu=1,
            scheduler_memory="8 GiB",
            shutdown_on_close=True,
        )
        elapsed = time.perf_counter() - start
        print(f"Created Coiled cluster in {elapsed / 60:.1f} min")

        client = Client(cluster)
        filename_suffix = "-coiled"

    print(client)
    print(f"Distributed Version: {distributed.__version__}")
Esempio n. 6
0
    start = time.perf_counter()
    df2 = shuffled.persist()
    distributed.wait(df2)
    elapsed = time.perf_counter() - start
    return elapsed


# Script entry point: create a Coiled cluster for the profiling run and
# connect a client to it.
if __name__ == "__main__":
    # Target worker count reported in the "Waiting for ..." message below.
    # NOTE(review): the cluster itself is created with n_workers=1;
    # presumably it is scaled up to n_workers in code past this point —
    # confirm.
    n_workers = 100
    cluster = coiled.Cluster(
        software="gjoseph92/profiling",
        n_workers=1,
        worker_cpu=1,
        worker_memory="4 GiB",
        scheduler_cpu=4,
        scheduler_memory="8 GiB",
        shutdown_on_close=True,
        scheduler_options={"idle_timeout": "1 hour"},
        environ=dict(
            # Limit the scheduler's HTTP routes to the health endpoint only;
            # everything else may cause GC cycles.
            DASK_DISTRIBUTED__SCHEDULER__HTTP__ROUTES=
            "['distributed.http.health']", ),
    )
    client = distributed.Client(cluster)
    # Disabled check that would abort the run when the scheduler is not a
    # compiled build:
    # if not client.run_on_scheduler(lambda: distributed.scheduler.COMPILED):
    #     print("Scheduler is not compiled!")
    #     client.shutdown()
    #     client.close()
    #     sys.exit(1)

    print(f"Waiting for {n_workers} workers...")
    try:
Esempio n. 7
0
"""
Coiled quickstart to run on pull requests as a basic test for the soundness
of the `coiled` default environments.
"""
import os

import coiled
import dask.dataframe as dd
from dask.distributed import Client

# Name of the Coiled software environment under test. It must be provided
# through the SOFTWARE_ENV environment variable; a missing variable raises
# KeyError.
SOFTWARE = os.environ["SOFTWARE_ENV"]

# Ten-worker cluster on that environment, with spot instances disabled.
cluster = coiled.Cluster(
    n_workers=10, software=SOFTWARE, backend_options={"spot": False}
)
client = Client(cluster)


df = dd.read_csv(
    "s3://nyc-tlc/trip data/yellow_tripdata_2019-*.csv",
    dtype={
        "payment_type": "UInt8",
        "VendorID": "UInt8",
        "passenger_count": "UInt8",
        "RatecodeID": "UInt8",
    },
    storage_options={"anon": True},
    blocksize="16 MiB",