def spin_up_cluster(n_workers):
    """Provision a Coiled Dask cluster and return a connected client.

    Builds (or reuses) the "cmip6_derived_cloud_datasets" software
    environment from the local ``environment.yml``, then starts a cluster
    in us-west-2 that shuts itself down when closed.

    Parameters
    ----------
    n_workers : int
        Number of Dask workers to start.

    Returns
    -------
    (distributed.Client, coiled.Cluster)
    """
    env_name = "cmip6_derived_cloud_datasets"

    # Building from a conda spec is slower than reusing a container image,
    # but keeps the environment in sync with environment.yml.
    coiled.create_software_environment(
        name=env_name,
        conda="environment.yml",
    )

    cluster = coiled.Cluster(
        software=env_name,
        n_workers=n_workers,
        backend_options={"region": "us-west-2"},
        shutdown_on_close=True,
    )
    client = Client(cluster)
    print("Cluster Name:", cluster.name)
    print("Dashboard:", client.dashboard_link)
    print('\n\n\n----------------------------')
    return client, cluster
# Example #2
    def __init__(self):
        """Load config/config.yaml, fill in defaults, and start a Dask cluster.

        Reads the YAML config from ``<cwd>/config/config.yaml``. Depending on
        the ``use_local_cluster`` flag it starts either a ``LocalCluster`` or
        a remote Coiled cluster, and stores a connected client on
        ``self.client``.

        Raises
        ------
        FileNotFoundError
            If the config file does not exist.
        KeyError
            If ``use_local_cluster`` is missing from the config (it has no
            default, matching the original behavior).
        """
        config_file = os.path.join(os.getcwd(), "config", "config.yaml")
        if not os.path.isfile(config_file):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                    config_file)
        with open(config_file) as file:
            self.config_values = yaml.full_load(file)

        # Defaults applied for any key absent from the YAML file; replaces
        # a long chain of `if not "key" in ...` membership checks.
        defaults = {
            "cluster_name": "chess-cluster",
            "software_environment_name": "chess-env",
            "n_workers": 50,
            "worker_cpu": 1,
            "worker_memory": 8,          # GiB
            "scheduler_memory": 16,      # GiB
            "scheduler_cpu": 4,
            "game_batch_size": 30,
            "timeout_per_game": 60,
            "debug": False,
        }
        for key, value in defaults.items():
            self.config_values.setdefault(key, value)

        if self.config_values["use_local_cluster"]:
            cluster = LocalCluster(n_workers=self.config_values["n_workers"],
                                   threads_per_worker=1)
        else:
            coiled.create_software_environment(
                name=self.config_values["software_environment_name"],
                pip="requirements.txt")
            cluster = coiled.Cluster(
                name=self.config_values["cluster_name"],
                n_workers=self.config_values["n_workers"],
                worker_cpu=self.config_values["worker_cpu"],
                worker_memory=str(self.config_values["worker_memory"]) + "GiB",
                scheduler_memory=str(self.config_values["scheduler_memory"]) +
                "GiB",
                scheduler_cpu=self.config_values["scheduler_cpu"],
                software=self.config_values["software_environment_name"])

        self.client = Client(cluster)
# Example #3
import coiled

# Software environment for the cluster workers, built from the local
# conda spec.
software_name = "examples/optuna-xgboost"
coiled.create_software_environment(
    name=software_name,
    conda="environment.yaml",
)

# Software environment for the hosted notebook job: same conda spec
# layered on the Coiled notebook container image.
software_notebook_name = f"{software_name}-notebook"
coiled.create_software_environment(
    name=software_notebook_name,
    conda="environment.yaml",
    container="coiled/notebook:latest",
)

# Register the notebook as a launchable job.
coiled.create_job_configuration(
    name="examples/optuna",
    software=software_notebook_name,
    command=["/bin/bash", "run.sh"],
    files=["optuna-xgboost.ipynb", "workspace.json", "run.sh"],
    ports=[8888],
    description="Hyperparameter optimization with Optuna",
)
        "python=3.8",
        "dask=2021.3.0",
        "coiled=0.0.37",
        "optuna",
        "numpy",
        "scikit-learn",
        "xgboost",
        "joblib",
    ],
}

# Cluster software environment: the `conda` spec defined above, plus
# dask-optuna installed via pip.
software_name = "examples/optuna-xgboost"
coiled.create_software_environment(
    name=software_name,
    pip=["dask-optuna"],
    conda=conda,
)

# Notebook job environment: identical packages layered on the Coiled
# notebook container image.
software_notebook_name = f"{software_name}-notebook"
coiled.create_software_environment(
    name=software_notebook_name,
    pip=["dask-optuna"],
    conda=conda,
    container="coiled/notebook:latest",
)

coiled.create_job_configuration(
    name="examples/optuna",
    software=software_notebook_name,
import coiled

software_name = "blog-notebooks/xgboost-on-coiled"

# Conda packages for the notebook environment (conda-forge channel).
# NOTE(review): "dask" and "xgboost" appear twice with different pins —
# preserved as-is; the conda solver reconciles them.
_conda_dependencies = [
    "python=3.8",
    "coiled=0.0.36",
    "dask",
    "dask-ml",
    "dask>=2.23.0",
    "fastparquet",
    "matplotlib",
    "pandas>=1.1.0",
    "python-snappy",
    "s3fs",
    "scikit-learn",
    "xgboost>=1.3.0",
    "optuna<2.4.0",
    "numpy",
    "xgboost",
    "joblib",
]

coiled.create_software_environment(
    name=software_name,
    container="coiled/notebook:latest",
    conda={"channels": ["conda-forge"], "dependencies": _conda_dependencies},
    pip=["dask-optuna"],
)

coiled.create_job_configuration(
    name="blog-notebooks/xgboost-on-coiled",
# Example #6
#!/usr/bin/env python3

import coiled
import argparse  # NOTE(review): imported but unused in this visible chunk

# Build the "i2k-2020-mi2" Coiled software environment from the local
# conda spec file.
coiled.create_software_environment(
    name="i2k-2020-mi2",
    conda="environment.yml",
)
    },
    "pytorch": {
        "conda": "hyper-parameter-optimmization/environment.yml"
    },
}

# Worker sizing per example; keys must mirror `software_environments`.
cluster_configurations = {
    "xgboost": {"worker_cpu": 4, "worker_memory": "8 GiB"},
    "pangeo": {"worker_cpu": 4, "worker_memory": "8 GiB"},
    "pytorch": {"worker_cpu": 4, "worker_memory": "16 GiB"},
}

# Every software environment must have a matching cluster configuration.
assert software_environments.keys() == cluster_configurations.keys()

# Build each environment and register a cluster configuration for it
# under the "coiled-examples/" namespace.
for name, spec in software_environments.items():
    full_name = f"coiled-examples/{name}"
    print(f"Building {full_name}:")
    coiled.create_software_environment(name=full_name, **spec)
    coiled.create_cluster_configuration(
        name=full_name,
        software=full_name,
        **cluster_configurations[name],
    )
#!/usr/bin/env python3

import coiled
import argparse

# JupyterLab extensions installed after the conda environment is built.
_post_build_steps = [
    "jupyter labextension install @jupyter-widgets/jupyterlab-manager jupyter-matplotlib jupyterlab-datawidgets [email protected] dask-labextension [email protected] [email protected]",
    "jupyter serverextension enable dask_labextension",
]

# conda=["pip", "nodejs", "graphviz", "compilers"],
coiled.create_software_environment(
    name="sc20-pyhpc",
    conda="environment.yml",
    post_build=_post_build_steps,
)
# Example #9
import coiled

# Environment for the quickstart notebook job, layered on the Coiled
# notebook container image.
software_name = "examples/quickstart-notebook"
coiled.create_software_environment(
    name=software_name,
    container="coiled/notebook:latest",
    conda={
        "channels": ["conda-forge"],
        "dependencies": ["python=3.8", "coiled=0.0.37", "dask=2021.3.0"],
    },
)

# Register the notebook as a launchable job exposing Jupyter on 8888.
coiled.create_job_configuration(
    name="examples/quickstart",
    software=software_name,
    command=["/bin/bash", "run.sh"],
    files=["quickstart.ipynb", "workspace.json", "run.sh"],
    ports=[8888],
    description="Quickly launch a Dask cluster on the cloud with Coiled",
)