Ejemplo n.º 1
0
def usage(env_name):
    """Print the quick-start guide for a freshly created conda environment.

    The guide lists the prerequisites (Databricks CLI profile, ssh key pair)
    and the most common ``databrickslabs-jupyterlab`` invocations. It is
    emitted through ``print_ok``.

    Args:
        env_name: Name of the conda environment; substituted into the
            ``conda activate`` line of the guide.
    """
    # The only %-placeholder in the text is the environment name in step 1.
    print_ok("""
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
                              Q U I C K S T A R T
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 

Prerequisites:
--------------

- Install Databricks CLI and configure profile(s) for your cluster(s)

  AWS: https://docs.databricks.com/user-guide/dev-tools/databricks-cli.html
  Azure: https://docs.azuredatabricks.net/user-guide/dev-tools/databricks-cli.html

- Create an ssh key pair called ~/.ssh/id_<profile> for each cluster and 
  add the public key to the cluster SSH configuration


databrickslabs-jupyterlab:
--------------------------

1) Show help

    conda activate %s
    databrickslabs-jupyterlab -h

2) Create jupyter kernel for remote cluster

    Databricks on AWS:
        databrickslabs-jupyterlab <profile> -k [-i cluster-id]
    
    Azure Databricks:
        databrickslabs-jupyterlab <profile> -k -o <organisation>

3) Compare local and remote python package versions
    
    databrickslabs-jupyterlab <profile> -v all|same|diff

4) Copy Personal Access token for databricks cluster to clipboard (same on AWS and Azure)

    databrickslabs-jupyterlab <profile> -c

5) Start jupyter lab to use the kernel(s) created in 2)

    databrickslabs-jupyterlab <profile> -l [-i cluster-id]


6) Check currently available profiles 

    databrickslabs-jupyterlab -p

7) Download a demo notebook from docs.databricks.com (experimental)

    databrickslabs-jupyterlab -n https://docs.databricks.com/_static/notebooks/delta/xyz.html


""" % env_name)
Ejemplo n.º 2
0
# Packages whose reported version is cut down to a numeric prefix before it is
# pinned; the first group of each pattern is the part that is kept.
_VERSION_PREFIX_PATTERNS = {
    "hyperopt": r"(\d+\.\d+\.\d+)",
    "torchvision": r"(\d+\.\d+\.\d+)",
    "tensorboardx": r"(\d+\.\d+)",
}


def _pinned_version(name, version):
    """Return the version string to pin for package ``name``."""
    pattern = _VERSION_PREFIX_PATTERNS.get(name)
    if pattern is None:
        return version
    found = re.match(pattern, version)
    # An unexpected version format is pinned as reported instead of crashing.
    return found.group(1) if found else version


def _pin_python(master_yml, python_version):
    """Insert a ``python=<version>`` entry after the first ``dependencies`` line."""
    lines = master_yml.split("\n")
    for index, line in enumerate(lines):
        if line.startswith("dependencies"):
            # Safe to mutate here: the loop is left right after the insert.
            lines.insert(index + 1, "  - python=%s" % python_version)
            break
    return "\n".join(lines)


def install(profile, host, token, cluster_id, cluster_name, use_whitelist):
    """Create a local conda environment mirroring a remote cluster's packages.

    The package list is fetched with ``get_remote_packages``, filtered through
    ``WHITELIST`` or ``BLACKLIST``, and appended as ``name==version`` pins to
    the packaged ``lib/env.yml`` template. The resulting file is written to a
    temporary directory and handed to ``install_env``; afterwards the lab
    extensions listed in ``lib/labextensions.txt`` are installed and the
    quick-start guide is printed.

    Args:
        profile: Not used by this function.
        host: Passed through to ``get_remote_packages``.
        token: Passed through to ``get_remote_packages``.
        cluster_id: Passed through to ``get_remote_packages``.
        cluster_name: Basis for the default conda environment name (spaces
            are replaced by underscores).
        use_whitelist: If true, keep only packages named in ``WHITELIST``;
            otherwise drop the packages named in ``BLACKLIST``.
    """
    print(
        "\n* Installation of local environment to mirror a remote Databricks cluster"
    )
    result = get_remote_packages(cluster_id, host, token)
    if result[0] != 0:
        print_error(result[1])
        bye(1)
    libs = json.loads(result[1])

    if use_whitelist:
        print_ok("   => Using whitelist to select packages")
        ds_libs = [lib for lib in libs if lib["name"].lower() in WHITELIST]
    else:
        print_ok("   => Using blacklist to select packages")
        ds_libs = [lib for lib in libs if lib["name"].lower() not in BLACKLIST]

    python_version = None
    pins = []
    for lib in ds_libs:
        if lib["name"] == "python":  # just artificially added
            python_version = lib["version"]
        else:
            pins.append(
                "    - %s==%s\n"
                % (lib["name"], _pinned_version(lib["name"], lib["version"]))
            )
    ds_yml = "".join(pins)

    if python_version is None:
        # Without a "python" entry the environment cannot be pinned to the
        # cluster's interpreter; report it the same way as a failed fetch.
        # NOTE(review): like the check above, this presumes bye() ends the run.
        print_error(
            "Could not determine the python version of the remote cluster")
        bye(1)

    module_path = os.path.dirname(databrickslabs_jupyterlab.__file__)
    env_file = os.path.join(module_path, "lib", "env.yml")
    with open(env_file, "r") as fd:
        master_yml = _pin_python(fd.read(), python_version)

    print("\n Installed environment \n")
    print(master_yml)
    print("\n    # Data Science Libs\n")
    print(ds_yml + "\n")

    with tempfile.TemporaryDirectory() as tmpdir:
        # The temporary file must still exist while install_env runs, so the
        # installation steps stay inside the with-block.
        env_file = os.path.join(tmpdir, "env.yml")
        with open(env_file, "w") as fd:
            fd.write(master_yml)
            fd.write("\n    # Data Science Libs\n")
            fd.write(ds_yml)
            fd.write("\n")

        env_name = cluster_name.replace(" ", "_")
        answer = input(
            "    => Provide a conda environment name (default = %s): " %
            env_name)
        if answer != "":
            env_name = answer.replace(" ", "_")

        install_env(env_file, env_name)

        labext_file = os.path.join(module_path, "lib", "labextensions.txt")
        install_labextensions(labext_file, env_name)

    usage(env_name)