Example #1
def prepare_environment(environment: str) -> ApiClient:
    environment_data = get_environment_data(environment)

    config_type, config = pick_config(environment_data)

    api_client = _get_api_client(config, command_name="cicdtemplates-")
    _prepare_workspace_dir(api_client, environment_data["workspace_dir"])

    if config_type == "ENV":
        mlflow.set_tracking_uri(DATABRICKS_MLFLOW_URI)
    elif config_type == "PROFILE":
        mlflow.set_tracking_uri(f'{DATABRICKS_MLFLOW_URI}://{environment_data["profile"]}')
    else:
        raise NotImplementedError(f"Config type: {config_type} is not implemented")

    experiment: Optional[mlflow.entities.Experiment] = mlflow.get_experiment_by_name(environment_data["workspace_dir"])

    # if there is no experiment
    if not experiment:
        mlflow.create_experiment(environment_data["workspace_dir"], environment_data["artifact_location"])
    else:
        # verify experiment location
        if experiment.artifact_location != environment_data["artifact_location"]:
            raise Exception(
                f"Required location of experiment {environment_data['workspace_dir']} "
                f"doesn't match the project defined one: \n"
                f"\t experiment artifact location: {experiment.artifact_location} \n"
                f"\t project artifact location   : {environment_data['artifact_location']} \n"
                f"Change of experiment location is currently not supported in MLflow. "
                f"Please change the experiment name to create a new experiment."
            )

    mlflow.set_experiment(environment_data["workspace_dir"])

    return api_client
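Example #1 calls two helpers, get_environment_data and pick_config, that are not part of the snippet. A rough sketch of what they might do, based on the inlined logic shown in Example #3 below (the real project helpers may differ), is:

from typing import Any, Dict, Tuple

from databricks_cli.configure.provider import (
    DatabricksConfig,
    EnvironmentVariableConfigProvider,
    ProfileConfigProvider,
)


def get_environment_data(environment: str) -> Dict[str, Any]:
    # Look up the environment block in the project file
    # (InfoFile comes from the surrounding project, as in Example #3).
    environment_data = InfoFile.get("environments").get(environment)
    if not environment_data:
        raise Exception(f"No environment {environment} provided in the project file")
    return environment_data


def pick_config(environment_data: Dict[str, Any]) -> Tuple[str, DatabricksConfig]:
    # Prefer environment variables, then fall back to a ~/.databrickscfg profile.
    config = EnvironmentVariableConfigProvider().get_config()
    if config:
        return "ENV", config
    config = ProfileConfigProvider(environment_data["profile"]).get_config()
    if not config:
        raise Exception(
            f"Couldn't get profile with name: {environment_data['profile']}. "
            f"Please check the config settings"
        )
    return "PROFILE", config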
Example #2
def create_client(host, token):
    # Token-only auth: username/password stay empty and TLS verification stays on.
    username = None
    password = None
    insecure = False

    client_config = config_provider.DatabricksConfig(host, username, password,
                                                     token, insecure)
    return cli_configure._get_api_client(client_config)
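A minimal usage sketch for create_client (the workspace URL is a placeholder and the token is read from an environment variable here):

import os

from databricks_cli.sdk import JobsService

client = create_client(
    host="https://example.cloud.databricks.com",  # placeholder workspace URL
    token=os.environ["DATABRICKS_TOKEN"],
)
# The returned ApiClient can be handed to any databricks-cli SDK service.
jobs = JobsService(client).list_jobs()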
Example #3
def prepare_environment(environment: str):
    environment_data = InfoFile.get("environments").get(environment)

    if not environment_data:
        raise Exception(
            f"No environment {environment} provided in the project file")

    config = EnvironmentVariableConfigProvider().get_config()
    if config:
        config_type = "ENV"
        dbx_echo("Using configuration from the environment variables")
    else:
        dbx_echo(
            "No environment variables provided, using the ~/.databrickscfg")
        config = ProfileConfigProvider(
            environment_data["profile"]).get_config()
        config_type = "PROFILE"
        if not config:
            raise Exception(
                f"""Couldn't get profile with name: {environment_data["profile"]}. Please check the config settings"""
            )

    api_client = _get_api_client(config, command_name="cicdtemplates-")
    _prepare_workspace_dir(api_client, environment_data["workspace_dir"])

    if config_type == "ENV":
        mlflow.set_tracking_uri(DATABRICKS_MLFLOW_URI)
    elif config_type == "PROFILE":
        mlflow.set_tracking_uri(
            f'{DATABRICKS_MLFLOW_URI}://{environment_data["profile"]}')
    else:
        raise NotImplementedError(
            f"Config type: {config_type} is not implemented")

    experiment = mlflow.get_experiment_by_name(
        environment_data["workspace_dir"])

    if not experiment:
        mlflow.create_experiment(environment_data["workspace_dir"],
                                 environment_data["artifact_location"])

    mlflow.set_experiment(environment_data["workspace_dir"])

    return api_client
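In the "ENV" branch above, EnvironmentVariableConfigProvider builds its config from the standard Databricks CLI environment variables. A small sketch of driving prepare_environment that way (the host, token, and environment name are placeholders):

import os

# Placeholder credentials; these are the variables EnvironmentVariableConfigProvider reads.
os.environ["DATABRICKS_HOST"] = "https://example.cloud.databricks.com"
os.environ["DATABRICKS_TOKEN"] = "dapiXXXXXXXXXXXXXXXX"

api_client = prepare_environment("default")  # "default" is a hypothetical environment name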
Example #4
def prepare_environment(environment: str) -> ApiClient:
    environment_data = get_environment_data(environment)

    config_type, config = pick_config(environment_data)

    api_client = _get_api_client(config, command_name="cicdtemplates-")
    _prepare_workspace_dir(api_client, environment_data["workspace_dir"])

    if config_type == "ENV":
        mlflow.set_tracking_uri(DATABRICKS_MLFLOW_URI)
    elif config_type == "PROFILE":
        mlflow.set_tracking_uri(f'{DATABRICKS_MLFLOW_URI}://{environment_data["profile"]}')
    else:
        raise NotImplementedError(f"Config type: {config_type} is not implemented")

    experiment = mlflow.get_experiment_by_name(environment_data["workspace_dir"])

    if not experiment:
        mlflow.create_experiment(environment_data["workspace_dir"], environment_data["artifact_location"])

    mlflow.set_experiment(environment_data["workspace_dir"])

    return api_client
Example #5
from pathlib import Path

import pyspark.sql.functions as fx

from databricks_cli.configure.config import _get_api_client
from databricks_cli.configure.provider import get_config
from databricks_cli.sdk import ClusterService

# COMMAND ----------

dbfs_home_path = Path("dbfs:/home/{}/".format(user))
run_metadata_delta_path = str(dbfs_home_path / "genomics/data/delta/pipeline_runs_info_hail_glow.delta")

# COMMAND ----------

cluster_id = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags().apply('clusterId')

# COMMAND ----------

cs = ClusterService(_get_api_client(get_config()))
_list = cs.list_clusters()['clusters']
conv = lambda x: {c: v for c, v in x.items() if type(v) in (str, int)}
cluster_info = spark.createDataFrame([conv(x) for x in _list])
cluster_info = cluster_info.where(fx.col("cluster_id") == cluster_id)
worker_info = cluster_info.select("node_type_id", "num_workers", "spark_version", "creator_user_name").collect()
node_type_id = worker_info[0].node_type_id
n_workers = worker_info[0].num_workers
spark_version = worker_info[0].spark_version
creator_user_name = worker_info[0].creator_user_name

# COMMAND ----------

display(cluster_info)

# COMMAND ----------
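Since only one cluster's metadata is needed, a simpler variant (assuming the same databricks-cli ClusterService client) could fetch it with a single REST call instead of listing every cluster and filtering with Spark:

cs = ClusterService(_get_api_client(get_config()))
info = cs.get_cluster(cluster_id)  # one API call for the current cluster
node_type_id = info["node_type_id"]
n_workers = info.get("num_workers", 0)  # autoscaling clusters report an "autoscale" block instead
spark_version = info["spark_version"]
creator_user_name = info["creator_user_name"]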
Example #6
    #################
    # Create the egg:
    #################

    print("Preparing Recommenders library file ({})...".format(args.eggname))
    myegg = create_egg(args.path_to_recommenders,
                       local_eggname=args.eggname,
                       overwrite=args.overwrite)
    print("Created: {}".format(myegg))

    ############################
    # Interact with Databricks:
    ############################

    # first make sure you are using the correct profile and connecting to the intended workspace
    my_api_client = _get_api_client(
        ProfileConfigProvider(args.profile).get_config())

    # Create a cluster if flagged
    if args.create_cluster:
        # treat args.cluster_id as the name, because if you create a cluster, you do not know its id yet.
        DEFAULT_CLUSTER_CONFIG["cluster_name"] = args.cluster_id
        cluster_info = ClusterApi(my_api_client).create_cluster(
            DEFAULT_CLUSTER_CONFIG)
        args.cluster_id = cluster_info["cluster_id"]
        print("Creating a new cluster with name {}. New cluster_id={}".format(
            DEFAULT_CLUSTER_CONFIG["cluster_name"], args.cluster_id))

    # Upload the egg:
    upload_path = Path(args.dbfs_path, args.eggname).as_posix()

    # Check if file exists to alert user.
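The snippet stops before the actual upload. One plausible way to copy the egg to upload_path with the databricks-cli DbfsApi (not shown in the original, so treat it as an assumption) would be:

from databricks_cli.dbfs.api import DbfsApi

# Overwrite any previous copy of the egg at the DBFS destination.
DbfsApi(my_api_client).cp(recursive=False, overwrite=True, src=myegg, dst=upload_path)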
pr_branch = namespace.pr_branch
print('PR Branch: ', pr_branch)

# COMMAND ----------

import json
import time
from datetime import datetime

from databricks_cli.configure.config import _get_api_client
from databricks_cli.configure.provider import EnvironmentVariableConfigProvider
from databricks_cli.sdk import JobsService, ReposService

# Let's create a Databricks CLI API client so we can interact with the Databricks REST API
config = EnvironmentVariableConfigProvider().get_config()
api_client = _get_api_client(config, command_name="cicdtemplates-")

# Let's check out the needed branch
if branch_name == 'merge':
  branch = pr_branch
else:
  branch = branch_name
print('Using branch: ', branch)

# Let's create the Repos service
repos_service = ReposService(api_client)

# Let's store the path for our new Repo
_b = branch.replace('/','_')
repo_path = f'{repos_path_prefix}_{_b}_{str(datetime.now().microsecond)}'
print('Checking out the following repo: ', repo_path)
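The notebook cuts off before the repo is actually created. A minimal sketch of the likely next step with ReposService (repo_url and git_provider are hypothetical names, assuming a databricks-cli version that exposes create_repo and update_repo) might be:

# Hypothetical continuation: clone the repo into the workspace, then switch it to the chosen branch.
repo = repos_service.create_repo(url=repo_url, provider=git_provider, path=repo_path)
repos_service.update_repo(id=repo['id'], branch=branch)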