def prepare_environment(environment: str) -> ApiClient:
    environment_data = get_environment_data(environment)
    config_type, config = pick_config(environment_data)

    api_client = _get_api_client(config, command_name="cicdtemplates-")
    _prepare_workspace_dir(api_client, environment_data["workspace_dir"])

    if config_type == "ENV":
        mlflow.set_tracking_uri(DATABRICKS_MLFLOW_URI)
    elif config_type == "PROFILE":
        mlflow.set_tracking_uri(f'{DATABRICKS_MLFLOW_URI}://{environment_data["profile"]}')
    else:
        raise NotImplementedError(f"Config type: {config_type} is not implemented")

    experiment: Optional[mlflow.entities.Experiment] = mlflow.get_experiment_by_name(
        environment_data["workspace_dir"]
    )

    # if there is no experiment, create it at the project-defined artifact location
    if not experiment:
        mlflow.create_experiment(
            environment_data["workspace_dir"], environment_data["artifact_location"]
        )
    else:
        # verify that the existing experiment's location matches the project-defined one
        if experiment.artifact_location != environment_data["artifact_location"]:
            raise Exception(
                f"Required location of experiment {environment_data['workspace_dir']} "
                f"doesn't match the project-defined one: \n"
                f"\t experiment artifact location: {experiment.artifact_location} \n"
                f"\t project artifact location   : {environment_data['artifact_location']} \n"
                f"Changing an experiment's location is currently not supported in MLflow. "
                f"Please change the experiment name to create a new experiment."
            )

    mlflow.set_experiment(environment_data["workspace_dir"])

    return api_client
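
# A minimal usage sketch (the environment name "default" is a placeholder and
# assumes the project file defines it): after prepare_environment returns, MLflow
# is already pointed at the per-environment tracking URI and experiment, so runs
# can be started directly.
api_client = prepare_environment("default")
with mlflow.start_run():
    mlflow.log_param("deployment_source", "cicdtemplates")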
def create_client(host, token):
    username = None
    password = None
    insecure = False
    client_config = config_provider.DatabricksConfig(host, username, password, token, insecure)
    return cli_configure._get_api_client(client_config)
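
# A minimal usage sketch of create_client: the workspace URL and token below are
# placeholders; ClusterService comes from databricks_cli.sdk.
from databricks_cli.sdk import ClusterService

client = create_client("https://my-workspace.cloud.databricks.com", "dapi-XXXX")
for c in ClusterService(client).list_clusters().get("clusters", []):
    print(c["cluster_id"], c["state"])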
def prepare_environment(environment: str):
    environment_data = InfoFile.get("environments").get(environment)

    if not environment_data:
        raise Exception(f"No environment {environment} provided in the project file")

    config = EnvironmentVariableConfigProvider().get_config()
    if config:
        config_type = "ENV"
        dbx_echo("Using configuration from the environment variables")
    else:
        dbx_echo("No environment variables provided, using the ~/.databrickscfg")
        config = ProfileConfigProvider(environment_data["profile"]).get_config()
        config_type = "PROFILE"

    if not config:
        raise Exception(
            f"""Couldn't get profile with name: {environment_data["profile"]}. Please check the config settings"""
        )

    api_client = _get_api_client(config, command_name="cicdtemplates-")
    _prepare_workspace_dir(api_client, environment_data["workspace_dir"])

    if config_type == "ENV":
        mlflow.set_tracking_uri(DATABRICKS_MLFLOW_URI)
    elif config_type == "PROFILE":
        mlflow.set_tracking_uri(f'{DATABRICKS_MLFLOW_URI}://{environment_data["profile"]}')
    else:
        raise NotImplementedError(f"Config type: {config_type} is not implemented")

    experiment = mlflow.get_experiment_by_name(environment_data["workspace_dir"])

    if not experiment:
        mlflow.create_experiment(environment_data["workspace_dir"], environment_data["artifact_location"])

    mlflow.set_experiment(environment_data["workspace_dir"])

    return api_client
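
# For context, a sketch of the project-file section that
# InfoFile.get("environments") reads above. The exact layout is an assumption,
# matching only the keys the function actually uses ("profile", "workspace_dir",
# "artifact_location"); names and paths are placeholders:
#
# {
#     "environments": {
#         "default": {
#             "profile": "DEFAULT",
#             "workspace_dir": "/Shared/dbx/projects/my-project",
#             "artifact_location": "dbfs:/dbx/my-project"
#         }
#     }
# }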
from databricks_cli.configure.provider import get_config
from databricks_cli.configure.config import _get_api_client
from databricks_cli.sdk import ClusterService
from pathlib import Path
import pyspark.sql.functions as fx

# COMMAND ----------

# `user` is assumed to be set in an earlier notebook cell
dbfs_home_path = Path("dbfs:/home/{}/".format(user))
run_metadata_delta_path = str(dbfs_home_path / "genomics/data/delta/pipeline_runs_info_hail_glow.delta")

# COMMAND ----------

cluster_id = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags().apply('clusterId')

# COMMAND ----------

cs = ClusterService(_get_api_client(get_config()))
_list = cs.list_clusters()['clusters']

# keep only scalar fields so the list of dicts can become a Spark DataFrame
conv = lambda x: {c: v for c, v in x.items() if type(v) in (str, int)}

cluster_info = spark.createDataFrame([conv(x) for x in _list])
cluster_info = cluster_info.where(fx.col("cluster_id") == cluster_id)
worker_info = cluster_info.select("node_type_id", "num_workers", "spark_version", "creator_user_name").collect()

node_type_id = worker_info[0].node_type_id
n_workers = worker_info[0].num_workers
spark_version = worker_info[0].spark_version
creator_user_name = worker_info[0].creator_user_name

# COMMAND ----------

display(cluster_info)

# COMMAND ----------
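
# The same lookup without a DataFrame round-trip, as a sketch:
# ClusterService.get_cluster fetches a single cluster's metadata directly
# (field names as returned by the Clusters API).
cluster = cs.get_cluster(cluster_id)
node_type_id = cluster["node_type_id"]
spark_version = cluster["spark_version"]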
#################
# Create the egg:
#################
print("Preparing Recommenders library file ({})...".format(args.eggname))
myegg = create_egg(args.path_to_recommenders, local_eggname=args.eggname, overwrite=args.overwrite)
print("Created: {}".format(myegg))

############################
# Interact with Databricks:
############################

# first make sure you are using the correct profile and connecting to the intended workspace
my_api_client = _get_api_client(ProfileConfigProvider(args.profile).get_config())

# Create a cluster if flagged
if args.create_cluster:
    # treat args.cluster_id as the name, because if you create a cluster, you do not know its id yet.
    DEFAULT_CLUSTER_CONFIG["cluster_name"] = args.cluster_id
    cluster_info = ClusterApi(my_api_client).create_cluster(DEFAULT_CLUSTER_CONFIG)
    args.cluster_id = cluster_info["cluster_id"]
    print("Creating a new cluster with name {}. New cluster_id={}".format(
        DEFAULT_CLUSTER_CONFIG["cluster_name"], args.cluster_id))

# Upload the egg:
upload_path = Path(args.dbfs_path, args.eggname).as_posix()

# Check if file exists to alert user.
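
# A sketch of the upload step that follows, using databricks-cli's DbfsApi.
# Assumes upload_path is already an absolute "dbfs:/..." path; the
# existence check mirrors the alert mentioned in the comment above.
from databricks_cli.dbfs.api import DbfsApi
from databricks_cli.dbfs.dbfs_path import DbfsPath

dbfs_api = DbfsApi(my_api_client)
dbfs_dst = DbfsPath(upload_path)
if not dbfs_api.file_exists(dbfs_dst) or args.overwrite:
    dbfs_api.put_file(myegg, dbfs_dst, args.overwrite)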
pr_branch = namespace.pr_branch
print('PR Branch: ', pr_branch)

# COMMAND ----------

import json
import time
from datetime import datetime

from databricks_cli.configure.config import _get_api_client
from databricks_cli.configure.provider import EnvironmentVariableConfigProvider
from databricks_cli.sdk import JobsService, ReposService

# Let's create a Databricks CLI API client to be able to interact with the Databricks REST API
config = EnvironmentVariableConfigProvider().get_config()
api_client = _get_api_client(config, command_name="cicdtemplates-")

# Let's check out the needed branch
if branch_name == 'merge':
    branch = pr_branch
else:
    branch = branch_name
print('Using branch: ', branch)

# Let's create the Repos Service
repos_service = ReposService(api_client)

# Let's store the path for our new Repo
_b = branch.replace('/', '_')
repo_path = f'{repos_path_prefix}_{_b}_{str(datetime.now().microsecond)}'
print('Checking out the following repo: ', repo_path)
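
# A sketch of the next step: using ReposService's create_repo/update_repo to
# materialize the Repo at repo_path and point it at the chosen branch. The git
# URL and provider below are placeholders:
repo = repos_service.create_repo(
    url='https://github.com/my-org/my-repo.git', provider='gitHub', path=repo_path)
repos_service.update_repo(repo['id'], branch=branch)
print('Checked out repo id: ', repo['id'])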