import os

from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.core.compute_target import ComputeTargetException


def link_databricks_workspace():
    """Check whether a Databricks compute target is attached to the AML
    workspace; if it is not, attach it."""
    databricks_compute_name = os.environ.get("AML_DATABRICKS_COMPUTE_NAME")
    databricks_workspace_name = os.environ.get("AML_WORKSPACE")
    databricks_resource_group = os.environ.get("AML_RESOURCE_GROUP")
    databricks_access_token = os.environ.get("AML_DATABRICKS_ACCESS_TOKEN")
    aml_workspace_name = os.environ.get("AML_WORKSPACE_NAME")

    # DatabricksCompute and ComputeTarget.attach expect a Workspace object,
    # not a name string; resolve it first (assumes the default credentials
    # can see the workspace).
    aml_workspace = Workspace.get(name=aml_workspace_name)
    try:
        # For automation, authenticate with a Service Principal.
        databricks_compute = DatabricksCompute(workspace=aml_workspace,
                                               name=databricks_compute_name)
        print('Compute target already exists')
    except ComputeTargetException:
        print('Compute target not found')
        print('databricks_compute_name {}'.format(databricks_compute_name))
        print('databricks_workspace_name {}'.format(databricks_workspace_name))
        # Insecure: never log access tokens.
        # print('databricks_access_token {}'.format(databricks_access_token))

        # Create the attach configuration
        attach_config = DatabricksCompute.attach_configuration(
            resource_group=databricks_resource_group,
            workspace_name=databricks_workspace_name,
            access_token=databricks_access_token)
        databricks_compute = ComputeTarget.attach(aml_workspace,
                                                  databricks_compute_name,
                                                  attach_config)

        databricks_compute.wait_for_completion(True)
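
A usage sketch: the function is driven entirely by environment variables, so a caller only needs to set them first (the values below are placeholders):

# Placeholder values; in CI these would come from secret pipeline variables.
os.environ["AML_DATABRICKS_COMPUTE_NAME"] = "dbr-amls-comp"
os.environ["AML_WORKSPACE"] = "<databricks-workspace-name>"
os.environ["AML_RESOURCE_GROUP"] = "<databricks-resource-group>"
os.environ["AML_DATABRICKS_ACCESS_TOKEN"] = "<databricks-pat>"
os.environ["AML_WORKSPACE_NAME"] = "<aml-workspace-name>"

link_databricks_workspace()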
Example #2
def trigger_env_prep():

    # Define the variables below (change the placeholder values).
    # In a production situation, don't put secrets in source code, but as secret variables,
    # see https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
    workspace = "<Name of your workspace>"
    subscription_id = "<Subscription id>"
    resource_grp = "<Name of your resource group where aml service is created>"

    domain = "westeurope.azuredatabricks.net"  # change the region if your Databricks instance is not in westeurope
    databricks_name = "<<Your Databricks Name>>"
    dbr_pat_token_raw = "<<your Databricks Personal Access Token>>"

    DBR_PAT_TOKEN = bytes(dbr_pat_token_raw, encoding='utf-8')  # encode as bytes (adds the b'' prefix)
    databricks_grp = resource_grp
    dataset = "AdultCensusIncome.csv"
    notebook = "3_IncomeNotebookDevops.py"
    experiment_name = "experiment_model_release"
    db_compute_name = "dbr-amls-comp"

    # Print AML Version
    print("Azure ML SDK Version: ", azureml.core.VERSION)

    # Authenticate with the Azure CLI credentials and connect to the AML workspace

    cli_auth = AzureCliAuthentication()
    ws = Workspace(workspace_name=workspace,
                   subscription_id=subscription_id,
                   resource_group=resource_grp,
                   auth=cli_auth)

    print(ws.name,
          ws.resource_group,
          ws.location,
          sep='\t')

    # Create a new experiment
    print("Starting to create new experiment")
    Experiment(workspace=ws, name=experiment_name)

    # Upload notebook to Databricks

    print("Upload notebook to databricks")
    upload_notebook(domain, DBR_PAT_TOKEN, notebook)

    print("Add databricks env to Azure ML Service Compute")
    # Create databricks workspace in AML SDK
    try:
        databricks_compute = DatabricksCompute(workspace=ws,
                                               name=db_compute_name)
        print('Compute target {} already exists'.format(db_compute_name))
    except ComputeTargetException:
        print('Compute target not found; attaching a new one with the parameters below')
        config = DatabricksCompute.attach_configuration(
            resource_group=databricks_grp,
            workspace_name=databricks_name,
            access_token=dbr_pat_token_raw)
        databricks_compute = ComputeTarget.attach(ws, db_compute_name, config)
        databricks_compute.wait_for_completion(True)
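
upload_notebook is defined elsewhere in the original script. A minimal sketch of what it might look like, assuming the Databricks Workspace API 2.0 import endpoint and a root-level target path:

import base64
import requests

def upload_notebook(domain, dbr_pat_token, notebook):
    """Import a local Python notebook into the Databricks workspace."""
    # Read the local notebook source and base64-encode it for the API.
    with open(notebook, "rb") as f:
        content = base64.b64encode(f.read()).decode("utf-8")

    # dbr_pat_token is already bytes (see DBR_PAT_TOKEN above), so the
    # Authorization header is built from byte strings.
    response = requests.post(
        "https://{}/api/2.0/workspace/import".format(domain),
        headers={"Authorization": b"Bearer " + dbr_pat_token},
        json={
            "path": "/" + notebook,  # target path in the workspace: an assumption
            "format": "SOURCE",
            "language": "PYTHON",
            "content": content,
            "overwrite": True,
        },
    )
    response.raise_for_status()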
Example #3
def main(args, workspace):
    # Connect Databricks to AzureML
    print("Connecting Databricks to AzureML")
    try:
        databricks_compute = DatabricksCompute(workspace=workspace,
                                               name=args.compute_name)
        print(f"Compute target {databricks_compute.name} already exists")
    except ComputeTargetException as exception:
        print(f"Databricks compute target not found: {exception}")
        print("Attaching Databricks to Azure ML")
        databricks_config = DatabricksCompute.attach_configuration(
            resource_group=args.db_resource_group,
            workspace_name=args.db_workspace_name,
            access_token=args.db_access_token)
        databricks_compute = ComputeTarget.attach(
            workspace=workspace,
            name=args.compute_name,
            attach_configuration=databricks_config)
    databricks_compute.wait_for_completion(show_output=True)
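
main receives its settings via args, which is parsed elsewhere; a sketch of a matching argparse parser, with argument names inferred from the attributes used above:

import argparse

# Flag names are inferred from the args.<attr> accesses in main; they are assumptions.
parser = argparse.ArgumentParser()
parser.add_argument("--compute_name", required=True)
parser.add_argument("--db_resource_group", required=True)
parser.add_argument("--db_workspace_name", required=True)
parser.add_argument("--db_access_token", required=True)
args = parser.parse_args()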
Example #4
def get_compute(workspace: Workspace, dbcomputename: str, resource_group: str,
                dbworkspace: str, dbaccesstoken: str):
    try:
        databricks_compute = DatabricksCompute(workspace=workspace,
                                               name=dbcomputename)
        print('Compute target {} already exists'.format(dbcomputename))
    except ComputeTargetException:
        print('Compute target not found; attaching a new one with the parameters below')
        print('db_compute_name {}'.format(dbcomputename))
        print('db_resource_group {}'.format(resource_group))
        print('db_workspace_name {}'.format(dbworkspace))

        config = DatabricksCompute.attach_configuration(
            resource_group=resource_group,
            workspace_name=dbworkspace,
            access_token=dbaccesstoken)

        databricks_compute = ComputeTarget.attach(workspace, dbcomputename,
                                                  config)
        databricks_compute.wait_for_completion(True)
    return databricks_compute
def get_db_compute(ws: Workspace) -> DatabricksCompute:
    # db_compute_name, db_rg, db_workspace_name and db_access_token are
    # module-level settings defined elsewhere in the original script.
    db_compute = None
    try:
        db_compute = ComputeTarget(ws, db_compute_name)
    except ComputeTargetException:
        attach_config = DatabricksCompute.attach_configuration(
            resource_group=db_rg,
            workspace_name=db_workspace_name,
            access_token=db_access_token)
        db_compute = ComputeTarget.attach(ws, db_compute_name, attach_config)
        db_compute.wait_for_completion(True)
    return db_compute
Example #6
def attach_databricks(name,
                      access_token,
                      compute_resource_id,
                      workspace_name=None,
                      resource_group_name=None):
    # get_workspace_or_default is a helper from the surrounding CLI module that
    # resolves the AML workspace from the arguments or the Azure CLI defaults.
    workspace = get_workspace_or_default(workspace_name, resource_group_name)

    print('Attaching compute resource...')
    attach_config = DatabricksCompute.attach_configuration(
        resource_id=compute_resource_id, access_token=access_token)
    ComputeTarget.attach(workspace, name, attach_config)
    print('Resource attach submitted successfully.')
    print('To see if your compute target is ready to use, run:')
    print('  az ml computetarget show -n {}'.format(name))
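
Unlike the other examples, attach_configuration here takes the full ARM resource_id of the Databricks workspace instead of a resource group plus workspace name. A sketch of building that ID (all segment values are placeholders):

# ARM resource ID for an Azure Databricks workspace; the values are placeholders.
compute_resource_id = (
    "/subscriptions/<subscription-id>"
    "/resourceGroups/<resource-group>"
    "/providers/Microsoft.Databricks/workspaces/<databricks-workspace-name>"
)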
Example #7
def create_databricks_compute(
    workspace: Workspace,
    databricks_workspace_name: str,
    compute_name: str,
    access_token: str,
):
    # Note: this assumes the Databricks workspace lives in the same resource
    # group as the AML workspace.
    compute_config = DatabricksCompute.attach_configuration(
        resource_group=workspace.resource_group,
        workspace_name=databricks_workspace_name,
        access_token=access_token)

    compute_target = ComputeTarget.attach(workspace, compute_name,
                                          compute_config)

    compute_target.wait_for_completion(show_output=True)

    return compute_target
# Unmanaged compute target
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute

# Load the workspace from the saved config file
ws = Workspace.from_config()

# Specify a name for the compute (unique within the workspace)
compute_name = 'db_cluster'

# Define configuration for existing Azure Databricks cluster
db_workspace_name = 'db_workspace'
db_resource_group = 'db_resource_group'
db_access_token = '1234-abc-5678-defg-90...'
db_config = DatabricksCompute.attach_configuration(resource_group=db_resource_group,
                                                   workspace_name=db_workspace_name,
                                                   access_token=db_access_token)

# Create the compute
databricks_compute = ComputeTarget.attach(ws, compute_name, db_config)
databricks_compute.wait_for_completion(True)


# Check for existing compute
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_name = "aml-cluster"

# Check if the compute target exists; if not, create it
try:
    aml_cluster = ComputeTarget(workspace=ws, name=compute_name)
    print('Found existing cluster.')
except ComputeTargetException:
    # vm_size and max_nodes below are example values
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2',
                                                           max_nodes=4)
    aml_cluster = ComputeTarget.create(ws, compute_name, compute_config)
    aml_cluster.wait_for_completion(show_output=True)
Example #9
print("Argument 2: %s" % args.model_name)
print("Argument 3: %s" % args.build_number)
print("Argument 4: %s" % args.image_name)
print("Argument 5: %s" % args.path)

print('creating AzureCliAuthentication...')
cli_auth = AzureCliAuthentication()
print('done creating AzureCliAuthentication!')

print('get workspace...')
ws = Workspace.from_config(path=args.path, auth=cli_auth)
print('done getting workspace!')

print("looking for existing compute target.")
# aml_compute = AmlCompute(ws, args.aml_compute_target)
aml_compute = DatabricksCompute(ws, args.aml_compute_target)
print("found existing compute target.")

# Create a new runconfig object
run_amlcompute = RunConfiguration()

# # Use the cpu_cluster you created above.
run_amlcompute.target = args.aml_compute_target

# # Enable Docker
# run_amlcompute.environment.docker.enabled = True

# # Set Docker base image to the default CPU-based image
# run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE

# # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
Example #10
#     print("found existing compute target.")
# except ComputeTargetException:
#     print("creating new compute target")
    
#     provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
#                                                                 min_nodes = 1, 
#                                                                 max_nodes = 1)    
#     aml_compute = ComputeTarget.create(ws, args.aml_compute_target, provisioning_config)
#     aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    
# print("Aml Compute attached")

# ----------------------

try:
    aml_compute = DatabricksCompute(ws, args.aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("Attaching new ADB compute target")

    # See the docs for a description of the parameters:
    # https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.compute.databrickscompute?view=azure-ml-py#attach-configuration-resource-group-none--workspace-name-none--resource-id-none--access-token----
    db_workspace_name = 'test-aml-adb-workspace'
    db_resource_group = 'test-aml-adb'
    db_access_token = '...........................'

    provisioning_config = DatabricksCompute.attach_configuration(resource_group=db_resource_group,
                                                                 workspace_name=db_workspace_name,
                                                                 access_token=db_access_token)

    databricks_compute = ComputeTarget.attach(ws, args.aml_compute_target, provisioning_config)
    # Wait for the attach to complete, matching the pattern in the other examples.
    databricks_compute.wait_for_completion(show_output=True)
Example #11

def resolve_db_cluster_id():
    return os.environ["DB_CLUSTER_ID"]
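
The remaining resolve_* helpers (resolve_dependencies, resolve_rg, resolve_db_workspace_name, and so on) are not shown in this snippet; presumably they read configuration the same way as resolve_db_cluster_id. A sketch under that assumption (the environment-variable names are guesses):

def resolve_rg():
    return os.environ["RESOURCE_GROUP"]  # env var name is an assumption


def resolve_db_workspace_name():
    return os.environ["DB_WORKSPACE_NAME"]  # env var name is an assumption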


my_env = CondaDependencies.create(conda_packages=resolve_dependencies())

with open("myenv.yml", "w") as f:
    f.write(my_env.serialize_to_string())

ws = Workspace(resolve_subscription_id(), resolve_rg(),
               resolve_ml_workspace_name())

config = DatabricksCompute.attach_configuration(
    resource_group=resolve_rg(),
    workspace_name=resolve_db_workspace_name(),
    access_token=resolve_db_access_token())
databricks_compute = ComputeTarget.attach(ws, resolve_compute_name(), config)
databricks_compute.wait_for_completion(True)

dbPythonInLocalMachineStep = DatabricksStep(
    name="DBPythonInLocalMachine",
    python_script_name=resolve_script_name(),
    source_directory=resolve_source_directory(),
    run_name='DB_Worst_Regression_Run',
    compute_target=databricks_compute,
    existing_cluster_id=resolve_db_cluster_id(),
    allow_reuse=True)

steps = [dbPythonInLocalMachineStep]
pipeline = Pipeline(workspace=ws, steps=steps)
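
The snippet ends after constructing the Pipeline. A sketch of one way to run it, assuming an experiment name (the name "db-pipeline" is a placeholder):

from azureml.core import Experiment

# Submit the pipeline as an experiment run and stream its logs.
experiment = Experiment(workspace=ws, name="db-pipeline")
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)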