def main():
    """Build the single-step dimensionality-reduction pipeline and submit it.

    Loads settings from ``local.env``, signs in interactively, resolves the
    workspace and a fixed compute target, wires one ``PythonScriptStep`` whose
    inputs are exposed as pipeline parameters, and submits the pipeline as an
    experiment with example parameter values.
    """
    logging.warning("Loading environment variables...")
    env = Env()
    env.load_environment_variables(env_file_path="local.env")

    # Resolve the existing Azure Machine Learning workspace.
    logging.warning("Getting reference to existing Azure Machine Learning workspace...")
    login = InteractiveLoginAuthentication(tenant_id=env.tenant_id)
    workspace = get_workspace(
        env.workspace_name, login, env.subscription_id, env.resource_group
    )

    # The compute target is named explicitly so that a pipeline triggered via
    # the REST API cannot end up running heavy-load experiments on the wrong
    # machine.
    target = get_compute_target(
        workspace, compute_name='cpu-high-load', vm_size='STANDARD_F64S_V2'
    )

    # Output folder of the step, backed by the workspace default datastore.
    default_store = workspace.get_default_datastore()
    output_dir = PipelineData(
        name="step_output_data",
        datastore=default_store,
        is_directory=True,
    )

    step_run_config = create_run_configuration(workspace)

    # Values a caller can override when the pipeline is submitted.
    dataset_name_param = PipelineParameter(
        name="latent_dim_dataset_name", default_value=''
    )
    channels_param = PipelineParameter(name="channels_names", default_value='')
    hyper_param = PipelineParameter(name="hyperparameters", default_value='{}')

    script_arguments = [
        '--latent_dim_dataset_name', dataset_name_param,
        '--hyperparameters', hyper_param,
        '--channels_names', channels_param,
        '--experiments_config_filepath',
        cfg.StepsStructure.get_experiments_config_filepath(),
        '--output_folder', output_dir,
    ]
    dim_reduction_step = PythonScriptStep(
        name=cfg.ExperimentNames.DIM_REDUCTION_REMOTE,
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.DimReduction.STEP_SCRIPT_PATH,
        arguments=script_arguments,
        inputs=[],
        outputs=[output_dir],
        compute_target=target,
        runconfig=step_run_config,
        allow_reuse=False,
    )
    pipeline = Pipeline(workspace=workspace, steps=[dim_reduction_step])

    # Create and submit an experiment
    logging.warning("Submitting experiment...(v0003)")
    submission_parameters = {
        "latent_dim_dataset_name": 'dataset_001',
        # TODO: default channel names should be taken from the config.json file.
        "channels_names": "a,b,c,d",
        # TODO: default hyperparameters should be taken from the config.json file.
        "hyperparameters": "{ 'a': 10, 'b': 0.0 }",
    }
    experiment = Experiment(workspace, cfg.ExperimentNames.DIM_REDUCTION_REMOTE)
    # regenerate_outputs=False allows data reuse for this run.
    experiment.submit(
        pipeline,
        pipeline_parameters=submission_parameters,
        regenerate_outputs=False,
    )
    print_green('Experiment submitted!')
def __init__(self, file_name):
    """Load the JSON configuration file and populate this object from it.

    On success the instance carries ``AMLConfig``, ``cred_type``,
    ``subscription_id``, ``ClusterProperties``, ``JobProperties`` and
    ``DataReference``. ``Credentials`` is set only for the two recognised
    credential types (``sp_credentials`` with its section present, or
    ``userpass_credentials``).

    Args:
        file_name: Path of the JSON configuration file.

    Raises:
        ValueError: If ``file_name`` does not exist.
        AttributeError: If a required configuration key is missing.
        RuntimeError: For any other parsing failure, including an unknown
            ``scaling_method``; the original error is chained as the cause.
    """
    import warnings

    if not os.path.exists(file_name):
        raise ValueError(
            'Cannot find configuration file "{0}"'.format(file_name))

    with open(file_name, 'r') as f:
        conf = json.load(f)

    def _is_true(ref, key):
        # Flags are stored as strings ("true"/"false") in the JSON file.
        return ref[key].upper() == "TRUE"

    def _parse_refs(entries, factory, remote_key):
        # Build one data-reference object per entry. A malformed entry stops
        # the scan (as it always did) but is now reported instead of ignored.
        parsed = []
        try:
            for ref in entries:
                kwargs = {
                    'dataref_id': ref['dataref_id'],
                    'localDirectoryName': ref['localDirectoryName'],
                    'remoteMountPath': ref['remoteMountPath'],
                    'downloadToComputeNodeBeforeExecution': _is_true(
                        ref, 'downloadToComputeNodeBeforeExecution'),
                    remote_key: ref[remote_key],
                    'uploadContentBeforeRun': _is_true(
                        ref, 'uploadContentBeforeRun'),
                    'overwriteOnUpload': _is_true(ref, 'overwriteOnUpload'),
                    'downloadContentAfterRun': _is_true(
                        ref, 'downloadContentAfterRun'),
                    'storageAccountName': ref['storageAccountName'],
                    'storageAccountKey': ref['storageAccountKey'],
                }
                parsed.append(factory(**kwargs))
        except KeyError as err:
            warnings.warn(
                'Data reference entry is missing key "{0}"; this entry and '
                'the ones after it were skipped'.format(err.args[0]))
        return parsed

    try:
        # region AMLConfig Section
        amlconf = conf['AMLConfig']
        self.AMLConfig = self.__AMLConfig(
            resource_group=self._encode(amlconf['resource_group']),
            location=self._encode(amlconf['location']),
            workspace=self._encode(amlconf['workspace']),
            experimentation=self._encode(amlconf['experimentation']))
        # endregion

        # region Credentials Section
        creds = conf['Credentials']
        self.cred_type = creds['type']
        self.subscription_id = creds['subscription_id']
        if self.cred_type == 'sp_credentials' and 'sp_credentials' in creds:
            spcreds = creds['sp_credentials']
            aad_client_id = spcreds['aad_client_id']
            aad_secret_key = spcreds['aad_secret']
            aad_tenant = spcreds['aad_tenant']
            # NOTE(review): the username/password keyword names depend on the
            # installed azureml SDK version - confirm they are still accepted.
            self.Credentials = ServicePrincipalAuthentication(
                tenant_id=aad_tenant,
                username=aad_client_id,
                password=aad_secret_key)
        elif self.cred_type == 'userpass_credentials':
            self.Credentials = InteractiveLoginAuthentication(force=False)
        # NOTE(review): any other credential type leaves self.Credentials
        # unset - confirm whether that should be rejected here.
        # endregion

        # region ClusterProperties Section
        clusterProperties = conf['ClusterProperties']
        vmPriority = clusterProperties['vm_priority']
        vmSize = clusterProperties['vm_size']
        cluster_name = self._encode(clusterProperties['cluster_name'])
        scaling_method = clusterProperties['scaling']['scaling_method']
        scaling = clusterProperties['scaling'][scaling_method]
        if scaling_method == "manual":
            # A manually sized cluster has a fixed node count.
            minimumNodeCount = scaling['target_node_count']
            maximumNodeCount = minimumNodeCount
        elif scaling_method == "auto_scale":
            minimumNodeCount = scaling['minimum_node_count']
            maximumNodeCount = scaling['maximum_node_count']
        else:
            raise ValueError(
                "Parsing error, scaling undefined - needs to be manual or auto_scale"
            )
        self.ClusterProperties = self.__AMLClusterProperties(
            vm_size=vmSize,
            vm_priority=vmPriority,
            scaling_method=scaling_method,
            minimumNodeCount=minimumNodeCount,
            maximumNodeCount=maximumNodeCount,
            cluster_name=cluster_name)
        # endregion

        # region JobProperties Section
        jobProperties = conf['JobProperties']
        jobEstimator = jobProperties['estimator']
        # The estimator type (tensorflow, pytorch or base) is only recorded
        # here; the estimator itself is created elsewhere.
        self.JobProperties = self.__AMLJobProperties(
            jobNamePrefix=str(jobProperties['jobNamePrefix']),
            jobEstimatorType=jobEstimator['estimatorType'],
            jobNodeCount=int(jobProperties['nodeCount']),
            jobProcessCount=int(jobProperties['processCount']),
            jobScriptPath=jobEstimator['scriptPath'],
            jobScript=jobEstimator['script'],
            jobScriptArgs=jobEstimator['scriptArgsDict'],
            jobDistributedBackEnd=jobEstimator['distributedBackEnd'],
            jobPipPackages=jobEstimator['pipPackages'])
        # endregion

        # region DataReference Section
        dataReference = conf['DataReferences']
        # Both sub-sections are optional: an absent one yields an empty list.
        localDirectoryBlobList = _parse_refs(
            dataReference.get('localDirectoryBlob', []),
            self.__AMLBlobDataRef,
            'remoteBlobContainer')
        localDirectoryFilesList = _parse_refs(
            dataReference.get('localDirectoryFiles', []),
            self.__AMLFilesDataRef,
            'remoteFileShare')
        self.DataReference = self.__DataReference(
            localDirectoryBlobList=localDirectoryBlobList,
            localDirectoryFilesList=localDirectoryFilesList)
        # endregion
    except KeyError as err:
        raise AttributeError(
            'Please provide a value for "{0}" configuration key'.format(
                err.args[0]))
    except Exception as err:
        raise RuntimeError('Error in config parsing : ' + str(err)) from err
def get_mlflow_client(
    workspace_kwargs: dict = None, service_principal_kwargs: dict = None
) -> MlflowClient:
    """
    Set remote tracking URI for mlflow to AzureML workspace

    The dictionaries passed in are never modified; the authentication object
    is merged into a private copy of ``workspace_kwargs``.

    Parameters
    ----------
    workspace_kwargs: dict
        AzureML Workspace configuration to use for remote MLFlow tracking.
        ``None`` or an empty dict will result in local logging by the
        MlflowClient.
        Example::
            `{
                 "subscription_id":<value>,
                 "resource_group":<value>,
                 "workspace_name":<value>
             }`
    service_principal_kwargs: dict
        AzureML ServicePrincipalAuthentication keyword arguments. ``None`` or
        an empty dict will result in interactive authentication.
        Example::
            `{
                 "tenant_id":<value>,
                 "service_principal_id":<value>,
                 "service_principal_password":<value>
             }`

    Returns
    -------
    client: mlflow.tracking.MlflowClient
        Client with tracking uri set to AzureML if configured.
    """
    logger.info("Creating MLflow tracking client.")

    tracking_uri = None
    # Get AzureML tracking_uri if using Azure as backend
    if workspace_kwargs:
        _validate_dict(
            workspace_kwargs,
            ["subscription_id", "resource_group", "workspace_name"],
        )
        msg = "Configuring AzureML backend with {auth_type} authentication."
        if service_principal_kwargs:
            _validate_dict(
                service_principal_kwargs,
                [
                    "tenant_id",
                    "service_principal_id",
                    "service_principal_password",
                ],
            )
            logger.info(msg.format(auth_type="ServicePrincipalAuthentication"))
            auth = ServicePrincipalAuthentication(**service_principal_kwargs)
        else:
            logger.info(msg.format(auth_type="interactive"))
            auth = InteractiveLoginAuthentication(force=True)

        # Merge into a copy so the caller's dict is left untouched; "auth"
        # still overrides any value the caller supplied under that key.
        workspace_settings = {**workspace_kwargs, "auth": auth}
        tracking_uri = Workspace(**workspace_settings).get_mlflow_tracking_uri()

    mlflow.set_tracking_uri(tracking_uri)
    return MlflowClient(tracking_uri)
print("SDK version:", azureml.core.VERSION) # COMMAND ---------- subscription_id = "31a7429d-17c4-4831-9ab1-6f817b82e456" resource_group = "StatkraftDemo" workspace_name = "statkraftamlte8756027149" workspace_region = "westus2" # COMMAND ---------- # import the Workspace class and check the azureml SDK version # exist_ok checks if workspace exists or not. from azureml.core.authentication import InteractiveLoginAuthentication from azureml.core.workspace import Workspace my_auth = InteractiveLoginAuthentication(tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47") #Workspace.create(name=<workspace_name>, auth=my_auth, subscription_id=< subscription_id >, resource_group=< resource_group >) ws = Workspace.create(name = workspace_name, auth=my_auth, subscription_id = subscription_id, resource_group = resource_group, location = workspace_region, exist_ok=True) ws.get_details() # COMMAND ---------- ws = Workspace(workspace_name = workspace_name, subscription_id = subscription_id,
# Publish the pipeline built by the earlier run so it can be triggered over REST.
published_pipeline = pipeline_run.publish_pipeline(
    name='Diabetes_Parallel_Batch_Pipeline',
    description='Batch scoring of diabetes data',
    version='1.0')
published_pipeline  # bare expression: displayed when run as a notebook cell

rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

# Obtain an authorization header through an interactive login.
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print('Authentication header ready.')

# Trigger the published pipeline through its REST endpoint.
import requests

response = requests.post(rest_endpoint,
                         headers=auth_header,
                         json={"ExperimentName": "Batch_Pipeline_via_REST"})
# Fail with the HTTP error itself rather than a confusing KeyError / JSON
# decoding error when the service rejects the request.
response.raise_for_status()
run_id = response.json()["Id"]
run_id  # bare expression: displayed when run as a notebook cell

from azureml.pipeline.core.run import PipelineRun
def main():
    """Build the SmartScheduler AutoML training pipeline, then run or publish it.

    Steps, in order:

    1. Sign in interactively and resolve the workspace, compute and datastore.
    2. Create the run configuration used by the register step.
    3. If the training dataset is not registered yet, read the source table
       from SQL Server, split it 80/20, upload both CSV files and register
       them as tabular datasets; otherwise fetch the registered datasets.
    4. Configure AutoML (regression) and assemble the train / evaluate /
       register steps.
    5. Depending on ``execute_pipeline`` either submit the pipeline and wait
       for it, or validate and publish it.

    Settings come from ``Env`` plus the ``AZURE_TENANT_ID`` and
    ``CSUBSCRIPTION`` environment variables.
    """
    import logging
    import random
    import string

    import pandas as pd
    import pyodbc
    from azure.identity import DefaultAzureCredential
    from azure.keyvault.secrets import SecretClient
    from azureml.automl.core.featurization import FeaturizationConfig
    from azureml.core import Dataset, Datastore, Environment, Experiment
    from azureml.core.authentication import InteractiveLoginAuthentication
    from azureml.core.conda_dependencies import CondaDependencies
    from azureml.core.runconfig import DEFAULT_CPU_IMAGE, RunConfiguration
    from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput
    from azureml.pipeline.core.graph import PipelineParameter
    from azureml.pipeline.steps import AutoMLStep, PythonScriptStep
    from ml_service.util.attach_compute import get_compute
    from sklearn.model_selection import train_test_split

    e = Env()

    # ------------------------------------------------------------------
    # Workspace, compute and datastore
    # ------------------------------------------------------------------
    interactive_auth = InteractiveLoginAuthentication(
        tenant_id=os.environ.get("AZURE_TENANT_ID"))
    subscription = os.environ.get("CSUBSCRIPTION")
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=subscription,
        resource_group=e.resource_group,
        auth=interactive_auth
    )

    # Get Azure machine learning cluster
    # If not present then get_compute will create a compute based on environment variables
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    print("SDK version: ", azureml.core.VERSION)

    # Variable names that can be passed in as parameter values.
    # NOTE(review): only model_name_param is wired into a step below; the
    # other three are created but not referenced by any step - confirm intent.
    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none")
    caller_run_id_param = PipelineParameter(
        name="caller_run_id", default_value="none")

    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name

    # Get the datastore whether it is the default or named store
    datastore = Datastore.get(aml_workspace, datastore_name)

    # ------------------------------------------------------------------
    # Run configuration
    # ------------------------------------------------------------------
    # Registering the environment is best effort: a failure is reported and
    # the script carries on with the run configuration built below.
    try:
        app_env = Environment(name="smartschedule_env")
        app_env.register(workspace=aml_workspace)
    except Exception as err:
        print("Environment not found")
        print(err)

    aml_run_config = RunConfiguration()
    # NOTE(review): this uses the raw setting, which may be empty when the
    # default datastore was selected above - confirm that is intended.
    aml_run_config.environment.environment_variables["DATASTORE_NAME"] = e.datastore_name  # NOQA: E501
    aml_run_config.target = aml_compute
    # Run inside Docker on the default CPU base image.
    aml_run_config.environment.docker.enabled = True
    aml_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    # Let Azure ML build the conda environment inside the image.
    aml_run_config.environment.python.user_managed_dependencies = False
    app_conda_deps = CondaDependencies.create(
        conda_packages=['pandas', 'scikit-learn', 'libgcc', 'pyodbc',
                        'sqlalchemy', 'py-xgboost==0.90'],
        pip_packages=['azureml-sdk[automl,explain,contrib,interpret]==1.4.0',
                      'xgboost==0.90',
                      'azureml-dataprep==1.4.6',
                      'pyarrow',
                      'azureml-defaults==1.4.0',
                      'azureml-train-automl-runtime==1.4.0'],
        pin_sdk_version=False)
    aml_run_config.environment.python.conda_dependencies = app_conda_deps
    print("Run configuration created.")

    # ------------------------------------------------------------------
    # Source data
    # ------------------------------------------------------------------
    def get_data(sql_string, columns):
        """Run ``sql_string`` on the source database and return ``columns`` as a DataFrame.

        The database password is read from the Key Vault secret
        ``database-connection``. The connection is always closed before
        returning or re-raising.
        """
        credential = DefaultAzureCredential()
        secret_client = SecretClient(
            "https://smrtschd-aml-kv.vault.azure.net", credential=credential)
        secret = secret_client.get_secret("database-connection")

        server = 'starlims-sql.database.windows.net'
        database = 'QM12_DATA_AUTOMATION'
        username = '******'
        password = secret.value
        driver = '{ODBC Driver 17 for SQL Server}'
        conn = pyodbc.connect(
            'Driver=' + driver + ';' +
            'Server=' + server + ';' +
            'Database=' + database + ';' +
            'PORT=1433;' +
            'UID=' + username + ';' +
            'PWD=' + password + '; MARS_Connection=Yes'
        )
        try:
            query_result = pd.read_sql_query(sql_string, conn)
            return pd.DataFrame(query_result, columns=columns)
        except Exception as err:
            print(err)
            raise
        finally:
            conn.close()

    sql_str = (
        "SELECT "
        " Dept "
        ", Method "
        ", Servgrp "
        ", Runno "
        ", TestNo "
        ", Testcode "
        ", Total_Duration_Min "
        ", Total_Duration_Hr "
        ", Usrnam "
        ", Eqid "
        ", Eqtype "
        "FROM dbo.Draft "
        "order by TESTCODE, RUNNO, dept, method;"
    )
    columns = ["Dept", "Method", "Servgrp", "Runno", "TestNo", "Testcode",
               "Total_Duration_Min", "Total_Duration_Hr", "Usrnam", "Eqid",
               "Eqtype"]

    if e.train_dataset_name not in aml_workspace.datasets:
        from azureml.data.data_reference import DataReference

        df = get_data(sql_str, columns)
        train_df, test_df = train_test_split(df, test_size=0.2)

        MY_DIR = "data"
        if not os.path.isdir(MY_DIR):
            os.makedirs(MY_DIR)
        else:
            print("Folder ", MY_DIR, " is already created")

        files = ["data/train_data.csv", "data/test_data.csv"]
        train_df.to_csv(files[0], header=True, index=False)
        test_df.to_csv(files[1], header=True, index=False)

        datastore.upload_files(
            files=files,
            target_path='data/',
            overwrite=True
        )

        blob_input_data_test = DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertest",
            path_on_datastore="data/test_data.csv"
        )
        test_data = Dataset.Tabular.from_delimited_files(blob_input_data_test)
        test_data.register(aml_workspace, e.test_dataset_name,
                           create_new_version=True)

        blob_input_data_train = DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertrain",
            path_on_datastore="data/train_data.csv"
        )
        train_data = Dataset.Tabular.from_delimited_files(blob_input_data_train)
        train_data.register(aml_workspace, e.train_dataset_name,
                            create_new_version=True)
    else:
        print("getting from the datastore instead of uploading")
        train_data = Dataset.get_by_name(aml_workspace, name=e.train_dataset_name)
        test_data = Dataset.get_by_name(aml_workspace, name=e.test_dataset_name)

    # Peek at the first five rows of the training data as a sanity check.
    tdf = train_data.to_pandas_dataframe().head(5)
    print(tdf.shape)
    print(tdf)

    # The same five-row sample is reused for the diagnostics printed below,
    # which avoids materialising the whole dataset a second time.
    df = tdf
    label_column = "Total_Duration_Min"

    def randomString(stringLength=15):
        """Return ``stringLength`` random lowercase ASCII letters."""
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for i in range(stringLength))

    experiment = Experiment(aml_workspace, "SmartScheduler_Pipeline")

    # A random suffix keeps the AutoML step name distinct between builds.
    aml_name = 'smart_scheduler_' + randomString(5)
    print(aml_name)

    print(df.head(5))
    print(df.shape)
    print(df.dtypes)

    # Remove Features that are not necessary.
    train_data = train_data.drop_columns(["Runno", "TestNo", "Total_Duration_Hr"])
    test_data = test_data.drop_columns(["Runno", "TestNo", "Total_Duration_Hr"])
    print(train_data.to_pandas_dataframe())
    print(test_data.to_pandas_dataframe())

    # ------------------------------------------------------------------
    # AutoML configuration
    # ------------------------------------------------------------------
    # Categorical columns are hashed rather than label encoded.
    featurization_config = FeaturizationConfig()
    featurization_config.blocked_transformers = ['LabelEncoder']
    featurization_config.add_column_purpose('Dept', 'CategoricalHash')
    featurization_config.add_transformer_params(
        'HashOneHotEncoder', ['Method'], {"number_of_bits": 3})
    featurization_config.add_column_purpose('Servgrp', 'CategoricalHash')
    featurization_config.add_column_purpose('Testcode', 'Numeric')
    featurization_config.add_column_purpose('Usrnam', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqid', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqtype', 'CategoricalHash')

    automl_settings = {
        "iteration_timeout_minutes": 5,
        "iterations": 5,
        "enable_early_stopping": True,
        "primary_metric": 'spearman_correlation',
        "verbosity": logging.INFO,
        # NOTE(review): the documented AutoMLConfig setting is spelled
        # "n_cross_validations" - confirm this key has the intended effect.
        "n_cross_validation": 5
    }

    automl_config = AutoMLConfig(task="regression",
                                 debug_log='automated_ml_errors.log',
                                 training_data=train_data,
                                 featurization=featurization_config,
                                 blacklist_models=['XGBoostRegressor'],
                                 label_column_name=label_column,
                                 compute_target=aml_compute,
                                 **automl_settings)

    # ------------------------------------------------------------------
    # Pipeline steps
    # ------------------------------------------------------------------
    metrics_output_name = 'metrics_output'
    best_model_output_name = 'best_model_output'

    metrics_data = PipelineData(name='metrics_data',
                                datastore=datastore,
                                pipeline_output_name=metrics_output_name,
                                training_output=TrainingOutput(type='Metrics'))

    model_data = PipelineData(name='model_data',
                              datastore=datastore,
                              pipeline_output_name=best_model_output_name,
                              training_output=TrainingOutput(type='Model'))

    trainWithAutomlStep = AutoMLStep(
        name=aml_name,
        automl_config=automl_config,
        passthru_automl_config=False,
        outputs=[metrics_data, model_data],
        allow_reuse=True
    )

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name='./evaluate/evaluate_model.py',
        compute_target=aml_compute,
        source_directory='../app',
        arguments=[
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel
        ]
    )

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name='register/register_model2.py',
        compute_target=aml_compute,
        source_directory='../app',
        inputs=[model_data],
        arguments=[
            "--model_name", model_name_param,
            "--model_path", model_data,
            "--ds_name", e.train_dataset_name
        ],
        runconfig=aml_run_config,
        allow_reuse=False
    )

    if (e.run_evaluation).lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(trainWithAutomlStep)
        register_step.run_after(evaluate_step)
        pipeline_steps = [trainWithAutomlStep, evaluate_step, register_step]
    else:
        print("Exclude the evaluation step and run register step")
        register_step.run_after(trainWithAutomlStep)
        pipeline_steps = [trainWithAutomlStep, register_step]

    # ------------------------------------------------------------------
    # Run or publish
    # ------------------------------------------------------------------
    print("this is the value for execute pipeline: {}".format(e.execute_pipeline))

    if (e.execute_pipeline).lower() == 'true':
        # Execute the pipe normally during testing and debugging
        print("Pipeline submitted for execution.")
        pipeline = Pipeline(workspace=aml_workspace, steps=pipeline_steps)
        pipeline_run = experiment.submit(pipeline)
        pipeline_run.wait_for_completion()
        print("Pipeline is built.")
    else:
        # Generates pipeline that will be called in ML Ops
        train_pipeline = Pipeline(workspace=aml_workspace, steps=pipeline_steps)
        train_pipeline.validate()
        published_pipeline = train_pipeline.publish(
            name=e.pipeline_name,
            description="Model training/retraining pipeline",
            version=e.build_id
        )
        print(f'Published pipeline: {published_pipeline.name}')
        print(f'for build {published_pipeline.version}')
#   name: ***
#   vm_size : ***
#   vm_priority : ***
#   min_nodes : 0
#   max_nodes : 1
#   idle_seconds_before_scaledown: 1200
#
# ...

# Parse the YAML setup file; safe_load only builds plain Python objects.
with open(cfg_file) as cfg_handle:
    amlsetup = yaml.safe_load(cfg_handle)

###############################################################################
# Login to Azure
###############################################################################
interactive_auth = InteractiveLoginAuthentication(
    tenant_id=amlsetup["Environment"]["tenant_id"]
)
# sp_auth = ServicePrincipalAuthentication(tenant_id="***", service_principal_id="***", service_principal_password="******", _enable_caching=False)

###############################################################################
# Create workspace
###############################################################################
print("Setting up workspace:")

# Each setting is taken from the process environment when the variable is
# set, otherwise from the "Environment" section of the setup file.
subscription_id = os.getenv(
    "SUBSCRIPTION_ID", default=amlsetup["Environment"]["subscription_id"]
)
resource_group = os.getenv(
    "RESOURCE_GROUP", default=amlsetup["Environment"]["resource_group"]
)
workspace_name = os.getenv(
    "WORKSPACE_NAME", default=amlsetup["Environment"]["workspace_name"]
)
workspace_region = os.getenv(
    "WORKSPACE_REGION", default=amlsetup["Environment"]["workspace_region"]
)
# tutorial/01-create-workspace.py
"""Create the tutorial Azure ML workspace and save its details locally."""
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Workspace

# Interactive sign-in against a specific Azure AD tenant.
interactive_auth = InteractiveLoginAuthentication(
    tenant_id="99e1e721-7184-498e-8aff-b2ad4e53c1c2"
)

# create_resource_group=True asks the SDK to create the resource group too.
ws = Workspace.create(
    name='azure-ml',
    subscription_id='59e1d56a-8a2d-48a7-9cd3-a52c1a268c55',
    resource_group='cloud-ml',
    create_resource_group=True,
    location='eastus2',
    auth=interactive_auth,
)

# write out the workspace details to a configuration file: .azureml/config.json
ws.write_config(path='.azureml')
"""Load the workspace from the local config using a forced interactive login."""
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication

# force=True requests a fresh interactive login for the given tenant.
forced_interactive_auth = InteractiveLoginAuthentication(
    tenant_id="my-tenant-id", force=True)

# Hand the authentication object to the workspace explicitly so that the
# workspace is bound to this login rather than to whatever default
# credentials from_config would otherwise resolve.
ws = Workspace.from_config(auth=forced_interactive_auth)
# COMMAND ----------

# MAGIC %run ./99-Shared-Functions-and-Settings

# COMMAND ----------

# MAGIC %md
# MAGIC ### Instantiate the Workspace and Experiment objects

# COMMAND ----------

from azureml.core import Workspace, Experiment, Run
from azureml.core.authentication import InteractiveLoginAuthentication

# Sign in interactively; requesting the header up front surfaces the login
# before the workspace object is created.
up = InteractiveLoginAuthentication()
up.get_authentication_header()

# AZURE_ML_CONF and pyspark_experiment_name are not defined in this cell.
# NOTE(review): presumably they come from the shared notebook run above - confirm.
ws = Workspace(auth=up, **AZURE_ML_CONF)
experiment = Experiment(ws, pyspark_experiment_name)

# COMMAND ----------

# MAGIC %md
# MAGIC ### Find best performing run
# MAGIC To find the best performing model run from our experiment, we have several options.
# MAGIC
# MAGIC We can:
# MAGIC 1. Use the Azure Portal to compare runs
# MAGIC 1. Use Python to compare runs
# tutorial/01-create-workspace.py
"""Create the tutorial Azure ML workspace and save its details locally."""
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Workspace

# Interactive sign-in against a specific Azure AD tenant.
interactive_auth = InteractiveLoginAuthentication(tenant_id="xxxxx")

# create_resource_group=False: the resource group is not created by this call.
ws = Workspace.create(
    name='azure-ml',
    subscription_id='xxxx',
    resource_group='xxxx',
    create_resource_group=False,
    location='eastus2',
    auth=interactive_auth,
)

# write out the workspace details to a configuration file: .azureml/config.json
ws.write_config(path='.azureml')