def trigger_training_job(compute_name, script_folder):

    # Define Vars <change the vars>
    tenant_id = "<Enter Your Tenant Id>"
    app_id = "<Application Id of the SPN you Create>"
    app_key = "<Key for the SPN>"
    workspace = "<Name of your workspace>"
    subscription_id = "<Subscription id>"
    resource_grp = "<Name of your resource group where aml service is created>"
    experiment_name = '<Name of your experiment you defined in dataprep.py>'

    print("Starting trigger engine")

    # Start creating
    # Point file to conf directory containing details for the aml service
    spn = ServicePrincipalAuthentication(tenant_id, app_id, app_key)
    ws = Workspace(auth=spn,
                   workspace_name=workspace,
                   subscription_id=subscription_id,
                   resource_group=resource_grp)
    # ws = Workspace.from_config(path="../conf/config.json")

    ds = ws.get_default_datastore()
    print(ds.datastore_type, ds.account_name, ds.container_name)

    exp = Experiment(workspace=ws, name=experiment_name)

    compute_target = ws.compute_targets[compute_name]

    script_params = {
        '--data-folder': ds.as_mount(),
        '--regularization': 0.8
    }

    est = Estimator(source_directory=script_folder,
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='train.py',
                    conda_packages=['scikit-learn'])

    print("Submitting Runs to AML compute " + compute_name)
    run = exp.submit(config=est)
    run.wait_for_completion(show_output=True)  # specify True for a verbose log
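# A minimal sketch (not from the source) of how trigger_training_job might be
# invoked. The imports mirror what the function above relies on; the compute
# target name "cpu-cluster" and the "./scripts" folder are assumptions.
from azureml.core import Experiment, Workspace
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.train.estimator import Estimator

if __name__ == "__main__":
    trigger_training_job(compute_name="cpu-cluster", script_folder="./scripts")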
def get_output_location(
    ws: Workspace, env: Env, outputdatastore: Datastore = None
) -> PipelineData:
    """
    Returns a Datastore wrapped as a PipelineData instance suitable for
    passing into a pipeline step. Represents the location where the scoring
    output should be written. Uses the default workspace blob store if no
    output datastore is supplied.

    :param ws: AML Workspace
    :param env: Environment Variables
    :param outputdatastore: AML Datastore, optional, default is None

    :returns: PipelineData wrapping the output datastore
    """
    if outputdatastore is None:
        output_loc = PipelineData(
            name="defaultoutput", datastore=ws.get_default_datastore()
        )
    else:
        output_loc = PipelineData(
            name=outputdatastore.name, datastore=outputdatastore
        )  # NOQA: E501

    return output_loc
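# A hedged usage sketch (not from the source): wiring the returned PipelineData
# into a scoring step. It assumes a Workspace `ws` and an Env `env` are already
# in scope; "score.py" and the "cpu-cluster" compute target are placeholders.
from azureml.pipeline.steps import PythonScriptStep

output_loc = get_output_location(ws, env)  # default blob store when no datastore is given
scoring_step = PythonScriptStep(
    name="score",
    script_name="score.py",
    arguments=["--output_path", output_loc],
    outputs=[output_loc],
    compute_target="cpu-cluster",
    source_directory=".",
)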
def register_dataset(aml_workspace: Workspace,
                     dataset_name: str,
                     datastore_name: str,
                     file_path: str) -> Dataset:
    if datastore_name:
        datastore = Datastore.get(aml_workspace, datastore_name)
    else:
        datastore = aml_workspace.get_default_datastore()

    # if the path is the same as the latest version, no new version will be registered  # NOQA: E501
    # however, run.input_datasets['name'] = dataset will not log the dataset in the run  # NOQA: E501
    # in this case, the dataset returned from Dataset.get_by_name does get logged  # NOQA: E501
    dataset = Dataset.File.from_files(path=(datastore, file_path))
    dataset = dataset.register(workspace=aml_workspace,
                               name=dataset_name,
                               create_new_version=True)

    return Dataset.get_by_name(aml_workspace, dataset_name)
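# A hedged usage sketch for register_dataset; the dataset name and file path
# below are illustrative assumptions, not values from the source. Passing an
# empty datastore name falls back to the workspace default datastore.
from azureml.core import Workspace

ws = Workspace.from_config()
training_ds = register_dataset(
    aml_workspace=ws,
    dataset_name="training_data",
    datastore_name="",
    file_path="data/training.csv",
)
print(training_ds.name, training_ds.version)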
def get_fallback_input_dataset(ws: Workspace, env: Env) -> Dataset:
    """
    Called when an input datastore does not exist or no input data file
    exists at that location. Creates a sample dataset using the safedriver
    dataset from scikit-learn. Useful when debugging this code in the
    absence of the input data location Azure blob.

    :param ws: AML Workspace
    :param env: Environment Variables

    :returns: Fallback input dataset

    :raises: FileNotFoundError
    """
    # This call creates an example CSV from sklearn sample data. If you
    # have already bootstrapped your project, you can comment this line
    # out and use your own CSV.
    create_sample_data_csv(
        file_name=env.scoring_datastore_input_filename, for_scoring=True
    )

    if not os.path.exists(env.scoring_datastore_input_filename):
        error_message = (
            "Could not find CSV dataset for scoring at {}. "
            "No alternate data store location was provided either."
        ).format(env.scoring_datastore_input_filename)  # NOQA: E501
        raise FileNotFoundError(error_message)

    # upload the input data to the workspace default datastore
    default_datastore = ws.get_default_datastore()
    scoreinputdataref = default_datastore.upload_files(
        [env.scoring_datastore_input_filename],
        target_path="scoringinput",
        overwrite=False,
    )

    scoringinputds = (
        Dataset.Tabular.from_delimited_files(scoreinputdataref)
        .register(ws, env.scoring_dataset_name, create_new_version=True)
        .as_named_input(env.scoring_dataset_name)
    )

    return scoringinputds
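# A rough sketch (assumption, not from the source) of feeding the returned
# named input into a batch-scoring step; "batch_score.py" and "cpu-cluster"
# are hypothetical names. A Workspace `ws` and Env `env` are assumed in scope.
from azureml.pipeline.steps import PythonScriptStep

input_dataset = get_fallback_input_dataset(ws, env)
score_step = PythonScriptStep(
    name="batch-score",
    script_name="batch_score.py",
    inputs=[input_dataset],
    compute_target="cpu-cluster",
    source_directory=".",
)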
def __init__(self,
             module: Module,
             workspace: Workspace = None,
             compute_target: AmlCompute = None):
    self.module = module
    self.workspace = workspace
    self.default_module_version = self.get_default_module_version()

    interface_keys = self.get_interface_keys()
    self.inputs = AttrDict('Input', interface_keys['input'])
    self.outputs = AttrDict('Output', interface_keys['output'])
    self.params = AttrDict('Parameter', interface_keys['param'])

    self.datastore = workspace.get_default_datastore()
    self.compute_target = compute_target

    self.init_outputs()
    self.init_params()
def register_dataset_by_path(ws: Workspace, dataset_name, path):
    if not path.is_dir():
        raise ValueError("Dataset must be a folder.")

    # Upload path to datastore
    m = hashlib.sha256()
    m.update(str(path).encode())
    ds_path = m.hexdigest()

    datastore = ws.get_default_datastore()
    path_on_datastore = folder_on_datastore = f'/data/{ds_path}'
    datastore.upload(str(path), target_path=folder_on_datastore)

    # Create a FileDataset
    datastore_paths = [(datastore, path_on_datastore + '/**')]
    dataset = Dataset.File.from_files(datastore_paths)

    print(f"Registering dataset for path {path}")
    dataset.register(ws, name=dataset_name, create_new_version=True)
    print("Dataset registered", dataset)
    return Dataset.get_by_name(ws, name=dataset_name)
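# An illustrative call to register_dataset_by_path; the folder path and dataset
# name are assumptions. pathlib is used because the helper expects an object
# exposing .is_dir().
from pathlib import Path
from azureml.core import Workspace

ws = Workspace.from_config()
images_ds = register_dataset_by_path(ws, "images_train", Path("./data/images"))
print(images_ds.name, images_ds.version)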
def _create_running_config(ws: Workspace,
                           path_to_dataset_in_datastore,
                           compute_target_name,
                           env_name,
                           model_save_path):
    try:
        module_root_folder_path = r'online_training'

        compute_target = _get_compute_target(ws, compute_target_name)
        default_data_store = ws.get_default_datastore()
        dataset = Dataset.File.from_files(
            path=(default_data_store, path_to_dataset_in_datastore))
        module_env = Environment.get(workspace=ws, name=env_name)

        running_config = ScriptRunConfig(
            source_directory=module_root_folder_path,
            script='train.py',
            compute_target=compute_target,
            environment=module_env,
            arguments=[
                "--data_path", dataset.as_mount(),
                "--batch_size", 32,
                "--model_path", model_save_path
            ])

        print("running config created!")
        return running_config
    except Exception as e:
        raise e
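# A minimal sketch of submitting the run configuration built above; the
# experiment name, datastore path, compute target, environment name, and model
# path are placeholders, not values from the source.
from azureml.core import Experiment

run_config = _create_running_config(
    ws,
    path_to_dataset_in_datastore="datasets/online_training",
    compute_target_name="gpu-cluster",
    env_name="pytorch-env",
    model_save_path="outputs/model",
)
run = Experiment(ws, "online-training").submit(run_config)
run.wait_for_completion(show_output=True)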
# upload the local file to a datastore on the cloud

# subscription_id = '<your-subscription-id>'
# resource_group = 'MCW-Machine-Learning'
# workspace_name = 'mcwmachinelearning'

# WARNING - REMOVE
subscription_id = '8c924580-ce70-48d0-a031-1b21726acc1a'
resource_group = 'MCW-Machine-Learning'
workspace_name = 'mcwmachinelearning'

# Connect to the Azure ML Workspace
workspace = Workspace(subscription_id, resource_group, workspace_name)

# Get default datastore to upload prepared data
datastore = workspace.get_default_datastore()

# COMMAND ----------

# Create a temporary folder to store locally relevant content for this notebook
tempFolderName = '/FileStore/CarBatteries_{0}'.format(uuid.uuid4())
dbutils.fs.mkdirs(tempFolderName)

file_path = '/dbfs%s/%s' % (tempFolderName, 'daily-battery-time-series-v3-processed.csv')

# Persist dataset to temporary folder
df.to_csv(file_path, sep=',')

# datastore.upload(src_dir=, target_path='data')
datastore.upload_files([file_path], relative_root=None, target_path=None,
                       overwrite=True, show_progress=True)
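# A possible follow-up (not in the original notebook): register the uploaded
# CSV as a TabularDataset so later steps can consume it by name. This assumes
# the file landed at the container root under its base name; the dataset name
# is an assumption.
from azureml.core import Dataset

battery_ds = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'daily-battery-time-series-v3-processed.csv')]
)
battery_ds = battery_ds.register(workspace=workspace,
                                 name='daily-battery-time-series',
                                 create_new_version=True)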
        #strokeDash='Snp_Seq_No:N',
        facet=alt.Facet('Relative_EOM_Snp_Month_Offset:N', columns=3)
    ).properties(
        height=h,
        width=w
    )
"""

# COMMAND ----------

# DBTITLE 1,Upload data to datastore
# Create dataset for training
from azureml.core import Dataset, Datastore

datastore = ws.get_default_datastore()
print("Default datastore's name: {}".format(datastore.name))

# register_spark_dataframe(dataframe=train_sdf, target=blobstore_datadir, name="revforecast_train.parquet", description=None, tags=None, show_progress=True)
datastore.upload_files(
    files=[
        output_data_path + "revregionforecast_train.csv",
        output_data_path + "revregionforecast_test.csv",
    ],
    target_path=blobstore_datadir,
    overwrite=True,
    show_progress=True,
)

dataset_filename = blobstore_datadir + "revregionforecast_train.csv"
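# A plausible next step (sketch, not from the source): wrap the uploaded
# training CSV in a TabularDataset and register it for remote runs. The
# registered dataset name is an assumption.
train_dataset = Dataset.Tabular.from_delimited_files(
    path=[(datastore, dataset_filename)]
)
train_dataset = train_dataset.register(workspace=ws,
                                       name="revregionforecast_train",
                                       create_new_version=True)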
        , admin_user_ssh_key=ssh_key
        )
    )

    cluster = ComputeTarget.create(
        workspace
        , args.cluster_name
        , provisioning_config
    )

    print_message('WAITING FOR COMPLETION', filler='-', pre_post='>')
    cluster.wait_for_completion(show_output=True)
    print_message(f'CLUSTER "{args.cluster_name}" CREATED')

    years = [args.nyctaxi_years]
    datastore = workspace.get_default_datastore()

    if args.download_nyctaxi_data:
        print_message('DOWNLOADING DATA')
        download_nyctaxi_data(
            years
            , args.nyctaxi_src_path
        )

    if args.upload_nyctaxi_data:
        print_message('UPLOADING DATA TO STORAGE')
        upload_nyctaxi_data(
            workspace
            , datastore
            , os.path.join(args.nyctaxi_src_path, "nyctaxi")
            , os.path.join(args.nyctaxi_dst_path, "nyctaxi")
        )
keyvault = ws.get_default_keyvault()
tenant_id = keyvault.get_secret('tenantId')
service_principal_id = keyvault.get_secret("servicePrincipalId")
service_principal_password = keyvault.get_secret("servicePrincipalPassword")

svc_pr = ServicePrincipalAuthentication(
    tenant_id=tenant_id,
    service_principal_id=service_principal_id,
    service_principal_password=service_principal_password,
)

ws = Workspace(ws.subscription_id, ws.resource_group, ws.name, auth=svc_pr)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep="\n")

def_blob_store = ws.get_default_datastore()
print("Blobstore's name: {}".format(def_blob_store.name))

# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name,
                                def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/raw_data")

datasets = []
for blob in generator:
    dataset = blob.name.split("/")[3]
    if (dataset not in datasets
            and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
py_rc.framework = "Python" py_rc.environment.python.conda_dependencies = cd sql_datastore = Datastore.register_azure_sql_database( workspace=ws, datastore_name="modelling_db", server_name="dbserver-mlops-demo", database_name="asq-mlops-demo", username=kv.get_secret("db-user"), password=kv.get_secret("db-pass"), ) traindata = Dataset.Tabular.from_sql_query( (sql_datastore, "SELECT * FROM dbo.traindata")) outdata = PipelineData("outdata", datastore=ws.get_default_datastore()) download_step = PythonScriptStep( name="Load training data from database", script_name="download_dataset.py", arguments=["--dataset-name", "traindata", "--outpath", outdata], inputs=[traindata.as_named_input("traindata")], compute_target=compute_target, source_directory=".", outputs=[outdata], runconfig=py_rc, allow_reuse=False, ) model_outpath = PipelineData("modeldir", datastore=ws.get_default_datastore(), is_directory=True)
def start(login, app):
    login_config = ngccontent.get_config(login)
    app_config = ngccontent.get_config(app)

    ### WORKSPACE
    subscription_id = login_config["azureml_user"]["subscription_id"]
    resource_group = login_config["azureml_user"]["resource_group"]
    workspace_name = login_config["azureml_user"]["workspace_name"]

    try:
        ws = Workspace(
            workspace_name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group,
        )
    except ProjectSystemException:
        msg = f'\n\nThe workspace "{workspace_name}" does not exist. '
        msg += "Go to \n\n "
        msg += "-->> https://docs.microsoft.com/en-us/azure/machine-learning/how-to-manage-workspace <<--\n\n"
        msg += "and create the workspace first.\n\n\n"
        msg += "Your current configuration: \n\n"
        msg += f"Workspace name: {workspace_name} \n"
        msg += f"Subscription id: {subscription_id} \n"
        msg += f"Resource group: {resource_group}\n\n"
        logger.exception(msg)
        raise Exception(msg)

    verify = f"""
    Subscription ID: {subscription_id}
    Resource Group: {resource_group}
    Workspace: {workspace_name}"""
    logger.info(verify)

    ### experiment name
    exp_name = login_config["aml_compute"]["exp_name"]

    ### azure ml names
    ct_name = login_config["aml_compute"]["ct_name"]
    vm_name = login_config["aml_compute"]["vm_name"].lower()
    vm_priority = login_config["aml_compute"]["vm_priority"]

    ### trust but verify
    verify = f"""
    Experiment name: {exp_name}"""
    logger.info(verify)

    ### GPU RUN INFO
    workspace_vm_sizes = AmlCompute.supported_vmsizes(ws)
    pattern = re.compile(
        r"[a-z]+_nc[0-9]+[s]?_v[2,3]"
    )  ### matches NC-series v2 and v3
    workspace_vm_sizes = [
        (e["name"].lower(), e["gpus"])
        for e in workspace_vm_sizes
        if pattern.match(e["name"].lower())
    ]
    workspace_vm_sizes = dict(workspace_vm_sizes)

    ### GET COMPUTE TARGET
    if vm_name in workspace_vm_sizes:
        gpus_per_node = workspace_vm_sizes[vm_name]
        verify = f"""
    Compute target: {ct_name}
    VM Size: {vm_name}
    No of GPUs: {gpus_per_node}
    Priority: {vm_priority}
        """
        logger.info(verify)

        ### get SSH keys
        ssh_key_pub, pri_key_file = get_ssh_keys()

        if ct_name not in ws.compute_targets:
            logger.warning(f"Compute target {ct_name} does not exist...")
            ct = createOrGetComputeTarget(
                ws, ct_name, vm_name, vm_priority, ssh_key_pub, login_config
            )
        else:
            ct = ws.compute_targets[ct_name]
            if ct.provisioning_state == "Failed":
                logger.warning(
                    f"Compute target {ct_name} found but provisioning_state is showing as 'failed'..."
                )
                logger.warning(f"Deleting {ct_name} target and will attempt again...")
                logger.warning(
                    "If this fails again check that you have enough resources in your subscription..."
                )
                ct.delete()
                time.sleep(5)
                ct = createOrGetComputeTarget(
                    ws, ct_name, vm_name, vm_priority, ssh_key_pub, login_config
                )
            else:
                logger.info(f" Using pre-existing compute target {ct_name}")
    else:
        logger.exception("Unsupported vm_size {vm_size}".format(vm_size=vm_name))
        logger.exception("The specified vm size must be one of ...")
        for azure_gpu_vm_size in workspace_vm_sizes.keys():
            logger.exception("... " + azure_gpu_vm_size)
        raise Exception(
            "{vm_size} does not have Pascal or above GPU Family".format(vm_size=vm_name)
        )

    env = createOrGetEnvironment(ws, login_config, app_config)

    ### UPLOAD ADDITIONAL CONTENT IF NOT EXISTS
    for additional_content in app_config["additional_content"]["list"]:
        url = additional_content["url"]
        targetfile = additional_content["filename"]
        src_path = additional_content["localdirectory"]
        dest_path = additional_content["computedirectory"]

        if (
            "source" in additional_content.keys()
            and additional_content["source"] == "github"
        ):
            ngccontent.clone_github_repo(url, "additional_content", src_path)
        else:
            if app_config["additional_content"]["download_content"]:
                ngccontent.download(url, "additional_content", targetfile)
            if (
                app_config["additional_content"]["unzip_content"]
                and additional_content["zipped"]
            ):
                ngccontent.unzipFile(targetfile, "additional_content", src_path)

        if app_config["additional_content"]["upload_content"]:
            ngccontent.upload_data(
                ws,
                ws.get_default_datastore(),
                "additional_content/" + src_path,
                dest_path,
            )

    if login_config["aml_compute"]["max_nodes"] == 1:
        amlcluster = AzureMLComputeCluster(
            workspace=ws,
            compute_target=ct,
            initial_node_count=1,
            experiment_name=login_config["aml_compute"]["exp_name"],
            environment_definition=env,
            jupyter_port=login_config["aml_compute"]["jupyter_port"],
            telemetry_opt_out=login_config["azureml_user"]["telemetry_opt_out"],
            admin_username=login_config["aml_compute"]["admin_name"],
            admin_ssh_key=pri_key_file,
        )
    else:
        logger.info(
            "Creating a Dask Cluster with {} nodes".format(
                login_config["aml_compute"]["max_nodes"]
            )
        )
        amlcluster = AzureMLCluster(
            workspace=ws,
            compute_target=ct,
            initial_node_count=login_config["aml_compute"]["max_nodes"],
            experiment_name=login_config["aml_compute"]["exp_name"],
            environment_definition=env,
            jupyter_port=login_config["aml_compute"]["jupyter_port"],
            telemetry_opt_out=login_config["azureml_user"]["telemetry_opt_out"],
            admin_username=login_config["aml_compute"]["admin_name"],
            admin_ssh_key=pri_key_file,
        )

    logger.info(f"\n    Go to: {amlcluster.jupyter_link}")
    logger.info("    Press Ctrl+C to stop the cluster.")
    try:
        while True:
            pass
    except KeyboardInterrupt:
        amlcluster.close()