help="image name", dest="image_name", required=True) parser.add_argument("--output", type=str, help="eval output directory", dest="output", required=True) args = parser.parse_args() print("Argument 1: %s" % args.model_name) print("Argument 2: %s" % args.image_name) print("Argument 3: %s" % args.output) run = Run.get_context() ws = run.experiment.workspace print('Workspace configuration succeeded') model_list = Model.list(ws, name=args.model_name) latest_model = sorted(model_list, reverse=True, key=lambda x: x.created_time)[0] latest_model_id = latest_model.id latest_model_name = latest_model.name latest_model_version = latest_model.version latest_model_path = latest_model.get_model_path(latest_model_name, _workspace=ws) print('Latest model id: ', latest_model_id)
import argparse

import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
from keras.utils import plot_model
import tensorflow as tf

np.random.seed(12345)
PYTHONHASHSEED = 0

from azureml.core import Run

run = Run.get_context()

parser = argparse.ArgumentParser(description='Keras DogCat example:')
parser.add_argument('--epochs', '-e', type=int, default=10,
                    help='Number of sweeps over the dataset to train')
parser.add_argument('--batchsize', '-b', type=int, default=32,
                    help='Number of images in each mini-batch')
parser.add_argument('--dataset', '-d', dest='data_folder',
                    help='The datastore')
args = parser.parse_args()
def main():
    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        # run_id useful to query previous runs
        run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
        aml_workspace = Workspace.get(name=workspace_name,
                                      subscription_id=subscription_id,
                                      resource_group=resource_group)
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        ws = run.experiment.workspace
        exp = run.experiment
        run_id = 'amlcompute'

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="driverhacker5_model.pkl",
    )
    parser.add_argument(
        "--step_input",
        type=str,
        help=("input from previous steps")
    )
    args = parser.parse_args()

    if (args.run_id is not None):
        run_id = args.run_id
    if (run_id == 'amlcompute'):
        run_id = run.parent.id
    model_name = args.model_name
    model_path = args.step_input

    print("Getting registration parameters")
    # Load the registration parameters from the parameters file
    with open("parameters.json") as f:
        pars = json.load(f)
    try:
        register_args = pars["registration"]
    except KeyError:
        print("Could not load registration values from file")
        register_args = {"tags": []}

    model_tags = {}
    for tag in register_args["tags"]:
        try:
            mtag = run.parent.get_metrics()[tag]
            model_tags[tag] = mtag
        except KeyError:
            print(f"Could not find {tag} metric on parent run.")

    # load the model
    print("Loading model from " + model_path)
    model_file = os.path.join(model_path, model_name)
    model = joblib.load(model_file)
    parent_tags = run.parent.get_tags()
    try:
        build_id = parent_tags["BuildId"]
    except KeyError:
        build_id = None
        print("BuildId tag not found on parent run.")
        print(f"Tags present: {parent_tags}")
    try:
        build_uri = parent_tags["BuildUri"]
    except KeyError:
        build_uri = None
        print("BuildUri tag not found on parent run.")
        print(f"Tags present: {parent_tags}")

    if (model is not None):
        dataset_id = parent_tags["dataset_id"]
        if (build_id is None):
            register_aml_model(model_file, model_name, model_tags, exp,
                               run_id, dataset_id)
        elif (build_uri is None):
            register_aml_model(model_file, model_name, model_tags, exp,
                               run_id, dataset_id, build_id)
        else:
            register_aml_model(model_file, model_name, model_tags, exp,
                               run_id, dataset_id, build_id, build_uri)
    else:
        print("Model not found. Skipping model registration.")
        sys.exit(0)
import argparse
import json
import os
import azureml.core
from azureml.core import Workspace, Experiment, Model
from azureml.core import Run
from azureml.train.hyperdrive import HyperDriveRun
from shutil import copy2

parser = argparse.ArgumentParser()
parser.add_argument('--saved-model', type=str, dest='saved_model',
                    help='path to saved model file')
args = parser.parse_args()

model_output_dir = './model/'

os.makedirs(model_output_dir, exist_ok=True)
copy2(args.saved_model, model_output_dir)

ws = Run.get_context().experiment.workspace
model = Model.register(workspace=ws,
                       model_name='tf-dnn-mnist',
                       model_path=model_output_dir)
import os, json
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core.model import Model
import azureml.core
from azureml.core import Run
import argparse

run = Run.get_context()

# Get workspace
# ws = Workspace.from_config()
ws = run.experiment.workspace
exp = run.experiment

# Parameterize the metrics on which the models should be compared
# Add golden data set on which all the model performance can be evaluated

# Get the latest run_id
# with open("aml_config/run_id.json") as f:
#     config = json.load(f)

parser = argparse.ArgumentParser()
parser.add_argument('--train_run_id', type=str, default='',
                    help='Run id of the newly trained model')
# parser.add_argument('--model_assets_path', type=str, default='outputs',
#                     help='Location of trained model.')
args = parser.parse_args()

new_model_run_id = args.train_run_id  # config["run_id"]
# experiment_name = config["experiment_name"]
# exp = Experiment(workspace=ws, name=experiment_name)
exp = Experiment(workspace=ws, name=experiment_name)

try:
    # Get the most recently registered model; we assume that is the model in
    # production. Download this model and compare it with the newly trained
    # model by running tests with the same data set.
    model_list = Model.list(ws)
    production_model = next(
        filter(
            lambda x: x.created_time == max(model.created_time for model in model_list),
            model_list))
    production_model_run_id = production_model.tags.get('run_id')
    run_list = exp.get_runs()
    # production_model_run = next(filter(lambda x: x.id == production_model_run_id, run_list))

    # Get the run history for both the production model and the newly trained
    # model and compare mse
    production_model_run = Run(exp, run_id=production_model_run_id)
    new_model_run = Run(exp, run_id=new_model_run_id)

    production_model_mse = production_model_run.get_metrics().get('mse')
    new_model_mse = new_model_run.get_metrics().get('mse')
    print('Current Production model mse: {}, New trained model mse: {}'.format(
        production_model_mse, new_model_mse))

    promote_new_model = False
    if new_model_mse < production_model_mse:
        promote_new_model = True
        print('New trained model performs better, thus it will be registered')
except Exception:
    promote_new_model = True
    print(
        'This is the first model to be trained, '
        'thus nothing to evaluate for now')
POSSIBILITY OF SUCH DAMAGE.
"""
import os
import json
import sys
from azureml.core import Run
import argparse
from azureml.core.authentication import AzureCliAuthentication

cli_auth = AzureCliAuthentication()

# Get workspace
# ws = Workspace.from_config(auth=cli_auth, path='./')
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace

parser = argparse.ArgumentParser("register")
parser.add_argument("--config_suffix", type=str,
                    help="Datetime suffix for json config files")
parser.add_argument(
    "--json_config",
    type=str,
    help="Directory to write all the intermediate json configs",
)
parser.add_argument(
    "--model_name",
    type=str,
if __name__ == "__main__": job_info_path = "parent_run.json" experiment_name = sys.argv[1] run_name = sys.argv[3][:-3] # should be the file name env_dictionary = {"MLFLOW_EXPERIMENT_NAME": experiment_name} if os.path.exists(job_info_path): # get parent run id, experiment name from file & workspace obj # create child run (id ) with open(job_info_path, 'r') as f: job_info_dict = json.load(f) print("Dictionary read from file " + job_info_dict + "\n") run_id = job_info_dict["run_id"] ws = get_ws() # TODO set path and auth exp = Experiment(workspace=ws, name=experiment_name) run = Run(exp, run_id) run.child_run(name=run_name) # TODO: add the step's name tags = { "mlflow.source.type": "JOB", "mlflow.source.name": "train.py", "mlflow.user": "******" } run.set_tags(tags) # log environment variables env_dictionary["MLFLOW_EXPERIMENT_ID"] = exp._id env_dictionary["MLFLOW_RUN_ID"] = run_id env_dictionary["MLFLOW_TRACKING_URI"] = _get_mlflow_tracking_uri(ws) else: # start run ws = get_ws() exp = Experiment(workspace=ws, name=experiment_name)
""" import os, json, sys from azureml.core import Workspace from azureml.core import Run from azureml.core import Experiment from azureml.core.model import Model import argparse from azureml.core.runconfig import RunConfiguration from azureml.core.authentication import AzureCliAuthentication cli_auth = AzureCliAuthentication() # Get workspace # ws = Workspace.from_config(auth=cli_auth) run = Run.get_context() exp = run.experiment ws = run.experiment.workspace parser = argparse.ArgumentParser("register") parser.add_argument( "--config_suffix", type=str, help="Datetime suffix for json config files" ) parser.add_argument( "--json_config", type=str, help="Directory to write all the intermediate json configs", ) args = parser.parse_args() print("Argument 1: %s" % args.config_suffix)
experiment_name = "kd_teach_the_student" # with open('config.json', 'r') as f: # config = json.load(f) # svc_pr = ServicePrincipalAuthentication( # tenant_id=config['tenant_id'], # service_principal_id=config['service_principal_id'], # service_principal_password=config['service_principal_password']) # ws = Workspace.from_config(auth=svc_pr) exp = Experiment(ws, name=experiment_name) # best_run_id = 'kd_teach_the_student_1559281481492_0' best_run = Run(exp, best_run_id) # register the model if best_run_id: tags = {} tags['run_id'] = best_run_id tags['val_loss'] = metrics[best_run_id]['val_loss'][-1] model = best_run.register_model(model_name=experiment_name, model_path='outputs', tags=tags) else: print( "Couldn't not find a model to register. Probably because no run completed" ) raise BaseException
from azureml.core import Run

input_file_ds_path = Run.get_context().input_datasets["file_dataset"]
with open(input_file_ds_path, "r") as f:
    content = f.read()
    print(content)

input_tabular_ds = Run.get_context().input_datasets["tabular_dataset"]
tabular_df = input_tabular_ds.to_pandas_dataframe()
print(tabular_df)
import os
import argparse

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib

from azureml.core import Run

# get hold of the current run
run = Run.get_submitted_run()

# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder',
                    help='data folder mounting point')
args = parser.parse_args()

data_folder = args.data_folder
data_path = os.path.join(data_folder, 'data_after_prep.pkl')
run.log('Data path', data_path)

# load data
pd_dataframe = pd.read_pickle(data_path)
run.log('Data loading', 'finished')
from azureml.core import Run, Workspace, Experiment, Model, Dataset
from azureml.core.resource_configuration import ResourceConfiguration
import sklearn
import sys
import os

print("...getting run context, experiment, and workspace")
run = Run.get_context()
if (run.id.startswith('OfflineRun')):
    os.environ['AZUREML_DATAREFERENCE_irisdata'] = '.\\sample_data.csv'
    os.environ['AZUREML_DATAREFERENCE_model_output'] = '.\\model_output'
    ws = Workspace.from_config()
    model_name = 'iris_classifier_model'
    training_step_name = 'iris_supervised_model.py'
    experiment_name = "your_experiment_name"
    parentrunid = "previous_pipeline_runid_for_your_experiment"
    exp = Experiment(ws, experiment_name)
    parentrun = Run(exp, parentrunid)
    # sys.exit("Currently this model registration script can only run in " +
    #          "context of a parent pipeline.")
else:
    ws = run.experiment.workspace
    print("...getting arguments (model_name, training_step_name)")
    model_name = sys.argv[2]
    training_step_name = sys.argv[4]
    parentrun = run.parent

print("model_name:", model_name)
print("training_step_name:", training_step_name)
    os.environ['AZUREML_ARM_RESOURCEGROUP'] = args.AZUREML_ARM_RESOURCEGROUP
    os.environ['AZUREML_ARM_WORKSPACE_NAME'] = args.AZUREML_ARM_WORKSPACE_NAME
    os.environ['AZUREML_ARM_PROJECT_NAME'] = args.AZUREML_ARM_PROJECT_NAME
    os.environ['AZUREML_SERVICE_ENDPOINT'] = args.AZUREML_SERVICE_ENDPOINT
    return args

args = populate_environ()
input_path = args.input
input_filename = args.input_filename
output_path = args.processed_data
output_filename = args.output_filename

run = Run.get_context(allow_offline=False)
print(run._run_dto["parent_run_id"])

input_fullpath = input_path + f'/{input_filename}'
df = spark.read.csv(input_fullpath)
print(display(df))

total_rows = df.count()
run.log('total_rows', total_rows)

output_fullpath = output_path + f"/{output_filename}"
df.write.parquet(output_fullpath)

run.log('Loss', 1.2)
run.log('Loss', 1.8)
def __init__(self, **kwargs):
    self.run_context = Run.get_context()
    super().__init__(**kwargs)
from azureml.core import Run

run = Run.get_context()  # get hold of the current run

import argparse, numpy as np, os

# let the user feed in the pipeline step parameters: the model name,
# the outputs of the training and registration steps, and whether the
# output is a directory
parser = argparse.ArgumentParser()
parser.add_argument('--model_name', type=str, help='model_name')
parser.add_argument('--datatrain_output', type=str, help='datatrain_output')
parser.add_argument('--modelregistration_output', type=str,
                    help='modelregistration_output')
parser.add_argument('--is_directory', type=bool, help='is_directory')
args = parser.parse_args()

model_name = args.model_name
print('model_name:', model_name)
run.log('model_name', model_name)

datatrain_output = args.datatrain_output
print('datatrain_output:', datatrain_output)
run.log('datatrain_output', datatrain_output)

modelregistration_output = args.modelregistration_output
print('modelregistration_output:', modelregistration_output)
run.log('modelregistration_output', modelregistration_output)

is_directory = args.is_directory
print('is_directory:', is_directory)
run.log('is_directory', is_directory)
# https://github.com/pytorch/examples/blob/master/mnist/main.py
# License text at foot of file
#
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from azureml.core import Run

azureMLRun = Run.get_context()
azureMLRun.log("How To Log Metrics", "This was the first logged message")
azureMLRun.log("How To Log Metrics",
               "use azureml.core.Run.get_context().log('MetricName', value)")
azureMLRun.log("How To Log Metrics",
               "Files saved to the outputs/ directory stay available after the run.")


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
def train(data_folder, output_folder, batch_size, n_hidden_1, n_hidden_2,
          learning_rate):
    # load train and test set into numpy arrays
    # note we scale the pixel intensity values to 0-1 (by dividing by 255.0)
    # so the model can converge faster.
    X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0
    X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0
    y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)
    y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

    # training_set_size = X_train.shape[0]
    n_inputs = 28 * 28
    n_outputs = 10
    n_epochs = 20
    batch_size = batch_size
    learning_rate = learning_rate

    y_train = np.eye(n_outputs)[y_train.reshape(-1)]
    y_test = np.eye(n_outputs)[y_test.reshape(-1)]
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

    # Build a simple MLP model
    model = Sequential()
    # first hidden layer
    model.add(Dense(n_hidden_1, activation='relu', input_shape=(n_inputs,)))
    # second hidden layer
    model.add(Dense(n_hidden_2, activation='relu'))
    # output layer
    model.add(Dense(n_outputs, activation='softmax'))
    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(lr=learning_rate),
                  metrics=['accuracy'])

    # start an Azure ML run
    run = Run.get_context()

    class LogRunMetrics(Callback):
        # callback at the end of every epoch
        def on_epoch_end(self, epoch, log):
            # log a value repeated which creates a list
            run.log('Loss', log['loss'])
            run.log('Accuracy', log['acc'])

    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=n_epochs,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[LogRunMetrics()])

    score = model.evaluate(X_test, y_test, verbose=0)

    # log a single value
    run.log("Final test loss", score[0])
    print('Test loss:', score[0])
    run.log('Final test accuracy', score[1])
    print('Test accuracy:', score[1])

    plt.figure(figsize=(6, 3))
    plt.title('MNIST with Keras MLP ({} epochs)'.format(n_epochs), fontsize=14)
    plt.plot(history.history['acc'], 'b-', label='Accuracy', lw=4, alpha=0.5)
    plt.plot(history.history['loss'], 'r--', label='Loss', lw=4, alpha=0.5)
    plt.legend(fontsize=12)
    plt.grid(True)

    # log an image
    run.log_image('Accuracy vs Loss', plot=plt)

    # create a ./outputs/model folder in the compute target
    # files saved in the "./outputs" folder are automatically uploaded into run history
    folder_model = os.path.join(output_folder, 'model')
    os.makedirs(folder_model, exist_ok=True)

    # serialize NN architecture to JSON
    model_json = model.to_json()
    # save model JSON
    with open(os.path.join(folder_model, 'model.json'), 'w') as f:
        f.write(model_json)
    # save model weights
    model.save_weights(os.path.join(folder_model, 'model.h5'))
    print("model saved in folder " + folder_model)

    return model
def main():
    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        sys.path.append(os.path.abspath("./code/util"))  # NOQA: E402
        from model_helper import get_model_by_tag

        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        tenant_id = os.environ.get("TENANT_ID")
        model_name = os.environ.get("MODEL_NAME")
        app_id = os.environ.get('SP_APP_ID')
        app_secret = os.environ.get('SP_APP_SECRET')
        build_id = os.environ.get('BUILD_BUILDID')
        service_principal = ServicePrincipalAuthentication(
            tenant_id=tenant_id,
            service_principal_id=app_id,
            service_principal_password=app_secret)
        aml_workspace = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group,
            auth=service_principal
        )
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
        run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
    else:
        sys.path.append(os.path.abspath("./util"))  # NOQA: E402
        from model_helper import get_model_by_tag
        ws = run.experiment.workspace
        exp = run.experiment
        run_id = 'amlcompute'

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="sklearn_regression_model.pkl",
    )
    parser.add_argument(
        "--validate",
        type=str,
        help="Set to true to only validate if model is registered for run",
        default=False,
    )
    args = parser.parse_args()
    if (args.build_id is not None):
        build_id = args.build_id
    if (args.run_id is not None):
        run_id = args.run_id
    if (run_id == 'amlcompute'):
        run_id = run.parent.id
    if (args.validate is not None):
        validate = args.validate
    model_name = args.model_name

    if (validate):
        try:
            tag_name = 'BuildId'
            model = get_model_by_tag(
                model_name, tag_name, build_id, exp.workspace)
            if (model is not None):
                print("Model was registered for this build.")
            if (model is None):
                print("Model was not registered for this run.")
                sys.exit(1)
        except Exception as e:
            print(e)
            print("Model was not registered for this run.")
            sys.exit(1)
    else:
        if (build_id is None):
            register_aml_model(model_name, exp, run_id)
        else:
            run.tag("BuildId", value=build_id)
            register_aml_model(model_name, exp, run_id, build_id)
print('My rank is ', rank)
print('My ip is ', ip)

parser = argparse.ArgumentParser()
parser.add_argument('--data')
FLAGS, unparsed = parser.parse_known_args()
# print('data path', FLAGS.data)
# os.system('find ' + FLAGS.data + '/nycflights/')

if rank == 0:
    data = {
        'scheduler': 'tcp://' + ip + ':8786',
        'bokeh': 'tcp://' + ip + ':8787',
    }
    Run.get_context().log('headnode', ip)
    Run.get_context().log('scheduler', data['scheduler'])
    Run.get_context().log('bokeh', data['bokeh'])
    Run.get_context().log('data', FLAGS.data)
else:
    data = None

data = comm.bcast(data, root=0)
scheduler = data['scheduler']
print('scheduler is ', scheduler)

if rank == 0:
    os.system('dask-scheduler')
else:
    os.system('dask-worker ' + scheduler)
import os
import azureml.core
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig, Run
from azureml.core.authentication import ServicePrincipalAuthentication

os.makedirs('outputs', exist_ok=True)

run = Run.get_context()
exp_name = 'titanic_training'
workspace_name = run.get_secret('workspaceName')
subscription_id = run.get_secret('subscriptionId')
resource_group = run.get_secret('resourceGroup')
tenant_id = run.get_secret('tenantId')
client_id = run.get_secret('clientId')
client_secret = run.get_secret('clientSecret')

print("Azure ML SDK Version: ", azureml.core.VERSION)

# connect to your aml workspace
## NOTE: you can use Workspace.create to create a workspace using Python.
## this authentication method will require a service principal with access to the workspace.
auth = ServicePrincipalAuthentication(tenant_id=tenant_id,
                                      service_principal_id=client_id,
                                      service_principal_password=client_secret)

ws = Workspace.get(name=workspace_name,
                   auth=auth,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

exp = Experiment(ws, exp_name)
env = Environment.get(workspace=ws, name="sklearn-env")

config = ScriptRunConfig(source_directory='src',
                         script='train.py',
                         compute_target='cpu-cluster',
                         environment=env)

run = exp.submit(config)
def pullUsageAndSaveV2(url, token, startDate, endDate, counter, usageDataFrame):
    # print(url)
    subscription_id = 'bd04922c-a444-43dc-892f-74d5090f8a9a'
    resource_group = 'mlplayarearg'
    workspace_name = 'testdeployment'

    run = Run.get_context()
    client_secret = run.get_secret(name="aadappsecretAML")
    svc_pr = ServicePrincipalAuthentication(
        tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47",
        service_principal_id="8e1d74de-d84f-4971-b737-66e737b636c1",
        service_principal_password=client_secret)

    workspace = Workspace(subscription_id, resource_group, workspace_name, auth=svc_pr)
    mydatastore = Datastore.get(workspace, 'billingdatablobstorage')

    print('about to make request to get usage')
    resp = requests.get(url, headers={"Authorization": "Bearer " + token})
    if resp.status_code == 401:
        token = authenticate_client_key()

    allData = resp.content
    dataObj = json.loads(allData)
    if 'value' not in dataObj:
        return
    usageData = dataObj['value']
    usageDF = pd.DataFrame.from_dict(usageData)

    if 'properties' in usageDF:
        propsExpanded = usageDF['properties'].apply(pd.Series)
        if 'date' in propsExpanded:
            usageDFNew = pd.concat(
                [usageDF.drop(['properties'], axis=1),
                 propsExpanded[['meterId', 'resourceGroup', 'offerId', 'chargeType',
                                'frequency', 'quantity', 'effectivePrice', 'cost',
                                'unitPrice', 'billingCurrency', 'date', 'resourceId']]],
                axis=1)
        else:
            print(f"date not found in response for {url}")
            usageDFNew = pd.DataFrame()
    else:
        print(f"properties not found in response for {url}")
        usageDFNew = pd.DataFrame()

    print(usageDFNew.shape)
    files = []
    groupedFiles = []

    if (usageDataFrame.shape[0] == 0):
        print('assigning data first time')
        usageDataFrame = usageDFNew
    else:
        print(f'adding {usageDFNew.shape[0]} rows to existing dataframe of size {usageDataFrame.shape[0]}')
        usageDataFrame = usageDataFrame.append(usageDFNew)
        print(f'usageDataFrame is now {usageDataFrame.shape[0]}')

    if 'nextLink' in dataObj:
        # recurse into the next page of usage data
        pullUsageAndSaveV2(dataObj['nextLink'], token, startDate, endDate,
                           counter + 1, usageDataFrame)
    else:
        print(f'saving dataframe with shape: {usageDataFrame.shape}')
        if 'date' in usageDataFrame:
            for singleDay in usageDataFrame['date'].unique():
                singleDayData = usageDataFrame[usageDataFrame['date'] == singleDay]
                print(f'saving rows for {singleDay} {singleDayData.shape[0]}')
                singleDayData.to_csv(f"{singleDay[0:4]}{singleDay[5:7]}{singleDay[8:10]}.csv")
                files.append(f"{singleDay[0:4]}{singleDay[5:7]}{singleDay[8:10]}.csv")
                mydatastore.upload_files(
                    files,  # List[str] of absolute paths of files to upload
                    target_path='rawdata',
                    overwrite=True,
                )
                groupedData = singleDayData.groupby(
                    ['meterId', 'resourceGroup', 'date']).agg({'cost': sum, 'quantity': sum})
                groupedData.to_csv(f"{singleDay[0:4]}{singleDay[5:7]}{singleDay[8:10]}grouped.csv")
                groupedFiles.append(f"{singleDay[0:4]}{singleDay[5:7]}{singleDay[8:10]}grouped.csv")
                mydatastore.upload_files(
                    groupedFiles,  # List[str] of absolute paths of files to upload
                    target_path='rolledup',
                    overwrite=True,
                )
        else:
            print(f'saving dataframe with shape: {usageDataFrame.shape} but date key not found, so skipping that and moving ahead')
model.config.pad_token_id = model.config.eos_token_id

encoded_dataset_train, encoded_dataset_eval = load_encoded_glue_dataset(
    task=task, tokenizer=tokenizer)

compute_metrics = construct_compute_metrics_function(args.task)

trainer = Trainer(
    model,
    training_args,
    callbacks=[AzureMLCallback()],
    train_dataset=encoded_dataset_train,
    eval_dataset=encoded_dataset_eval,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.pop_callback(MLflowCallback)

print("Training...")

run = Run.get_context()  # get handle on Azure ML run
start = time.time()
trainer.train()
run.log("time/epoch", (time.time() - start) / 60 / training_args.num_train_epochs)

print("Evaluation...")

trainer.evaluate()
script = "script_name.py", ) run = exp.submit(script_config) run.wait_for_completion(show_output=True) #2. Script file from azureml.core import Run, Workspace ws = Workspace.from_config() #to get the current run of the experiment new_run = Run.get_context() #do your work #for completing the run new_run.complete() #now we have the experiment module part but lets check the how can we do training of the model with this script method # 1. Script file from azureml.core import Workspace,Run from sklearn.model_selection import train_test_split import numpy as np import pandas as pd
def main():
    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        load_dotenv()
        sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
        if (sources_dir is None):
            sources_dir = 'fusion'
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        build_id = os.environ.get('BUILD_BUILDID')
        aml_workspace = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group
        )
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        exp = run.experiment

    e = Env()

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument(
        "--output_model_version_file",
        type=str,
        default="model_version.txt",
        help="Name of a file to write model version to"
    )
    args = parser.parse_args()
    if (args.build_id is not None):
        build_id = args.build_id

    model_name = e.model_name

    try:
        tag_name = 'BuildId'
        model = get_latest_model(
            model_name, tag_name, build_id, exp.workspace)
        if (model is not None):
            print("Model was registered for this build.")
        if (model is None):
            print("Model was not registered for this run.")
            sys.exit(1)
    except Exception as e:
        print(e)
        print("Model was not registered for this run.")
        sys.exit(1)

    # Save the Model Version for other AzDO jobs after script is complete
    if args.output_model_version_file is not None:
        with open(args.output_model_version_file, "w") as out_file:
            out_file.write(str(model.version))
from azureml.train.automl import AutoMLConfig
from azureml.automl.core.shared import constants
import datetime
from azureml_user.parallel_run import EntryScript
from train_automl_helper import str2bool, compose_logs
import logging
from azureml.automl.core.shared.exceptions import (AutoMLException,
                                                   ClientException, ErrorTypes)
from azureml.automl.core.shared.utilities import get_error_code
from joblib import dump, load
from random import randint
from time import sleep
import json

current_step_run = Run.get_context()

LOG_NAME = "user_log"

parser = argparse.ArgumentParser("split")
parser.add_argument("--process_count_per_node", default=1, type=int,
                    help="number of processes per node")
parser.add_argument("--retrain_failed_models", default=False, type=str2bool,
                    help="retrain failed models only")

args, _ = parser.parse_known_args()
parser.add_argument('--model_name', type=str, default='',
                    help='Name you want to give to the model.')
parser.add_argument('--model_assets_path', type=str, default='outputs',
                    help='Location of trained model.')
args, unparsed = parser.parse_known_args()

print('Model assets path is:', args.model_assets_path)
print('Model name is:', args.model_name)

run = Run.get_context()
pipeline_run = Run(run.experiment, run._root_run_id)

pipeline_run.upload_file("outputs/model/model.pth",
                         os.path.join(args.model_assets_path, "model.pth"))
pipeline_run.upload_file("outputs/model/labels.txt",
                         os.path.join(args.model_assets_path, "labels.txt"))
pipeline_run.upload_file("outputs/deployment/score.py",
                         "deployment/score.py")
pipeline_run.upload_file("outputs/deployment/myenv.yml",
                         "deployment/myenv.yml")
pipeline_run.upload_file("outputs/deployment/inferenceconfig.json",
                         "deployment/inferenceconfig.json")
pipeline_run.upload_file("outputs/deployment/deploymentconfig_aci.json",
                         "deployment/deploymentconfig_aci.json")
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import os
from azureml.core import Run
import argparse
import traceback
from util.model_helper import get_model_by_tag

run = Run.get_context()

# If you would like to run this script on a local computer, the following
# code is a good starting point for you.
# Use
#   python -m evaluate.evaluate_model
# in the diabetes_regression folder context.
#
# if (run.id.startswith('OfflineRun')):
#     from dotenv import load_dotenv
#     # For local development, set values in this section
#     load_dotenv()
#     sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
#     if (sources_dir is None):
#         sources_dir = 'diabetes_regression'
#     path_to_util = os.path.join(".", sources_dir, "util")
def main(argv=None):
    # get hold of the current run
    run = Run.get_submitted_run()

    train_evaluate(run)
parser.add_argument('--regularization', type=float, dest='reg',
                    default=0.01, help='regularization rate')
args = parser.parse_args()

data_folder = os.path.join(args.data_folder, 'mnist')
print('Data folder:', data_folder)

# load train and test set into numpy arrays
# note we scale the pixel intensity values to 0-1 (by dividing by 255.0)
# so the model can converge faster.
X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0
X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0
y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)
y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

# get hold of the current run
run = Run.get_context()

print('Train a logistic regression model with regularization rate of', args.reg)
clf = LogisticRegression(C=1.0 / args.reg, random_state=42)
clf.fit(X_train, y_train)

print('Predict the test set')
y_hat = clf.predict(X_test)

# calculate accuracy on the prediction
acc = np.average(y_hat == y_test)
print('Accuracy is', acc)

run.log('regularization rate', np.float(args.reg))
run.log('accuracy', np.float(acc))
from azureml.core import Workspace, Experiment, Dataset, Run
from azureml.train.automl import AutoMLConfig

exp = Run.get_context().experiment

data = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard.csv"
compute_name = "cpu-cluster"

dataset = Dataset.Tabular.from_delimited_files(data)

automl_settings = {
    "n_cross_validations": 3,
    "primary_metric": "average_precision_score_weighted",
    "enable_early_stopping": True,
}

automl_config = AutoMLConfig(
    task="classification",
    max_concurrent_iterations=3,
    compute_target=compute_name,
    training_data=dataset,
    label_column_name="Class",
    **automl_settings
)

run = exp.submit(automl_config)
run.wait_for_completion(show_output=True)