def RunAutoML():
    automl_settings = {
        "name": "AutoML_Demo_Experiment",
        "iteration_timeout_minutes": 15,
        "iterations": 3,
        "n_cross_validations": 5,
        # note: 'r2_score' is a regression metric; for task='classification'
        # a metric such as 'accuracy' or 'AUC_weighted' is expected
        "primary_metric": 'r2_score',
        "max_concurrent_iterations": 8,
        "verbosity": logging.INFO
    }
    subscription_id = request.json['subscription_id']
    print(userData)
    print(userData[subscription_id])
    #return "ok"
    try:
        # preprocess is set once here; keeping it in automl_settings as well
        # would raise a duplicate-keyword TypeError
        automl_config = AutoMLConfig(task="classification",
                                     X=userData[subscription_id][1],
                                     y=userData[subscription_id][2],
                                     debug_log='automl_errors.log',
                                     preprocess=True,
                                     **automl_settings)
        experiment = Experiment(userData[subscription_id][0], 'automl_remote')
        run = experiment.submit(automl_config, show_output=True)
        best_run, fitted_model = run.get_output()
        return 'ok'
    except Exception:
        return 'error'
def auto_train_model(ws, experiment_name, model_name, full_X, full_Y,
                     training_set_percentage, training_target_accuracy):
    # start a training run by defining an experiment
    experiment = Experiment(ws, experiment_name)

    train_X, test_X, train_Y, test_Y = train_test_split(
        full_X, full_Y, train_size=training_set_percentage, random_state=42)
    train_Y_array = train_Y.values.flatten()

    # Configure the automated ML job.
    # The model training is configured to run on the local machine.
    # The values for all settings are documented at
    # https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train
    # Notice we no longer have to scale the input values, as Auto ML will try
    # various data scaling approaches automatically.
    automl_config = AutoMLConfig(task='classification',
                                 primary_metric='accuracy',
                                 max_time_sec=12000,
                                 iterations=20,
                                 n_cross_validations=3,
                                 exit_score=training_target_accuracy,
                                 blacklist_algos=['kNN', 'LinearSVM'],
                                 X=train_X,
                                 y=train_Y_array,
                                 path='./04-automl/outputs')

    # Execute the job
    run = experiment.submit(automl_config, show_output=True)

    # Get the run with the highest accuracy value.
    best_run, best_model = run.get_output()

    return (best_model, run, best_run)
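# Usage sketch for auto_train_model (a minimal sketch; it assumes a local
# config.json for the workspace and feature/label objects full_X / full_Y
# already loaded as pandas data — the experiment and model names below are
# hypothetical):
from azureml.core import Workspace

ws = Workspace.from_config()
best_model, run, best_run = auto_train_model(
    ws, "UsedCars_AutoML_Experiment", "usedcarsmodel",
    full_X, full_Y,
    training_set_percentage=0.75,
    training_target_accuracy=0.90)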
def get_latest_model(self, experiment_name):
    """
    This function finds the experiment associated with the Data Labelling
    Project, finds the best model, and downloads the training artifacts.
    Note: at the time of writing no SDK support is available for data
    labelling projects.
    :param experiment_name:
    :return:
    """
    success = False
    ws = self.get_workspace()
    logging.info(f"Connected to Workspace {ws.name}")
    experiment = Experiment(workspace=ws, name=experiment_name)
    list_runs = experiment.get_runs()
    for run in list_runs:
        logging.info(f"Getting last run {run.id}")
        tags = run.get_tags()
        # Use .get() so runs without this tag do not raise a KeyError
        if tags.get('model_explain_run') == 'best_run':
            # Get the latest run
            logging.info(f"Getting last best child run {tags['automl_best_child_run_id']}")
            child_run = run.get(ws, tags['automl_best_child_run_id'])
            metrics = run.get_metrics()
            logging.info(f"Accuracy (class) {metrics['accuracy']}")
            file_names = child_run.get_file_names()
            if "train_artifacts/model.pt" in file_names:
                logging.info('Found a trained model.pt')
                child_run.download_files(prefix='train_artifacts',
                                         output_directory='/usr/src/api/models')
                success = True
                break
    return success
def RunAutoML():
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']
    #location = request.json['location']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    dataset_name = file_name
    # Get a dataset by name
    df = Dataset.get_by_name(workspace=ws, name=dataset_name)
    stock_dataset_df = df.to_pandas_dataframe()
    print('file successfully received.')
    stock_dataset_df.head()
    #stock_dataset_json = stock_dataset_df.to_json(orient='split')
    #print(stock_dataset_json)
    y_df = stock_dataset_df['ActionTaken'].values
    x_df = stock_dataset_df.drop(['ActionTaken'], axis=1)

    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']
    #n_cross_validations = request.json['n_cross_validations']
    try:
        automl_config = AutoMLConfig(
            task=tasks,
            X=x_df,
            y=y_df,
            iterations=iterations,
            iteration_timeout_minutes=iteration_timeout_minutes,
            primary_metric=primary_metric,
            #n_cross_validations=n_cross_validations,
            preprocess=True)
        experiment = Experiment(ws, ExperimentName)
        run = experiment.submit(config=automl_config, show_output=True)
        best_run, fitted_model = run.get_output()
        return 'ok'
    except Exception:
        return 'error'
def main(train_path, pred_path, n_pred, dt, target, time_limit_min):
    df_train = pd.read_csv(train_path)
    df_train[dt] = pd.to_datetime(df_train[dt])

    time_series_settings = {
        "time_column_name": dt,
        "max_horizon": n_pred,
        "target_lags": "auto",
        "target_rolling_window_size": "auto"
    }
    automl_config = AutoMLConfig(task="forecasting",
                                 training_data=df_train,
                                 label_column_name=target,
                                 n_cross_validations=5,
                                 max_cores_per_iteration=-1,
                                 path=os.environ["SCRATCH"],
                                 experiment_timeout_minutes=time_limit_min,
                                 ensemble_download_models_timeout_sec=3600,
                                 **time_series_settings)

    ws = Workspace.from_config()
    experiment = Experiment(ws, "experiment")
    best_run, fitted_model = experiment.submit(automl_config,
                                               show_output=True).get_output()

    print("Best pipeline:")
    try:
        # An ensemble pipeline wraps its members; unwrap them if present.
        ensemble = vars(fitted_model.steps[1][1])["_wrappedEnsemble"]
        print(ensemble.__class__)
        steps = ensemble.estimators_
    except Exception:
        steps = fitted_model.steps
    best_pipeline = ""
    for i, step in enumerate(steps):
        best_pipeline += f"{i}. {str(step)}\n"
    print(best_pipeline)

    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_colwidth', None)
    print(fitted_model.named_steps["timeseriestransformer"]
          .get_engineered_feature_names())
    featurization_summary = fitted_model.named_steps[
        "timeseriestransformer"].get_featurization_summary()
    print(pd.DataFrame.from_records(featurization_summary))

    x_pred = pd.date_range(df_train[dt].iloc[-1],
                           periods=n_pred + 1,
                           freq=pd.infer_freq(df_train[dt]))[1:]
    y_pred = fitted_model.forecast(forecast_destination=x_pred[-1])[0]
    # y_pred = fitted_model.forecast(pd.DataFrame({dt: x_pred}))[0]
    df_pred = pd.DataFrame({dt: x_pred, target: y_pred})
    df_pred.to_csv(pred_path, index=False)
def train_model(data_file, random_seed):
    """Train the automl model."""
    target = "utilization"
    df = pd.read_parquet(data_file)
    x = df.loc[:, [c for c in df if c != target]].values
    y = df[target].values
    project_folder = "./automl"
    automl_config = AutoMLConfig(
        task="regression",
        iteration_timeout_minutes=5,
        iterations=10,
        primary_metric="spearman_correlation",
        n_cross_validations=5,
        debug_log="automl.log",
        verbosity=logging.INFO,
        X=x,
        y=y,
        path=project_folder,
    )
    load_dotenv(find_dotenv())
    ws = Workspace(
        workspace_name=getenv("AML_WORKSPACE_NAME"),
        subscription_id=getenv("AML_SUBSCRIPTION_ID"),
        resource_group=getenv("AML_RESOURCE_GROUP"),
    )
    experiment = Experiment(ws, getenv("AML_EXPERIMENT_NAME"))
    local_run = experiment.submit(automl_config, show_output=True)

    # Track the best non-ensemble child run by its primary-metric score.
    sub_runs = list(local_run.get_children())
    best_run = None
    best_score = 0
    for sub_run in sub_runs:
        props = sub_run.get_properties()
        if props["run_algorithm"] != "Ensemble":
            if float(props["score"]) > best_score:
                best_run = sub_run
                best_score = float(props["score"])

    model_name = "Automl{}".format(str(uuid.uuid4()).replace("-", ""))[:20]
    best_run.register_model(model_name=model_name,
                            model_path="outputs/model.pkl")
    # best_run, fitted_model = local_run.get_output()
    # local_run.register_model(
    #     description="automl meetup best model"
    # )
    print("Model name is {}".format(model_name))
def train_eval_register_model(ws, experiment_name, model_name, full_X, full_Y,
                              training_set_percentage):
    # start a training run by defining an experiment
    myexperiment = Experiment(ws, experiment_name)
    run = myexperiment.start_logging()

    train_X, test_X, train_Y, test_Y = train_test_split(
        full_X, full_Y, train_size=training_set_percentage, random_state=42)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(train_X)
    clf = linear_model.LogisticRegression(C=1)
    clf.fit(X_scaled, train_Y)

    scaled_inputs = scaler.transform(test_X)
    predictions = clf.predict(scaled_inputs)
    score = accuracy_score(test_Y, predictions)

    print("With %0.2f percent of data, model accuracy reached %0.4f." %
          (training_set_percentage, score))

    # Log the training metrics to Azure Machine Learning service run history
    run.log("Training_Set_Percentage", training_set_percentage)
    run.log("Accuracy", score)

    # Serialize the model to a pickle file in the outputs folder
    output_model_path = 'outputs/' + model_name + '.pkl'
    pickle.dump(clf, open(output_model_path, 'wb'))
    print('Exported model to ', output_model_path)

    # Serialize the scaler as a pickle file in the same folder as the model
    output_scaler_path = 'outputs/' + 'scaler' + '.pkl'
    pickle.dump(scaler, open(output_scaler_path, 'wb'))
    print('Exported scaler to ', output_scaler_path)

    # Notice for the model_path we supply the name of the outputs folder
    # without a trailing slash; this ensures both the model and the scaler
    # get uploaded.
    registered_model = Model.register(model_path='outputs',
                                      model_name=model_name,
                                      workspace=ws)

    print(registered_model.name, registered_model.id,
          registered_model.version, sep='\t')

    run.complete()

    return (registered_model, clf, scaler, score, run)
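# Retrieval sketch for the registration above (a minimal sketch; the model
# name "usedcarsmodel" is hypothetical, and the on-disk layout of a
# folder-registered model can vary by SDK version). Because the whole
# 'outputs' folder was registered, the download brings back both pickles.
import os
import pickle
from azureml.core import Workspace
from azureml.core.model import Model

ws = Workspace.from_config()
model_root = Model(ws, name="usedcarsmodel").download(exist_ok=True)
clf = pickle.load(open(os.path.join(model_root, "usedcarsmodel.pkl"), "rb"))
scaler = pickle.load(open(os.path.join(model_root, "scaler.pkl"), "rb"))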
def submit(self, dispatcher: CollectingDispatcher, tracker: Tracker,
           domain: Dict[Text, Any]) -> List[Dict]:
    """Define what the form has to do after all required slots are filled"""

    task = tracker.get_slot('task')
    data = tracker.get_slot('data')
    column_name = tracker.get_slot('column_name')
    dispatcher.utter_message(template="utter_doing_task",
                             task=task,
                             data=data,
                             column_name=column_name)

    # Load the workspace from the saved config file
    ws = Workspace.from_config()
    print('Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION, ws.name))

    df = pd.read_csv(data)
    train_data, test_data = train_test_split(df, test_size=0.1,
                                             random_state=42)
    label = column_name

    automl_config = AutoMLConfig(name='Automated ML Experiment',
                                 task=task,
                                 compute_target='local',
                                 training_data=train_data,
                                 validation_data=test_data,
                                 label_column_name=label,
                                 experiment_timeout_minutes=30,
                                 iterations=6,
                                 primary_metric='AUC_weighted',
                                 featurization='auto')

    automl_experiment = Experiment(ws, 'mslearn-diabetes-automl')
    automl_run = automl_experiment.submit(automl_config)
    best_run, fitted_model = automl_run.get_output()
    best_run_metrics = best_run.get_metrics()
    metric_list = []
    for metric_name in best_run_metrics:
        metric = best_run_metrics[metric_name]
        metric_list.append((metric_name, metric))

    # Report the results to the user before returning; a form submit
    # returns a (possibly empty) list of events.
    print("The best model pipeline for the data is")
    dispatcher.utter_message(text="The best model pipeline for the data is")
    print(fitted_model)
    dispatcher.utter_message(text=str(fitted_model))
    print("The different metrics are")
    dispatcher.utter_message(text="The different metrics are")
    print(metric_list)
    dispatcher.utter_message(text=str(metric_list))

    return []
def run_pipeline(self, params):
    """
    run_pipeline - Submit a pipeline job.

    :param dict params: Pipeline parameters.
    :returns: The submitted pipeline run.
    :rtype: Run
    """
    # Submit the pipeline to be run
    exp = Experiment(self.ws, self.pipeline_name)
    run = exp.submit(self.pipeline, pipeline_parameters=params)
    return run
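    # Usage sketch (hypothetical, not part of the original method): with
    # `builder` an instance of the class that defines run_pipeline, the
    # returned run can be monitored, e.g.
    #
    #   run = builder.run_pipeline({"model_name": "my-model"})
    #   run.wait_for_completion(show_output=True)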
def fit(self, X, y, sample_weight=None):
    # fit implementation for a single output model.
    # Create experiment for specified workspace
    automl_config = copy.deepcopy(self._automl_config)
    current_time = time.localtime()
    current_time_string = time.strftime('%y_%m_%d-%H_%M_%S', current_time)
    experiment_name = self._experiment_name_prefix + "_" + current_time_string
    self._experiment = Experiment(self._workspace, experiment_name)

    # Configure automl_config with training set information.
    automl_config.user_settings['X'] = X
    automl_config.user_settings['y'] = y
    automl_config.user_settings['sample_weight'] = sample_weight

    # Wait for the run to complete, then set the model
    print("Experiment " + experiment_name + " has started.")
    local_run = self._experiment.submit(automl_config,
                                        show_output=self._show_output)
    print("Experiment " + experiment_name + " completed.")
    _, self._model = local_run.get_output()
def train_eval_register_model(experiment_name, full_X, full_Y,
                              training_set_percentage):
    # start a training run by defining an experiment
    myexperiment = Experiment(ws, experiment_name)
    run = myexperiment.start_logging()

    train_X, test_X, train_Y, test_Y = train_test_split(
        full_X, full_Y, train_size=training_set_percentage, random_state=42)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(train_X)
    clf = linear_model.LogisticRegression(C=1)
    clf.fit(X_scaled, train_Y)

    scaled_inputs = scaler.transform(test_X)
    predictions = clf.predict(scaled_inputs)
    score = accuracy_score(test_Y, predictions)

    print("With %0.2f percent of data, model accuracy reached %0.4f." %
          (training_set_percentage, score))

    # Log the training metrics to Azure Machine Learning service run history
    run.log("Training_Set_Percentage", training_set_percentage)
    run.log("Accuracy", score)
    run.complete()

    model_name = experiment_name + '.pkl'
    output_model_path = './outputs/' + model_name
    pickle.dump(clf, open(output_model_path, 'wb'))

    # Upload and register this version of the model with
    # Azure Machine Learning service
    destination_path = 'outputs/' + model_name
    run.upload_file(destination_path, output_model_path)  # destination, source
    registered_model = run.register_model(model_name='usedcarsmodel',
                                          model_path=destination_path)

    print(registered_model.name, registered_model.id,
          registered_model.version, sep='\t')

    return (clf, score)
def main(force_model_register: bool, skip_model_register: bool,
         submit_pipeline: bool, publish_pipeline: bool, experiment_name: str,
         debug_run: bool, dbx_cluster_name: str, aml_compute_name: str,
         input_dataset_name: str, validation_dataset_name: str):
    pipeline: Pipeline = create_pipeline(
        debug_run=debug_run,
        dbx_compute=dbx_cluster_name,
        aml_compute=aml_compute_name,
        input_dataset=input_dataset_name,
        validation_dataset=validation_dataset_name)
    pipeline.validate()

    if submit_pipeline and not publish_pipeline:
        exp = Experiment(WS, experiment_name)
        exp.submit(pipeline,
                   pipeline_parameters={
                       "force_registration": str(force_model_register),
                       "skip_registration": str(skip_model_register)
                   })

    if publish_pipeline:
        published_pipeline: PublishedPipeline = pipeline.publish(
            name="Driver Safety Pipeline",
            description="Training Pipeline for new driver safety model")

        if submit_pipeline:
            published_pipeline.submit(
                workspace=WS,
                experiment_name=experiment_name,
                pipeline_parameters={
                    "force_registration": str(force_model_register),
                    "skip_registration": str(skip_model_register)
                })

        sys.stdout.write(published_pipeline.id)
def existingModel(self, exp_name, run_id):
    SUBSCRIPTION_ID = '1f6fddae-bfa7-4f33-b9a5-ad3d4f29b8a9'
    RESOURCE_GROUP = 'DECADAAPPS'
    WORKSPACE_NAME = 'kongming-aml'
    TENANT_ID = 'd7802200-0ab3-48a9-a946-c4e20d15c1ca'

    auth = InteractiveLoginAuthentication(tenant_id=TENANT_ID)
    ws = Workspace(subscription_id=SUBSCRIPTION_ID,
                   resource_group=RESOURCE_GROUP,
                   workspace_name=WORKSPACE_NAME,
                   auth=auth)

    exp = Experiment(ws, exp_name)
    run = AutoMLRun(experiment=exp, run_id=run_id)
    _, model = run.get_output()
    return run, model
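    # Follow-on sketch (hypothetical, not in the original method): the
    # retrieved AutoMLRun can register its best model directly, e.g.
    #
    #   registered = run.register_model(model_name="kongming-best-model")
    #   print(registered.name, registered.version)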
def run_pipeline(self, experiment_name, tags=None):
    """
    Submits the batch inference pipeline as an experiment run.

    :param str experiment_name: [required] name of the experiment in azureml
    :param dict tags: [optional] dictionary of tags
    :returns: run
    :rtype: Run
    """
    if tags is None:
        tags = self.pipeline_tags
    step_sequence = StepSequence(steps=self.steps)
    pipeline = Pipeline(workspace=self.ws, steps=step_sequence)
    run = Experiment(self.ws, experiment_name).submit(
        pipeline, tags=tags, continue_on_step_failure=False)
    return run
def __init__(self, directory=".", experiment=None, auth=None,
             _disable_service_check=False):
    """
    Creates the project object using the local project path.

    :param directory: Project path.
    :type directory: str
    :param experiment:
    :type experiment: azureml.core.Experiment
    :param auth: An authentication object of a subclass of
        azureml.core.authentication.AbstractAuthentication
    :type auth: azureml.core.authentication.AbstractAuthentication
    :return:
    """
    from azureml.core.experiment import Experiment
    if not directory:
        directory = "."

    if experiment:
        self._workspace = experiment.workspace
        self.directory = directory
        self._project_path = os.path.abspath(directory)
        self._experiment = experiment
        self._snapshots_client = SnapshotsClient(
            self._workspace.service_context)
    else:
        if not auth:
            auth = InteractiveLoginAuthentication()

        self._project_path = os.path.abspath(directory)
        info_dict = _commands.get_project_info(auth, self._project_path)

        from azureml.core.workspace import Workspace
        self._workspace = Workspace(
            info_dict[_commands.SUBSCRIPTION_KEY],
            info_dict[_commands.RESOURCE_GROUP_KEY],
            info_dict[_commands.WORKSPACE_KEY],
            auth,
            _disable_service_check=_disable_service_check)
        self._experiment = Experiment(self._workspace,
                                      info_dict[_commands.PROJECT_KEY])
        self._snapshots_client = SnapshotsClient(
            self._workspace.service_context)
class _InnerAutomatedMLModel():
    # Inner single-output model that the wrapper can pass into
    # MultiOutputRegressor.
    def __init__(self, automl_config, workspace,
                 experiment_name_prefix="aml_experiment"):
        self._show_output = automl_config._show_output
        self._workspace = workspace
        self._automl_config = automl_config
        self._experiment_name_prefix = experiment_name_prefix

    def get_params(self, deep=True):
        # Must be implemented for MultiOutputRegressor to view
        # _InnerAutomatedMLModel as an sklearn estimator
        return {
            'workspace': self._workspace,
            'automl_config': self._automl_config,
            'experiment_name_prefix': self._experiment_name_prefix
        }

    def fit(self, X, y, sample_weight=None):
        # fit implementation for a single output model.
        # Create experiment for specified workspace
        automl_config = copy.deepcopy(self._automl_config)
        current_time = time.localtime()
        current_time_string = time.strftime('%y_%m_%d-%H_%M_%S', current_time)
        experiment_name = self._experiment_name_prefix + "_" + current_time_string
        self._experiment = Experiment(self._workspace, experiment_name)

        # Configure automl_config with training set information.
        automl_config.user_settings['X'] = X
        automl_config.user_settings['y'] = y
        automl_config.user_settings['sample_weight'] = sample_weight

        # Wait for the run to complete, then set the model
        print("Experiment " + experiment_name + " has started.")
        local_run = self._experiment.submit(automl_config,
                                            show_output=self._show_output)
        print("Experiment " + experiment_name + " completed.")
        _, self._model = local_run.get_output()

    def predict(self, X):
        return self._model.predict(X)

    def predict_proba(self, X):
        return self._model.predict_proba(X)
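# Wiring sketch for the class above (a minimal sketch; `automl_config`, `ws`,
# and the training arrays are assumed to exist already). sklearn's
# MultiOutputRegressor clones the inner estimator and fits one AutoML model
# per target column of y.
from sklearn.multioutput import MultiOutputRegressor

inner = _InnerAutomatedMLModel(automl_config, ws,
                               experiment_name_prefix="multiout")
multi_model = MultiOutputRegressor(inner)
multi_model.fit(X_train, y_train)  # y_train shaped (n_samples, n_targets)
y_hat = multi_model.predict(X_test)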
def detach(name=None, project=None):
    """Detach compute target"""
    # Set correlation id
    set_correlation_id()

    if not project:
        project = "."

    auth = get_cli_specific_auth()
    project_object = Project(auth=auth, directory=project)
    experiment = Experiment(project_object.workspace,
                            project_object.history.name)
    remove_legacy_compute_target(experiment, project, name)

    command_output = CLICommandOutput(
        "Detaching {} compute target for project "
        "{} successful".format(name, project_object.project_directory))
    command_output.set_do_not_print_dict()
    return get_cli_specific_output(command_output)
# Note: this snippet begins mid-call; the leading AutoMLConfig arguments
# (task, primary metric, iteration settings) are truncated in the source.
automl_config = AutoMLConfig(
    verbosity=logging.INFO,
    spark_context=sc,  # noqa
    whitelist_models=[
        "GradientBoosting",
        "DecisionTree",
        "RandomForest",
        "ExtremeRandomTrees",
        "LightGBM",
    ],
    blacklist_models=["ensemble"],
    X=x,
    y=y,
    path=project_folder,
)

experiment = Experiment(ws, "host-ml-nt-ai-meetup")
db_run = experiment.submit(automl_config, show_output=True)

# Track the best non-ensemble child run by its primary-metric score.
sub_runs = list(db_run.get_children())
best_run = None
best_score = 0
for sub_run in sub_runs:
    props = sub_run.get_properties()
    if props["run_algorithm"] != "Ensemble":
        if float(props["score"]) > best_score:
            best_run = sub_run
            best_score = float(props["score"])

model_name = "Automl{}".format(str(uuid.uuid4()).replace("-", ""))[:20]
ws = Workspace.create(name=workspace_name,
                      subscription_id=subscription_id,
                      resource_group=resource_group,
                      location=workspace_region,
                      exist_ok=True)

print("Workspace Provisioning complete.")

#%%
# Step 10 - Create an experiment and log metrics for multiple training runs
###########################################################################
from azureml.core.run import Run
from azureml.core.experiment import Experiment

# start a training run by defining an experiment
myexperiment = Experiment(ws, "UsedCars_Experiment")
root_run = myexperiment.start_logging()

training_set_percentage = 0.25
run = root_run.child_run("Training_Set_Percentage-%0.5F" % training_set_percentage)
model, score = train_eval_model(full_X, full_Y, training_set_percentage)
print("With %0.2f percent of data, model accuracy reached %0.4f." %
      (training_set_percentage, score))
run.log("Training_Set_Percentage", training_set_percentage)
run.log("Accuracy", score)
run.complete()

training_set_percentage = 0.5
run = root_run.child_run("Training_Set_Percentage-%0.5F" % training_set_percentage)
# ```
# from azureml.core.authentication import ServicePrincipalAuthentication
# auth = ServicePrincipalAuthentication('mytenantid', 'myappid', 'mypassword')
# ws = Workspace.from_config(auth=auth)
# ```
#
# For more details, see [aka.ms/aml-notebook-auth](http://aka.ms/aml-notebook-auth)

# In[85]:

ws = Workspace.from_config()

# Choose a name for the experiment and specify the project folder.
experiment_name = 'automl-classification'
project_folder = './sample_projects/automl-classification'

experiment = Experiment(ws, experiment_name)

output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace Name'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
output['Experiment Name'] = experiment.name
pd.set_option('display.max_colwidth', None)
outputDf = pd.DataFrame(data=output, index=[''])
outputDf.T

# ## Data
#
import shutil
import os
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml import Pipeline, PipelineModel
from azureml.core.run import Run
from azureml.core.experiment import Experiment

# defining some variables
model_name = "smsspamclassif_runs.mml"
model_dbfs = os.path.join("/dbfs", model_name)

# these are the different regularization parameter values we are going to test
regs = [0.0001, 0.001, 0.01, 0.1]

myexperiment = Experiment(ws, "SMS_Spam_Classifier")
main_run = myexperiment.start_logging()

for reg in regs:
    print("Regularization rate: {}".format(reg))
    with main_run.child_run("reg-" + str(reg)) as run:
        lr = LogisticRegression(featuresCol="features", labelCol='label',
                                regParam=reg)
        pipe = Pipeline(stages=[
            stringIndexer, tokenizer, stopwordsRemover, hashingTF, idf, lr
        ])
        model_p = pipe.fit(training_data)

        # make prediction on test_data
        pred = model_p.transform(test_data)
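        # Continuation sketch (an addition, not in the original snippet):
        # score the predictions with the evaluator imported above and log
        # the AUC on this child run.
        evaluator = BinaryClassificationEvaluator(rawPredictionCol="rawPrediction",
                                                  labelCol="label")
        auc = evaluator.evaluate(pred)
        run.log("AUC", auc)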
from azureml.core import Workspace
from azureml.core.experiment import Experiment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration
from azureml.widgets import RunDetails
from checknotebookoutput import checkNotebookOutput

if __name__ == "__main__":
    ws = Workspace.from_config()
    print(ws.resource_group)
    print(ws.subscription_id)

    # choose a name for the run history container in the workspace
    experiment_name = 'automl-remote-attach'
    # project folder
    project_folder = './sample_projects/automl-remote-attach'

    experiment = Experiment(ws, experiment_name)
    automl_runs = list(experiment.get_runs(type='automl'))
    assert (len(automl_runs) == 1)

    compute_name = 'mydsvmb'
    dsvm_compute = ws.compute_targets[compute_name]

    # create a new RunConfig object
    conda_run_config = RunConfiguration(framework="python")

    # Set compute target to the Linux DSVM
    conda_run_config.target = dsvm_compute

    cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'],
"""
@author: datacore
"""
from azureml.core.authentication import AzureCliAuthentication
import azure.cli.core
#cli_auth = AzureCliAuthentication()

from azureml.core.workspace import Workspace
ws = Workspace(subscription_id="24075937-2687-4457-bac6-ec16dec514c3",
               resource_group="VstsRG-784AbhijitC-8a31",
               workspace_name="automldc")

from azureml.core.experiment import Experiment
from azureml.core import Run
experiment = Experiment(ws, 'Myexp2_v1_test21')
best_run = Run(experiment=experiment,
               run_id='AutoML_74e9d9dc-f347-4392-b8bb-3edeb4a6afad_8')
fitted_model = Run(experiment=experiment,
                   run_id='AutoML_74e9d9dc-f347-4392-b8bb-3edeb4a6afad_8')
#print(best_run.register_model()
print(fitted_model)

# Get a dataset by name
from azureml.core.dataset import Dataset
file_name = '2018Q4PredictionTrainedSet101.csv'
stock_dataset = Dataset.get_by_name(ws, '2018Q4PredictionTrainedSet101.csv')
#stock_dataset
#dataset = Dataset.Tabular.from_delimited_files(stock_dataset)
stock_dataset.to_pandas_dataframe().describe()
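# Retrieval sketch (an assumption, not in the original script): the two Run
# handles above are not trained models. To load the fitted model itself, use
# AutoMLRun with the parent run id (the id without the trailing '_8'
# iteration suffix) and ask for that iteration's output.
from azureml.train.automl.run import AutoMLRun
parent_run = AutoMLRun(experiment=experiment,
                       run_id='AutoML_74e9d9dc-f347-4392-b8bb-3edeb4a6afad')
best_run, fitted_model = parent_run.get_output(iteration=8)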
import numpy as np
import logging
import pandas as pd

import azureml.core
from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
from azureml.automl.core.featurization import FeaturizationConfig

# https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb

# `ws`, `notebook`, `log_error`, and `dbutils` are provided by the
# surrounding Databricks notebook context.

# Choose a name for the experiment and specify the project folder.
experiment_name = "automl-revenue-region-forecast"

try:
    experiment = Experiment(ws, experiment_name)
    output = {}
    output["SDK version"] = azureml.core.VERSION
    output["Subscription ID"] = ws.subscription_id
    output["Workspace Name"] = ws.name
    output["Resource Group"] = ws.resource_group
    output["Location"] = ws.location
    output["Experiment Name"] = experiment.name
    pd.set_option("display.max_colwidth", None)
    print(pd.DataFrame(data=output, index=[""]).T)
except Exception as error:
    print(error)
    log_error("{} {}".format(notebook, error))  # log error in sentry
    raise dbutils.notebook.exit(error)  # raise the exception
def RunAutoML():
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']
    #location = request.json['location']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    dataset_name = file_name
    # Get a dataset by name
    df = Dataset.get_by_name(workspace=ws, name=dataset_name)
    stock_dataset_df = df.to_pandas_dataframe()
    print('file successfully received.')
    stock_dataset_df.head()
    #stock_dataset_json = stock_dataset_df.to_json(orient='split')
    #print(stock_dataset_json)
    y_df = stock_dataset_df['ActionTaken'].values
    x_df = stock_dataset_df.drop(['ActionTaken'], axis=1)
    print(y_df)

    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    n_cross_validations = request.json['n_cross_validations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']
    max_concurrent_iterations = request.json['max_concurrent_iterations']
    try:
        automl_settings = {
            "name": ExperimentName,
            "iteration_timeout_minutes": iteration_timeout_minutes,
            "iterations": iterations,
            "n_cross_validations": n_cross_validations,
            "primary_metric": primary_metric,
            "preprocess": True,
            "max_concurrent_iterations": max_concurrent_iterations,
            "verbosity": logging.INFO
        }
        automl_config = AutoMLConfig(
            task=tasks,
            debug_log='automl_errors.log',
            path=os.getcwd(),
            #compute_target='Automlvm',
            X=x_df,
            y=y_df,
            **automl_settings)

        experiment = Experiment(ws, 'automl_local_v2')
        remote_run = experiment.submit(automl_config, show_output=True)

        # Collect per-iteration metrics from the child runs.
        children = list(remote_run.get_children())
        metricslist = {}
        for run in children:
            properties = run.get_properties()
            metrics = {
                k: v
                for k, v in run.get_metrics().items() if isinstance(v, float)
            }
            metricslist[int(properties['iteration'])] = metrics

        rundata = pd.DataFrame(metricslist).sort_index(axis=1)
        rundata_toJson = rundata.to_json(orient='columns')

        return rundata_toJson
    except Exception:
        return 'error'
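# Client-side sketch for the route above (a minimal sketch; the host, port,
# route path, and all payload values are hypothetical — the keys mirror what
# RunAutoML reads from request.json):
import requests

payload = {
    "subscription_id": "<subscription id>",
    "resource_group": "<resource group>",
    "workspace_name": "<workspace name>",
    "file_name": "<registered dataset name>",
    "ExperimentName": "automl_local_v2",
    "tasks": "classification",
    "iterations": 10,
    "n_cross_validations": 5,
    "iteration_timeout_minutes": 10,
    "primary_metric": "accuracy",
    "max_concurrent_iterations": 4,
}
resp = requests.post("http://localhost:5000/RunAutoML", json=payload)
print(resp.text)  # per-iteration metrics keyed by iteration number, or 'error'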
import os
import joblib

from azureml.core import Run, Experiment, Dataset
from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \
    automl_setup_model_explanations, automl_check_model_if_explainable
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
from azureml.explain.model.mimic_wrapper import MimicWrapper
from automl.client.core.common.constants import MODEL_PATH
from azureml.explain.model.scoring.scoring_explainer import TreeScoringExplainer, save

OUTPUT_DIR = './outputs/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Get workspace from the run context
run = Run.get_context()
ws = run.experiment.workspace

# Get the AutoML run object from the experiment name and the workspace
experiment = Experiment(ws, '<<experiment_name>>')
automl_run = Run(experiment=experiment, run_id='<<run_id>>')

# Check if this AutoML model is explainable
if not automl_check_model_if_explainable(automl_run):
    raise Exception("Model explanations are currently not supported for " +
                    automl_run.get_properties().get('run_algorithm'))

# Download the best model from the artifact store
automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl')

# Load the AutoML model into memory
fitted_model = joblib.load('model.pkl')

# Get the train dataset from the workspace
train_dataset = Dataset.get_by_name(workspace=ws,
from azureml.pipeline.core import PublishedPipeline
from azureml.core.experiment import Experiment
from azureml.core import Workspace

workspace = Workspace.from_config()

published_pipeline_id = ""
is_debug = True
debug_relay_connection_name = "test"

if published_pipeline_id is None or published_pipeline_id == "":
    raise ValueError("Initialize published_pipeline_id")

pipeline_parameters = {"is_debug": is_debug}
if is_debug:
    if debug_relay_connection_name == "":
        raise ValueError("Hybrid connection name cannot be empty!")
    pipeline_parameters.update(
        {"debug_relay_connection_name": debug_relay_connection_name})

experiment = Experiment(workspace, "Pipeline_debug_experiment")
published_pipeline = PublishedPipeline.get(workspace=workspace,
                                           id=published_pipeline_id)
experiment.submit(published_pipeline,
                  pipeline_parameters=pipeline_parameters)
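# Optional follow-up (a sketch): experiment.submit returns a PipelineRun, so
# the submission above could instead be captured and monitored, e.g.
#
#   pipeline_run = experiment.submit(published_pipeline,
#                                    pipeline_parameters=pipeline_parameters)
#   pipeline_run.wait_for_completion(show_output=True)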
def RunAutoML():
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']
    location = request.json['location']
    target_var = request.json['target_var']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    dataset_name = file_name
    # Get a dataset by name
    df = Dataset.get_by_name(workspace=ws, name=dataset_name)
    stock_dataset_df = df.to_pandas_dataframe()
    print('file successfully received.')
    stock_dataset_df.head()
    #stock_dataset_json = stock_dataset_df.to_json(orient='split')
    #print(stock_dataset_json)
    y_df = stock_dataset_df[target_var].values
    x_df = stock_dataset_df.drop([target_var], axis=1)
    print(y_df)

    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    n_cross_validations = request.json['n_cross_validations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']
    max_concurrent_iterations = request.json['max_concurrent_iterations']
    best_model = request.json['best_model']
    try:
        automl_settings = {
            "name": ExperimentName,
            "iteration_timeout_minutes": iteration_timeout_minutes,
            "iterations": iterations,
            "n_cross_validations": n_cross_validations,
            "primary_metric": primary_metric,
            "preprocess": True,
            "max_concurrent_iterations": max_concurrent_iterations,
            "verbosity": logging.INFO
        }
        automl_config = AutoMLConfig(
            task=tasks,
            debug_log='automl_errors.log',
            path='D:\\Stock_Prediction\\AutoML_Azure\\python\\Flask_API_Azure\\log',
            #compute_target='Automlvm',
            X=x_df,
            y=y_df,
            **automl_settings)

        experiment = Experiment(ws, ExperimentName)
        remote_run = experiment.submit(automl_config, show_output=True)
        best_run, fitted_model = remote_run.get_output()
        #print(best_run)
        print(best_run.get_file_names())

        # Register the model
        from datetime import date
        model = best_run.register_model(
            model_name=best_model + str(date.today()),
            model_path='outputs/model.pkl')
        print(model.name, model.id, model.version, sep='\t')

        # Collect per-iteration metrics from the child runs.
        children = list(remote_run.get_children())
        metricslist = {}
        for run in children:
            properties = run.get_properties()
            metrics = {
                k: v
                for k, v in run.get_metrics().items() if isinstance(v, float)
            }
            metricslist[int(properties['iteration'])] = metrics

        rundata = pd.DataFrame(metricslist).sort_index(axis=1)
        rundata.rename(columns={
            0: "one",
            1: "two",
            2: "three",
            3: "four",
            4: "five",
            5: "six",
            6: "seven",
            7: "eight",
            8: "nine",
            9: "ten",
        }, inplace=True)
        rundata_toJson = rundata.to_json(orient='columns')
        print(rundata_toJson)

        return rundata_toJson
    except Exception:
        return 'error'
def trigger_training_job():

    # Define vars <change the vars>.
    # In a production situation, don't put secrets in source code, but as
    # secret variables, see
    # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
    workspace = "<Name of your workspace>"
    subscription_id = "<Subscription id>"
    resource_grp = "<Name of your resource group where aml service is created>"

    domain = "westeurope.azuredatabricks.net"  # change location in case databricks instance is not in westeurope
    dbr_pat_token_raw = "<<your Databricks Personal Access Token>>"

    DBR_PAT_TOKEN = bytes(dbr_pat_token_raw, encoding='utf-8')  # adding b'
    notebookRemote = "/3_IncomeNotebookDevops"
    experiment_name = "experiment_model_release"
    model_name_run = datetime.datetime.now().strftime(
        "%Y%m%d%H%M%S") + "_dbrmod.mml"  # in case you want to change the name, keep the .mml extension
    model_name = "databricksmodel.mml"  # in case you want to change the name, keep the .mml extension
    db_compute_name = "dbr-amls-comp"

    #
    # Step 1: Run notebook using Databricks Compute in AML SDK
    #
    cli_auth = AzureCliAuthentication()

    ws = Workspace(workspace_name=workspace,
                   subscription_id=subscription_id,
                   resource_group=resource_grp,
                   auth=cli_auth)
    ws.get_details()

    #
    # Step 2: Create job and attach it to cluster
    #
    # In this step, secrets are added as parameters (spn_tenant,
    # spn_clientid, spn_clientsecret). Never do this in a production
    # situation, but use a secret scope backed by key vault instead. See
    # https://docs.azuredatabricks.net/user-guide/secrets/secret-scopes.html#azure-key-vault-backed-scopes
    response = requests.post(
        'https://%s/api/2.0/jobs/create' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={
            "name": "Run AzureDevopsNotebook Job",
            "new_cluster": {
                "spark_version": "4.0.x-scala2.11",
                "node_type_id": "Standard_D3_v2",
                "spark_env_vars": {
                    'PYSPARK_PYTHON': '/databricks/python3/bin/python3',
                },
                "autoscale": {
                    "min_workers": 1,
                    "max_workers": 2
                }
            },
            "libraries": [{
                "pypi": {
                    "package": "azureml-sdk[databricks]"
                }
            }],
            "notebook_task": {
                "notebook_path": notebookRemote,
                "base_parameters": [{
                    "key": "subscription_id",
                    "value": subscription_id
                }, {
                    "key": "resource_group",
                    "value": resource_grp
                }, {
                    "key": "workspace_name",
                    "value": workspace
                }, {
                    "key": "model_name",
                    "value": model_name_run
                }]
            }
        })

    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(2)

    #
    # Step 3: Start job
    #
    databricks_job_id = response.json()['job_id']

    response = requests.post(
        'https://%s/api/2.0/jobs/run-now' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={"job_id": databricks_job_id})

    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(3)

    print(response.json()['run_id'])

    #
    # Step 4: Wait until job is finished
    #
    databricks_run_id = response.json()['run_id']
    scriptRun = 1
    count = 0
    while scriptRun == 1:
        response = requests.get(
            'https://%s/api/2.0/jobs/runs/get?run_id=%s' %
            (domain, databricks_run_id),
            headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        )

        state = response.json()['state']
        life_cycle_state = state['life_cycle_state']
        print(state)

        if life_cycle_state in ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]:
            result_state = state['result_state']
            if result_state == "SUCCESS":
                print("run ok")
                scriptRun = 0
                #exit(0)
            else:
                exit(4)
        elif count > 180:  # 180 polls at 30 s each
            print("time out occurred after 90 minutes")
            exit(5)
        else:
            count += 1
            time.sleep(30)  # wait 30 seconds before next status update

    #
    # Step 5: Retrieve model from dbfs
    #
    mdl, ext = model_name_run.split(".")
    model_zip_run = mdl + ".zip"

    response = requests.get(
        'https://%s/api/2.0/dbfs/read?path=/%s' % (domain, model_zip_run),
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN})
    if response.status_code != 200:
        print("Error copying dbfs results: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(1)

    model_output = base64.b64decode(response.json()['data'])

    # download model in deploy folder
    os.chdir("deploy")
    with open(model_zip_run, "wb") as outfile:
        outfile.write(model_output)
    print("Downloaded model {} to Project root directory".format(model_name))

    #
    # Step 6: Retrieve model metrics from dbfs
    #
    mdl, ext = model_name_run.split(".")
    model_metrics_json_run = mdl + "_metrics.json"

    response = requests.get(
        'https://%s/api/2.0/dbfs/read?path=/%s' %
        (domain, model_metrics_json_run),
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN})
    if response.status_code != 200:
        print("Error copying dbfs results: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(2)

    model_metrics_output = json.loads(base64.b64decode(response.json()['data']))

    #
    # Step 7: Put model and metrics to Azure ML Service
    #
    # start a training run by defining an experiment
    myexperiment = Experiment(ws, experiment_name)
    run = myexperiment.start_logging()
    run.upload_file("outputs/" + model_zip_run, model_zip_run)
    #run.log("pipeline_run", pipeline_run.id)
    run.log("au_roc", model_metrics_output["Area_Under_ROC"])
    run.log("au_prc", model_metrics_output["Area_Under_PR"])
    run.log("truePostive", model_metrics_output["True_Positives"])
    run.log("falsePostive", model_metrics_output["False_Positives"])
    run.log("trueNegative", model_metrics_output["True_Negatives"])
    run.log("falseNegative", model_metrics_output["False_Negatives"])
    run.complete()
    run_id = run.id
    print("run id:", run_id)

    # unzip file to model_name_run
    shutil.unpack_archive(model_zip_run, model_name_run)

    model = Model.register(
        model_path=model_name_run,  # this points to a local file
        model_name=model_name,  # this is the name the model is registered as
        tags={
            "area": "spar",
            "type": "regression",
            "run_id": run_id
        },
        description="Medium blog test model",
        workspace=ws,
    )
    print("Model registered: {} \nModel Description: {} \nModel Version: {}".
          format(model.name, model.description, model.version))

    # Step 8: Finally, write the registered model details to conf/model.json
    model_json = {}
    model_json["model_name"] = model.name
    model_json["model_version"] = model.version
    model_json["run_id"] = run_id
    model_json["model_name_run"] = model_name_run
    with open("../conf/model.json", "w") as outfile:
        json.dump(model_json, outfile)
import os

import azureml.dataprep as dprep
from azureml.core import Workspace
from azureml.core.experiment import Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.train.automl.automlexplainer import retrieve_model_explanation
from azureml.core.model import Model
from azureml.core.image import ContainerImage
from azureml.core.image.image import Image
from azureml.core import Webservice
from azureml.core.webservice import AciWebservice

# try:
# setting the local env to handle missing packages
run_user_managed = RunConfiguration()
run_user_managed.environment.python.user_managed_dependencies = False

# Create workspace object for existing one and create an experiment
ws = Workspace.from_config('subscription.json')
print(ws.name, ws.location, ws.resource_group, ws.location, sep='\t')
experiment = Experiment(workspace=ws, name='experiment1')

# full path to training data and testing data
file_path1 = os.path.join(os.getcwd(), "cumodelwo2014.csv")
dflowtr = dprep.auto_read_file(path=file_path1)
file_path2 = os.path.join(os.getcwd(), "test2014.csv")
dflowte = dprep.auto_read_file(path=file_path2)

# Specifying x (causal) and y (response) attributes in training data
dflowtr_x = dflowtr.keep_columns([
    'cell-ID', 'Soil_Name', 'MEAN_Yld_V', 'COUNT_Yld', 'MEAN_Eleva',
    'RANGE_Elev', 'Crop-Type', 'V.A.T(F)', 'R.A.T(F)', 'M.A.T(F)',
    'V.PET(inch)', 'R.PET(inch)', 'M.PET(inch)', 'V.T.R(inch)',
    'R.T.R(inch)', 'M.T.R(inch)'
])
dflowtr_y = dflowtr.keep_columns('NormalizedYield')