import numpy as np
from azureml.core import Environment, Workspace
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice


def main():
    """Deploy the best-performing keras_mnist model as an ACI web service."""
    work_space = Workspace.from_config()
    environment = Environment("keras-service-environment")
    environment.python.conda_dependencies = CondaDependencies.create(
        python_version="3.7.7",
        pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"],
    )
    # Pick the registered model version with the highest validation accuracy
    model_list = Model.list(work_space, name="keras_mnist")
    validation_accuracy = []
    version = []
    for i in model_list:
        validation_accuracy.append(float(i.properties["val_accuracy"]))
        version.append(i.version)
    model = Model(
        work_space,
        "keras_mnist",
        version=version[np.argmax(validation_accuracy)],
    )
    service_name = "keras-mnist-service"
    inference_config = InferenceConfig(
        entry_script="score_keras.py", environment=environment
    )
    aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
    service = Model.deploy(
        workspace=work_space,
        name=service_name,
        models=[model],
        inference_config=inference_config,
        deployment_config=aci_config,
        overwrite=True,
    )
    service.wait_for_deployment(show_output=True)
    print(service.get_logs())
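# A minimal sketch of the score_keras.py entry script referenced above.
# The registered model name ("keras_mnist") comes from the deploy code,
# but the saved-model format and the JSON input shape are assumptions
# for illustration, not taken from the original repo.
import json

import numpy as np
from azureml.core.model import Model
from tensorflow import keras


def init():
    global model
    # Resolves the registered model's path inside the service container
    model_path = Model.get_model_path("keras_mnist")
    model = keras.models.load_model(model_path)


def run(raw_data):
    # Expects a JSON body like {"data": [[...784 floats...], ...]}
    data = np.array(json.loads(raw_data)["data"])
    predictions = model.predict(data)
    return json.dumps({"predictions": predictions.tolist()})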
def evaluate_model():
    all_runs = exp.get_runs(
        properties={"release_id": release_id, "run_type": "train"},
        include_children=True,
    )
    # all_runs is a generator over every run matching the property filter.
    # Runs are returned in descending order, so the first value is the
    # most recent run.
    new_model_run = next(all_runs)
    new_model_run_id = new_model_run.id
    print(f'New Run found with Run ID of: {new_model_run_id}')
    new_model_acc = new_model_run.get_metrics().get("final-accuracy")
    try:
        # Get the most recently registered model; we assume that is the
        # model in production. Fetch its run history and compare its
        # final-accuracy with the newly trained model's.
        model_list = Model.list(ws)
        production_model = next(
            filter(
                lambda x: x.created_time == max(
                    model.created_time for model in model_list),
                model_list,
            ))
        production_model_run_id = production_model.tags.get("run_id")
        production_model_run = Run(exp, run_id=production_model_run_id)
        production_model_acc = production_model_run.get_metrics().get(
            "final-accuracy")
        print("Current production model accuracy: {}, "
              "new trained model accuracy: {}".format(
                  production_model_acc, new_model_acc))
        promote_new_model = False
        # Higher accuracy is better, so promote only when the new model
        # beats the production model.
        if new_model_acc > production_model_acc:
            promote_new_model = True
            print("New trained model performs better, "
                  "thus it will be registered")
    except Exception:
        promote_new_model = True
        print("This is the first model to be trained, "
              "thus nothing to evaluate for now")
    return promote_new_model, new_model_run, new_model_acc
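# Hypothetical usage of evaluate_model() in a release step. The model name
# and artifact path below are assumptions, mirroring the register pattern
# used elsewhere in these snippets.
promote_new_model, new_model_run, new_model_acc = evaluate_model()
if promote_new_model:
    new_model_run.register_model(
        model_name="mnist_model",    # assumed model name
        model_path="outputs/model",  # assumed artifact path in run outputs
        tags={"run_id": new_model_run.id,
              "final-accuracy": new_model_acc})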
def registerModel(workspace, experiment, model_name, model_file):
    '''
    Search an existing AMLS workspace for models. If one with the given
    name is found, return it; otherwise register a new model.

    If the parameter model_file points to a file on disk, that file is
    used to register the new model. If not, a dummy pkl file is
    generated first.

    PARAMS:
        workspace  : azureml.core.Workspace  : Existing AMLS workspace
        experiment : azureml.core.Experiment : Existing AMLS experiment
        model_name : String : The name of the model to register
        model_file : String : One of two values:
                     1. Name of a pkl file to create (dummy for RTS)
                     2. Path to a pkl model file in the same directory
                        as the running script.

    RETURNS:
        azureml.core.Model
    '''
    return_model = None

    # If the model already exists, just return it.
    models = Model.list(workspace)
    if models:
        for model in models:
            if model.name == model_name:
                print("Returning existing model....", model_name)
                return_model = model
                break

    if not return_model:
        # Create it.
        print("Creating new model....")
        run = experiment.start_logging()
        run.log("Just simply dumping something in", True)

        # If the file does not exist, create a dummy model file.
        if not os.path.exists(model_file):
            createPickle(model_file)

        run.upload_file(name='outputs/' + model_file,
                        path_or_stream='./' + model_file)

        # Complete tracking
        run.complete()

        return_model = run.register_model(
            model_name=model_name,
            model_path="outputs/" + model_file)

    return return_model
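# createPickle() is called above but not defined in this snippet. A minimal
# sketch is shown below, assuming any picklable placeholder object is enough
# for the dummy registration described in the docstring.
import pickle


def createPickle(model_file):
    # Dump a trivial placeholder so there is a file to upload as the model
    with open(model_file, 'wb') as handle:
        pickle.dump({'dummy': True}, handle)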
def main(model_name, model_version, target_path):
    run = Run.get_context()
    models = Model.list(run.experiment.workspace, name=model_name)
    if model_version is not None:
        try:
            model = next(m for m in models if m.version == model_version)
        except StopIteration:
            raise ValueError("This version of the model was not found")
    else:
        print("Model version not specified. Using latest version.")
        model = max(models, key=lambda x: x.version)
    model.download(target_dir=target_path, exist_ok=True)
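# Hypothetical command-line wrapper for the download step above; the
# argument names are assumptions and may differ from the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model-name", required=True)
    parser.add_argument("--model-version", type=int, default=None)
    parser.add_argument("--target-path", default=".")
    args = parser.parse_args()
    main(args.model_name, args.model_version, args.target_path)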
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version='0.20.3')

# Alternatively, register from the run's outputs
run.register_model(model_name='classification_model',
                   model_path='outputs/model.pkl',  # run outputs path
                   description='A classification model',
                   tags={'dept': 'sales'},
                   model_framework=Model.Framework.SCIKITLEARN,
                   model_framework_version='0.20.3')

# View registered models
from azureml.core import Model

for model in Model.list(ws):
    # Get model name and auto-generated version
    print(model.name, 'version:', model.version)

# NOW TRAIN MODEL

# 1. Connect to workspace
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(
    azureml.core.VERSION, ws.name))
all_runs = exp.get_runs(
    properties={"release_id": release_id, "run_type": "train"},
    include_children=True,
)
new_model_run = next(all_runs)
new_model_run_id = new_model_run.id
print(f'New Run found with Run ID of: {new_model_run_id}')

try:
    # Get the most recently registered model; we assume that is the
    # model in production. Download this model and compare it with the
    # recently trained model by running a test with the same data set.
    model_list = Model.list(ws)
    production_model = next(
        filter(
            lambda x: x.created_time == max(
                model.created_time for model in model_list),
            model_list,
        ))
    production_model_run_id = production_model.tags.get("run_id")

    # Get the run history for both the production model and the
    # newly trained model and compare mse
    production_model_run = Run(exp, run_id=production_model_run_id)
    new_model_run = Run(exp, run_id=new_model_run_id)
    production_model_mse = production_model_run.get_metrics().get("mse")
# Get workspace
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace

inference_config = InferenceConfig(runtime="python",
                                   entry_script="score.py",
                                   conda_file="conda_dependencies.yml",
                                   source_directory="./deploy/scoring/")
print(inference_config)

# Get latest model
model_list = Model.list(ws, name=model_name)
model = next(
    filter(
        lambda x: x.created_time == max(
            model.created_time for model in model_list),
        model_list,
    ))

aks_target = ComputeTarget(ws, "one-dspe-aks")
deployment_config = AksWebservice.deploy_configuration(cpu_cores=1,
                                                       memory_gb=4)
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
model = run.register_model(
    model_name="credit_scoring_" + notebook_username,
    tags={'area': "Credit scoring", 'type': "classification"},
    description="Credit Scoring model",
    iteration=None,  # you can deploy a specific iteration
    metric="AUC_weighted")  # you can deploy the best model according to a different metric, for example "accuracy"

# COMMAND ----------

# MAGIC %md You can explore the registered models within your workspace and query by tag. Models are versioned. If you call the register_model command many times with the same model name, you will get multiple versions of the model with increasing version numbers.

# COMMAND ----------

from azureml.core import Model

my_models = Model.list(workspace=ws, tags=['area'])
for m in my_models:
    print("Name:", m.name, "\tVersion:", m.version,
          "\tDescription:", m.description, m.tags)

# COMMAND ----------

# MAGIC %md ### Create Docker Image

# COMMAND ----------

# MAGIC %md In addition to the registered model, in order to create a Docker image we will also need:
# MAGIC - a scoring Python file (*score.py*), which will be called whenever there is a request for a prediction
# MAGIC - a Conda dependencies file (*myenv.yml*), which contains all other Anaconda dependencies which should be included in the image
# MAGIC
# MAGIC Optionally you can also provide:
# MAGIC - a Dockerfile, if you'd rather [use your own custom Docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-custom-docker-image) instead of the one provided by Azure ML Service
# MAGIC
# MAGIC A minimal sketch of *score.py* follows below.
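# COMMAND ----------

# MAGIC %md A minimal sketch of the *score.py* entry script described above; the registered model name and the JSON input format are assumptions for illustration. In a notebook you would write this cell out to a score.py file.

# COMMAND ----------

import json

import joblib
import numpy as np
from azureml.core.model import Model


def init():
    global model
    # Assumed model name; replace with "credit_scoring_" + notebook_username
    # as registered above
    model_path = Model.get_model_path("credit_scoring")
    model = joblib.load(model_path)


def run(raw_data):
    # Expects a JSON body like {"data": [[feature values], ...]}
    data = np.array(json.loads(raw_data)["data"])
    return json.dumps(model.predict(data).tolist())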
def main():
    e = Env()
    print('********************')
    print(e.source_directory)
    files = os.listdir('./aml_pipeline')
    for f in files:
        print(f)
    print('***************')

    workspace_name = e.workspace_name
    subscription_id = e.subscription_id
    resource_group = e.resource_group

    # Connect to AML Workspace
    print('workspace_name = ' + workspace_name)
    print('subscription_id = ' + subscription_id)
    print('resource_group = ' + resource_group)
    ws = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
    )
    print('Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION, ws.name))

    default_ds = ws.get_default_datastore()

    if 'diabetes dataset' not in ws.datasets:
        default_ds.upload_files(
            files=['diabetes.csv', 'diabetes2.csv'],  # Upload the diabetes csv files in /data
            target_path='diabetes-data/',  # Put it in a folder path in the datastore
            overwrite=True,  # Replace existing files of the same name
            show_progress=True)

        # Create a tabular dataset from the path on the datastore
        # (this may take a short while)
        tab_data_set = Dataset.Tabular.from_delimited_files(
            path=(default_ds, 'diabetes-data/*.csv'))

        # Register the tabular dataset
        try:
            tab_data_set = tab_data_set.register(workspace=ws,
                                                 name='diabetes dataset',
                                                 description='diabetes data',
                                                 tags={'format': 'CSV'},
                                                 create_new_version=True)
            print('Dataset registered.')
        except Exception as ex:
            print(ex)
    else:
        print('Dataset already registered.')

    # Create a folder for the pipeline step files
    experiment_folder = 'diabetes_pipeline'
    os.makedirs(experiment_folder, exist_ok=True)
    print(experiment_folder)

    cluster_name = "mmcomputecluster"

    try:
        # Check for existing compute target
        pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing cluster, use it.')
    except ComputeTargetException:
        # If it doesn't already exist, create it
        try:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='STANDARD_DS11_V2', max_nodes=2)
            pipeline_cluster = ComputeTarget.create(ws, cluster_name,
                                                    compute_config)
            pipeline_cluster.wait_for_completion(show_output=True)
        except Exception as ex:
            print(ex)

    # Create a Python environment for the experiment
    diabetes_env = Environment("diabetes-pipeline-env")
    diabetes_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diabetes_env.docker.enabled = True  # Use a docker container

    # Create a set of package dependencies
    diabetes_packages = CondaDependencies.create(
        conda_packages=['scikit-learn', 'ipykernel', 'matplotlib',
                        'pandas', 'pip'],
        pip_packages=['azureml-defaults', 'azureml-dataprep[pandas]',
                      'pyarrow'])

    # Add the dependencies to the environment
    diabetes_env.python.conda_dependencies = diabetes_packages

    # Register the environment
    diabetes_env.register(workspace=ws)
    registered_env = Environment.get(ws, 'diabetes-pipeline-env')

    # Create a new runconfig object for the pipeline
    pipeline_run_config = RunConfiguration()

    # Use the compute you created above
    pipeline_run_config.target = pipeline_cluster

    # Assign the environment to the run configuration
    pipeline_run_config.environment = registered_env
    print("Run configuration created.")

    # Get the training dataset
    diabetes_ds = ws.datasets.get("diabetes dataset")

    # Create a PipelineData (temporary data reference) for the model folder
    prepped_data_folder = PipelineData("prepped_data_folder",
                                       datastore=ws.get_default_datastore())

    # Step 1, run the data prep script
    prep_step = PythonScriptStep(name="Prepare Data",
                                 script_name="prep_diabetes.py",
                                 source_directory='./aml_pipeline',
                                 arguments=[
                                     '--input-data',
                                     diabetes_ds.as_named_input('raw_data'),
                                     '--prepped-data', prepped_data_folder
                                 ],
                                 outputs=[prepped_data_folder],
                                 compute_target=pipeline_cluster,
                                 runconfig=pipeline_run_config,
                                 allow_reuse=True)

    # Step 2, run the training script
    train_step = PythonScriptStep(name="Train and Register Model",
                                  source_directory='./aml_pipeline',
                                  script_name="train_diabetes.py",
                                  arguments=['--training-folder',
                                             prepped_data_folder],
                                  inputs=[prepped_data_folder],
                                  compute_target=pipeline_cluster,
                                  runconfig=pipeline_run_config,
                                  allow_reuse=True)

    print("Pipeline steps defined")

    pipeline_steps = [prep_step, train_step]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
    print("Pipeline is built.")

    # Create an experiment and run the pipeline
    experiment = Experiment(workspace=ws, name='jlg-exp')
    pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
    print("Pipeline submitted for execution.")
    pipeline_run.wait_for_completion(show_output=True)

    for run in pipeline_run.get_children():
        print(run.name, ':')
        metrics = run.get_metrics()
        for metric_name in metrics:
            print('\t', metric_name, ":", metrics[metric_name])

    for model in Model.list(ws):
        print(model.name, 'version:', model.version)
        for tag_name in model.tags:
            tag = model.tags[tag_name]
            print('\t', tag_name, ':', tag)
        for prop_name in model.properties:
            prop = model.properties[prop_name]
            print('\t', prop_name, ':', prop)
        print('\n')

    # Publish the pipeline from the run
    published_pipeline = pipeline_run.publish_pipeline(
        name="diabetes-training-pipeline",
        description="Trains diabetes model",
        version="1.0")

    rest_endpoint = published_pipeline.endpoint
    print(rest_endpoint)
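# A minimal sketch of triggering the published pipeline through the REST
# endpoint printed above, assuming interactive login is acceptable here.
# The experiment name "jlg-exp" matches the one used when submitting.
import requests
from azureml.core.authentication import InteractiveLoginAuthentication


def trigger_pipeline(rest_endpoint):
    auth_header = InteractiveLoginAuthentication().get_authentication_header()
    response = requests.post(rest_endpoint,
                             headers=auth_header,
                             json={"ExperimentName": "jlg-exp"})
    response.raise_for_status()
    # The response body contains the ID of the new pipeline run
    print(response.json().get("Id"))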
avg_score = sum(score) / len(score)
avg_time = total_time / len(score)
tags = {
    'accuracy': avg_score,
    'latency': avg_time,
    'num_images': len(score),
    'name': 'eval'
}

# Register the model only if it has the highest accuracy among the
# trained models
run = Run.get_context()
ws = run.experiment.workspace
registered_models = Model.list(ws, name=MODEL_NAME)
max_accuracy = 0
for model in registered_models:
    # Tag values are stored as strings, so cast before comparing
    registered_accuracy = float(model.tags['accuracy'])
    if max_accuracy < registered_accuracy:
        max_accuracy = registered_accuracy

if max_accuracy < avg_score:
    # TODO: Verify that images and masks are split in the same way
    Model.register(
        workspace=ws,
        model_path=PATH_MODEL,  # model_path contains architecture and weights
        model_name=MODEL_NAME,
        tags=tags,
        model_framework=Model.Framework.TENSORFLOW,
        model_framework_version='2.3')

# Second element is type Dataset
def getOperationOutput(self, operationNoun, operationId, userId,
                       subscriptionId, downloadFiles=True):
    operationName = self.GetOperationNameByNoun(operationNoun)
    if operationName == 'train':
        tags = [['userId', userId], ['modelId', operationId],
                ['subscriptionId', subscriptionId]]
        models = Model.list(self._workspace, tags=tags)
        if len(models) == 0:
            return None, None
        model = models[0]
        result = {
            'id': operationId,
            'description': model.description,
            'created_time': model.created_time
        }
        return result, "model"

    if operationName == 'deploy':
        tags = [['userId', userId], ['endpointId', operationId],
                ['subscriptionId', subscriptionId]]
        endpoints = Webservice.list(self._workspace, tags=tags)
        if len(endpoints) == 0:
            return None, None
        endpoint = endpoints[0]
        primaryKey, secondaryKey = endpoint.get_keys()
        result = {
            'id': operationId,
            'description': endpoint.description,
            'created_time': endpoint.created_time,
            'scoring_uri': endpoint.scoring_uri,
            'primary_key': primaryKey,
            'secondary_key': secondaryKey
        }
        return result, "endpoint"

    tags = {
        'userId': userId,
        'operationId': operationId,
        'operationName': operationName,
        'subscriptionId': subscriptionId
    }
    experimentName = subscriptionId
    exp = Experiment(self._workspace, experimentName)
    runs = exp.get_runs(type='azureml.PipelineRun', tags=tags)
    try:
        run = next(runs)
        child_runs = run.get_children()
        child_run = next(child_runs)
        outputType = self._utils.GetOutputType(operationName)
        if outputType == 'json':
            with tempfile.TemporaryDirectory() as tmp:
                path = os.path.join(tmp, 'output.json')
                child_run.download_file('/outputs/output.json', path)
                with open(path) as file:
                    return json.load(file), "json"
        elif outputType == 'file':
            if downloadFiles:
                # mkdtemp keeps the directory alive until we are done with
                # it (TemporaryDirectory().name would be cleaned up as soon
                # as the object is garbage-collected).
                tmp = tempfile.mkdtemp()
                path = os.path.join(tmp, "outputs")
                zip_file_path = os.path.join(
                    tmp, "output_{}.zip".format(operationId))
                child_run.download_files("/outputs", path,
                                         append_prefix=False)
                zipf = zipfile.ZipFile(zip_file_path, "w",
                                       zipfile.ZIP_DEFLATED)
                self.zipdir(path, zipf, "outputs")
                zipf.close()
                return zip_file_path, "file"
            else:
                return "file", "file"
    except StopIteration:
        return None, None
# DO NOT USE IN PRODUCTION ENVIRONMENTS.
from azureml.core import Environment, Model, Run
from azureml.core.webservice import AksWebservice
from azureml.core.compute import AksCompute
from azureml.core.model import InferenceConfig
from azureml.core.conda_dependencies import CondaDependencies

run = Run.get_context()
ws = run.experiment.workspace

service_name = "trump-tweets-scoring-aml"
model_name = "trump-tweet-classification"

# Get the latest registered version of the model (or None if absent)
model = next(
    iter(Model.list(workspace=ws, name=model_name, latest=True)),
    None,
)

environment = Environment("trump-tweet-inferencing")
conda_dep = CondaDependencies()
conda_dep.add_conda_package("scikit-learn")
conda_dep.add_pip_package("azureml-sdk")
environment.python.conda_dependencies = conda_dep

aks_target = AksCompute(ws, "trump-tweets-inf")
deployment_config = AksWebservice.deploy_configuration(cpu_cores=2,
                                                       memory_gb=8)
inference_config = InferenceConfig(entry_script="score.py",
# Get the last completed run that logged a val_accuracy metric
new_model_run = None
for run in all_runs:
    acc = run.get_metrics().get("val_accuracy")
    print(f'run is {run}, acc is {acc}')
    if run.get_status() == 'Finished' and acc is not None:
        new_model_run = run
        print('found a valid new model with acc {}'.format(acc))
        break

if new_model_run is None:
    raise Exception('new model must log a val_accuracy metric, please check'
                    ' your train.py file')

model_list = Model.list(ws)

# Check that there are registered models
if len(model_list) > 0:
    # Get the model with the best val accuracy; assume this is the
    # production model
    cur_max = None
    production_model = None
    for model in model_list:
        # get_metrics() returns the metric's full history, so take the
        # last logged value
        cur_acc = model.run.get_metrics().get("val_accuracy")[-1]
        if cur_max is None or cur_acc > cur_max:
            cur_max = cur_acc
            production_model = model
    production_model_acc = cur_max