def downloadPickles(ws, modelName, outputPath="./pickles", modelVer=None):
    """Download the ``.pkl`` files of the run that produced a registered model.

    The model is looked up in the workspace, the run that created it is
    located through the ``aml-runid`` / ``aml-experiment`` tags on the model,
    and every file of that run whose name ends in ``.pkl`` is downloaded into
    ``outputPath``. A ``metadata.json`` file with the model name, version and
    tags is written next to the pickles.

    :param ws: Workspace handle passed through to ``Model`` and ``Experiment``.
    :param modelName: Name of the registered model.
    :param outputPath: Local folder that receives the pickles and
        ``metadata.json``. It is created if it does not exist.
    :param modelVer: ``'best'`` selects the model with the highest
        ``accuracy`` tag, ``None`` uses ``Model(ws, modelName)`` without a
        version, any other value is passed as ``version=``.
    :raises SystemExit: With status 1 when the model lacks the
        ``aml-runid`` / ``aml-experiment`` tags.
    :return: ``None``. Also returns early (after printing an error) when the
        originating run's status is not ``"Completed"``.
    """
    if modelVer == 'best':
        bestModel = None
        maxAcc = -1
        # The third positional argument is presumably the tag filter, so only
        # models carrying an "accuracy" tag are expected here - TODO confirm.
        for candidate in Model.list(ws, modelName, ["accuracy"]):
            candidateAcc = float(candidate.tags["accuracy"])
            if candidateAcc > maxAcc:
                bestModel = candidate
                maxAcc = candidateAcc

        if bestModel is not None:
            print(f"### Best model with highest accuracy of {maxAcc} found")
            # Actually use the winner; previously `model` was left pointing
            # at whichever model the loop happened to visit last.
            model = bestModel
        else:
            model = Model(ws, modelName)
            print("### WARNING! No best model found, using latest instead")
    elif modelVer is not None:
        model = Model(ws, modelName, version=modelVer)
    else:
        model = Model(ws, modelName)

    print(f"### Using model version {model.version}")

    # Echo'ing out this magic string sets an output variable in Azure DevOps pipeline
    # Set AZML_MODEL_VER for use by subsequent steps
    print(f"##vso[task.setvariable variable=AZML_MODEL_VER]{model.version}")

    # These are special tags, lets us get back to the run that created the model
    try:
        runId = model.tags['aml-runid']
        experimentName = model.tags['aml-experiment']
    except (KeyError, TypeError):
        # TypeError covers a model whose tags attribute is None.
        print(
            "### ERROR! Model missing `aml-runid` and `aml-experiment` tags, Can't continue!"
        )
        # Exit with a non-zero status so a calling pipeline sees the failure;
        # the previous bare exit() reported success.
        raise SystemExit(1)

    exp = Experiment(workspace=ws, name=experimentName)
    run = Run(exp, runId)
    if run.status != "Completed":
        print(f'### ERROR! Run {runId} did not complete!')
        return

    print(f'### Will download from run {runId} in {experimentName}')

    # metadata.json is written even when the run has no .pkl files, so the
    # target folder must exist regardless of what download_file creates.
    os.makedirs(outputPath, exist_ok=True)

    # Now we can get all the files created with the run, grab all the .pkls
    for remoteName in run.get_file_names():
        if not remoteName.endswith('.pkl'):
            continue
        # Flatten the remote folder structure: keep only the file name.
        output_file_path = os.path.join(outputPath, remoteName.split('/')[-1])
        print('### Downloading from {} to {} ...'.format(
            remoteName, output_file_path))
        run.download_file(name=remoteName, output_file_path=output_file_path)

    # Add some extra metadata, handy to have
    metadata = {
        'name': model.name,
        'version': model.version,
        'tags': model.tags
    }
    with open(f"{outputPath}/metadata.json", 'w') as metadata_file:
        print(f"### Storing metadata in {outputPath}/metadata.json")
        json.dump(metadata, metadata_file)
def download_run_outputs_by_prefix(
    blobs_prefix: Path,
    destination: Path,
    run: Run
) -> None:
    """
    Fetch every file of a run that lives below a given folder of the run's default output
    directory (fixed_paths.DEFAULT_AML_UPLOAD_DIR) and store it under a local folder, with the
    prefix removed from the file name.

    Example, assuming the default output directory is "outputs": with blobs_prefix = "foo", the
    run file "outputs/foo/bar.csv" ends up in destination/bar.csv. A run file "outputs/foo.txt"
    also starts with the prefix string, but what follows the prefix does not begin with "/", so it
    is skipped with a warning. Files that do not start with the prefix are ignored silently.

    :param blobs_prefix: Folder, relative to the default output directory, whose files are wanted.
    :param destination: Local folder that receives the downloaded files.
    :param run: The AzureML run to download the files from.
    """
    remote_folder = fixed_paths.DEFAULT_AML_UPLOAD_DIR / blobs_prefix
    prefix_str = str(remote_folder.as_posix())
    logging.info(
        f"Downloading multiple files from run {run.id}: {prefix_str} -> {str(destination)}"
    )
    prefix_length = len(prefix_str)
    # Files are fetched one at a time: the bulk download_files call can time out when several
    # large checkpoint files are involved (120sec timeout for all files).
    for file in run.get_file_names():
        if not file.startswith(prefix_str):
            continue
        remainder = file[prefix_length:]
        if not remainder.startswith("/"):
            # The prefix matched only part of a path component, not a whole folder.
            logging.warning(
                f"Skipping file {file}, because the desired prefix {prefix_str} is not aligned with "
                f"the folder structure"
            )
            continue
        relative_name = remainder[1:]
        logging.info(f"Downloading {file}")
        run.download_file(
            file, str(destination / relative_name), _validate_checksum=True
        )
def register_model(run_id, experiment):
    """Register the model of a run and return the folder of its first output file.

    The run's files are filtered to those whose name starts with ``outputs``.
    The first match supplies the registered model name (its file name) and the
    return value (its folder part).

    :param run_id: Id of the run whose model should be registered.
    :param experiment: Experiment the run belongs to.
    :return: Folder part of the first matching file name.
    :raises IndexError: When the run has no file whose name starts with
        ``outputs``.
    """
    best_run = Run(experiment=experiment, run_id=run_id)
    output_files = [
        name for name in best_run.get_file_names() if name.startswith('outputs')
    ]
    if not output_files:
        # Same exception type the unchecked [0] lookup used to raise, but
        # with a message that says what is actually missing.
        raise IndexError(
            f"Run {run_id} has no files whose name starts with 'outputs'; "
            "nothing to register"
        )

    path, model_name = os.path.split(output_files[0])
    # NOTE(review): the registered path is hard-coded while the model name is
    # taken from the first file found - confirm 'outputs/model.pkl' is always
    # the intended artifact.
    best_run.register_model(model_name=model_name, model_path='outputs/model.pkl')
    return path
def get_job_log_file(run: Run, index: Optional[int] = None) -> str:
    """
    Reads the job log file (70_driver_log.txt or std_log.txt) of the given job. If an index is provided, get
    the matching file from a multi-node job.

    :param run: The run to read the log from. Its status must be RunStatus.COMPLETED.
    :param index: If given, "_<index>" is appended to the log file's base name before ".txt".
    :return: The contents of the job log file.
    :raises ValueError: If neither of the two candidate log files is among the run's files.
    """
    assert run.status == RunStatus.COMPLETED, f"Run status is {run.status}, expected a completed run"
    files = run.get_file_names()
    suffix = (f"_{index}" if index is not None else "") + ".txt"
    file1 = "azureml-logs/70_driver_log" + suffix
    file2 = "user_logs/std_log" + suffix
    # file1 takes precedence when both are present.
    if file1 in files:
        log_file = file1
    elif file2 in files:
        log_file = file2
    else:
        raise ValueError(f"No log file ({file1} or {file2}) present in the run. Existing files: {files}")
    # Download into a private temporary folder that is removed afterwards. Taking the .name of a
    # throw-away NamedTemporaryFile hands out a path whose file has already been deleted (it can be
    # claimed by another process) and leaves the downloaded copy behind.
    with tempfile.TemporaryDirectory() as temp_folder:
        downloaded = Path(temp_folder) / Path(log_file).name
        run.download_file(name=log_file, output_file_path=str(downloaded))
        return downloaded.read_text()
model_name = "model_alpha_" + str(alpha) + ".pkl" filename = "outputs/" + model_name joblib.dump(value=model, filename=filename) run.upload_file(name=model_name, path_or_stream=filename) run.complete() minimum_rmse_runid = None minimum_rmse = None for run in experiment.get_runs(): run_metrics = run.get_metrics() run_details = run.get_details() # each logged metric becomes a key in this returned dict run_rmse = run_metrics["rmse"] run_id = run_details["runId"] if minimum_rmse is None: minimum_rmse = run_rmse minimum_rmse_runid = run_id else: if run_rmse < minimum_rmse: minimum_rmse = run_rmse minimum_rmse_runid = run_id print("Best run_id: " + minimum_rmse_runid) print("Best run_id rmse: " + str(minimum_rmse)) best_run = Run(experiment=experiment, run_id=minimum_rmse_runid) print(best_run.get_file_names())
run_details = run.get_details() # each logged metric becomes a key in this returned dict run_rmse = run_metrics["rmse"] run_id = run_details["runId"] if minimum_rmse is None: minimum_rmse = run_rmse minimum_rmse_runid = run_id else: if run_rmse < minimum_rmse: minimum_rmse = run_rmse minimum_rmse_runid = run_id #Sobre el mejor modelo, obtener una referencia del mismo from azureml.core import Run best_run = Run(experiment=experiment, run_id=minimum_rmse_runid) print(best_run.get_file_names()) #Descargar el modelo model_file_name = best_run.get_file_names()[0] best_run.download_file(name=model_file_name) #Cargamos el modelo en python import joblib clf = joblib.load(model_file_name) print("El proceso ha finalizado correctamente!!") #Uso del clasificador para nuevos datos (referencia) #y_predecido = clf.predict(X) #Comparar x con y y calcular rmse #rmse = math.sqrt(mean_squared_error(y_true=y_realdelosdatos, y_pred=y_predecido))
from azureml.core.authentication import AzureCliAuthentication

cli_auth = AzureCliAuthentication()

# Get workspace
ws = Workspace.from_config(auth=cli_auth)

# Get the latest evaluation result.
# Any problem reading run_id.json (missing file, invalid JSON, missing or
# empty "run_id") is treated as "nothing to register" and ends the script
# with status 0. `except Exception` keeps that best-effort behaviour without
# also swallowing KeyboardInterrupt / SystemExit like a bare except would.
try:
    with open("run_id.json") as config_file:
        config = json.load(config_file)
    run_id = config["run_id"]
except Exception:
    run_id = None

if not run_id:
    print("No new model to register as production model perform better")
    sys.exit(0)

experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

run = Run(experiment=exp, run_id=run_id)

# Download every run file whose name starts with 'outputs/model' into the
# checkpoint folder, keeping only the last path component as the file name.
os.makedirs('../mnist-tf/ckpt', exist_ok=True)
for remote_name in run.get_file_names():
    if not remote_name.startswith('outputs/model'):
        continue
    output_file_path = os.path.join('../mnist-tf/ckpt/', remote_name.split('/')[-1])
    print('Downloading from {} to {} ...'.format(remote_name, output_file_path))
    run.download_file(name=remote_name, output_file_path=output_file_path)
# Notebook fragment: remember the id of the most recent run, download that
# run's files to /tmp/<run_id> and load the model stored in its
# random_forest_model folder.
# NOTE(review): `run` and `experiment` are defined before this fragment.

# NOTE(review): reads a private attribute of the run object - check whether
# a public id accessor should be used instead.
run_id = run._run_id
# NOTE(review): `last` is not used in the visible part of the notebook.
last = False

print()
print(f"Last Run ID: {run_id}")

# register last run_id as environment variable so it can be used in %sh commands
os.environ['RUN_ID_LAST'] = run_id
# print(os.getenv('RUN_ID_LAST'))

# COMMAND ----------

import mlflow.sklearn

# run_id = "434c0e56-307a-4e40-a170-343530c77386"
# Re-create the run handle from its id; the last expression of the cell
# lists the run's file names.
run = Run(experiment, run_id=run_id)
run.get_file_names()

# COMMAND ----------

# Target folder is /tmp/<run_id>, the same location the %sh cell below
# inspects through $RUN_ID_LAST.
run.download_files(output_directory=f"/tmp/{run_id}")

# COMMAND ----------

# MAGIC %sh
# MAGIC ls -l /tmp/$RUN_ID_LAST/random_forest_model

# COMMAND ----------

# Load the model from the local random_forest_model folder of the download.
model_uri_aml = f"/tmp/{run_id}/random_forest_model"
model = mlflow.sklearn.load_model(model_uri_aml)
# MAGIC Each time we ran the models, we stored a zip file with the trained model in AML. We can now retrieve the trained model of the particular run that we want to deploy. We'll copy the relevant `id` from above and retrieve the Run object. # COMMAND ---------- best_run_id = '6d670807-6477-4ea6-a98b-84069c888346' best_run = Run(experiment, best_run_id) # COMMAND ---------- # MAGIC %md # MAGIC #### 4. Download the model locally and unzip # COMMAND ---------- # First, let's list the filenames best_run.get_file_names() # COMMAND ---------- # In this case, we're looking for the 'outputs/{MODEL_NAME}Model.zip' file import shutil import os saved_model_name = 'model.zip' # Saved model filename in AML Service - leave out the 'outputs/' folder at the beginning aml_location = 'outputs/{0}'.format(saved_model_name) user_folder = '/dbfs/tmp/{0}/'.format(user_name) # Create objects with folder names temporary_zip_path = os.path.join( user_folder, 'trained_model_zip') # Folder to zipped model file