import argparse
import os

from azureml.core import Run

parser = argparse.ArgumentParser()
parser.add_argument('--model_name', type=str, default='',
                    help='Name you want to give to the model.')
parser.add_argument('--model_assets_path', type=str, default='outputs',
                    help='Location of the trained model.')
args, unparsed = parser.parse_known_args()

print('Model assets path is:', args.model_assets_path)
print('Model name is:', args.model_name)

# Get the step run's context, then address the parent pipeline run so the
# uploaded files and the registered model are attached to the pipeline run
# itself rather than to this single step.
run = Run.get_context()
pipeline_run = Run(run.experiment, run._root_run_id)

# Upload the model weights and labels produced by the training step.
pipeline_run.upload_file("outputs/model/model.pth",
                         os.path.join(args.model_assets_path, "model.pth"))
pipeline_run.upload_file("outputs/model/labels.txt",
                         os.path.join(args.model_assets_path, "labels.txt"))

# Upload the deployment assets alongside the model.
pipeline_run.upload_file("outputs/deployment/score.py", "deployment/score.py")
pipeline_run.upload_file("outputs/deployment/myenv.yml", "deployment/myenv.yml")
pipeline_run.upload_file("outputs/deployment/deploymentconfig.json",
                         "deployment/deploymentconfig.json")
pipeline_run.upload_file("outputs/deployment/inferenceconfig.json",
                         "deployment/inferenceconfig.json")

tags = {"Conference": "Codecamp"}
model = pipeline_run.register_model(model_name=args.model_name,
                                    model_path='outputs/', tags=tags)
print('Model registered: {}\nModel Description: {}\nModel Version: {}'.format(
    model.name, model.description, model.version))
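# --- Hedged sketch (not part of the original script): one way the registration
# step above could be wired into a pipeline. The script name "register.py",
# the compute target "cpu-cluster", and the experiment name are assumptions
# for illustration only.
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep

ws = Workspace.from_config()
register_step = PythonScriptStep(
    name="register_model",
    script_name="register.py",        # assumed name of the script above
    arguments=["--model_name", "codecamp-model",
               "--model_assets_path", "outputs"],
    compute_target="cpu-cluster",     # assumed compute target name
    source_directory=".")
pipeline = Pipeline(workspace=ws, steps=[register_step])
Experiment(ws, "register-demo").submit(pipeline)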
import joblib
from sklearn.metrics import accuracy_score

# Train one model per candidate value of n_estimators, logging each fit as
# its own run in the experiment and uploading the pickled model.
for n in n_estimators:
    run = experiment.start_logging()
    run.log("n_estimators", n)
    model = preprocessor_pipe.set_params(L_model__n_estimators=n)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    run.log("acc", accuracy)
    model_name = "model_n_estimators_" + str(n) + ".pkl"
    filename = "outputs/" + model_name
    joblib.dump(value=model, filename=filename)
    run.upload_file(name=model_name, path_or_stream=filename)
    run.complete()

###############

# Scan every run in the experiment and keep the id of the one with the
# highest logged accuracy.
maximum_acc_runid = None
maximum_acc = None
for run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_acc = run_metrics["acc"]
    run_id = run_details["runId"]
    if maximum_acc is None or run_acc > maximum_acc:
        maximum_acc = run_acc
        maximum_acc_runid = run_id
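# --- Hedged sketch (assumption, not in the original): once the best run is
# known, the .pkl it uploaded can be registered straight from run history.
# The model name "best_rf_model" is illustrative.
from azureml.core import Run

best_run = Run(experiment, maximum_acc_runid)
best_n = int(best_run.get_metrics()["n_estimators"])
best_model = best_run.register_model(
    model_name="best_rf_model",
    model_path="model_n_estimators_" + str(best_n) + ".pkl")
print(best_model.name, best_model.version)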
import os
import shutil

# COMMAND ----------

model_nm = "bikeshare.mml"
model_output = '/mnt/azml/outputs/' + model_nm
model_dbfs = "/dbfs" + model_output

lrPipelineModel.write().overwrite().save(model_output)

# COMMAND ----------

# strip the extension to get a base name for the zip archive
model_name, model_ext = model_dbfs.split(".")

# COMMAND ----------

model_zip = model_name + ".zip"
shutil.make_archive(model_name, 'zip', model_dbfs)
azRun.upload_file("outputs/" + model_nm, model_zip)

# COMMAND ----------

azRun.register_model(model_name=model_nm, model_path="outputs/" + model_nm)

# COMMAND ----------

# now delete the serialized model from the local folder since it has already
# been uploaded to run history
shutil.rmtree(model_dbfs)
os.remove(model_zip)

# COMMAND ----------

mlflow.end_run()
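# --- Hedged sketch (assumption, not in the original notebook): retrieving the
# registered archive later and restoring the Spark pipeline model from it.
# "ws" is assumed to be the Workspace; the unpack directory name is arbitrary.
import shutil
from azureml.core.model import Model
from pyspark.ml import PipelineModel

model = Model(ws, "bikeshare.mml")
local_path = model.download(target_dir=".", exist_ok=True)
# the uploaded file is a zip archive despite its .mml name, so force the format
shutil.unpack_archive(local_path, "bikeshare_model", format="zip")
restored = PipelineModel.load("bikeshare_model")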
import os

from azureml.core import Dataset, Model

if not run.id.startswith('OfflineRun'):
    # get the last section of the mount location after '/workspaceblobstore',
    # which is the path inside the blob storage container
    blobstoragepath = mntpath.rsplit('/workspaceblobstore', maxsplit=1)[1]
    dataset = Dataset.Tabular.from_delimited_files(
        path=[(ws.get_default_datastore(), blobstoragepath)])

    # get the model output path (the path to the model.pkl file) from the
    # previous step's output
    model_output = os.environ['AZUREML_DATAREFERENCE_model_output']
    print("model path", model_output)
    print("files in model path", os.listdir(path=model_output))

    # register the model to the parent run, which encapsulates all the steps;
    # if the file has already been uploaded, the upload raises and is ignored
    try:
        # to register a model to a run, the file has to be uploaded to that run first
        parentrun.upload_file('model.pkl', model_output + '/model.pkl')
    except Exception:
        pass

    # model_path is the path of the parent run's model file relative to the
    # run (see the upload_file call above)
    model_path = "model.pkl"
    print("...working directory")
    print(os.listdir(path='.'))

    # if the model has been previously registered, retrieve its accuracy; this
    # determines whether the new model should be registered or the old one
    # kept as the latest version
    try:
        model = Model(ws, model_name)
        acc_to_beat = float(model.properties["accuracy"])
    except Exception:
        acc_to_beat = 0
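    # --- Hedged sketch (assumption, not shown in the original): comparing the
    # new model's accuracy against acc_to_beat and registering only on
    # improvement. "new_acc" is assumed to come from the evaluation step.
    if new_acc > acc_to_beat:
        model = parentrun.register_model(
            model_name=model_name,
            model_path=model_path,
            properties={"accuracy": str(new_acc)})
        print("registered version", model.version)
    else:
        print("existing model (accuracy {}) kept as latest".format(acc_to_beat))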