def upload_model(ws: Workspace, config: MoveModelConfig) -> Model:
    """
    Uploads an InnerEye model to an AzureML workspace
    :param ws: The AzureML workspace
    :param config: move config
    :return: imported Model
    """
    model_path, environment_path = config.get_paths()

    # Model metadata (name/tags/properties/description) lives in a JSON
    # file inside the model folder.
    with (model_path / MODEL_JSON).open('r') as json_file:
        model_metadata = json.load(json_file)

    # The final model lives either in the single-model folder or, for
    # ensembles, in the ensemble folder — pick whichever exists.
    single_model_dir = model_path / FINAL_MODEL_FOLDER
    if single_model_dir.exists():
        registered_path = single_model_dir
    else:
        registered_path = model_path / FINAL_ENSEMBLE_MODEL_FOLDER

    registered_model = Model.register(
        ws,
        model_path=str(registered_path),
        model_name=model_metadata['name'],
        tags=model_metadata['tags'],
        properties=model_metadata['properties'],
        description=model_metadata['description'])

    # Register the accompanying environment definition alongside the model.
    env = Environment.load_from_directory(str(environment_path))
    env.register(workspace=ws)
    print(f"Environment {env.name} registered")
    return registered_model
def main():
    """Publish the R-based diabetes training pipeline to AzureML."""
    env_config = Env()

    # Resolve the AzureML workspace from environment configuration.
    workspace = Workspace.get(name=env_config.workspace_name,
                              subscription_id=env_config.subscription_id,
                              resource_group=env_config.resource_group)
    print("get_workspace:")
    print(workspace)

    # Resolve the compute cluster used for training.
    compute_target = get_compute(workspace,
                                 env_config.compute_name,
                                 env_config.vm_size)
    if compute_target is not None:
        print("aml_compute:")
        print(compute_target)

    # Build a reusable run environment from the on-disk definition.
    # Definition is read from diabetes_regression/azureml_environment.json;
    # `r-essentials` must be included in
    # diabetes_regression/conda_dependencies.yml.
    environment = Environment.load_from_directory(
        env_config.sources_directory_train)
    if (env_config.collection_uri is not None
            and env_config.teamproject_name is not None):
        # Expose the CI build-results URL prefix to the training run.
        environment.environment_variables["BUILDURI_BASE"] = (
            env_config.collection_uri + env_config.teamproject_name
            + "/_build/results?buildId=")
    environment.register(workspace)

    run_config = RunConfiguration()
    run_config.environment = environment

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=compute_target,
        source_directory="diabetes_regression/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    pipeline = Pipeline(workspace=workspace, steps=[train_step])
    pipeline.validate()
    published_pipeline = pipeline.publish(
        name=env_config.pipeline_name,
        description="Model training/retraining pipeline",
        version=env_config.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    """Build and publish the diabetes train/evaluate/register pipeline.

    Bootstraps the sample diabetes dataset on first run, wires up the
    Train -> (Evaluate) -> Register steps, and publishes the pipeline.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable run configuration environment
    # Read definition from diabetes_regression/azureml_environment.json
    environment = Environment.load_from_directory(e.sources_directory_train)
    if (e.collection_uri is not None and e.teamproject_name is not None):
        # Expose the CI build-results URL prefix to the training run.
        builduri_base = e.collection_uri + e.teamproject_name
        builduri_base = builduri_base + "/_build/results?buildId="
        environment.environment_variables["BUILDURI_BASE"] = builduri_base
    environment.register(aml_workspace)
    run_config = RunConfiguration()
    run_config.environment = environment

    # Pipeline parameters let the published pipeline be re-run with a
    # different model name / build id without republishing.
    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=e.build_id)

    # Get dataset name
    dataset_name = e.dataset_name

    # Register the sample diabetes dataset only if it does not exist yet.
    if dataset_name not in aml_workspace.datasets:
        # Create dataset from diabetes sample data
        sample_data = load_diabetes()
        df = pd.DataFrame(data=sample_data.data,
                          columns=sample_data.feature_names)
        df['Y'] = sample_data.target
        file_name = 'diabetes.csv'
        df.to_csv(file_name, index=False)

        # Upload file to default datastore in workspace
        default_ds = aml_workspace.get_default_datastore()
        target_path = 'training-data/'
        default_ds.upload_files(files=[file_name],
                                target_path=target_path,
                                overwrite=True,
                                show_progress=False)

        # Register dataset
        path_on_datastore = os.path.join(target_path, file_name)
        dataset = Dataset.Tabular.from_delimited_files(
            path=(default_ds, path_on_datastore))
        dataset.register(workspace=aml_workspace,
                         name=dataset_name,
                         description='diabetes training data',
                         tags={'format': 'CSV'},
                         create_new_version=True)

    # Get the dataset
    dataset = Dataset.get_by_name(aml_workspace, dataset_name)

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        'pipeline_data',
        datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[dataset.as_named_input('training_data')],
        outputs=[pipeline_data],
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--step_output", pipeline_data
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--step_input", pipeline_data,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")

    # Check run_evaluation flag to include or exclude evaluation step.
    if ((e.run_evaluation).lower() == 'true'):
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    # BUGFIX: removed `train_pipeline._set_experiment_name` — it was an
    # attribute access without a call, i.e. a no-op statement.
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    """Publish the Train/Evaluate/Register-if-better pipeline to AzureML."""
    env = EnvironmentVariables()
    args = add_arguments()

    workspace = get_workspace()
    compute = get_or_create_compute(workspace,
                                    env.cpu_cluster_name,
                                    env.compute_vm_size,
                                    env.max_nodes)

    # Environment + run configuration shared by all pipeline steps.
    environment = Environment.load_from_directory(env.sources_directory_train)
    environment.register(workspace)
    run_configuration = RunConfiguration()
    run_configuration.environment = environment

    # Pipeline parameters so published runs can override the defaults.
    model_name_param = PipelineParameter(name="model_name",
                                         default_value=env.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=env.build_id)
    should_tune_hyperparameters_param = PipelineParameter(
        name="should_tune_hyperparameters",
        default_value=env.should_tune_hyperparameters)
    parallelism_level_param = PipelineParameter(
        name="parallelism_level",
        default_value=env.parallelism_level)
    force_register_param = PipelineParameter(
        name="force_register",
        default_value=env.force_register)

    # NOTE(review): get_datastore() takes no workspace argument here —
    # presumably it resolves it internally; confirm against its definition.
    datastore = get_datastore()
    dataset_name = env.dataset_name
    dataset_path = env.dataset_path
    print(
        f"Creating new dataset version for {dataset_name} in datastore {datastore} from file {dataset_path}"
    )
    dataset = Dataset.Tabular.from_delimited_files(
        path=[(datastore, dataset_path)]).register(
            workspace=workspace,
            name=dataset_name,
            description=dataset_name,
            tags={'format': 'CSV'},
            create_new_version=True)

    # Intermediate output handed from Train to Evaluate/Register.
    train_output = PipelineData('train_output',
                                output_name='train_output',
                                datastore=datastore)

    train_step = PythonScriptStep(
        name="Train model",
        compute_target=compute,
        script_name=env.train_script_name,
        runconfig=run_configuration,
        inputs=[dataset.as_named_input('training')],
        outputs=[train_output],
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--parallelism_level", parallelism_level_param,
            "--should_tune_hyperparameters", should_tune_hyperparameters_param
        ],
        allow_reuse=False)

    evaluate_step = PythonScriptStep(
        name="Evaluate model",
        compute_target=compute,
        script_name=env.evaluate_script_name,
        runconfig=run_configuration,
        inputs=[train_output],
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--train_output", train_output,
            "--force_register", force_register_param
        ],
        allow_reuse=False)

    register_step = PythonScriptStep(
        name="Register model",
        compute_target=compute,
        script_name=env.register_script_name,
        runconfig=run_configuration,
        inputs=[train_output],
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--train_output", train_output
        ],
        allow_reuse=False)

    # Strictly sequential: Train -> Evaluate -> Register.
    evaluate_step.run_after(train_step)
    register_step.run_after(evaluate_step)

    train_pipeline = Pipeline(workspace=workspace,
                              steps=[train_step, evaluate_step, register_step])
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=env.pipeline_name,
        description="Train/Eval/Register if better pipeline",
        version=env.build_id)

    # Optionally write the published pipeline id for downstream CI stages.
    if args.output_file_name:
        with open(args.output_file_name, "w") as output_file:
            output_file.write(published_pipeline.id)

    print(
        f"Published pipeline {published_pipeline.name} for build {published_pipeline.version}"
    )
def main():
    """Build and publish the train/evaluate/register pipeline.

    Optionally registers a tabular dataset from a named datastore file,
    wires up Train -> (Evaluate) -> Register, and publishes the pipeline.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable run configuration environment
    # Read definition from diabetes_regression/azureml_environment.json
    environment = Environment.load_from_directory(e.sources_directory_train)
    if (e.collection_uri is not None and e.teamproject_name is not None):
        # Expose the CI build-results URL prefix to the training run.
        builduri_base = e.collection_uri + e.teamproject_name
        builduri_base = builduri_base + "/_build/results?buildId="
        environment.environment_variables["BUILDURI_BASE"] = builduri_base
    environment.register(aml_workspace)
    run_config = RunConfiguration()
    run_config.environment = environment

    # Pipeline parameters so published runs can override the defaults.
    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    build_id_param = PipelineParameter(
        name="build_id", default_value=e.build_id)

    # Register the dataset only when both datastore and datafile are
    # configured; otherwise the train step receives an empty dataset name.
    dataset_name = ""
    if (e.datastore_name is not None and e.datafile_name is not None):
        dataset_name = e.dataset_name
        datastore = Datastore.get(aml_workspace, e.datastore_name)
        data_path = [(datastore, e.datafile_name)]
        dataset = Dataset.Tabular.from_delimited_files(path=data_path)
        dataset.register(workspace=aml_workspace,
                         name=e.dataset_name,
                         description="dataset with training data",
                         create_new_version=True)

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--dataset_name", dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")

    # Check run_evaluation flag to include or exclude evaluation step.
    if ((e.run_evaluation).lower() == 'true'):
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    # BUGFIX: removed `train_pipeline._set_experiment_name` — it was an
    # attribute access without a call, i.e. a no-op statement.
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')