def get_run_configs(
    ws: Workspace, computetarget: ComputeTarget, env: Env
) -> Tuple[ParallelRunConfig, RunConfiguration]:
    """
    Creates the necessary run configurations required by the pipeline
    to enable parallelized scoring.

    :param ws: AML Workspace
    :param computetarget: AML Compute target
    :param env: Environment Variables
    :returns: Tuple[Scoring Run configuration, Score copy run configuration]
    """
    # get a conda environment for scoring
    environment = get_environment(
        ws,
        env.aml_env_name_scoring,
        conda_dependencies_file=env.aml_env_score_conda_dep_file,
        enable_docker=True,
        use_gpu=env.use_gpu_for_scoring,
        create_new=env.rebuild_env_scoring,
    )

    score_run_config = ParallelRunConfig(
        entry_script=env.batchscore_script_path,
        source_directory=env.sources_directory_train,
        error_threshold=10,
        output_action="append_row",
        compute_target=computetarget,
        node_count=env.max_nodes_scoring,
        environment=environment,
        run_invocation_timeout=300,
    )

    copy_run_config = RunConfiguration()
    copy_run_config.environment = get_environment(
        ws,
        env.aml_env_name_score_copy,
        conda_dependencies_file=env.aml_env_scorecopy_conda_dep_file,
        enable_docker=True,
        use_gpu=env.use_gpu_for_scoring,
        create_new=env.rebuild_env_scoring,
    )
    return (score_run_config, copy_run_config)
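# Usage sketch (not part of the original source): one way the two configurations
# returned above might be wired into pipeline steps. The step names, script path,
# `input_dataset`, and the `scored_data` PipelineData are illustrative assumptions.
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import ParallelRunStep, PythonScriptStep

score_run_config, copy_run_config = get_run_configs(ws, computetarget, env)

scored_data = PipelineData("scored_data", datastore=ws.get_default_datastore())

scoring_step = ParallelRunStep(
    name="batch-scoring",                     # hypothetical step name
    parallel_run_config=score_run_config,     # ParallelRunConfig from get_run_configs
    inputs=[input_dataset.as_named_input("scoring_input")],  # assumed dataset input
    output=scored_data,
    allow_reuse=False,
)

copy_step = PythonScriptStep(
    name="copy-scores",                       # hypothetical step name
    script_name="score_copy.py",              # assumed script
    arguments=["--scores", scored_data],
    inputs=[scored_data],
    runconfig=copy_run_config,                # plain RunConfiguration from get_run_configs
    compute_target=computetarget,
    source_directory=env.sources_directory_train,
)

pipeline = Pipeline(workspace=ws, steps=[scoring_step, copy_step])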
def test_perf(perf_test_configuration, workspace, request, wheel_file):
    print("Starting with test case {}".format(request.node.name))
    script_name = determine_script_name(request.node.name)
    generate_script(request, perf_test_configuration, script_name, SCRIPT_DIRECTORY)
    experiment = Experiment(workspace=workspace, name=EXPERIMENT_NAME)
    compute_target = workspace.get_default_compute_target(type='cpu')

    run_config = RunConfiguration()
    run_config.target = compute_target
    environment = configure_environment(workspace, wheel_file=wheel_file)
    run_config.environment = environment
    environment.register(workspace=workspace)

    script_run_config = ScriptRunConfig(source_directory=SCRIPT_DIRECTORY,
                                        script=script_name,
                                        run_config=run_config)
    print("submitting run")
    experiment.submit(config=script_run_config, tags=perf_test_configuration.__dict__)
    print("submitted run")
def _submit_profile(dataset_profile_config_object, workspace, experiment_name):
    """Start profile execution with the given config on the given workspace.

    :param dataset_profile_config_object: The profile config holding the dataset and compute target.
    :param workspace: The workspace in which the profile run is submitted.
    :param experiment_name: The name of the experiment under which the run is submitted.
    :return: A DatasetProfileRun for the submitted run.
    """
    dataset = dataset_profile_config_object._dataset
    compute_target = dataset_profile_config_object._compute_target
    if isinstance(compute_target, ComputeTarget):
        compute_target = compute_target.name

    run_id = 'dataset_' + str(uuid.uuid4())
    saved_dataset_id = dataset._ensure_saved(workspace)
    action_dto = _restclient(workspace).dataset.generate_profile_with_preview(
        workspace.subscription_id,
        workspace.resource_group,
        workspace.name,
        id=saved_dataset_id,
        compute_target=compute_target,
        experiment_name=experiment_name,
        run_id=run_id,
        custom_headers=_custom_headers)

    if dataset_profile_config_object._compute_target == _LOCAL_COMPUTE:
        with tempfile.TemporaryDirectory() as temp_dir:
            script = os.path.join(temp_dir, 'profile_run_script.py')
            copyfile(
                os.path.join(os.path.dirname(__file__), '_profile_run_script.py'),
                script)
            run_local = RunConfiguration()
            run_local.environment.python.user_managed_dependencies = True
            run_local.environment.python.interpreter_path = sys.executable
            script_config = ScriptRunConfig(
                source_directory=temp_dir,
                script="profile_run_script.py",
                arguments=[
                    action_dto.dataset_id,
                    action_dto.action_id,
                    saved_dataset_id
                ],
                run_config=run_local)
            experiment = Experiment(workspace, experiment_name)
            experiment.submit(script_config, run_id=run_id)
    else:
        experiment = Experiment(workspace, action_dto.experiment_name)
        run_id = action_dto.run_id

    run = get_run(experiment, run_id)
    return DatasetProfileRun(workspace, dataset, run)
def __init__(self, kubeflow_component):
    self._comp = kubeflow_component

    run_config = RunConfiguration()
    run_config.target = 'zhizhu-compute'
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = self._comp.image

    print(f"== Creating KubeflowComponentStep: name={self._comp.name}\n"
          f" arguments={self._comp.command_and_args}\n"
          f" inputs={self._comp.input_refs}\n"
          f" outputs={self._comp.output_refs}\n")

    super().__init__(name=self._comp.name,
                     source_directory='script',
                     script_name='invoker.py',
                     arguments=self._comp.command_and_args,
                     inputs=self._comp.input_refs,
                     outputs=self._comp.output_refs,
                     compute_target='zhizhu-compute',
                     allow_reuse=True,
                     runconfig=run_config)
def scale_up(self, workers=1):
    """ Scale up the number of workers. """
    run_config = RunConfiguration()
    run_config.target = self.compute_target
    run_config.environment = self.environment_definition

    scheduler_ip = self.run.get_metrics()["scheduler"]
    args = [
        f"--scheduler_ip_port={scheduler_ip}",
        f"--use_gpu={self.use_gpu}",
        f"--n_gpus_per_node={self.n_gpus_per_node}",
        f"--worker_death_timeout={self.worker_death_timeout}",
    ]

    file_dataset_registered_name = self.kwargs.get('file_dataset_registered_name', None)
    dataset_config_name = self.kwargs.get('dataset_config_name', None)
    path_on_compute = self.kwargs.get('path_on_compute', None)
    if path_on_compute is not None:
        dataset = Dataset.get_by_name(workspace=self.workspace,
                                      name=file_dataset_registered_name)
        input1 = dataset.as_named_input(dataset_config_name).as_mount(
            path_on_compute=path_on_compute)
        args.append(input1)

    child_run_config = ScriptRunConfig(
        source_directory=os.path.join(self.abs_path, "setup"),
        script="start_worker.py",
        arguments=args,
        run_config=run_config,
    )

    for i in range(workers):
        child_run = self.run.submit_child(child_run_config, tags=self.tags)
        self.workers_list.append(child_run)

    hostname = socket.gethostname()
def scale_up(self, workers=1):
    """Scale up the number of workers."""
    run_config = RunConfiguration()
    run_config.target = self.compute_target
    run_config.environment = self.environment_definition

    scheduler_ip = self.run.get_metrics()["scheduler"]
    args = [
        f"--scheduler_ip_port={scheduler_ip}",
        f"--use_gpu={self.use_gpu}",
        f"--n_gpus_per_node={self.n_gpus_per_node}",
        f"--worker_death_timeout={self.worker_death_timeout}",
    ]

    child_run_config = ScriptRunConfig(
        source_directory=os.path.join(self.abs_path, "setup"),
        script="start_worker.py",
        arguments=args,
        run_config=run_config,
    )

    for i in range(workers):
        child_run = self.run.submit_child(child_run_config, tags=self.tags)
        self.workers_list.append(child_run)
def _generate_run_config(self, step):
    """
    Generates an Azure ML run config if the user gives specifics about requirements.

    :param dict step: step defined by user that we are currently building
    :returns: run_config
    :rtype: RunConfiguration
    """
    try:
        conda_deps = self._get_conda_deps(step)
        conda_deps.add_conda_package("pip==20.0.2")
        return RunConfiguration(script=step["script"],
                                conda_dependencies=conda_deps)
    except KeyError:
        return None
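# Illustrative only: a step dictionary of the shape _generate_run_config appears to
# expect -- a "script" key plus whatever _get_conda_deps reads. The "requirements"
# key below is an assumption, not confirmed by the snippet above.
example_step = {
    "script": "train.py",
    "requirements": ["pandas", "scikit-learn==0.24.2"],
}
# run_config = pipeline_builder._generate_run_config(example_step)
# Returns a RunConfiguration with pip pinned to 20.0.2, or None if a required key is missing.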
def get_automl_environment(workspace: Workspace, automl_settings_dict: AzureAutoMLSettings):
    from azureml.core import RunConfiguration
    from azureml.train.automl._environment_utilities import modify_run_configuration
    import logging

    null_logger = logging.getLogger("manymodels_null_logger")
    null_logger.addHandler(logging.NullHandler())
    null_logger.propagate = False
    automl_settings_obj = AzureAutoMLSettings.from_string_or_dict(automl_settings_dict)
    run_configuration = modify_run_configuration(automl_settings_obj,
                                                 RunConfiguration(),
                                                 logger=null_logger)
    train_env = run_configuration.environment
    train_env.environment_variables['DISABLE_ENV_MISMATCH'] = True
    train_env.environment_variables['AZUREML_FLUSH_INGEST_WAIT'] = ''
    train_env.environment_variables['AZUREML_METRICS_POLLING_INTERVAL'] = '30'
    return run_configuration.environment
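# Usage sketch (assumed, not from the original source): the environment returned by
# get_automl_environment is typically attached to a ParallelRunConfig for a many-models
# training step. The source directory, entry script, compute, and batch settings below
# are placeholders.
train_env = get_automl_environment(workspace=ws, automl_settings_dict=automl_settings)

parallel_run_config = ParallelRunConfig(
    source_directory="scripts",          # assumed folder
    entry_script="train_automl.py",      # assumed entry script
    environment=train_env,
    compute_target=compute,              # assumed ComputeTarget
    node_count=2,
    process_count_per_node=4,
    mini_batch_size="1",
    error_threshold=-1,
    output_action="append_row",
    run_invocation_timeout=3600,
)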
def get_run_config(self):
    def _get_structured_interface_param(name, param_list):
        return next((param for param in param_list if param.name == name), None)

    param_list = self.default_module_version.interface.parameters
    conda_content = _get_structured_interface_param('CondaDependencies', param_list).default_value
    docker_enabled = _get_structured_interface_param('DockerEnabled', param_list).default_value
    base_docker_image = _get_structured_interface_param('BaseDockerImage', param_list).default_value

    conda_dependencies = CondaDependencies(
        _underlying_structure=ruamel.yaml.safe_load(conda_content))
    run_config = RunConfiguration()
    run_config.environment.docker.enabled = docker_enabled
    run_config.environment.docker.base_image = base_docker_image
    run_config.environment.python.conda_dependencies = conda_dependencies
    return run_config
def load_runconfig_yaml(runconfig_yaml_file):
    try:
        run_config = RunConfiguration().load(path=runconfig_yaml_file)

        # Setting source directory for script run config
        source_directory = os.path.split(runconfig_yaml_file)[0]
        if os.path.split(source_directory)[-1] in (".azureml", "aml_config"):
            source_directory = os.path.split(source_directory)[0]

        # defining scriptrunconfig
        run_config = ScriptRunConfig(source_directory=source_directory,
                                     run_config=run_config)
    except TypeError as exception:
        print(f"::debug::Error when loading runconfig yaml definition in your repository (Path: /{runconfig_yaml_file}): {exception}")
        run_config = None
    except FileNotFoundError as exception:
        print(f"::debug::Error when loading runconfig yaml definition in your repository (Path: /{runconfig_yaml_file}): {exception}")
        run_config = None
    return run_config
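# Usage sketch (assumed): loading a runconfig YAML committed to the repository and
# submitting the resulting ScriptRunConfig as an experiment run. The file path and
# experiment name are placeholders.
script_run_config = load_runconfig_yaml(".azureml/train.runconfig")
if script_run_config is not None:
    experiment = Experiment(workspace=ws, name="ci-training")
    run = experiment.submit(script_run_config)
    run.wait_for_completion(show_output=True)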
curr_dir = os.path.dirname(os.path.realpath(__file__))
output_dir = 'outputs'
output_dir_local = os.path.join(curr_dir, '../../../', 'outputs')

# Pipeline parameters
run_experiment = True
register_model = False
publish_pipeline = False

# load workspace config, load default datastore.
ws = Workspace.from_config(auth=AzureCliAuthentication())
default_ds = ws.get_default_datastore()

# load run config
run_config = RunConfiguration.load(path=os.path.join(curr_dir, '../../../', 'aml_config'),
                                   name=run_config_name)

# define training pipeline with one AMLCompute step
trainStep = PythonScriptStep(
    script_name="train.py",
    name="Model Training",
    arguments=[
        '--data-dir', str(default_ds.as_mount()),
        '--output-dir', output_dir
    ],
    inputs=[DataReference(datastore=default_ds, mode="mount")],
    outputs=[
        PipelineData(name="model",
                     datastore=default_ds,
                     output_path_on_compute="training")
###
# Define and set up pipeline
###
pipeline_param = PipelineParameter(name="my_arg", default_value="default")

my_step = PythonScriptStep(
    name="My Script Step",
    script_name="scriptstep.py",
    arguments=[pipeline_param],
    inputs=[],
    outputs=[],
    compute_target=compute_target,
    source_directory="src",
    allow_reuse=True,
    runconfig=RunConfiguration(conda_dependencies=CondaDependencies(
        conda_dependencies_file_path="environment.yml")),
)

pipeline_id, pipeline_endpoint = publish_pipeline(ws, [my_step], "blabla")

###
# Trigger pipeline via REST API
###
# To trigger the pipeline, a service principal is required:
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication
token = requests.post(
    f"{config['sp']['resource_url']}/{config['sp']['tenant_id']}/oauth2/token",
    data={
        "grant_type": "client_credentials",
        "client_id": config["sp"]["client_id"],
def submit_hyperdrive(experiment,
                      hyperdrive_configuration_name,
                      source_directory,
                      run_configuration_name,
                      path=None,
                      run_async=None,
                      conda_dependencies=None,
                      ct_name=None,
                      user_script_and_arguments=None,
                      logger=None):
    from azureml.train.hyperdrive.runconfig import HyperDriveConfig, PrimaryMetricGoal

    policies = {
        "BANDITPOLICY": _get_bandit_policy,
        "MEDIANSTOPPINGPOLICY": _get_median_stopping_policy,
        "TRUNCATIONSELECTIONPOLICY": _get_truncation_selection_policy,
        "NOTERMINATIONPOLICY": _get_no_termination_policy
    }

    samplings = {
        "RANDOM": _get_random_sampling,
        "GRID": _get_grid_sampling,
        "BAYESIAN": _get_bayesian_sampling
    }

    if user_script_and_arguments and len(user_script_and_arguments) > 0:
        script, arguments = user_script_and_arguments[0], user_script_and_arguments[1:]
    else:
        script, arguments = None, None

    if run_configuration_name is None:
        raise UserErrorException("Please specify the name of the run configuration to use.")
    else:
        run_config = RunConfiguration.load(path, run_configuration_name)

    if conda_dependencies:
        from azureml.core.conda_dependencies import CondaDependencies
        cd = CondaDependencies(conda_dependencies_file_path=conda_dependencies)
        run_config.environment.python.conda_dependencies = cd

    if not run_config.script and not script:
        raise UserErrorException("Please specify the script to run either via parameter or in the runconfig")

    if run_config.script and script:
        logger.info("Overriding runconfig script %s with script argument %s", run_config.script, script)
    if script:
        run_config.script = script

    if run_config.arguments and arguments:
        logger.info("Overriding runconfig arguments %s with %s", run_config.arguments, arguments)
    if arguments:
        run_config.arguments = arguments

    if ct_name:
        run_config.target = ct_name

    logger.info("Running %s with arguments %s", run_config.script, run_config.arguments)

    # default to path if source directory is missing.
    if source_directory is None:
        source_directory = path
    script_run_config = ScriptRunConfig(source_directory=source_directory, run_config=run_config)

    # Support a hyperdrive configuration file given as an absolute path
    # or a path relative to the working directory.
    if os.path.isfile(hyperdrive_configuration_name):
        hd_config_file_path = hyperdrive_configuration_name
    else:
        # otherwise look for the file where run config files are located (sub-folder of path)
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith(hyperdrive_configuration_name):
                    hd_config_file_path = os.path.join(root, file)

    with open(hd_config_file_path, "r") as hstream:
        hyperdrive_dict = ruamel.yaml.safe_load(hstream)

    hyperparameter_sampling_type = hyperdrive_dict.get('sampling').get('type')
    if hyperparameter_sampling_type is None:
        raise ValueError("Please provide hyperparameter sampling type in hyperdrive configuration file.")
    hyperparameter_sampling = samplings[hyperparameter_sampling_type.upper()](hyperdrive_dict)

    policy_type = hyperdrive_dict.get('policy').get('type', 'NOTERMINATIONPOLICY')
    policy = policies[policy_type.upper()](hyperdrive_dict)

    primary_metric_goal = PrimaryMetricGoal.from_str(hyperdrive_dict.get('primary_metric_goal'))

    hyperdrive_config = HyperDriveConfig(
        hyperparameter_sampling=hyperparameter_sampling,
        primary_metric_name=hyperdrive_dict.get('primary_metric_name'),
        primary_metric_goal=primary_metric_goal,
        max_total_runs=hyperdrive_dict.get('max_total_runs'),
        max_concurrent_runs=hyperdrive_dict.get('max_concurrent_runs'),
        max_duration_minutes=hyperdrive_dict.get('max_duration_minutes'),
        policy=policy,
        run_config=script_run_config)

    run = experiment.submit(hyperdrive_config)

    logger.debug("Running asynchronously: %s", run_async)
    if not run_async:
        run.wait_for_completion(show_output=True, wait_post_processing=True)

    return _run_to_output_dict(run)
logger.setLevel("INFO") ch = logging.StreamHandler() ch.setLevel(logging.INFO) formatter = logging.Formatter( "%(asctime)s - %(name)s - %(levelname)s - %(message)s") ch.setFormatter(formatter) logger.addHandler(ch) # GET WS, EXP, ENV and COMPUTE TARGET ws = Workspace.from_config() experiment = Experiment(ws, "MaxFreezerTemperatureExceededPipeline", _create_in_cloud=True) compute_target = ComputeTarget(ws, "freezertrain") run_config = RunConfiguration() freezer_environment = ws.environments["sktime_freezer_environment"] run_config.environment = freezer_environment logger.info("Environment complete") # PIPELINE PARAMS output_df_long = PipelineData("output_df_long", datastore=ws.get_default_datastore()) output_df_nested = PipelineData("output_df_nested", datastore=ws.get_default_datastore()) time_series_length_param = PipelineParameter(name="time_series_length", default_value=10) threshold_param = PipelineParameter(name="threshold", default_value=180.0) dataset_name_param = PipelineParameter(name="dataset_name", default_value="processed_json")
# Create the environment
tf_env = Environment(ENV_NAME)
tf_env.docker.enabled = True
tf_env.docker.base_image = BASE_IMAGE

# Define additional packages to be installed
conda_dep = CondaDependencies()
conda_dep.add_pip_package('tensorflow-gpu==2.3.0')
conda_dep.add_pip_package('pillow')

# Add packages to the environment
tf_env.python.conda_dependencies = conda_dep

# Create the configuration of an experiment
aml_run_config = RunConfiguration()
aml_run_config.environment = tf_env
# The name of the custom environment must not start with 'AzureML'
# https://github.com/MicrosoftDocs/azure-docs/issues/65770#issuecomment-724536550
aml_run_config.environment.name = 'road-segmentation-GPU'

# Create the compute target
compute_target = createAmlCompute(ws, CLUSTER_NAME, VM_SIZE)

dm = DataManager(ws)

# Obtain training set
images_dataset = dm.filterDataset('training', 'images/**/*.png')
labels_dataset = dm.filterDataset('training', 'labels/**/*_road_*.png')
scoring_images, training_images = dm.splitDataset(images_dataset, 0.2,
import sys
from typing import Tuple

import click
from azureml.core import (ComputeTarget, Dataset, Environment,
                          RunConfiguration, Workspace)
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.experiment import Experiment
from azureml.pipeline.core import (Pipeline, PipelineData, PipelineParameter,
                                   PublishedPipeline)
from azureml.pipeline.steps import DatabricksStep, PythonScriptStep

CLI_AUTH = AzureCliAuthentication()
# noinspection PyTypeChecker
WS = Workspace.from_config(auth=CLI_AUTH)
RC = RunConfiguration()
RC.environment = Environment.get(WS, "lightgbm")


# noinspection PyTypeChecker
def create_databricks_step(
        input_dataset: Dataset,
        compute: ComputeTarget,
        debug_run: bool) -> Tuple[DatabricksStep, PipelineData]:
    output_data = PipelineData(name="ParquetFiles",
                               datastore=WS.get_default_datastore(),
                               is_directory=True)
    node_size = 'Standard_DS4_v2'
    spark_version = '7.3.x-cpu-ml-scala2.12'
    db_step = DatabricksStep(
def create_experiment_config(workspace):
    ########################################
    ### Creating data prep Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_prep_step_path = os.path.join("steps", "data_prep")
    with open(os.path.join(data_prep_step_path, "step.json")) as f:
        data_prep_settings = json.load(f)

    # Setup datasets of first step
    print("Setting up datasets")
    data_prep_input = Dataset.get_by_name(
        workspace=workspace,
        name=data_prep_settings.get("dataset_input_name", None)
    ).as_named_input(
        data_prep_settings.get("dataset_input_name", None)
    ).as_mount()
    data_prep_output = PipelineData(
        name=data_prep_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get("datastore_output_name",
                                                        "workspaceblobstore")),
        output_mode="mount").as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #data_prep_output.register(
    #    name=data_prep_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    data_prep_dependencies = CondaDependencies.create(
        pip_packages=data_prep_settings.get("pip_packages", []),
        conda_packages=data_prep_settings.get("conda_packages", []),
        python_version=data_prep_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    data_prep_run_config = RunConfiguration(
        conda_dependencies=data_prep_dependencies,
        framework=data_prep_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    data_prep_compute_target = ComputeTarget(
        workspace=workspace,
        name=data_prep_settings.get("compute_target_name", None))

    # Create python step
    print("Creating Step")
    data_prep = PythonScriptStep(
        name=data_prep_settings.get("step_name", None),
        script_name=data_prep_settings.get("script_name", None),
        arguments=data_prep_settings.get("arguments", []),
        compute_target=data_prep_compute_target,
        runconfig=data_prep_run_config,
        inputs=[data_prep_input],
        outputs=[data_prep_output],
        params=data_prep_settings.get("parameters", []),
        source_directory=data_prep_step_path,
        allow_reuse=data_prep_settings.get("allow_reuse", True),
        version=data_prep_settings.get("version", None),
    )

    ###############################################
    ### Creating data model train Pipeline Step ###
    ###############################################

    # Load settings
    print("Loading settings")
    model_train_step_path = os.path.join("steps", "model_train")
    with open(os.path.join(model_train_step_path, "step.json")) as f:
        model_train_settings = json.load(f)
    hyperparameter_sampling_settings = model_train_settings.get(
        "hyperparameter_sampling", {})

    # Setup datasets of first step
    print("Setting up datasets")
    model_train_input = data_prep_output.as_named_input(
        name=model_train_settings.get("dataset_input_name", None))
    model_train_output = PipelineData(
        name=model_train_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=model_train_settings.get("datastore_output_name", None)),
        output_mode="mount",
    ).as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #model_train_output.register(
    #    name=model_train_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    model_train_dependencies = CondaDependencies.create(
        pip_packages=model_train_settings.get("pip_packages", []),
        conda_packages=model_train_settings.get("conda_packages", []),
        python_version=model_train_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    model_train_run_config = RunConfiguration(
        conda_dependencies=model_train_dependencies,
        framework=model_train_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    model_train_compute_target = ComputeTarget(
        workspace=workspace,
        name=model_train_settings.get("compute_target_name", None))

    # Create distributed training backend
    print("Creating distributed training backend")
    distributed_training_backend = get_distributed_backend(
        backend_name=model_train_settings.get("distributed_backend", None))

    # Create Estimator for Training
    print("Creating Estimator for training")
    model_train_estimator = Estimator(
        source_directory=model_train_step_path,
        entry_script=model_train_settings.get("script_name", None),
        environment_variables=model_train_settings.get("parameters", None),
        compute_target=model_train_compute_target,
        node_count=model_train_settings.get("node_count", None),
        distributed_training=distributed_training_backend,
        conda_packages=model_train_settings.get("conda_packages", None),
        pip_packages=model_train_settings.get("pip_packages", None),
    )

    try:
        # Create parameter sampling
        print("Creating Parameter Sampling")
        parameter_dict = {}
        parameters = hyperparameter_sampling_settings.get(
            "parameters", {}) if "parameters" in hyperparameter_sampling_settings else {}
        for parameter_name, parameter_details in parameters.items():
            parameter_distr = get_parameter_distribution(
                distribution=parameter_details.get("distribution", None),
                **parameter_details.get("settings", {}))
            parameter_dict[f"--{parameter_name}"] = parameter_distr
        model_train_ps = get_parameter_sampling(
            sampling_method=hyperparameter_sampling_settings.get("method", None),
            parameter_dict=parameter_dict)

        # Get Policy definition
        policy_settings = hyperparameter_sampling_settings.get("policy", {})
        kwargs = {
            key: value
            for key, value in policy_settings.items()
            if key not in ["policy_method", "evaluation_interval", "delay_evaluation"]
        }

        # Create termination policy
        print("Creating early termination policy")
        model_train_policy = get_policy(
            policy_method=policy_settings.get("method", ""),
            evaluation_interval=policy_settings.get("evaluation_interval", None),
            delay_evaluation=policy_settings.get("delay_evaluation", None),
            **kwargs)

        # Create HyperDriveConfig
        print("Creating HyperDriveConfig")
        model_train_hyperdrive_config = HyperDriveConfig(
            estimator=model_train_estimator,
            hyperparameter_sampling=model_train_ps,
            policy=model_train_policy,
            primary_metric_name=hyperparameter_sampling_settings.get(
                "primary_metric", None),
            primary_metric_goal=PrimaryMetricGoal.MINIMIZE
            if "min" in hyperparameter_sampling_settings.get("primary_metric_goal", None)
            else PrimaryMetricGoal.MAXIMIZE,
            max_total_runs=hyperparameter_sampling_settings.get("max_total_runs", 1),
            max_concurrent_runs=hyperparameter_sampling_settings.get("max_concurrent_runs", 1),
            max_duration_minutes=hyperparameter_sampling_settings.get("max_duration_minutes", None))

        # Create HyperDriveStep
        print("Creating HyperDriveStep")
        model_train = HyperDriveStep(
            name=model_train_settings.get("step_name", None),
            hyperdrive_config=model_train_hyperdrive_config,
            estimator_entry_script_arguments=model_train_settings.get("arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))
    except:
        print("Not all required parameters specified for HyperDrive step")

        # Create EstimatorStep
        print("Creating EstimatorStep")
        model_train = EstimatorStep(
            name=model_train_settings.get("step_name", None),
            estimator=model_train_estimator,
            estimator_entry_script_arguments=model_train_settings.get("arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            compute_target=model_train_compute_target,
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[model_train],
        description="Training Pipeline",
    )

    # Validate pipeline
    print("Validating pipeline")
    pipeline.validate()

    return pipeline
parser.add_argument('--subscription_id', help='the subscription id of aml')
parser.add_argument('--resource_group', help='the resource group of aml')
parser.add_argument('--workspace_name', help='the workspace name of aml')
parser.add_argument('--compute_target', help='the compute cluster name of aml')
parser.add_argument('--docker_image', help='the docker image of job')
parser.add_argument('--experiment_name', help='the experiment name')
parser.add_argument('--script_dir', help='script directory')
parser.add_argument('--script_name', help='script name')
args = parser.parse_args()

ws = Workspace(args.subscription_id, args.resource_group, args.workspace_name)
compute_target = ComputeTarget(workspace=ws, name=args.compute_target)
experiment = Experiment(ws, args.experiment_name)

run_config = RunConfiguration()
dependencies = CondaDependencies()
dependencies.add_pip_package("azureml-sdk")
dependencies.add_pip_package("azureml")
run_config.environment.python.conda_dependencies = dependencies
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = args.docker_image
run_config.target = compute_target
run_config.node_count = 1

config = ScriptRunConfig(source_directory=args.script_dir,
                         script=args.script_name,
                         run_config=run_config)
run = experiment.submit(config)
print(run.get_details()["runId"])

while True:
    line = sys.stdin.readline().rstrip()
import time

from azureml.core import ScriptRunConfig, RunConfiguration
from azureml.core import Workspace, Experiment

ws = Workspace.from_config(path='./aml_config/PredictiveMaintenanceWSConfig.json')
exp = Experiment(name='TrainModel', workspace=ws)

#run_config = RunConfiguration.load(name='local', path='.')
run_config = RunConfiguration.load(name='amlcompute', path='.')
#run_config = RunConfiguration.load(name='cluster', path='.')  # `cluster` Compute Target should be created within Azure ML Workspace
print(run_config)

script_run_config = ScriptRunConfig(source_directory='.',
                                    script='train.py',
                                    run_config=run_config)
run = exp.submit(script_run_config)
print(run.get_portal_url())

run.log('Starting Submission', time.asctime(time.localtime(time.time())))
run.wait_for_completion(show_output=True)
                    type=str,
                    help="Path to model training code",
                    dest="source_directory",
                    required=True)
args = parser.parse_args()
print(f'Arguments: {args}')

print('Connecting to workspace')
ws = Workspace.from_config()
print(
    f'WS name: {ws.name}\nRegion: {ws.location}\nSubscription id: {ws.subscription_id}\nResource group: {ws.resource_group}'
)

print('Loading runconfig for pipeline')
runconfig = RunConfiguration.load(args.runconfig)
runconfig_register = RunConfiguration.load(args.runconfig_register)

print('Loading dataset')
training_dataset = Dataset.get_by_name(ws, args.dataset)

# Parametrize dataset input to the pipeline
training_dataset_parameter = PipelineParameter(name="training_dataset",
                                               default_value=training_dataset)
training_dataset_consumption = DatasetConsumptionConfig(
    "training_dataset", training_dataset_parameter).as_mount()

train_step = PythonScriptStep(
    name="train-step",
    runconfig=runconfig,
    source_directory=args.source_directory,
def create_pipeline():
    ws = Workspace.from_config(auth=authenticate())
    def_data_store = ws.get_default_datastore()
    run = Run.get_context()
    project_folder = "project"

    read_output = PipelineData("read_output",
                               datastore=def_data_store,
                               output_name="read_output")
    process_out = PipelineData("process_out",
                               datastore=def_data_store,
                               output_name="process_out")

    # hist, line, scatter
    chart_type = PipelineParameter(name="chart_type", default_value="line")

    # Check if compute exist
    compute_name = "Dedicated-DS3-v2"
    vm_size = "STANDARD_D3_V2"
    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target: ' + compute_name)
    else:
        # create the compute target
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=0, max_nodes=4)
        compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

    # create run config for our python steps
    def conda_deps():
        deps = CondaDependencies(f'{project_folder}/environment.yml')
        deps.add_channel("conda-forge")
        deps.add_conda_package('curl')
        return deps

    run_config = RunConfiguration(conda_dependencies=conda_deps())
    run_config.environment.docker.enabled = True
    run_config.environment.spark.precache_packages = False

    # Create each step for our pipeline
    read_data = PythonScriptStep(
        name="read_data",
        script_name="read_data.py",
        arguments=["read-data", "--output-path", read_output],
        outputs=[read_output],
        compute_target=compute_target,
        source_directory=project_folder,
        runconfig=run_config)

    pre_process = PythonScriptStep(
        name="pre_process",
        script_name="pre_process.py",
        arguments=["pre-process", "--input-path", read_output,
                   "--output-path", process_out],
        inputs=[read_output],
        outputs=[process_out],
        compute_target=compute_target,
        source_directory=project_folder,
        runconfig=run_config)

    visualize = PythonScriptStep(
        name="visualize",
        script_name="visualize.py",
        arguments=["visualize", "--input-path", process_out,
                   "--chart", chart_type],
        inputs=[process_out],
        compute_target=compute_target,
        source_directory=project_folder,
        runconfig=run_config)

    # list of steps to run
    steps = [read_data, pre_process, visualize]

    # Build the pipeline
    test_pipeline = Pipeline(workspace=ws, steps=[steps])

    # Submit the pipeline to be run - In the same experiment
    pipeline_run = run.experiment.submit(test_pipeline)
    pipeline_run.wait_for_completion()
def get_pipeline(aml_compute: ComputeTarget,
                 blob_ds: Datastore,
                 batch_env: Environment,
                 tf_env: Environment) -> list:
    """
    Creates pipeline steps.

    Parameters:
        aml_compute (ComputeTarget): a reference to a compute
        blob_ds (DataStore): a reference to a datastore
        batch_env (Environment): a reference to environment object
        tf_env (Environment): a horovod/tf environment

    Returns:
        list: the pipeline steps
    """
    # We need something to generate data by the way
    pipeline_files = PipelineData("pipeline_files", datastore=blob_ds).as_dataset()

    # Pipeline parameters to use with every run
    is_debug = PipelineParameter("is_debug", default_value=False)
    relay_connection_name = PipelineParameter("debug_relay_connection_name",
                                              default_value="none")

    single_step_config = RunConfiguration()
    single_step_config.environment = batch_env
    single_step = PythonScriptStep(
        name="single-step",
        script_name="samples/azure_ml_advanced/steps/single_step.py",
        source_directory=".",
        runconfig=single_step_config,
        arguments=[
            "--pipeline-files", pipeline_files,
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5678,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        inputs=[],
        outputs=[pipeline_files],
        compute_target=aml_compute,
        allow_reuse=False)

    output_dir = PipelineData("output_dir")

    parallel_run_config = ParallelRunConfig(
        entry_script="samples/azure_ml_advanced/steps/parallel_step.py",
        source_directory=".",
        mini_batch_size="5",
        output_action="summary_only",
        environment=batch_env,
        compute_target=aml_compute,
        error_threshold=10,
        run_invocation_timeout=600,  # very important for debugging
        node_count=2,
        process_count_per_node=1)

    parallelrun_step = ParallelRunStep(
        name="parallel-run-step",
        parallel_run_config=parallel_run_config,
        inputs=[pipeline_files],
        output=output_dir,
        arguments=[
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5679,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        allow_reuse=False)

    parallelrun_step.run_after(single_step)

    distr_config = MpiConfiguration(process_count_per_node=1, node_count=2)

    src = ScriptRunConfig(
        source_directory=".",
        script="samples/azure_ml_advanced/steps/mpi/mpi_step_starter.py",
        arguments=[
            "--input-ds", pipeline_files,
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5680,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        compute_target=compute_name,
        environment=tf_env,
        distributed_job_config=distr_config,
    )

    mpi_step = PythonScriptStep(
        name="mpi-step",
        script_name="samples/azure_ml_advanced/steps/mpi/mpi_step_starter.py",
        arguments=[
            "--input-ds", pipeline_files,
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5680,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        compute_target=aml_compute,
        inputs=[pipeline_files],
        outputs=[],
        runconfig=src.run_config,
        source_directory=".")

    mpi_step.run_after(parallelrun_step)

    print("Pipeline Steps Created")

    steps = [single_step, parallelrun_step, mpi_step]
    print(f"Returning {len(steps)} steps")
    return steps
def build_pipeline_steps(automlconfig: AutoMLConfig,
                         data: Dataset,
                         target_column: str,
                         compute_target: ComputeTarget,
                         group_column_names: list,
                         time_column_name: str,
                         deploy: bool,
                         service_name: str = 'grouping-demo') -> StepSequence:
    steps = []

    metrics_output_name = 'metrics_{}'
    best_model_output_name = 'best_model_{}'
    count = 0
    model_names = []

    # get all automl configs by group
    configs = _get_configs(automlconfig, data, target_column, compute_target, group_column_names)

    # build a runconfig for register model
    register_config = RunConfiguration()
    cd = CondaDependencies()
    cd.add_pip_package('azureml-pipeline')
    register_config.environment.python.conda_dependencies = cd

    # create each automl step end-to-end (train, register)
    for group_name, conf in configs.items():
        # create automl metrics output
        metrics_data = PipelineData(
            name='metrics_data_{}'.format(group_name),
            pipeline_output_name=metrics_output_name.format(group_name),
            training_output=TrainingOutput(type='Metrics'))
        # create automl model output
        model_data = PipelineData(
            name='model_data_{}'.format(group_name),
            pipeline_output_name=best_model_output_name.format(group_name),
            training_output=TrainingOutput(type='Model',
                                           metric=conf.user_settings['primary_metric']))

        automl_step = AutoMLStep(
            name='automl_{}'.format(group_name),
            automl_config=conf,
            outputs=[metrics_data, model_data],
            allow_reuse=True)
        steps.append(automl_step)

        # pass the group name as a parameter to the register step ->
        # this will become the name of the model for this group.
        group_name_param = PipelineParameter("group_name_{}".format(count),
                                             default_value=group_name)
        count += 1

        reg_model_step = PythonScriptStep(
            'register.py',
            name='register_{}'.format(group_name),
            arguments=["--model_name", group_name_param, "--model_path", model_data],
            inputs=[model_data],
            compute_target=compute_target,
            runconfig=register_config,
            source_directory="register",
            allow_reuse=True
        )
        steps.append(reg_model_step)
        model_names.append(group_name)

    final_steps = steps
    if deploy:
        # modify the conda dependencies to ensure we pick up correct
        # versions of azureml-defaults and azureml-train-automl
        cd = CondaDependencies.create(pip_packages=['azureml-defaults', 'azureml-train-automl'])
        automl_deps = CondaDependencies(conda_dependencies_file_path='deploy/myenv.yml')
        cd._merge_dependencies(automl_deps)
        cd.save('deploy/myenv.yml')

        # add deployment step
        pp_group_column_names = PipelineParameter(
            "group_column_names",
            default_value="#####".join(list(reversed(group_column_names))))
        pp_model_names = PipelineParameter(
            "model_names", default_value=json.dumps(model_names))
        pp_service_name = PipelineParameter(
            "service_name", default_value=service_name)

        deployment_step = PythonScriptStep(
            'deploy.py',
            name='service_deploy',
            arguments=["--group_column_names", pp_group_column_names,
                       "--model_names", pp_model_names,
                       "--service_name", pp_service_name,
                       "--time_column_name", time_column_name],
            compute_target=compute_target,
            runconfig=RunConfiguration(),
            source_directory="deploy"
        )
        final_steps = StepSequence(steps=[steps, deployment_step])

    return final_steps
# Connect to the workspace
ws = Workspace.from_config()
print(f'WS name: {ws.name}')
print(f'Region: {ws.location}')
print(f'Subscription id: {ws.subscription_id}')
print(f'Resource group: {ws.resource_group}')

default_training_dataset = Dataset.get_by_name(ws, default_dataset_name)

# Parametrize dataset input to the pipeline
training_dataset_parameter = PipelineParameter(name='training_dataset',
                                               default_value=default_training_dataset)
training_dataset_consumption = DatasetConsumptionConfig('training_dataset',
                                                        training_dataset_parameter).as_download()

# Load runconfig from earlier exercise and create pipeline
runconfig = RunConfiguration.load(os.path.join(source_directory, 'runconfig.yml'))

train_step = PythonScriptStep(name='train-step',
                              source_directory=source_directory,
                              script_name='train.py',
                              arguments=['--data-path', training_dataset_consumption],
                              inputs=[training_dataset_consumption],
                              runconfig=runconfig,
                              allow_reuse=False)

steps = [train_step]

pipeline = Pipeline(workspace=ws, steps=steps)
pipeline.validate()
published_pipeline = pipeline.publish('training-pipeline')
def get_backtest_pipeline(
    experiment: Experiment,
    dataset: TabularDataset,
    process_per_node: int,
    node_count: int,
    compute_target: ComputeTarget,
    automl_settings: Dict[str, Any],
    step_size: int,
    step_number: int,
    model_name: Optional[str] = None,
    model_uid: Optional[str] = None,
) -> Pipeline:
    """
    :param experiment: The experiment used to run the pipeline.
    :param dataset: Tabular data set to be used for model training.
    :param process_per_node: The number of processes per node. Generally it should be the number
                             of cores on the node divided by two.
    :param node_count: The number of nodes to be used.
    :param compute_target: The compute target to be used to run the pipeline.
    :param model_name: The name of a model to be back tested.
    :param automl_settings: The dictionary with automl settings.
    :param step_size: The number of periods to step back in backtesting.
    :param step_number: The number of backtesting iterations.
    :param model_uid: The uid to mark models from this run of the experiment.
    :return: The pipeline to be used for model retraining.
             **Note:** The output will be uploaded in the pipeline output called 'score'.
    """
    jasmine_client = JasmineClient(
        service_context=experiment.workspace.service_context,
        experiment_name=experiment.name,
        experiment_id=experiment.id,
    )
    env = jasmine_client.get_curated_environment(
        scenario=Scenarios.AUTOML,
        enable_dnn=False,
        enable_gpu=False,
        compute=compute_target,
        compute_sku=experiment.workspace.compute_targets.get(compute_target.name).vm_size,
    )
    data_results = PipelineData(
        name="results", datastore=None, pipeline_output_name="results"
    )

    ############################################################
    # Split the data set using python script.
    ############################################################
    run_config = RunConfiguration()
    run_config.docker.use_docker = True
    run_config.environment = env
    utilities.set_environment_variables_for_run(run_config)

    split_data = PipelineData(name="split_data_output", datastore=None).as_dataset()
    split_step = PythonScriptStep(
        name="split_data_for_backtest",
        script_name="data_split.py",
        inputs=[dataset.as_named_input("training_data")],
        outputs=[split_data],
        source_directory=PROJECT_FOLDER,
        arguments=[
            "--step-size", step_size,
            "--step-number", step_number,
            "--time-column-name", automl_settings.get("time_column_name"),
            "--time-series-id-column-names", automl_settings.get("grain_column_names"),
            "--output-dir", split_data,
        ],
        runconfig=run_config,
        compute_target=compute_target,
        allow_reuse=False,
    )

    ############################################################
    # We will run the backtest in a parallel run step.
    ############################################################
    settings_path = os.path.join(PROJECT_FOLDER, SETTINGS_FILE)
    hru.dump_object_to_json(automl_settings, settings_path)
    mini_batch_size = PipelineParameter(name="batch_size_param", default_value=str(1))
    back_test_config = ParallelRunConfig(
        source_directory=PROJECT_FOLDER,
        entry_script="retrain_models.py",
        mini_batch_size=mini_batch_size,
        error_threshold=-1,
        output_action="append_row",
        append_row_file_name="outputs.txt",
        compute_target=compute_target,
        environment=env,
        process_count_per_node=process_per_node,
        run_invocation_timeout=3600,
        node_count=node_count,
    )
    utilities.set_environment_variables_for_run(back_test_config)
    forecasts = PipelineData(name="forecasts", datastore=None)
    if model_name:
        parallel_step_name = "{}-backtest".format(model_name.replace("_", "-"))
    else:
        parallel_step_name = "AutoML-backtest"

    prs_args = [
        "--target_column_name", automl_settings.get("label_column_name"),
        "--output-dir", forecasts,
    ]
    if model_name is not None:
        prs_args.append("--model-name")
        prs_args.append(model_name)
    if model_uid is not None:
        prs_args.append("--model-uid")
        prs_args.append(model_uid)
    backtest_prs = ParallelRunStep(
        name=parallel_step_name,
        parallel_run_config=back_test_config,
        arguments=prs_args,
        inputs=[split_data],
        output=forecasts,
        allow_reuse=False,
    )

    ############################################################
    # Then we collect the output and return it as scores output.
    ############################################################
    collection_step = PythonScriptStep(
        name="score",
        script_name="score.py",
        inputs=[forecasts.as_mount()],
        outputs=[data_results],
        source_directory=PROJECT_FOLDER,
        arguments=["--forecasts", forecasts, "--output-dir", data_results],
        runconfig=run_config,
        compute_target=compute_target,
        allow_reuse=False,
    )

    # Build and return the pipeline.
    return Pipeline(
        workspace=experiment.workspace,
        steps=[split_step, backtest_prs, collection_step],
    )
def submit_run(experiment=None,
               path=None,
               run_configuration_name=None,
               source_directory=None,
               conda_dependencies=None,
               run_async=None,
               ct_name=None,
               user_script_and_arguments=None,
               logger=None):
    from azureml.core import RunConfiguration, ScriptRunConfig

    if user_script_and_arguments and len(user_script_and_arguments) > 0:
        script, arguments = user_script_and_arguments[0], user_script_and_arguments[1:]
    else:
        script, arguments = None, None

    if run_configuration_name is None:
        logger.info("No Run Configuration name provided, using default: local system-managed")
        run_config = RunConfiguration()
    else:
        run_config = RunConfiguration.load(path, run_configuration_name)

    if conda_dependencies:
        from azureml.core.conda_dependencies import CondaDependencies
        cd = CondaDependencies(conda_dependencies_file_path=conda_dependencies)
        run_config.environment.python.conda_dependencies = cd

    if not run_config.script and not script:
        raise UserErrorException("Please specify the script to run either via parameter or in the runconfig")

    if run_config.script and script:
        logger.info("Overriding runconfig script %s with script argument %s", run_config.script, script)
    if script:
        run_config.script = script

    if run_config.arguments and arguments:
        logger.info("Overriding runconfig arguments %s with %s", run_config.arguments, arguments)
    if arguments:
        run_config.arguments = arguments

    if ct_name:
        run_config.target = ct_name

    # default to path if source directory is missing.
    if source_directory is None:
        source_directory = path

    logger.info("Running %s with arguments %s", run_config.script, run_config.arguments)
    script_run_config = ScriptRunConfig(source_directory=source_directory,
                                        run_config=run_config,
                                        arguments=run_config.arguments)

    run = experiment.submit(script_run_config)

    logger.debug("Running asynchronously: %s", run_async)
    if not run_async:
        run.wait_for_completion(show_output=True, wait_post_processing=True)

    return _run_to_output_dict(run)
prepared_dataset = PipelineData(prepared_data_dir, datastore=datastore).as_dataset()
prepared_dataset = prepared_dataset.register(name=prepared_data_dir)

conda = CondaDependencies.create(
    pip_packages=[
        "azureml-sdk",
        "azureml-dataprep[fuse,pandas]",
        "torch==1.5.0",
        "nlp==0.2.0",
        "transformers==2.11.0",
    ],
    pin_sdk_version=False,
)
conda.set_pip_option("--pre")

run_config = RunConfiguration()
run_config.environment.python.conda_dependencies = conda

# Define Pipeline Parameters
model_name_param = PipelineParameter("model_name_or_path", "bert-base-cased")
max_seq_len_param = PipelineParameter("max_seq_length", 128)
task_param = PipelineParameter("task", "mrpc")
learning_rate_param = PipelineParameter("learning_rate", 2e-5)
seed_param = PipelineParameter("seed", 1)
train_batch_size_param = PipelineParameter("train_batch_size", 64)
eval_batch_size_param = PipelineParameter("eval_batch_size", 64)
max_epochs_param = PipelineParameter("max_epochs", 3)
num_gpus_param = PipelineParameter("gpus", 2)
num_workers_param = PipelineParameter("num_workers", 2)
parser.add_argument("--source_directory", type=str, help="Path to model training code", dest="source_directory", required=True) args = parser.parse_args() print(f'Arguments: {args}') print('Connecting to workspace') ws = Workspace.from_config() print( f'WS name: {ws.name}\nRegion: {ws.location}\nSubscription id: {ws.subscription_id}\nResource group: {ws.resource_group}' ) print('Loading runconfig for pipeline') runconfig = RunConfiguration.load(args.runconfig) print('Loading dataset') training_dataset = Dataset.get_by_name(ws, args.dataset) # Parametrize dataset input to the pipeline training_dataset_parameter = PipelineParameter(name="training_dataset", default_value=training_dataset) training_dataset_consumption = DatasetConsumptionConfig( "training_dataset", training_dataset_parameter).as_mount() pipeline_data = PipelineData("pipeline_data", datastore=ws.get_default_datastore()) model_name_param = PipelineParameter(name="model_name", default_value="model.pkl")
    'POSTGRES_PASSWORD': os.environ['POSTGRES_PASSWORD'],
    'POSTGRES_HOSTNAME': 'ackbar-postgres.postgres.database.azure.com',
    'AZURE_STORAGE_CONNECTION_STRING': os.environ['AZURE_STORAGE_CONNECTION_STRING']
}

env = Environment(name='env', environment_variables=environment_variables)
conda = CondaDependencies()
conda.add_conda_package('psycopg2')
conda.add_conda_package('numpy')
conda.add_conda_package('Pillow')
# have to use pip to install azure packages...
conda.add_pip_package('azure-storage-blob')
env.python.conda_dependencies = conda

run_config = RunConfiguration()
run_config.environment = env

PROJECT = 'caltech'

prepare_step = PythonScriptStep(
    script_name='prepare.py',
    arguments=['--output', batch_input, '--project', PROJECT],
    inputs=[],
    outputs=[batch_input],
    compute_target=compute_target,
    source_directory='pipeline',
    runconfig=run_config,
    params=environment_variables,
)
from azureml.pipeline.core import PipelineData

is_directory = False  # it's a file where we save the prepared dataframe
default_datastore = ws.get_default_datastore()
datapreparation_output = PipelineData('datapreparation_output',
                                      datastore=default_datastore,
                                      is_directory=is_directory)

# 1.B) Create the dependency object with the mlxtend package
# https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.conda_dependencies.condadependencies?view=azure-ml-py
from azureml.core.environment import CondaDependencies
conda_dep_prep = CondaDependencies()
conda_dep_prep.add_pip_package("mlxtend==0.17.2")  # or conda_dep.add_conda_package("mlxtend==0.17.2")

# 1.C) Create the RunConfiguration object:
from azureml.core import RunConfiguration
run_config_prep = RunConfiguration(conda_dependencies=conda_dep_prep)

# 1.D) Create the PythonScriptStep
from azureml.pipeline.steps import PythonScriptStep
data_preparation_step = PythonScriptStep(
    name="1: Data preparation",
    script_name="1-data_preparation.py",
    compute_target=compute_target,
    runconfig=run_config_prep,
    arguments=[
        "--remoteDataFolder", remote_data_folder,
        "--localDataFolder", local_data_to_download_folder,
        "--datapreparation_output", datapreparation_output,
        "--is_directory", 'aaa' if is_directory else ''
    ],  # All non-empty strings have a True boolean value
    outputs=[datapreparation_output],
    source_directory='./local_scripts/')
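# Sketch of a possible follow-on step (not in the original excerpt): a second
# PythonScriptStep consuming datapreparation_output as an input, which is how the
# PipelineData defined above would normally be chained into a Pipeline. The step
# name and script are assumptions.
from azureml.pipeline.core import Pipeline

training_step = PythonScriptStep(
    name="2: Model training",
    script_name="2-model_training.py",      # assumed script
    compute_target=compute_target,
    runconfig=run_config_prep,               # reusing the same RunConfiguration for simplicity
    arguments=["--datapreparation_output", datapreparation_output],
    inputs=[datapreparation_output],
    source_directory='./local_scripts/')

pipeline = Pipeline(workspace=ws, steps=[data_preparation_step, training_step])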