def create_hyperdrive_trainer(self, estimator, hd_dict, search_type,
                                  metric_name, maximize_metric,
                                  early_term_policy, max_total_runs,
                                  max_concurrent_runs, max_minutes):

        from azureml.train.hyperdrive import RandomParameterSampling, GridParameterSampling, BayesianParameterSampling

        if search_type == "random":
            ps = RandomParameterSampling(hd_dict)
        elif search_type == "grid":
            ps = GridParameterSampling(hd_dict)
        elif search_type == "bayesian":
            ps = BayesianParameterSampling(hd_dict)
        else:
            errors.config_error(
                "Azure ML Hyperdrive search_type not supported: " +
                search_type)

        max_concurrent_runs = min(max_total_runs, max_concurrent_runs)

        from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

        trainer = HyperDriveConfig(
            estimator=estimator,
            hyperparameter_sampling=ps,
            policy=early_term_policy,
            primary_metric_name=metric_name,
            primary_metric_goal=PrimaryMetricGoal.MAXIMIZE
            if maximize_metric else PrimaryMetricGoal.MINIMIZE,
            max_total_runs=max_total_runs,
            max_concurrent_runs=max_concurrent_runs,
            max_duration_minutes=max_minutes)

        return trainer
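# A minimal usage sketch for the factory above (not from the original code): the
# `builder` instance, the `estimator`, the search space and the BanditPolicy values
# below are illustrative assumptions.
from azureml.train.hyperdrive import BanditPolicy, choice, uniform

hd_dict = {
    "--learning_rate": uniform(0.0001, 0.01),   # hypothetical hyperparameters
    "--batch_size": choice(16, 32, 64),
}
early_term_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

trainer = builder.create_hyperdrive_trainer(    # `builder` owns the method above
    estimator=estimator,                        # an Estimator created elsewhere
    hd_dict=hd_dict,
    search_type="random",
    metric_name="val_accuracy",
    maximize_metric=True,
    early_term_policy=early_term_policy,
    max_total_runs=20,
    max_concurrent_runs=4,
    max_minutes=120)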
Example #2
    def exec_SubmitAutoMLTask(self, Parameters: SubmitAutoMLTaskParameter):
        execResult = False
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()

        if self.experiment is None:
            print('Experiment is None. Please set up the experiment first.')
        else:
            try:
                image_config = AutoMLImageConfig(
                    task=Parameters.TaskType,
                    compute_target=self.compute_target,
                    training_data=self.training_dataset,
                    validation_data=self.validation_dataset,
                    hyperparameter_sampling=GridParameterSampling(
                        {'model_name': choice(Parameters.ModelChoice)}))
                self.automl_task_run = self.experiment.submit(image_config)
                print(self.automl_task_run.get_status())
                # self.automl_task_run.wait_for_completion(wait_post_processing=True)
                execResult = True
            except Exception as ex:
                print(ex)

        sys.stdout = old_stdout
        return ExecResult(execResult, mystdout.getvalue())
    def get_cross_validation_hyperdrive_sampler(self) -> GridParameterSampling:
        """
        Returns the cross validation sampler, required to sample the entire parameter space for cross validation.
        """
        return GridParameterSampling(parameter_space={
            CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: choice(list(range(self.number_of_cross_validation_splits))),
        })

    def get_cross_validation_hyperdrive_sampler(self) -> GridParameterSampling:
        if self.perform_sub_fold_cross_validation:
            return GridParameterSampling(parameter_space={
                CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: choice(list(range(self.number_of_cross_validation_splits))),
                CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY: choice(list(range(
                    self.number_of_cross_validation_splits_per_fold))),
            })
        else:
            return super().get_cross_validation_hyperdrive_sampler()
Example #5
from azureml.train.hyperdrive import (RandomParameterSampling, GridParameterSampling,
                                      BayesianParameterSampling)
from azureml.exceptions import RunConfigurationException


def get_parameter_sampling(sampling_method, parameter_dict):
    if "random" in sampling_method.lower():
        ps = RandomParameterSampling(
            parameter_space=parameter_dict
        )
    elif "grid" in sampling_method.lower():
        ps = GridParameterSampling(
            parameter_space=parameter_dict
        )
    elif "bayesian" in sampling_method.lower():
        ps = BayesianParameterSampling(
            parameter_space=parameter_dict
        )
    else:
        raise RunConfigurationException(
            "Parameter sampling method not defined in settings. "
            "Please choose between 'random', 'grid' and 'bayesian'."
        )
    return ps
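# A minimal usage sketch for get_parameter_sampling above; the parameter names and
# ranges are illustrative, not taken from any original settings file.
from azureml.train.hyperdrive import choice, uniform

parameter_dict = {
    "--learning_rate": uniform(0.0001, 0.01),
    "--batch_size": choice(16, 32, 64),
}
ps = get_parameter_sampling("random", parameter_dict)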
Example #6
from azureml.train.hyperdrive import (RandomParameterSampling, GridParameterSampling,
                                      BayesianParameterSampling)
from azureml.exceptions import RunConfigurationException


def get_parameter_sampling(sampling_method, parameter_settings):
    # Build the search space: one hyperdrive expression per parameter,
    # keyed by the command-line argument name.
    parameter_dict = {}
    for parameter_name, parameter_setting in parameter_settings.items():
        parameter_distr = get_parameter_distribution(parameter_name,
                                                     parameter_setting)
        parameter_dict["--{}".format(parameter_name)] = parameter_distr

    if "random" in sampling_method:
        ps = RandomParameterSampling(parameter_dict)
    elif "grid" in sampling_method:
        ps = GridParameterSampling(parameter_dict)
    elif "bayesian" in sampling_method:
        ps = BayesianParameterSampling(parameter_dict)
    else:
        raise RunConfigurationException(
            "Parameter sampling method not defined in settings. "
            "Please choose between 'random', 'grid' and 'bayesian'."
        )
    return ps
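# One possible shape for the get_parameter_distribution helper used above. The
# settings format assumed here ({"distribution": ..., "parameters": {...}}) is an
# assumption for illustration, not the original implementation.
from azureml.train.hyperdrive import choice, uniform

def get_parameter_distribution(parameter_name, parameter_setting):
    distribution = parameter_setting["distribution"].lower()
    params = parameter_setting["parameters"]
    if distribution == "choice":
        return choice(params["values"])
    if distribution == "uniform":
        return uniform(params["min_value"], params["max_value"])
    raise RunConfigurationException(
        "Unsupported distribution '{}' for parameter '{}' in this sketch.".format(
            distribution, parameter_name))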
    def get_cross_validation_hyperdrive_config(
            self, run_config: ScriptRunConfig) -> HyperDriveConfig:
        """
        Returns a configuration for AzureML Hyperdrive that varies the cross validation split index.
        Because this adds a val/Loss metric, it is important that your LightningModule implementation
        logs val/Loss when subclassing LightningContainer. There is an example of this in
        HelloRegression's validation_step method.
        :param run_config: The AzureML run configuration object used to train an individual model.
        :return: A hyperdrive configuration object.
        """
        return HyperDriveConfig(
            run_config=run_config,
            hyperparameter_sampling=GridParameterSampling(
                parameter_space={
                    CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
                    choice(list(range(self.number_of_cross_validation_splits)))
                }),
            primary_metric_name=TrackedMetrics.Val_Loss.value,
            primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
            max_total_runs=self.number_of_cross_validation_splits)
Example #8
print("Training dataset name: " + training_dataset.name)
print("Validation dataset name: " + validation_dataset.name)

training_dataset.to_pandas_dataframe()

from azureml.train.automl import AutoMLImageConfig
from azureml.train.hyperdrive import GridParameterSampling
from azureml.train.hyperdrive import choice

image_config_maskrcnn = AutoMLImageConfig(
    task='image-instance-segmentation',
    compute_target=compute_target,
    training_data=training_dataset,
    validation_data=validation_dataset,
    hyperparameter_sampling=GridParameterSampling(
        {'model_name': choice('maskrcnn_resnet50_fpn')}))

automl_image_run = experiment.submit(image_config_maskrcnn)

automl_image_run.wait_for_completion(wait_post_processing=True)

from azureml.train.automl import AutoMLImageConfig
from azureml.train.hyperdrive import GridParameterSampling, RandomParameterSampling, BayesianParameterSampling
from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, uniform

parameter_space = {
    'model_name': choice('maskrcnn_resnet50_fpn'),
    'learning_rate': uniform(0.0001, 0.001),
    #'warmup_cosine_lr_warmup_epochs': choice(0, 3),
    'optimizer': choice('sgd', 'adam', 'adamw'),
Example #9
        task=LabeledDatasetTask.OBJECT_DETECTION,
        path=ds.path('odFridgeObjects/validation_annotations.jsonl'))
    validation_dataset = validation_dataset.register(
        workspace=ws, name=validation_dataset_name)

print("Training dataset name: " + training_dataset.name)
print("Validation dataset name: " + validation_dataset.name)

training_dataset.to_pandas_dataframe()

image_config_yolov5 = AutoMLImageConfig(
    task='image-object-detection',
    compute_target=compute_target,
    training_data=training_dataset,
    validation_data=validation_dataset,
    hyperparameter_sampling=GridParameterSampling(
        {'model_name': choice('yolov5')}))

automl_image_run = experiment.submit(image_config_yolov5)

automl_image_run.wait_for_completion(wait_post_processing=True)

parameter_space = {
    'model':
    choice(
        {
            'model_name': choice('yolov5'),
            'learning_rate': uniform(0.0001, 0.01),
            #'model_size': choice('small', 'medium'), # model-specific
            'img_size': choice(640, 704, 768),  # model-specific
        },
        {
Example #10
experiment_name = 'diabetes_training'
experiment = Experiment(workspace=ws, name=experiment_name)

# Create a folder for the experiment files
experiment_folder = './' + experiment_name
os.makedirs(experiment_folder, exist_ok=True)

print("Experiment:", experiment.name)

#Fetch GPU cluster for computations
gpu_cluster = ComputeTarget(workspace=ws, name='demo-GPU-cluster')

# Sample a range of parameter values
params = GridParameterSampling({
    # There's only one parameter, so grid sampling will try each value - with multiple parameters it would try every combination
    '--regularization':
    choice(0.001, 0.005, 0.01, 0.05, 0.1, 1.0)
})
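
# For illustration only: with more than one parameter, grid sampling runs every
# combination (here 3 x 2 = 6 child runs). The '--solver' parameter is a
# hypothetical addition, not part of the diabetes script.
# params = GridParameterSampling({
#     '--regularization': choice(0.01, 0.1, 1.0),
#     '--solver': choice('liblinear', 'lbfgs')
# })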

# Set evaluation policy to stop poorly performing training runs early
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Get the training dataset
diabetes_ds = ws.datasets.get("diabetes_dataset")

# Create an estimator that uses the remote compute
hyper_estimator = SKLearn(
    source_directory=experiment_folder,
    inputs=[diabetes_ds.as_named_input('diabetes')
            ],  # Pass the dataset as an input
    compute_target=gpu_cluster,
from azureml.core import ScriptRunConfig, Experiment

script_run = ScriptRunConfig(source_directory=".",
                             script="hyperdrive_script.py",
                             arguments=["--input_data", input_ds.as_named_input("raw_data")],
                             environment=my_env,
                             compute_target=cluster
                             )

# creating the hyperparameter search space

from azureml.train.hyperdrive import GridParameterSampling, choice

hyper_params = GridParameterSampling({
    '--n_estimators': choice(10,20,30,100),
    '--min_samples_leaf': choice(1,2,5)
})


# configuring the HyperDriveConfig class
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

hyper_config = HyperDriveConfig(run_config=script_run,
                                hyperparameter_sampling=hyper_params,
                                policy=None,
                                primary_metric_name='accuracy',
                                primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                max_total_runs=20,
                                max_concurrent_runs=2)
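
# A minimal sketch of submitting the configuration above; assumes an Experiment
# object named `experiment` already exists for the workspace.
hyperdrive_run = experiment.submit(hyper_config)
hyperdrive_run.wait_for_completion(show_output=True)

# Retrieve the child run with the best primary metric
best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_run.get_metrics())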

# So far we have been using default hyperparameter values, but in practice we would need to tune these values to optimize performance. Azure Machine Learning service provides many methods for tuning hyperparameters using different strategies.
#
# The first step is to choose the parameter space that we want to search. We have a few choices to make here:
#
# - **Parameter Sampling Method**: This is how we select the combinations of parameters to sample. Azure Machine Learning service offers [RandomParameterSampling](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.randomparametersampling?view=azure-ml-py), [GridParameterSampling](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.gridparametersampling?view=azure-ml-py), and [BayesianParameterSampling](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.bayesianparametersampling?view=azure-ml-py). We will use the `GridParameterSampling` method.
# - **Parameters To Search**: We will be searching for optimal combinations of `learning_rate` and `num_epochs`.
# - **Parameter Expressions**: This defines the [functions that can be used to describe a hyperparameter search space](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.parameter_expressions?view=azure-ml-py), which can be discrete or continuous. We will be using a `discrete set of choices`.
#
# The following code allows us to define these options.

# %%
from azureml.train.hyperdrive import GridParameterSampling
from azureml.train.hyperdrive.parameter_expressions import choice

param_sampling = GridParameterSampling({
    '--learning_rate': choice(3e-5, 3e-4),
    '--num_epochs': choice(3, 4)
})

# %% [markdown]
# The next step is to define how we want to measure our performance. We do so by specifying two classes:
#
# - **[PrimaryMetricGoal](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.primarymetricgoal?view=azure-ml-py)**: We want to `MAXIMIZE` the `val_accuracy` that is logged in our training script.
# - **[BanditPolicy](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.banditpolicy?view=azure-ml-py)**: A policy for early termination so that jobs which don't show promising results will stop automatically.

# %%
from azureml.train.hyperdrive import BanditPolicy
from azureml.train.hyperdrive import PrimaryMetricGoal

primary_metric_name = 'val_accuracy'
primary_metric_goal = PrimaryMetricGoal.MAXIMIZE
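
# A minimal sketch of the early-termination policy described above; the
# slack_factor, evaluation_interval and delay_evaluation values are illustrative.
early_termination_policy = BanditPolicy(slack_factor=0.1,
                                        evaluation_interval=2,
                                        delay_evaluation=2)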
Example #13

from azureml.train.hyperdrive import choice, normal

param_space = {
    '--batch_size': choice(10, 12, 15),
    '--learning_rate': normal(10, 3)
}


# configuring the sampling - grid, random or bayesian

from azureml.train.hyperdrive import GridParameterSampling, RandomParameterSampling, BayesianParameterSampling

# grid sampling only supports choice(); the space above uses normal(), so random sampling is used here
param_sampling = RandomParameterSampling(param_space)


# configuring early stopping - pick one of the three policies below
from azureml.train.hyperdrive import BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy

# bandit policy
early_termination_policy = BanditPolicy(slack_amount=0.2, evaluation_interval=1, delay_evaluation=5)

# median stopping policy
# early_termination_policy = MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=5)

# truncation selection policy
# early_termination_policy = TruncationSelectionPolicy(truncation_percentage=10,
#                                                      evaluation_interval=1,
#                                                      delay_evaluation=5)



# for hyperparameter tuning we also need a training script

# we already have the training script above
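
# A minimal sketch tying the pieces above together; assumes a ScriptRunConfig named
# `script_run` (as in the earlier snippet) and an Experiment named `experiment`
# already exist, and that the training script logs a metric called 'accuracy'.
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

hyper_config = HyperDriveConfig(run_config=script_run,
                                hyperparameter_sampling=param_sampling,
                                policy=early_termination_policy,
                                primary_metric_name='accuracy',
                                primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                max_total_runs=20,
                                max_concurrent_runs=2)

hyperdrive_run = experiment.submit(hyper_config)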