def get_cross_validation_hyperdrive_sampler(self) -> GridParameterSampling:
    if self.perform_sub_fold_cross_validation:
        return GridParameterSampling(parameter_space={
            CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: choice(list(range(self.number_of_cross_validation_splits))),
            CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY: choice(list(range(
                self.number_of_cross_validation_splits_per_fold))),
        })
    else:
        return super().get_cross_validation_hyperdrive_sampler()
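# A hedged usage sketch (not from the source): wiring the sub-fold sampler above into a
# HyperDriveConfig. The test in Example #2 below implies that max_total_runs is the
# product of splits and sub-folds; `run_config` and the metric name (mirroring the
# get_cross_validation_hyperdrive_config snippet further down) are assumptions here.
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

hd_config = HyperDriveConfig(
    run_config=run_config,  # an existing ScriptRunConfig for a single training run
    hyperparameter_sampling=self.get_cross_validation_hyperdrive_sampler(),
    primary_metric_name=TrackedMetrics.Val_Loss.value,
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    max_total_runs=(self.number_of_cross_validation_splits
                    * self.number_of_cross_validation_splits_per_fold))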
Example #2
def test_get_hyperdrive_config(
        number_of_cross_validation_splits: int,
        number_of_cross_validation_splits_per_fold: int,
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Test that the HyperDrive config and cross validation sampler are set up correctly.
    """
    if number_of_cross_validation_splits_per_fold > 0:
        config = HyperDriveTestModelScalar()
        config.number_of_cross_validation_splits_per_fold = number_of_cross_validation_splits_per_fold

    else:
        config = HyperDriveTestModelSegmentation()

    config.number_of_cross_validation_splits = number_of_cross_validation_splits
    # create HyperDrive config with dummy estimator for testing
    source_config = SourceConfig(root_folder=test_output_dirs.root_dir,
                                 entry_script="something.py",
                                 conda_dependencies_files=[])
    estimator = Estimator(source_directory=source_config.root_folder,
                          entry_script=source_config.entry_script,
                          compute_target="Local")

    hd_config = config.get_hyperdrive_config(estimator=estimator)

    assert hd_config.estimator.source_directory == source_config.root_folder
    assert hd_config.estimator.run_config.script == source_config.entry_script
    assert hd_config.estimator._script_params == source_config.script_params

    if number_of_cross_validation_splits > 0 and number_of_cross_validation_splits_per_fold > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits * \
               number_of_cross_validation_splits_per_fold
    elif number_of_cross_validation_splits > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits
    else:
        assert hd_config._max_total_runs == HYPERDRIVE_TOTAL_RUNS

    if config.perform_cross_validation:
        # check sampler is as expected
        sampler = config.get_cross_validation_hyperdrive_sampler()

        expected_sampler_dict = {
            CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
            choice(list(range(number_of_cross_validation_splits)))
        }

        if number_of_cross_validation_splits_per_fold > 0:
            expected_sampler_dict[
                CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY] = choice(
                    list(range(number_of_cross_validation_splits_per_fold)))

        assert sampler._parameter_space == expected_sampler_dict
    else:
        assert vars(config.get_hyperdrive_config(estimator)) \
               == vars(_create_dummy_hyperdrive_param_search_config(estimator))
Example #3
def main(epochs, iterations, compute_target, concurrent_runs):
    cli_auth = AzureCliAuthentication()

    experiment = Experiment.from_directory(".", auth=cli_auth)
    ws = experiment.workspace

    cluster = ws.compute_targets[compute_target]
    food_data = ws.datastores['food_images']

    script_arguments = {"--data-dir": food_data.as_mount(), "--epochs": epochs}

    tf_est = TensorFlow(source_directory=".",
                        entry_script='code/train/train.py',
                        script_params=script_arguments,
                        compute_target=cluster,
                        conda_packages=['pillow', 'pandas'],
                        pip_packages=['click', 'seaborn'],
                        use_docker=True,
                        use_gpu=True,
                        framework_version='1.13')

    # Run on subset of food categories
    tf_est.run_config.arguments.extend(
        ['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio'])

    param_sampler = RandomParameterSampling({
        '--minibatch-size':
        choice(16, 32, 64),
        '--learning-rate':
        loguniform(-9, -6),
        '--optimizer':
        choice('rmsprop', 'adagrad', 'adam')
    })

    # Create Early Termination Policy
    etpolicy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

    # Create HyperDrive Run Configuration
    hyper_drive_config = HyperDriveConfig(
        estimator=tf_est,
        hyperparameter_sampling=param_sampler,
        policy=etpolicy,
        primary_metric_name='acc',
        primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=iterations,
        max_concurrent_runs=concurrent_runs)

    # Submit the Hyperdrive Run
    print("Submitting Hyperdrive Run")
    hd_run = experiment.submit(hyper_drive_config)
    hd_run.wait_for_completion(raise_on_error=True, show_output=True)
    print("Finishing Run")
    best_run = hd_run.get_best_run_by_primary_metric()
    print(f'##vso[task.setvariable variable=run_id]{best_run.id}')
def get_cross_validation_hyperdrive_sampler(self) -> GridParameterSampling:
    """
    Returns the cross validation sampler, required to sample the entire parameter space for cross validation.
    """
    return GridParameterSampling(parameter_space={
        CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: choice(list(range(self.number_of_cross_validation_splits))),
    })
Example #5
def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
                               test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that the HyperDrive config and cross validation sampler are set up correctly.
    """
    config = HyperDriveTestModel()

    config.number_of_cross_validation_splits = number_of_cross_validation_splits
    run_config = ScriptRunConfig(source_directory=str(
        test_output_dirs.root_dir),
                                 script=str(Path("something.py")),
                                 arguments=["foo"],
                                 compute_target="Local")

    hd_config = config.get_hyperdrive_config(run_config=run_config)

    if number_of_cross_validation_splits > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits
    else:
        assert hd_config._max_total_runs == HYPERDRIVE_TOTAL_RUNS

    if config.perform_cross_validation:
        # check sampler is as expected
        sampler = config.get_cross_validation_hyperdrive_sampler()

        expected_sampler_dict = {
            CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
            choice(list(range(number_of_cross_validation_splits)))
        }

        assert sampler._parameter_space == expected_sampler_dict
    else:
        assert vars(config.get_hyperdrive_config(run_config)) \
               == vars(_create_dummy_hyperdrive_param_search_config(run_config))
    def exec_SubmitAutoMLTask(self, Parameters: SubmitAutoMLTaskParameter):
        execResult = False
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()

        if self.experiment is None:
            print('experiment is None, Please Setup Experiment first.')
        else:
            try:
                image_config = AutoMLImageConfig(
                    task=Parameters.TaskType,
                    compute_target=self.compute_target,
                    training_data=self.training_dataset,
                    validation_data=self.validation_dataset,
                    hyperparameter_sampling=GridParameterSampling(
                        {'model_name': choice(Parameters.ModelChoice)}))
                self.automl_task_run = self.experiment.submit(image_config)
                print(self.automl_task_run.get_status())
                # self.automl_task_run.wait_for_completion(wait_post_processing=True)
                execResult = True
            except Exception as ex:
                print(ex)

        sys.stdout = old_stdout
        return ExecResult(execResult, mystdout.getvalue())
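# A hedged usage sketch (assumed, not from the source): SubmitAutoMLTaskParameter is only
# known through the fields the method reads (TaskType, ModelChoice), and `runner` stands
# for an instance whose experiment, compute target and datasets were set up beforehand.
params = SubmitAutoMLTaskParameter()           # hypothetical construction
params.TaskType = 'image-object-detection'
params.ModelChoice = ['yolov5']                # wrapped in choice(...) by the method
result = runner.exec_SubmitAutoMLTask(params)  # ExecResult(success flag, captured stdout)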
Example #7
def get_parameter_distribution(distribution, **kwargs):
    # NB: substring checks must go from most to least specific, otherwise
    # e.g. "quniform" would be caught by the "uniform" branch first.
    if "choice" in distribution.lower():
        parameter_distr = choice(
            kwargs.get("options", [])
        )
    elif "randint" in distribution.lower():
        parameter_distr = randint(
            upper=kwargs.get("upper", None)
        )
    elif "qloguniform" in distribution.lower():
        parameter_distr = qloguniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None),
            q=kwargs.get("q", None)
        )
    elif "loguniform" in distribution.lower():
        parameter_distr = loguniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None),
        )
    elif "quniform" in distribution.lower():
        parameter_distr = quniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None),
            q=kwargs.get("q", None)
        )
    elif "uniform" in distribution.lower():
        parameter_distr = uniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None)
        )
    elif "qlognormal" in distribution.lower():
        parameter_distr = qlognormal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None),
            q=kwargs.get("q", None)
        )
    elif "lognormal" in distribution.lower():
        parameter_distr = lognormal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None)
        )
    elif "qnormal" in distribution.lower():
        parameter_distr = qnormal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None),
            q=kwargs.get("q", None)
        )
    elif "normal" in distribution.lower():
        parameter_distr = normal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None)
        )
    else:
        raise RunConfigurationException(
            "Parameter distribution not defined in settings. Please choose "
            "between 'choice', 'randint', 'uniform', 'quniform', 'loguniform', "
            "'qloguniform', 'normal', 'qnormal', 'lognormal' and 'qlognormal'.")
    return parameter_distr
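# Usage sketch for the helper above; keyword names follow the kwargs the function reads,
# and the values are illustrative.
batch_size = get_parameter_distribution("choice", options=[16, 32, 64])
learning_rate = get_parameter_distribution("loguniform", min_value=-6, max_value=-1)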
Example #8
def get_parameter_distribution(parameter_name, parameter_setting):
    # NB: substring checks go from most to least specific, otherwise
    # e.g. "quniform" would be caught by the "uniform" branch first.
    if "choice" in parameter_setting["distribution"]:
        parameter_distr = choice(parameter_setting["parameters"]["options"])
    elif "randint" in parameter_setting["distribution"]:
        parameter_distr = randint(
            upper=parameter_setting["parameters"]["upper"])
    elif "qloguniform" in parameter_setting["distribution"]:
        parameter_distr = qloguniform(
            min_value=parameter_setting["parameters"]["min_value"],
            max_value=parameter_setting["parameters"]["max_value"],
            q=parameter_setting["parameters"]["q"])
    elif "loguniform" in parameter_setting["distribution"]:
        parameter_distr = loguniform(
            min_value=parameter_setting["parameters"]["min_value"],
            max_value=parameter_setting["parameters"]["max_value"])
    elif "quniform" in parameter_setting["distribution"]:
        parameter_distr = quniform(
            min_value=parameter_setting["parameters"]["min_value"],
            max_value=parameter_setting["parameters"]["max_value"],
            q=parameter_setting["parameters"]["q"])
    elif "uniform" in parameter_setting["distribution"]:
        parameter_distr = uniform(
            min_value=parameter_setting["parameters"]["min_value"],
            max_value=parameter_setting["parameters"]["max_value"])
    elif "qlognormal" in parameter_setting["distribution"]:
        parameter_distr = qlognormal(
            mu=parameter_setting["parameters"]["mu"],
            sigma=parameter_setting["parameters"]["sigma"],
            q=parameter_setting["parameters"]["q"])
    elif "lognormal" in parameter_setting["distribution"]:
        parameter_distr = lognormal(
            mu=parameter_setting["parameters"]["mu"],
            sigma=parameter_setting["parameters"]["sigma"])
    elif "qnormal" in parameter_setting["distribution"]:
        parameter_distr = qnormal(
            mu=parameter_setting["parameters"]["mu"],
            sigma=parameter_setting["parameters"]["sigma"],
            q=parameter_setting["parameters"]["q"])
    elif "normal" in parameter_setting["distribution"]:
        parameter_distr = normal(
            mu=parameter_setting["parameters"]["mu"],
            sigma=parameter_setting["parameters"]["sigma"])
    else:
        raise RunConfigurationException(
            "Parameter distribution for parameter {} not defined in settings. "
            "Please choose between 'choice', 'randint', 'uniform', 'quniform', "
            "'loguniform', 'qloguniform', 'normal', 'qnormal', 'lognormal' "
            "and 'qlognormal'.".format(parameter_name))
    return parameter_distr
def get_cross_validation_hyperdrive_config(
        self, run_config: ScriptRunConfig) -> HyperDriveConfig:
    """
    Returns a configuration for AzureML Hyperdrive that varies the cross validation split index.
    Because this adds a val/Loss metric, it is important that, when subclassing LightningContainer,
    your implementation of LightningModule logs val/Loss. There is an example of this in
    HelloRegression's validation_step method.
    :param run_config: The AzureML run configuration object for training an individual model.
    :return: A hyperdrive configuration object.
    """
    return HyperDriveConfig(
        run_config=run_config,
        hyperparameter_sampling=GridParameterSampling(
            parameter_space={
                CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
                choice(list(range(self.number_of_cross_validation_splits)))
            }),
        primary_metric_name=TrackedMetrics.Val_Loss.value,
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=self.number_of_cross_validation_splits)
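# A minimal sketch (assumed; not the HelloRegression source) of the val/Loss logging
# requirement described in the docstring above: the LightningModule must log the loss
# under exactly the name that HyperDrive optimizes.
import torch.nn.functional as F
from pytorch_lightning import LightningModule

class MyRegression(LightningModule):  # illustrative stand-in for HelloRegression
    def validation_step(self, batch, batch_idx):
        x, y = batch
        loss = F.mse_loss(self(x), y)
        self.log("val/Loss", loss)  # must match TrackedMetrics.Val_Loss.value
        return loss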
Example #10
est = Estimator(
    source_directory=source_directory,
    script_params={"--data-folder": ds.as_mount()},
    compute_target=compute_target,
    pip_packages=GeneralConfig.pip_packages,
    entry_script=PathsConfig.entry_script,
    use_gpu=True,
    custom_docker_image=settings["IMAGE_NAME"],
)
if GeneralConfig.hyperdrive:
    if GeneralConfig.architecture_type == "PretrainedResNet50":
        hyperparams_space = HyperdriveConfig.pretrained_resnet50_hyperparams_space
    else:
        raise NotImplementedError
    hyperparams_space_format = {
        parameter: choice(parameter_range)
        for parameter, parameter_range in hyperparams_space.items()
    }
    parameters_sampling = RandomParameterSampling(hyperparams_space_format)
    policy = BanditPolicy(
        evaluation_interval=HyperdriveConfig.evaluation_interval,
        slack_factor=HyperdriveConfig.slack_factor,
    )
    hdc = HyperDriveConfig(
        estimator=est,
        hyperparameter_sampling=parameters_sampling,
        policy=policy,
        primary_metric_name="Accuracy",
        primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=HyperdriveConfig.max_total_runs,
        max_concurrent_runs=HyperdriveConfig.max_concurrent_runs,
args = [
    '--input-data', ds.as_named_input('train_data'),
    #'--num-topics', 10,
    '--chunksize', 2000,
    '--passes', 20,
    '--iterations', 400
]

src = ScriptRunConfig(source_directory="./topicmodel",
                      script='train.py',
                      arguments=args,
                      compute_target=compute_target,
                      environment=env)

param_sampling = RandomParameterSampling({
    "--num-topics": choice(5, 10, 15, 20)
})

# Submit experiment

hd = HyperDriveConfig(run_config=src,
                      hyperparameter_sampling=param_sampling,
                      primary_metric_name="c_v",
                      primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                      max_total_runs=100,
                      max_concurrent_runs=4)

run = exp.submit(config=hd)

run.wait_for_completion(show_output=False)
Example #12
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.estimator import Estimator
import os

from azureml.train.hyperdrive import normal, uniform, choice

# Specify parameter sampler
ps = RandomParameterSampling({
    "learning_rate": uniform(0.05, 0.1),
    "batch_size": choice(16, 32, 64, 128)
})

# Specify a Policy, check job every 2 iterations
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# workspaceblobstore is the default blob storage
#src.run_config.source_directory_data_store = "workspaceblobstore"

# Create a SKLearn estimator for use with train.py
est = SKLearn("./training",
              script_params=None,
              compute_target=compute_target,
Example #13
                     'keras==2.0.8', 'theano', 'tensorflow==1.8.0',
                     'tensorflow-gpu==1.8.0', 'matplotlib', 'horovod', 'hickle'
                 ],
                 entry_script='train.py',
                 use_gpu=True,
                 node_count=1)

# run = exp.submit(est)

# print(run)

# run.wait_for_completion(show_output=True)

ps = RandomParameterSampling({
    '--batch_size':
    choice(2, 4, 8, 16),
    '--filter_sizes':
    choice("3 3 3", "4 4 4", "5 5 5"),
    '--stack_sizes':
    choice("48 96 192", "36 72 144", "12 24 48"),
    '--learning_rate':
    loguniform(-6, -1),
    '--lr_decay':
    loguniform(-9, -1)
})
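# NB (assumption): the space-separated strings above reach train.py verbatim as single
# command-line arguments; the script presumably parses them into per-layer lists itself.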

policy = BanditPolicy(evaluation_interval=2,
                      slack_factor=0.1)  #, delay_evaluation=20)

hdc = HyperDriveRunConfig(estimator=est,
                          hyperparameter_sampling=ps,
Example #14
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive import choice
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.policy import TruncationSelectionPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.core import ScriptRunConfig
import os

# Specify parameter sampler
# https://xgboost.readthedocs.io/en/latest/parameter.html
ps = {
    '--num_boost_round': choice(5, 10, 20, 50),
    '--max_depth': choice(3, 5, 8),
    '--learning_rate': choice(0.001, 0.005, 0.01, 0.05),
    '--gamma': choice(0, 1, 2),
    '--reg_lambda': choice(0.1, 1, 2, 5),
    '--scale_pos_weight': choice(1, 2)
}
samp = RandomParameterSampling(parameter_space=ps)

# Specify a Policy
policy = TruncationSelectionPolicy(
    truncation_percentage=50)  # BanditPolicy(slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")
def build_pipeline(dataset, ws, config):
    print("building pipeline for dataset %s in workspace %s" %
          (dataset, ws.name))

    # both branches of the original hostname check set the same value
    base_dir = '.'

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)

    shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_build.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'train.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'config.json'), script_folder)

    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:
        print("creating new compute target")

        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            max_nodes=4,
            idle_seconds_before_scaledown=1800)
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name,
                                                  provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = config['gpu_compute']

    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            max_nodes=5,
            idle_seconds_before_scaledown=1800)

        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name,
                                                  provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it uses the scale settings for the cluster
        gpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster.
    print(gpu_compute_target.get_status().serialize())

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(conda_packages=["py-opencv=3.4.2"],
                                      pip_packages=[
                                          "azure-storage-blob==1.5.0",
                                          "hickle==3.4.3", "requests==2.21.0",
                                          "sklearn", "pandas==0.24.2",
                                          "azureml-sdk==1.0.21",
                                          "numpy==1.16.2", "pillow==6.0.0"
                                      ])
    gpu_cd = CondaDependencies.create(pip_packages=[
        "keras==2.0.8", "theano==1.0.4", "tensorflow==1.8.0",
        "tensorflow-gpu==1.8.0", "hickle==3.4.3", "matplotlib==3.0.3",
        "seaborn==0.9.0", "requests==2.21.0", "bs4==0.0.1", "imageio==2.5.0",
        "sklearn", "pandas==0.24.2", "azureml-sdk==1.0.21", "numpy==1.16.2"
    ])

    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    gpu_compute_run_config = RunConfiguration(conda_dependencies=gpu_cd)
    gpu_compute_run_config.environment.docker.enabled = True
    gpu_compute_run_config.environment.docker.gpu_support = True
    gpu_compute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
    gpu_compute_run_config.environment.spark.precache_packages = False

    print("PipelineData object created")

    video_data = DataReference(datastore=def_blob_store,
                               data_reference_name="video_data",
                               path_on_datastore=os.path.join(
                                   "prednet", "data", "video", dataset))

    # Intermediate PipelineData objects passed between the pipeline steps.
    raw_data = PipelineData("raw_video_frames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    print("DataReference object created")

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py",
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.'])
    print("video_decode created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(name='prepare_data',
                                 script_name="data_preparation.py",
                                 arguments=[
                                     "--input_data", raw_data, "--output_data",
                                     preprocessed_data
                                 ],
                                 inputs=[raw_data],
                                 outputs=[preprocessed_data],
                                 compute_target=cpu_compute_target,
                                 source_directory=script_folder,
                                 runconfig=cpu_compute_run_config,
                                 allow_reuse=True,
                                 hash_paths=['.'])
    data_prep.run_after(video_decoding)

    print("data_prep created")

    est = TensorFlow(source_directory=script_folder,
                     compute_target=gpu_compute_target,
                     pip_packages=[
                         'keras==2.0.8', 'theano', 'tensorflow==1.8.0',
                         'tensorflow-gpu==1.8.0', 'matplotlib', 'horovod',
                         'hickle'
                     ],
                     entry_script='train.py',
                     use_gpu=True,
                     node_count=1)

    ps = RandomParameterSampling({
        '--batch_size':
        choice(2, 4, 8, 16),
        '--filter_sizes':
        choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        '--stack_sizes':
        choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),  #, "48, 96"),
        '--learning_rate':
        loguniform(-6, -1),
        '--lr_decay':
        loguniform(-9, -1),
        '--freeze_layers':
        choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "1", "2",
               "3"),
        '--transfer_learning':
        choice("True", "False")
    })

    policy = BanditPolicy(evaluation_interval=2,
                          slack_factor=0.1,
                          delay_evaluation=20)

    hdc = HyperDriveRunConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        policy=policy,
        primary_metric_name='val_loss',
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=5,  #100,
        max_concurrent_runs=5,  #10,
        max_duration_minutes=60 * 6)

    hd_step = HyperDriveStep(name="train_w_hyperdrive",
                             hyperdrive_run_config=hdc,
                             estimator_entry_script_arguments=[
                                 '--data-folder', preprocessed_data,
                                 '--remote_execution'
                             ],
                             inputs=[preprocessed_data],
                             metrics_output=data_metrics,
                             allow_reuse=True)
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=gpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.'])
    registration_step.run_after(hd_step)

    pipeline = Pipeline(
        workspace=ws,
        steps=[video_decoding, data_prep, hd_step, registration_step])
    print("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete")

    pipeline_name = 'prednet_' + dataset
    pipeline.publish(name=pipeline_name)

    return pipeline_name
Example #16
        ],
        compute_target=target,
        environment=env,
    )

    # set up hyperdrive search space
    convert_base = lambda x: float(np.log(x))
    search_space = {
        "--learning_rate":
        hyperdrive.loguniform(
            convert_base(1e-6),
            convert_base(5e-2)),  # NB. loguniform on [exp(min), exp(max)]
        "--weight_decay":
        hyperdrive.uniform(5e-3, 15e-2),
        "--per_device_train_batch_size":
        hyperdrive.choice([16, 32]),
    }
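    # Worked check of convert_base: ln(1e-6) ~= -13.8 and ln(5e-2) ~= -3.0, so
    # hyperdrive.loguniform(-13.8, -3.0) samples exp(U(-13.8, -3.0)), i.e. [1e-6, 5e-2].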

    hyperparameter_sampling = RandomParameterSampling(search_space)

    policy = TruncationSelectionPolicy(truncation_percentage=50,
                                       evaluation_interval=2,
                                       delay_evaluation=0)

    hyperdrive_config = HyperDriveConfig(
        run_config=config,
        hyperparameter_sampling=hyperparameter_sampling,
        policy=policy,
        primary_metric_name="eval_matthews_correlation",
        primary_metric_goal=hyperdrive.PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=20,
# In AzureML, we can achieve hyperparameter tuning through an experiment that consists of a hyperdrive run, which initiates a child run
# for each hyperparameter combination to be tested. Each child run uses a training script with parameterized hyperparameter values to train a model
# and logs the target performance metric achieved by the trained model.

# for hyperparameter tuning we first have to define the search space

from azureml.train.hyperdrive import choice, normal

param_space = {

    '--batch_size' : choice(16,32,64),
    '--learning_rate': normal(10,3)
}


from azureml.train.hyperdrive import GridParameterSampling
# for grid search, every parameter must use a discrete distribution such as choice
param_space = {
    "--batch_size" : choice(16,32,64),
    '--learning_rate' : choice(0.01, 0.02, 0.03)
}

param_sampling = GridParameterSampling(param_space)

# distributions can be of these types

# for discrete:
# qnormal, quniform, qlognormal, qloguniform

# for continuous:
# normal, uniform, lognormal, loguniform
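# A short illustrative sketch pairing each family with its expression (values arbitrary):
from azureml.train.hyperdrive import quniform, loguniform, normal

step_lr = quniform(0.01, 0.1, 0.01)  # discrete: 0.01-spaced values in [0.01, 0.1]
log_lr = loguniform(-6, -1)          # continuous, sampled on a log scale
init_scale = normal(10, 3)           # continuous, mean 10, standard deviation 3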
Example #18
def build_pipeline(dataset, ws, config):
    print("building pipeline for dataset %s in workspace %s" % (dataset, ws.name))

    base_dir = '.'
        
    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)
    
    shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_create.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'train.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'config.json'), script_folder)
    
    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:
        print("creating new compute target")
        
        provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                                    max_nodes=4,
                                                                    idle_seconds_before_scaledown=1800)    
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name, provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
        
    # use get_status() to get a detailed status for the current cluster. 
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = config['gpu_compute']

    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', 
                                                                    max_nodes=10,
                                                                    idle_seconds_before_scaledown=1800)

        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name, provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout. 
        # if no min node count is provided it uses the scale settings for the cluster
        gpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster. 
    try:
        print(gpu_compute_target.get_status().serialize())
    except BaseException as e:
        print("Could not get status of compute target.")
        print(e)

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(
        conda_packages=["py-opencv=3.4.2"],
        pip_indexurl='https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D',
        pip_packages=["azure-storage-blob==1.5.0", "hickle==3.4.3",
                      "requests==2.21.0", "sklearn", "pandas==0.24.2",
                      "azureml-sdk", "numpy==1.16.2", "pillow==6.0.0"])
    
    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    print("PipelineData object created")

    # DataReference to where video data is stored.
    video_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="video_data",
        path_on_datastore=os.path.join("prednet", "data", "video", dataset))
    print("DataReference object created")
        
    # Intermediate PipelineData objects passed between the pipeline steps.
    raw_data = PipelineData("raw_video_frames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames", datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py", 
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target, 
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    print("video_decode step created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name='prepare_data',
        script_name="data_preparation.py", 
        arguments=["--input_data", raw_data, "--output_data", preprocessed_data],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target, 
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    data_prep.run_after(video_decoding)

    print("data_prep step created")


    # configure access to ACR for pulling our custom docker image
    acr = ContainerRegistry()
    acr.address = config['acr_address']
    acr.username = config['acr_username']
    acr.password = config['acr_password']
    
    est = Estimator(source_directory=script_folder,
                    compute_target=gpu_compute_target,
                    entry_script='train.py', 
                    use_gpu=True,
                    node_count=1,
                    custom_docker_image = "wopauli_1.8-gpu:1",
                    image_registry_details=acr,
                    user_managed=True
                    )

    ps = RandomParameterSampling(
        {
            '--batch_size': choice(1, 2, 4, 8),
            '--filter_sizes': choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
            '--stack_sizes': choice("48, 96, 192", "36, 72, 144", "12, 24, 48"), #, "48, 96"),
            '--learning_rate': loguniform(-6, -1),
            '--lr_decay': loguniform(-9, -1),
            '--freeze_layers': choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
            '--transfer_learning': choice("True", "False")
        }
    )

    policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=10)

    hdc = HyperDriveConfig(estimator=est, 
                            hyperparameter_sampling=ps, 
                            policy=policy, 
                            primary_metric_name='val_loss', 
                            primary_metric_goal=PrimaryMetricGoal.MINIMIZE, 
                            max_total_runs=10,
                            max_concurrent_runs=5, 
                            max_duration_minutes=60*6
                            )

    hd_step = HyperDriveStep(
        name="train_w_hyperdrive",
        hyperdrive_run_config=hdc,
        estimator_entry_script_arguments=[
            '--data-folder', preprocessed_data, 
            '--remote_execution',
            '--dataset', dataset
            ],
        inputs=[preprocessed_data],
        metrics_output = data_metrics,
        allow_reuse=True
    )
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=cpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.']
    )
    registration_step.run_after(hd_step)

    pipeline = Pipeline(workspace=ws, steps=[video_decoding, data_prep, hd_step, registration_step])
    print ("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete") 

    pipeline_name = 'prednet_' + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)
    

    schedule = Schedule.create(workspace=ws, name=pipeline_name + "_sch",
                            pipeline_id=published_pipeline.id, 
                            experiment_name=pipeline_name,
                            datastore=def_blob_store,
                            wait_for_provisioning=True,
                            description="Datastore scheduler for Pipeline" + pipeline_name,
                            path_on_datastore=os.path.join('prednet/data/video', dataset, 'Train'),
                            polling_interval=1
                            )

    return pipeline_name
Example #19
# -lognormal
# -loguniform


#continuous hyperparams

#normal
#uniform
#lognormal
#loguniform


from azureml.train.hyperdrive import choice, normal

param_space = {
    '--batch_size': choice(10, 12, 15),
    '--learning_rate': normal(10, 3)
}


# configuring sampling - grid, random, bayesian

from azureml.train.hyperdrive import GridParameterSampling, RandomParameterSampling, BayesianParameterSampling


param_sampling = GridParameterSampling(param_space)


# configuring early stopping

# bandit policy -> BanditPolicy(slack_amount=0.2, evaluation_interval=1, delay_evaluation=5)
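# A hedged sketch of the three early-termination policies azureml exposes; the values are
# illustrative, and which policy fits depends on how the metric evolves across intervals.
from azureml.train.hyperdrive import (BanditPolicy, MedianStoppingPolicy,
                                      TruncationSelectionPolicy)

# stop runs whose metric trails the best run by a slack amount (or slack factor)
bandit = BanditPolicy(slack_amount=0.2, evaluation_interval=1, delay_evaluation=5)
# stop runs whose running-average metric falls below the median of all runs
median = MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=5)
# stop the worst-performing percentage of runs at each evaluation interval
truncation = TruncationSelectionPolicy(truncation_percentage=20, evaluation_interval=1)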
Example #20
                                script='training03.py',
                                arguments=[
                                    '--max_depth', 5, '--num_leaves', 50,
                                    '--subsample', 0.9, '--learning_rate',
                                    0.01, '--min_data_in_leaf', 50,
                                    '--lambda_l1', 20, '--lambda_l2', 20,
                                    '--n_estimators', 1000
                                ],
                                environment=sklearn_env,
                                compute_target=training_cluster)

# Sample a range of parameter values
params = BayesianParameterSampling({
    # Bayesian sampling chooses new combinations based on how previous runs performed
    '--max_depth':
    choice(list(range(2, 20))),
    '--num_leaves':
    choice(list(range(6, 251))),
    '--subsample':
    uniform(0.5, 1),
    '--learning_rate':
    uniform(0.005, 0.25),
    '--min_data_in_leaf':
    choice(list(range(2, 501))),
    '--lambda_l1':
    choice(list(range(201))),
    '--lambda_l2':
    choice(list(range(201))),
    '--n_estimators':
    choice(list(range(100, 4001, 100)))
})
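# A hedged sketch (assumed, not from the source) wiring the Bayesian sampler above into a
# HyperDrive run. Bayesian sampling does not support early-termination policies, so no
# policy is passed; `script_config` stands for the ScriptRunConfig begun above, and the
# metric name is an assumption.
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

hyperdrive = HyperDriveConfig(run_config=script_config,
                              hyperparameter_sampling=params,
                              policy=None,
                              primary_metric_name='AUC',
                              primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                              max_total_runs=50,
                              max_concurrent_runs=4)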
Example #21
def build_prednet_pipeline(dataset, ws):
    print("building pipeline for dataset %s in workspace %s" %
          (dataset, ws.name))

    base_dir = "."

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = "./scripts"
    os.makedirs(script_folder)

    shutil.copytree(os.path.join(base_dir, "models"),
                    os.path.join(base_dir, script_folder, "models"))
    shutil.copy(os.path.join(base_dir, "train.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "data_preparation.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "register_prednet.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "batch_scoring.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "train_clf.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "register_clf.py"), script_folder)

    cpu_compute_name = args.cpu_compute_name
    cpu_compute_target = AmlCompute(ws, cpu_compute_name)
    print("found existing compute target: %s" % cpu_compute_name)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = args.gpu_compute_name

    gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
    print(gpu_compute_target.get_status().serialize())

    env = Environment.get(ws, "prednet")

    # Runconfigs
    runconfig = RunConfiguration()
    runconfig.environment = env
    print("PipelineData object created")

    # DataReference to where raw data is stored.
    raw_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="raw_data",
        path_on_datastore=os.path.join("prednet", "data", "raw_data"),
    )
    print("DataReference object created")

    # Intermediate PipelineData objects passed between the pipeline steps.
    preprocessed_data = PipelineData("preprocessed_data",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    hd_child_cwd = PipelineData("prednet_model_path", datastore=def_blob_store)
    # prednet_path = PipelineData("outputs", datastore=def_blob_store)
    scored_data = PipelineData("scored_data", datastore=def_blob_store)
    model_path = PipelineData("model_path", datastore=def_blob_store)

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name="prepare_data",
        script_name="data_preparation.py",
        arguments=[
            "--raw_data",
            raw_data,
            "--preprocessed_data",
            preprocessed_data,
            "--dataset",
            dataset,
        ],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    # data_prep.run_after(video_decoding)

    print("data_prep step created")

    est = Estimator(
        source_directory=script_folder,
        compute_target=gpu_compute_target,
        entry_script="train.py",
        node_count=1,
        environment_definition=env,
    )

    ps = BayesianParameterSampling({
        "--batch_size":
        choice(1, 2, 4, 10),
        "--filter_sizes":
        choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        "--stack_sizes":
        choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),
        "--learning_rate":
        uniform(1e-6, 1e-3),
        "--lr_decay":
        uniform(1e-9, 1e-2),
        "--freeze_layers":
        choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
        # "--fine_tuning": choice("True", "False"),
    })

    hdc = HyperDriveConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        primary_metric_name="val_loss",
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=3,
        max_concurrent_runs=3,
        max_duration_minutes=60 * 6,
    )

    train_prednet = HyperDriveStep(
        "train_w_hyperdrive",
        hdc,
        estimator_entry_script_arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--remote_execution",
            "--dataset",
            dataset,
        ],
        inputs=[preprocessed_data],
        outputs=[hd_child_cwd],
        metrics_output=data_metrics,
        allow_reuse=True,
    )
    train_prednet.run_after(data_prep)

    register_prednet = PythonScriptStep(
        name="register_prednet",
        script_name="register_prednet.py",
        arguments=[
            "--data_metrics",
            data_metrics,
        ],
        compute_target=cpu_compute_target,
        inputs=[data_metrics, hd_child_cwd],
        source_directory=script_folder,
        allow_reuse=True,
    )
    register_prednet.run_after(train_prednet)

    batch_scoring = PythonScriptStep(
        name="batch_scoring",
        script_name="batch_scoring.py",
        arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--scored_data",
            scored_data,
            "--dataset",
            dataset,
            # "--prednet_path",
            # prednet_path
        ],
        compute_target=gpu_compute_target,
        inputs=[preprocessed_data],
        outputs=[scored_data],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    batch_scoring.run_after(register_prednet)

    train_clf = PythonScriptStep(
        name="train_clf",
        script_name="train_clf.py",
        arguments=[
            "--preprocessed_data", preprocessed_data, "--scored_data",
            scored_data, "--model_path", model_path
        ],
        compute_target=cpu_compute_target,
        inputs=[preprocessed_data, scored_data],
        outputs=[model_path],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    train_clf.run_after(batch_scoring)

    register_clf = PythonScriptStep(
        name="register_clf",
        script_name="register_clf.py",
        arguments=["--model_path", model_path],
        inputs=[model_path],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        allow_reuse=True,
        runconfig=runconfig,
    )
    register_clf.run_after(train_clf)

    pipeline = Pipeline(
        workspace=ws,
        steps=[
            data_prep,
            train_prednet,
            register_prednet,
            batch_scoring,
            train_clf,
            register_clf,
        ],
    )
    pipeline.validate()

    pipeline_name = "prednet_" + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)

    _ = Schedule.create(
        workspace=ws,
        name=pipeline_name + "_sch",
        pipeline_id=published_pipeline.id,
        experiment_name=pipeline_name,
        datastore=def_blob_store,
        wait_for_provisioning=True,
        description="Datastore scheduler for Pipeline" + pipeline_name,
        path_on_datastore=os.path.join("prednet/data/raw_data", dataset,
                                       "Train"),
        polling_interval=60 * 24,
    )

    published_pipeline.submit(ws, pipeline_name)
Example #22
#%%

# Create Experiment object - this will be used to submit the Hyperdrive run and store all the given parameters
experiment_hd = Experiment(workspace=ws, name='hyperdrive')

#%% [markdown]

###### Create Random Parameter Sampler

#%%

# Parameter space to sweep over - uses Random Parameter Sampling
ps = hd.RandomParameterSampling({
    '--network-name':
    hd.choice('densenet201', 'resnet152', 'resnet34', 'alexnet', 'vgg19_bn'),
    '--minibatch-size':
    hd.choice(8, 16),
    '--learning-rate':
    hd.uniform(0.00001, 0.001),
    '--step-size':
    hd.choice(10, 25, 50),  # How often should the learning rate decay
    '--gamma':
    hd.uniform(
        0.7, 0.99
    ),  # The decay applied to the learning rate every {step-size} steps
    '--optimizer-type':
    hd.choice('sgd', 'adam')
})

#%% [markdown]
from azureml.core import ScriptRunConfig, Experiment

script_run = ScriptRunConfig(source_directory=".",
                             script="hyperdrive_script.py",
                             arguments=["--input_data", input_ds.as_named_input("raw_data")],
                             environment=my_env,
                             compute_target=cluster)

# creating the hyperparameter search space

from azureml.train.hyperdrive import GridParameterSampling, choice

hyper_params = GridParameterSampling({
    '--n_estimators': choice(10,20,30,100),
    '--min_samples_leaf': choice(1,2,5)
})


# configuring the HyperDrive run
from azureml.train.hyperdrive import HyperDriveConfig,PrimaryMetricGoal

hyper_config = HyperDriveConfig(run_config=script_run,
                                hyperparameter_sampling=hyper_params,
                                policy=None,
                                primary_metric_name='accuracy',
                                primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                max_total_runs=20,
                                max_concurrent_runs=2)
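# A hedged follow-up sketch (assumed, not from the source): submitting the config and
# retrieving the best child run, mirroring the submit pattern in Example #3 above.
experiment = Experiment(workspace=ws, name='hyperdrive-demo')  # assumes a Workspace `ws`
run = experiment.submit(config=hyper_config)
run.wait_for_completion(show_output=True)
best_run = run.get_best_run_by_primary_metric()
print(best_run.get_metrics())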
Example #24
pred = imported_model(X_test)
y_hat = np.argmax(pred, axis=1)

# print the first 30 labels and predictions
print('labels:  \t', y_test[:30])
print('predictions:\t', y_hat[:30])

print("Accuracy on the test set:", np.average(y_hat == y_test))

from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, loguniform

ps = RandomParameterSampling({
    '--batch-size':
    choice(32, 64, 128),
    '--first-layer-neurons':
    choice(16, 64, 128, 256, 512),
    '--second-layer-neurons':
    choice(16, 64, 256, 512),
    '--learning-rate':
    loguniform(-6, -1)
})

est = TensorFlow(source_directory=script_folder,
                 script_params={
                     '--data-folder':
                     dataset.as_named_input('mnist').as_mount()
                 },
                 compute_target=compute_target,
                 entry_script='tf_mnist2.py',
Example #25
                conda_packages=['cudatoolkit=10.0.130'],
                entry_script='kd_squeezenet.py',
                use_gpu=True,
                node_count=1)

from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.pipeline.steps import HyperDriveStep
from azureml.train.hyperdrive import choice, loguniform, uniform

ps = RandomParameterSampling({
    '--learning_rate': uniform(1e-3, 2e-2),
    '--momentum': uniform(.1, .95),
    '--weight_decay': loguniform(-5, -3),
    '--temperature': uniform(1, 9),
    # '--lambda_const': uniform(.1, .3),
    '--transfer_learning': choice("True", "False")
})

policy = BanditPolicy(evaluation_interval=2,
                      slack_factor=0.1,
                      delay_evaluation=10)

hdc = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='val_loss',
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    max_total_runs=5,  #100,
    max_concurrent_runs=5)
Example #26
experiment = Experiment(workspace=ws, name=experiment_name)

# Create a folder for the experiment files
experiment_folder = './' + experiment_name
os.makedirs(experiment_folder, exist_ok=True)

print("Experiment:", experiment.name)

#Fetch GPU cluster for computations
gpu_cluster = ComputeTarget(workspace=ws, name='demo-GPU-cluster')

# Sample a range of parameter values
params = GridParameterSampling({
    # There's only one parameter, so grid sampling will try each value - with multiple parameters it would try every combination
    '--regularization':
    choice(0.001, 0.005, 0.01, 0.05, 0.1, 1.0)
})
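# With a single parameter taking six discrete values, grid sampling launches exactly six child runs.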

# Set evaluation policy to stop poorly performing training runs early
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Get the training dataset
diabetes_ds = ws.datasets.get("diabetes_dataset")

# Create an estimator that uses the remote compute
hyper_estimator = SKLearn(
    source_directory=experiment_folder,
    inputs=[diabetes_ds.as_named_input('diabetes')
            ],  # Pass the dataset as an input
    compute_target=gpu_cluster,
    conda_packages=['pandas', 'ipykernel', 'matplotlib'],
Example #27
        path=ds.path('odFridgeObjects/validation_annotations.jsonl'))
    validation_dataset = validation_dataset.register(
        workspace=ws, name=validation_dataset_name)

print("Training dataset name: " + training_dataset.name)
print("Validation dataset name: " + validation_dataset.name)

training_dataset.to_pandas_dataframe()

image_config_yolov5 = AutoMLImageConfig(
    task='image-object-detection',
    compute_target=compute_target,
    training_data=training_dataset,
    validation_data=validation_dataset,
    hyperparameter_sampling=GridParameterSampling(
        {'model_name': choice('yolov5')}))

automl_image_run = experiment.submit(image_config_yolov5)

automl_image_run.wait_for_completion(wait_post_processing=True)

parameter_space = {
    'model':
    choice(
        {
            'model_name': choice('yolov5'),
            'learning_rate': uniform(0.0001, 0.01),
            #'model_size': choice('small', 'medium'), # model-specific
            'img_size': choice(640, 704, 768),  # model-specific
        },
        {
Example #28
        if config.get('type') == 'classification':
            script_params = {
                '--task': int(task),
                '--use_cuda': '',
                '--register_model': ''
            }
            est = PyTorch(source_directory=script_folder,
                          compute_target=compute_target,
                          script_params=script_params,
                          entry_script='src/classification.py',
                          pip_packages=pip_packages,
                          use_gpu=True)

            ### Hyperparameters params
            if language == 'en':
                model_type = choice('roberta', 'bert', 'albert')
            elif language == 'de':
                model_type = choice('distilbert', 'bert', 'roberta')
            elif language == 'it' or language == 'es':
                model_type = choice('bert')
            elif language == 'fr':
                model_type = choice('camembert', 'bert')
            param_sampling = RandomParameterSampling({
                '--n_epochs':
                choice(3, 5, 10),
                '--learning_rate':
                choice(1e-5, 2e-5, 3e-5, 4e-5),
                '--model_type':
                model_type,
                '--max_seq_len':
                choice(128, 256),
Example #29
print("Training dataset name: " + training_dataset.name)
print("Validation dataset name: " + validation_dataset.name)

training_dataset.to_pandas_dataframe()

from azureml.train.automl import AutoMLImageConfig
from azureml.train.hyperdrive import GridParameterSampling
from azureml.train.hyperdrive import choice

image_config_maskrcnn = AutoMLImageConfig(
    task='image-instance-segmentation',
    compute_target=compute_target,
    training_data=training_dataset,
    validation_data=validation_dataset,
    hyperparameter_sampling=GridParameterSampling(
        {'model_name': choice('maskrcnn_resnet50_fpn')}))

automl_image_run = experiment.submit(image_config_maskrcnn)

automl_image_run.wait_for_completion(wait_post_processing=True)

from azureml.train.automl import AutoMLImageConfig
from azureml.train.hyperdrive import GridParameterSampling, RandomParameterSampling, BayesianParameterSampling
from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, uniform

parameter_space = {
    'model_name': choice('maskrcnn_resnet50_fpn'),
    'learning_rate': uniform(0.0001, 0.001),
    #'warmup_cosine_lr_warmup_epochs': choice(0, 3),
    'optimizer': choice('sgd', 'adam', 'adamw'),
Example #30
# define search space
from azureml.train.hyperdrive import choice, normal

param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': normal(10, 3)
              }

# grid sampling example
from azureml.train.hyperdrive import GridParameterSampling, choice

param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': choice(0.01, 0.1, 1.0)
              }

param_sampling = GridParameterSampling(param_space)             


#random sampling
from azureml.train.hyperdrive import RandomParameterSampling, choice, normal

param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': normal(10, 3)
              }

param_sampling = RandomParameterSampling(param_space)


#Bayesian sampling
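# A hedged sketch completing the truncated Bayesian example, mirroring the grid and
# random patterns above. NB: BayesianParameterSampling supports only choice, uniform
# and quniform expressions, so normal(10, 3) is swapped for uniform here.
from azureml.train.hyperdrive import BayesianParameterSampling, choice, uniform

param_space = {
    '--batch_size': choice(16, 32, 64),
    '--learning_rate': uniform(0.001, 0.1)
}

param_sampling = BayesianParameterSampling(param_space)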