Code Example #1
 def get_cross_validation_hyperdrive_sampler(self) -> GridParameterSampling:
     if self.perform_sub_fold_cross_validation:
         return GridParameterSampling(parameter_space={
             CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: choice(list(range(self.number_of_cross_validation_splits))),
             CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY: choice(list(range(
                 self.number_of_cross_validation_splits_per_fold))),
         })
     else:
         return super().get_cross_validation_hyperdrive_sampler()
Code Example #2
def test_get_hyperdrive_config(
        number_of_cross_validation_splits: int,
        number_of_cross_validation_splits_per_fold: int,
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Test that the HyperDrive config is built with the expected sampler and number of total runs
    """
    if number_of_cross_validation_splits_per_fold > 0:
        config = HyperDriveTestModelScalar()
        config.number_of_cross_validation_splits_per_fold = number_of_cross_validation_splits_per_fold

    else:
        config = HyperDriveTestModelSegmentation()

    config.number_of_cross_validation_splits = number_of_cross_validation_splits
    # create HyperDrive config with dummy estimator for testing
    source_config = SourceConfig(root_folder=test_output_dirs.root_dir,
                                 entry_script="something.py",
                                 conda_dependencies_files=[])
    estimator = Estimator(source_directory=source_config.root_folder,
                          entry_script=source_config.entry_script,
                          compute_target="Local")

    hd_config = config.get_hyperdrive_config(estimator=estimator)

    assert hd_config.estimator.source_directory == source_config.root_folder
    assert hd_config.estimator.run_config.script == source_config.entry_script
    assert hd_config.estimator._script_params == source_config.script_params

    if number_of_cross_validation_splits > 0 and number_of_cross_validation_splits_per_fold > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits * \
               number_of_cross_validation_splits_per_fold
    elif number_of_cross_validation_splits > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits
    else:
        assert hd_config._max_total_runs == HYPERDRIVE_TOTAL_RUNS

    if config.perform_cross_validation:
        # check sampler is as expected
        sampler = config.get_cross_validation_hyperdrive_sampler()

        expected_sampler_dict = {
            CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
            choice(list(range(number_of_cross_validation_splits)))
        }

        if number_of_cross_validation_splits_per_fold > 0:
            expected_sampler_dict[
                CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY] = choice(
                    list(range(number_of_cross_validation_splits_per_fold)))

        assert sampler._parameter_space == expected_sampler_dict
    else:
        assert vars(config.get_hyperdrive_config(estimator)) \
               == vars(_create_dummy_hyperdrive_param_search_config(estimator))
Code Example #3
def main(epochs, iterations, compute_target, concurrent_runs):
    cli_auth = AzureCliAuthentication()

    experiment = Experiment.from_directory(".", auth=cli_auth)
    ws = experiment.workspace

    cluster = ws.compute_targets[compute_target]
    food_data = ws.datastores['food_images']

    script_arguments = {"--data-dir": food_data.as_mount(), "--epochs": epochs}

    tf_est = TensorFlow(source_directory=".",
                        entry_script='code/train/train.py',
                        script_params=script_arguments,
                        compute_target=cluster,
                        conda_packages=['pillow', 'pandas'],
                        pip_packages=['click', 'seaborn'],
                        use_docker=True,
                        use_gpu=True,
                        framework_version='1.13')

    # Run on subset of food categories
    tf_est.run_config.arguments.extend(
        ['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio'])

    param_sampler = RandomParameterSampling({
        '--minibatch-size':
        choice(16, 32, 64),
        '--learning-rate':
        loguniform(-9, -6),
        '--optimizer':
        choice('rmsprop', 'adagrad', 'adam')
    })

    # Create Early Termination Policy
    etpolicy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

    # Create HyperDrive Run Configuration
    hyper_drive_config = HyperDriveConfig(
        estimator=tf_est,
        hyperparameter_sampling=param_sampler,
        policy=etpolicy,
        primary_metric_name='acc',
        primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=iterations,
        max_concurrent_runs=concurrent_runs)

    # Submit the Hyperdrive Run
    print("Submitting Hyperdrive Run")
    hd_run = experiment.submit(hyper_drive_config)
    hd_run.wait_for_completion(raise_on_error=True, show_output=True)
    print("Finishing Run")
    best_run = hd_run.get_best_run_by_primary_metric()
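    # Expose the best run ID to the Azure DevOps pipeline as a variable (VSO logging command)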
    print(f'##vso[task.setvariable variable=run_id]{best_run.id}')
Code Example #4
 def get_cross_validation_hyperdrive_sampler(self) -> GridParameterSampling:
     """
     Returns the cross validation sampler, required to sample the entire parameter space for cross validation.
     """
     return GridParameterSampling(parameter_space={
         CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: choice(list(range(self.number_of_cross_validation_splits))),
     })
Code Example #5
def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
                               test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that the HyperDrive config is built with the expected sampler and number of total runs
    """
    config = HyperDriveTestModel()

    config.number_of_cross_validation_splits = number_of_cross_validation_splits
    run_config = ScriptRunConfig(source_directory=str(test_output_dirs.root_dir),
                                 script=str(Path("something.py")),
                                 arguments=["foo"],
                                 compute_target="Local")

    hd_config = config.get_hyperdrive_config(run_config=run_config)

    if number_of_cross_validation_splits > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits
    else:
        assert hd_config._max_total_runs == HYPERDRIVE_TOTAL_RUNS

    if config.perform_cross_validation:
        # check sampler is as expected
        sampler = config.get_cross_validation_hyperdrive_sampler()

        expected_sampler_dict = {
            CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
            choice(list(range(number_of_cross_validation_splits)))
        }

        assert sampler._parameter_space == expected_sampler_dict
    else:
        assert vars(config.get_hyperdrive_config(run_config)) \
               == vars(_create_dummy_hyperdrive_param_search_config(run_config))
Code Example #6
File: module.py Project: tommywu052/automl-image-web
    def exec_SubmitAutoMLTask(self, Parameters: SubmitAutoMLTaskParameter):
        execResult = False
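        # Redirect stdout into a buffer so any printed output can be returned in ExecResult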
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()

        if self.experiment is None:
            print('Experiment is None. Please set up an experiment first.')
        else:
            try:
                image_config = AutoMLImageConfig(
                    task=Parameters.TaskType,
                    compute_target=self.compute_target,
                    training_data=self.training_dataset,
                    validation_data=self.validation_dataset,
                    hyperparameter_sampling=GridParameterSampling(
                        {'model_name': choice(Parameters.ModelChoice)}))
                self.automl_task_run = self.experiment.submit(image_config)
                print(self.automl_task_run.get_status())
                # self.automl_task_run.wait_for_completion(wait_post_processing=True)
                execResult = True
            except Exception as ex:
                print(ex)

        sys.stdout = old_stdout
        return ExecResult(execResult, mystdout.getvalue())
Code Example #7
File: utils.py Project: soniaang/ProjectGemini
def get_parameter_distribution(distribution, **kwargs):
    # Compare with equality: substring checks would send e.g. "loguniform"
    # into the "uniform" branch and "qnormal" into the "normal" branch.
    distribution = distribution.lower()
    if distribution == "choice":
        parameter_distr = choice(
            kwargs.get("options", [])
        )
    elif distribution == "randint":
        parameter_distr = randint(
            upper=kwargs.get("upper", None)
        )
    elif distribution == "uniform":
        parameter_distr = uniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None)
        )
    elif distribution == "quniform":
        parameter_distr = quniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None),
            q=kwargs.get("q", None)
        )
    elif distribution == "loguniform":
        parameter_distr = loguniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None)
        )
    elif distribution == "qloguniform":
        parameter_distr = qloguniform(
            min_value=kwargs.get("min_value", None),
            max_value=kwargs.get("max_value", None),
            q=kwargs.get("q", None)
        )
    elif distribution == "normal":
        parameter_distr = normal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None)
        )
    elif distribution == "qnormal":
        parameter_distr = qnormal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None),
            q=kwargs.get("q", None)
        )
    elif distribution == "lognormal":
        parameter_distr = lognormal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None)
        )
    elif distribution == "qlognormal":
        parameter_distr = qlognormal(
            mu=kwargs.get("mu", None),
            sigma=kwargs.get("sigma", None),
            q=kwargs.get("q", None)
        )
    else:
        raise RunConfigurationException(
            "Parameter distribution not defined in settings. Please choose between "
            "'choice', 'randint', 'uniform', 'quniform', 'loguniform', 'qloguniform', "
            "'normal', 'qnormal', 'lognormal' and 'qlognormal'")
    return parameter_distr
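
A short usage sketch for the helper above; the parameter names and values here are hypothetical:

learning_rate = get_parameter_distribution("uniform", min_value=0.01, max_value=0.1)
batch_size = get_parameter_distribution("choice", options=[16, 32, 64])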
Code Example #8
File: utils.py Project: zm-intern06/MLDevOps
def get_parameter_distribution(parameter_name, parameter_setting):
    # Compare with equality: substring checks would send e.g. "qnormal"
    # into the "normal" branch and "loguniform" into the "uniform" branch.
    distribution = parameter_setting["distribution"]
    params = parameter_setting["parameters"]
    if distribution == "choice":
        parameter_distr = choice(params["options"])
    elif distribution == "randint":
        parameter_distr = randint(upper=params["upper"])
    elif distribution == "uniform":
        parameter_distr = uniform(
            min_value=params["min_value"],
            max_value=params["max_value"])
    elif distribution == "quniform":
        parameter_distr = quniform(
            min_value=params["min_value"],
            max_value=params["max_value"],
            q=params["q"])
    elif distribution == "loguniform":
        parameter_distr = loguniform(
            min_value=params["min_value"],
            max_value=params["max_value"])
    elif distribution == "qloguniform":
        parameter_distr = qloguniform(
            min_value=params["min_value"],
            max_value=params["max_value"],
            q=params["q"])
    elif distribution == "normal":
        parameter_distr = normal(
            mu=params["mu"],
            sigma=params["sigma"])
    elif distribution == "qnormal":
        parameter_distr = qnormal(
            mu=params["mu"],
            sigma=params["sigma"],
            q=params["q"])
    elif distribution == "lognormal":
        parameter_distr = lognormal(
            mu=params["mu"],
            sigma=params["sigma"])
    elif distribution == "qlognormal":
        parameter_distr = qlognormal(
            mu=params["mu"],
            sigma=params["sigma"],
            q=params["q"])
    else:
        raise RunConfigurationException(
            "Parameter distribution for parameter {} not defined in settings. "
            "Please choose between 'choice', 'randint', 'uniform', 'quniform', "
            "'loguniform', 'qloguniform', 'normal', 'qnormal', 'lognormal' "
            "and 'qlognormal'".format(parameter_name))
    return parameter_distr
Code Example #9
 def get_cross_validation_hyperdrive_config(
         self, run_config: ScriptRunConfig) -> HyperDriveConfig:
     """
     Returns a configuration for AzureML Hyperdrive that varies the cross validation split index.
     Because this adds a val/Loss metric it is important that when subclassing LightningContainer
     your implementeation of LightningModule logs val/Loss. There is an example of this in 
     HelloRegression's validation_step method.
     :param run_config: The AzureML run configuration object that training for an individual model.
     :return: A hyperdrive configuration object.
     """
     return HyperDriveConfig(
         run_config=run_config,
         hyperparameter_sampling=GridParameterSampling(
             parameter_space={
                 CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
                 choice(list(range(self.number_of_cross_validation_splits)))
             }),
         primary_metric_name=TrackedMetrics.Val_Loss.value,
         primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
         max_total_runs=self.number_of_cross_validation_splits)
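
Since the docstring above requires the LightningModule to log val/Loss, here is a minimal sketch of a validation_step that satisfies it; compute_loss is a hypothetical placeholder, and only the logged metric name matters:

def validation_step(self, batch, batch_idx):
    loss = self.compute_loss(batch)  # hypothetical: the model's validation loss
    # The metric name must match TrackedMetrics.Val_Loss.value ("val/Loss"),
    # which the HyperDrive config above uses as primary_metric_name
    self.log("val/Loss", loss, on_epoch=True)
    return loss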
Code Example #10
 est = Estimator(
     source_directory=source_directory,
     script_params={"--data-folder": ds.as_mount()},
     compute_target=compute_target,
     pip_packages=GeneralConfig.pip_packages,
     entry_script=PathsConfig.entry_script,
     use_gpu=True,
     custom_docker_image=settings["IMAGE_NAME"],
 )
 if GeneralConfig.hyperdrive:
     if GeneralConfig.architecture_type == "PretrainedResNet50":
         hyperparams_space = HyperdriveConfig.pretrained_resnet50_hyperparams_space
     else:
         raise NotImplementedError
     hyperparams_space_format = {
         parameter: choice(parameter_range)
         for parameter, parameter_range in hyperparams_space.items()
     }
     parameters_sampling = RandomParameterSampling(hyperparams_space_format)
     policy = BanditPolicy(
         evaluation_interval=HyperdriveConfig.evaluation_interval,
         slack_factor=HyperdriveConfig.slack_factor,
     )
     hdc = HyperDriveConfig(
         estimator=est,
         hyperparameter_sampling=parameters_sampling,
         policy=policy,
         primary_metric_name="Accuracy",
         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
         max_total_runs=HyperdriveConfig.max_total_runs,
         max_concurrent_runs=HyperdriveConfig.max_concurrent_runs,
Code Example #11
args = [
    '--input-data', ds.as_named_input('train_data'),
    #'--num-topics', 10,
    '--chunksize', 2000,
    '--passes', 20,
    '--iterations', 400
]

src = ScriptRunConfig(source_directory="./topicmodel",
                      script='train.py',
                      arguments=args,
                      compute_target=compute_target,
                      environment=env)

param_sampling = RandomParameterSampling({
    "--num-topics": choice(5, 10, 15, 20)
})

# Submit experiment

hd = HyperDriveConfig(run_config=src,
                      hyperparameter_sampling=param_sampling,
                      primary_metric_name="c_v",
                      primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                      max_total_runs=100,
                      max_concurrent_runs=4)

run = exp.submit(config=hd)

run.wait_for_completion(show_output=False)
Code Example #12
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import normal, uniform, choice
from azureml.train.estimator import Estimator
import os

# Specify parameter sampler
ps = RandomParameterSampling({
    "learning_rate": uniform(0.05, 0.1),
    "batch_size": choice(16, 32, 64, 128)
})

# Specify a Policy, check job every 2 iterations
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# workspaceblobstore is the default blob storage
#src.run_config.source_directory_data_store = "workspaceblobstore"

# Create a SKLearn estimator for use with train.py
est = SKLearn("./training",
              script_params=None,
              compute_target=compute_target,
Code Example #13
                     'keras==2.0.8', 'theano', 'tensorflow==1.8.0',
                     'tensorflow-gpu==1.8.0', 'matplotlib', 'horovod', 'hickle'
                 ],
                 entry_script='train.py',
                 use_gpu=True,
                 node_count=1)

# run = exp.submit(est)

# print(run)

# run.wait_for_completion(show_output=True)

ps = RandomParameterSampling({
    '--batch_size':
    choice(2, 4, 8, 16),
    '--filter_sizes':
    choice("3 3 3", "4 4 4", "5 5 5"),
    '--stack_sizes':
    choice("48 96 192", "36 72 144", "12 24 48"),
    '--learning_rate':
    loguniform(-6, -1),
    '--lr_decay':
    loguniform(-9, -1)
})

policy = BanditPolicy(evaluation_interval=2,
                      slack_factor=0.1)  #, delay_evaluation=20)

hdc = HyperDriveRunConfig(estimator=est,
                          hyperparameter_sampling=ps,
Code Example #14
File: hyper_config.py Project: csorgyu/uda_capstone
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive import choice
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.policy import TruncationSelectionPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.core import ScriptRunConfig
import os

# Specify parameter sampler
# https://xgboost.readthedocs.io/en/latest/parameter.html
ps = {
    '--num_boost_round': choice(5, 10, 20, 50),
    '--max_depth': choice(3, 5, 8),
    '--learning_rate': choice(0.001, 0.005, 0.01, 0.05),
    '--gamma': choice(0, 1, 2),
    '--reg_lambda': choice(0.1, 1, 2, 5),
    '--scale_pos_weight': choice(1, 2)
}
samp = RandomParameterSampling(parameter_space=ps)

# Specify a Policy
policy = TruncationSelectionPolicy(
    truncation_percentage=50)  # BanditPolicy(slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")
Code Example #15
def build_pipeline(dataset, ws, config):
    print("building pipeline for dataset %s in workspace %s" %
          (dataset, ws.name))

    base_dir = '.'

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)

    shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_build.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'train.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'config.json'), script_folder)

    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:
        print("creating new compute target")

        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            max_nodes=4,
            idle_seconds_before_scaledown=1800)
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name,
                                                  provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = config['gpu_compute']

    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            max_nodes=5,
            idle_seconds_before_scaledown=1800)

        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name,
                                                  provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it uses the scale settings for the cluster
        gpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster.
    print(gpu_compute_target.get_status().serialize())

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(conda_packages=["py-opencv=3.4.2"],
                                      pip_packages=[
                                          "azure-storage-blob==1.5.0",
                                          "hickle==3.4.3", "requests==2.21.0",
                                          "sklearn", "pandas==0.24.2",
                                          "azureml-sdk==1.0.21",
                                          "numpy==1.16.2", "pillow==6.0.0"
                                      ])
    gpu_cd = CondaDependencies.create(pip_packages=[
        "keras==2.0.8", "theano==1.0.4", "tensorflow==1.8.0",
        "tensorflow-gpu==1.8.0", "hickle==3.4.3", "matplotlib==3.0.3",
        "seaborn==0.9.0", "requests==2.21.0", "bs4==0.0.1", "imageio==2.5.0",
        "sklearn", "pandas==0.24.2", "azureml-sdk==1.0.21", "numpy==1.16.2"
    ])

    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    gpu_compute_run_config = RunConfiguration(conda_dependencies=gpu_cd)
    gpu_compute_run_config.environment.docker.enabled = True
    gpu_compute_run_config.environment.docker.gpu_support = True
    gpu_compute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
    gpu_compute_run_config.environment.spark.precache_packages = False

    print("PipelineData object created")

    video_data = DataReference(datastore=def_blob_store,
                               data_reference_name="video_data",
                               path_on_datastore=os.path.join(
                                   "prednet", "data", "video", dataset))

    # Naming the intermediate data as processed_data1 and assigning it to the variable processed_data1.
    raw_data = PipelineData("raw_video_fames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    print("DataReference object created")

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py",
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.'])
    print("video_decode created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(name='prepare_data',
                                 script_name="data_preparation.py",
                                 arguments=[
                                     "--input_data", raw_data, "--output_data",
                                     preprocessed_data
                                 ],
                                 inputs=[raw_data],
                                 outputs=[preprocessed_data],
                                 compute_target=cpu_compute_target,
                                 source_directory=script_folder,
                                 runconfig=cpu_compute_run_config,
                                 allow_reuse=True,
                                 hash_paths=['.'])
    data_prep.run_after(video_decoding)

    print("data_prep created")

    est = TensorFlow(source_directory=script_folder,
                     compute_target=gpu_compute_target,
                     pip_packages=[
                         'keras==2.0.8', 'theano', 'tensorflow==1.8.0',
                         'tensorflow-gpu==1.8.0', 'matplotlib', 'horovod',
                         'hickle'
                     ],
                     entry_script='train.py',
                     use_gpu=True,
                     node_count=1)

    ps = RandomParameterSampling({
        '--batch_size':
        choice(2, 4, 8, 16),
        '--filter_sizes':
        choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        '--stack_sizes':
        choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),  #, "48, 96"),
        '--learning_rate':
        loguniform(-6, -1),
        '--lr_decay':
        loguniform(-9, -1),
        '--freeze_layers':
        choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "1", "2",
               "3"),
        '--transfer_learning':
        choice("True", "False")
    })

    policy = BanditPolicy(evaluation_interval=2,
                          slack_factor=0.1,
                          delay_evaluation=20)

    hdc = HyperDriveRunConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        policy=policy,
        primary_metric_name='val_loss',
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=5,  #100,
        max_concurrent_runs=5,  #10,
        max_duration_minutes=60 * 6)

    hd_step = HyperDriveStep(name="train_w_hyperdrive",
                             hyperdrive_run_config=hdc,
                             estimator_entry_script_arguments=[
                                 '--data-folder', preprocessed_data,
                                 '--remote_execution'
                             ],
                             inputs=[preprocessed_data],
                             metrics_output=data_metrics,
                             allow_reuse=True)
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=gpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.'])
    registration_step.run_after(hd_step)

    pipeline = Pipeline(
        workspace=ws,
        steps=[video_decoding, data_prep, hd_step, registration_step])
    print("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete")

    pipeline_name = 'prednet_' + dataset
    pipeline.publish(name=pipeline_name)

    return pipeline_name
Code Example #16
        ],
        compute_target=target,
        environment=env,
    )

    # set up hyperdrive search space
    convert_base = lambda x: float(np.log(x))
    search_space = {
        "--learning_rate":
        hyperdrive.loguniform(
            convert_base(1e-6),
            convert_base(5e-2)),  # NB. loguniform on [exp(min), exp(max)]
        "--weight_decay":
        hyperdrive.uniform(5e-3, 15e-2),
        "--per_device_train_batch_size":
        hyperdrive.choice([16, 32]),
    }

    hyperparameter_sampling = RandomParameterSampling(search_space)

    policy = TruncationSelectionPolicy(truncation_percentage=50,
                                       evaluation_interval=2,
                                       delay_evaluation=0)

    hyperdrive_config = HyperDriveConfig(
        run_config=config,
        hyperparameter_sampling=hyperparameter_sampling,
        policy=policy,
        primary_metric_name="eval_matthews_correlation",
        primary_metric_goal=hyperdrive.PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=20,
Code Example #17
# In AzureML, we can achieve hyperparameter tuning through an experiment that consists of a hyperdrive run, which initiates a child run
# for each hyperparameter combination to be tested. Each child run uses a training script with parameterized hyperparameter values to train a model
# and logs the target performance metric achieved by the trained model.
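
# Each child run must itself log the tuned metric; a minimal sketch of what the
# training script would contain (metric name and value are illustrative):
from azureml.core import Run

run = Run.get_context()    # handle to the current child run
run.log('Accuracy', 0.91)  # log the metric HyperDrive optimizes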

# For hyperparameter tuning we have to define the search space

from azureml.train.hyperdrive import choice, normal

param_space = {

    '--batch_size' : choice(16,32,64),
    '--learning_rate': normal(10,3)
}


from azureml.train.hyperdrive import GridParameterSampling
# All parameters must be discrete (choice) lists for grid search
param_space = {
    "--batch_size" : choice(16,32,64),
    '--learning_rate' : choice(0.01, 0.02, 0.03)
}

param_sampling = GridParameterSampling(param_space)

# Distributions can be of these types

# For discrete:
# qnormal, quniform, qlognormal, qloguniform

# For continuous:
# normal, uniform, lognormal, loguniform
Code Example #18
def build_pipeline(dataset, ws, config):
    print("building pipeline for dataset %s in workspace %s" % (dataset, ws.name))

    base_dir = '.'
        
    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)
    
    shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_create.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'train.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'config.json'), script_folder)
    
    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:
        print("creating new compute target")
        
        provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                                    max_nodes=4,
                                                                    idle_seconds_before_scaledown=1800)    
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name, provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
        
    # use get_status() to get a detailed status for the current cluster. 
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = config['gpu_compute']

    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', 
                                                                    max_nodes=10,
                                                                    idle_seconds_before_scaledown=1800)

        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name, provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout. 
        # if no min node count is provided it uses the scale settings for the cluster
        gpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster. 
    try:
        print(gpu_compute_target.get_status().serialize())
    except BaseException as e:
        print("Could not get status of compute target.")
        print(e)

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(conda_packages=["py-opencv=3.4.2"], pip_indexurl='https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D', pip_packages=["azure-storage-blob==1.5.0", "hickle==3.4.3", "requests==2.21.0", "sklearn", "pandas==0.24.2", "azureml-sdk", "numpy==1.16.2", "pillow==6.0.0"])
    
    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    print("PipelineData object created")

    # DataReference to where video data is stored.
    video_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="video_data",
        path_on_datastore=os.path.join("prednet", "data", "video", dataset))
    print("DataReference object created")
        
    # Naming the intermediate data as processed_data1 and assigning it to the variable processed_data1.
    raw_data = PipelineData("raw_video_fames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames", datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py", 
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target, 
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    print("video_decode step created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name='prepare_data',
        script_name="data_preparation.py", 
        arguments=["--input_data", raw_data, "--output_data", preprocessed_data],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target, 
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    data_prep.run_after(video_decoding)

    print("data_prep step created")


    # configure access to ACR for pulling our custom docker image
    acr = ContainerRegistry()
    acr.address = config['acr_address']
    acr.username = config['acr_username']
    acr.password = config['acr_password']
    
    est = Estimator(source_directory=script_folder,
                    compute_target=gpu_compute_target,
                    entry_script='train.py', 
                    use_gpu=True,
                    node_count=1,
                    custom_docker_image = "wopauli_1.8-gpu:1",
                    image_registry_details=acr,
                    user_managed=True
                    )

    ps = RandomParameterSampling(
        {
            '--batch_size': choice(1, 2, 4, 8),
            '--filter_sizes': choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
            '--stack_sizes': choice("48, 96, 192", "36, 72, 144", "12, 24, 48"), #, "48, 96"),
            '--learning_rate': loguniform(-6, -1),
            '--lr_decay': loguniform(-9, -1),
            '--freeze_layers': choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
            '--transfer_learning': choice("True", "False")
        }
    )

    policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=10)

    hdc = HyperDriveConfig(estimator=est, 
                            hyperparameter_sampling=ps, 
                            policy=policy, 
                            primary_metric_name='val_loss', 
                            primary_metric_goal=PrimaryMetricGoal.MINIMIZE, 
                            max_total_runs=10,
                            max_concurrent_runs=5, 
                            max_duration_minutes=60*6
                            )

    hd_step = HyperDriveStep(
        name="train_w_hyperdrive",
        hyperdrive_run_config=hdc,
        estimator_entry_script_arguments=[
            '--data-folder', preprocessed_data, 
            '--remote_execution',
            '--dataset', dataset
            ],
        inputs=[preprocessed_data],
        metrics_output = data_metrics,
        allow_reuse=True
    )
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=cpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.']
    )
    registration_step.run_after(hd_step)

    pipeline = Pipeline(workspace=ws, steps=[video_decoding, data_prep, hd_step, registration_step])
    print ("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete") 

    pipeline_name = 'prednet_' + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)
    

    schedule = Schedule.create(workspace=ws, name=pipeline_name + "_sch",
                            pipeline_id=published_pipeline.id, 
                            experiment_name=pipeline_name,
                            datastore=def_blob_store,
                            wait_for_provisioning=True,
                            description="Datastore scheduler for Pipeline" + pipeline_name,
                            path_on_datastore=os.path.join('prednet/data/video', dataset, 'Train'),
                            polling_interval=1
                            )

    return pipeline_name
Code Example #19
# - lognormal
# - loguniform


# Continuous hyperparameters:

# normal
# uniform
# lognormal
# loguniform


from azureml.train.hyperdrive import choice, normal

param_space = {
    '--batch_size': choice(10,12,15),
    '--learning_rate': normal(10,3)
}


# Configuring sampling - grid, random, Bayesian

from azureml.train.hyperdrive import GridParameterSampling, RandomParameterSampling, BayesianParameterSampling


param_sampling = GridParameterSampling(param_space)


# Configuring early stopping

# - bandit policy -> BanditPolicy(slack_amount=0.2, evaluation_interval=1, delay_evaluation=5)
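
# The other built-in early termination policies follow the same pattern
# (a sketch; the interval and threshold values are illustrative):
from azureml.train.hyperdrive import BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy

# Stop runs whose metric falls outside the slack allowed relative to the best run
bandit = BanditPolicy(slack_amount=0.2, evaluation_interval=1, delay_evaluation=5)

# Stop runs whose best metric is worse than the median of the running averages
median = MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=5)

# Stop the worst-performing 20% of runs at each evaluation interval
truncation = TruncationSelectionPolicy(truncation_percentage=20, evaluation_interval=1, delay_evaluation=5)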
Code Example #20
                                script='training03.py',
                                arguments=[
                                    '--max_depth', 5, '--num_leaves', 50,
                                    '--subsample', 0.9, '--learning_rate',
                                    0.01, '--min_data_in_leaf', 50,
                                    '--lambda_l1', 20, '--lambda_l2', 20,
                                    '--n_estimators', 1000
                                ],
                                environment=sklearn_env,
                                compute_target=training_cluster)

# Sample a range of parameter values
params = BayesianParameterSampling({
    # Bayesian sampling chooses promising values based on how previous samples performed;
    # it supports choice, uniform and quniform expressions
    '--max_depth':
    choice(list(range(2, 20))),
    '--num_leaves':
    choice(list(range(6, 251))),
    '--subsample':
    uniform(0.5, 1),
    '--learning_rate':
    uniform(0.005, 0.25),
    '--min_data_in_leaf':
    choice(list(range(2, 501))),
    '--lambda_l1':
    choice(list(range(201))),
    '--lambda_l2':
    choice(list(range(201))),
    '--n_estimators':
    choice(list(range(100, 4001, 100)))
})
Code Example #21
def build_prednet_pipeline(dataset, ws):
    print("building pipeline for dataset %s in workspace %s" %
          (dataset, ws.name))

    base_dir = "."

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = "./scripts"
    os.makedirs(script_folder, exist_ok=True)

    shutil.copytree(os.path.join(base_dir, "models"),
                    os.path.join(base_dir, script_folder, "models"))
    shutil.copy(os.path.join(base_dir, "train.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "data_preparation.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "register_prednet.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "batch_scoring.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "train_clf.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "register_clf.py"), script_folder)

    cpu_compute_name = args.cpu_compute_name
    cpu_compute_target = AmlCompute(ws, cpu_compute_name)
    print("found existing compute target: %s" % cpu_compute_name)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = args.gpu_compute_name

    gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
    print(gpu_compute_target.get_status().serialize())

    env = Environment.get(ws, "prednet")

    # Runconfigs
    runconfig = RunConfiguration()
    runconfig.environment = env
    print("PipelineData object created")

    # DataReference to where raw data is stored.
    raw_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="raw_data",
        path_on_datastore=os.path.join("prednet", "data", "raw_data"),
    )
    print("DataReference object created")

    # Naming the intermediate data as processed_data and assigning it to the
    # variable processed_data.
    preprocessed_data = PipelineData("preprocessed_data",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    hd_child_cwd = PipelineData("prednet_model_path", datastore=def_blob_store)
    # prednet_path = PipelineData("outputs", datastore=def_blob_store)
    scored_data = PipelineData("scored_data", datastore=def_blob_store)
    model_path = PipelineData("model_path", datastore=def_blob_store)

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name="prepare_data",
        script_name="data_preparation.py",
        arguments=[
            "--raw_data",
            raw_data,
            "--preprocessed_data",
            preprocessed_data,
            "--dataset",
            dataset,
        ],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    # data_prep.run_after(video_decoding)

    print("data_prep step created")

    est = Estimator(
        source_directory=script_folder,
        compute_target=gpu_compute_target,
        entry_script="train.py",
        node_count=1,
        environment_definition=env,
    )

    ps = BayesianParameterSampling({
        "--batch_size":
        choice(1, 2, 4, 10),
        "--filter_sizes":
        choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        "--stack_sizes":
        choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),
        "--learning_rate":
        uniform(1e-6, 1e-3),
        "--lr_decay":
        uniform(1e-9, 1e-2),
        "--freeze_layers":
        choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
        # "--fine_tuning": choice("True", "False"),
    })

    hdc = HyperDriveConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        primary_metric_name="val_loss",
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=3,
        max_concurrent_runs=3,
        max_duration_minutes=60 * 6,
    )

    train_prednet = HyperDriveStep(
        "train_w_hyperdrive",
        hdc,
        estimator_entry_script_arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--remote_execution",
            "--dataset",
            dataset,
        ],
        inputs=[preprocessed_data],
        outputs=[hd_child_cwd],
        metrics_output=data_metrics,
        allow_reuse=True,
    )
    train_prednet.run_after(data_prep)

    register_prednet = PythonScriptStep(
        name="register_prednet",
        script_name="register_prednet.py",
        arguments=[
            "--data_metrics",
            data_metrics,
        ],
        compute_target=cpu_compute_target,
        inputs=[data_metrics, hd_child_cwd],
        source_directory=script_folder,
        allow_reuse=True,
    )
    register_prednet.run_after(train_prednet)

    batch_scoring = PythonScriptStep(
        name="batch_scoring",
        script_name="batch_scoring.py",
        arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--scored_data",
            scored_data,
            "--dataset",
            dataset,
            # "--prednet_path",
            # prednet_path
        ],
        compute_target=gpu_compute_target,
        inputs=[preprocessed_data],
        outputs=[scored_data],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    batch_scoring.run_after(register_prednet)

    train_clf = PythonScriptStep(
        name="train_clf",
        script_name="train_clf.py",
        arguments=[
            "--preprocessed_data", preprocessed_data, "--scored_data",
            scored_data, "--model_path", model_path
        ],
        compute_target=cpu_compute_target,
        inputs=[preprocessed_data, scored_data],
        outputs=[model_path],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    train_clf.run_after(batch_scoring)

    register_clf = PythonScriptStep(
        name="register_clf",
        script_name="register_clf.py",
        arguments=["--model_path", model_path],
        inputs=[model_path],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        allow_reuse=True,
        runconfig=runconfig,
    )
    register_clf.run_after(train_clf)

    pipeline = Pipeline(
        workspace=ws,
        steps=[
            data_prep,
            train_prednet,
            register_prednet,
            batch_scoring,
            train_clf,
            register_clf,
        ],
    )
    pipeline.validate()

    pipeline_name = "prednet_" + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)

    _ = Schedule.create(
        workspace=ws,
        name=pipeline_name + "_sch",
        pipeline_id=published_pipeline.id,
        experiment_name=pipeline_name,
        datastore=def_blob_store,
        wait_for_provisioning=True,
        description="Datastore scheduler for Pipeline" + pipeline_name,
        path_on_datastore=os.path.join("prednet/data/raw_data", dataset,
                                       "Train"),
        polling_interval=60 * 24,
    )

    published_pipeline.submit(ws, pipeline_name)
Code Example #22
#%%

# Create Experiment object - this will be used to submit the Hyperdrive run and store all the given parameters
experiment_hd = Experiment(workspace=ws, name='hyperdrive')

#%% [markdown]

###### Create Random Parameter Sampler

#%%

# Parameter space to sweep over - uses Random Parameter Sampling
ps = hd.RandomParameterSampling({
    '--network-name':
    hd.choice('densenet201', 'resnet152', 'resnet34', 'alexnet', 'vgg19_bn'),
    '--minibatch-size':
    hd.choice(8, 16),
    '--learning-rate':
    hd.uniform(0.00001, 0.001),
    '--step-size':
    hd.choice(10, 25, 50),  # How often should the learning rate decay
    '--gamma':
    hd.uniform(
        0.7, 0.99
    ),  # The decay applied to the learning rate every {step-size} steps
    '--optimizer-type':
    hd.choice('sgd', 'adam')
})

#%% [markdown]
Code Example #23
from azureml.core import ScriptRunConfig, Experiment

script_run = ScriptRunConfig(source_directory=".",
                             script="hyperdrive_script.py",
                             arguments=["--input_data", input_ds.as_named_input("raw_data")],
                             environment=my_env,
                             compute_target=cluster)

# Creating the hyperparameter search space

from azureml.train.hyperdrive import GridParameterSampling, choice

hyper_params = GridParameterSampling({
    '--n_estimators': choice(10,20,30,100),
    '--min_samples_leaf': choice(1,2,5)
})


# Configuring the HyperDrive class
from azureml.train.hyperdrive import HyperDriveConfig,PrimaryMetricGoal

hyper_config = HyperDriveConfig(run_config=script_run,
                                hyperparameter_sampling=hyper_params,
                                policy=None,
                                primary_metric_name='accuracy',
                                primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                max_total_runs=20,
                                max_concurrent_runs=2)
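
The configuration above is then submitted like any other run; a minimal sketch, assuming the workspace object ws from earlier and a hypothetical experiment name:

from azureml.core import Experiment

experiment = Experiment(workspace=ws, name="hyperdrive_experiment")  # name is an assumption
hyper_run = experiment.submit(config=hyper_config)
hyper_run.wait_for_completion(show_output=True)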
Code Example #24
File: tf2train.py Project: mindis/mlops
pred = imported_model(X_test)
y_hat = np.argmax(pred, axis=1)

# print the first 30 labels and predictions
print('labels:  \t', y_test[:30])
print('predictions:\t', y_hat[:30])

print("Accuracy on the test set:", np.average(y_hat == y_test))

from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, loguniform

ps = RandomParameterSampling({
    '--batch-size':
    choice(32, 64, 128),
    '--first-layer-neurons':
    choice(16, 64, 128, 256, 512),
    '--second-layer-neurons':
    choice(16, 64, 256, 512),
    '--learning-rate':
    loguniform(-6, -1)
})

est = TensorFlow(source_directory=script_folder,
                 script_params={
                     '--data-folder':
                     dataset.as_named_input('mnist').as_mount()
                 },
                 compute_target=compute_target,
                 entry_script='tf_mnist2.py',
Code Example #25
                conda_packages=['cudatoolkit=10.0.130'],
                entry_script='kd_squeezenet.py',
                use_gpu=True,
                node_count=1)

from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.pipeline.steps import HyperDriveStep
from azureml.train.hyperdrive import choice, loguniform, uniform

ps = RandomParameterSampling({
    '--learning_rate': uniform(1e-3, 2e-2),
    '--momentum': uniform(.1, .95),
    '--weight_decay': loguniform(-5, -3),
    '--temperature': uniform(1, 9),
    # '--lambda_const': uniform(.1, .3),
    '--transfer_learning': choice("True", "False")
})

policy = BanditPolicy(evaluation_interval=2,
                      slack_factor=0.1,
                      delay_evaluation=10)

hdc = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='val_loss',
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    max_total_runs=5,  #100,
    max_concurrent_runs=5)
Code Example #26
experiment = Experiment(workspace=ws, name=experiment_name)

# Create a folder for the experiment files
experiment_folder = './' + experiment_name
os.makedirs(experiment_folder, exist_ok=True)

print("Experiment:", experiment.name)

#Fetch GPU cluster for computations
gpu_cluster = ComputeTarget(workspace=ws, name='demo-GPU-cluster')

# Sample a range of parameter values
params = GridParameterSampling({
    # There's only one parameter, so grid sampling will try each value - with multiple parameters it would try every combination
    '--regularization':
    choice(0.001, 0.005, 0.01, 0.05, 0.1, 1.0)
})

# Set evaluation policy to stop poorly performing training runs early
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Get the training dataset
diabetes_ds = ws.datasets.get("diabetes_dataset")

# Create an estimator that uses the remote compute
hyper_estimator = SKLearn(
    source_directory=experiment_folder,
    inputs=[diabetes_ds.as_named_input('diabetes')
            ],  # Pass the dataset as an input
    compute_target=gpu_cluster,
    conda_packages=['pandas', 'ipykernel', 'matplotlib'],
Code Example #27
        path=ds.path('odFridgeObjects/validation_annotations.jsonl'))
    validation_dataset = validation_dataset.register(
        workspace=ws, name=validation_dataset_name)

print("Training dataset name: " + training_dataset.name)
print("Validation dataset name: " + validation_dataset.name)

training_dataset.to_pandas_dataframe()

image_config_yolov5 = AutoMLImageConfig(
    task='image-object-detection',
    compute_target=compute_target,
    training_data=training_dataset,
    validation_data=validation_dataset,
    hyperparameter_sampling=GridParameterSampling(
        {'model_name': choice('yolov5')}))

automl_image_run = experiment.submit(image_config_yolov5)

automl_image_run.wait_for_completion(wait_post_processing=True)

parameter_space = {
    'model':
    choice(
        {
            'model_name': choice('yolov5'),
            'learning_rate': uniform(0.0001, 0.01),
            #'model_size': choice('small', 'medium'), # model-specific
            'img_size': choice(640, 704, 768),  # model-specific
        },
        {
Code Example #28
        if config.get('type') == 'classification':
            script_params = {
                '--task': int(task),
                '--use_cuda': '',
                '--register_model': ''
            }
            est = PyTorch(source_directory=script_folder,
                          compute_target=compute_target,
                          script_params=script_params,
                          entry_script='src/classification.py',
                          pip_packages=pip_packages,
                          use_gpu=True)

            ### Hyperparameter search space
            if language == 'en':
                model_type = choice('roberta', 'bert', 'albert')
            elif language == 'de':
                model_type = choice('distilbert', 'bert', 'roberta')
            elif language == 'it' or language == 'es':
                model_type = choice('bert')
            elif language == 'fr':
                model_type = choice('camembert', 'bert')
            param_sampling = RandomParameterSampling({
                '--n_epochs':
                choice(3, 5, 10),
                '--learning_rate':
                choice(1e-5, 2e-5, 3e-5, 4e-5),
                '--model_type':
                model_type,
                '--max_seq_len':
                choice(128, 256),
Code Example #29
File: trainseg.py Project: balakreshnan/visionautoml
print("Training dataset name: " + training_dataset.name)
print("Validation dataset name: " + validation_dataset.name)

training_dataset.to_pandas_dataframe()

from azureml.train.automl import AutoMLImageConfig
from azureml.train.hyperdrive import GridParameterSampling
from azureml.train.hyperdrive import choice

image_config_maskrcnn = AutoMLImageConfig(
    task='image-instance-segmentation',
    compute_target=compute_target,
    training_data=training_dataset,
    validation_data=validation_dataset,
    hyperparameter_sampling=GridParameterSampling(
        {'model_name': choice('maskrcnn_resnet50_fpn')}))

automl_image_run = experiment.submit(image_config_maskrcnn)

automl_image_run.wait_for_completion(wait_post_processing=True)

from azureml.train.automl import AutoMLImageConfig
from azureml.train.hyperdrive import GridParameterSampling, RandomParameterSampling, BayesianParameterSampling
from azureml.train.hyperdrive import BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, uniform

parameter_space = {
    'model_name': choice('maskrcnn_resnet50_fpn'),
    'learning_rate': uniform(0.0001, 0.001),
    #'warmup_cosine_lr_warmup_epochs': choice(0, 3),
    'optimizer': choice('sgd', 'adam', 'adamw'),
Code Example #30
# Define search space
from azureml.train.hyperdrive import choice, normal

param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': normal(10, 3)
              }

# Grid sampling example
from azureml.train.hyperdrive import GridParameterSampling, choice

param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': choice(0.01, 0.1, 1.0)
              }

param_sampling = GridParameterSampling(param_space)             


#random sampling
from azureml.train.hyperdrive import RandomParameterSampling, choice, normal

param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': normal(10, 3)
              }

param_sampling = RandomParameterSampling(param_space)


#Bayesian sampling
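
# A sketch completing the truncated Bayesian example above. Note that
# BayesianParameterSampling only supports choice, uniform and quniform,
# so the normal(10, 3) used for random sampling is replaced by a uniform range here.
from azureml.train.hyperdrive import BayesianParameterSampling, choice, uniform

param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': uniform(0.005, 0.1)
              }

param_sampling = BayesianParameterSampling(param_space)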