Example #1
# Make experiment reproducible
tf.random.set_seed(CONFIG.SPLIT_SEED)
random.seed(CONFIG.SPLIT_SEED)

DATA_DIR = REPO_DIR / 'data' if run.id.startswith("OfflineRun") else Path(".")
logger.info('DATA_DIR: %s', DATA_DIR)

# Offline run. Download the sample dataset and run locally. Still push results to Azure.
if run.id.startswith("OfflineRun"):
    logger.info('Running in offline mode...')

    # Access workspace.
    logger.info('Accessing workspace...')
    workspace = Workspace.from_config()
    experiment = Experiment(workspace, "training-junkyard")
    run = experiment.start_logging(outputs=None, snapshot_directory=None)

    dataset_name = CONFIG.DATASET_NAME_LOCAL
    dataset_path = get_dataset_path(DATA_DIR / "datasets", dataset_name)
    download_dataset(workspace, dataset_name, dataset_path)

# Online run. Use dataset provided by training notebook.
else:
    logger.info('Running in online mode...')
    experiment = run.experiment
    workspace = experiment.workspace

    dataset_name = CONFIG.DATASET_NAME

    # Mount or download
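    # A plausible continuation (hedged sketch; the Dataset import and the mount
    # pattern are assumed, not shown in the original excerpt):
    dataset = Dataset.get_by_name(workspace, dataset_name)
    mount_context = dataset.mount()
    mount_context.start()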
Example #2
from azureml.core.runconfig import RunConfiguration
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import ScriptRunConfig
import json
from azureml.core.authentication import AzureCliAuthentication

cli_auth = AzureCliAuthentication()

# Get workspace
ws = Workspace.from_config(auth=cli_auth)

# Attach Experiment
experiment_name = "devops-ai-demo"
exp = Experiment(workspace=ws, name=experiment_name)
print(exp.name, exp.workspace.name, sep="\n")

# Editing a run configuration property on the fly.
run_config_user_managed = RunConfiguration()
run_config_user_managed.environment.python.user_managed_dependencies = True

print("Submitting an experiment.")
src = ScriptRunConfig(
    source_directory="./code",
    script="training/train.py",
    run_config=run_config_user_managed,
)
run = exp.submit(src)

# Shows output of the run on stdout.
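# A hedged completion of the truncated snippet, assuming the standard SDK call
# implied by the comment above:
run.wait_for_completion(show_output=True)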
        "environments/data_preperation_full/RunConfig/runconfig_data_preperation.yml",
    )),
                                   name="dataprep_full")

dataprep_subset = PythonScriptStep(
    name="subset",
    script_name="data_engineering.py",
    arguments=script_params_sub,
    runconfig=run_config_sub,
    source_directory=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  '..', 'data'))

dataprep_fulldata = PythonScriptStep(
    name="full",
    script_name="data_engineering.py",
    arguments=script_params,
    runconfig=run_config,
    source_directory=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  '..', 'data'))

# Attach the steps to the pipeline
pipeline = Pipeline(workspace=workspace,
                    steps=[dataprep_subset, dataprep_fulldata])

# Submit the pipeline
# Define the experiment
experiment = Experiment(workspace, 'pipeline-dataprep')

# Run the experiment
pipeline_run = experiment.submit(pipeline)
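# A minimal follow-up sketch (not in the original excerpt): block until the
# pipeline run finishes and stream its logs.
pipeline_run.wait_for_completion(show_output=True)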
def main():

    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        # run_id useful to query previous runs
        run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
        aml_workspace = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group
        )
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        ws = run.experiment.workspace
        exp = run.experiment
        run_id = 'amlcompute'

    parser = argparse.ArgumentParser("register")

    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )

    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="londonboiler_model.pkl",
    )

    parser.add_argument(
        "--step_input",
        type=str,
        help="Input from previous steps",
    )

    args = parser.parse_args()
    if (args.run_id is not None):
        run_id = args.run_id
    if (run_id == 'amlcompute'):
        run_id = run.parent.id
    model_name = args.model_name
    model_path = args.step_input

    print("Getting registration parameters")

    # Load the registration parameters from the parameters file
    with open("parameters.json") as f:
        pars = json.load(f)
    try:
        register_args = pars["registration"]
    except KeyError:
        print("Could not load registration values from file")
        register_args = {"tags": []}

    model_tags = {}
    for tag in register_args["tags"]:
        try:
            mtag = run.parent.get_metrics()[tag]
            model_tags[tag] = mtag
        except KeyError:
            print(f"Could not find {tag} metric on parent run.")

    # load the model
    print("Loading model from " + model_path)
    model_file = os.path.join(model_path, model_name)
    model = joblib.load(model_file)
    parent_tags = run.parent.get_tags()
    try:
        build_id = parent_tags["BuildId"]
    except KeyError:
        build_id = None
        print("BuildId tag not found on parent run.")
        print(f"Tags present: {parent_tags}")
    try:
        build_uri = parent_tags["BuildUri"]
    except KeyError:
        build_uri = None
        print("BuildUri tag not found on parent run.")
        print(f"Tags present: {parent_tags}")

    if (model is not None):
        dataset_id = parent_tags["dataset_id"]
        if (build_id is None):
            register_aml_model(
                model_file,
                model_name,
                model_tags,
                exp,
                run_id,
                dataset_id)
        elif (build_uri is None):
            register_aml_model(
                model_file,
                model_name,
                model_tags,
                exp,
                run_id,
                dataset_id,
                build_id)
        else:
            register_aml_model(
                model_file,
                model_name,
                model_tags,
                exp,
                run_id,
                dataset_id,
                build_id,
                build_uri)
    else:
        print("Model not found. Skipping model registration.")
        sys.exit(0)
Example #5
               workspace_name=workspace_name,
               auth=msi_auth)

# Retrieve the dataset and compute target for the AutoML run
dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)
compute_target = ws.compute_targets[compute_name]

automl_config = AutoMLConfig(task='regression',
                             experiment_timeout_minutes=30,
                             primary_metric='normalized_root_mean_squared_error',
                             training_data=dataset,
                             compute_target=compute_target,
                             label_column_name=dataset_label_column)

# Execute the AutoML run
experiment = Experiment(ws, experiment_name)
run = experiment.submit(automl_config, show_output=True)
run.wait_for_completion()

# Get the best model from the AutoML run and register it
best_run = run.get_best_child()
best_run.download_files(prefix='outputs', append_prefix=False)
model = Model.register(model_path='outputs/model.pkl',
                       model_name=model_name,
                       workspace=ws)

# Prepare an environment for the model
myenv = Environment.from_conda_specification(name='project_environment', file_path='outputs/conda_env_v_1_0_0.yml')
myenv.docker.enabled = True
inference_config = InferenceConfig(entry_script='outputs/scoring_file_v_1_0_0.py', environment=myenv)
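# A plausible next step (hedged sketch; the service name is hypothetical):
# deploy the registered model behind the inference config to ACI.
from azureml.core.webservice import AciWebservice

deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
service = Model.deploy(ws, 'automl-regression-service', [model],
                       inference_config, deployment_config)
service.wait_for_deployment(show_output=True)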
Example #6
label_config = label_ds.as_named_input("labels_input")

batch_score_step = ParallelRunStep(
    name=parallel_step_name,
    inputs=[input_images.as_named_input("input_images")],
    output=output_dir,
    arguments=["--model_name", "inception", "--labels_dir", label_config],
    side_inputs=[label_config],
    parallel_run_config=parallel_run_config,
    allow_reuse=False)

from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

pipeline = Pipeline(workspace=ws, steps=[batch_score_step])
pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

import os

import pandas as pd

batch_run = next(pipeline_run.get_children())
batch_output = batch_run.get_output_data("scores")
batch_output.download(local_path="inception_results")

for root, dirs, files in os.walk("inception_results"):
    for file in files:
        if file.endswith("parallel_run_step.txt"):
            result_file = os.path.join(root, file)

df = pd.read_csv(result_file, delimiter=":", header=None)
df.columns = ["Filename", "Prediction"]
Example #7
                    type=str,
                    required=True,
                    help="The id of the run that contains the trained model.")
args = parser.parse_args()

# Get the current run.
run = Run.get_context()

# Offline run. Download the sample dataset and run locally. Still push results to Azure.
if (run.id.startswith("OfflineRun")):
    print("Running in offline mode...")

    # Access workspace.
    print("Accessing workspace...")
    workspace = Workspace.from_config()
    experiment = Experiment(workspace, "gapnet-offline")
    run = experiment.start_logging(outputs=None, snapshot_directory=".")

    # Get dataset.
    print("Accessing dataset...")
    if not os.path.exists("premiumfileshare"):
        dataset_name = "cgmmldevpremium-SampleDataset-Example"
        dataset = workspace.datasets[dataset_name]
        dataset.download(target_path='.', overwrite=False)
    dataset_path = glob.glob(os.path.join("premiumfileshare", "*"))[0]

# Online run. Use dataset provided by training notebook.
else:
    print("Running in online mode...")
    experiment = run.experiment
    workspace = experiment.workspace
Example #8
# These run metrics will become particularly important when we begin hyperparameter tuning our model in the "Tune model hyperparameters" section.
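# A minimal sketch of such metric logging inside pytorch_train.py (assumed;
# not shown in this excerpt):
#
#   from azureml.core import Run
#   run = Run.get_context()
#   run.log('best_val_acc', best_acc)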

# Once your script is ready, copy the training script pytorch_train.py into your project directory for staging.

import shutil

shutil.copy('pytorch_train.py', project_folder)

# Create an experiment

# Create an Experiment to track all the runs in your workspace for this transfer learning PyTorch tutorial.

from azureml.core import Experiment

experiment_name = 'pytorch-hymenoptera'
experiment = Experiment(ws, name=experiment_name)

# Create a PyTorch estimator

# The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information, see the PyTorch estimator documentation. The following code defines a single-node PyTorch job.

from azureml.train.dnn import PyTorch

script_params = {'--num_epochs': 30, '--output_dir': './outputs'}

estimator = PyTorch(source_directory=project_folder,
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='pytorch_train.py',
                    use_gpu=True)
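# A hedged follow-up: submit the estimator to the experiment created above and
# stream the logs.
run = experiment.submit(estimator)
run.wait_for_completion(show_output=True)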
Example #9
                                allow_reuse=False)
print("evaluateStep created")

evaluateStep.run_after(trainStep)
steps = [evaluateStep]

pipeline = Pipeline(workspace=ws, steps=steps)
print("Pipeline is built")

pipeline.validate()
print("Simple validation complete")

run = Run.get_context()
experiment_name = run.experiment.name

pipeline_run = Experiment(ws, experiment_name).submit(pipeline)
print("Pipeline is submitted for execution")

pipeline_run.wait_for_completion(show_output=True)

print("Downloading evaluation results...")
# access the evaluate_output
data = pipeline_run.find_step_run('evaluate')[0].get_output_data(
    'evaluate_output')
# download the predictions to local path
data.download('.', show_progress=True)

import json
# load the eval info json
with open(os.path.join('./', data.path_on_datastore, 'eval_info.json')) as f:
    eval_info = json.load(f)
Example #10
import json

import azureml.core
import pandas as pd
from azureml.core import Experiment, Workspace
from azureml.telemetry import set_diagnostics_collection

print("SDK Version:", azureml.core.VERSION)

ws = Workspace.from_config()
print('Workspace name: ' + ws.name,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group,
      sep='\n')

experiment_name = 'aml-pipeline_cicd'  # choose a name for experiment
project_folder = '.'  # project folder

experiment = Experiment(ws, experiment_name)
print("Location:", ws.location)
output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
output['Experiment Name'] = experiment.name
pd.set_option('display.max_colwidth', None)
pd.DataFrame(data=output, index=['']).T

set_diagnostics_collection(send_diagnostics=True)

print("SDK Version:", azureml.core.VERSION)
Example #11
# 05-train-remote-with-remote-data.py
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core import Dataset

if __name__ == "__main__":
    ws = Workspace.from_config(path='./.azureml', _file_name='config.json')
    datastore = ws.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, 'datasets/Monografia_limpio.csv'))

    experiment = Experiment(workspace=ws, name='dia2-experimento-train-data-remote')

    config = ScriptRunConfig(
        source_directory='./src',
        script='rcv_model.py',
        compute_target='cpu-cluster-mon',
        arguments=[
            '--data_path', dataset.as_named_input('input').as_mount()
            ],
    )
    # setup environment
    env = Environment.from_conda_specification(
        name='amlrcv-env',
        file_path='./.azureml/rcv-aml-env.yml'
    )
    config.run_config.environment = env

    run = experiment.submit(config)
    aml_url = run.get_portal_url()
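    # Presumably printed so the run can be opened in the studio (not shown in
    # the original excerpt):
    print(aml_url)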
Example #12
# set aml workspace parameters here.
subscription_id = ""
resource_group = ""
workspace_name = ""
workspace_region = ""

ws = Workspace(subscription_id=subscription_id,
               resource_group=resource_group,
               workspace_name=workspace_name)

# COMMAND ----------

# create experiment
experiment_name = 'bikeSharingDemand'
exp = Experiment(workspace=ws, name=experiment_name)

# COMMAND ----------

run = exp.start_logging(snapshot_directory=None)

# COMMAND ----------

df = (spark.read.format("csv")
      .option("inferSchema", "true")
      .option("header", "true")
      .load("/databricks-datasets/bikeSharing/data-001/day.csv"))

# split data
train_df, test_df = df.randomSplit([0.7, 0.3])

# One Hot Encoding
                                       data_preprocess_outputs['valid_dir'],
                                       gpu_compute_target)

# Step 4: Evaluate Model
evaluate_step, evaluate_outputs = evaluate_step(
    train_outputs['model_dir'], data_preprocess_outputs['test_dir'],
    gpu_compute_target)

# Step 5: Deploy Model
deploy_step, deploy_outputs = deploy_step(train_outputs['model_dir'],
                                          evaluate_outputs['accuracy_file'],
                                          data_preprocess_outputs['test_dir'],
                                          cpu_compute_target)

# Submit pipeline
print('Submitting pipeline ...')
pipeline_parameters = {
    'num_images': 100,
    'image_dim': 200,
    'num_epochs': 10,
    'batch_size': 16,
    'learning_rate': 0.001,
    'momentum': 0.9
}
pipeline = Pipeline(workspace=workspace,
                    steps=[
                        data_ingestion_step, data_preprocess_step, train_step,
                        evaluate_step, deploy_step
                    ])
pipeline_run = Experiment(workspace, 'Object-Classification-Demo').submit(
    pipeline, pipeline_parameters=pipeline_parameters)
Example #14
def main():

    run = Run.get_context()

    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        load_dotenv()
        sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
        if (sources_dir is None):
            sources_dir = 'aml_recommender'
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        build_id = os.environ.get('BUILD_BUILDID')
        aml_workspace = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group
        )
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        exp = run.experiment

    e = Env()

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument(
        "--output_model_version_file",
        type=str,
        default="model_version.txt",
        help="Name of a file to write model version to"
    )

    args = parser.parse_args()
    if (args.build_id is not None):
        build_id = args.build_id
    model_name = e.model_name

    try:
        tag_name = 'BuildId'
        model = get_latest_model(
            model_name, tag_name, build_id, exp.workspace)
        if (model is not None):
            print("Model was registered for this build.")
        if (model is None):
            print("Model was not registered for this run.")
            sys.exit(1)
    except Exception as ex:  # avoid shadowing the Env() instance bound to 'e'
        print(ex)
        print("Model was not registered for this run.")
        sys.exit(1)

    # Save the Model Version for other AzDO jobs after script is complete
    if args.output_model_version_file is not None:
        with open(args.output_model_version_file, "w") as out_file:
            out_file.write(str(model.version))
Example #15
# Parametrize dataset input to the pipeline
batch_dataset_parameter = PipelineParameter(name="batch_dataset", default_value=batch_dataset)
batch_dataset_consumption = DatasetConsumptionConfig("batch_dataset", batch_dataset_parameter).as_mount()

datastore = ws.get_default_datastore()
output_dir = PipelineData(name='batch_output', datastore=datastore)

batch_step = ParallelRunStep(
    name="batch-inference-step",
    parallel_run_config=parallel_run_config,
    arguments=['--model_name', args.model_name],
    inputs=[batch_dataset_consumption],
    side_inputs=[],
    output=output_dir,
    allow_reuse=False
)

steps = [batch_step]

print('Creating and validating pipeline')
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline.validate()

print('Publishing pipeline')
published_pipeline = pipeline.publish(args.pipeline_name)

# Output pipeline_id in the logging-command format that Azure DevOps converts into a pipeline variable
print(f'##vso[task.setvariable variable=pipeline_id]{published_pipeline.id}')

pipeline_run = Experiment(ws, 'batch-inferencing-pipeline').submit(pipeline)
pipeline_run.wait_for_completion()
Example #16
    workspace = Workspace.from_config()
    instance = ComputeTarget(workspace=workspace, name=args.aml_compute_target)

    dataset = Dataset.get_by_name(workspace, name="recursionbio")

    tf_env = Environment.from_conda_specification(
        name="regbio-tf-env", file_path=args.conda_env_file)

    train_scr = ScriptRunConfig(
        source_directory=args.source_directory,
        script=args.script,
        arguments=[
            "--data-path",
            dataset.as_mount(),
            "--epochs",
            1000,
            "--batch",
            24,
            "--learning-rate",
            0.001,
        ],
        compute_target=instance,
        environment=tf_env,
    )

    run = Experiment(workspace=workspace,
                     name=args.experiment_name).submit(train_scr)

    run.wait_for_completion(show_output=True)
# Check core SDK version number
import azureml.core
print("SDK version:", azureml.core.VERSION)

# Initialize Workspace
from azureml.core import Workspace

ws = Workspace.from_config()
print("Resource group: ", ws.resource_group)
print("Location: ", ws.location)
print("Workspace name: ", ws.name)

from azureml.core import Experiment
experiment_name = 'fashion-mnist'
experiment = Experiment(workspace=ws, name=experiment_name)

from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# create a new runconfig object
run_config = RunConfiguration()

# signal that you want to use ACI to execute script.
run_config.target = "containerinstance"

# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.
run_config.container_instance.region = ws.location

# set the ACI CPU and Memory
run_config.container_instance.cpu_cores = 1
run_config.container_instance.memory_gb = 2
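# A plausible continuation (hedged; the script path is an assumption): attach
# the ACI run configuration to a script run and submit it.
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory='./scripts',
                      script='train.py',
                      run_config=run_config)
run = experiment.submit(src)
run.wait_for_completion(show_output=True)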
Example #18
    )
    parser.add_argument("--box_iou_thresh",
                        type=float,
                        required=False,
                        help="IoU threshold")

    # parse arguments
    args = parser.parse_args()

    ws = Workspace.create(
        name=args.workspace_name,
        subscription_id=args.subscription_id,
        resource_group=args.resource_group,
        exist_ok=True,
    )
    experiment = Experiment(ws, name=args.experiment_name)

    # load the best child
    automl_image_run = AutoMLRun(experiment=experiment, run_id=args.run_id)
    best_child_run = automl_image_run.get_best_child()

    model_type = None
    if args.task_type == "image-object-detection":
        if args.model_name.startswith("yolo"):
            # yolo settings
            model_settings = {
                "img_size": args.img_size,
                "model_size": args.model_size,
                "box_score_thresh": args.box_score_thresh,
                "box_iou_thresh": args.box_iou_thresh,
            }
        cluster_name=args.clustername,
        vm_size=args.vmsize,
        max_nodes=args.maxnodes,
    )

    run_config = create_run_config(
        cpu_cluster=cpu_cluster,
        docker_proc_type=docker_proc_type,
        conda_env_file=args.condafile,
    )

    logger.info("exp: In Azure, look for experiment named {}".format(
        args.expname))

    # create new or use existing experiment
    experiment = Experiment(workspace=workspace, name=args.expname)
    run = submit_experiment_to_azureml(
        test=args.test,
        test_folder=args.testfolder,
        test_markers=args.testmarkers,
        junitxml=args.junitxml,
        run_config=run_config,
        experiment=experiment,
    )

    # add helpful information to experiment on Azure
    run.tag("RepoName", args.reponame)
    run.tag("Branch", args.branch)
    run.tag("PR", args.pr)
    # download files from AzureML
    run.download_files(prefix="reports", output_paths="./reports")
Example #20
# 06-run-pytorch-data.py
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core import Dataset

if __name__ == "__main__":
    ws = Workspace.from_config()
    datastore = ws.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, 'datasets/cifar10'))

    experiment = Experiment(workspace=ws, name='day1-experiment-data')

    config = ScriptRunConfig(
        source_directory='./src',
        script='train.py',
        compute_target='cpu-cluster',
        arguments=[
            '--data_path',
            dataset.as_named_input('input').as_mount(), '--learning_rate',
            0.003, '--momentum', 0.92
        ],
    )
    # set up pytorch environment
    env = Environment.from_conda_specification(
        name='pytorch-env', file_path='./environments/pytorch-env.yml')
    config.run_config.environment = env

    run = experiment.submit(config)
    aml_url = run.get_portal_url()
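    # Presumably printed so the user can open the run in the studio (not shown
    # in the original excerpt):
    print(aml_url)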
Example #21
    sj = modelIncubation(sj)
    iq = modelIncubation(iq)

    return sj, iq

sj_train, iq_train = preprocess_data('./data/dengue_features_train.csv',
                                     labels_path='./data/dengue_labels_train.csv')


sj_train.head(10)

#%% create experiment
experiment_name = 'myDengueExp'

from azureml.core import Experiment
exp = Experiment(workspace=ws, name=experiment_name)
print('Experiment created.')


#%% attach existing compute target or attach a new one
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")
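# A plausible continuation (hedged sketch of the standard provisioning pattern;
# 'ws' is assumed from earlier in the notebook):
if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
else:
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes)
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
    compute_target.wait_for_completion(show_output=True)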
Example #22
import os

import azureml.core
from azureml.core import Experiment, Workspace, Model
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.hyperdrive import GridParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn

ws = Workspace.from_config()
print("Ready to use Azure ML", azureml.core.VERSION)
print('Ready to work with', ws.name)

# Create an experiment
experiment_name = 'diabetes_training'
experiment = Experiment(workspace=ws, name=experiment_name)

# Create a folder for the experiment files
experiment_folder = './' + experiment_name
os.makedirs(experiment_folder, exist_ok=True)

print("Experiment:", experiment.name)

#Fetch GPU cluster for computations
gpu_cluster = ComputeTarget(workspace=ws, name='demo-GPU-cluster')

# Sample a range of parameter values
params = GridParameterSampling({
    # There's only one parameter, so grid sampling will try each value - with multiple parameters it would try every combination
    '--regularization':
    choice(0.001, 0.005, 0.01, 0.05, 0.1, 1.0)
Example #23
shutil.copy(f'./project/{args.project_name}.config.json',
            f'./src/{fn_config_infer}')

script_folder = "./"
tasks = params.get("tasks")

############################################
#####  PREPARE
############################################

if args.do_prepare:
    logging.warning(f'[INFO] Running prepare for {args.project_name}')
    for task in tasks:
        config = tasks.get(task)
        if config.get('prepare'):
            exp = Experiment(workspace=ws,
                             name=f'{args.project_name}_prepare_{task}')
            print(f'[INFO] Running prepare for {task}')
            script_params = {
                '--task': int(task),
                '--do_format': '',
                '--register_data': ''
            }
            est = Estimator(source_directory=script_folder,
                            compute_target=compute_target,
                            script_params=script_params,
                            entry_script='src/prepare.py',
                            pip_packages=pip_packages,
                            use_gpu=False)
            run = exp.submit(est)
    if args.do_train:
        run.wait_for_completion(show_output=True)
Example #25
    def start(self):
        ws = AzureProject(self.ctx)._get_ws()

        dataset_name = self.ctx.config.get('dataset', None)
        if dataset_name is None:
            raise AzureException('Please specify Dataset name...')
        experiment_name = self._fix_name(
            self.ctx.config.get('experiment/name', dataset_name))
        cluster_name = self._fix_name(
            self.ctx.config.get('cluster/name', 'cpucluster'))

        self.ctx.log("Starting search on %s Dataset..." % dataset_name)
        dataset = Dataset.get_by_name(ws, dataset_name)
        #TODO dataset = dataset.drop_columns(columns)

        compute_target = self._get_compute_target(ws, cluster_name)

        model_type = self.ctx.config.get('model_type')
        if not model_type:
            raise AzureException('Please specify model type...')
        primary_metric = self.ctx.config.get(
            'experiment/metric', 'spearman_correlation')
        if not primary_metric:
            raise AzureException('Please specify primary metric...')
        #TODO: check if primary_metric is consistent with model_type
        target = self.ctx.config.get('target')
        if not target:
            raise AzureException('Please specify target column...')

        automl_settings = {
            "iteration_timeout_minutes" : self.ctx.config.get(
                'experiment/max_eval_time',10),
            "iterations" : self.ctx.config.get(
                'experiment/max_n_trials',10),
            "primary_metric" : primary_metric,
            "verbosity" : logging.INFO,
            "enable_stack_ensemble": self.ctx.config.get(
                'experiment/use_ensemble', False)
        }

        validation_data = None
        if self.ctx.config.get('experiment/validation_data'):
            if self.ctx.config.get('validation_dataset'):
                validation_data = Dataset.get_by_name(ws, self.ctx.config.get('validation_dataset'))
            if not validation_data:
                res = AzureDataset(self.ctx).create(
                    source = self.ctx.config.get('experiment/validation_data'),
                    validation = True
                )
                validation_data = Dataset.get_by_name(ws, res['dataset'])
        else:    
            automl_settings["n_cross_validations"] = self.ctx.config.get(
                'experiment/cross_validation_folds', 5)
            if self.ctx.config.get('experiment/validation_size'):
                automl_settings["validation_size"] = self.ctx.config.get('experiment/validation_size')

        if self.ctx.config.get('experiment/max_total_time'):
            automl_settings["experiment_timeout_hours"] = float(self.ctx.config.get('experiment/max_total_time'))/60.0

        if self.ctx.config.get('exclude'):
            fc = FeaturizationConfig()
            fc.drop_columns = self.ctx.config.get('exclude').split(",")
            automl_settings["featurization"] = fc

        automl_config = AutoMLConfig(
            task = model_type,
            debug_log = 'automl_errors.log',
            path = os.getcwd(),
            compute_target = compute_target,
            training_data = dataset,
            validation_data = validation_data,
            label_column_name = target,
            **automl_settings)

        experiment = Experiment(ws, experiment_name)
        run = experiment.submit(automl_config, show_output = False)
        self.ctx.log("Started Experiment %s search..." % experiment_name)
        self.ctx.config.set('azure', 'experiment/name', experiment_name)
        self.ctx.config.set('azure', 'experiment/run_id', run.run_id)
        self.ctx.config.write('azure')

        return {'experiment_name': experiment_name, 'run_id': run.run_id}
from utils import load_data
import matplotlib.pyplot as plt
import azureml.core
print(azureml.core.VERSION)
from azureml.core import Workspace
ws = Workspace.create(name='mnist1',
                      subscription_id='bcbc4e01-e5d6-42b0-95af-06286341e6ca',
                      resource_group='mnist3',
                      create_resource_group=True,
                      location='eastus2')
ws.get_details()

from azureml.core import Experiment

# Create a new experiment in your workspace.
exp = Experiment(workspace=ws, name='myexp1')
# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the model converge faster.
X_train = load_data(os.path.join(data_folder, 'train-images.gz'),
                    False) / 255.0
X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0
y_train = load_data(os.path.join(data_folder, 'train-labels.gz'),
                    True).reshape(-1)
y_test = load_data(os.path.join(data_folder, 'test-labels.gz'),
                   True).reshape(-1)

# now let's show some randomly chosen images from the training set,
# simply mapping X_train to y_train.
count = 0
sample_size = 30
plt.figure(figsize=(16, 6))
for i in np.random.permutation(X_train.shape[0])[:sample_size]:
Example #27
# Get the latest evaluation result
try:
    with open("aml_config/run_id.json") as f:
        config = json.load(f)
    if not config["run_id"]:
        raise Exception(
            "No new model to register as production model performs better")
except Exception:
    print("No new model to register as production model performs better")
    # raise Exception('No new model to register as production model performs better')
    sys.exit(0)

run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

run = Run(experiment=exp, run_id=run_id)
names = run.get_file_names()
print("Run ID for last run: {}".format(run_id))
model_local_dir = "model"
os.makedirs(model_local_dir, exist_ok=True)

# Download Model to Project root directory
model_name = "sklearn_regression_model.pkl"
run.download_file(name="./outputs/" + model_name,
                  output_file_path="./model/" + model_name)
print("Downloaded model {} to Project root directory".format(model_name))
os.chdir("./model")
model = Model.register(
import os
from os import makedirs, path

import tensorflow as tf

logs_dir = os.path.join(os.curdir, "logs")
data_dir = os.path.abspath(os.path.join(os.curdir, "mnist_data"))

if not path.exists(data_dir):
    makedirs(data_dir)

os.environ["TEST_TMPDIR"] = data_dir

# Writing logs to ./logs results in their being uploaded to Artifact Service,
# and thus, made accessible to our TensorBoard instance.
arguments_list = ["--log_dir", logs_dir, "--data_dir", data_dir]

# Create an experiment
exp = Experiment(ws, experiment_name)

# If you would like the run to go for longer, add --max_steps 5000 to the arguments list:
# arguments_list += ["--max_steps", "5000"]

script = ScriptRunConfig('./scripts',
                         script="mnist_with_summaries.py",
                         run_config=run_config,
                         arguments=arguments_list)

run = exp.submit(script)
runs.append(run)

run.wait_for_completion(show_output=True)

# Once more, with a Batch AI cluster
    load_dotenv()
    workspace_name = os.environ.get("WORKSPACE_NAME")
    experiment_name = os.environ.get("EXPERIMENT_NAME")
    resource_group = os.environ.get("RESOURCE_GROUP")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    model_name = os.environ.get("MODEL_NAME")
    app_id = os.environ.get('SP_APP_ID')
    app_secret = os.environ.get('SP_APP_SECRET')
    build_id = os.environ.get('BUILD_BUILDID')

    aml_workspace = Workspace.get(name=workspace_name,
                                  subscription_id=subscription_id,
                                  resource_group=resource_group)
    ws = aml_workspace
    exp = Experiment(ws, experiment_name)
else:
    sys.path.append(os.path.abspath("./util"))  # NOQA: E402
    from model_helper import get_model_by_tag
    exp = run.experiment
    ws = run.experiment.workspace
    run_id = 'amlcompute'

parser = argparse.ArgumentParser("evaluate")
parser.add_argument(
    "--build_id",
    type=str,
    help="The Build ID of the build triggering this pipeline run",
)
parser.add_argument(
    "--run_id",
Example #30
import azureml
from azureml.core import Workspace, Experiment
from azureml.train.estimator import Estimator
azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 10 * 10**9  # 10GB

# Loading the workspace
ws = Workspace.from_config()

# Creating an Estimator which is the environment for your experiment
estimator = Estimator(
    source_directory="./model",
    entry_script="train_char_rnn.py",
    script_params={
        "--dataset":
        "shakespeare",  # TODO: Specify the same dataset_name you provided earlier 
        "--modelname": "shakespeare_model",  # TODO: Specify your modelname
        "--n_epochs": 2000
    },
    compute_target="az-workshop-ci",  # TODO: Specify your compute target
    pip_packages=[
        "azureml-core", "azureml-dataprep", "azureml-train", "pandas", "torch",
        "torchvision", "tqdm", "Unidecode"
    ])

# TODO: Create an "Experiment" and use the submit method to submit the "estimator" object.
# Receive the return object of the submitted experiment and call its "wait_for_completion(show_output=True)" method.
# This will show you the logs for the submitted experiment.
experiment = Experiment(workspace=ws, name="demo-skatteetaten-dataset")
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=True)