Example #1
                                                   min_nodes=0,
                                                   max_nodes=1)

    compute_target = ComputeTarget.create(workspace=ws,
                                          name=compute_name,
                                          provisioning_configuration=config)
    compute_target.wait_for_completion(show_output=True,
                                       min_node_count=None,
                                       timeout_in_minutes=20)

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_GPU_IMAGE

cd = CondaDependencies.create(pip_packages=[
    "tensorflow-gpu==1.15.2", "azureml-core", "azureml-dataprep[fuse]"
])
env = Environment(name="parallelenv")
env.python.conda_dependencies = cd
env.docker.base_image = DEFAULT_GPU_IMAGE

from azureml.pipeline.steps import ParallelRunConfig

parallel_run_config = ParallelRunConfig(environment=env,
                                        entry_script="batch_scoring.py",
                                        source_directory="scripts",
                                        output_action="append_row",
                                        mini_batch_size="20",
                                        error_threshold=1,
                                        compute_target=compute_target,
                                        process_count_per_node=2,
                                        node_count=1)  # node_count assumed; the original snippet is truncated here
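
# A minimal sketch (not part of the original snippet) of wiring the ParallelRunConfig
# above into a ParallelRunStep and pipeline; the input dataset name, output datastore,
# and experiment name are assumptions.
from azureml.core import Dataset, Experiment
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import ParallelRunStep

input_ds = Dataset.get_by_name(ws, name='batch_scoring_input')  # assumed dataset name
output_dir = PipelineData(name='scores', datastore=ws.get_default_datastore())

batch_score_step = ParallelRunStep(name='parallel-batch-scoring',
                                   parallel_run_config=parallel_run_config,
                                   inputs=[input_ds.as_named_input('batch_data')],
                                   output=output_dir,
                                   allow_reuse=True)

pipeline = Pipeline(workspace=ws, steps=[batch_score_step])
run = Experiment(ws, 'batch-scoring').submit(pipeline)
run.wait_for_completion(show_output=True)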
Example #2
# In[ ]:

get_ipython().run_cell_magic('writefile', 'dockerfile',
                             'RUN apt-get update && apt-get install -y g++')

# In[ ]:

# create a Conda dependencies environment file
print("Creating conda dependencies file locally...")
from azureml.core.conda_dependencies import CondaDependencies
conda_packages = ['numpy', 'pandas']
pip_packages = [
    'scikit-learn==0.20.3', 'sklearn_pandas', 'azureml-sdk',
    'azureml-explain-model', 'azureml-contrib-explain-model'
]
mycondaenv = CondaDependencies.create(conda_packages=conda_packages,
                                      pip_packages=pip_packages)

conda_file = 'sklearn_dependencies.yml'
with open(conda_file, 'w') as f:
    f.write(mycondaenv.serialize_to_string())

runtime = 'python'

# create container image configuration
print("Creating container image configuration...")
from azureml.core.image import ContainerImage
image_config = ContainerImage.image_configuration(execution_script='score.py',
                                                  docker_file='dockerfile',
                                                  runtime=runtime,
                                                  conda_file=conda_file)
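
# A minimal sketch (not from the original snippet) of how the image configuration above
# is typically built into an image and deployed to ACI; the image name, service name,
# and the 'model' and 'ws' objects are assumptions.
from azureml.core.webservice import AciWebservice, Webservice

image = ContainerImage.create(name='sklearn-explain-image',  # assumed image name
                              models=[model],  # a previously registered Model (assumed)
                              image_config=image_config,
                              workspace=ws)    # ws assumed to be an existing Workspace
image.wait_for_creation(show_output=True)

aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
service = Webservice.deploy_from_image(workspace=ws,
                                       name='sklearn-explain-svc',  # assumed service name
                                       image=image,
                                       deployment_config=aci_config)
service.wait_for_deployment(show_output=True)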
Example #3
print("5. Instantiate and configure run object for the managed compute...")
print('.............................................')
# Create runconfig object
amlComputeRunConf = RunConfiguration()
# Use the compute provisioned
amlComputeRunConf.target = args.aml_compute_target
# Enable Docker
amlComputeRunConf.environment.docker.enabled = True
# Set Docker base image to the default CPU-based image
amlComputeRunConf.environment.docker.base_image = DEFAULT_CPU_IMAGE
# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
amlComputeRunConf.environment.python.user_managed_dependencies = False
# Auto-prepare the Docker image when used for execution (if it is not already prepared)
amlComputeRunConf.auto_prepare_environment = True
# Specify CondaDependencies obj, add necessary packages
amlComputeRunConf.environment.python.conda_dependencies = CondaDependencies.create(
    pip_packages=['numpy', 'pandas', 'scikit-learn', 'azureml-sdk'])
print("..5. completed")
print('')
print('')

print("6. Define pipeline stage - training...")
print('.............................................')
training_output = PipelineData('train_output', datastore=amlWsStorageRef)
trainPipelineStep = PythonScriptStep(name="train",
                                     script_name="train.py",
                                     arguments=[
                                         "--model_name", args.model_name,
                                         "--build_number", args.build_number
                                     ],
                                     outputs=[training_output],
                                     compute_target=amlTrainingComputeRef,
                                     runconfig=amlComputeRunConf)  # assumed; the original snippet is truncated here
validdata_dataset = Dataset.get_by_name(ws, name='valid_data_ds')
testdata_dataset = Dataset.get_by_name(ws, name='test_data_ds')
traintarget_dataset = Dataset.get_by_name(ws, name='train_target_ds')
validtarget_dataset = Dataset.get_by_name(ws, name='valid_target_ds')
testtarget_dataset = Dataset.get_by_name(ws, name='test_target_ds')

##########################################################################################

############################# Run Configuration Setup #################################

from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_config = RunConfiguration()
run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages = ['keras<=2.3.1','pandas','matplotlib',
                                                                                            'opencv-python','azure-storage-blob==2.1.0','tensorflow-gpu==2.0.0',
                                                                                            'azureml','azureml-core','azureml-dataprep',
                                                                                           'azureml-dataprep[fuse]','azureml-pipeline'])

##########################################################################################

############################# Pythonscript for preprocessing ###################################

from azureml.core import Workspace,Datastore
from azureml.pipeline.core import Pipeline, PipelineParameter, PipelineData
from azureml.pipeline.steps import PythonScriptStep

import os
script_folder = os.path.join(os.getcwd(), "PreProcessing")

print("Pipeline SDK-specific imports completed")
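
# A minimal sketch (not from the original snippet) of a preprocessing step using the
# script_folder and run_config defined above; the script name, compute target name,
# and output are assumptions.
preprocessed_data = PipelineData('preprocessed_data', datastore=Datastore.get_default(ws))

preprocess_step = PythonScriptStep(name='preprocess',
                                   script_name='preprocess.py',   # assumed script name
                                   source_directory=script_folder,
                                   outputs=[preprocessed_data],
                                   compute_target='gpu-cluster',  # assumed compute target
                                   runconfig=run_config,
                                   allow_reuse=True)

preprocess_pipeline = Pipeline(workspace=ws, steps=[preprocess_step])
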
def train_step(datastore, train_dir, valid_dir, vocab_dir, compute_target):
    '''
    This step trains the model on the train and validation data produced by the
    previous step and writes the trained model to model_dir.
    
    :param datastore: The datastore that will be used
    :type datastore: Datastore
    :param train_dir: The reference to the directory containing the training data
    :type train_dir: DataReference
    :param valid_dir: The reference to the directory containing the validation data
    :type valid_dir: DataReference
    :param vocab_dir: The reference to the directory containing the vocab data
    :type vocab_dir: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget
    
    :return: The training step, step outputs dictionary (keys: model_dir)
    :rtype: PythonScriptStep, dict
    '''

    run_config = RunConfiguration()
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
    run_config.environment.python.user_managed_dependencies = False
    conda_packages = ['pytorch', 'tqdm', 'nltk']
    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=conda_packages)

    # set hyperparameters of the model training step
    input_col = PipelineParameter(name='input_col', default_value='Title')
    output_col = PipelineParameter(name='output_col', default_value='Abstract')
    cuda = PipelineParameter(name='cuda', default_value=1)
    seed = PipelineParameter(name='seed', default_value=0)
    batch_size = PipelineParameter(name='batch_size', default_value=32)
    embed_size = PipelineParameter(name='embed_size', default_value=256)
    hidden_size = PipelineParameter(name='hidden_size', default_value=256)
    clip_grad = PipelineParameter(name='clip_grad', default_value=5.0)
    label_smoothing = PipelineParameter(name='label_smoothing',
                                        default_value=0.0)
    log_every = PipelineParameter(name='log_every', default_value=1)
    max_epoch = PipelineParameter(name='max_epoch', default_value=2)
    input_feed = PipelineParameter(name='input_feed', default_value=1)
    patience = PipelineParameter(name='patience', default_value=5)
    max_num_trial = PipelineParameter(name='max_num_trial', default_value=5)
    lr_decay = PipelineParameter(name='lr_decay', default_value=0.5)
    beam_size = PipelineParameter(name='beam_size', default_value=5)
    sample_size = PipelineParameter(name='sample_size', default_value=5)
    lr = PipelineParameter(name='lr', default_value=0.001)
    uniform_init = PipelineParameter(name='uniform_init', default_value=0.1)
    valid_niter = PipelineParameter(name='valid_niter', default_value=2000)
    dropout = PipelineParameter(name='dropout', default_value=0.3)
    max_decoding_time_step = PipelineParameter(name='max_decoding_time_step',
                                               default_value=70)

    model_dir = PipelineData(name='model_dir',
                             pipeline_output_name='model_dir',
                             datastore=datastore,
                             output_mode='mount',
                             is_directory=True)

    outputs = [model_dir]
    outputs_map = {
        'model_dir': model_dir,
    }

    step = PythonScriptStep(name="Train",
                            script_name='train.py',
                            arguments=[
                                '--train_dir',
                                train_dir,
                                '--valid_dir',
                                valid_dir,
                                '--input_col',
                                input_col,
                                '--output_col',
                                output_col,
                                '--vocab_dir',
                                vocab_dir,
                                '--model_dir',
                                model_dir,
                                '--cuda',
                                cuda,
                                '--seed',
                                seed,
                                '--batch_size',
                                batch_size,
                                '--embed_size',
                                embed_size,
                                '--hidden_size',
                                hidden_size,
                                '--clip_grad',
                                clip_grad,
                                '--label_smoothing',
                                label_smoothing,
                                '--log_every',
                                log_every,
                                '--max_epoch',
                                max_epoch,
                                '--input_feed',
                                input_feed,
                                '--patience',
                                patience,
                                '--max_num_trial',
                                max_num_trial,
                                '--lr_decay',
                                lr_decay,
                                '--beam_size',
                                beam_size,
                                '--sample_size',
                                sample_size,
                                '--lr',
                                lr,
                                '--uniform_init',
                                uniform_init,
                                '--valid_niter',
                                valid_niter,
                                '--dropout',
                                dropout,
                                '--max_decoding_time_step',
                                max_decoding_time_step,
                            ],
                            inputs=[train_dir, valid_dir, vocab_dir],
                            outputs=outputs,
                            compute_target=compute_target,
                            runconfig=run_config,
                            source_directory=os.path.dirname(
                                os.path.abspath(__file__)),
                            allow_reuse=True)

    return step, outputs_map
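
# A minimal sketch (not from the original snippet) of assembling the step above into a
# pipeline; the workspace, the train_dir/valid_dir/vocab_dir DataReference objects, the
# compute target, and the experiment name are assumptions.
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

datastore = ws.get_default_datastore()  # ws assumed to be an existing Workspace
step, outputs_map = train_step(datastore=datastore,
                               train_dir=train_dir,  # DataReference (assumed to exist)
                               valid_dir=valid_dir,  # DataReference (assumed to exist)
                               vocab_dir=vocab_dir,  # DataReference (assumed to exist)
                               compute_target=compute_target)

pipeline = Pipeline(workspace=ws, steps=[step])
pipeline_run = Experiment(ws, 'train-pipeline').submit(pipeline)  # assumed experiment name
pipeline_run.wait_for_completion(show_output=True)
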
# Code for What's a run configuration

# <run_system_managed>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_system_managed = RunConfiguration()

# Specify the conda dependencies with scikit-learn
run_system_managed.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'])
# </run_system_managed>
print(run_system_managed)

# <run_user_managed>
from azureml.core.runconfig import RunConfiguration

run_user_managed = RunConfiguration()
run_user_managed.environment.python.user_managed_dependencies = True

# Choose a specific Python environment by pointing to a Python path. For example:
# run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'
# </run_user_managed>
print(run_user_managed)
Example #7
run_config = RunConfiguration()

# signal that you want to use AmlCompute to execute script.
run_config.target = "amlcompute"

# AmlCompute will be created in the same region as workspace
# Set vm size for AmlCompute
run_config.amlcompute.vm_size = 'STANDARD_D2_V2'

# enable Docker 
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# auto-prepare the Docker image when used for execution (if it is not already prepared)
run_config.auto_prepare_environment = True

# specify CondaDependencies obj
run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])

# Now submit a run on AmlCompute
from azureml.core.script_run_config import ScriptRunConfig

script_run_config = ScriptRunConfig(source_directory=project_folder, script='train.py', run_config=run_config)

run = experiment.submit(script_run_config)
run.wait_for_completion()
def RunAutoMLForecast():
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']
    location = request.json['location']
    target_var = request.json['target_var']
    cluster_name = request.json['cluster_name']
    best_model = request.json['best_model']
    time_column_name = request.json['time_column_name']
    max_horizon = request.json['max_horizon']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)

    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')
    compute_target = AmlCompute(ws, cluster_name)
    print('Found existing AML compute context.')
    dataset_name = file_name
    time_column_name = time_column_name
    # Get a dataset by name
    dataset = Dataset.get_by_name(workspace=ws,
                                  name=dataset_name).with_timestamp_columns(
                                      fine_grain_timestamp=time_column_name)
    print(dataset)
    #df_ts = Dataset.Tabular.from_delimited_files(df_ts)
    dataset.to_pandas_dataframe().describe()
    dataset.take(3).to_pandas_dataframe()
    print(dataset)
    #y_df = df_ts[target_var].values
    #x_df = df_ts.drop([target_var], axis=1)
    print('file successfully received.')
    #stock_dataset_df.head()
    # create a new RunConfig object
    conda_run_config = RunConfiguration(framework="python")
    conda_run_config.environment.docker.enabled = True
    conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
    cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'],
                                  conda_packages=['numpy', 'py-xgboost<=0.80'])
    conda_run_config.environment.python.conda_dependencies = cd
    print('run config is ready')
    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    n_cross_validations = request.json['n_cross_validations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']
    #max_concurrent_iterations = request.json['max_concurrent_iterations']

    automl_settings = {
        'time_column_name': time_column_name,
        'max_horizon': max_horizon,
        "iterations": iterations,
    }

    automl_config = AutoMLConfig(
        task=tasks,
        primary_metric=primary_metric,
        #blacklist_models = ['ExtremeRandomTrees', 'AutoArima', 'Prophet'],
        experiment_timeout_minutes=iteration_timeout_minutes,
        training_data=dataset,
        label_column_name=target_var,
        compute_target=compute_target,
        enable_early_stopping=True,
        n_cross_validations=n_cross_validations,
        #verbosity=logging.INFO,
        **automl_settings)
    print("AutoML config created.")
    experiment = Experiment(ws, ExperimentName)
    remote_run = experiment.submit(automl_config, show_output=True)
    children = list(remote_run.get_children())
    metricslist = {}
    for run in children:
        properties = run.get_properties()
        metrics = {
            k: v
            for k, v in run.get_metrics().items() if isinstance(v, float)
        }
        metricslist[int(properties['iteration'])] = metrics

    rundata = pd.DataFrame(metricslist).sort_values(by=primary_metric, axis=1)
    rundata.rename(columns={
        0: "one",
        1: "two",
        2: "three",
        3: "four",
        4: "five",
        5: "six",
        6: "seven",
        7: "eight",
        8: "nine",
        9: "ten",
    },
                   inplace=True)
    iterations_toJson = rundata.to_json(orient='columns')
    print(iterations_toJson)
    best_run, fitted_model = remote_run.get_output()
    #best_run_toJson = best_run.get_metrics()
    #dict = {}
    #dict['iterations_toJson'] = iterations_toJson
    #dict['best_run_toJson'] = best_run_toJson
    #print(best_run.get_file_names())
    #Register the model
    #from datetime import date
    model = remote_run.register_model(model_name=best_model,
                                      description='AutoML Model')
    print(model.name, model.id, model.version, sep='\t')
    best_model = model.name
    best_model
    var1 = "@"
    var2 = var1 + best_model
    return '{} {}'.format(iterations_toJson, var2)
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()

myenv.add_pip_package("numpy")
myenv.add_pip_package("scikit-learn")
# myenv.add_conda_package("nltk")


with open("aml_config/myenv.yml", "w") as f:
    f.write(myenv.serialize_to_string())
print('Found existing AML compute context.')
dataset_name = file_name

# Get a dataset by name
df = Dataset.get_by_name(workspace=ws, name=dataset_name)

X = df.drop_columns(columns=[target_var])
y = df.keep_columns(columns=[target_var], validate=True)
print(y)
#y = diabetes.pop('Y')
#X_train, X_test, y_train, y_test = train_test_split(diabetes, y, test_size=0.2, random_state=0)
#data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}}
conda_run_config = RunConfiguration(framework="python")
conda_run_config.environment.docker.enabled = True
conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'],
                              conda_packages=['numpy', 'py-xgboost<=0.80'])
conda_run_config.environment.python.conda_dependencies = cd
print('run config is ready')

ExperimentName = request.json['ExperimentName']
tasks = request.json['tasks']
iterations = request.json['iterations']
n_cross_validations = request.json['n_cross_validations']
iteration_timeout_minutes = request.json['iteration_timeout_minutes']
primary_metric = request.json['primary_metric']
max_concurrent_iterations = request.json['max_concurrent_iterations']

automl_settings = {
    "name": ExperimentName,
    "iteration_timeout_minutes": iteration_timeout_minutes,
    "iterations": iterations,
Example #11
def RunAutoMLReg():
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']
    location = request.json['location']
    target_var = request.json['target_var']
    cluster_name = request.json['cluster_name']
    best_model = request.json['best_model']
    #best_model = request.json['best_model']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)

    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')
    #compute_target = AmlCompute(ws, cluster_name)
    compute_target = ws.compute_targets[cluster_name]
    print('Found existing AML compute context.')
    dataset_name = file_name

    # Get a dataset by name
    df = Dataset.get_by_name(workspace=ws, name=dataset_name)
    #stock_dataset_df = df.to_pandas_dataframe()
    print('file successfully received.')
    #stock_dataset_df.head()
    #stock_dataset_json = stock_dataset_df.to_json(orient='split')
    #print(stock_dataset_json)
    X = df.drop_columns(columns=[target_var])
    y = df.keep_columns(columns=[target_var], validate=True)
    #y_df = stock_dataset_df[target_var].values
    #x_df = stock_dataset_df.drop([target_var], axis=1)
    print(y)
    # create a new RunConfig object
    conda_run_config = RunConfiguration(framework="python")
    conda_run_config.environment.docker.enabled = True
    conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
    cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'],
                                  conda_packages=['numpy', 'py-xgboost<=0.90'])
    conda_run_config.environment.python.conda_dependencies = cd
    print('run config is ready')
    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    n_cross_validations = request.json['n_cross_validations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']
    max_concurrent_iterations = request.json['max_concurrent_iterations']

    try:
        automl_settings = {
            "name": ExperimentName,
            "iteration_timeout_minutes": iteration_timeout_minutes,
            "featurization": 'auto',
            "iterations": iterations,
            "n_cross_validations": n_cross_validations,
            "primary_metric": primary_metric,
            "preprocess": True,
            "max_concurrent_iterations": max_concurrent_iterations
            #"verbosity": logging.INFO
        }

        automl_config = AutoMLConfig(
            task=tasks,
            debug_log='automl_errors.log',
            blacklist_models=['XGBoost'],
            #path=os.getcwd(),
            compute_target=compute_target,
            #run_configuration=conda_run_config,
            X=X,
            y=y,
            **automl_settings,
        )

        experiment = Experiment(ws, ExperimentName)
        remote_run = experiment.submit(automl_config, show_output=True)
        remote_run.flush(timeout_seconds=400)
        children = list(remote_run.get_children())
        metricslist = {}
        for run in children:
            properties = run.get_properties()
            metrics = {
                k: v
                for k, v in run.get_metrics().items() if isinstance(v, float)
            }
            metricslist[int(properties['iteration'])] = metrics

        rundata = pd.DataFrame(metricslist).sort_values(by=primary_metric,
                                                        axis=1)
        rundata = rundata.drop([
            'mean_absolute_percentage_error',
            'normalized_median_absolute_error',
            'normalized_root_mean_squared_log_error',
            'root_mean_squared_log_error'
        ])
        rundata.rename(columns={
            0: "one",
            1: "two",
            2: "three",
            3: "four",
            4: "five",
            5: "six",
            6: "seven",
            7: "eight",
            8: "nine",
            9: "ten",
        },
                       inplace=True)
        iterations_toJson = rundata.to_json(orient='columns')
        print(iterations_toJson)
        best_run, fitted_model = remote_run.get_output()
        best_run_toJson = best_run.get_metrics()
        cwd = 'D:/DCSAIAUTOML/BestModels/Azure'
        best_model_name = best_run.name
        model = remote_run.register_model(description=best_model)
        print(model.name, model.id, model.version, sep='\t')
        model_path = os.path.join(cwd, best_model, best_model_name)
        print(model_path)
        #print("Model DownLoad Complete")
        #model = Model(workspace=ws, name=model.name)
        #model.download_files(target_dir=model_path)
        #dict = {}
        #dict['iterations_toJson'] = iterations_toJson
        #dict['best_run_toJson'] = best_run_toJson
        #print(best_run.get_file_names())
        #Register the model
        #from datetime import date

        best_model_id = best_run.name

        var1 = "@"
        var2 = var1 + best_model_id

        Reg_model_name = model.name
        var4 = var1 + Reg_model_name

        best_run.flush(timeout_seconds=3600)
        best_run.download_files(output_directory=model_path)
        # importing required modules
        #import shutil
        #output_path = os.path.join(model_path, best_model_id)
        #dir_name1 = "D:\\DCSAIAUTOML\\BestModels\\Azure\\my_azure_best"
        #dir_name1 = "D:\\DCSAIAUTOML\\BestModels\\Azure\\my_azure_best\\my_azure_best"
        #shutil.make_archive(model_path,'zip',model_path)

        #zipf = zipfile.ZipFile(best_model_id+'.zip', 'w', zipfile.ZIP_DEFLATED)
        #for root, dirs, files in os.walk(model_path):
        #for file in files:
        #zipf.write(os.path.join(root, file))

        #def zipdir(path, ziph):
        # ziph is zipfile handle
        #import os
        #for root, dirs, files in os.walk(path):
        #for file in files:
        #ziph.write(os.path.join(root, file))

        #zipdir(model_path, zipf)
        #remote_run.clean_preprocessor_cache()
        print("ready to return")
        var5 = "no exception"
        return '{} {} {} {} {}'.format(iterations_toJson, var2, var4, var1,
                                       var5)
        #return iterations_toJson
    except Exception as e:
        error_statement = str(e)
        print("Error statement: ", error_statement)
        model_path1 = os.path.join(model_path, 'outputs')
        file_name = 'model.pkl'
        print("in exception: ", model_path1)
        src = 'D:\\Final Script_dev'
        full_file_name = os.path.join(src, file_name)
        import shutil
        #remote_run.download_file('model.pkl', output_file_path=model_path1)
        if os.path.isfile(full_file_name):
            shutil.copy(full_file_name, model_path1)
        return '{} {} {} {} {}'.format(iterations_toJson, var2, var4, var1,
                                       error_statement)
Example #12
def generate_yaml(
    directory: str,
    ref_filename: str,
    needed_libraries: list,
    conda_filename: str,
):
    """
    Creates a deployment-specific yaml file as a subset of
    the image classification environment.yml

    Also adds extra libraries, if not present in environment.yml

    Args:
        directory (string): Directory name of reference yaml file
        ref_filename (string): Name of reference yaml file
        needed_libraries (list of strings): List of libraries needed
        in the Docker container
        conda_filename (string): Name of yaml file to be deployed
        in the Docker container

    Returns: Nothing

    """

    with open(os.path.join(directory, ref_filename), "r") as f:
        yaml_content = yaml.load(f, Loader=yaml.FullLoader)

    # Extract libraries to be installed using conda
    extracted_libraries = [
        depend for depend in yaml_content["dependencies"]
        if any(lib in depend for lib in needed_libraries)
    ]

    # Extract libraries to be installed using pip
    if any(isinstance(x, dict) for x in yaml_content["dependencies"]):
        # if the reference yaml file contains a "pip" section,
        # find where it is in the list of dependencies
        ind = [
            yaml_content["dependencies"].index(depend)
            for depend in yaml_content["dependencies"]
            if isinstance(depend, dict)
        ][0]
        extracted_libraries += [
            depend for depend in yaml_content["dependencies"][ind]["pip"]
            if any(lib in depend for lib in needed_libraries)
        ]

    # Check whether additional libraries are needed
    not_found = [
        lib for lib in needed_libraries
        if not any(lib in ext for ext in extracted_libraries)
    ]

    # Create the deployment-specific yaml file
    conda_env = CondaDependencies()
    for ch in yaml_content["channels"]:
        conda_env.add_channel(ch)
    for library in extracted_libraries + not_found:
        conda_env.add_conda_package(library)

    # Display the environment
    print(conda_env.serialize_to_string())

    # Save the file to disk
    conda_env.save_to_file(base_directory=os.getcwd(),
                           conda_file_path=conda_filename)
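
# A minimal sketch (not from the original snippet) of calling generate_yaml; the
# directory, file names, and library list are assumptions.
generate_yaml(directory='environment',            # folder holding the reference environment.yml (assumed)
              ref_filename='environment.yml',     # assumed reference file name
              needed_libraries=['scikit-learn', 'pandas', 'pillow'],  # assumed libraries
              conda_filename='deployment_env.yml')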
Example #13
    
    df = pd.DataFrame([data[1:]],columns=featurenames)
    
    # make prediction
    if data[0] == 'iq':
        result = iq_model.predict(df).astype(int)
    elif data[0] == 'sj':
        result = sj_model.predict(df).astype(int)

    # you can return any data type as long as it is JSON-serializable
    return result.tolist()

#%% create environment file for deployment
from azureml.core.conda_dependencies import CondaDependencies 

mymodelenv = CondaDependencies()
mymodelenv.add_conda_package("scikit-learn")
mymodelenv.add_conda_package("pandas")
mymodelenv.add_conda_package("statsmodels")
mymodelenv.add_conda_package("scipy=1.2")
mymodelenv.add_conda_package("numpy")

with open("mymodelenv.yml","w") as f:
    f.write(mymodelenv.serialize_to_string())
    
with open("mymodelenv.yml","r") as f:
    print(f.read())
    
print('Complete') 

Example #14
def run(workspace, config, args):
    compute_target_name = config['train']['compute_target_name']
    data_folder = config['train']['data_folder']

    try:
        compute_target = ComputeTarget(workspace=workspace,
                                       name=compute_target_name)
        print('found existing:', compute_target.name)
    except ComputeTargetException:
        print('creating new.')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=config['train']['vm_size'], min_nodes=0, max_nodes=1)
        compute_target = ComputeTarget.create(workspace, compute_target_name,
                                              compute_config)
        compute_target.wait_for_completion(show_output=True)

    # ds = Datastore.register_azure_blob_container(
    #     workspace,
    #     datastore_name=config['train']['datastore_name'],
    #     account_name=config['train']['account_name'],
    #     account_key=config['train']['account_key'],
    #     container_name=config['train']['container_name'],
    #     overwrite=True)
    #
    # # # Upload local "data" folder (incl. files) as "tfdata" folder
    # ds.upload(
    #     src_dir=config['train']['local_directory'],
    #     target_path=data_folder,
    #     overwrite=True)

    ds = Datastore.get(workspace,
                       datastore_name=config['train']['datastore_name'])

    # generate data reference configuration
    dr_conf = DataReferenceConfiguration(
        datastore_name=ds.name, path_on_datastore=data_folder, mode='mount'
    )  # set 'download' if you copy all files instead of mounting

    run_config = RunConfiguration(framework="python",
                                  conda_dependencies=CondaDependencies.create(
                                      conda_packages=ast.literal_eval(
                                          config['train']['conda_packages'])))
    run_config.target = compute_target.name
    run_config.data_references = {ds.name: dr_conf}
    run_config.environment.docker.enabled = True
    # run_config.environment.docker.gpu_support = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE

    src = ScriptRunConfig(
        source_directory='./script',
        script='train.py',
        run_config=run_config,
        arguments=[
            '--datadir',
            str(ds.as_mount()), '--step', args.step, '--train_on',
            args.train_on, '--fold', args.fold, '--epochs', args.epochs,
            '--experiment', args.experiment, '--reference', args.reference,
            '--batchsize', args.batchsize, '--optimizertype',
            args.optimizertype, '--convrnn_filters', args.convrnn_filters,
            '--learning_rate', args.learning_rate, '--pix250m', args.pix250m
        ])
    # exp = Experiment(workspace=ws, name='test20181210-09')
    exp = Experiment(workspace=workspace,
                     name=config['train']['experiment_name'])
    run = exp.submit(config=src)
    run.wait_for_completion(show_output=True)
        data_reference_name="input_data_ref",
        path_on_datastore=f"{project_config['project_name']}/data/")

    processed_data_ref = PipelineData("processed_data_ref",
                                      datastore=def_blob_store)

    run_config = RunConfiguration()
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    run_config.environment.python.user_managed_dependencies = False
    pip_packages = [
        "azureml-sdk==1.0.17", "scikit-learn==0.21.3", "download==0.3.4",
        "pandas==0.25.1", "spacy==2.1.4", "numpy==1.17.2"
    ]

    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        pip_packages=pip_packages)

    pipeline_params = []
    for k, v in vars(auth_params).items():
        pipeline_params.append("--" + k)
        pipeline_params.append(PipelineParameter(name=k, default_value=v))

    auth_params = pipeline_params.copy()
    pipeline_params += ["--processed_data_ref", processed_data_ref]
    pipeline_params += ["--input_data_ref", input_data_ref]
    process_step = PythonScriptStep(script_name="process.py",
                                    arguments=pipeline_params,
                                    inputs=[input_data_ref],
                                    outputs=[processed_data_ref],
                                    compute_target=compute_target_cpu,
                                    source_directory='./',
                                    runconfig=run_config)  # assumed; the original snippet is truncated here
# Register Model
model = Model.register(
    model_path="./resources/models/sklearn_regression_model.pkl",  # this points to a local file
    model_name="sklearn_regression_model",  # this is the name the model is registered as
    tags={
        'area': "diabetes",
        'type': "regression"
    },
    description="Ridge regression model to predict diabetes",
    workspace=ws)

print("Creating docker image configuration...")
# Update your myenv.yml file with the required module
myenv = CondaDependencies.create(conda_packages=['numpy', 'scikit-learn'])
myenv.add_pip_package("azureml-monitoring")

with open(os.path.join(project_folder, "myenv.yml"), "w") as f:
    f.write(myenv.serialize_to_string())

shutil.copy("./scripts/score_diabetes.py", './')

# Create your new Image
image_config = ContainerImage.image_configuration(
    execution_script="score_diabetes.py",
    runtime="python",
    conda_file=os.path.join(project_folder, "myenv.yml"),
    description="Image with ridge regression model",
    tags={
        'area': "diabetes",
        create_output_directories=False,
        mechanism='mount',
        environment_variable_name=input_name,
        overwrite=True)
    return data


# Retrieve a datastore from a ML workspace
try:
    workspace = Workspace.from_config(auth=AzureCliAuthentication())
except UserErrorException:
    workspace = run.experiment.workspace

# Define the conda dependencies
cd = CondaDependencies(conda_dependencies_file_path=os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    'conda_dependencies_sklearn.yml'))

# define compute
compute_target = 'alwaysoncluster'

# define data set names
input_name_train = 'newsgroups_train'
input_name_test = 'newsgroups_test'

# Retrieve datasets
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Runconfig
amlcompute_run_config = RunConfiguration(
    conda_dependencies=cd)  # assumed; the original snippet is truncated here
print('Setting up experiment')
exp = Experiment(workspace=ws, name=experimentName)

print('Setting up cluster')
compute_target = ComputeTarget(workspace=ws, name=clusterName)

print('Setting up dataset')
mnistFileDataset = Dataset.get_by_name(workspace=ws, name=datasetName)

print("Defining environment")
# to install required packages
env = Environment('sklearn')
#cd = CondaDependencies.create(pip_packages=['azureml-dataprep[pandas,fuse]>=1.1.14', 'azureml-defaults'], conda_packages = ['scikit-learn==0.22.1'])
cd = CondaDependencies.create(pip_packages=[
    'azureml-sdk', 'scikit-learn==0.22.1',
    'azureml-dataprep[pandas,fuse]>=1.1.14'
])
env.python.conda_dependencies = cd

# Register environment to re-use later
env.register(workspace=ws)

print("Creating estimator")
script_params = {
    # to mount files referenced by mnist dataset
    '--data-folder': mnistFileDataset.as_named_input(datasetName).as_mount(),
    '--regularization': 0.5
}

est = Estimator(source_directory=scriptFolder,
                script_params=script_params,
def create_env(ws):
    '''Creates an Azure ML environment'''

    # Create environment object
    env = Environment(name='birdsong-env-gpu')

    # define packages for image
    cd = CondaDependencies.create(
        pip_packages=[
            'azureml-dataset-runtime[pandas,fuse]',
            'azureml-defaults',
            'tensorflow==2.4.0',  #'tensorflow==2.5.0',
            'tensorflow-io==0.17.1',  # 'tensorflow-io==0.18.0', 
            'tensorflow-addons==0.13.0',
            'Pillow',
            'scikit-learn',
            'kapre',
            'sndfile',
            'librosa',
            'psutil'
        ],
        conda_packages=['SciPy'])

    env.python.conda_dependencies = cd

    #Docker file
    dockerfile = r'''
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

FROM mcr.microsoft.com/azureml/o16n-base/python-assets:20210210.31228572 AS inferencing-assets

# Tag: cuda:11.0.3-devel-ubuntu18.04
# Env: CUDA_VERSION=11.0.3
# Env: NCCL_VERSION=2.8.3
# Env: CUDNN_VERSION=8.0.5.39

FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04

USER root:root

ENV com.nvidia.cuda.version $CUDA_VERSION
ENV com.nvidia.volumes.needed nvidia_driver
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV DEBIAN_FRONTEND noninteractive
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
ENV NCCL_DEBUG=INFO
ENV HOROVOD_GPU_ALLREDUCE=NCCL

# Install Common Dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    # SSH and RDMA
    libmlx4-1 \
    libmlx5-1 \
    librdmacm1 \
    libibverbs1 \
    libmthca1 \
    libdapl2 \
    dapl2-utils \
    openssh-client \
    openssh-server \
    iproute2 && \
    # Others
    apt-get install -y \
    build-essential \
    bzip2 \
    libbz2-1.0 \
    systemd \
    git \
    wget \
    cpio \
    pciutils \
    libnuma-dev \
    ibutils \
    ibverbs-utils \
    rdmacm-utils \
    infiniband-diags \
    perftest \
    librdmacm-dev \
    libibverbs-dev \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libssl1.0.0 \
    linux-image-aws \
    linux-image-azure \
    linux-image-generic \
    linux-image-kvm \
    linux-image-lowlatency \
    linux-image-virtual \
    linux-image-gke \
    linux-image-oem \
    slapd \
    perl \
    ca-certificates \
    apt \
    p11-kit \
    libp11-kit0 \
    tar \
    libsndfile-dev \
    fuse && \
    apt-get clean -y && \
    rm -rf /var/lib/apt/lists/*

# Inference
# Copy logging utilities, nginx and rsyslog configuration files, IOT server binary, etc.
COPY --from=inferencing-assets /artifacts /var/
RUN /var/requirements/install_system_requirements.sh && \
    cp /var/configuration/rsyslog.conf /etc/rsyslog.conf && \
    cp /var/configuration/nginx.conf /etc/nginx/sites-available/app && \
    ln -s /etc/nginx/sites-available/app /etc/nginx/sites-enabled/app && \
    rm -f /etc/nginx/sites-enabled/default
ENV SVDIR=/var/runit
ENV WORKER_TIMEOUT=300
EXPOSE 5001 8883 8888

# Conda Environment
ENV MINICONDA_VERSION py37_4.9.2
ENV PATH /opt/miniconda/bin:$PATH
RUN wget -qO /tmp/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \
    bash /tmp/miniconda.sh -bf -p /opt/miniconda && \
    conda clean -ay && \
    rm -rf /opt/miniconda/pkgs && \
    rm /tmp/miniconda.sh && \
    find / -type d -name __pycache__ | xargs rm -rf

# Open-MPI-UCX installation
RUN mkdir /tmp/ucx && \
    cd /tmp/ucx && \
	wget -q https://github.com/openucx/ucx/releases/download/v1.6.1-rc2/ucx-1.6.1.tar.gz && \
	tar zxf ucx-1.6.1.tar.gz && \
	cd ucx-1.6.1 && \
	./configure --prefix=/usr/local --enable-optimizations --disable-assertions --disable-params-check --enable-mt && \
	make -j $(nproc --all) && \
	make install && \
	rm -rf /tmp/ucx

# Open-MPI installation
ENV OPENMPI_VERSION 4.1.0
RUN mkdir /tmp/openmpi && \
    cd /tmp/openmpi && \
    wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz && \
    tar zxf openmpi-${OPENMPI_VERSION}.tar.gz && \
    cd openmpi-${OPENMPI_VERSION} && \
    ./configure --with-ucx=/usr/local/ --enable-mca-no-build=btl-uct --enable-orterun-prefix-by-default && \
    make -j $(nproc) all && \
    make install && \
    ldconfig && \
    rm -rf /tmp/openmpi

# Msodbcsql17 installation
RUN apt-get update && \
    apt-get install -y curl && \
    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    curl https://packages.microsoft.com/config/ubuntu/18.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
    apt-get update && \
    ACCEPT_EULA=Y apt-get install -y msodbcsql17

#Cmake Installation
RUN apt-get update && \
    apt-get install -y cmake

'''
    env.docker.base_image = None
    env.docker.base_dockerfile = dockerfile

    # Register environment to re-use later
    env = env.register(workspace=ws)
Example #20
from azureml.core.conda_dependencies import CondaDependencies

cd = CondaDependencies.create()
cd.add_tensorflow_conda_package()
cd.add_conda_package('keras<=2.3.1')
cd.add_pip_package("azureml-defaults")
cd.save_to_file(base_directory='./', conda_file_path='env.yml')

print(cd.serialize_to_string())
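
# A minimal sketch (not from the original snippet) of loading the saved env.yml back
# into an Environment for reuse; the environment name and the 'ws' workspace object
# are assumptions.
from azureml.core import Environment

keras_env = Environment.from_conda_specification(name='keras-env', file_path='./env.yml')
keras_env.register(workspace=ws)  # ws assumed to be an existing Workspace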
Example #21
# datastore = ws.get_default_datastore()
# datastore.download("./", prefix="deploy", overwrite=True, show_progress=True)

os.chdir(os.path.dirname(os.path.realpath(__file__)))
print(os.getcwd())
print(ws)
#run.register_model(model_name='iris-model',
#                    model_path="./outputs/model.pkl")

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

environment = Environment('my-sklearn-environment')
environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'azureml-defaults', 'inference-schema[numpy-support]', 'joblib', 'numpy',
    'scikit-learn'
])

from azureml.core.model import InferenceConfig

inference_config = InferenceConfig(entry_script='score.py',
                                   source_directory='.',
                                   environment=environment)

print("Deploying model to AKS...")
# deploying the model and create a new endpoint
from azureml.core.webservice import AksEndpoint
from azureml.core.compute import ComputeTarget

#select a created compute
compute = ComputeTarget(ws, 'aks')
def create_aml_environment(aml_interface):
    aml_env = Environment(name=AML_ENVIRONMENT_NAME)
    conda_dep = CondaDependencies()
    conda_dep.add_pip_package("numpy==1.18.2")
    conda_dep.add_pip_package("pandas==1.0.3")
    conda_dep.add_pip_package("scikit-learn==0.22.2.post1")
    conda_dep.add_pip_package("joblib==0.14.1")
    conda_dep.add_pip_package("azure-storage-blob==12.3.0")

    aml_env.environment_variables[AZURE_STORAGE_ACCOUNT_NAME] = os.getenv(
        AZURE_STORAGE_ACCOUNT_NAME)
    aml_env.environment_variables[AZURE_STORAGE_ACCOUNT_KEY] = os.getenv(
        AZURE_STORAGE_ACCOUNT_KEY)
    aml_env.environment_variables[MODEL_NAME_VARIABLE] = MODEL_NAME

    logger.info(
        f"set environment variables on compute environment: {aml_env.environment_variables}"
    )

    whl_filepath = retrieve_whl_filepath()
    whl_url = Environment.add_private_pip_wheel(
        workspace=aml_interface.workspace,
        file_path=whl_filepath,
        exist_ok=True)
    conda_dep.add_pip_package(whl_url)
    aml_env.python.conda_dependencies = conda_dep
    aml_env.docker.enabled = True
    return aml_env
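
# A minimal sketch (not from the original snippet) of using the environment returned
# above in a ScriptRunConfig; the aml_interface object, source directory, script name,
# compute target, and experiment name are assumptions.
from azureml.core import Experiment, ScriptRunConfig

aml_env = create_aml_environment(aml_interface)
script_config = ScriptRunConfig(source_directory='src', script='train.py')
script_config.run_config.environment = aml_env
script_config.run_config.target = 'cpu-cluster'  # assumed compute target name
run = Experiment(aml_interface.workspace, 'train-experiment').submit(script_config)
run.wait_for_completion(show_output=True)
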
# Enable Docker
run_amlcompute.environment.docker.enabled = True

# Set Docker base image to the default CPU-based image
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_amlcompute.environment.python.user_managed_dependencies = False

# Specify CondaDependencies obj, add necessary packages
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'numpy',
    'pandas',
    'scikit-learn',
    'azure-storage-blob==2.1.0',
    'azureml-sdk',
    'azureml-dataprep[pandas]',
    '-e src'
])

scripts_folder = 'src/my_custom_package/scripts'
def_blob_store = ws.get_default_datastore()

train_output = PipelineData('train_output', datastore=def_blob_store)
print("train_output PipelineData object created")

trainStep = PythonScriptStep(
    name="train",
    script_name="train.py",
    arguments=["--model_name", args.model_name,
Example #24
    def __init__(self, request_id, input_container_sas, internal_datastore):
        try:
            aml_config = api_config.AML_CONFIG

            self.ws = Workspace(subscription_id=aml_config['subscription_id'],
                                resource_group=aml_config['resource_group'],
                                workspace_name=aml_config['workspace_name'],
                                auth=svc_pr)
            print('AMLCompute constructor, AML workspace obtained.')

            internal_dir, output_dir = self._get_data_references(
                request_id, internal_datastore)

            compute_target = self.ws.compute_targets[
                aml_config['aml_compute_name']]

            dependencies = CondaDependencies.create(pip_packages=[
                'tensorflow-gpu==1.9.0', 'pillow', 'numpy', 'azure',
                'azure-storage-blob', 'azureml-defaults'
            ])

            amlcompute_run_config = RunConfiguration(
                conda_dependencies=dependencies)
            amlcompute_run_config.environment.docker.enabled = True
            amlcompute_run_config.environment.docker.gpu_support = True
            amlcompute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
            amlcompute_run_config.environment.spark.precache_packages = False

            # default values are required and need to be literal values or data references as JSON
            param_job_id = PipelineParameter(name='param_job_id',
                                             default_value='default_job_id')

            param_begin_index = PipelineParameter(name='param_begin_index',
                                                  default_value=0)
            param_end_index = PipelineParameter(name='param_end_index',
                                                default_value=0)

            param_detection_threshold = PipelineParameter(
                name='param_detection_threshold', default_value=0.05)
            param_batch_size = PipelineParameter(name='param_batch_size',
                                                 default_value=8)

            batch_score_step = PythonScriptStep(
                aml_config['script_name'],
                source_directory=aml_config['source_dir'],
                name='batch_scoring',
                arguments=[
                    '--job_id',
                    param_job_id,
                    '--model_name',
                    aml_config['model_name'],
                    '--input_container_sas',
                    input_container_sas,
                    '--internal_dir',
                    internal_dir,
                    '--begin_index',
                    param_begin_index,  # inclusive
                    '--end_index',
                    param_end_index,  # exclusive
                    '--output_dir',
                    output_dir,
                    '--detection_threshold',
                    param_detection_threshold,
                    '--batch_size',
                    param_batch_size
                ],
                compute_target=compute_target,
                inputs=[internal_dir],
                outputs=[output_dir],
                runconfig=amlcompute_run_config)

            self.pipeline = Pipeline(workspace=self.ws,
                                     steps=[batch_score_step])
            self.aml_config = aml_config
            print('AMLCompute constructor all good.')
        except Exception as e:
            raise RuntimeError(
                'Error in setting up AML Compute resource: {}.'.format(str(e)))
# Re-load the model
scaler = pickle.load(open(os.path.join(model_path, 'scaler.pkl'), 'rb'))
scaled_input = scaler.transform([[age, km]])
model2 = pickle.load(open(os.path.join(model_path, 'usedcarsmodel.pkl'), 'rb'))

# Use the loaded model to make a prediction
prediction = model2.predict(scaled_input)
print(prediction)
prediction_json = json.dumps(prediction.tolist())
print(prediction_json)

# Step 4 - Create a Conda dependencies environment file
#######################################################
from azureml.core.conda_dependencies import CondaDependencies

mycondaenv = CondaDependencies.create(
    conda_packages=['scikit-learn', 'numpy', 'pandas'])

with open("mydeployenv.yml", "w") as f:
    f.write(mycondaenv.serialize_to_string())

# Step 5 - Create container image configuration
###############################################

# Create the scoring script
# See the scoring script available in ./score.py

# Build the ContainerImage
runtime = "python"
driver_file = "score.py"
conda_file = "mydeployenv.yml"
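
# A minimal sketch (not from the original snippet) of building the image from the
# variables above; the image name and the 'model' and 'ws' objects are assumptions.
from azureml.core.image import ContainerImage

image_config = ContainerImage.image_configuration(execution_script=driver_file,
                                                  runtime=runtime,
                                                  conda_file=conda_file)

image = ContainerImage.create(name='usedcars-image',  # assumed image name
                              models=[model],  # a previously registered Model (assumed)
                              image_config=image_config,
                              workspace=ws)    # ws assumed to be an existing Workspace
image.wait_for_creation(show_output=True)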
Example #26
import azureml.core
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.conda_dependencies import CondaDependencies 
from azureml.core.model import Model
from azureml.core.image import ContainerImage
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice

auth_config = InteractiveLoginAuthentication(False, "72f988bf-86f1-41af-91ab-2d7cd011db47")
ws=Workspace.from_config('aml_config/config.json', auth_config)
ws.get_details()

myenv = CondaDependencies()
myenv.add_conda_package("keras")
myenv.add_conda_package("tensorflow")
myenv.add_conda_package("pillow")

with open("myenv.yml","w") as f:
    f.write(myenv.serialize_to_string())

# Register a trained model
print('Registering model...')
model = Model.register(model_path = "modelfiles",
                       model_name = "dogs-vs-cat",
                       description = "ready lab 314",
                       workspace = ws)

# Image configuration
print('Creating image configuration...')
image_config = ContainerImage.image_configuration(execution_script = "score.py",
Example #27
# import azureml.core

# <loadWorkspace>
from azureml.core import Workspace
ws = Workspace.from_config()
# </loadWorkspace>

scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n    global model\n    # retrieve the path to the model file using the model name\n    model_path = Model.get_model_path('sklearn_mnist')\n    model = joblib.load(model_path)\n\ndef run(raw_data):\n    data = np.array(json.loads(raw_data)['data'])\n    # make prediction\n    y_hat = model.predict(data)\n    return json.dumps(y_hat.tolist())"
print(scorepy_content)
with open("score.py", "w") as f:
    f.write(scorepy_content)

# PREREQ: create environment file
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")

with open("myenv.yml", "w") as f:
    f.write(myenv.serialize_to_string())

#<configImage>
from azureml.core.image import ContainerImage

image_config = ContainerImage.image_configuration(
    execution_script="score.py",
    runtime="python",
    conda_file="myenv.yml",
    description="Image with mnist model",
    tags={
        "data": "mnist",
Example #28
# 2. via pip requirements file
from azureml.core import Environment
env = Environment.from_pip_requirements(
    name='env_name',
    file_path='requirements.txt',
)

# 3. via existing conda env
env = Environment.from_existing_conda_environment(name = "training_env", conda_environment_name = 'py_env')


# 4. via specifying packages
from azureml.core.conda_dependencies import CondaDependencies
env = Environment("training_env")
deps = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas', 'numpy'],
                                pip_packages=['azureml-defaults'])

env.python.conda_dependencies = deps


# Registering the environment to the workspace

env.register(workspace = ws)


# get list of all the environments in the workspace
for env_name in Environment.list(workspace = ws):
    print('Name:', env_name)


#for using in the script 
def main():
    """
    Run the experiment for training
    """
    work_space = Workspace.from_config()

    # Set up the dataset for training
    datastore = work_space.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, "datasets/mnist"))

    # Set up the experiment for training
    experiment = Experiment(workspace=work_space, name="keras-lenet-train")
    #     azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 2000000000
    config = ScriptRunConfig(
        source_directory=".",
        script="train_keras.py",
        compute_target="cpu-cluster",
        arguments=[
            "--data_folder",
            dataset.as_named_input("input").as_mount(),
        ],
    )

    # Set up the TensorFlow/Keras environment
    environment = Environment("keras-environment")
    environment.python.conda_dependencies = CondaDependencies.create(
        python_version="3.7.7",
        pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"])
    config.run_config.environment = environment

    # Run the experiment for training
    run = experiment.submit(config)
    aml_url = run.get_portal_url()
    print(
        "Submitted to an Azure Machine Learning compute cluster. Click on the link below"
    )
    print("")
    print(aml_url)

    tboard = Tensorboard([run])
    # If successful, start() returns a string with the URI of the instance.
    tboard.start(start_browser=True)
    run.wait_for_completion(show_output=True)
    # After your job completes, be sure to stop() the streaming otherwise it will continue to run.
    print("Press enter to stop")
    input()
    tboard.stop()

    # Register Model
    metrics = run.get_metrics()
    run.register_model(
        model_name="keras_mnist",
        tags={
            "data": "mnist",
            "model": "classification"
        },
        model_path="outputs/keras_lenet.h5",
        model_framework=Model.Framework.TENSORFLOW,
        model_framework_version="2.3.1",
        properties={
            "train_loss": metrics["train_loss"][-1],
            "train_accuracy": metrics["train_accuracy"][-1],
            "val_loss": metrics["val_loss"][-1],
            "val_accuracy": metrics["val_accuracy"][-1],
        },
    )
run_config.container_instance.memory_gb = 2

# enable Docker
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# auto-prepare the Docker image when used for execution (if it is not already prepared)
run_config.auto_prepare_environment = True

# specify CondaDependencies obj
conda_dep = CondaDependencies.create(python_version='3.6.2',
                                     conda_packages=['keras', 'matplotlib'])
conda_dep.add_tensorflow_conda_package(core_type='cpu')
run_config.environment.python.conda_dependencies = conda_dep

# Create a directory that will contain all the necessary code from your local machine
# that you will need access to on the remote resource. This includes the training script,
# and any additional files your training script depends on.
import os

project_folder = './tmp/fashion-mnist-aci'
os.makedirs(project_folder, exist_ok=True)

import shutil
shutil.copy('./scripts/train_Fashion_MNIST.py', project_folder)

# Submit Experiment
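
# A minimal sketch (not from the original snippet) of the submission step named above;
# the experiment name and the 'ws' workspace object are assumptions.
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory=project_folder,
                      script='train_Fashion_MNIST.py',
                      run_config=run_config)
run = Experiment(workspace=ws, name='fashion-mnist-aci').submit(src)  # ws, name assumed
run.wait_for_completion(show_output=True)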