    min_nodes=0, max_nodes=1)
compute_target = ComputeTarget.create(workspace=ws,
                                      name=compute_name,
                                      provisioning_configuration=config)
compute_target.wait_for_completion(show_output=True,
                                   min_node_count=None,
                                   timeout_in_minutes=20)

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_GPU_IMAGE

cd = CondaDependencies.create(pip_packages=[
    "tensorflow-gpu==1.15.2", "azureml-core", "azureml-dataprep[fuse]"
])
env = Environment(name="parallelenv")
env.python.conda_dependencies = cd
env.docker.base_image = DEFAULT_GPU_IMAGE

from azureml.pipeline.steps import ParallelRunConfig

parallel_run_config = ParallelRunConfig(environment=env,
                                        entry_script="batch_scoring.py",
                                        source_directory="scripts",
                                        output_action="append_row",
                                        mini_batch_size="20",
                                        error_threshold=1,
                                        compute_target=compute_target,
                                        process_count_per_node=2,
# In[ ]:

get_ipython().run_cell_magic('writefile', 'dockerfile', 'RUN apt-get update && apt-get install -y g++')

# In[ ]:

# create a Conda dependencies environment file
print("Creating conda dependencies file locally...")
from azureml.core.conda_dependencies import CondaDependencies

conda_packages = ['numpy', 'pandas']
pip_packages = [
    'scikit-learn==0.20.3', 'sklearn_pandas', 'azureml-sdk',
    'azureml-explain-model', 'azureml-contrib-explain-model'
]
mycondaenv = CondaDependencies.create(conda_packages=conda_packages,
                                      pip_packages=pip_packages)

conda_file = 'sklearn_dependencies.yml'
with open(conda_file, 'w') as f:
    f.write(mycondaenv.serialize_to_string())

runtime = 'python'

# create container image configuration
print("Creating container image configuration...")
from azureml.core.image import ContainerImage

image_config = ContainerImage.image_configuration(execution_script='score.py',
                                                  docker_file='dockerfile',
                                                  runtime=runtime,
                                                  conda_file=conda_file)
print("5. Instantiate and configure run object for the managed compute...") print('.............................................') # Create runconfig object amlComputeRunConf = RunConfiguration() # Use the compute provisioned amlComputeRunConf.target = args.aml_compute_target # Enable Docker amlComputeRunConf.environment.docker.enabled = True # Set Docker base image to the default CPU-based image amlComputeRunConf.environment.docker.base_image = DEFAULT_CPU_IMAGE # Use conda_dependencies.yml to create a conda environment in the Docker image for execution amlComputeRunConf.environment.python.user_managed_dependencies = False # Auto-prepare the Docker image when used for execution (if it is not already prepared) amlComputeRunConf.auto_prepare_environment = True # Specify CondaDependencies obj, add necessary packages amlComputeRunConf.environment.python.conda_dependencies = CondaDependencies.create( pip_packages=['numpy', 'pandas', 'scikit-learn', 'azureml-sdk']) print("..5. completed") print('') print('') print("6. Define pipeline stage - training...") print('.............................................') training_output = PipelineData('train_output', datastore=amlWsStorageRef) trainPipelineStep = PythonScriptStep(name="train", script_name="train.py", arguments=[ "--model_name", args.model_name, "--build_number", args.build_number ], outputs=[training_output], compute_target=amlTrainingComputeRef,
validdata_dataset = Dataset.get_by_name(ws, name='valid_data_ds') testdata_dataset = Dataset.get_by_name(ws, name='test_data_ds') traintarget_dataset = Dataset.get_by_name(ws, name='train_target_ds') validtarget_dataset = Dataset.get_by_name(ws, name='valid_target_ds') testtarget_dataset = Dataset.get_by_name(ws, name='test_target_ds') ########################################################################################## ############################# Run Configuration Setup ################################# from azureml.core.runconfig import RunConfiguration from azureml.core.conda_dependencies import CondaDependencies run_config = RunConfiguration() run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages = ['keras<=2.3.1','pandas','matplotlib', 'opencv-python','azure-storage-blob==2.1.0','tensorflow-gpu==2.0.0', 'azureml','azureml-core','azureml-dataprep', 'azureml-dataprep[fuse]','azureml-pipeline']) ########################################################################################## ############################# Pythonscript for preprocessing ################################### from azureml.core import Workspace,Datastore from azureml.pipeline.core import Pipeline, PipelineParameter, PipelineData from azureml.pipeline.steps import PythonScriptStep import os script_folder = os.path.join(os.getcwd(), "PreProcessing") print("Pipeline SDK-specific imports completed")
def train_step(datastore, train_dir, valid_dir, vocab_dir, compute_target):
    '''
    This step trains the model on the preprocessed train and valid data
    produced by the previous step and writes the trained model to model_dir.

    :param datastore: The datastore that will be used
    :type datastore: Datastore
    :param train_dir: The reference to the directory containing the training data
    :type train_dir: DataReference
    :param valid_dir: The reference to the directory containing the validation data
    :type valid_dir: DataReference
    :param vocab_dir: The reference to the directory containing the vocab data
    :type vocab_dir: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget
    :return: The training step, step outputs dictionary (keys: model_dir)
    :rtype: PythonScriptStep, dict
    '''
    run_config = RunConfiguration()
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
    run_config.environment.python.user_managed_dependencies = False
    conda_packages = ['pytorch', 'tqdm', 'nltk']
    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=conda_packages)

    # set hyperparameters of the model training step
    input_col = PipelineParameter(name='input_col', default_value='Title')
    output_col = PipelineParameter(name='output_col', default_value='Abstract')
    cuda = PipelineParameter(name='cuda', default_value=1)
    seed = PipelineParameter(name='seed', default_value=0)
    batch_size = PipelineParameter(name='batch_size', default_value=32)
    embed_size = PipelineParameter(name='embed_size', default_value=256)
    hidden_size = PipelineParameter(name='hidden_size', default_value=256)
    clip_grad = PipelineParameter(name='clip_grad', default_value=5.0)
    label_smoothing = PipelineParameter(name='label_smoothing', default_value=0.0)
    log_every = PipelineParameter(name='log_every', default_value=1)
    max_epoch = PipelineParameter(name='max_epoch', default_value=2)
    input_feed = PipelineParameter(name='input_feed', default_value=1)
    patience = PipelineParameter(name='patience', default_value=5)
    max_num_trial = PipelineParameter(name='max_num_trial', default_value=5)
    lr_decay = PipelineParameter(name='lr_decay', default_value=0.5)
    beam_size = PipelineParameter(name='beam_size', default_value=5)
    sample_size = PipelineParameter(name='sample_size', default_value=5)
    lr = PipelineParameter(name='lr', default_value=0.001)
    uniform_init = PipelineParameter(name='uniform_init', default_value=0.1)
    valid_niter = PipelineParameter(name='valid_niter', default_value=2000)
    dropout = PipelineParameter(name='dropout', default_value=0.3)
    max_decoding_time_step = PipelineParameter(name='max_decoding_time_step',
                                               default_value=70)

    model_dir = PipelineData(name='model_dir',
                             pipeline_output_name='model_dir',
                             datastore=datastore,
                             output_mode='mount',
                             is_directory=True)

    outputs = [model_dir]
    outputs_map = {
        'model_dir': model_dir,
    }

    step = PythonScriptStep(name="Train",
                            script_name='train.py',
                            arguments=[
                                '--train_dir', train_dir,
                                '--valid_dir', valid_dir,
                                '--input_col', input_col,
                                '--output_col', output_col,
                                '--vocab_dir', vocab_dir,
                                '--model_dir', model_dir,
                                '--cuda', cuda,
                                '--seed', seed,
                                '--batch_size', batch_size,
                                '--embed_size', embed_size,
                                '--hidden_size', hidden_size,
                                '--clip_grad', clip_grad,
                                '--label_smoothing', label_smoothing,
                                '--log_every', log_every,
                                '--max_epoch', max_epoch,
                                '--input_feed', input_feed,
                                '--patience', patience,
                                '--max_num_trial', max_num_trial,
                                '--lr_decay', lr_decay,
                                '--beam_size', beam_size,
                                '--sample_size', sample_size,
                                '--lr', lr,
                                '--uniform_init', uniform_init,
                                '--valid_niter', valid_niter,
                                '--dropout', dropout,
                                '--max_decoding_time_step', max_decoding_time_step,
                            ],
                            inputs=[train_dir, valid_dir, vocab_dir],
                            outputs=outputs,
                            compute_target=compute_target,
                            runconfig=run_config,
                            source_directory=os.path.dirname(os.path.abspath(__file__)),
                            allow_reuse=True)

    return step, outputs_map
# Code for What's a run configuration

# <run_system_managed>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_system_managed = RunConfiguration()
# Specify the conda dependencies with scikit-learn
run_system_managed.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'])
# </run_system_managed>

print(run_system_managed)

# <run_user_managed>
from azureml.core.runconfig import RunConfiguration

run_user_managed = RunConfiguration()
run_user_managed.environment.python.user_managed_dependencies = True

# Choose a specific Python environment by pointing to a Python path. For example:
# run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'
# </run_user_managed>

print(run_user_managed)
run_config = RunConfiguration()

# signal that you want to use AmlCompute to execute the script
run_config.target = "amlcompute"

# AmlCompute will be created in the same region as the workspace
# set VM size for AmlCompute
run_config.amlcompute.vm_size = 'STANDARD_D2_V2'

# enable Docker
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# auto-prepare the Docker image when used for execution (if it is not already prepared)
run_config.auto_prepare_environment = True

# specify CondaDependencies obj
run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'])

# Now submit a run on AmlCompute
from azureml.core.script_run_config import ScriptRunConfig

script_run_config = ScriptRunConfig(source_directory=project_folder,
                                    script='train.py',
                                    run_config=run_config)

run = experiment.submit(script_run_config)
run.wait_for_completion()
def RunAutoMLForecast(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] location = request.json['location'] target_var = request.json['target_var'] cluster_name = request.json['cluster_name'] best_model = request.json['best_model'] time_column_name = request.json['time_column_name'] max_horizon = request.json['max_horizon'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') compute_target = AmlCompute(ws, cluster_name) print('Found existing AML compute context.') dataset_name = file_name time_column_name = time_column_name # Get a dataset by name dataset = Dataset.get_by_name(workspace=ws, name=dataset_name).with_timestamp_columns( fine_grain_timestamp=time_column_name) print(dataset) #df_ts = Dataset.Tabular.from_delimited_files(df_ts) dataset.to_pandas_dataframe().describe() dataset.take(3).to_pandas_dataframe() print(dataset) #y_df = df_ts[target_var].values #x_df = df_ts.drop([target_var], axis=1) print('file successfully recieved.') #stock_dataset_df.head() # create a new RunConfig object conda_run_config = RunConfiguration(framework="python") conda_run_config.environment.docker.enabled = True conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy', 'py-xgboost<=0.80']) conda_run_config.environment.python.conda_dependencies = cd print('run config is ready') ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] n_cross_validations = request.json['n_cross_validations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] #max_concurrent_iterations = request.json['max_concurrent_iterations'] automl_settings = { 'time_column_name': time_column_name, 'max_horizon': max_horizon, "iterations": iterations, } automl_config = AutoMLConfig( task=tasks, primary_metric=primary_metric, #blacklist_models = ['ExtremeRandomTrees', 'AutoArima', 'Prophet'], experiment_timeout_minutes=iteration_timeout_minutes, training_data=dataset, label_column_name=target_var, compute_target=compute_target, enable_early_stopping=True, n_cross_validations=n_cross_validations, #verbosity=logging.INFO, **automl_settings) print("AutoML config created.") experiment = Experiment(ws, ExperimentName) remote_run = experiment.submit(automl_config, show_output=True) children = list(remote_run.get_children()) metricslist = {} for run in children: properties = run.get_properties() metrics = { k: v for k, v in run.get_metrics().items() if isinstance(v, float) } metricslist[int(properties['iteration'])] = metrics rundata = pd.DataFrame(metricslist).sort_index(axis=1, by=primary_metric) rundata.rename(columns={ 0: "one", 1: "two", 2: "three", 3: "four", 4: "five", 5: "six", 6: "seven", 7: "eight", 8: "nine", 9: "ten", }, inplace=True) iterations_toJson = rundata.to_json(orient='columns') print(iterations_toJson) best_run, fitted_model = remote_run.get_output() #best_run_toJson = best_run.get_metrics() #dict = {} #dict['iterations_toJson'] = iterations_toJson #dict['best_run_toJson'] = best_run_toJson #print(best_run.get_file_names()) #Register the model #from datetime import date model 
= remote_run.register_model(model_name=best_model, description='AutoML Model') print(model.name, model.id, model.version, sep='\t') best_model = model.name best_model var1 = "@" var2 = var1 + best_model return '{} {}'.format(iterations_toJson, var2)
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()
myenv.add_pip_package("numpy")
myenv.add_pip_package("sklearn")
# myenv.add_conda_package("nltk")

with open("aml_config/myenv.yml", "w") as f:
    f.write(myenv.serialize_to_string())
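# A minimal sketch (not part of the original snippet) of one way the file written
# above could be consumed later: loading "aml_config/myenv.yml" back into an
# Environment object. The environment name "myenv" is an assumption for illustration.
from azureml.core import Environment

restored_env = Environment.from_conda_specification(name="myenv",
                                                    file_path="aml_config/myenv.yml")
print(restored_env.python.conda_dependencies.serialize_to_string())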
print('Found existing AML compute context.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) X = df.drop_columns(columns=[target_var]) y = df.keep_columns(columns=[target_var], validate=True) print(y) #y = diabetes.pop('Y') #X_train, X_test, y_train, y_test = train_test_split(diabetes, y, test_size=0.2, random_state=0) #data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}} conda_run_config = RunConfiguration(framework="python") conda_run_config.environment.docker.enabled = True conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy', 'py-xgboost<=0.80']) conda_run_config.environment.python.conda_dependencies = cd print('run config is ready') ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] n_cross_validations = request.json['n_cross_validations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] max_concurrent_iterations = request.json['max_concurrent_iterations'] automl_settings = { "name": ExperimentName, "iteration_timeout_minutes": iteration_timeout_minutes, "iterations": iterations,
def RunAutoMLReg(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] location = request.json['location'] target_var = request.json['target_var'] cluster_name = request.json['cluster_name'] best_model = request.json['best_model'] #best_model = request.json['best_model'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') #compute_target = AmlCompute(ws, cluster_name) compute_target = ws.compute_targets[cluster_name] print('Found existing AML compute context.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) #stock_dataset_df = df.to_pandas_dataframe() print('file successfully recieved.') #stock_dataset_df.head() #stock_dataset_json = stock_dataset_df.to_json(orient='split') #print(stock_dataset_json) X = df.drop_columns(columns=[target_var]) y = df.keep_columns(columns=[target_var], validate=True) #y_df = stock_dataset_df[target_var].values #x_df = stock_dataset_df.drop([target_var], axis=1) print(y) # create a new RunConfig object conda_run_config = RunConfiguration(framework="python") conda_run_config.environment.docker.enabled = True conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy', 'py-xgboost<=0.90']) conda_run_config.environment.python.conda_dependencies = cd print('run config is ready') ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] n_cross_validations = request.json['n_cross_validations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] max_concurrent_iterations = request.json['max_concurrent_iterations'] try: automl_settings = { "name": ExperimentName, "iteration_timeout_minutes": iteration_timeout_minutes, "featurization": 'auto', "iterations": iterations, "n_cross_validations": n_cross_validations, "primary_metric": primary_metric, "preprocess": True, "max_concurrent_iterations": max_concurrent_iterations #"verbosity": logging.INFO } automl_config = AutoMLConfig( task=tasks, debug_log='automl_errors.log', blacklist_models=['XGBoost'], #path=os.getcwd(), compute_target=compute_target, #run_configuration=conda_run_config, X=X, y=y, **automl_settings, ) experiment = Experiment(ws, ExperimentName) remote_run = experiment.submit(automl_config, show_output=True) remote_run.flush(timeout_seconds=400) children = list(remote_run.get_children()) metricslist = {} for run in children: properties = run.get_properties() metrics = { k: v for k, v in run.get_metrics().items() if isinstance(v, float) } metricslist[int(properties['iteration'])] = metrics rundata = pd.DataFrame(metricslist).sort_index(axis=1, by=primary_metric) rundata = rundata.drop([ 'mean_absolute_percentage_error', 'normalized_median_absolute_error', 'normalized_root_mean_squared_log_error', 'root_mean_squared_log_error' ]) rundata.rename(columns={ 0: "one", 1: "two", 2: "three", 3: "four", 4: "five", 5: "six", 6: "seven", 7: "eight", 8: "nine", 9: "ten", }, inplace=True) iterations_toJson = rundata.to_json(orient='columns') print(iterations_toJson) best_run, fitted_model = remote_run.get_output() 
best_run_toJson = best_run.get_metrics() cwd = 'D:/DCSAIAUTOML/BestModels/Azure' best_model_name = best_run.name model = remote_run.register_model(description=best_model) print(model.name, model.id, model.version, sep='\t') model_path = os.path.join(cwd, best_model, best_model_name) print(model_path) #print("Model DownLoad Complete") #model = Model(workspace=ws, name=model.name) #model.download_files(target_dir=model_path) #dict = {} #dict['iterations_toJson'] = iterations_toJson #dict['best_run_toJson'] = best_run_toJson #print(best_run.get_file_names()) #Register the model #from datetime import date best_model_id = best_run.name var1 = "@" var2 = var1 + best_model_id Reg_model_name = model.name var4 = var1 + Reg_model_name best_run.flush(timeout_seconds=3600) best_run.download_files(output_directory=model_path) # importing required modules #import shutil #output_path = os.path.join(model_path, best_model_id) #dir_name1 = "D:\\DCSAIAUTOML\\BestModels\\Azure\\my_azure_best" #dir_name1 = "D:\\DCSAIAUTOML\\BestModels\\Azure\\my_azure_best\\my_azure_best" #shutil.make_archive(model_path,'zip',model_path) #zipf = zipfile.ZipFile(best_model_id+'.zip', 'w', zipfile.ZIP_DEFLATED) #for root, dirs, files in os.walk(model_path): #for file in files: #zipf.write(os.path.join(root, file)) #def zipdir(path, ziph): # ziph is zipfile handle #import os #for root, dirs, files in os.walk(path): #for file in files: #ziph.write(os.path.join(root, file)) #zipdir(model_path, zipf) #remote_run.clean_preprocessor_cache() print("ready to return") var5 = "no exception" return '{} {} {} {} {}'.format(iterations_toJson, var2, var4, var1, var5) #return iterations_toJson except Exception as e: error_statement = str(e) print("Error statement: ", error_statement) model_path1 = os.path.join(model_path, 'outputs') file_name = 'model.pkl' print("in exception: ", model_path1) src = 'D:\\Final Script_dev' full_file_name = os.path.join(src, file_name) import shutil #remote_run.download_file('model.pkl', output_file_path=model_path1) if os.path.isfile(full_file_name): shutil.copy(full_file_name, model_path1) return '{} {} {} {} {}'.format(iterations_toJson, var2, var4, var1, error_statement)
def generate_yaml( directory: str, ref_filename: str, needed_libraries: list, conda_filename: str, ): """ Creates a deployment-specific yaml file as a subset of the image classification environment.yml Also adds extra libraries, if not present in environment.yml Args: directory (string): Directory name of reference yaml file ref_filename (string): Name of reference yaml file needed_libraries (list of strings): List of libraries needed in the Docker container conda_filename (string): Name of yaml file to be deployed in the Docker container Returns: Nothing """ with open(os.path.join(directory, ref_filename), "r") as f: yaml_content = yaml.load(f, Loader=yaml.FullLoader) # Extract libraries to be installed using conda extracted_libraries = [ depend for depend in yaml_content["dependencies"] if any(lib in depend for lib in needed_libraries) ] # Extract libraries to be installed using pip if any(isinstance(x, dict) for x in yaml_content["dependencies"]): # if the reference yaml file contains a "pip" section, # find where it is in the list of dependencies ind = [ yaml_content["dependencies"].index(depend) for depend in yaml_content["dependencies"] if isinstance(depend, dict) ][0] extracted_libraries += [ depend for depend in yaml_content["dependencies"][ind]["pip"] if any(lib in depend for lib in needed_libraries) ] # Check whether additional libraries are needed not_found = [ lib for lib in needed_libraries if not any(lib in ext for ext in extracted_libraries) ] # Create the deployment-specific yaml file conda_env = CondaDependencies() for ch in yaml_content["channels"]: conda_env.add_channel(ch) for library in extracted_libraries + not_found: conda_env.add_conda_package(library) # Display the environment print(conda_env.serialize_to_string()) # Save the file to disk conda_env.save_to_file(base_directory=os.getcwd(), conda_file_path=conda_filename)
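# A hypothetical usage sketch of generate_yaml(); the directory, file names and
# library list below are assumptions for illustration, not taken from the original code.
generate_yaml(
    directory="environment",                            # folder holding the reference yaml
    ref_filename="environment.yml",                     # full image-classification environment
    needed_libraries=["scikit-learn", "azureml-defaults"],
    conda_filename="deployment_env.yml",                # subset written for the Docker container
)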
    df = pd.DataFrame([data[1:]], columns=featurenames)

    # make prediction
    if data[0] == 'iq':
        result = iq_model.predict(df).astype(int)
    elif data[0] == 'sj':
        result = sj_model.predict(df).astype(int)

    # you can return any data type as long as it is JSON-serializable
    return result.tolist()


#%% create environment file for deployment
from azureml.core.conda_dependencies import CondaDependencies

mymodelenv = CondaDependencies()
mymodelenv.add_conda_package("scikit-learn")
mymodelenv.add_conda_package("pandas")
mymodelenv.add_conda_package("statsmodels")
mymodelenv.add_conda_package("scipy=1.2")
mymodelenv.add_conda_package("numpy")

with open("mymodelenv.yml", "w") as f:
    f.write(mymodelenv.serialize_to_string())

with open("mymodelenv.yml", "r") as f:
    print(f.read())

print('Complete')
def run(workspace, config, args): compute_target_name = config['train']['compute_target_name'] data_folder = config['train']['data_folder'] try: compute_target = ComputeTarget(workspace=workspace, name=compute_target_name) print('found existing:', compute_target.name) except ComputeTargetException: print('creating new.') compute_config = AmlCompute.provisioning_configuration( vm_size=config['train']['vm_size'], min_nodes=0, max_nodes=1) compute_target = ComputeTarget.create(workspace, compute_target_name, compute_config) compute_target.wait_for_completion(show_output=True) # ds = Datastore.register_azure_blob_container( # workspace, # datastore_name=config['train']['datastore_name'], # account_name=config['train']['account_name'], # account_key=config['train']['account_key'], # container_name=config['train']['container_name'], # overwrite=True) # # # # Upload local "data" folder (incl. files) as "tfdata" folder # ds.upload( # src_dir=config['train']['local_directory'], # target_path=data_folder, # overwrite=True) ds = Datastore.get(workspace, datastore_name=config['train']['datastore_name']) # generate data reference configuration dr_conf = DataReferenceConfiguration( datastore_name=ds.name, path_on_datastore=data_folder, mode='mount' ) # set 'download' if you copy all files instead of mounting run_config = RunConfiguration(framework="python", conda_dependencies=CondaDependencies.create( conda_packages=ast.literal_eval( config['train']['conda_packages']))) run_config.target = compute_target.name run_config.data_references = {ds.name: dr_conf} run_config.environment.docker.enabled = True # run_config.environment.docker.gpu_support = True run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE src = ScriptRunConfig( source_directory='./script', script='train.py', run_config=run_config, arguments=[ '--datadir', str(ds.as_mount()), '--step', args.step, '--train_on', args.train_on, '--fold', args.fold, '--epochs', args.epochs, '--experiment', args.experiment, '--reference', args.reference, '--batchsize', args.batchsize, '--optimizertype', args.optimizertype, '--convrnn_filters', args.convrnn_filters, '--learning_rate', args.learning_rate, '--pix250m', args.pix250m ]) # exp = Experiment(workspace=ws, name='test20181210-09') exp = Experiment(workspace=workspace, name=config['train']['experiment_name']) run = exp.submit(config=src) run.wait_for_completion(show_output=True)
data_reference_name="input_data_ref", path_on_datastore=f"{project_config['project_name']}/data/") processed_data_ref = PipelineData("processed_data_ref", datastore=def_blob_store) run_config = RunConfiguration() run_config.environment.docker.enabled = True run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE run_config.environment.python.user_managed_dependencies = False pip_packages = [ "azureml-sdk==1.0.17", "scikit-learn==0.21.3", "download==0.3.4", "pandas==0.25.1", "spacy==2.1.4", "numpy==1.17.2" ] run_config.environment.python.conda_dependencies = CondaDependencies.create( pip_packages=pip_packages) pipeline_params = [] for k, v in vars(auth_params).items(): pipeline_params.append("--" + k) pipeline_params.append(PipelineParameter(name=k, default_value=v)) auth_params = pipeline_params.copy() pipeline_params += ["--processed_data_ref", processed_data_ref] pipeline_params += ["--input_data_ref", input_data_ref] process_step = PythonScriptStep(script_name="process.py", arguments=pipeline_params, inputs=[input_data_ref], outputs=[processed_data_ref], compute_target=compute_target_cpu, source_directory='./',
# Register Model model = Model.register( model_path= "./resources/models/sklearn_regression_model.pkl", # this points to a local file model_name= "sklearn_regression_model", # this is the name the model is registered as tags={ 'area': "diabetes", 'type': "regression" }, description="Ridge regression model to predict diabetes", workspace=ws) print("Creating docker image configuration...") # Update your myenv.yml file with the required module myenv = CondaDependencies.create(conda_packages=['numpy', 'scikit-learn']) myenv.add_pip_package("azureml-monitoring") with open(os.path.join(project_folder, "myenv.yml"), "w") as f: f.write(myenv.serialize_to_string()) shutil.copy("./scripts/score_diabetes.py", './') # Create your new Image image_config = ContainerImage.image_configuration( execution_script="score_diabetes.py", runtime="python", conda_file=os.path.join(project_folder, "myenv.yml"), description="Image with ridge regression model", tags={ 'area': "diabetes",
        create_output_directories=False,
        mechanism='mount',
        environment_variable_name=input_name,
        overwrite=True)
    return data


# Retrieve the ML workspace
try:
    workspace = Workspace.from_config(auth=AzureCliAuthentication())
except UserErrorException:
    workspace = run.experiment.workspace

# Define the conda dependencies
cd = CondaDependencies(conda_dependencies_file_path=os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    'conda_dependencies_sklearn.yml'))

# define compute
compute_target = 'alwaysoncluster'

# define data set names
input_name_train = 'newsgroups_train'
input_name_test = 'newsgroups_test'

# Retrieve datasets
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Runconfig
amlcompute_run_config = RunConfiguration(
print('Setting up experiment') exp = Experiment(workspace=ws, name=experimentName) print('Setting up cluster') compute_target = ComputeTarget(workspace=ws, name=clusterName) print('Setting up dataset') mnistFileDataset = Dataset.get_by_name(workspace=ws, name=datasetName) print("Defining environment") # to install required packages env = Environment('sklearn') #cd = CondaDependencies.create(pip_packages=['azureml-dataprep[pandas,fuse]>=1.1.14', 'azureml-defaults'], conda_packages = ['scikit-learn==0.22.1']) cd = CondaDependencies.create(pip_packages=[ 'azureml-sdk', 'scikit-learn==0.22.1', 'azureml-dataprep[pandas,fuse]>=1.1.14' ]) env.python.conda_dependencies = cd # Register environment to re-use later env.register(workspace=ws) print("Creating estimator") script_params = { # to mount files referenced by mnist dataset '--data-folder': mnistFileDataset.as_named_input(datasetName).as_mount(), '--regularization': 0.5 } est = Estimator(source_directory=scriptFolder, script_params=script_params,
def create_env(ws): '''Creates an azureml enviornment''' # Create enviornment object env = Environment(name='birdsong-env-gpu') # define packages for image cd = CondaDependencies.create( pip_packages=[ 'azureml-dataset-runtime[pandas,fuse]', 'azureml-defaults', 'tensorflow==2.4.0', #'tensorflow==2.5.0', 'tensorflow-io==0.17.1', # 'tensorflow-io==0.18.0', 'tensorflow-addons==0.13.0', 'Pillow', 'sklearn', 'kapre', 'sndfile', 'librosa', 'psutil' ], conda_packages=['SciPy']) env.python.conda_dependencies = cd #Docker file dockerfile = r''' # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. FROM mcr.microsoft.com/azureml/o16n-base/python-assets:20210210.31228572 AS inferencing-assets # Tag: cuda:11.0.3-devel-ubuntu18.04 # Env: CUDA_VERSION=11.0.3 # Env: NCCL_VERSION=2.8.3 # Env: CUDNN_VERSION=8.0.5.39 FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04 USER root:root ENV com.nvidia.cuda.version $CUDA_VERSION ENV com.nvidia.volumes.needed nvidia_driver ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 ENV DEBIAN_FRONTEND noninteractive ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64 ENV NCCL_DEBUG=INFO ENV HOROVOD_GPU_ALLREDUCE=NCCL # Install Common Dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ # SSH and RDMA libmlx4-1 \ libmlx5-1 \ librdmacm1 \ libibverbs1 \ libmthca1 \ libdapl2 \ dapl2-utils \ openssh-client \ openssh-server \ iproute2 && \ # Others apt-get install -y \ build-essential \ bzip2 \ libbz2-1.0 \ systemd \ git \ wget \ cpio \ pciutils \ libnuma-dev \ ibutils \ ibverbs-utils \ rdmacm-utils \ infiniband-diags \ perftest \ librdmacm-dev \ libibverbs-dev \ libsm6 \ libxext6 \ libxrender-dev \ libssl1.0.0 \ linux-image-aws \ linux-image-azure \ linux-image-generic \ linux-image-kvm \ linux-image-lowlatency \ linux-image-virtual \ linux-image-gke \ linux-image-oem \ slapd \ perl \ ca-certificates \ apt \ p11-kit \ libp11-kit0 \ tar \ libsndfile-dev \ fuse && \ apt-get clean -y && \ rm -rf /var/lib/apt/lists/* # Inference # Copy logging utilities, nginx and rsyslog configuration files, IOT server binary, etc. 
COPY --from=inferencing-assets /artifacts /var/ RUN /var/requirements/install_system_requirements.sh && \ cp /var/configuration/rsyslog.conf /etc/rsyslog.conf && \ cp /var/configuration/nginx.conf /etc/nginx/sites-available/app && \ ln -s /etc/nginx/sites-available/app /etc/nginx/sites-enabled/app && \ rm -f /etc/nginx/sites-enabled/default ENV SVDIR=/var/runit ENV WORKER_TIMEOUT=300 EXPOSE 5001 8883 8888 # Conda Environment ENV MINICONDA_VERSION py37_4.9.2 ENV PATH /opt/miniconda/bin:$PATH RUN wget -qO /tmp/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \ bash /tmp/miniconda.sh -bf -p /opt/miniconda && \ conda clean -ay && \ rm -rf /opt/miniconda/pkgs && \ rm /tmp/miniconda.sh && \ find / -type d -name __pycache__ | xargs rm -rf # Open-MPI-UCX installation RUN mkdir /tmp/ucx && \ cd /tmp/ucx && \ wget -q https://github.com/openucx/ucx/releases/download/v1.6.1-rc2/ucx-1.6.1.tar.gz && \ tar zxf ucx-1.6.1.tar.gz && \ cd ucx-1.6.1 && \ ./configure --prefix=/usr/local --enable-optimizations --disable-assertions --disable-params-check --enable-mt && \ make -j $(nproc --all) && \ make install && \ rm -rf /tmp/ucx # Open-MPI installation ENV OPENMPI_VERSION 4.1.0 RUN mkdir /tmp/openmpi && \ cd /tmp/openmpi && \ wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz && \ tar zxf openmpi-${OPENMPI_VERSION}.tar.gz && \ cd openmpi-${OPENMPI_VERSION} && \ ./configure --with-ucx=/usr/local/ --enable-mca-no-build=btl-uct --enable-orterun-prefix-by-default && \ make -j $(nproc) all && \ make install && \ ldconfig && \ rm -rf /tmp/openmpi # Msodbcsql17 installation RUN apt-get update && \ apt-get install -y curl && \ curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ curl https://packages.microsoft.com/config/ubuntu/18.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ apt-get update && \ ACCEPT_EULA=Y apt-get install -y msodbcsql17 #Cmake Installation RUN apt-get update && \ apt-get install -y cmake ''' env.docker.base_image = None env.docker.base_dockerfile = dockerfile # Register environment to re-use later env = env.register(workspace=ws)
from azureml.core.conda_dependencies import CondaDependencies

cd = CondaDependencies.create()
cd.add_tensorflow_conda_package()
cd.add_conda_package('keras<=2.3.1')
cd.add_pip_package("azureml-defaults")
cd.save_to_file(base_directory='./', conda_file_path='env.yml')
print(cd.serialize_to_string())
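# A hedged sketch of how the dependencies object above might be attached to an
# Environment for a run; the environment name "keras-env" and the workspace
# variable "ws" are assumptions, not part of the original snippet.
from azureml.core import Environment

keras_env = Environment(name="keras-env")
keras_env.python.conda_dependencies = cd
# keras_env.register(workspace=ws)  # optionally persist it for reuse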
# datastore = ws.get_default_datastore() # datastore.download("./", prefix="deploy", overwrite=True, show_progress=True) os.chdir(os.path.dirname(os.path.realpath(__file__))) print(os.getcwd()) print(ws) #run.register_model(model_name='iris-model', # model_path="./outputs/model.pkl") from azureml.core import Environment from azureml.core.conda_dependencies import CondaDependencies environment = Environment('my-sklearn-environment') environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[ 'azureml-defaults', 'inference-schema[numpy-support]', 'joblib', 'numpy', 'scikit-learn' ]) from azureml.core.model import InferenceConfig inference_config = InferenceConfig(entry_script='score.py', source_directory='.', environment=environment) print("Deploying model to AKS...") # deploying the model and create a new endpoint from azureml.core.webservice import AksEndpoint from azureml.core.compute import ComputeTarget #select a created compute compute = ComputeTarget(ws, 'aks')
def create_aml_environment(aml_interface): aml_env = Environment(name=AML_ENVIRONMENT_NAME) conda_dep = CondaDependencies() conda_dep.add_pip_package("numpy==1.18.2") conda_dep.add_pip_package("pandas==1.0.3") conda_dep.add_pip_package("scikit-learn==0.22.2.post1") conda_dep.add_pip_package("joblib==0.14.1") conda_dep.add_pip_package("azure-storage-blob==12.3.0") aml_env.environment_variables[AZURE_STORAGE_ACCOUNT_NAME] = os.getenv( AZURE_STORAGE_ACCOUNT_NAME) aml_env.environment_variables[AZURE_STORAGE_ACCOUNT_KEY] = os.getenv( AZURE_STORAGE_ACCOUNT_KEY) aml_env.environment_variables[MODEL_NAME_VARIABLE] = MODEL_NAME logger.info( f"set environment variables on compute environment: {aml_env.environment_variables}" ) whl_filepath = retrieve_whl_filepath() whl_url = Environment.add_private_pip_wheel( workspace=aml_interface.workspace, file_path=whl_filepath, exist_ok=True) conda_dep.add_pip_package(whl_url) aml_env.python.conda_dependencies = conda_dep aml_env.docker.enabled = True return aml_env
# Enable Docker run_amlcompute.environment.docker.enabled = True # Set Docker base image to the default CPU-based image run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE # Use conda_dependencies.yml to create a conda environment in the Docker image for execution run_amlcompute.environment.python.user_managed_dependencies = False # Specify CondaDependencies obj, add necessary packages run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[ 'numpy', 'pandas', 'scikit-learn', 'azure-storage-blob==2.1.0', 'azureml-sdk', 'azureml-dataprep[pandas]', '-e src' ]) scripts_folder = 'src/my_custom_package/scripts' def_blob_store = ws.get_default_datastore() train_output = PipelineData('train_output', datastore=def_blob_store) print("train_output PipelineData object created") trainStep = PythonScriptStep( name="train", script_name="train.py", arguments=["--model_name", args.model_name,
def __init__(self, request_id, input_container_sas, internal_datastore): try: aml_config = api_config.AML_CONFIG self.ws = Workspace(subscription_id=aml_config['subscription_id'], resource_group=aml_config['resource_group'], workspace_name=aml_config['workspace_name'], auth=svc_pr) print('AMLCompute constructor, AML workspace obtained.') internal_dir, output_dir = self._get_data_references( request_id, internal_datastore) compute_target = self.ws.compute_targets[ aml_config['aml_compute_name']] dependencies = CondaDependencies.create(pip_packages=[ 'tensorflow-gpu==1.9.0', 'pillow', 'numpy', 'azure', 'azure-storage-blob', 'azureml-defaults' ]) amlcompute_run_config = RunConfiguration( conda_dependencies=dependencies) amlcompute_run_config.environment.docker.enabled = True amlcompute_run_config.environment.docker.gpu_support = True amlcompute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE amlcompute_run_config.environment.spark.precache_packages = False # default values are required and need to be literal values or data references as JSON param_job_id = PipelineParameter(name='param_job_id', default_value='default_job_id') param_begin_index = PipelineParameter(name='param_begin_index', default_value=0) param_end_index = PipelineParameter(name='param_end_index', default_value=0) param_detection_threshold = PipelineParameter( name='param_detection_threshold', default_value=0.05) param_batch_size = PipelineParameter(name='param_batch_size', default_value=8) batch_score_step = PythonScriptStep( aml_config['script_name'], source_directory=aml_config['source_dir'], name='batch_scoring', arguments=[ '--job_id', param_job_id, '--model_name', aml_config['model_name'], '--input_container_sas', input_container_sas, '--internal_dir', internal_dir, '--begin_index', param_begin_index, # inclusive '--end_index', param_end_index, # exclusive '--output_dir', output_dir, '--detection_threshold', param_detection_threshold, '--batch_size', param_batch_size ], compute_target=compute_target, inputs=[internal_dir], outputs=[output_dir], runconfig=amlcompute_run_config) self.pipeline = Pipeline(workspace=self.ws, steps=[batch_score_step]) self.aml_config = aml_config print('AMLCompute constructor all good.') except Exception as e: raise RuntimeError( 'Error in setting up AML Compute resource: {}.'.format(str(e)))
# Re-load the model scaler = pickle.load(open(os.path.join(model_path, 'scaler.pkl'), 'rb')) scaled_input = scaler.transform([[age, km]]) model2 = pickle.load(open(os.path.join(model_path, 'usedcarsmodel.pkl'), 'rb')) # Use the loaded model to make a prediction prediction = model2.predict(scaled_input) print(prediction) prediction_json = json.dumps(prediction.tolist()) print(prediction_json) # Step 4 - Create a Conda dependencies environment file ####################################################### from azureml.core.conda_dependencies import CondaDependencies mycondaenv = CondaDependencies.create( conda_packages=['scikit-learn', 'numpy', 'pandas']) with open("mydeployenv.yml", "w") as f: f.write(mycondaenv.serialize_to_string()) # Step 5 - Create container image configuration ############################################### # Create the scoring script # See the scoring script available in ./score.py # Build the ContainerImage runtime = "python" driver_file = "score.py" conda_file = "mydeployenv.yml"
import azureml.core from azureml.core import Workspace from azureml.core.authentication import InteractiveLoginAuthentication from azureml.core.conda_dependencies import CondaDependencies from azureml.core.model import Model from azureml.core.image import ContainerImage from azureml.core.webservice import AciWebservice from azureml.core.webservice import Webservice auth_config = InteractiveLoginAuthentication(False, "72f988bf-86f1-41af-91ab-2d7cd011db47") ws=Workspace.from_config('aml_config/config.json', auth_config) ws.get_details() myenv = CondaDependencies() myenv.add_conda_package("keras") myenv.add_conda_package("tensorflow") myenv.add_conda_package("pillow") with open("myenv.yml","w") as f: f.write(myenv.serialize_to_string()) # Register a trained model print('Registering model...') model = Model.register(model_path = "modelfiles", model_name = "dogs-vs-cat", description = "ready lab 314", workspace = ws) # Image configuration print('Creating image configuration...') image_config = ContainerImage.image_configuration(execution_script = "score.py",
# import azureml.core

# <loadWorkspace>
from azureml.core import Workspace

ws = Workspace.from_config()
# </loadWorkspace>

scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n    global model\n    # retrieve the path to the model file using the model name\n    model_path = Model.get_model_path('sklearn_mnist')\n    model = joblib.load(model_path)\n\ndef run(raw_data):\n    data = np.array(json.loads(raw_data)['data'])\n    # make prediction\n    y_hat = model.predict(data)\n    return json.dumps(y_hat.tolist())"
print(scorepy_content)

with open("score.py", "w") as f:
    f.write(scorepy_content)

# PREREQ: create environment file
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")

with open("myenv.yml", "w") as f:
    f.write(myenv.serialize_to_string())

# <configImage>
from azureml.core.image import ContainerImage

image_config = ContainerImage.image_configuration(
    execution_script="score.py",
    runtime="python",
    conda_file="myenv.yml",
    description="Image with mnist model",
    tags={
        "data": "mnist",
# 2. via pip requirements file
from azureml.core import Environment

env = Environment.from_pip_requirements(
    name='env_name',
    file_path='requirements.txt',
)

# 3. via existing conda env
env = Environment.from_existing_conda_environment(name="training_env",
                                                  conda_environment_name='py_env')

# 4. via specifying packages
from azureml.core.conda_dependencies import CondaDependencies

env = Environment("training_env")
deps = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas', 'numpy'],
                                pip_packages=['azureml-defaults'])
env.python.conda_dependencies = deps

# Registering the environment to the workspace
env.register(workspace=ws)

# get list of all the environments in the workspace
for env_name in Environment.list(workspace=ws):
    print('Name:', env_name)

# for using in the script
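# A minimal sketch of picking the registered environment back up for a run, assuming
# a workspace "ws", a "training" folder with train.py, and the registered name
# "training_env"; these names are illustrative, not from the original snippet.
from azureml.core import Environment, ScriptRunConfig

training_env = Environment.get(workspace=ws, name="training_env")
src = ScriptRunConfig(source_directory='training', script='train.py')
src.run_config.environment = training_env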
def main(): """ Run the experiment for training """ work_space = Workspace.from_config() # Set up the dataset for training datastore = work_space.get_default_datastore() dataset = Dataset.File.from_files(path=(datastore, "datasets/mnist")) # Set up the experiment for training experiment = Experiment(workspace=work_space, name="keras-lenet-train") # azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 2000000000 config = ScriptRunConfig( source_directory=".", script="train_keras.py", compute_target="cpu-cluster", arguments=[ "--data_folder", dataset.as_named_input("input").as_mount(), ], ) # Set up the Tensoflow/Keras environment environment = Environment("keras-environment") environment.python.conda_dependencies = CondaDependencies.create( python_version="3.7.7", pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"]) config.run_config.environment = environment # Run the experiment for training run = experiment.submit(config) aml_url = run.get_portal_url() print( "Submitted to an Azure Machine Learning compute cluster. Click on the link below" ) print("") print(aml_url) tboard = Tensorboard([run]) # If successful, start() returns a string with the URI of the instance. tboard.start(start_browser=True) run.wait_for_completion(show_output=True) # After your job completes, be sure to stop() the streaming otherwise it will continue to run. print("Press enter to stop") input() tboard.stop() # Register Model metrics = run.get_metrics() run.register_model( model_name="keras_mnist", tags={ "data": "mnist", "model": "classification" }, model_path="outputs/keras_lenet.h5", model_framework=Model.Framework.TENSORFLOW, model_framework_version="2.3.1", properties={ "train_loss": metrics["train_loss"][-1], "train_accuracy": metrics["train_accuracy"][-1], "val_loss": metrics["val_loss"][-1], "val_accuracy": metrics["val_accuracy"][-1], }, )
run_config.container_instance.memory_gb = 2

# enable Docker
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# auto-prepare the Docker image when used for execution (if it is not already prepared)
run_config.auto_prepare_environment = True

# specify CondaDependencies obj
conda_dep = CondaDependencies.create(python_version='3.6.2',
                                     conda_packages=['keras', 'matplotlib'])
conda_dep.add_tensorflow_conda_package(core_type='cpu')
run_config.environment.python.conda_dependencies = conda_dep

# Create a directory that will contain all the necessary code from your local machine
# that you will need access to on the remote resource. This includes the training script,
# and any additional files your training script depends on.
import os

project_folder = './tmp/fashion-mnist-aci'
os.makedirs(project_folder, exist_ok=True)

import shutil

shutil.copy('./scripts/train_Fashion_MNIST.py', project_folder)

# Submit Experiment
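# A hedged sketch of the submission step the trailing comment points to, assuming a
# workspace "ws" and the experiment name "fashion-mnist-aci"; both are illustrative
# assumptions, not taken from the original snippet.
from azureml.core import Experiment
from azureml.core.script_run_config import ScriptRunConfig

src = ScriptRunConfig(source_directory=project_folder,
                      script='train_Fashion_MNIST.py',
                      run_config=run_config)
run = Experiment(workspace=ws, name='fashion-mnist-aci').submit(src)
run.wait_for_completion(show_output=True)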