def create_aml_workspace(cfg):
    """
    Create the AML workspace if it doesn't exist; if it does exist, return the existing one.
    input  : cfg : AMLConfiguration object containing all creation parameters
    output : ws  : Workspace object
    """
    try:
        log.info('Trying to retrieve config file from local filesystem.')
        ws = Workspace.from_config()
        if ws.name == cfg.AMLConfig.workspace:
            log.info('Workspace found with name: ' + ws.name)
            log.info('  Azure region: ' + ws.location)
            log.info('  Subscription id: ' + ws.subscription_id)
            log.info('  Resource group: ' + ws.resource_group)
        else:
            log.error('Workspace found ({}), but it does not match the JSON config '
                      'file ({}). Please delete the config folder (aml_config) and '
                      'restart.'.format(ws.name, cfg.AMLConfig.workspace))
            exit(-2)
    except Exception:
        log.info('Unable to find AML config files in (aml_config) - attempting to create them.')
        try:
            log.info('Creating the workspace on Azure.')
            ws = Workspace.create(name=cfg.AMLConfig.workspace,
                                  auth=cfg.Credentials,
                                  subscription_id=cfg.subscription_id,
                                  resource_group=cfg.AMLConfig.resource_group,
                                  location=cfg.AMLConfig.location,
                                  create_resource_group=True,
                                  exist_ok=False)
            log.info('Workspace created. Saving details to file in (aml_config) '
                     'to accelerate further launches.')
            ws.get_details()
            ws.write_config()
        except Exception as exc:
            log.error('Unable to create the workspace on Azure. Error message: ' + str(exc))
            exit(-2)
    return ws
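
# Hypothetical usage sketch: cfg is the AMLConfiguration object mentioned in the
# docstring; the constructor call and path below are placeholders, not from the
# original snippet.
# cfg = AMLConfiguration('aml_config.json')
# ws = create_aml_workspace(cfg)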
def get_workspace(config_file):
    ws = Workspace.from_config(config_file)
    print('Workspace name: ' + ws.name,
          'Azure region: ' + ws.location,
          'Subscription id: ' + ws.subscription_id,
          'Resource group: ' + ws.resource_group,
          sep='\n')
    return ws
def main(train_path, pred_path, n_pred, dt, target, time_limit_min):
    df_train = pd.read_csv(train_path)
    df_train[dt] = pd.to_datetime(df_train[dt])

    time_series_settings = {
        "time_column_name": dt,
        "max_horizon": n_pred,
        "target_lags": "auto",
        "target_rolling_window_size": "auto"
    }
    automl_config = AutoMLConfig(task="forecasting",
                                 training_data=df_train,
                                 label_column_name=target,
                                 n_cross_validations=5,
                                 max_cores_per_iteration=-1,
                                 path=os.environ["SCRATCH"],
                                 experiment_timeout_minutes=time_limit_min,
                                 ensemble_download_models_timeout_sec=3600,
                                 **time_series_settings)

    ws = Workspace.from_config()
    experiment = Experiment(ws, "experiment")
    best_run, fitted_model = experiment.submit(automl_config,
                                               show_output=True).get_output()

    print("Best pipeline:")
    try:
        # Ensemble models wrap their constituent estimators; unwrap to list them.
        ensemble = vars(fitted_model.steps[1][1])["_wrappedEnsemble"]
        print(ensemble.__class__)
        steps = ensemble.estimators_
    except Exception:
        steps = fitted_model.steps
    best_pipeline = ""
    for i, step in enumerate(steps):
        best_pipeline += f"{i}. {str(step)}\n"
    print(best_pipeline)

    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_colwidth', None)
    print(fitted_model.named_steps["timeseriestransformer"]
          .get_engineered_feature_names())
    featurization_summary = fitted_model.named_steps[
        "timeseriestransformer"].get_featurization_summary()
    print(pd.DataFrame.from_records(featurization_summary))

    # Forecast n_pred periods past the end of the training data.
    x_pred = pd.date_range(df_train[dt].iloc[-1],
                           periods=n_pred + 1,
                           freq=pd.infer_freq(df_train[dt]))[1:]
    y_pred = fitted_model.forecast(forecast_destination=x_pred[-1])[0]
    # y_pred = fitted_model.forecast(pd.DataFrame({dt: x_pred}))[0]
    df_pred = pd.DataFrame({dt: x_pred, target: y_pred})
    df_pred.to_csv(pred_path, index=False)
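
# Hypothetical invocation sketch: forecast 14 periods of a daily 'sales'
# series; file names and column names here are placeholders, not from the
# original snippet.
# main(train_path='sales_train.csv', pred_path='sales_pred.csv',
#      n_pred=14, dt='date', target='sales', time_limit_min=30)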
# auth = InteractiveLoginAuthentication(tenant_id = 'mytenantid')
# ws = Workspace.from_config(auth = auth)
# ```
#
# If you need to run in an environment where interactive login is not possible,
# you can use Service Principal authentication by replacing the
# `ws = Workspace.from_config()` line in the cell below with the following:
#
# ```
# from azureml.core.authentication import ServicePrincipalAuthentication
# auth = ServicePrincipalAuthentication('mytenantid', 'myappid', 'mypassword')
# ws = Workspace.from_config(auth = auth)
# ```
# For more details, see [aka.ms/aml-notebook-auth](http://aka.ms/aml-notebook-auth)

# In[85]:

ws = Workspace.from_config()

# Choose a name for the experiment and specify the project folder.
experiment_name = 'automl-classification'
project_folder = './sample_projects/automl-classification'

experiment = Experiment(ws, experiment_name)

output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace Name'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
output['Experiment Name'] = experiment.name
import json
import pickle

import numpy as np
import pandas as pd

from azureml.core.workspace import Workspace
import azureml.train.automl
from sklearn.externals import joblib
from azureml.core.model import Model
from azureml.core.webservice import Webservice, AciWebservice, AksWebservice

ws = Workspace.from_config('./config.json')

service = AciWebservice(ws, "sentiment-scorer-korean")
# service = AksWebservice(ws, "sentiment-scorer-korean-aks")

# input_sample = pd.DataFrame({'id': pd.Series(['6471903'], dtype='int64'), 'document': pd.Series(['진짜 별로다 헐 ㅡ'], dtype='object')})
from load_dataset import testdata as input_sample

test = json.dumps({"data": input_sample.values.tolist()})
result = service.run(input_data=bytes(test, encoding="utf8"))
input_sample['predicted'] = list(json.loads(result).values())[0]
print(input_sample)
def load_workspace_from_config():
    return Workspace.from_config()
    exp = get_experiment(ws, experiment_name)
    print('Cancelling existing experiment with name: {}'.format(experiment_name))
    for run in tqdm(list(exp.get_runs())):
        run.cancel()


if __name__ == "__main__":
    print("SDK Version:", azureml.core.VERSION)
    set_diagnostics_collection(send_diagnostics=True)

    # Read in config
    conf = Config(config_filepath='~/aml_secrets/aml_secrets_rr2msrlabs.yaml')

    # Config region
    conf_aml = conf['aml_config']
    conf_cluster = conf['cluster_config']
    conf_docker = conf['azure_docker']
    conf_experiment = conf['experiment']
    # endregion

    # Initialize workspace
    # Make sure you have downloaded your workspace config
    ws = Workspace.from_config(path=conf_aml['aml_config_file'])
    print('Workspace name: ' + ws.name,
          'Azure region: ' + ws.location,
          'Subscription id: ' + ws.subscription_id,
          'Resource group: ' + ws.resource_group,
          sep='\n')

    launch_experiment(ws, conf_aml, conf_cluster, conf_docker, conf_experiment)
from azureml.core.workspace import Workspace
from azureml.core.compute import ComputeTarget, AksCompute
from azureml.exceptions import ComputeTargetException
from azureml.core.webservice import AksWebservice
from azureml.core.model import InferenceConfig, Model
from azureml.core.environment import Environment, DEFAULT_GPU_IMAGE

# Initialize a workspace
ws = Workspace.from_config(
    "C:/Users/Danilo.Bento/Icon Dropbox/DEVDATA/RO/DEVELOPMENT/SIB2/dev/.azureml/config.json"
)
print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group,
      'Workspace connected',
      sep='\n')

# Choose a name for your cluster
aks_name = "SIB2-AKS-GPU"

# Check whether the cluster already exists and create it if nonexistent
try:
    aks_target = ComputeTarget(workspace=ws, name=aks_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    # Provision an AKS cluster with a GPU machine
    prov_config = AksCompute.provisioning_configuration(vm_size="Standard_NC6")
    # Create the cluster
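    # The original snippet stops at the creation step. A minimal sketch of the
    # assumed continuation, using the standard v1 SDK pattern: submit the
    # provisioning request and block until the cluster is ready.
    aks_target = ComputeTarget.create(workspace=ws,
                                      name=aks_name,
                                      provisioning_configuration=prov_config)
    aks_target.wait_for_completion(show_output=True)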
import numpy as np
from sklearn.metrics import mean_absolute_error
from azureml.train.automl.automlexplainer import retrieve_model_explanation
from azureml.core.model import Model
from azureml.core.image import ContainerImage
from azureml.core.image.image import Image
from azureml.core import Webservice
from azureml.core.webservice import AciWebservice

# Set the local env to handle missing packages
run_user_managed = RunConfiguration()
run_user_managed.environment.python.user_managed_dependencies = False

# Create workspace object for an existing workspace and create an experiment
ws = Workspace.from_config('subscription.json')
print(ws.name, ws.location, ws.resource_group, sep='\t')
experiment = Experiment(workspace=ws, name='experiment1')

# Full paths to training and testing data
file_path1 = os.path.join(os.getcwd(), "cumodelwo2014.csv")
dflowtr = dprep.auto_read_file(path=file_path1)
file_path2 = os.path.join(os.getcwd(), "test2014.csv")
dflowte = dprep.auto_read_file(path=file_path2)

# Specify x (causal) and y (response) attributes in the training data
dflowtr_x = dflowtr.keep_columns([
    'cell-ID', 'Soil_Name', 'MEAN_Yld_V', 'COUNT_Yld', 'MEAN_Eleva',
    'RANGE_Elev', 'Crop-Type', 'V.A.T(F)', 'R.A.T(F)', 'M.A.T(F)',
    'V.PET(inch)', 'R.PET(inch)', 'M.PET(inch)', 'V.T.R(inch)',
    'R.T.R(inch)', 'M.T.R(inch)'
])
parser = argparse.ArgumentParser()
parser.add_argument('experiment', help='Azure ML experiment name')
parser.add_argument('--workspace-config', default="azureml_config.json",
                    help='Download from the Azure ML portal')
parser.add_argument('--compute', default="nc6v3",
                    help='Azure ML training cluster')
parser.add_argument('--max_epochs', type=int, default=300)
args = parser.parse_args()
print(args)

# Load workspace configuration from the config.json file
ws = Workspace.from_config(path=args.workspace_config)
print('=' * 40)
print(ws)

# Create an experiment
exp = Experiment(workspace=ws, name=args.experiment)
print('=' * 40)
print(exp)

# Specify a cluster
compute_target = ws.compute_targets[args.compute]
print('=' * 40)
print(compute_target)

# Mount the blob to the training container
# NOTE: (prerequisite) unzip and upload the ePillID data to the blob
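
# The mount step itself is not shown above; a minimal sketch of one common
# v1 SDK pattern. The datastore name and blob path are hypothetical
# placeholders, not from the original snippet.
from azureml.core import Datastore

ds = Datastore.get(ws, datastore_name='epillid_blob')  # hypothetical name
data_ref = ds.path('epillid_data').as_mount()  # reference passed into the run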
# Check core SDK version number for debugging purposes
import azureml.core

print("SDK Version:", azureml.core.VERSION)

subscription_id = "fac34303-435d-4486-8c3f-7094d82a0b60"
resource_group = "aml-notebooks"
workspace_name = "haieastus2ws3"
workspace_region = 'eastus2'  # or eastus2euap

# Import the Workspace class and check the azureml SDK version
from azureml.core.workspace import Workspace, WorkspaceException

ws = Workspace.create(name=workspace_name,
                      subscription_id=subscription_id,
                      resource_group=resource_group,
                      location=workspace_region)
ws.get_details()
ws.write_config()

# Load workspace configuration from the ./aml_config/config.json file.
my_workspace = Workspace.from_config()
print(my_workspace.get_details())
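
# Note: Workspace.create raises if a workspace with this name already exists.
# A sketch of the idempotent variant using the SDK's exist_ok flag, so the
# cell can be re-run safely.
ws = Workspace.create(name=workspace_name,
                      subscription_id=subscription_id,
                      resource_group=resource_group,
                      location=workspace_region,
                      exist_ok=True)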
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.workspace import Workspace
from azureml.core.datastore import Datastore
from azureml.core.dataset import Dataset

dstore_name = 'mldemodatastore'
ds_file = "movielens100k.movies"

# Configure workspace
cli_auth = AzureCliAuthentication()
ws = Workspace.from_config(auth=cli_auth)

# Access your dataset
dataset = Dataset.get(ws, ds_file)

# Load the in-memory Dataset to your local machine as a pandas dataframe
pdDf = dataset.to_pandas_dataframe()
print(pdDf.head())
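
# Dataset.get is the legacy accessor; on newer azureml-core releases the
# equivalent lookup is Dataset.get_by_name, sketched here.
dataset = Dataset.get_by_name(ws, name=ds_file)
pdDf = dataset.to_pandas_dataframe()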
def peptide_identification(args):
    print(datetime.now(), ': Peptide identification starts...')
    print('Settings: ')
    print(args)

    # PLATO settings
    subclusterCount = args.subclusterCount
    spy = args.spy
    spy_portion = args.spy_portion
    RN = args.RN
    rnd_all = args.rnd_all  # If random method, include all decoys
    rnd_portion = args.rnd_portion  # If random method, include rnd_portion of positive set; default 1: pos set = neg set
    replicates_cnt = args.replicates_cnt
    include_label = args.include_label
    AML_preprocess = args.AML_preprocess
    output_folder = args.output_folder

    # AutoML parameter settings
    autoML_best_model_selection = args.autoML_best_model_selection
    autoML_iterations = args.autoML_iterations
    metric = args.metric  # Other metrics: azureml.train.automl.utilities.get_primary_metrics('classification')
    cv_fold = args.cv_fold

    # Input, output
    file_name = args.sample_name
    input_path = args.input_folder
    output_path = output_folder + '/' + file_name
    log_file = output_path + '_autoML_errors_log.html'

    # Instantiate AutoML config and create an experiment in the AutoML workspace
    ws = Workspace.from_config()
    experiment_name = file_name
    experiment = Experiment(ws, experiment_name)
    print(datetime.now(), ': Assigned experiment ' + experiment_name + ' on Azure portal')

    output = {}
    output['SDK version'] = azureml.core.VERSION
    output['Workspace Name'] = ws.name
    output['Resource Group'] = ws.resource_group
    output['Location'] = ws.location
    outputDf = pd.DataFrame(data=output, index=[''])
    print(outputDf)

    print(datetime.now(), ': Reading inputs')
    # Read POSITIVES and ALL inputs
    positives_path = glob.glob(input_path + file_name + '*POSITIVES*')
    raw_positives = pd.read_csv(positives_path[0], sep='\t')

    if AML_preprocess:
        all_path = glob.glob(input_path + file_name + '-ALL.txt')
        raw_all = pd.read_csv(all_path[0], sep='\t')

        # Extract new features: first and last three amino acids of peptide
        # sequences as features - if NA then B category
        raw_all['Peptide'] = raw_all.Peptide.str.replace(
            r'([\(\[]).*?([\)\]])', r'B', regex=True)
        raw_all['P1'] = raw_all['Peptide'].str[0]
        raw_all['P2'] = raw_all['Peptide'].str[2]
        raw_all['P3'] = raw_all['Peptide'].str[3]
        raw_all['P4'] = raw_all['Peptide'].str[-4]
        raw_all['P5'] = raw_all['Peptide'].str[-3]
        raw_all['P6'] = raw_all['Peptide'].str[-1]
    else:
        all_path = glob.glob(input_path + file_name + '_percolator_feature.txt')
        raw_all = pd.read_csv(all_path[0], sep='\t')

    raw_all['Class'] = 0

    # Make positive and test set
    test_data = raw_all.drop(['ScanNr', 'Proteins'], axis=1)
    positive_set = pd.merge(left=pd.DataFrame(raw_positives['SpecId']),
                            right=pd.DataFrame(test_data),
                            how='left',
                            left_on='SpecId',
                            right_on='SpecId')
    positive_set['Class'] = 1

    # Remove decoys in the positive set, if there are any
    decoys_in_positive_idx = positive_set.index[positive_set['Label'] == -1].tolist()
    positive_set = positive_set[positive_set['Label'] != -1]

    # Dataframe to store predictions
    all_predictions = pd.DataFrame({
        'SpecId': list(test_data['SpecId']),
        'Peptide': list(test_data['Peptide']),
        'Label': list(test_data['Label'])
    })
    prediction_summary = all_predictions

    # Prepare test set for modeling
    y_test = test_data['Class']
    if include_label:
        X_test = test_data.drop(['SpecId', 'Peptide', 'Class'], axis=1)
    else:
        X_test = test_data.drop(['SpecId', 'Peptide', 'Label', 'Class'], axis=1)

    # Prepare positive set for modeling
    positive_set_idx = [
        test_data['SpecId'].tolist().index(x)
        for x in positive_set['SpecId'].tolist()
        if x in test_data['SpecId'].tolist()
    ]

    # Used to create the negative set
    decoys_idx = np.setdiff1d(
        test_data.index[test_data['Label'] == -1].tolist(),
        decoys_in_positive_idx).tolist()

    global gower_dist_avg
    if RN:
        if not os.path.exists(input_path + file_name + 'gower_dist_avg.npy'):
            print(datetime.now(), ': Calculating Gower distance')
            gower_dist = gower.gower_matrix(test_data)
            selected_rows = gower_dist[positive_set_idx]
            gower_dist_avg = np.mean(selected_rows, axis=0)
            print(datetime.now(), ': Saving Gower distance matrix')
            np.save(input_path + '/' + file_name + 'gower_dist_avg.npy',
                    gower_dist_avg)  # save
        else:
            print(datetime.now(), ': Loading Gower distance matrix from ',
                  input_path + file_name + 'gower_dist_avg.npy')
            gower_dist_avg = np.load(input_path + file_name + 'gower_dist_avg.npy')  # load

    if spy:
        all_spies = pd.DataFrame()

    # Create the train set by concatenating positive and negative sets, build
    # model(s) using AutoML, and store predictions based on the best model.
    for rep in range(0, replicates_cnt):
        print(datetime.now(), ': Replicate #', rep + 1)
        if spy:
            # Exclude spy_portion of the training data to be the spies
            positive_set = positive_set.sample(
                n=len(positive_set), random_state=rep * 100).reset_index(drop=True)
            spySet_size = round(len(positive_set) * spy_portion)
            spies_ID = positive_set.loc[1:spySet_size, ['SpecId']]
            positive_set_wSpy = positive_set.iloc[spySet_size + 1:len(positive_set)]

        if not RN:
            if rnd_all:
                # Negative set includes all decoys
                negative_set_idx = decoys_idx
            else:
                # Negative set includes rnd_portion times |positive_set| indices
                random.seed(rep)
                random.shuffle(decoys_idx)
                negative_set_idx = decoys_idx[0:rnd_portion * len(positive_set)]
        else:
            print(datetime.now(), ': Starts estimating RNs')
            negative_set_idx = reliable_negative(test_data, positive_set,
                                                 subclusterCount, rep)
            print(datetime.now(), ': Ends estimating RNs')

        negative_set = test_data.iloc[negative_set_idx]
        if spy:
            train_data = pd.concat([positive_set_wSpy, negative_set], axis=0)
        else:
            train_data = pd.concat([positive_set, negative_set], axis=0)

        y_train = train_data['Class']
        if include_label:
            X_train = train_data.drop(['SpecId', 'Peptide', 'Class'], axis=1)
        else:
            X_train = train_data.drop(['SpecId', 'Peptide', 'Class', 'Label'], axis=1)
        print('Training set size:', len(y_train), '\nTest set size:', len(y_test))

        automl_config = AutoMLConfig(task='classification',
                                     debug_log=log_file,
                                     primary_metric=metric,
                                     iteration_timeout_minutes=200,
                                     iterations=autoML_iterations,
                                     verbosity=logging.INFO,
                                     preprocess=AML_preprocess,
                                     X=X_train,
                                     y=y_train,
                                     n_cross_validations=cv_fold,
                                     model_explainability=True)

        print(datetime.now(), ': modeling replicate #' + str(rep + 1) + '...')
        local_run = experiment.submit(automl_config, show_output=True)

        if not autoML_best_model_selection:
            # Retrieve the best model based on a set of metrics
            children = list(local_run.get_children())
            metricslist = {}
            for run in children:
                properties = run.get_properties()
                metrics = {
                    k: v
                    for k, v in run.get_metrics().items() if isinstance(v, float)
                }
                metricslist[int(properties['iteration'])] = metrics
            rundata = pd.DataFrame(metricslist).sort_index(axis=1)
            tmp = rundata.T.sort_values([
                'AUC_weighted', 'f1_score_weighted', 'precision_score_weighted',
                'recall_score_weighted', 'weighted_accuracy'
            ], ascending=False)
            rundata = tmp.sort_values('log_loss', ascending=True).T
            best_run_iteration = rundata.columns.values[0]
            rundata.to_csv(output_path + '_metrics_list_' + str(rep) + '.txt')
            best_run, fitted_model = local_run.get_output(
                iteration=best_run_iteration)
        else:
            best_run, fitted_model = local_run.get_output()

        print('Best run: ', best_run)
        print(datetime.now(), ': Saving best model and predictions')

        # Save the best model, prediction value and probability
        modelname = output_path + '_model_' + str(rep) + '.sav'
        joblib.dump(fitted_model, modelname)
        y_pred_val = fitted_model.predict(X_test)
        y_pred_prob = fitted_model.predict_proba(X_test)

        # Add the results of the replicate to the all-predictions table
        all_predictions['pred_rep' + str(rep)] = list(y_pred_val)
        all_predictions['prob_rep' + str(rep)] = list(
            [item[1] for item in y_pred_prob])

        # Overwrite prediction values based on the spies cutoff
        if spy:
            threshold = min(
                pd.merge(spies_ID, all_predictions,
                         on='SpecId')['prob_rep' + str(rep)])
            all_predictions['pred_rep' + str(rep)] = np.where(
                all_predictions['prob_rep' + str(rep)] >= threshold, 1, 0)
            all_spies['SpecId' + str(rep)] = spies_ID['SpecId']
            all_spies['Prob_rep' + str(rep)] = list(
                pd.merge(spies_ID, all_predictions,
                         on=['SpecId'])['prob_rep' + str(rep)])
        print(datetime.now(), ': Replicate #' + str(rep + 1) + ' processed!')

    all_predictions.to_csv(output_path + '_all_predictions.csv', index=False)
    if spy:
        all_spies.to_csv(output_path + '_all_spies.csv', index=False)

    print(datetime.now(), ': Generate prediction summary of all replicates')
    pred_col_indices = [col for col in all_predictions.columns if 'pred' in col]
    prob_col_indices = [col for col in all_predictions.columns if 'prob' in col]
    prediction_summary['Std'] = all_predictions[prob_col_indices].std(skipna=True, axis=1)
    prediction_summary['Min'] = all_predictions[prob_col_indices].min(skipna=True, axis=1)
    prediction_summary['Max'] = all_predictions[prob_col_indices].max(skipna=True, axis=1)
    prediction_summary['Avg'] = all_predictions[prob_col_indices].mean(skipna=True, axis=1)
    prediction_summary['Median'] = all_predictions[prob_col_indices].median(skipna=True, axis=1)
    prediction_summary['Vote'] = all_predictions[pred_col_indices].sum(skipna=True, axis=1)
    prediction_summary.to_csv(output_path + '_prediction_summary.txt',
                              sep='\t', index=False)

    # Feature importance
    print(datetime.now(), ': Output feature importance of the best run')
    client = ExplanationClient.from_run(best_run)

    # Raw feature importance (raw=True distinguishes this download from the
    # engineered one below; the original made two identical calls)
    raw_explanations = client.download_model_explanation(
        top_k=len(X_test.columns), raw=True)
    print('Raw feature importance')
    print(raw_explanations.get_feature_importance_dict())
    d = raw_explanations.get_feature_importance_dict()
    raw_feature_importance = pd.DataFrame(list(d.items()))
    raw_feature_importance.to_csv(output_path + '_raw_feature_importance.csv',
                                  index=False)

    # Engineered feature importance
    engineered_explanations = client.download_model_explanation(
        top_k=len(X_test.columns), raw=False)
    print('Engineered feature importance')
    print(engineered_explanations.get_feature_importance_dict())
    d = engineered_explanations.get_feature_importance_dict()
    engineered_feature_importance = pd.DataFrame(list(d.items()))
    engineered_feature_importance.to_csv(
        output_path + '_engineered_feature_importance.csv', index=False)

    print(datetime.now(), ': Program end')
def main():
    # Local compute
    run_user_managed = RunConfiguration()
    run_user_managed.environment.python.user_managed_dependencies = False

    # Print to check azure sdk installation
    print(azureml.core.VERSION)

    # Create workspace object to connect to omtest workspace in MLSERVICE
    ws = Workspace.from_config('./config.json')

    # Default data store
    # ds = ws.get_default_datastore()
    # print(ds)

    # Choose a name for the run history container in the workspace
    experiment_name = 'automated-ml-regression'
    # Project folder
    project_folder = './automated-ml-regression'

    output = {}
    output['SDK version'] = azureml.core.VERSION
    output['Subscription ID'] = ws.subscription_id
    output['Workspace'] = ws.name
    output['Resource Group'] = ws.resource_group
    output['Location'] = ws.location
    output['Project Directory'] = project_folder
    pd.set_option('display.max_colwidth', None)
    pd.DataFrame(data=output, index=['']).T

    # Stats for all the columns
    dflow = dprep.auto_read_file(
        path='/Users/omprakashnekkanti/Desktop/Spring 2019/CS445-Capstone/automatedML/cuformodel.csv')
    print(type(dflow))
    dflow.get_profile()

    # File path as a string
    file_path = os.path.join(os.getcwd(), 'cuformodel.csv')
    print(file_path)
    print(type(file_path))
    # dflow_prepared = dprep.Dataflow.open(file_path)
    # dflow_prepared.get_profile()

    dflow_X = dflow.keep_columns([
        'cell-ID', 'Soil_Name', 'MEAN_Yld_V', 'COUNT_Yld', 'MEAN_Eleva',
        'RANGE_Elev', 'Crop-Type', 'V.A.T(F)', 'R.A.T(F)', 'M.A.T(F)',
        'V.PET(inch)', 'R.PET(inch)', 'M.PET(inch)', 'V.T.R(inch)',
        'R.T.R(inch)', 'M.T.R(inch)'
    ])
    dflow_y = dflow.keep_columns('NormalizedYield')

    x_df = dflow_X.to_pandas_dataframe()
    y_df = dflow_y.to_pandas_dataframe()
    x_train, x_test, y_train, y_test = train_test_split(x_df, y_df,
                                                        test_size=0.2,
                                                        random_state=223)
    # y_train is flattened to a 1d array when passed to AutoMLConfig below

    automl_settings = {
        "iteration_timeout_minutes": 20,
        "iterations": 40,
        "primary_metric": 'mean_absolute_error',
        "preprocess": False,
        "verbosity": logging.INFO,
        "n_cross_validations": 10
    }

    # Local compute
    automated_ml_config = AutoMLConfig(task='regression',
                                       debug_log='automated_ml_errors.log',
                                       path=project_folder,
                                       X=x_train.values,
                                       y=y_train.values.flatten(),
                                       **automl_settings)

    experiment = Experiment(ws, experiment_name)
    local_run = experiment.submit(automated_ml_config, show_output=True)
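    # Follow-up sketch (not in the original): once the run completes, the best
    # iteration and its fitted scikit-learn pipeline can be retrieved directly.
    best_run, fitted_model = local_run.get_output()
    print(best_run)
    print(fitted_model)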
def main():
    ws = Workspace.from_config()
    df = get_DDoS_dataset(ws)
def get_workspace_or_default(subscription_id=None,
                             resource_group=None,
                             workspace_name=None,
                             auth=None,
                             project_path=None,
                             logger=None):
    """
    Resolution order:
    1) the explicitly specified parameters,
    2) the project context (config.json on disk),
    3) `az configure` defaults.

    :param subscription_id:
    :param resource_group:
    :param workspace_name:
    :param auth:
    :param project_path:
    :param logger:
    :return:
    """
    if not logger:
        logger = module_logger

    if not auth:
        auth = get_cli_specific_auth()
        logger.debug("No auth specified, using authentication {}".format(
            type(auth).__name__))

    if resource_group and workspace_name:
        # Simple case where both are specified. The only way to get a workspace
        # with no `az configure` support for 'mlworkspace' is for the user to
        # explicitly specify the parameters.
        # Technically the resource group can be az-configured in.
        if not subscription_id:
            subscription_id = get_default_subscription_id(auth)
        return Workspace(subscription_id, resource_group, workspace_name,
                         auth=auth)

    if project_path:
        logger.debug("Project path %s set", project_path)
        try:
            return Workspace.from_config(path=project_path, auth=auth,
                                         _logger=logger)
        except UserErrorException:
            if project_path != ".":
                logger.warning(
                    "The provided path %s did not contain a config.json, "
                    "falling back to CLI configuration.", project_path)

    if not subscription_id:
        subscription_id = get_default_subscription_id(auth)

    if not workspace_name:
        workspace_name = get_workspace_or_default_name(
            workspace_name,
            throw_error=True,
            subscription_id=subscription_id,
            auth=auth,
            project_path=project_path)
    if not resource_group:
        resource_group = get_resource_group_or_default_name(
            resource_group,
            throw_error=True,
            subscription_id=subscription_id,
            auth=auth,
            project_path=project_path)

    return Workspace(subscription_id, resource_group, workspace_name, auth=auth)
def __init__(self, config_filepath: str) -> None:
    # Read in config
    self.conf = Config(config_filepath)

    # Config region
    self.conf_aml = self.conf['aml_config']
    self.conf_storage = self.conf['storage']
    self.conf_cluster = self.conf['cluster_config']
    self.conf_docker = self.conf['azure_docker']
    self.conf_experiment = self.conf['experiment']
    # end region

    # Initialize workspace
    self.ws = Workspace.from_config(path=self.conf_aml['aml_config_file'])
    print('Workspace name: ' + self.ws.name,
          'Azure region: ' + self.ws.location,
          'Subscription id: ' + self.ws.subscription_id,
          'Resource group: ' + self.ws.resource_group,
          sep='\n')

    # Register blobs
    # TODO: make blob registration more flexible
    self.input_ds = Datastore.register_azure_blob_container(
        workspace=self.ws,
        datastore_name=self.conf_storage['input_datastore_name'],
        container_name=self.conf_storage['input_container_name'],
        account_name=self.conf_storage['input_azure_storage_account_name'],
        account_key=self.conf_storage['input_azure_storage_account_key'],
        create_if_not_exists=False)

    self.output_ds = Datastore.register_azure_blob_container(
        workspace=self.ws,
        datastore_name=self.conf_storage['output_datastore_name'],
        container_name=self.conf_storage['output_container_name'],
        account_name=self.conf_storage['output_azure_storage_account_name'],
        account_key=self.conf_storage['output_azure_storage_account_key'],
        create_if_not_exists=False)

    # Get the existing compute cluster
    try:
        self.compute_target = ComputeTarget(
            workspace=self.ws, name=self.conf_cluster['cluster_name'])
        print(self.compute_target.get_status().serialize())
    except Exception as e:
        print('Encountered error trying to get the compute target')
        print(f'Exception was {e}')
        sys.exit(1)

    self.project_folder = self.conf_experiment['project_folder']

    # Set up custom docker usage
    self.image_registry_details = ContainerRegistry()
    self.image_registry_details.address = self.conf_docker['image_registry_address']
    self.image_registry_details.username = self.conf_docker['image_registry_username']
    self.image_registry_details.password = self.conf_docker['image_registry_password']
    self.user_managed_dependencies = True
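
# A sketch (not from the original class) of how these registry details and the
# compute target might later be wired into a run with the v1 SDK; it assumes
# `from azureml.core import ScriptRunConfig` and
# `from azureml.core.runconfig import RunConfiguration`, and the base image
# name and entry script below are hypothetical placeholders.
def make_script_run_config(self) -> 'ScriptRunConfig':
    run_config = RunConfiguration()
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = 'myorg/train:latest'  # hypothetical image
    run_config.environment.docker.base_image_registry = self.image_registry_details
    run_config.environment.python.user_managed_dependencies = self.user_managed_dependencies
    run_config.target = self.compute_target
    return ScriptRunConfig(source_directory=self.project_folder,
                           script='train.py',  # hypothetical entry script
                           run_config=run_config)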