def get_best_automl_run(pipeline_run):
    all_children = list(pipeline_run.get_children())
    # Keep only the pipeline step that was executed as an AutoML run.
    automl_step = [
        c for c in all_children if c.properties.get("runTemplate") == "AutoML"
    ]
    # Debug output: list every child step and its properties.
    for c in all_children:
        print(c, c.properties)
    automl_run = AutoMLRun(pipeline_run.experiment, automl_step[0].id)
    best = automl_run.get_best_child()
    return best
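A minimal usage sketch, assuming a completed pipeline run whose ID you already know; the experiment name and run ID below are placeholders, not values from the original snippet:

from azureml.core import Workspace, Experiment
from azureml.pipeline.core import PipelineRun
from azureml.train.automl.run import AutoMLRun  # needed by get_best_automl_run

ws = Workspace.from_config()
experiment = Experiment(ws, 'my-pipeline-experiment')              # placeholder name
pipeline_run = PipelineRun(experiment, 'example-pipeline-run-id')  # placeholder run ID

best = get_best_automl_run(pipeline_run)
print(best.id, best.get_metrics())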
Example No. 2
    def _deploy_remotly(self, model_id, model_run, ws, experiment):
        from azureml.core.model import Model
        from azureml.core.model import InferenceConfig
        from azureml.core.webservice import Webservice
        from azureml.core.webservice import AciWebservice
        from azureml.exceptions import WebserviceException
        from azureml.train.automl.run import AutoMLRun

        # ws, experiment = self._get_experiment()
        iteration, run_id = self._get_iteration(model_id)

        experiment_run = AutoMLRun(experiment = experiment, run_id = run_id)
        model_name = model_run.properties['model_name']
        self.ctx.log('Registering model: %s' % model_id)

        description = '%s-%s' % (model_name, iteration)
        model = experiment_run.register_model(
            model_name = model_name, iteration=iteration,
            description = description, tags = None)

        script_file_name = '.azureml/score_script.py'
        model_run.download_file(
            'outputs/scoring_file_v_1_0_0.py', script_file_name)

        self._edit_score_script(script_file_name)

        # Deploying ACI Service
        aci_service_name = self._aci_service_name(model_name)
        self.ctx.log('Deploying AciWebservice %s ...' % aci_service_name)

        inference_config = InferenceConfig(
            environment = model_run.get_environment(),
            entry_script = script_file_name)

        aciconfig = AciWebservice.deploy_configuration(
            cpu_cores = 1,
            memory_gb = 2,
            tags = {'type': "inference-%s" % aci_service_name},
            description = "inference-%s" % aci_service_name)

        # Remove any existing service under the same name.
        try:
            Webservice(ws, aci_service_name).delete()
            self.ctx.log('Remove any existing service under the same name...')
        except WebserviceException:
            pass

        aci_service = Model.deploy(
            ws, aci_service_name, [model], inference_config, aciconfig)
        aci_service.wait_for_deployment(True)
        self.ctx.log('%s state %s' % (aci_service_name, str(aci_service.state)))

        return {'model_id': model_id, 'aci_service_name': aci_service_name}
Example No. 3
    def stop(self):
        ws = AzureProject(self.ctx)._get_ws()
        experiment_name = self.ctx.config.get('experiment/name', None)
        if experiment_name is None:
            raise AzureException('Please specify Experiment name...')
        run_id = self.ctx.config.get('experiment/run_id', None)
        if run_id is None:
            raise AzureException('Please provide Run ID (experiment/run_id)...')
        experiment = Experiment(ws, experiment_name)
        run = AutoMLRun(experiment=experiment, run_id=run_id)
        run.cancel()
        return {'stopped': experiment_name}
Example No. 4
def get_best_run(experiment, run):
    """
    Return the best among child runs
    """
    best_run = None
    if run.type == "automl":
        get_run = AutoMLRun(experiment=experiment, run_id=run.id)
        best_run = get_run.get_best_child()
    if run.type == "hyperdrive":
        get_run = HyperDriveRun(experiment=experiment, run_id=run.id)
        best_run = get_run.get_best_run_by_primary_metric()

    return best_run
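A brief usage sketch, assuming the parent run is rehydrated from the workspace by ID; the experiment name and run ID are placeholders:

from azureml.core import Workspace, Experiment
from azureml.core.run import Run

ws = Workspace.from_config()
experiment = Experiment(ws, 'my-experiment')           # placeholder name
parent_run = Run(experiment, 'example-parent-run-id')  # placeholder run ID

best = get_best_run(experiment, parent_run)
if best is not None:
    print(best.id, best.get_metrics())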
Example No. 5
    def _deploy_locally(self, model_id, model_run, ws, experiment):
        from azureml.train.automl.run import AutoMLRun

        self.ctx.log('Downloading model %s' % model_id)

        iteration, run_id = self._get_iteration(model_id)
        remote_run = AutoMLRun(experiment = experiment, run_id = run_id)
        best_run, fitted_model = remote_run.get_output(iteration=iteration)

        is_loaded, model_path = self.verify_local_model(model_id)
        fsclient.save_object_to_file(fitted_model, model_path)

        self.ctx.log('Downloaded model to %s' % model_path)
        return {'model_id': model_id}
Example No. 6
    def _predict_remotely(
        self, ws, experiment, predict_data, model_id, threshold):
        input_payload = predict_data.to_json(orient='split', index = False)

        remote_run = AutoMLRun(experiment = experiment, run_id = model_id)
        model_name = remote_run.properties['model_name']
        aci_service_name = self._aci_service_name(model_name)
        aci_service = AciWebservice(ws, aci_service_name)

        input_payload = json.loads(input_payload)
        # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.        
        method = 'predict'
        if threshold is not None:
            method = 'predict_proba'
        input_payload = {
            'method': method,
            'data': input_payload['data']
        }
        input_payload = json.dumps(input_payload)
        try:
            response = aci_service.run(input_data = input_payload)
            print(response)
        except Exception as e:
            print('err log', aci_service.get_logs())
            raise e

        results_proba = None
        proba_classes = None

        return json.loads(response)['result'], results_proba, proba_classes
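For reference, a small sketch of the payload this method ends up posting to the ACI endpoint; the column names and values are made up for illustration:

import json
import pandas as pd

predict_data = pd.DataFrame({'feature_a': [1.0, 2.5], 'feature_b': ['x', 'y']})  # made-up columns

payload = {
    'method': 'predict',  # 'predict_proba' when a threshold is supplied
    'data': json.loads(predict_data.to_json(orient='split', index=False))['data'],
}
print(json.dumps(payload))
# {"method": "predict", "data": [[1.0, "x"], [2.5, "y"]]}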
Example No. 7
    def deploy(self, model_id, locally):
        if locally:
            self.ctx.log('Local deployment step is not required for Azure..')
            return {'model_id': model_id}

        ws = AzureProject(self.ctx)._get_ws()
        experiment_name = self.ctx.config.get('experiment/name', None)
        if experiment_name is None:
            raise AzureException('Please specify Experiment name...')

        iteration, run_id = self._get_iteration(model_id)

        experiment = Experiment(ws, experiment_name)
        experiment_run = AutoMLRun(experiment=experiment, run_id=run_id)
        model_run = AutoMLRun(experiment=experiment, run_id=model_id)
        model_name = model_run.properties['model_name']
        self.ctx.log('Registering model: %s' % model_name)

        description = '%s-%s' % (model_name, iteration)
        model = experiment_run.register_model(model_name=model_name,
                                              iteration=iteration,
                                              description=description,
                                              tags=None)

        script_file_name = '.azureml/score_script.py'
        model_run.download_file('outputs/scoring_file_v_1_0_0.py',
                                script_file_name)

        # Deploying ACI Service
        aci_service_name = self._aci_service_name(model_name)
        self.ctx.log('Deploying AciWebservice %s ...' % aci_service_name)

        inference_config = InferenceConfig(
            environment=model_run.get_environment(),
            entry_script=script_file_name)

        aciconfig = AciWebservice.deploy_configuration(
            cpu_cores=1,
            memory_gb=2,
            tags={'type': "inference-%s" % aci_service_name},
            description="inference-%s" % aci_service_name)

        # Remove any existing service under the same name.
        try:
            Webservice(ws, aci_service_name).delete()
            self.ctx.log('Remove any existing service under the same name...')
        except WebserviceException:
            pass

        aci_service = Model.deploy(ws, aci_service_name, [model],
                                   inference_config, aciconfig)
        aci_service.wait_for_deployment(True)
        self.ctx.log('%s state %s' %
                     (aci_service_name, str(aci_service.state)))

        return {'model_id': model_id, 'aci_service_name': aci_service_name}
Example No. 8
    def leaderboard(self, run_id=None):
        ws = AzureProject(self.ctx)._get_ws()
        experiment_name = self.ctx.config.get('experiment/name', None)
        if experiment_name is None:
            raise AzureException('Please specify Experiment name...')
        if run_id is None:
            run_id = self.ctx.config.get('experiment/run_id', None)
        if run_id is None:
            raise AzureException(
                'Please provide Run ID (experiment/run_id) to evaluate')
        experiment = Experiment(ws, experiment_name)
        run = AutoMLRun(experiment=experiment, run_id=run_id)
        leaderboard = self._get_leaderboard(run).to_dict('records')
        self.ctx.log('Leaderboard for Run %s' % run_id)
        print_table(self.ctx.log, leaderboard)
        status = run.get_status()
        self.ctx.log('Status: %s' % status)
        return {'run_id': run_id, 'leaderboard': leaderboard, 'status': status}
Example No. 9
    def _predict_remotely(self, predict_data, model_id, predict_proba):
        from azureml.core.webservice import AciWebservice
        from azureml.train.automl.run import AutoMLRun
        from azureml.core.run import Run

        import numpy as np

        ws, experiment = self._get_experiment()

        model_features = None
        target_categories = None

        remote_run = AutoMLRun(experiment = experiment, run_id = model_id)
        model_features, target_categories = self._get_remote_model_features(remote_run)
        if model_id.startswith("AutoML_"):
            model_name = remote_run.properties['model_name']
        else:
            model_name = model_id

        if model_features:
            predict_data = predict_data[model_features]

        input_payload = predict_data.to_json(orient='split', index = False)

        aci_service_name = self._aci_service_name(model_name)
        aci_service = AciWebservice(ws, aci_service_name)

        input_payload = json.loads(input_payload)
        # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.
        method = 'predict'
        if predict_proba:
            method = 'predict_proba'
        input_payload = {
            'data': {'data': input_payload['data'], 'method': method}
        }
        input_payload = json.dumps(input_payload)
        try:
            response = aci_service.run(input_data = input_payload)
        except Exception as e:
            log_file = 'automl_errors.log'
            fsclient.write_text_file(log_file, aci_service.get_logs(), mode="a")
            raise AzureException("Prediction service error. Please redeploy the model. Log saved to file '%s'. Details: %s"%(log_file, str(e)))

        response = json.loads(response)
        if "error" in response or not 'result' in response:
            raise AzureException('Prediction service returned an error: %s' % response.get('error'))

        results_proba = None
        proba_classes = None
        results = response['result']
        if predict_proba:
            results_proba = results
            proba_classes = response['proba_classes']
            results_proba = np.array(results_proba)

        return results, results_proba, proba_classes, target_categories
Example No. 10
    def _predict_locally(self, experiment, predict_data, model_id, threshold):
        run_id = model_id
        iteration = None
        parts = model_id.split('_')
        if len(parts) > 2:
            run_id = parts[0]+"_"+parts[1]
            iteration = parts[2]

        remote_run = AutoMLRun(experiment = experiment, run_id = run_id)
        best_run, fitted_model = remote_run.get_output(iteration=iteration)

        results_proba = None
        proba_classes = None
        if threshold is not None:
            results_proba = fitted_model.predict_proba(predict_data)

            proba_classes = list(fitted_model.classes_)

            result = self._calculate_proba_target(results_proba,
                proba_classes, None, threshold, None)
        else:
            result = fitted_model.predict(predict_data)

        return result, results_proba, proba_classes
Example No. 11
    def existingModel(self, exp_name, run_id):
        SUBSCRIPTION_ID = '1f6fddae-bfa7-4f33-b9a5-ad3d4f29b8a9'
        RESOURCE_GROUP = 'DECADAAPPS'
        WORKSPACE_NAME = 'kongming-aml'
        TENANT_ID = 'd7802200-0ab3-48a9-a946-c4e20d15c1ca'

        auth = InteractiveLoginAuthentication(tenant_id=TENANT_ID)
        ws = Workspace(subscription_id=SUBSCRIPTION_ID,
               resource_group=RESOURCE_GROUP,
               workspace_name=WORKSPACE_NAME,
               auth=auth)
        exp = Experiment(ws, exp_name)
        run = AutoMLRun(experiment=exp, run_id=run_id)
        _, model = run.get_output()
        return run, model
Example No. 12
    def leaderboard(self, run_id=None):
        ws = AzureProject(self.ctx)._get_ws()
        experiment_name = self.ctx.config.get('experiment/name', None)
        if experiment_name is None:
            raise AzureException('Please specify Experiment name...')
        if run_id is None:
            run_id = self.ctx.config.get('experiment/run_id', None)
        if run_id is None:
            raise AzureException(
                'Please provide Run ID (experiment/run_id) to evaluate')
        experiment = Experiment(ws, experiment_name)
        run = AutoMLRun(experiment=experiment, run_id=run_id)
        leaderboard = self._get_leaderboard(run).to_dict('records')
        self.ctx.log('Leaderboard for Run %s' % run_id)
        print_table(self.ctx.log, leaderboard)
        provider_status = run.get_status()
        status = self._map_provider_status(provider_status)

        result = {
            'run_id': run_id,
            'leaderboard': leaderboard,
            'status': status,
            'provider_status': provider_status,
        }

        if status == 'error':
            result['error'] = run.properties.get('errors')
            result['error_details'] = run.get_details().get('error', {}).get(
                'error', {}).get('message')
            self.ctx.log('Status: %s, Error: %s, Details: %s' %
                         (status, result['error'], result['error_details']))
            self.ctx.log_debug(run.get_details().get('error'))
        else:
            self.ctx.log('Status: %s' % status)

        return result
Example No. 13
    def deploy(self, model_id, locally, review):
        if locally:
            is_loaded, model_path = self.verify_local_model(model_id)
            if is_loaded:
                self.ctx.log('Model already deployed to %s' % model_path)
                return {'model_id': model_id}

        from azureml.train.automl.run import AutoMLRun

        ws, experiment = self._get_experiment()
        model_run = AutoMLRun(experiment = experiment, run_id = model_id)        

        result = self._deploy_locally(model_id, model_run, ws, experiment) if locally else \
            self._deploy_remotly(model_id, model_run, ws, experiment)
        
        model_features, target_categories = self._get_remote_model_features(model_run)
        feature_importance = self._get_feature_importance(model_run)

        options = {
            'uid': model_id,
            'targetFeature': self.ctx.config.get('target'),
            'support_review_model': review,
            'provider': self.ctx.config.name,
            'scoreNames': [self.ctx.config.get('experiment/metric')],
            'scoring': self.ctx.config.get('experiment/metric'),
            "score_name": self.ctx.config.get('experiment/metric'),
            "originalFeatureColumns": model_features
        }
        options.update(self._get_a2ml_info())
        fsclient.write_json_file(os.path.join(self.ctx.config.get_model_path(model_id), "options.json"),
            options)
        fsclient.write_json_file(os.path.join(self.ctx.config.get_model_path(model_id), "target_categoricals.json"), 
            {self.ctx.config.get('target'): {"categories": target_categories}})

        metric_path = ModelHelper.get_metric_path( options, model_id)
        fsclient.write_json_file(os.path.join(metric_path, "metric_names_feature_importance.json"), 
            {'feature_importance_data': {
                'features': list(feature_importance.keys()), 
                'scores': list(feature_importance.values())
            }})

        return result
Example No. 14
# COMMAND ----------

# DBTITLE 1,Train
# submit a new training run
from azureml.train.automl.run import AutoMLRun

try:
    if new_training == "True":
        print("New Training Run")
        remote_run = experiment.submit(
            automl_config,
            show_output=False)  # Story No. 3018 modified Mukesh Dutta 9/3/2021
    else:
        # If you need to retrieve a run that already started, use the following code
        print("Existing Training Run")
        remote_run = AutoMLRun(experiment=experiment, run_id=runid)
except Exception as error:
    print(error)
    log_error("{} {}".format(notebook, error))  #log error in sentry
    #raise dbutils.notebook.exit(error) #raise the exception
    raise error  #raise the exception

remote_run

# COMMAND ----------

# DBTITLE 1,Retrieve the best model
# !pip install xgboost==0.90
# Get run_id and run_datetime
rr = remote_run.wait_for_completion()
run_id = rr.get("runId")
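One possible continuation, sketched here rather than taken from the original notebook, assuming the run completed successfully:

# Pick the best child run and read the auto-generated model name from its properties.
best_run = remote_run.get_best_child()
model_name = best_run.properties.get('model_name')
print(run_id, model_name)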
Example No. 15
    iterations=2,
    n_cross_validations=2,
    verbosity=logging.INFO,
    X=X_train.values,  # convert from pandas to numpy arrays using .values
    y=y_train.values[:, 0],  # convert from pandas to numpy arrays using .values
    path=project_folder,
)

local_run = experiment.submit(automl_config, show_output=True)

# Wait until the run finishes.
local_run.wait_for_completion(show_output=True)

# create new AutoMLRun object to ensure everything is in order
ml_run = AutoMLRun(experiment=experiment, run_id=local_run.id)


# aux function for comparing performance of runs (quick workaround for automl's _get_max_min_comparator)
def maximize(x, y):
    if x >= y:
        return x
    else:
        return y


# next couple of lines are stripped down version of automl's get_output
children = list(ml_run.get_children())

best_run = None  # will be child run with best performance
best_score = None  # performance of that child run
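A sketch of how the comparison loop might continue; the primary metric name 'AUC_weighted' is an assumption here, not something stated in the original snippet:

primary_metric = 'AUC_weighted'  # assumed primary metric

for child in children:
    metrics = child.get_metrics()
    if primary_metric not in metrics:
        continue
    score = metrics[primary_metric]
    if best_score is None or maximize(best_score, score) == score:
        best_run = child
        best_score = score

print(best_run, best_score)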
Example No. 16
    download_path = './outputs/' + output_name
    output_data.download(download_path)
    df_path = get_download_path(download_path, output_name) + '/data'
    return dprep.auto_read_file(path=df_path)


print("Get the best model")
# workaround to get the automl run as its the last step in the pipeline
# and get_steps() returns the steps from latest to first
for step in pipeline_run.get_steps():
    automl_step_run_id = step.id
    print(step.name)
    print(automl_step_run_id)
    break

automl_run = AutoMLRun(experiment=experiment, run_id=automl_step_run_id)
best_run, fitted_model = automl_run.get_output()
print(best_run)
print(fitted_model)

print("Get metrics")
children = list(automl_run.get_children())
metrics_list = {}
for run in children:
    properties = run.get_properties()
    run_metrics = {
        k: v
        for k, v in run.get_metrics().items() if isinstance(v, float)
    }
    metrics_list[int(properties['iteration'])] = run_metrics
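To compare iterations side by side, one option (not part of the original snippet) is to load the collected metrics into a DataFrame:

import pandas as pd

metrics_df = pd.DataFrame(metrics_list).T.sort_index()  # one row per iteration, one column per metric
print(metrics_df.head())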
Example No. 17
conda_env_file_name = "myenv.yml"
scoring_file_name = "scoring.py"

if not os.path.exists(directory):
    os.makedirs(directory)

os.chdir(directory)
os.getcwd()

ws = Workspace.get(workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

experiment = Experiment(ws, experiment_name)

ml_run = AutoMLRun(experiment=experiment, run_id=run_id)

if ml_run.model_id is None:
    model = ml_run.register_model(description=description,
                                  tags=tags,
                                  iteration=iteration)

file = open(scoring_file_name, "w")

file.writelines([
    "import pickle\n", "import json\n", "import numpy\n",
    "import azureml.train.automl\n", "from sklearn.externals import joblib\n",
    "from azureml.core.model import Model\n", "def init():\n",
    "    global model\n",
    "    model_path = Model.get_model_path(model_name = '" + ml_run.model_id +
    "')\n", "    model = joblib.load(model_path)\n", "def run(rawdata):\n",
Example No. 18
if args.model_name == 'automl_image_model':
    print(
        'Default model name provided. Using auto-generated model name from automl child run.'
    )

    # Retrieve the model name from AutoML's best child run if a model name is not provided in the arguments.
    pipeline_run = run.parent
    pipeline_run.__class__ = PipelineRun

    for step in pipeline_run.get_steps():
        if step.name == 'automl_module':
            automl_step_run_id = step.id
            break

    automl_run = AutoMLRun(experiment=run.experiment,
                           run_id=automl_step_run_id)

    best_child_run = automl_run.get_best_child()
    model_name = best_child_run.properties['model_name']
    print('model name obtained from the AutoML best child run is : {0}'.format(
        model_name))
else:
    model_name = args.model_name
    print('model name obtained from the model_name argument is : {0}'.format(
        model_name))

# Get the training dataset
train_ds = Dataset.get_by_name(ws, args.ds_name)
datasets = [(Dataset.Scenario.TRAINING, train_ds)]

# Get the metrics data
Example No. 19
# COMMAND ----------

experiment_name_prefix = 'automl-scoring-' + notebook_username
experiment = [e for e in azureml.core.Experiment.list(ws) if e.name.startswith(experiment_name_prefix)][-1]
experiment

# COMMAND ----------

# MAGIC %md
# MAGIC 
# MAGIC Now it's time to retrieve the best model we have identified during the experiment.

# COMMAND ----------

from azureml.train.automl.run import AutoMLRun
run = [AutoMLRun(experiment, r.id) for r in azureml.core.Run.list(experiment, status='Completed', type='automl')][-1]
run

# COMMAND ----------

# MAGIC %md
# MAGIC 
# MAGIC It's now time to register the model. 
# MAGIC 
# MAGIC You can add tags and descriptions to your models when you register them. 

# COMMAND ----------

model = run.register_model(model_name = "credit_scoring_" + notebook_username,
                       tags = {'area': "Credit scoring", 'type': "classification"}, 
                       description = "Credit Scoring model",
Example No. 20
                        required=False,
                        help="IoU threshold")

    # parse arguments
    args = parser.parse_args()

    ws = Workspace.create(
        name=args.workspace_name,
        subscription_id=args.subscription_id,
        resource_group=args.resource_group,
        exist_ok=True,
    )
    experiment = Experiment(ws, name=args.experiment_name)

    # load the best child
    automl_image_run = AutoMLRun(experiment=experiment, run_id=args.run_id)
    best_child_run = automl_image_run.get_best_child()

    model_type = None
    if args.task_type == "image-object-detection":
        if args.model_name.startswith("yolo"):
            # yolo settings
            model_settings = {
                "img_size": args.img_size,
                "model_size": args.model_size,
                "box_score_thresh": args.box_score_thresh,
                "box_iou_thresh": args.box_iou_thresh,
            }

        elif args.model_name.startswith(
                "faster") or args.model_name.startswith("retina"):
Example No. 21
from azureml.core.runconfig import RunConfiguration
from checknotebookoutput import checkNotebookOutput

ws = Workspace.from_config()

# choose a name for the run history container in the workspace
experiment_name = 'automl-local-whitelist'
# project folder
project_folder = './sample_projects/automl-local-whitelist'

experiment = Experiment(ws, experiment_name)
automl_runs = list(experiment.get_runs(type='automl'))

assert (len(automl_runs) == 1)

ml_run = AutoMLRun(experiment=experiment, run_id=automl_runs[0].id)

properties = ml_run.get_properties()
status = ml_run.get_details()
assert (status['status'] == 'Completed')
assert (properties['num_iterations'] == '10')

children = list(ml_run.get_children())
for iteration in children:
    metrics = iteration.get_metrics()
    iteration_status = iteration.get_status()
    iteration_properties = iteration.get_properties()
    pipeline_spec = iteration_properties['pipeline_spec']
    print(iteration.id)
    print(metrics['AUC_weighted'])
    assert (metrics['AUC_weighted'] > 0.4)
Example No. 22
resource_group = os.environ['AML_RESOURCE_GROUP']
workspace_name = os.environ['AML_WORKSPACE']

ws = Workspace(subscription_id, resource_group, workspace_name)
experiment_name = 'forecast_automl_' + file_prefix + '_' + granularity

# Register the model from last best run
print('registering the latest model for {0}'.format(experiment_name))
exp = Experiment(workspace=ws, name=experiment_name)
run_generator = exp.get_runs()
run_latest = next(run_generator)
if run_latest.get_status() != 'Completed' or run_latest.type != 'automl':
    raise Exception('the last run is not completed or is not automl')

run_id = run_latest.get_details()['runId']
automl_run = AutoMLRun(exp, run_id)
best_run, fitted_model = automl_run.get_output()
model_name = experiment_name.replace('-', '').replace('_', '').lower()
# Register a model
model = best_run.register_model(model_name=model_name,
                                model_path='outputs/model.pkl')
# Get existing model
#model=Model(ws, model_name)

# Figure out the run's dependencies
conda_env_file_name = '{}_env.yml'.format(experiment_name)
localenv = CondaDependencies.create(conda_packages=['numpy', 'scikit-learn'],
                                    pip_packages=['azureml-sdk[automl]'])
localenv.save_to_file('.', conda_env_file_name)
best_iteration = int(str.split(
    best_run.id, '_')[-1])  # the iteration number is a postfix of the run ID.
Example No. 23
    e for e in azureml.core.Experiment.list(ws)
    if e.name.startswith(experiment_name_prefix)
][-1]
experiment

# COMMAND ----------

# MAGIC %md
# MAGIC
# MAGIC Now it's time to retrieve the best model we have identified during the experiment.

# COMMAND ----------

from azureml.train.automl.run import AutoMLRun
run = [
    AutoMLRun(experiment, r.id) for r in azureml.core.Run.list(
        experiment, status='Completed', type='automl')
][-1]
run

# COMMAND ----------

# MAGIC %md
# MAGIC
# MAGIC It's now time to register the model.
# MAGIC
# MAGIC You can add tags and descriptions to your models when you register them.

# COMMAND ----------

model = run.register_model(