def _check_is_valid_table(table, is_row=False):
    # A valid table is a dict mapping column names to columns; every column must
    # have the same length, and a row may only contain scalar values.
    if not isinstance(table, dict):
        azureml_error = AzureMLError.create(
            ArgumentInvalid, argument_name="Table",
            expected_type="dict[string]: column"
        )
        raise AzureMLException._with_error(azureml_error)
    if is_row:
        for key in table:
            val = table[key]
            if isinstance(val, list):
                azureml_error = AzureMLError.create(
                    InvalidColumnData, type="list", column=key
                )
                raise AzureMLException._with_error(azureml_error)
            else:
                ScalarMetric._check_is_valid_scalar(val)
    keys = list(table.keys())
    if len(keys) > 0:
        # Use the first column as the reference length for all other columns.
        reference_column = keys[0]
        table_column_length = TableMetric._get_length(table[reference_column])
        for key in table:
            column_length = TableMetric._get_length(table[key])
            if column_length != table_column_length:
                azureml_error = AzureMLError.create(
                    InvalidColumnLength, reference_column=reference_column,
                    table_column_length=table_column_length,
                    key=key, column_length=column_length
                )
                raise AzureMLException._with_error(azureml_error)
            if isinstance(table[key], list):
                ListMetric._check_is_valid_list(table[key])
    return table
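# Illustration only (not the SDK's API): a standalone sketch of the same
# column-length rule that _check_is_valid_table enforces, runnable without any
# AzureML dependencies. Scalar columns are treated as having length 1.
def _columns_have_equal_length(table):
    """Return True when every column in the dict has the same length."""
    lengths = {len(col) if isinstance(col, list) else 1 for col in table.values()}
    return len(lengths) <= 1

assert _columns_have_equal_length({"epoch": [1, 2, 3], "loss": [0.9, 0.5, 0.3]})
assert not _columns_have_equal_length({"epoch": [1, 2, 3], "loss": [0.9]})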
def create_children(self, tag_key, tag_values, start_children=True):
    """
    Creates one child run for each element in tag_values

    :param tag_key: key for the Tags entry to populate in all created children
    :type tag_key: str
    :param tag_values: list of values that will map onto Tags[tag_key] for the list of runs created
    :type tag_values: [str]
    :param start_children: Optional flag to start created children, defaults True
    :type start_children: bool
    :rtype: [RunDto]
    """
    request_child_runs = []
    for tag_value in tag_values:
        create_run_dto = CreateRunDto(run_id=RunHistoryFacade.create_run_id(),
                                      parent_run_id=self._run_id,
                                      status='NotStarted',
                                      tags={tag_key: tag_value})
        request_child_runs.append(create_run_dto)

    result_dto = self.run.batch_create_child_runs(request_child_runs)
    errors = result_dto.errors
    if len(errors) > 0:
        azureml_error = AzureMLError.create(
            CreateChildrenFailed, run_id=self._run_id
        )
        raise AzureMLException._with_error(azureml_error)

    result_child_runs = result_dto.runs
    child_run_ids = [child_run.run_id for child_run in request_child_runs]
    if start_children:
        event_errors = self.run.batch_post_event_start(child_run_ids).errors
        if len(event_errors) > 0:
            azureml_error = AzureMLError.create(
                StartChildrenFailed, run_id=self._run_id
            )
            raise AzureMLException._with_error(azureml_error)
    return (result_child_runs[run_id] for run_id in child_run_ids)
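# Usage sketch: fan out one child run per hyperparameter value. "facade" is an
# assumed, already-constructed object exposing create_children (e.g. a
# RunHistoryFacade for the parent run); the tag key and values are illustrative.
child_run_dtos = facade.create_children(
    tag_key="learning_rate",
    tag_values=["0.1", "0.01", "0.001"],
    start_children=True,
)
for dto in child_run_dtos:
    print(dto.run_id)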
def register_submit_function(cls, method_class, submit_function):
    """
    :param cls: the registry class holding the method-to-submit-function mapping
    :type cls: object
    :param method_class: identifies the method/config type the submit function handles
    :type method_class: str
    :param submit_function: function to call when submitting that method type
    :type submit_function: object
    """
    function_name = submit_function.__name__
    module_logger.debug(
        "Trying to register submit_function {}, on method {}".format(
            function_name, method_class))
    with cls._lock:
        if method_class in cls._method_to_submit_dict and \
                cls._method_to_submit_dict[method_class] != submit_function:
            azureml_error = AzureMLError.create(MethodAlreadyRegistered,
                                                method_class=method_class)
            raise AzureMLException._with_error(azureml_error)
        cls._method_to_submit_dict[method_class] = submit_function
    module_logger.debug(
        "Registered submit_function {}, on method {}".format(
            function_name, method_class))
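# Illustration only (not the SDK registry itself): a minimal stand-in showing the
# same lock-protected "config class -> submit function" pattern used by
# register_submit_function above and by get_submit_function further down. All
# names here are hypothetical and the sketch runs standalone.
import threading

class _SubmitRegistry:
    _lock = threading.Lock()
    _method_to_submit_dict = {}

    @classmethod
    def register(cls, method_class, submit_function):
        with cls._lock:
            existing = cls._method_to_submit_dict.get(method_class)
            if existing is not None and existing != submit_function:
                raise ValueError("{} already registered".format(method_class))
            cls._method_to_submit_dict[method_class] = submit_function

    @classmethod
    def get(cls, method):
        with cls._lock:
            return cls._method_to_submit_dict[type(method)]

class MyConfig:
    pass

def submit_my_config(config):
    return "submitted {}".format(type(config).__name__)

_SubmitRegistry.register(MyConfig, submit_my_config)
print(_SubmitRegistry.get(MyConfig())(MyConfig()))  # -> "submitted MyConfig"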
def _write_output_metadata_file(return_object, output_metadata_file_path, logger):
    import errno
    import json
    import os
    logger.debug("Specified output metadata path %s, storing return value type [%s] as json",
                 output_metadata_file_path, type(return_object).__name__)
    # TODO: Move this to a file utils library in azureml-core
    full_path = os.path.abspath(output_metadata_file_path)
    if os.path.exists(full_path):
        # Can't just use 'x' in open() mode below due to Python 2
        azureml_error = AzureMLError.create(FileAlreadyExists, full_path=full_path)
        raise AzureMLException._with_error(azureml_error)
    dir_path = os.path.dirname(full_path)
    if not os.path.exists(dir_path):
        try:
            os.makedirs(dir_path)
        except OSError as exc:  # No Python 3 guarantee for exist_ok :(
            if exc.errno != errno.EEXIST:
                raise
    with open(output_metadata_file_path, 'wt') as omf:
        json.dump(_convert_return_to_json(return_object), omf,
                  indent=4, sort_keys=True)
def _check_is_valid_scalar(value):
    if not ScalarMetric._is_valid_scalar(value):
        valid_types = list(Metric._type_to_metric_type.keys())
        azureml_error = AzureMLError.create(
            InvalidArgumentType, type=type(value), expected_type=valid_types
        )
        raise AzureMLException._with_error(azureml_error)
def log_image(self, name, path=None, plot=None, description=""): if path is not None and plot is not None: azureml_error = AzureMLError.create( TwoInvalidParameter, arg_one="path", arg_two="plot" ) raise AzureMLException._with_error(azureml_error) elif path is None and plot is None: azureml_error = AzureMLError.create( TwoInvalidArgument, arg_one="path", arg_two="plot" ) raise AzureMLException._with_error(azureml_error) value = path if path is not None else plot metric = ImageMetric(name, value, None, description=description) if _use_v2_metrics: self._log_metric_v2(metric, is_plot=plot is not None) else: self._log_metric(metric, is_plot=plot is not None)
def _log_image(self, artifact_client, path, origin, container):
    image_type = imghdr.what(path)
    if image_type is not None:
        artifact_client.upload_artifact(path, origin, container, path,
                                        content_type="image/{}".format(image_type))
    else:
        azureml_error = AzureMLError.create(
            MalformedArgument, argument_name=path
        )
        raise AzureMLException._with_error(azureml_error)
    return path
def _check_is_valid_list(list_value):
    if isinstance(list_value, list):
        for val in list_value:
            if not ScalarMetric._is_valid_scalar(val):
                valid_types = list(Metric._type_to_metric_type.keys())
                azureml_error = AzureMLError.create(
                    InvalidArgumentType, type=type(val), expected_type=valid_types
                )
                raise AzureMLException._with_error(azureml_error)
def _process_single_return_object(return_object):
    from msrest.serialization import Model
    if isinstance(return_object, Model):
        # Convert the msrest model to a dict with camelCase keys.
        object_dict = return_object.as_dict()
        return {_to_camel_case(k): v for k, v in object_dict.items()}
    elif isinstance(return_object, dict):
        return return_object
    else:
        azureml_error = AzureMLError.create(UnsupportedReturnType,
                                            return_object=type(return_object))
        raise AzureMLException._with_error(azureml_error)
def _is_valid_scalar(value):
    value_type = type(value)
    # Reject numbers whose in-memory size exceeds the supported maximum.
    for number_type in six.integer_types + (float,):
        if isinstance(value, number_type) and \
                sys.getsizeof(value) > AZUREML_MAX_NUMBER_SIZE_IN_BITS:
            azureml_error = AzureMLError.create(
                ArgumentSizeOutOfRangeType, argument_name=value_type,
                min=0, max=AZUREML_MAX_NUMBER_SIZE_IN_BITS
            )
            raise AzureMLException._with_error(azureml_error)
    return any(value_type in dictionary
               for dictionary in (Metric._type_to_metric_type, Metric._type_to_converter))
def _log_batch(self, metric_dtos, is_async=False):
    if len(metric_dtos) > AZUREML_MAX_NUMBER_METRICS_BATCH:
        azureml_error = AzureMLError.create(
            MetricsNumberExceeds, metric_dtos=len(metric_dtos),
            AZUREML_MAX_NUMBER_METRICS_BATCH=AZUREML_MAX_NUMBER_METRICS_BATCH)
        raise AzureMLException._with_error(azureml_error)

    batch_metric_dto = BatchMetricDto(metric_dtos)
    res = self._execute_with_run_arguments(self._client.run_metric.post_batch,
                                           batch_metric_dto, is_async=is_async)
    return res
def _log_batch_v2(self, metric_dtos, is_async=False):
    if len(metric_dtos) > AZUREML_MAX_NUMBER_METRICS_BATCH:
        azureml_error = AzureMLError.create(
            MetricsNumberExceeds, metric_dtos=len(metric_dtos),
            AZUREML_MAX_NUMBER_METRICS_BATCH=AZUREML_MAX_NUMBER_METRICS_BATCH)
        raise AzureMLException._with_error(azureml_error)

    batch_metric_dto = BatchMetricV2Dto(values=metric_dtos, report_errors=True)
    self._logger.debug("Metrics Client: _log_batch_v2 is calling post_run_metrics "
                       "posting {} values.".format(len(batch_metric_dto.values)))
    res = self._execute_with_workspace_run_arguments(
        self._post_run_metrics_log_failed_validations,
        batch_metric_dto, is_async=is_async)
    return res
def _log_plot(self, artifact_client, plot, origin, container):
    plot_name = self.name + "_" + str(int(time.time()))
    ext = "png"
    artifact_path = "{}.{}".format(plot_name, ext)
    stream = io.BytesIO()
    try:
        # Render the plot to an in-memory PNG and upload it as an artifact.
        plot.savefig(stream, format=ext)
        stream.seek(0)
        artifact_client.upload_artifact(stream, origin, container, artifact_path,
                                        content_type="image/{}".format(ext))
    except AttributeError:
        # The object does not expose savefig, so it is not a matplotlib plot.
        azureml_error = AzureMLError.create(
            ArgumentInvalid, argument_name="plot", expected_type="matplotlib.pyplot"
        )
        raise AzureMLException._with_error(azureml_error)
    finally:
        stream.close()
    return artifact_path
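# Illustration: _log_plot only needs an object with a matplotlib-style savefig
# method, so either the pyplot module or a Figure satisfies the contract. This
# standalone sketch involves no AzureML objects.
import io
import matplotlib
matplotlib.use("Agg")  # headless backend so the sketch runs without a display
import matplotlib.pyplot as plt

fig = plt.figure()
plt.plot([1, 2, 3], [1, 4, 9])
buffer = io.BytesIO()
fig.savefig(buffer, format="png")  # same call _log_plot relies on
print(len(buffer.getvalue()), "bytes of PNG written")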
def _get_token(self, sdk_resource=None):
    """
    :param sdk_resource: `resource` converted from Track 2 SDK's `scopes`
    """
    external_tenant_tokens = None
    try:
        scheme, token, full_token = self._token_retriever(sdk_resource)
        if self._external_tenant_token_retriever:
            external_tenant_tokens = self._external_tenant_token_retriever(sdk_resource)
    except adal.AdalError as err:  # pylint: disable=no-member
        if in_cloud_console():
            AdalAuthentication._log_hostname()

        err = (getattr(err, 'error_response', None) or {}).get('error_description') or str(err)
        if 'AADSTS70008' in err:  # all errors starting with 70008 should be creds expiration related
            message = "Please run 'az login'" if not in_cloud_console() else ''
            azureml_error = AzureMLError.create(
                CredentialsExpireInactivity, message=message)
            raise AzureMLException._with_error(azureml_error)
        if 'AADSTS50079' in err:
            message = "Please run 'az login'" if not in_cloud_console() else ''
            azureml_error = AzureMLError.create(
                AccountConfigurationChanged, message=message)
            raise AzureMLException._with_error(azureml_error)
        if 'AADSTS50173' in err:
            message = "Please clear browser's cookies and run 'az login'" if not in_cloud_console() else ''
            azureml_error = AzureMLError.create(
                CredentialExpiredPasswordChanged, message=message)
            raise AzureMLException._with_error(azureml_error)

        raise AzureMLException(err)
    except requests.exceptions.SSLError:
        azureml_error = AzureMLError.create(CertificateVerificationFailure)
        raise AzureMLException._with_error(azureml_error)
    except requests.exceptions.ConnectionError as err:
        azureml_error = AzureMLError.create(NetworkConnectionFailed, error=str(err))
        raise AzureMLException._with_error(azureml_error)
    except Exception as err:
        if in_cloud_console():
            AdalAuthentication._log_hostname()
        raise err

    return scheme, token, full_token, external_tenant_tokens
def transform_infer_data(dataset: pd.DataFrame, aml_model_name: str,
                         preprocessor_name: str) -> pd.DataFrame:
    """Transform the dataset for inference.

    Downloads the registered Model from AzureML and loads the already-fit
    preprocessor to use for the data transformation.

    Args:
        dataset (pd.DataFrame): Dataset to transform.
        aml_model_name (str): The name of the Model registered on AzureML.
        preprocessor_name (str): The name of the preprocessor registered with
            the Model's assets on AzureML.

    Returns:
        (pd.DataFrame): The preprocessor's transformations applied to the dataframe.
    """
    aml_helper = AmlCustomHelper()
    try:
        # download registered model and assets
        logger.info("Download aml model")
        aml_model = Model(aml_helper.ws, aml_model_name)
        logger.info(f"aml_helper.ASSETS_DIR:\t{aml_helper.ASSETS_DIR}")
        aml_model.download(
            target_dir=f"{'/'.join(aml_helper.ASSETS_DIR.split('/')[0:-1])}",
            exist_ok=True,
        )

        # load preprocessor object from model assets
        logger.info("Load aml model's preprocessor")
        preprocessor = joblib.load(f"{aml_helper.ASSETS_DIR}/{preprocessor_name}")

        logger.info("Transform the dataset")
        transformed_dataset = preprocessor.transform(dataset, is_inference=True)
        return transformed_dataset
    except (WebserviceException, ModelNotFoundException):
        logger.info(
            f"No previous registered model found with the name:\t{aml_model_name}")
        # force to fail the AzureML pipeline step
        raise AzureMLException(exception_message="Stopping the execution.")
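# Usage sketch (names and paths are hypothetical): apply the inference-time
# transformation to a scoring dataframe, using whatever model and preprocessor
# names were registered earlier in the pipeline.
scoring_df = pd.read_parquet("data/scoring.parquet")
features = transform_infer_data(
    dataset=scoring_df,
    aml_model_name="churn-classifier",
    preprocessor_name="preprocessor.joblib",
)
print(features.shape)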
def _create_empty_artifacts(self, paths):
    """Create empty artifacts."""
    if self._run_id is None:
        raise UserErrorException("run_id cannot be null when creating empty artifacts")
    if isinstance(paths, str):
        paths = [paths]
    artifacts = [ArtifactPathDto(path) for path in paths]
    batch_create_command = BatchArtifactCreateCommand(artifacts)
    res = self._run_artifacts_client._execute_with_experiment_arguments(
        self._run_artifacts_client._client.run_artifact.batch_create_empty_artifacts,
        self._run_id,
        batch_create_command)
    if res.errors:
        error_messages = []
        for artifact_name in res.errors:
            error = res.errors[artifact_name].error
            error_messages.append("{}: {}".format(error.code, error.message))
        raise AzureMLException("\n".join(error_messages))
    return res
def get_submit_function(cls, method):
    """
    :param cls: the registry class holding the method-to-submit-function mapping
    :type cls: object
    :param method: the config object whose registered submit function is requested
    :type method: object
    :return: submit_function
    :rtype: object
    """
    method_class = method.__class__
    module_logger.debug(
        "Trying to get submit_function for method_class {}".format(method_class))
    with cls._lock:
        if method_class not in cls._method_to_submit_dict:
            azureml_error = AzureMLError.create(MethodNotRegistered)
            raise AzureMLException._with_error(azureml_error)
        submit_function = cls._method_to_submit_dict[method_class]
    function_name = submit_function.__name__
    module_logger.debug(
        "Retrieved submit_function {} for method {}".format(function_name, method_class))
    return submit_function
def compare_models(
    model_path: str,
    X_test_path: str,
    y_test_path: str,
):
    """Compares the recall of the current model (new model) with the already
    registered model (old model).

    If the new model's score is not greater than or equal to the old model's,
    an error is raised to stop the execution, preventing the new model from
    being registered.

    Args:
        model_path (str): Path to the new model object.
        X_test_path (str): Path to the variables test dataset.
        y_test_path (str): Path to the target test dataset.

    Raises:
        AzureMLException: Error due to the new model having a worse recall
            than the old model.
    """
    aml_helper = AmlCustomHelper()
    try:
        # download old model
        logger.info("Download old model")
        old_model_aml = Model(aml_helper.ws, MODEL_NAME)
        logger.info(f"aml_helper.ASSETS_DIR:\t{aml_helper.ASSETS_DIR}")
        old_model_aml.download(
            target_dir=f"{'/'.join(aml_helper.ASSETS_DIR.split('/')[0:-1])}",
            exist_ok=True,
        )

        # load old model
        logger.info("Load old model")
        old_model = joblib.load(
            f"{aml_helper.ASSETS_DIR}/{model_path.split('/')[-1]}")
        logger.info(f"Old Model:\t{old_model}")

        # load new model
        logger.info("Load new model")
        new_model = joblib.load(model_path)
        logger.info(f"New Model:\t{new_model}")

        # load test dataset
        logger.info("Load the test datasets")
        X_test = pd.read_parquet(X_test_path)
        logger.info(f"X_test shape:\t{X_test.shape}")
        y_test = pd.read_parquet(y_test_path)
        logger.info(f"y_test shape:\t{y_test.shape}")

        # make predictions on the test set
        logger.info("Make predictions - Old Model")
        y_hat_old = old_model.predict(X_test)

        logger.info("Make predictions - New Model")
        y_hat_new = new_model.predict(X_test)

        # Recall
        logger.info("Calculate Recall")
        recall_old = recall_score(y_test, y_hat_old)
        recall_new = recall_score(y_test, y_hat_new)
        logger.info(f"Old model recall:\t{recall_old}")
        logger.info(f"New model recall:\t{recall_new}")

        if recall_old > recall_new:
            # force to fail the AzureML pipeline step
            raise AzureMLException(
                exception_message="The new model scored a lower recall than the old model, "
                                  "so we will not proceed to the new model registration."
            )
    except (WebserviceException, ModelNotFoundException):
        logger.info(
            f"No previous registered model found with the name:\t{MODEL_NAME}")
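# Usage sketch (paths are hypothetical): compare the freshly trained model against
# the currently registered one before deciding whether to register it.
compare_models(
    model_path="outputs/model.joblib",
    X_test_path="data/X_test.parquet",
    y_test_path="data/y_test.parquet",
)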
def redirect_output_streams_context_manager(self):
    try:
        return self._redirect_output_streams_context_manager
    except AttributeError:
        azureml_error = AzureMLError.create(InvalidOutputStream)
        raise AzureMLException._with_error(azureml_error)