def init():
    global model
    global batch_size

    # Set up logging
    _set_logging_parameters(TASK_TYPE, {})

    parser = argparse.ArgumentParser(
        description="Retrieve model_name and batch_size from arguments.")
    parser.add_argument('--model_name', dest="model_name", required=True)
    parser.add_argument('--batch_size', dest="batch_size", type=int,
                        required=False)
    args, _ = parser.parse_known_args()
    batch_size = args.batch_size

    model_path = os.path.join(Model.get_model_path(args.model_name),
                              'model.pt')
    print(model_path)

    try:
        logger.info("Loading model from path: {}.".format(model_path))
        model_settings = {}
        model = load_model(TASK_TYPE, model_path, **model_settings)
        logger.info("Loading successful.")
    except Exception as e:
        logging_utilities.log_traceback(e, logger)
        raise
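# The init() above reads --model_name and --batch_size via parse_known_args(),
# which is how ParallelRunStep entry scripts typically receive parameters. A
# minimal sketch of that wiring, assuming hypothetical names for the registered
# model ('my_automl_model'), input dataset (scoring_ds) and output folder, and
# the ParallelRunConfig sketched after the last snippet; not the original
# pipeline definition:
from azureml.pipeline.steps import ParallelRunStep

batch_scoring_step = ParallelRunStep(
    name="automl-batch-scoring",
    parallel_run_config=parallel_run_config,  # see sketch after the last snippet
    inputs=[scoring_ds.as_named_input("scoring_ds")],
    output=output_dir,
    arguments=['--model_name', 'my_automl_model',
               '--batch_size', '100'])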
def init():
    global model
    # The model name is the model.id of the model we want to deploy.
    # Deserialize the model file back into a sklearn model.
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')
    try:
        model = joblib.load(model_path)
    except Exception as e:
        path = os.path.normpath(model_path)
        path_split = path.split(os.sep)
        log_server.update_custom_dimensions({'model_name': path_split[1],
                                             'model_version': path_split[2]})
        logging_utilities.log_traceback(e, logger)
        raise
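# A scoring script pairs init() with a run() entry point. A minimal companion
# sketch for the sklearn init() above, assuming a JSON request body of the
# form {"data": [[...feature row...], ...]}; the payload shape is an
# assumption, not taken from the original script:
import json
import pandas as pd

def run(raw_data):
    try:
        # Parse the request body and score with the globally loaded model.
        data = json.loads(raw_data)['data']
        result = model.predict(pd.DataFrame(data))
        return result.tolist()
    except Exception as e:
        return str(e)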
def init():
    global model
    # model_path = Model.get_model_path("best_hyperdrive_model")
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.joblib')
    path = os.path.normpath(model_path)
    path_split = path.split(os.sep)
    log_server.update_custom_dimensions({
        'model_name': path_split[1],
        'model_version': path_split[2]
    })
    try:
        logger.info("Loading model from path.")
        model = joblib.load(model_path)
        logger.info("Loading successful.")
    except Exception as e:
        logging_utilities.log_traceback(e, logger)
        raise
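# Why path_split[1] and path_split[2]: in a single-model deployment the
# service mounts the model under azureml-models/<model_name>/<version>/, so
# splitting the path recovers the name and version for telemetry. A small
# illustration with a hypothetical model name and version; when
# AZUREML_MODEL_DIR resolves to an absolute path, the negative indices used
# in the variant below are more robust:
import os

parts = os.path.normpath('azureml-models/best_model/3/model.joblib').split(os.sep)
# parts == ['azureml-models', 'best_model', '3', 'model.joblib']
# parts[1] -> model name, parts[2] -> model version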
def init():
    global model
    # The model name is the model.id of the model we want to deploy.
    # Deserialize the model file back into a sklearn model.
    model_base_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'outputs')
    model_path = os.path.join(model_base_path, 'model.pkl')
    path = os.path.normpath(model_path)
    path_split = path.split(os.sep)
    log_server.update_custom_dimensions({
        'model_name': path_split[-3],
        'model_version': path_split[-2]
    })
    try:
        logger.info("Loading model from path.")
        model = joblib.load(model_path)
        logger.info("Loading successful.")
    except Exception as e:
        logging_utilities.log_traceback(e, logger)
        raise
def init():
    global model
    # The model name is the model.id of the model we want to deploy.
    # Deserialize the model file back into a sklearn model.
    # model_path = Model.get_model_path(model_name="automl_bestmodel.pkl")
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')
    path = os.path.normpath(model_path)
    path_split = path.split(os.sep)
    log_server.update_custom_dimensions({'model_name': path_split[1],
                                         'model_version': path_split[2]})
    try:
        logger.info("Loading model from path.")
        model = joblib.load(model_path)
        logger.info("Loading successful.")
    except Exception as e:
        logging_utilities.log_traceback(e, logger)
        raise
def run(input_data):
    entry_script = EntryScript()
    logger = entry_script.logger
    os.makedirs('./outputs', exist_ok=True)
    resultList = []
    model_name = None
    current_run = None
    error_message = None
    error_code = None
    error_type = None
    tags_dict = None

    for file in input_data:
        logs = []
        date1 = datetime.datetime.now()
        logger.info('start (' + file + ') ' + str(datetime.datetime.now()))

        file_path = file
        file_name, file_extension = os.path.splitext(
            os.path.basename(file_path))

        try:
            if file_extension.lower() == ".parquet":
                data = pd.read_parquet(file_path)
            else:
                data = pd.read_csv(file_path, parse_dates=[timestamp_column])

            tags_dict = {'ModelType': 'AutoML'}

            for column_name in group_column_names:
                tags_dict.update({
                    column_name: str(data.iat[0, data.columns.get_loc(column_name)])
                })

            if args.retrain_failed_models:
                logger.info('querying for existing models')
                try:
                    tags = [[k, v] for k, v in tags_dict.items()]
                    models = Model.list(current_step_run.experiment.workspace,
                                        tags=tags, latest=True)
                    if models:
                        logger.info("model already exists for the dataset " + models[0].name)
                        logs = compose_logs(file_name, models[0], date1)
                        resultList.append(logs)
                        continue
                except Exception as error:
                    logger.info('Failed to list the models. ' + 'Error message: ' + str(error))

            tags_dict.update({'InputData': file_path})
            tags_dict.update({'StepRunId': current_step_run.id})
            tags_dict.update({'RunId': current_step_run.parent.id})

            # train model
            fitted_model, current_run = train_model(file_path, data, logger)
            model_string = '_'.join(
                str(v) for k, v in sorted(tags_dict.items())
                if k in group_column_names).lower()
            logger.info("model string to encode " + model_string)
            sha = hashlib.sha256()
            sha.update(model_string.encode())
            model_name = 'automl_' + sha.hexdigest()
            tags_dict.update({'Hash': sha.hexdigest()})

            try:
                logger.info('done training')
                print('Trained best model ' + model_name)
                logger.info(fitted_model)
                logger.info(model_name)
                logger.info('register model')

                current_run.register_model(model_name=model_name,
                                           description='AutoML',
                                           tags=tags_dict)
                print('Registered ' + model_name)
            except Exception as error:
                error_type = ErrorTypes.Unclassified
                error_message = 'Failed to register the model. ' + 'Error message: ' + str(error)
                from azureml.automl.core.shared import logging_utilities
                logging_utilities.log_traceback(error, None)
                logger.info(error_message)

            date2 = datetime.datetime.now()
            logs.append('AutoML')
            logs.append(file_name)
            logs.append(current_run.id)
            logs.append(current_run.get_status())
            logs.append(model_name)
            logs.append(tags_dict)
            logs.append(str(date1))
            logs.append(str(date2))
            logs.append(error_type)
            logs.append(error_code)
            logs.append(error_message)

            logger.info('ending (' + file_path + ') ' + str(date2))

        # Log the error message if an exception occurs
        except (ClientException, AutoMLException) as error:
            date2 = datetime.datetime.now()
            error_message = 'Failed to train the model. ' + 'Error : ' + str(error)
            logs.append('AutoML')
            logs.append(file_name)

            if current_run:
                logs.append(current_run.id)
                logs.append(current_run.get_status())
            else:
                logs.append(current_run)
                logs.append('Failed')

            logs.append(model_name)
            logs.append(tags_dict)
            logs.append(str(date1))
            logs.append(str(date2))
            if isinstance(error, AutoMLException):
                logs.append(error.error_type)
            else:
                logs.append(None)
            logs.append(get_error_code(error))
            logs.append(error_message)
            logger.info(error_message)
            logger.info('ending (' + file_path + ') ' + str(date2))

        resultList.append(logs)

    result = pd.DataFrame(data=resultList)
    return result
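# run(input_data) receives a list of file paths per mini-batch, which is the
# ParallelRunStep contract for FileDataset inputs. A minimal sketch of the
# configuration that drives it, with hypothetical environment/compute names
# and tuning values; not the original pipeline definition:
from azureml.pipeline.steps import ParallelRunConfig

parallel_run_config = ParallelRunConfig(
    source_directory='scripts',      # folder containing this entry script
    entry_script='train.py',         # the script defining init()/run()
    mini_batch_size="1",             # one file per run() invocation
    error_threshold=-1,              # do not fail the step on item errors
    output_action="append_row",      # append returned rows to one output file
    environment=train_env,           # assumed pre-built Environment
    compute_target=compute,          # assumed AmlCompute target
    node_count=3,
    process_count_per_node=2,
    run_invocation_timeout=3700)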