def _make_inference_request_rest(serving_name, data, verb):
    """
    Makes a REST request to Hopsworks for submitting an inference request
    to a model serving instance.

    Args:
        :serving_name: name of the model being served
        :data: data/json to send to the serving
        :verb: type of request (:predict, :classify, or :regress)

    Returns:
        the JSON response parsed as a python dict

    Raises:
        :RestAPIError: if there was an error with the REST call to Hopsworks
    """
    json_embeddable = json.dumps(data)
    headers = {
        constants.HTTP_CONFIG.HTTP_CONTENT_TYPE:
            constants.HTTP_CONFIG.HTTP_APPLICATION_JSON
    }
    method = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    hdfs.project_id() +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_INFERENCE_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_MODELS_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    serving_name + verb)
    response = util.send_request(connection, method, resource_url,
                                 body=json_embeddable, headers=headers)
    resp_body = response.read()
    response_object = json.loads(resp_body)
    error_code, error_msg, user_msg = util._parse_rest_error(response_object)

    # The response exposes the HTTP status as `.code` on Python 3 and
    # `.status` on Python 2 (preserved from the original); normalize it once
    # instead of duplicating the entire error branch per interpreter version.
    http_code = response.code if sys.version_info > (3, 0) else response.status
    if http_code != 201 and http_code != 200:
        raise exceptions.RestAPIError(
            "Could not make inference request to serving (url: {}), server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
            "user msg: {}".format(resource_url, http_code, response.reason,
                                  error_code, error_msg, user_msg))
    return response_object
def _start_or_stop_serving_rest(serving_id, action):
    """
    Makes a REST request to Hopsworks REST API for starting/stopping a serving instance

    Args:
        :serving_id: id of the serving to start/stop
        :action: the action to perform (start or stop)

    Returns:
        None

    Raises:
        :RestAPIError: if there was an error with the REST call to Hopsworks
    """
    method = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    hdfs.project_id() +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_SERVING_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    str(serving_id) +
                    constants.MODEL_SERVING.SERVING_START_OR_STOP_PATH_PARAM +
                    action)
    response = util.send_request(connection, method, resource_url)

    # The response exposes the HTTP status as `.code` on Python 3 and
    # `.status` on Python 2 (preserved from the original); normalize it once
    # to avoid duplicating the identical error-handling branch.
    http_code = response.code if sys.version_info > (3, 0) else response.status
    if http_code != 200:
        # Only read/parse the body on failure; the success path returns None.
        resp_body = response.read()
        response_object = json.loads(resp_body)
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise exceptions.RestAPIError(
            "Could not perform action {} on serving with id {} (url: {}), "
            "server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
            "user msg: {}".format(action, serving_id, resource_url, http_code,
                                  response.reason, error_code, error_msg,
                                  user_msg))
def _get_servings_rest():
    """
    Makes a REST request to Hopsworks to get a list of all servings in the
    current project

    Returns:
        JSON response parsed as a python dict

    Raises:
        :RestAPIError: if there was an error with the REST call to Hopsworks
    """
    method = constants.HTTP_CONFIG.HTTP_GET
    connection = util._get_http_connection(https=True)
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    hdfs.project_id() +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_SERVING_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER)
    response = util.send_request(connection, method, resource_url)
    resp_body = response.read()
    response_object = json.loads(resp_body)

    # The response exposes the HTTP status as `.code` on Python 3 and
    # `.status` on Python 2 (preserved from the original); normalize it once
    # instead of duplicating the identical error branch for each version.
    http_code = response.code if sys.version_info > (3, 0) else response.status
    if http_code != 200:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise exceptions.RestAPIError(
            "Could not fetch list of servings from Hopsworks REST API (url: {}), "
            "server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, "
            "error msg: {}, user msg: {}".format(resource_url, http_code,
                                                 response.reason, error_code,
                                                 error_msg, user_msg))
    return response_object
def _delete_serving_rest(serving_id):
    """
    Issues a DELETE request against the Hopsworks REST API to remove a
    serving instance from the current project.

    Args:
        :serving_id: id of the serving to delete

    Returns:
        None

    Raises:
        :RestAPIError: if there was an error with the REST call to Hopsworks
    """
    method = constants.HTTP_CONFIG.HTTP_DELETE
    slash = constants.DELIMITERS.SLASH_DELIMITER
    # Assemble /hopsworks-api/project/<id>/serving/<serving_id>
    path_segments = [
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_SERVING_RESOURCE,
        str(serving_id),
    ]
    resource_url = slash + slash.join(path_segments)
    response = util.send_request(method, resource_url)

    # Success path: the API answers 200 and there is nothing to return.
    if response.status_code == 200:
        return

    response_object = response.json()
    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    raise exceptions.RestAPIError(
        "Could not delete serving with id {} (url: {}), "
        "server response: \n "
        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
        "user msg: {}".format(serving_id, resource_url, response.status_code,
                              response.reason, error_code, error_msg,
                              user_msg))
def _create_or_update_serving_rest(model_path, model_name, serving_type, model_version,
                                   batching_enabled = None, topic_name=None,
                                   num_partitions = None, num_replicas = None,
                                   serving_id = None, instances=1):
    """
    Makes a REST request to Hopsworks for creating or updating a model serving instance

    Args:
        :model_path: path to the model or artifact being served
        :model_name: the name of the serving to create
        :serving_type: the type of serving
        :model_version: version of the serving
        :batching_enabled: boolean flag whether to enable batching for inference requests to the serving
        :topic_name: name of the kafka topic ("CREATE" to create a new one, or "NONE" to not use kafka topic)
        :num_partitions: kafka partitions
        :num_replicas: kafka replicas
        :serving_id: the id of the serving in case of UPDATE, if serving_id is None, it is a CREATE operation.
        :instances: the number of serving instances (the more instances the more inference requests can be
                    served in parallel)

    Returns:
        None

    Raises:
        :RestAPIError: if there was an error with the REST call to Hopsworks
    """
    # Nested DTO describing the kafka topic used for inference logging.
    kafka_topic_dto = {
        constants.REST_CONFIG.JSON_KAFKA_TOPIC_NAME: topic_name,
        constants.REST_CONFIG.JSON_KAFKA_NUM_PARTITIONS: num_partitions,
        constants.REST_CONFIG.JSON_KAFKA_NUM_REPLICAS: num_replicas
    }
    payload = {
        constants.REST_CONFIG.JSON_SERVING_MODEL_VERSION: model_version,
        constants.REST_CONFIG.JSON_SERVING_ARTIFACT_PATH: model_path,
        constants.REST_CONFIG.JSON_SERVING_TYPE: serving_type,
        constants.REST_CONFIG.JSON_SERVING_NAME: model_name,
        constants.REST_CONFIG.JSON_SERVING_KAFKA_TOPIC_DTO: kafka_topic_dto,
        constants.REST_CONFIG.JSON_SERVING_REQUESTED_INSTANCES: instances,
    }
    # A present serving_id turns the request into an UPDATE of that serving.
    if serving_id is not None:
        payload[constants.REST_CONFIG.JSON_SERVING_ID] = serving_id
    # Request batching is only meaningful for TensorFlow Serving deployments.
    if serving_type == constants.MODEL_SERVING.SERVING_TYPE_TENSORFLOW:
        payload[constants.REST_CONFIG.JSON_SERVING_BATCHING_ENABLED] = batching_enabled

    json_embeddable = json.dumps(payload)
    headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE:
                   constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    method = constants.HTTP_CONFIG.HTTP_PUT
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    hdfs.project_id() +
                    constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_SERVING_RESOURCE +
                    constants.DELIMITERS.SLASH_DELIMITER)
    response = util.send_request(method, resource_url, data=json_embeddable,
                                 headers=headers)

    # Both 200 (update) and 201 (create) indicate success.
    if response.status_code not in (200, 201):
        response_object = response.json()
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise exceptions.RestAPIError("Could not create or update serving (url: {}), server response: \n " \
                                      "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
                                      "user msg: {}".format(resource_url, response.status_code,
                                                            response.reason, error_code, error_msg,
                                                            user_msg))
def start(runner_name="runner", jobmanager_heap_size=1024, num_of_taskmanagers=1,
          taskmanager_heap_size=4096, num_task_slots=1, cleanup_runner=False,
          ignore_running=False):
    """
    Creates and starts a Beam runner and then starts the beam job server.

    Args:
        runner_name: Name of the runner. If not specified, the default runner name "runner"
            will be used. If the runner already exists, it will be updated with the provided
            arguments. If it doesn't exist, it will be created.
        jobmanager_heap_size: The memory(mb) of the Flink cluster JobManager
        num_of_taskmanagers: The number of TaskManagers of the Flink cluster.
        taskmanager_heap_size: The memory(mb) of the each TaskManager in the Flink cluster.
        num_task_slots: Number of slots of the Flink cluster.
        cleanup_runner: Kill runner when Python terminates
        ignore_running: Ignore currently running instances of Runner

    Returns:
        The artifact_port, expansion_port, job_host, job_port, jobserver.pid
    """
    global cleanup_runners
    cleanup_runners = cleanup_runner

    # Look for executions in any "active" state for this runner.
    active_states_filter = (
        "?filter_by=state:INITIALIZING,RUNNING,ACCEPTED,NEW,NEW_SAVING,SUBMITTED,"
        "STARTING_APP_MASTER,AGGREGATING_LOGS&sort_by=id:desc")
    execution = jobs.get_executions(runner_name, active_states_filter)
    already_active = execution is not None and execution['count'] > 0
    if ignore_running is False and already_active:
        raise exceptions.RestAPIError(
            "Runner is already in state running, set ignore_running to True to start a "
            "new instance")

    create_runner(runner_name, jobmanager_heap_size, num_of_taskmanagers,
                  taskmanager_heap_size, num_task_slots)
    start_runner(runner_name)

    # Poll every 5 seconds, for at most 90 seconds, until the most recent
    # execution reports "RUNNING"; then start the jobserver. NOTE(review):
    # the jobserver is started even if the timeout elapses without reaching
    # "RUNNING", and ['items'][0] presumes at least one execution exists —
    # behavior kept as-is.
    elapsed = 0
    while elapsed < 90:
        latest = jobs.get_executions(
            runner_name, "?offset=0&limit=1&sort_by=id:desc")['items'][0]
        if latest['state'] == "RUNNING":
            break
        time.sleep(5)
        elapsed += 5
    return start_beam_jobserver(runner_name)
def _create_or_update_serving_rest(
        serving_id, serving_name, model_path, model_version, artifact_version,
        transformer, model_server, kfserving, batching_enabled, topic_name,
        num_partitions, num_replicas, inference_logging, instances,
        transformer_instances, predictor_resource_config):
    """
    Makes a REST request to Hopsworks for creating or updating a model serving instance

    Args:
        :serving_id: the id of the serving in case of UPDATE, if serving_id is None, it is a CREATE operation.
        :serving_name: the name of the serving to create
        :model_path: path to the model or artifact being served
        :model_version: version of the model to serve
        :artifact_version: version of the artifact to serve
        :transformer: path to the transformer script
        :model_server: name of the model server to deploy, e.g "TENSORFLOW_SERVING" or "FLASK"
        :kfserving: boolean flag whether to serve the model using KFServing serving tool
        :batching_enabled: boolean flag whether to enable batching for inference requests to the serving
        :topic_name: name of the kafka topic for inference logging, e.g "CREATE" to create a new one, "NONE" to not
            use kafka topic or an existent topic name
        :num_partitions: if a new kafka topic is to created, number of partitions of the new topic
        :num_replicas: if a new kafka topic is to created, replication factor of the new topic
        :inference_logging: inference data to log into the Kafka topic, e.g "MODEL_INPUTS", "PREDICTIONS" or "ALL"
        :instances: the number of serving instances (the more instances the more inference requests can be served
            in parallel)
        :transformer_instances: the number of transformer instances (the more instances the more inference requests
            can be served in parallel)
        :predictor_resource_config: dict for setting resource configuration parameters required to serve the model,
            for example {'memory': 2048, 'cores': 1, 'gpus': 0}. Currently only supported if Hopsworks is deployed
            with Kubernetes installed.

    Returns:
        None

    Raises:
        :RestAPIError: if there was an error with the REST call to Hopsworks
    """
    # Choose the serving tool based on the kfserving flag.
    if kfserving:
        serving_tool = constants.MODEL_SERVING.SERVING_TOOL_KFSERVING
    else:
        serving_tool = constants.MODEL_SERVING.SERVING_TOOL_DEFAULT

    kafka_topic_dto = {
        constants.REST_CONFIG.JSON_KAFKA_TOPIC_NAME: topic_name,
        constants.REST_CONFIG.JSON_KAFKA_NUM_PARTITIONS: num_partitions,
        constants.REST_CONFIG.JSON_KAFKA_NUM_REPLICAS: num_replicas
    }
    payload = {
        constants.REST_CONFIG.JSON_SERVING_NAME: serving_name,
        constants.REST_CONFIG.JSON_SERVING_MODEL_PATH: model_path,
        constants.REST_CONFIG.JSON_SERVING_MODEL_VERSION: model_version,
        constants.REST_CONFIG.JSON_SERVING_ARTIFACT_VERSION: artifact_version,
        constants.REST_CONFIG.JSON_SERVING_TRANSFORMER: transformer,
        constants.REST_CONFIG.JSON_SERVING_MODEL_SERVER: model_server,
        constants.REST_CONFIG.JSON_SERVING_TOOL: serving_tool,
        constants.REST_CONFIG.JSON_SERVING_KAFKA_TOPIC_DTO: kafka_topic_dto,
        constants.REST_CONFIG.JSON_SERVING_INFERENCE_LOGGING: inference_logging,
        constants.REST_CONFIG.JSON_SERVING_REQUESTED_INSTANCES: instances,
        constants.REST_CONFIG.JSON_SERVING_REQUESTED_TRANSFORMER_INSTANCES: transformer_instances,
        constants.REST_CONFIG.JSON_SERVING_PREDICTOR_RESOURCE_CONFIG: predictor_resource_config
    }

    # A present serving_id turns the request into an UPDATE of that serving.
    if serving_id is not None:
        payload[constants.REST_CONFIG.JSON_SERVING_ID] = serving_id
    # Request batching is only meaningful for TensorFlow Serving deployments.
    if model_server == constants.MODEL_SERVING.MODEL_SERVER_TENSORFLOW_SERVING:
        payload[constants.REST_CONFIG.JSON_SERVING_BATCHING_ENABLED] = batching_enabled
    # Translate the symbolic artifact versions into the numeric codes the
    # backend expects: "CREATE" -> -1 (build new artifact), "MODEL-ONLY" -> 0.
    if artifact_version == "CREATE":
        payload[constants.REST_CONFIG.JSON_SERVING_ARTIFACT_VERSION] = -1
    elif artifact_version == "MODEL-ONLY":
        payload[constants.REST_CONFIG.JSON_SERVING_ARTIFACT_VERSION] = 0
    # Without a kafka topic there is nothing to log inference data to.
    if topic_name is None or topic_name == "NONE":
        payload[constants.REST_CONFIG.JSON_SERVING_INFERENCE_LOGGING] = None

    json_embeddable = json.dumps(payload)
    headers = {
        constants.HTTP_CONFIG.HTTP_CONTENT_TYPE:
            constants.HTTP_CONFIG.HTTP_APPLICATION_JSON
    }
    method = constants.HTTP_CONFIG.HTTP_PUT
    slash = constants.DELIMITERS.SLASH_DELIMITER
    # Assemble /hopsworks-api/project/<id>/serving/ (note the trailing slash).
    resource_url = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_SERVING_RESOURCE,
    ]) + slash
    response = util.send_request(method, resource_url, data=json_embeddable,
                                 headers=headers)

    # Both 200 (update) and 201 (create) indicate success.
    if response.status_code not in (200, 201):
        response_object = response.json()
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise exceptions.RestAPIError(
            "Could not create or update serving (url: {}), server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
            "user msg: {}".format(resource_url, response.status_code,
                                  response.reason, error_code, error_msg,
                                  user_msg))