def get_schema(topic, version_id=1):
    """
    Requests the Avro schema of a Kafka topic, at a given schema version, from the
    Hopsworks appservice schema resource.

    Args:
        :topic: Kafka topic name
        :version_id: Schema version ID

    Returns:
        the response body parsed from JSON
    """
    payload = _prepare_rest_appservice_json_request()
    payload[constants.REST_CONFIG.JSON_SCHEMA_TOPICNAME] = topic
    payload[constants.REST_CONFIG.JSON_SCHEMA_VERSION] = version_id
    request_body = json.dumps(payload)
    request_headers = {'Content-type': 'application/json'}
    connection = util._get_http_connection(https=True)
    schema_resource = constants.REST_CONFIG.HOPSWORKS_SCHEMA_RESOURCE
    resource_url = "/" + "/".join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_REST_APPSERVICE,
        schema_resource,
    ])
    # No status check is performed here: whatever JSON the server sends is returned.
    connection.request("POST", resource_url, request_body, request_headers)
    raw_body = connection.getresponse().read()
    return json.loads(raw_body)
def _get_featurestores():
    """
    Sends a REST request to get all featurestores for the project

    Returns:
        the parsed JSON response (a list of Featurestore JSON DTOs)

    Raises:
        :RestAPIError: if the response status is not 200
    """
    method = constants.HTTP_CONFIG.HTTP_GET
    connection = util._get_http_connection(https=True)
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    hdfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE)
    response = util.send_request(connection, method, resource_url)
    resp_body = response.read()
    response_object = json.loads(resp_body)
    # The status is read via ``code`` on python 3 and via ``status`` on python 2.
    # The same value is used in the error message: the python 2 branch previously
    # formatted ``response.code``, which python 2 responses do not provide.
    if sys.version_info > (3, 0):
        status_code = response.code
    else:
        status_code = response.status
    if status_code != 200:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise RestAPIError("Could not fetch feature stores (url: {}), server response: \n "
                           "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
            resource_url, status_code, response.reason, error_code, error_msg, user_msg))
    return response_object
def _get_training_dataset_rest(training_dataset_id, featurestore_id):
    """
    Makes a REST call to hopsworks for getting the metadata of a particular training dataset
    (including the statistics)

    Args:
        :training_dataset_id: id of the training_dataset
        :featurestore_id: id of the featurestore where the training dataset resides

    Returns:
        The REST response, parsed from JSON

    Raises:
        :RestAPIError: if the response status is not 200
    """
    headers = {
        constants.HTTP_CONFIG.HTTP_CONTENT_TYPE:
            constants.HTTP_CONFIG.HTTP_APPLICATION_JSON
    }
    method = constants.HTTP_CONFIG.HTTP_GET
    connection = util._get_http_connection(https=True)
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    util.project_id() + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    str(featurestore_id) + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_TRAININGDATASETS_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    str(training_dataset_id))
    response = util.send_request(connection, method, resource_url, headers=headers)
    resp_body = response.read().decode('utf-8')
    response_object = json.loads(resp_body)
    # Only the attribute lookup is guarded. The previous bare ``except`` also wrapped
    # the ``raise``, so the RestAPIError itself was caught and raised a second time,
    # and unrelated exceptions were silently swallowed.
    try:
        status_code = response.code  # for python 3
    except AttributeError:
        status_code = response.status  # for python 2
    if status_code != 200:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise RestAPIError("Could not get the metadata of training dataset (url: {}), server response: \n "
                           "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
            resource_url, status_code, response.reason, error_code, error_msg, user_msg))
    return response_object
def _make_inference_request_rest(serving_name, data, verb):
    """
    Makes a REST request to Hopsworks for sending an inference request to a model serving instance

    Args:
        :serving_name: name of the model being served
        :data: data/json to send to the serving
        :verb: type of request (:predict, :classify, or :regress)

    Returns:
        the JSON response

    Raises:
        :RestAPIError: if the response status is neither 200 nor 201
    """
    json_embeddable = json.dumps(data)
    headers = {
        constants.HTTP_CONFIG.HTTP_CONTENT_TYPE:
            constants.HTTP_CONFIG.HTTP_APPLICATION_JSON
    }
    method = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)
    # The verb is appended directly to the serving name, without a slash in between.
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    hdfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_INFERENCE_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_MODELS_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    serving_name + verb)
    response = util.send_request(connection, method, resource_url, body=json_embeddable, headers=headers)
    resp_body = response.read()
    response_object = json.loads(resp_body)
    # The status is read via ``code`` on python 3 and via ``status`` on python 2.
    if sys.version_info > (3, 0):
        status_code = response.code
    else:
        status_code = response.status
    if status_code != 201 and status_code != 200:
        # The error fields are only extracted once the request is known to have failed;
        # a successful inference response is returned untouched.
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise exceptions.RestAPIError("Could not make inference request (url: {}), server response: \n "
                                      "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
                                      "user msg: {}".format(resource_url, status_code, response.reason,
                                                            error_code, error_msg, user_msg))
    return response_object
def _start_or_stop_serving_rest(serving_id, action):
    """
    Posts a start/stop command for a serving instance to the Hopsworks REST API

    Args:
        :serving_id: id of the serving to start/stop
        :action: the action to perform (start or stop)

    Returns:
        None

    Raises:
        :RestAPIError: if the response status is not 200
    """
    http_verb = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)
    slash = constants.DELIMITERS.SLASH_DELIMITER
    serving_path = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_SERVING_RESOURCE,
        str(serving_id),
    ])
    # The action is appended right after the path parameter, with no extra separator.
    resource_url = serving_path + constants.MODEL_SERVING.SERVING_START_OR_STOP_PATH_PARAM + action
    response = util.send_request(connection, http_verb, resource_url)

    # python 3 reads ``code``, python 2 reads ``status``
    status_code = response.code if sys.version_info > (3, 0) else response.status
    if status_code == 200:
        return

    # The body is only read and parsed when the request failed.
    failure_payload = json.loads(response.read())
    error_code, error_msg, user_msg = util._parse_rest_error(failure_payload)
    message_template = ("Could not perform action {} on serving with id {} (url: {}), "
                        "server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
                        "user msg: {}")
    raise exceptions.RestAPIError(message_template.format(
        action, serving_id, resource_url, status_code, response.reason,
        error_code, error_msg, user_msg))
def _get_featurestore_metadata(featurestore):
    """
    Makes a REST call to hopsworks to get all metadata of a featurestore (featuregroups and
    training datasets) for the provided featurestore.

    Args:
        :featurestore: the name of the database, defaults to the project's featurestore

    Returns:
        JSON response

    Raises:
        :RestAPIError: if the response status is not 200
    """
    method = constants.HTTP_CONFIG.HTTP_GET
    connection = util._get_http_connection(https=True)
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    util.project_id() + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    featurestore + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_FEATURESTORE_METADATA_RESOURCE)
    response = util.send_request(connection, method, resource_url)
    resp_body = response.read().decode('utf-8')
    response_object = json.loads(resp_body)
    # The status is read via ``code`` on python 3 and via ``status`` on python 2.
    if sys.version_info > (3, 0):
        status_code = response.code
    else:
        status_code = response.status
    if status_code != 200:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        # Argument order follows the template: featurestore name first, then the url.
        # They were previously passed the other way round, which mislabelled both values.
        raise RestAPIError(
            "Could not fetch featurestore metadata for featurestore: {} (url: {}), "
            "server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, "
            "error msg: {}, user msg: {}".format(
                featurestore, resource_url, status_code, response.reason,
                error_code, error_msg, user_msg))
    return response_object
def _get_servings_rest():
    """
    Asks the Hopsworks REST API for the list of servings in the current project

    Returns:
        the response body parsed from JSON

    Raises:
        :RestAPIError: if the response status is not 200
    """
    http_verb = constants.HTTP_CONFIG.HTTP_GET
    connection = util._get_http_connection(https=True)
    slash = constants.DELIMITERS.SLASH_DELIMITER
    path_segments = [
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_SERVING_RESOURCE,
    ]
    # The resource path carries both a leading and a trailing slash.
    resource_url = slash + slash.join(path_segments) + slash
    response = util.send_request(connection, http_verb, resource_url)
    response_object = json.loads(response.read())

    # python 3 reads ``code``, python 2 reads ``status``
    status_code = response.code if sys.version_info > (3, 0) else response.status
    if status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not fetch list of servings from Hopsworks REST API (url: {}), "
                        "server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, "
                        "error msg: {}, user msg: {}")
    raise exceptions.RestAPIError(message_template.format(
        resource_url, status_code, response.reason, error_code, error_msg, user_msg))
def _get_project_info(project_name):
    """
    Fetches the metadata of the named project from the Hopsworks REST API

    Args:
        :project_name: the name of the project

    Returns:
        the response body parsed from JSON

    Raises:
        :RestAPIError: if the response status is not 200
    """
    http_verb = constants.HTTP_CONFIG.HTTP_GET
    connection = util._get_http_connection(https=True)
    slash = constants.DELIMITERS.SLASH_DELIMITER
    resource_url = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_INFO_RESOURCE,
        project_name,
    ])
    response = util.send_request(connection, http_verb, resource_url)
    decoded_body = response.read().decode('utf-8')
    response_object = json.loads(decoded_body)

    # python 3 reads ``code``, python 2 reads ``status``
    status_code = response.code if sys.version_info > (3, 0) else response.status
    if status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not fetch project metadata for project: {} (url: {}), "
                        "server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, "
                        "error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        project_name, resource_url, status_code, response.reason,
        error_code, error_msg, user_msg))
def _delete_table_contents(featuregroup_id, featurestore_id):
    """
    Sends a request to clear the contents of a featuregroup by dropping the featuregroup and recreating it with
    the same metadata.

    Args:
        :featuregroup_id: id of the featuregroup
        :featurestore_id: id of the featurestore

    Returns:
        The JSON response

    Raises:
        :RestAPIError: if the response status is not 200
    """
    method = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)
    resource_url = (constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    hdfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    str(featurestore_id) + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_FEATUREGROUPS_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER +
                    str(featuregroup_id) + constants.DELIMITERS.SLASH_DELIMITER +
                    constants.REST_CONFIG.HOPSWORKS_FEATUREGROUP_CLEAR_RESOURCE)
    response = util.send_request(connection, method, resource_url)
    resp_body = response.read()
    response_object = json.loads(resp_body)
    # The status is read via ``code`` on python 3 and via ``status`` on python 2.
    # The same value is used in the error message: the python 2 branch previously
    # formatted ``response.code``, which python 2 responses do not provide.
    if sys.version_info > (3, 0):
        status_code = response.code
    else:
        status_code = response.status
    if status_code != 200:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise RestAPIError("Could not clear featuregroup contents (url: {}), server response: \n "
                           "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
            resource_url, status_code, response.reason, error_code, error_msg, user_msg))
    return response_object
def get_schema(topic):
    """
    Requests the Avro schema of a Kafka topic from the project's Kafka resource.

    Args:
        :topic: Kafka topic name

    Returns:
        the response body parsed from JSON
    """
    # NOTE(review): a ``get_schema(topic, version_id=1)`` defined earlier in this file
    # is replaced by this definition at import time -- confirm that is intended.
    http_verb = constants.HTTP_CONFIG.HTTP_GET
    connection = util._get_http_connection(https=True)
    slash = constants.DELIMITERS.SLASH_DELIMITER
    resource_url = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_KAFKA_RESOURCE,
        topic,
        constants.REST_CONFIG.HOPSWORKS_SCHEMA_RESOURCE,
    ])
    response = util.send_request(connection, http_verb, resource_url)
    # No status check is performed here: whatever JSON the server sends is returned.
    return json.loads(response.read())
def _update_training_dataset_stats_rest(
        training_dataset_id, featurestore_id, feature_corr_data, featuregroup_desc_stats_data,
        features_histogram_data, cluster_analysis_data, training_dataset_type, training_dataset_dto_type,
        jobs):
    """
    Sends a PUT request to hopsworks that updates the statistics of a training dataset.
    The request URL asks for a stats update and not for a metadata update.

    Args:
        :training_dataset_id: id of the training dataset
        :featurestore_id: id of the featurestore that the training dataset is linked to
        :feature_corr_data: json-string with the feature correlation matrix of the training dataset
        :featuregroup_desc_stats_data: json-string with the descriptive statistics of the training dataset
        :features_histogram_data: list of json-strings with histogram data for the features in the training dataset
        :cluster_analysis_data: the clusters from cluster analysis on the dataset
        :training_dataset_type: type of the training dataset (external or hopsfs)
        :training_dataset_dto_type: type of the JSON DTO for the backend
        :jobs: list of jobs for updating the training dataset stats

    Returns:
        the response body parsed from JSON

    Raises:
        :RestAPIError: if the response status is not 200
    """
    payload = {
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURE_CORRELATION: feature_corr_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_DESC_STATS: featuregroup_desc_stats_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_HISTOGRAM: features_histogram_data,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_CLUSTERS: cluster_analysis_data,
        constants.REST_CONFIG.JSON_TYPE: training_dataset_dto_type,
        constants.REST_CONFIG.JSON_FEATURESTORE_SETTINGS_TRAINING_DATASET_TYPE: training_dataset_type,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_JOBS: _pre_process_jobs_list(jobs),
    }
    request_body = json.dumps(payload)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    http_verb = constants.HTTP_CONFIG.HTTP_PUT
    connection = util._get_http_connection(https=True)

    slash = constants.DELIMITERS.SLASH_DELIMITER
    dataset_path = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        constants.REST_CONFIG.HOPSWORKS_TRAININGDATASETS_RESOURCE,
        str(training_dataset_id),
    ])
    # query string: update the stats (=true) but leave the metadata alone (=false)
    query_string = ("?" + constants.REST_CONFIG.JSON_FEATURESTORE_UPDATE_STATS_QUERY_PARAM + "=true"
                    + constants.DELIMITERS.AMPERSAND_DELIMITER
                    + constants.REST_CONFIG.JSON_FEATURESTORE_UPDATE_METADATA_QUERY_PARAM + "=false")
    resource_url = dataset_path + query_string

    response = util.send_request(connection, http_verb, resource_url, body=request_body,
                                 headers=request_headers)
    response_object = json.loads(response.read())

    # python 3 reads ``code``, python 2 reads ``status``
    status_code = response.code if sys.version_info > (3, 0) else response.status
    if status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not update training dataset stats (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, status_code, response.reason, error_code, error_msg, user_msg))
def _create_training_dataset_rest(training_dataset, featurestore_id, description, training_dataset_version,
                                  data_format, jobs, features_schema_data, feature_corr_data,
                                  training_dataset_desc_stats_data, features_histogram_data,
                                  cluster_analysis_data, training_dataset_type, training_dataset_dto_type,
                                  settings, hopsfs_connector_id = None, s3_connector_id = None):
    """
    Sends a POST request to hopsworks that registers a new training dataset

    Args:
        :training_dataset: the name of the training dataset
        :featurestore_id: the id of the featurestore where the training dataset resides
        :description: a description of the training dataset
        :training_dataset_version: the version of the training dataset
        :data_format: the format of the training dataset
        :jobs: list of Hopsworks jobs linked to the training dataset
        :features_schema_data: the schema of the training dataset
        :feature_corr_data: json-string with the feature correlation matrix of the training dataset
        :training_dataset_desc_stats_data: json-string with the descriptive statistics of the training dataset
        :features_histogram_data: list of json-strings with histogram data for the features in the training dataset
        :cluster_analysis_data: the clusters from cluster analysis on the dataset
        :training_dataset_type: type of the training dataset (external or hopsfs)
        :training_dataset_dto_type: type of the JSON DTO for the backend
        :settings: featurestore settings; its external and hopsfs training dataset types
                   decide which connector id is added to the request
        :hopsfs_connector_id: id of the connector for a hopsfs training dataset
        :s3_connector_id: id of the connector for a s3 training dataset

    Returns:
        the response body parsed from JSON

    Raises:
        :RestAPIError: if the response status is neither 200 nor 201
    """
    payload = {
        constants.REST_CONFIG.JSON_TRAINING_DATASET_NAME: training_dataset,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_VERSION: training_dataset_version,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_DESCRIPTION: description,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_JOBS: _pre_process_jobs_list(jobs),
        constants.REST_CONFIG.JSON_TRAINING_DATASET_SCHEMA: features_schema_data,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_FEATURE_CORRELATION: feature_corr_data,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_DESC_STATS: training_dataset_desc_stats_data,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_FEATURES_HISTOGRAM: features_histogram_data,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_CLUSTERS: cluster_analysis_data,
        constants.REST_CONFIG.JSON_TRAINING_DATASET_FORMAT: data_format,
        constants.REST_CONFIG.JSON_TYPE: training_dataset_dto_type,
        constants.REST_CONFIG.JSON_FEATURESTORE_SETTINGS_TRAINING_DATASET_TYPE: training_dataset_type,
    }
    # The two type checks are independent of each other; each adds its own connector id.
    if training_dataset_type == settings.external_training_dataset_type:
        payload[constants.REST_CONFIG.JSON_TRAINING_DATASET_S3_CONNECTOR_ID] = s3_connector_id
    if training_dataset_type == settings.hopsfs_training_dataset_type:
        payload[constants.REST_CONFIG.JSON_TRAINING_DATASET_HOPSFS_CONNECTOR_ID] = hopsfs_connector_id
    request_body = json.dumps(payload)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    http_verb = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)

    slash = constants.DELIMITERS.SLASH_DELIMITER
    resource_url = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        constants.REST_CONFIG.HOPSWORKS_TRAININGDATASETS_RESOURCE,
    ])
    response = util.send_request(connection, http_verb, resource_url, body=request_body,
                                 headers=request_headers)
    response_object = json.loads(response.read())

    # python 3 reads ``code``, python 2 reads ``status``
    status_code = response.code if sys.version_info > (3, 0) else response.status
    if status_code == 201 or status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not create training dataset (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, status_code, response.reason, error_code, error_msg, user_msg))
def _update_featuregroup_stats_rest(featuregroup_id, featurestore_id, feature_corr,
                                    featuregroup_desc_stats_data, features_histogram_data,
                                    cluster_analysis_data, featuregroup_type, featuregroup_dto_type, jobs):
    """
    Sends a PUT request to hopsworks that updates the statistics of a featuregroup.
    The request URL asks for a stats update and not for a metadata update.

    Args:
        :featuregroup_id: id of the featuregroup
        :featurestore_id: id of the featurestore where the featuregroup resides
        :feature_corr: the feature correlation matrix
        :featuregroup_desc_stats_data: the descriptive statistics of the featuregroup
        :features_histogram_data: the histograms of the features in the featuregroup
        :cluster_analysis_data: the clusters from cluster analysis on the featuregroup
        :featuregroup_type: type of the featuregroup (on-demand or cached)
        :featuregroup_dto_type: type of the JSON DTO for the backend
        :jobs: a list of jobs for updating the feature group stats

    Returns:
        the response body parsed from JSON

    Raises:
        :RestAPIError: if the response status is not 200
    """
    payload = {
        constants.REST_CONFIG.JSON_FEATUREGROUP_JOBS: _pre_process_jobs_list(jobs),
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURE_CORRELATION: feature_corr,
        constants.REST_CONFIG.JSON_FEATUREGROUP_DESC_STATS: featuregroup_desc_stats_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_HISTOGRAM: features_histogram_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_CLUSTERS: cluster_analysis_data,
        constants.REST_CONFIG.JSON_TYPE: featuregroup_dto_type,
        constants.REST_CONFIG.JSON_FEATURESTORE_SETTINGS_FEATUREGROUP_TYPE: featuregroup_type,
    }
    # allow_nan=False makes json.dumps raise ValueError on NaN/Infinity values
    request_body = json.dumps(payload, allow_nan=False)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    http_verb = constants.HTTP_CONFIG.HTTP_PUT
    connection = util._get_http_connection(https=True)

    slash = constants.DELIMITERS.SLASH_DELIMITER
    featuregroup_path = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        constants.REST_CONFIG.HOPSWORKS_FEATUREGROUPS_RESOURCE,
        str(featuregroup_id),
    ])
    # query string: update the stats (=true) but leave the metadata alone (=false)
    query_string = ("?" + constants.REST_CONFIG.JSON_FEATURESTORE_UPDATE_STATS_QUERY_PARAM + "=true"
                    + constants.DELIMITERS.AMPERSAND_DELIMITER
                    + constants.REST_CONFIG.JSON_FEATURESTORE_UPDATE_METADATA_QUERY_PARAM + "=false")
    resource_url = featuregroup_path + query_string

    response = util.send_request(connection, http_verb, resource_url, body=request_body,
                                 headers=request_headers)
    response_object = json.loads(response.read())

    # python 3 reads ``code``, python 2 reads ``status``
    status_code = response.code if sys.version_info > (3, 0) else response.status
    if status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not update featuregroup stats (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, status_code, response.reason, error_code, error_msg, user_msg))
def _create_featuregroup_rest(featuregroup, featurestore_id, description, featuregroup_version, jobs,
                              features_schema, feature_corr_data, featuregroup_desc_stats_data,
                              features_histogram_data, cluster_analysis_data, featuregroup_type,
                              featuregroup_dto_type, sql_query, jdbc_connector_id):
    """
    Sends a POST request to hopsworks that registers a new featuregroup with the given metadata

    Args:
        :featuregroup: the name of the featuregroup
        :featurestore_id: id of the featurestore where the featuregroup resides
        :description: a description of the featuregroup
        :featuregroup_version: the version of the featuregroup
        :jobs: list of Hopsworks Jobs linked to the feature group
        :features_schema: the schema of the featuregroup
        :feature_corr_data: json-string with the feature correlation matrix of the featuregroup
        :featuregroup_desc_stats_data: json-string with the descriptive statistics of the featuregroup
        :features_histogram_data: list of json-strings with histogram data for the features in the featuregroup
        :cluster_analysis_data: cluster analysis for the featuregroup
        :featuregroup_type: type of the featuregroup (on-demand or cached)
        :featuregroup_dto_type: type of the JSON DTO for the backend
        :sql_query: SQL Query for On-demand feature groups
        :jdbc_connector_id: id of the jdbc_connector for on-demand feature groups

    Returns:
        the response body parsed from JSON

    Raises:
        :RestAPIError: if the response status is neither 200 nor 201
    """
    payload = {
        constants.REST_CONFIG.JSON_FEATUREGROUP_NAME: featuregroup,
        constants.REST_CONFIG.JSON_FEATUREGROUP_VERSION: featuregroup_version,
        constants.REST_CONFIG.JSON_FEATUREGROUP_DESCRIPTION: description,
        constants.REST_CONFIG.JSON_FEATUREGROUP_JOBS: _pre_process_jobs_list(jobs),
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES: features_schema,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURE_CORRELATION: feature_corr_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_DESC_STATS: featuregroup_desc_stats_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_HISTOGRAM: features_histogram_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_CLUSTERS: cluster_analysis_data,
        constants.REST_CONFIG.JSON_TYPE: featuregroup_dto_type,
        constants.REST_CONFIG.JSON_FEATURESTORE_SETTINGS_FEATUREGROUP_TYPE: featuregroup_type,
    }
    # The query and the JDBC connector are only sent for on-demand feature groups.
    if featuregroup_type == constants.REST_CONFIG.JSON_FEATUREGROUP_ON_DEMAND_TYPE:
        payload[constants.REST_CONFIG.JSON_FEATUREGROUP_ON_DEMAND_QUERY] = sql_query
        payload[constants.REST_CONFIG.JSON_FEATUREGROUP_JDBC_CONNECTOR_ID] = jdbc_connector_id
    request_body = json.dumps(payload)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    http_verb = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)

    slash = constants.DELIMITERS.SLASH_DELIMITER
    resource_url = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        constants.REST_CONFIG.HOPSWORKS_FEATUREGROUPS_RESOURCE,
    ])
    response = util.send_request(connection, http_verb, resource_url, body=request_body,
                                 headers=request_headers)
    response_object = json.loads(response.read())

    # python 3 reads ``code``, python 2 reads ``status``
    status_code = response.code if sys.version_info > (3, 0) else response.status
    if status_code == 201 or status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not create feature group (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, status_code, response.reason, error_code, error_msg, user_msg))
def _create_or_update_serving_rest(model_path, model_name, serving_type, model_version,
                                   batching_enabled=None, topic_name=None, num_partitions=None,
                                   num_replicas=None, serving_id=None, instances=1):
    """
    Issues an HTTP PUT against the project's serving resource in Hopsworks in order to create a
    model serving, or to update an existing one when a serving id is supplied.

    Args:
        :model_path: path to the model or artifact that should be served
        :model_name: name of the serving
        :serving_type: type of the serving
        :model_version: version of the serving
        :batching_enabled: whether inference requests should be batched; only included in the
                           request when the serving type is the TensorFlow serving type
        :topic_name: name of the kafka topic ("CREATE" to create a new one, "NONE" to not use a kafka topic)
        :num_partitions: number of kafka partitions
        :num_replicas: number of kafka replicas
        :serving_id: id of the serving to update; when None the id is left out of the request (CREATE)
        :instances: number of requested serving instances

    Returns:
        None

    Raises:
        :RestAPIError: if Hopsworks answered with an HTTP status other than 200 or 201
    """
    rest = constants.REST_CONFIG
    slash = constants.DELIMITERS.SLASH_DELIMITER

    # Nested DTO describing the kafka topic associated with the serving.
    kafka_topic_dto = {
        rest.JSON_KAFKA_TOPIC_NAME: topic_name,
        rest.JSON_KAFKA_NUM_PARTITIONS: num_partitions,
        rest.JSON_KAFKA_NUM_REPLICAS: num_replicas,
    }
    payload = {
        rest.JSON_SERVING_MODEL_VERSION: model_version,
        rest.JSON_SERVING_ARTIFACT_PATH: model_path,
        rest.JSON_SERVING_TYPE: serving_type,
        rest.JSON_SERVING_NAME: model_name,
        rest.JSON_SERVING_KAFKA_TOPIC_DTO: kafka_topic_dto,
        rest.JSON_SERVING_REQUESTED_INSTANCES: instances,
    }
    # Optional fields are only part of the request when they apply.
    if serving_id is not None:
        payload[rest.JSON_SERVING_ID] = serving_id
    if serving_type == constants.MODEL_SERVING.SERVING_TYPE_TENSORFLOW:
        payload[rest.JSON_SERVING_BATCHING_ENABLED] = batching_enabled

    request_body = json.dumps(payload)
    request_headers = {
        constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON
    }
    http_method = constants.HTTP_CONFIG.HTTP_PUT
    connection = util._get_http_connection(https=True)

    # The empty first and last segments yield the leading and the trailing slash of the URL.
    resource_url = slash.join([
        "",
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_SERVING_RESOURCE,
        "",
    ])
    response = util.send_request(connection, http_method, resource_url,
                                 body=request_body, headers=request_headers)

    # The HTTP status is read from `code` under python 3 and from `status` under python 2.
    if sys.version_info > (3, 0):
        status_code = response.code
    else:
        status_code = response.status

    if status_code in (200, 201):
        return

    # Failure: the response body is only read and parsed on this path.
    error_body = json.loads(response.read())
    error_code, error_msg, user_msg = util._parse_rest_error(error_body)
    message = ("Could not create or update serving (url: {}), server response: \n "
               "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
               "user msg: {}").format(resource_url, status_code, response.reason,
                                      error_code, error_msg, user_msg)
    raise exceptions.RestAPIError(message)