def _next(self):
    """
    Uploads the next chunk of the file and advances the chunk counter.

    Reads 'flowChunkNumber', 'flowTotalChunks', 'flowChunkSize' and 'flowTotalSize' from self.params,
    reads one chunk through self._read_chunk, POSTs it to "/" + self.resource, prints the upload
    progress and finally increments self.params['flowChunkNumber'].

    Returns:
        None

    Raises:
        :Exception: if the current chunk number is already greater than the total number of chunks
        :RestAPIError: if the server answers with an HTTP status code >= 400
    """
    chunk_number = self.params['flowChunkNumber']
    total_chunks = self.params['flowTotalChunks']
    if chunk_number > total_chunks:
        raise Exception(
            "No chunk left to upload: chunk number {} is greater than the total number of chunks {}".format(
                chunk_number, total_chunks))

    chunk_size = self.params['flowChunkSize']
    if chunk_number == total_chunks:
        # Last chunk: its size is derived from size_last_chunk instead of the regular chunk size
        if chunk_size >= self.size_last_chunk:
            chunk_size = self.size_last_chunk
        else:
            chunk_size += self.size_last_chunk

    # File smaller than one regular chunk: only size_last_chunk bytes are read
    if self.params['flowTotalSize'] < self.params['flowChunkSize']:
        chunk_size = self.size_last_chunk

    chunk = self._read_chunk(chunk_size)
    self.params['flowCurrentChunkSize'] = chunk_size

    # Upload chunk
    response = util.send_request(constants.HTTP_CONFIG.HTTP_POST,
                                 resource="/" + self.resource,
                                 data=self.params,
                                 files={'file': (self.file, chunk)})

    if response.status_code >= 400:
        # The body is only needed (and only parsed) when the server reports an error
        response_object = response.json()
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        # NOTE(review): the message text mentions a job execution although this call uploads a file chunk
        raise RestAPIError(
            "Could not perform action on job's execution (url: {}), server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}"
            .format(self.resource, response.status_code, response.reason, error_code, error_msg, user_msg))

    progress = round(self.params['flowChunkNumber'] / self.params['flowTotalChunks'] * 100, 3)
    print("Progress: " + str(progress) + "%")

    self.params['flowChunkNumber'] += 1
def _remove_tag(featurestore_id, id, tag, resource):
    """
    Sends a DELETE request to Hopsworks that removes a tag attached to a featuregroup or training dataset

    Args:
        :featurestore_id: the id of the featurestore
        :id: the id of the featuregroup or training dataset
        :tag: name of the tag
        :resource: featuregroup or training dataset resource

    Returns:
        None

    Raises:
        :RestAPIError: if the HTTP status code of the response is >= 400
    """
    slash = constants.DELIMITERS.SLASH_DELIMITER
    rest = constants.REST_CONFIG

    # Every path segment is preceded by a slash, so the url is a leading slash plus the joined segments
    path_segments = [
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        resource,
        str(id),
        rest.HOPSWORKS_FEATURESTORE_TAGS_RESOURCE,
        tag,
    ]
    resource_url = slash + slash.join(path_segments)

    response = util.send_request(constants.HTTP_CONFIG.HTTP_DELETE, resource_url)
    if response.status_code < 400:
        return

    error_code, error_msg, user_msg = util._parse_rest_error(response.json())
    message_template = ("Could not remove tags (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
def _get_project_info(project_name):
    """
    Sends a GET request to Hopsworks that fetches the metadata of the project with the given name.

    Args:
        :project_name: the name of the project

    Returns:
        JSON response

    Raises:
        :RestAPIError: if the HTTP status code of the response is not 200
    """
    slash = constants.DELIMITERS.SLASH_DELIMITER
    rest = constants.REST_CONFIG

    resource_url = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        rest.HOPSWORKS_PROJECT_INFO_RESOURCE,
        project_name,
    ])

    response = util.send_request(constants.HTTP_CONFIG.HTTP_GET, resource_url)
    response_object = response.json()
    if response.status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not fetch project metadata for project: {} (url: {}), "
                        "server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, "
                        "error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        project_name, resource_url, response.status_code, response.reason,
        error_code, error_msg, user_msg))
def get_path_info(remote_path, project_name=None):
    """
    Fetch the metadata of a remote path by sending a GET request with "?action=stat" to the datasets resource.

    Example usage:

    >>> from hops import dataset
    >>> dataset.get_path_info("Projects/project_name/Resources/myremotefile.txt")

    Args:
        :remote_path: the path to the remote file or directory in the dataset.
        :project_name: name passed to project.project_id_as_shared to resolve the id of the project used in
                       the request url. Defaults to None.

    Returns:
        A json representation of the path metadata.

    Raises:
        :RestAPIError: if the HTTP status code of the response is >= 400
    """
    project_id = project.project_id_as_shared(project_name)

    slash = constants.DELIMITERS.SLASH_DELIMITER
    resource_url = slash + slash.join([
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        project_id,
        constants.REST_CONFIG.HOPSWORKS_DATASETS_RESOURCE,
        remote_path,
    ]) + "?action=stat"

    response = util.send_request('GET', resource_url)
    response_object = response.json()

    if response.status_code >= 400:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise RestAPIError(
            "Could not get path (url: {}), server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}"
            .format(resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))

    # The body has already been parsed above; return it instead of decoding response.content a second time
    return response_object
def _create_or_update_serving_rest(serving_id, serving_name, model_path, model_version, artifact_version,
                                   transformer, model_server, kfserving, batching_enabled, topic_name,
                                   num_partitions, num_replicas, inference_logging, instances,
                                   transformer_instances, predictor_resource_config):
    """
    Sends a PUT request to Hopsworks that creates or updates a model serving instance

    Args:
        :serving_id: the id of the serving in case of UPDATE, if serving_id is None, it is a CREATE operation.
        :serving_name: the name of the serving to create
        :model_path: path to the model or artifact being served
        :model_version: version of the model to serve
        :artifact_version: version of the artifact to serve. "CREATE" is sent as -1 and "MODEL-ONLY" as 0
        :transformer: path to the transformer script
        :model_server: name of the model server to deploy, e.g "TENSORFLOW_SERVING" or "FLASK"
        :kfserving: boolean flag whether to serve the model using KFServing serving tool
        :batching_enabled: boolean flag whether to enable batching for inference requests to the serving
        :topic_name: name of the kafka topic for inference logging, e.g "CREATE" to create a new one, "NONE" to not
                     use kafka topic or an existent topic name
        :num_partitions: if a new kafka topic is to created, number of partitions of the new topic
        :num_replicas: if a new kafka topic is to created, replication factor of the new topic
        :inference_logging: inference data to log into the Kafka topic, e.g "MODEL_INPUTS", "PREDICTIONS" or "ALL"
        :instances: the number of serving instances (the more instances the more inference requests can
                    be served in parallel)
        :transformer_instances: the number of transformer instances (the more instances the more inference
                                requests can be served in parallel)
        :predictor_resource_config: dict for setting resource configuration parameters required to serve the model,
                                    for example {'memory': 2048, 'cores': 1, 'gpus': 0}. Currently only supported
                                    if Hopsworks is deployed with Kubernetes installed.

    Returns:
        None

    Raises:
        :RestAPIError: if the HTTP status code of the response is neither 200 nor 201
    """
    rest = constants.REST_CONFIG
    serving = constants.MODEL_SERVING

    if kfserving:
        serving_tool = serving.SERVING_TOOL_KFSERVING
    else:
        serving_tool = serving.SERVING_TOOL_DEFAULT

    # Symbolic artifact versions are translated to the numeric values sent in the request
    if artifact_version == "CREATE":
        artifact_version_value = -1
    elif artifact_version == "MODEL-ONLY":
        artifact_version_value = 0
    else:
        artifact_version_value = artifact_version

    # Without a kafka topic the inference logging field is sent as null
    if topic_name is None or topic_name == "NONE":
        inference_logging_value = None
    else:
        inference_logging_value = inference_logging

    kafka_topic_dto = {
        rest.JSON_KAFKA_TOPIC_NAME: topic_name,
        rest.JSON_KAFKA_NUM_PARTITIONS: num_partitions,
        rest.JSON_KAFKA_NUM_REPLICAS: num_replicas
    }
    json_contents = {
        rest.JSON_SERVING_NAME: serving_name,
        rest.JSON_SERVING_MODEL_PATH: model_path,
        rest.JSON_SERVING_MODEL_VERSION: model_version,
        rest.JSON_SERVING_ARTIFACT_VERSION: artifact_version_value,
        rest.JSON_SERVING_TRANSFORMER: transformer,
        rest.JSON_SERVING_MODEL_SERVER: model_server,
        rest.JSON_SERVING_TOOL: serving_tool,
        rest.JSON_SERVING_KAFKA_TOPIC_DTO: kafka_topic_dto,
        rest.JSON_SERVING_INFERENCE_LOGGING: inference_logging_value,
        rest.JSON_SERVING_REQUESTED_INSTANCES: instances,
        rest.JSON_SERVING_REQUESTED_TRANSFORMER_INSTANCES: transformer_instances,
        rest.JSON_SERVING_PREDICTOR_RESOURCE_CONFIG: predictor_resource_config
    }
    if serving_id is not None:
        json_contents[rest.JSON_SERVING_ID] = serving_id
    if model_server == serving.MODEL_SERVER_TENSORFLOW_SERVING:
        json_contents[rest.JSON_SERVING_BATCHING_ENABLED] = batching_enabled

    request_body = json.dumps(json_contents)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}

    slash = constants.DELIMITERS.SLASH_DELIMITER
    # The serving resource url ends with a trailing slash
    resource_url = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_SERVING_RESOURCE,
    ]) + slash

    response = util.send_request(constants.HTTP_CONFIG.HTTP_PUT, resource_url,
                                 data=request_body, headers=request_headers)
    if response.status_code not in (200, 201):
        error_code, error_msg, user_msg = util._parse_rest_error(response.json())
        message_template = ("Could not create or update serving (url: {}), server response: \n "
                            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
                            "user msg: {}")
        raise exceptions.RestAPIError(message_template.format(
            resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
# Append the run endpoint of the jar, passing the entry class and the program arguments as query parameters
base_url += "/jars/" + jar_id + "/run?entry-class=" + args.main + "&program-args=" + job_args
print("Submitting job to: " + base_url)
# NOTE(review): verify=False disables TLS certificate verification for this request - confirm this is intended
response = requests.post(base_url,
                         verify=False,
                         headers={
                             "Content-Type": "application/json",
                             "Authorization": "Apikey " + args.apikey
                         })
# The body may not be JSON; fall back to None so the error branch can still report the HTTP status
try:
    response_object = response.json()
except JSONDecodeError:
    response_object = None
# Every status code outside the 2xx range is treated as a failure
if (response.status_code // 100) != 2:
    if response_object:
        error_code, error_msg, user_msg = util._parse_rest_error(
            response_object)
    else:
        # No parsable (or an empty) body: report empty error details
        error_code, error_msg, user_msg = "", "", ""
    raise RestAPIError(
        "Could not execute HTTP request (url: {}), server response: \n "
        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}"
        .format(base_url, response.status_code, response.reason,
                error_code, error_msg, user_msg))
else:
    print(
        "Flink job was submitted successfully, please check Hopsworks UI for progress."
    )
def _sync_hive_table_with_featurestore_rest(featuregroup, featurestore_id, description, featuregroup_version, jobs,
                                            featuregroup_type):
    """
    Sends a POST request to Hopsworks that synchronizes a Hive table with the feature store

    Args:
        :featuregroup: the name of the featuregroup
        :featurestore_id: id of the featurestore where the featuregroup resides
        :description: a description of the featuregroup
        :featuregroup_version: the version of the featuregroup (defaults to 1)
        :jobs: list of Hopsworks Jobs linked to the feature group
        :featuregroup_type: type of the featuregroup (on-demand or cached)

    Returns:
        The HTTP response

    Raises:
        :RestAPIError: if the HTTP status code of the response is neither 200 nor 201
    """
    rest = constants.REST_CONFIG
    slash = constants.DELIMITERS.SLASH_DELIMITER

    request_body = json.dumps({
        rest.JSON_FEATUREGROUP_NAME: featuregroup,
        rest.JSON_FEATUREGROUP_VERSION: featuregroup_version,
        rest.JSON_FEATUREGROUP_DESCRIPTION: description,
        rest.JSON_FEATUREGROUP_JOBS: _pre_process_jobs_list(jobs),
        rest.JSON_TYPE: featuregroup_type,
    })
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}

    resource_url = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        rest.HOPSWORKS_FEATUREGROUPS_RESOURCE,
        rest.HOPSWORKS_FEATUREGROUPS_SYNC_RESOURCE,
    ])

    response = util.send_request(constants.HTTP_CONFIG.HTTP_POST, resource_url,
                                 data=request_body, headers=request_headers)
    response_object = response.json()
    if response.status_code in (200, 201):
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not sync hive table with featurestore (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
def _create_training_dataset_rest(training_dataset, featurestore_id, description, training_dataset_version,
                                  data_format, jobs, features_schema_data, training_dataset_type, settings,
                                  connector_id=None, path=None):
    """
    Makes a REST request to hopsworks for creating a new training dataset

    Args:
        :training_dataset: the name of the training dataset
        :featurestore_id: the id of the featurestore where the training dataset resides
        :description: a description of the training dataset
        :training_dataset_version: the version of the training dataset (defaults to 1)
        :data_format: the format of the training dataset
        :jobs: list of Hopsworks jobs linked to the training dataset
        :features_schema_data: the schema of the training dataset
        :training_dataset_type: type of the training dataset (external or hopsfs)
        :settings: featurestore settings (accepted for interface compatibility, not read by this function)
        :connector_id: id of the storage connector for the training dataset, sent as-is in the request body
                       (None by default)
        :path: the path within the storage connector where to save the training dataset

    Returns:
        the HTTP response

    Raises:
        :RestAPIError: if the HTTP status code of the response is neither 200 nor 201
    """
    rest = constants.REST_CONFIG
    json_contents = {
        rest.JSON_TRAINING_DATASET_NAME: training_dataset,
        rest.JSON_TRAINING_DATASET_VERSION: training_dataset_version,
        rest.JSON_TRAINING_DATASET_DESCRIPTION: description,
        rest.JSON_TRAINING_DATASET_JOBS: _pre_process_jobs_list(jobs),
        rest.JSON_TRAINING_DATASET_SCHEMA: features_schema_data,
        rest.JSON_TRAINING_DATASET_FORMAT: data_format,
        rest.JSON_FEATURESTORE_SETTINGS_TRAINING_DATASET_TYPE: training_dataset_type,
        rest.JSON_FEATURESTORE_LOCATION: path,
        rest.JSON_TRAINING_DATASET_CONNECTOR_ID: connector_id
    }
    # allow_nan=False makes json.dumps raise ValueError instead of emitting NaN/Infinity tokens
    json_embeddable = json.dumps(json_contents, allow_nan=False)
    headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    method = constants.HTTP_CONFIG.HTTP_POST

    slash = constants.DELIMITERS.SLASH_DELIMITER
    resource_url = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        rest.HOPSWORKS_TRAININGDATASETS_RESOURCE,
    ])

    response = util.send_request(method, resource_url, data=json_embeddable, headers=headers)
    response_object = response.json()
    if response.status_code != 201 and response.status_code != 200:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise RestAPIError("Could not create training dataset (url: {}), server response: \n "
                           "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
            resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
    return response_object
def _disable_featuregroup_online_rest(featuregroup_name, featuregroup_version, featuregroup_id, featurestore_id,
                                      featuregroup_type):
    """
    Sends a PUT request to Hopsworks that disables online serving of a feature group (Drop MySQL table)

    Args:
        :featuregroup_name: name of the featuregroup
        :featuregroup_version: version
        :featuregroup_id: id of the featuregroup
        :featurestore_id: id of the featurestore where the featuregroup resides
        :featuregroup_type: type of the featuregroup (on-demand or cached)

    Returns:
        The REST response

    Raises:
        :RestAPIError: if the HTTP status code of the response is not 200
    """
    rest = constants.REST_CONFIG
    slash = constants.DELIMITERS.SLASH_DELIMITER
    ampersand = constants.DELIMITERS.AMPERSAND_DELIMITER

    request_body = json.dumps({
        rest.JSON_TYPE: featuregroup_type,
        rest.JSON_FEATUREGROUP_NAME: featuregroup_name,
        rest.JSON_FEATUREGROUP_VERSION: featuregroup_version
    }, allow_nan=False)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}

    url_path = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        rest.HOPSWORKS_FEATUREGROUPS_RESOURCE,
        str(featuregroup_id),
    ])
    # Only the "disable online" flag is switched on; the other update flags are explicitly false
    url_query = ampersand.join([
        rest.JSON_FEATURESTORE_DISABLE_ONLINE_QUERY_PARAM + "=true",
        rest.JSON_FEATURESTORE_UPDATE_METADATA_QUERY_PARAM + "=false",
        rest.JSON_FEATURESTORE_UPDATE_STATS_QUERY_PARAM + "=false",
        rest.JSON_FEATURESTORE_ENABLE_ONLINE_QUERY_PARAM + "=false",
    ])
    resource_url = url_path + "?" + url_query

    response = util.send_request(constants.HTTP_CONFIG.HTTP_PUT, resource_url,
                                 data=request_body, headers=request_headers)
    response_object = response.json()
    if response.status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not disable feature serving (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
def _create_featuregroup_rest(featuregroup, featurestore_id, description, featuregroup_version, jobs,
                              features_schema, feature_corr_enabled, featuregroup_desc_stats_enabled,
                              features_histogram_enabled, stat_columns, featuregroup_type,
                              sql_query, jdbc_connector_id, online_fg):
    """
    Sends a POST request to Hopsworks that creates a new featuregroup with the specified metadata

    Args:
        :featuregroup: the name of the featuregroup
        :featurestore_id: id of the featurestore where the featuregroup resides
        :description: a description of the featuregroup
        :featuregroup_version: the version of the featuregroup (defaults to 1)
        :jobs: list of Hopsworks Jobs linked to the feature group
        :features_schema: the schema of the featuregroup
        :feature_corr_enabled: boolean to save feature correlation setting of the featuregroup
        :featuregroup_desc_stats_enabled: boolean to save descriptive statistics setting of the featuregroup
        :features_histogram_enabled: boolean to save features histogram setting of the featuregroup
        :stat_columns: a list of columns to compute statistics for
        :featuregroup_type: type of the featuregroup (on-demand or cached)
        :sql_query: SQL Query for On-demand feature groups
        :jdbc_connector_id: id of the jdbc_connector for on-demand feature groups
        :online_fg: whether online feature serving should be enabled for the feature group

    Returns:
        The HTTP response

    Raises:
        :RestAPIError: if the HTTP status code of the response is neither 200 nor 201
    """
    rest = constants.REST_CONFIG
    slash = constants.DELIMITERS.SLASH_DELIMITER

    json_contents = {
        rest.JSON_FEATUREGROUP_NAME: featuregroup,
        rest.JSON_FEATUREGROUP_VERSION: featuregroup_version,
        rest.JSON_FEATUREGROUP_DESCRIPTION: description,
        rest.JSON_FEATUREGROUP_JOBS: _pre_process_jobs_list(jobs),
        rest.JSON_FEATUREGROUP_FEATURES: features_schema,
        rest.JSON_FEATUREGROUP_TYPE: featuregroup_type,
        rest.JSON_FEATUREGROUP_ONLINE: online_fg,
        rest.JSON_FEATUREGROUP_DESCRIPTIVE_STATISTICS_ENABLED: featuregroup_desc_stats_enabled,
        rest.JSON_FEATUREGROUP_FEATURE_CORRELATION_ENABLED: feature_corr_enabled,
        rest.JSON_FEATUREGROUP_FEATURE_HISTOGRAM_ENABLED: features_histogram_enabled,
        rest.JSON_FEATUREGROUP_STATISTIC_COLUMNS: stat_columns
    }
    # The query and the jdbc connector are only sent for on-demand feature groups
    if featuregroup_type == "onDemandFeaturegroupDTO":
        json_contents[rest.JSON_FEATUREGROUP_ON_DEMAND_QUERY] = sql_query
        json_contents[rest.JSON_FEATUREGROUP_JDBC_CONNECTOR_ID] = jdbc_connector_id

    request_body = json.dumps(json_contents, allow_nan=False)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}

    resource_url = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        rest.HOPSWORKS_FEATUREGROUPS_RESOURCE,
    ])

    response = util.send_request(constants.HTTP_CONFIG.HTTP_POST, resource_url,
                                 data=request_body, headers=request_headers)
    response_object = response.json()
    if response.status_code in (200, 201):
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not create feature group (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
def _create_or_update_serving_rest(model_path, model_name, serving_type, model_version,
                                   batching_enabled=None, topic_name=None, num_partitions=None,
                                   num_replicas=None, serving_id=None, instances=1):
    """
    Sends a PUT request to Hopsworks that creates or updates a model serving instance

    Args:
        :model_path: path to the model or artifact being served
        :model_name: the name of the serving to create
        :serving_type: the type of serving
        :model_version: version of the serving
        :batching_enabled: boolean flag whether to enable batching for inference requests to the serving
        :topic_name: name of the kafka topic ("CREATE" to create a new one, or "NONE" to not use kafka topic)
        :num_partitions: kafka partitions
        :num_replicas: kafka replicas
        :serving_id: the id of the serving in case of UPDATE, if serving_id is None, it is a CREATE operation.
        :instances: the number of serving instances (the more instances the more inference requests can
                    be served in parallel)

    Returns:
        None

    Raises:
        :RestAPIError: if the HTTP status code of the response is neither 200 nor 201
    """
    rest = constants.REST_CONFIG
    slash = constants.DELIMITERS.SLASH_DELIMITER

    kafka_topic_dto = {
        rest.JSON_KAFKA_TOPIC_NAME: topic_name,
        rest.JSON_KAFKA_NUM_PARTITIONS: num_partitions,
        rest.JSON_KAFKA_NUM_REPLICAS: num_replicas
    }
    json_contents = {
        rest.JSON_SERVING_MODEL_VERSION: model_version,
        rest.JSON_SERVING_ARTIFACT_PATH: model_path,
        rest.JSON_SERVING_TYPE: serving_type,
        rest.JSON_SERVING_NAME: model_name,
        rest.JSON_SERVING_KAFKA_TOPIC_DTO: kafka_topic_dto,
        rest.JSON_SERVING_REQUESTED_INSTANCES: instances,
    }
    # The id is only part of the request for updates of an existing serving
    if serving_id is not None:
        json_contents[rest.JSON_SERVING_ID] = serving_id
    # The batching flag is only sent for tensorflow servings
    if serving_type == constants.MODEL_SERVING.SERVING_TYPE_TENSORFLOW:
        json_contents[rest.JSON_SERVING_BATCHING_ENABLED] = batching_enabled

    request_body = json.dumps(json_contents)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}

    # The serving resource url ends with a trailing slash
    resource_url = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_SERVING_RESOURCE,
    ]) + slash

    response = util.send_request(constants.HTTP_CONFIG.HTTP_PUT, resource_url,
                                 data=request_body, headers=request_headers)
    if response.status_code not in (200, 201):
        error_code, error_msg, user_msg = util._parse_rest_error(response.json())
        message_template = ("Could not create or update serving (url: {}), server response: \n "
                            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, "
                            "user msg: {}")
        raise exceptions.RestAPIError(message_template.format(
            resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
def _update_training_dataset_stats_rest(training_dataset_id, featurestore_id, feature_corr_data,
                                        featuregroup_desc_stats_data, features_histogram_data,
                                        cluster_analysis_data, training_dataset_type,
                                        training_dataset_dto_type, jobs):
    """
    Sends a PUT request to Hopsworks that updates the statistics of a training dataset

    Args:
        :training_dataset_id: id of the training dataset
        :featurestore_id: id of the featurestore that the training dataset is linked to
        :feature_corr_data: json-string with the feature correlation matrix of the training dataset
        :featuregroup_desc_stats_data: json-string with the descriptive statistics of the training dataset
        :features_histogram_data: list of json-strings with histogram data for the features in the training dataset
        :cluster_analysis_data: the clusters from cluster analysis on the dataset
        :training_dataset_type: type of the training dataset (external or hopsfs)
        :training_dataset_dto_type: type of the JSON DTO for the backend
        :jobs: list of jobs for updating the training dataset stats

    Returns:
        the HTTP response

    Raises:
        :RestAPIError: if the HTTP status code of the response is not 200
    """
    rest = constants.REST_CONFIG
    slash = constants.DELIMITERS.SLASH_DELIMITER

    request_body = json.dumps({
        rest.JSON_FEATUREGROUP_FEATURE_CORRELATION: feature_corr_data,
        rest.JSON_FEATUREGROUP_DESC_STATS: featuregroup_desc_stats_data,
        rest.JSON_FEATUREGROUP_FEATURES_HISTOGRAM: features_histogram_data,
        rest.JSON_TRAINING_DATASET_CLUSTERS: cluster_analysis_data,
        rest.JSON_TYPE: training_dataset_dto_type,
        rest.JSON_FEATURESTORE_SETTINGS_TRAINING_DATASET_TYPE: training_dataset_type,
        rest.JSON_TRAINING_DATASET_JOBS: _pre_process_jobs_list(jobs)
    })
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    connection = util._get_http_connection(https=True)

    url_path = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        rest.HOPSWORKS_TRAININGDATASETS_RESOURCE,
        str(training_dataset_id),
    ])
    # Only the statistics are updated, the metadata update flag is explicitly false
    url_query = constants.DELIMITERS.AMPERSAND_DELIMITER.join([
        rest.JSON_FEATURESTORE_UPDATE_STATS_QUERY_PARAM + "=true",
        rest.JSON_FEATURESTORE_UPDATE_METADATA_QUERY_PARAM + "=false",
    ])
    resource_url = url_path + "?" + url_query

    response = util.send_request(connection, constants.HTTP_CONFIG.HTTP_PUT, resource_url,
                                 body=request_body, headers=request_headers)
    response_object = json.loads(response.read())

    # The status is read from "code" on python 3 and from "status" on python 2
    if sys.version_info > (3, 0):
        status_code = response.code
    else:
        status_code = response.status

    if status_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    message_template = ("Could not update training dataset stats (url: {}), server response: \n "
                        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
    raise RestAPIError(message_template.format(
        resource_url, status_code, response.reason, error_code, error_msg, user_msg))
def _create_training_dataset_rest(training_dataset, featurestore_id, description, training_dataset_version,
                                  data_format, jobs, features_schema_data, feature_corr_data,
                                  training_dataset_desc_stats_data, features_histogram_data,
                                  cluster_analysis_data, training_dataset_type, training_dataset_dto_type,
                                  settings, hopsfs_connector_id = None, s3_connector_id = None):
    """
    Sends a POST request to Hopsworks that creates a new training dataset

    Args:
        :training_dataset: the name of the training dataset
        :featurestore_id: the id of the featurestore where the training dataset resides
        :description: a description of the training dataset
        :training_dataset_version: the version of the training dataset (defaults to 1)
        :data_format: the format of the training dataset
        :jobs: list of Hopsworks jobs linked to the training dataset
        :features_schema_data: the schema of the training dataset
        :feature_corr_data: json-string with the feature correlation matrix of the training dataset
        :training_dataset_desc_stats_data: json-string with the descriptive statistics of the training dataset
        :features_histogram_data: list of json-strings with histogram data for the features in the training dataset
        :cluster_analysis_data: the clusters from cluster analysis on the dataset
        :training_dataset_type: type of the training dataset (external or hopsfs)
        :training_dataset_dto_type: type of the JSON DTO for the backend
        :settings: featurestore settings; its external_training_dataset_type and hopsfs_training_dataset_type
                   attributes are compared against training_dataset_type
        :hopsfs_connector_id: id of the connector for a hopsfs training dataset
        :s3_connector_id: id of the connector for a s3 training dataset

    Returns:
        the HTTP response

    Raises:
        :RestAPIError: if the HTTP status code of the response is neither 200 nor 201
    """
    rest = constants.REST_CONFIG
    slash = constants.DELIMITERS.SLASH_DELIMITER

    json_contents = {
        rest.JSON_TRAINING_DATASET_NAME: training_dataset,
        rest.JSON_TRAINING_DATASET_VERSION: training_dataset_version,
        rest.JSON_TRAINING_DATASET_DESCRIPTION: description,
        rest.JSON_TRAINING_DATASET_JOBS: _pre_process_jobs_list(jobs),
        rest.JSON_TRAINING_DATASET_SCHEMA: features_schema_data,
        rest.JSON_TRAINING_DATASET_FEATURE_CORRELATION: feature_corr_data,
        rest.JSON_TRAINING_DATASET_DESC_STATS: training_dataset_desc_stats_data,
        rest.JSON_TRAINING_DATASET_FEATURES_HISTOGRAM: features_histogram_data,
        rest.JSON_TRAINING_DATASET_CLUSTERS: cluster_analysis_data,
        rest.JSON_TRAINING_DATASET_FORMAT: data_format,
        rest.JSON_TYPE: training_dataset_dto_type,
        rest.JSON_FEATURESTORE_SETTINGS_TRAINING_DATASET_TYPE: training_dataset_type
    }
    # The connector id that is sent depends on the type of the training dataset
    if training_dataset_type == settings.external_training_dataset_type:
        json_contents[rest.JSON_TRAINING_DATASET_S3_CONNECTOR_ID] = s3_connector_id
    if training_dataset_type == settings.hopsfs_training_dataset_type:
        json_contents[rest.JSON_TRAINING_DATASET_HOPSFS_CONNECTOR_ID] = hopsfs_connector_id

    request_body = json.dumps(json_contents)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    connection = util._get_http_connection(https=True)

    resource_url = slash + slash.join([
        rest.HOPSWORKS_REST_RESOURCE,
        rest.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        rest.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        rest.HOPSWORKS_TRAININGDATASETS_RESOURCE,
    ])

    response = util.send_request(connection, constants.HTTP_CONFIG.HTTP_POST, resource_url,
                                 body=request_body, headers=request_headers)
    response_object = json.loads(response.read())

    # The status is read from "code" on python 3 and from "status" on python 2
    if sys.version_info > (3, 0):
        status_code = response.code
    else:
        status_code = response.status

    if status_code != 201 and status_code != 200:
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        message_template = ("Could not create training dataset (url: {}), server response: \n "
                            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}")
        raise RestAPIError(message_template.format(
            resource_url, status_code, response.reason, error_code, error_msg, user_msg))
    return response_object
def _update_featuregroup_stats_rest(featuregroup_id, featurestore_id, feature_corr, featuregroup_desc_stats_data,
                                    features_histogram_data, cluster_analysis_data, featuregroup_type,
                                    featuregroup_dto_type, jobs):
    """
    Sends a PUT request to Hopsworks that updates the statistics of a single featuregroup

    Args:
        :featuregroup_id: id of the featuregroup
        :featurestore_id: id of the featurestore where the featuregroup resides
        :feature_corr: the feature correlation matrix
        :featuregroup_desc_stats_data: the descriptive statistics of the featuregroup
        :features_histogram_data: the histograms of the features in the featuregroup
        :cluster_analysis_data: the clusters from cluster analysis on the featuregroup
        :featuregroup_type: type of the featuregroup (on-demand or cached)
        :featuregroup_dto_type: type of the JSON DTO for the backend
        :jobs: a list of jobs for updating the feature group stats

    Returns:
        The body of the HTTP response, parsed from JSON

    Raises:
        :RestAPIError: if Hopsworks answered with an HTTP code other than 200
    """
    payload = {
        constants.REST_CONFIG.JSON_FEATUREGROUP_JOBS: _pre_process_jobs_list(jobs),
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURE_CORRELATION: feature_corr,
        constants.REST_CONFIG.JSON_FEATUREGROUP_DESC_STATS: featuregroup_desc_stats_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_HISTOGRAM: features_histogram_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_CLUSTERS: cluster_analysis_data,
        constants.REST_CONFIG.JSON_TYPE: featuregroup_dto_type,
        constants.REST_CONFIG.JSON_FEATURESTORE_SETTINGS_FEATUREGROUP_TYPE: featuregroup_type,
    }
    # allow_nan=False makes serialization raise on NaN/Infinity instead of emitting them
    request_body = json.dumps(payload, allow_nan=False)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    http_method = constants.HTTP_CONFIG.HTTP_PUT
    connection = util._get_http_connection(https=True)

    # The leading empty segment yields the leading slash of the path
    slash = constants.DELIMITERS.SLASH_DELIMITER
    featuregroup_path = slash.join([
        "",
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        constants.REST_CONFIG.HOPSWORKS_FEATUREGROUPS_RESOURCE,
        str(featuregroup_id),
    ])
    # Only the statistics are updated, the metadata update flag is switched off
    query_string = (constants.REST_CONFIG.JSON_FEATURESTORE_UPDATE_STATS_QUERY_PARAM + "=true"
                    + constants.DELIMITERS.AMPERSAND_DELIMITER
                    + constants.REST_CONFIG.JSON_FEATURESTORE_UPDATE_METADATA_QUERY_PARAM + "=false")
    resource_url = featuregroup_path + "?" + query_string

    response = util.send_request(connection, http_method, resource_url, body=request_body, headers=request_headers)
    response_object = json.loads(response.read())

    # The HTTP code is read from `code` on python 3 and from `status` on python 2
    if sys.version_info > (3, 0):
        http_code = response.code
    else:
        http_code = response.status

    if http_code == 200:
        return response_object

    error_code, error_msg, user_msg = util._parse_rest_error(response_object)
    raise RestAPIError(
        "Could not update featuregroup stats (url: {}), server response: \n "
        "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
            resource_url, http_code, response.reason, error_code, error_msg, user_msg))
def _create_featuregroup_rest(featuregroup, featurestore_id, description, featuregroup_version, jobs,
                              features_schema, feature_corr_data, featuregroup_desc_stats_data,
                              features_histogram_data, cluster_analysis_data, featuregroup_type,
                              featuregroup_dto_type, sql_query, jdbc_connector_id):
    """
    Sends a POST request to Hopsworks that creates a new featuregroup with the given metadata

    Args:
        :featuregroup: the name of the featuregroup
        :featurestore_id: id of the featurestore where the featuregroup resides
        :description: a description of the featuregroup
        :featuregroup_version: the version of the featuregroup
        :jobs: list of Hopsworks Jobs linked to the feature group
        :features_schema: the schema of the featuregroup
        :feature_corr_data: json-string with the feature correlation matrix of the featuregroup
        :featuregroup_desc_stats_data: json-string with the descriptive statistics of the featuregroup
        :features_histogram_data: list of json-strings with histogram data for the features in the featuregroup
        :cluster_analysis_data: cluster analysis for the featuregroup
        :featuregroup_type: type of the featuregroup (on-demand or cached)
        :featuregroup_dto_type: type of the JSON DTO for the backend
        :sql_query: SQL Query for On-demand feature groups
        :jdbc_connector_id: id of the jdbc_connector for on-demand feature groups

    Returns:
        The body of the HTTP response, parsed from JSON

    Raises:
        :RestAPIError: if Hopsworks answered with an HTTP code other than 200 or 201
    """
    payload = {
        constants.REST_CONFIG.JSON_FEATUREGROUP_NAME: featuregroup,
        constants.REST_CONFIG.JSON_FEATUREGROUP_VERSION: featuregroup_version,
        constants.REST_CONFIG.JSON_FEATUREGROUP_DESCRIPTION: description,
        constants.REST_CONFIG.JSON_FEATUREGROUP_JOBS: _pre_process_jobs_list(jobs),
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES: features_schema,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURE_CORRELATION: feature_corr_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_DESC_STATS: featuregroup_desc_stats_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_HISTOGRAM: features_histogram_data,
        constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_CLUSTERS: cluster_analysis_data,
        constants.REST_CONFIG.JSON_TYPE: featuregroup_dto_type,
        constants.REST_CONFIG.JSON_FEATURESTORE_SETTINGS_FEATUREGROUP_TYPE: featuregroup_type,
    }
    # The query and the JDBC connector are only sent for on-demand feature groups
    if featuregroup_type == constants.REST_CONFIG.JSON_FEATUREGROUP_ON_DEMAND_TYPE:
        payload.update({
            constants.REST_CONFIG.JSON_FEATUREGROUP_ON_DEMAND_QUERY: sql_query,
            constants.REST_CONFIG.JSON_FEATUREGROUP_JDBC_CONNECTOR_ID: jdbc_connector_id,
        })

    request_body = json.dumps(payload)
    request_headers = {constants.HTTP_CONFIG.HTTP_CONTENT_TYPE: constants.HTTP_CONFIG.HTTP_APPLICATION_JSON}
    http_method = constants.HTTP_CONFIG.HTTP_POST
    connection = util._get_http_connection(https=True)

    # The leading empty segment yields the leading slash of the path
    slash = constants.DELIMITERS.SLASH_DELIMITER
    resource_url = slash.join([
        "",
        constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE,
        constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE,
        hdfs.project_id(),
        constants.REST_CONFIG.HOPSWORKS_FEATURESTORES_RESOURCE,
        str(featurestore_id),
        constants.REST_CONFIG.HOPSWORKS_FEATUREGROUPS_RESOURCE,
    ])

    response = util.send_request(connection, http_method, resource_url, body=request_body, headers=request_headers)
    response_object = json.loads(response.read())

    # The HTTP code is read from `code` on python 3 and from `status` on python 2
    if sys.version_info > (3, 0):
        http_code = response.code
    else:
        http_code = response.status

    if http_code not in (200, 201):
        error_code, error_msg, user_msg = util._parse_rest_error(response_object)
        raise RestAPIError(
            "Could not create feature group (url: {}), server response: \n "
            "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
                resource_url, http_code, response.reason, error_code, error_msg, user_msg))

    return response_object