Example #1
def remove_xattr(hdfs_path, xattr_name):
    """
    Remove an extended attribute attached to an hdfs_path

    Args:
        :hdfs_path: path of a file or directory
        :xattr_name: name of the extended attribute

    Returns:
        None
    """
    hdfs_path = urllib.parse.quote(hdfs._expand_path(hdfs_path))
    headers = {
        constants.HTTP_CONFIG.HTTP_CONTENT_TYPE:
        constants.HTTP_CONFIG.HTTP_APPLICATION_JSON
    }
    method = constants.HTTP_CONFIG.HTTP_DELETE
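    # The concatenation below yields a resource URL of the form:
    #   /<REST_RESOURCE>/<PROJECT_RESOURCE>/<project_id>/<XATTR_RESOURCE>/<hdfs_path>?<XATTRS_PARAM_NAME>=<xattr_name>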
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hdfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_XATTR_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hdfs_path + constants.DELIMITERS.QUESTION_MARK_DELIMITER + constants.XATTRS.XATTRS_PARAM_NAME + \
                   constants.DELIMITERS.JDBC_CONNECTION_STRING_VALUE_DELIMITER + xattr_name
    response = util.send_request(method, resource_url, headers=headers)
    if response.status_code >= 400:
        response_object = response.json()
        error_code, error_msg, user_msg = util._parse_rest_error(
            response_object)
        raise RestAPIError("Could not remove extened attributes from a path (url: {}), server response: \n " \
                           "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
            resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))
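A minimal usage sketch, assuming remove_xattr is imported from the hops module that defines it; the path and attribute name are illustrative:

remove_xattr("Resources/mydata.csv", "quality")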
Example #2
def _copyToHdfsOverwrite(hdfs_filename):
    local_file = os.path.basename(hdfs_filename)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if local_file in hdfs_path:
        # copy_to_hdfs expects directory to copy to, excluding the file name
        hdfs_path = hdfs_path.replace(local_file, "")
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)
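For a caller, this private helper reduces to the public copy_to_hdfs call it wraps. A sketch under the assumption that "weights.npy" already exists in the current working directory (the filename is illustrative):

from hops import hdfs

# Copy ./weights.npy into the project's Resources directory, replacing any existing copy
hdfs.copy_to_hdfs("weights.npy", "Resources", overwrite=True)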
Example #3
def create_or_update(artifact_path, serving_name, serving_type="TENSORFLOW", model_version=1,
                     batching_enabled=False, topic_name="CREATE", num_partitions=1, num_replicas=1,
                     instances=1, update=False):
    """
    Creates or updates a serving in Hopsworks

    Example use-case:

    >>> from hops import serving
    >>> serving.create_or_update("/Models/mnist", "mnist", "TENSORFLOW", 1)

    Args:
        :artifact_path: path to the artifact to serve (tf model dir or sklearn script)
        :serving_name: name of the serving to create
        :serving_type: type of the serving, e.g "TENSORFLOW" or "SKLEARN"
        :model_version: version of the model to serve
        :batching_enabled: boolean flag whether to enable batching for the inference requests
        :topic_name: name of the kafka topic for inference logging, e.g "CREATE" to create a new one
        :num_partitions: if a new kafka topic is to be created, number of partitions of the new topic
        :num_replicas: if a new kafka topic is to be created, replication factor of the new topic
        :instances: the number of serving instances (the more instances, the more inference requests can
        be served in parallel)
        :update: boolean flag whether to update an existing serving; otherwise a new serving is created

    Returns:
          None
    """
    serving_id = None
    if update:
        serving_id = get_id(serving_name)
    artifact_path = hdfs._expand_path(artifact_path)
    _validate_user_serving_input(artifact_path, serving_name, serving_type, model_version, batching_enabled,
                                 num_partitions, num_replicas, instances)
    artifact_path = hdfs.get_plain_path(artifact_path)
    print("Creating a serving for model {} ...".format(serving_name))
    _create_or_update_serving_rest(artifact_path, serving_name, serving_type, model_version, batching_enabled,
                                   topic_name, num_partitions, num_replicas, serving_id, instances)
    print("Serving for model {} successfully created".format(serving_name))
Example #4
def create_or_update(serving_name,
                     artifact_path,
                     model_version=1,
                     model_server=None,
                     kfserving=False,
                     batching_enabled=False,
                     topic_name="CREATE",
                     num_partitions=1,
                     num_replicas=1,
                     instances=1):
    """
    Creates a serving in Hopsworks if it does not exist, otherwise updates the existing one.
    If the model server is not specified, it is inferred from the artifact files.

    Example use-case:

    >>> from hops import serving
    >>> serving.create_or_update("mnist", "/Models/mnist", 1)

    Args:
        :serving_name: name of the serving to create
        :artifact_path: path to the artifact to serve (tf model dir or python script implementing the Predict class)
        :model_version: version of the model to serve
        :model_server: name of the model server to deploy, e.g "TENSORFLOW_SERVING" or "FLASK"
        :kfserving: boolean flag whether to serve the model using the KFServing serving tool
        :batching_enabled: boolean flag whether to enable batching for the inference requests
        :topic_name: name of the kafka topic for inference logging, e.g "CREATE" to create a new one
        :num_partitions: if a new kafka topic is to be created, number of partitions of the new topic
        :num_replicas: if a new kafka topic is to be created, replication factor of the new topic
        :instances: the number of serving instances (the more instances, the more inference requests can
        be served in parallel)

    Returns:
          None
    """
    serving_id = get_id(serving_name)
    artifact_path = hdfs._expand_path(artifact_path)
    if model_server is None:
        model_server = _detect_model_server(artifact_path)

    _validate_user_serving_input(serving_name, artifact_path, model_version,
                                 model_server, kfserving, batching_enabled,
                                 topic_name, num_partitions, num_replicas,
                                 instances)
    artifact_path = hdfs.get_plain_path(artifact_path)
    print("Creating serving {} for artifact {} ...".format(
        serving_name, artifact_path))
    _create_or_update_serving_rest(serving_name, artifact_path, model_version,
                                   model_server, kfserving, batching_enabled,
                                   topic_name, num_partitions, num_replicas,
                                   serving_id, instances)
    print("Serving {} successfully created".format(serving_name))
Example #5
def save(hdfs_filename, data):
    """
    Saves a numpy array to a file in HopsFS 

    Args:
       :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS).
       :data: numpy array

    Raises:
      IOError: If the local file does not exist.
    """
    local_file = os.path.basename(hdfs_filename)
    np.save(local_file, data)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if local_file in hdfs_path:
        # copy_to_hdfs expects the destination directory, excluding the file name
        hdfs_path = hdfs_path.replace(local_file, "")
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)
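A usage sketch; the module name numpy_helper is an assumption about where this function lives in the hops package, and the path is illustrative:

import numpy as np
from hops import numpy_helper  # assumed module name

# Write the array locally with np.save, then copy it to <project>/Resources/weights.npy
numpy_helper.save("Resources/weights.npy", np.zeros((10, 10)))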
Example #6
def write_csv(hdfs_filename, dataframe, **kwds):
    """
      Writes a pandas dataframe to a comma-separated values (csv) text file in HDFS. Overwrites the file if it already exists.

      Args:
         :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS)
         :dataframe: a Pandas dataframe
         :**kwds: You can add any additional args found in pandas.to_csv(...) 

      Raises:
        IOError: If the file does not exist
    """
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    h = hdfs.get_fs()
    with h.open_file(hdfs_path, "wt") as f:
        dataframe.to_csv(f, **kwds)
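A usage sketch; the module name pandas_helper is an assumption, and the path is illustrative. Any pandas.to_csv keyword argument passes straight through **kwds:

import pandas as pd
from hops import pandas_helper  # assumed module name

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
pandas_helper.write_csv("Resources/example.csv", df, index=False)  # index=False is forwarded to to_csv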
Example #7
def load(hdfs_filename, **kwds):
    """
    Reads a file from HDFS into a Numpy Array

     Args:
       :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS).
       :**kwds: You can add any additional args found in numpy.load(...)

     Returns:
      A numpy array

     Raises:
      IOError: If the file does not exist
    """
    hdfs_path = hdfs._expand_path(hdfs_filename)
    local_path = hdfs.copy_to_local(hdfs_path)
    return np.load(local_path, **kwds)
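A round-trip sketch pairing load with the save function shown earlier (module name assumed, path illustrative):

from hops import numpy_helper  # assumed module name

# Stages the HDFS file onto local disk, then reads it back with numpy.load
arr = numpy_helper.load("Resources/weights.npy")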
Example #8
def save(hdfs_filename, data):
    """
    Saves a numpy array to a file in HDFS

    Args:
       :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS)
       :data: numpy array

    Raises:
      IOError: If the local file does not exist
    """
    local_file = os.path.basename(hdfs_filename)
    np.save(local_file, data)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if local_file in hdfs_path:
        # copy_to_hdfs expects directory to copy to, excluding the file name
        hdfs_path = hdfs_path.replace(local_file, "")
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)
Example #9
def read_csv(hdfs_filename, **kwds):
    """
      Reads a comma-separated values (csv) file from HDFS into a Pandas DataFrame

      Args:
         :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS)
         :**kwds: You can add any additional args found in pandas.read_csv(...) 

      Returns:
        A pandas dataframe

      Raises:
        IOError: If the file does not exist
    """
    hdfs_path = hdfs._expand_path(hdfs_filename)
    h = hdfs.get_fs()
    with h.open_file(hdfs_path, "rt") as f:
        data = pd.read_csv(f, **kwds)
    return data
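A usage sketch showing the **kwds passthrough to pandas.read_csv (module name assumed, path illustrative):

from hops import pandas_helper  # assumed module name

# sep=";" is forwarded unchanged to pandas.read_csv
df = pandas_helper.read_csv("Resources/example.csv", sep=";")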
Example #10
def read_json(hdfs_filename, **kwds):
    """
      Reads a JSON file from HDFS into a Pandas DataFrame

      Args:
         :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS)
         :**kwds: You can add any additional args found in pandas.read_json(...)

      Returns:
        A pandas dataframe

      Raises:
        IOError: If the file does not exist
    """
    hdfs_path = hdfs._expand_path(hdfs_filename)
    h = hdfs.get_fs()
    with h.open_file(hdfs_path, "rt") as f:
        data = pd.read_json(f, **kwds)
    return data
Example #11
def read_parquet(hdfs_filename, **kwds):
    """
      Load a parquet object from a HDFS path, returning a DataFrame.

      Args:
         :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS)
         :**kwds: You can add any additional args found in pandas.read_parquet(...)

      Returns:
        A pandas dataframe

      Raises:
        IOError: If the file does not exist
    """
    hdfs_path = hdfs._expand_path(hdfs_filename)
    h = hdfs.get_fs()
    # Parquet is a binary format, so the file is opened in binary mode
    with h.open_file(hdfs_path, "rb") as f:
        data = pd.read_parquet(f, **kwds)
    return data
Example #12
def get_xattr(hdfs_path, xattr_name=None):
    """
    Get the extended attribute attached to an hdfs_path.

    Args:
        :hdfs_path: path of a file or directory
        :xattr_name: name of the extended attribute

    Returns:
        A dictionary with the extended attribute(s) as key-value pair(s). If xattr_name is None,
        all extended attributes attached to the path are returned.
    """
    hdfs_path = urllib.parse.quote(hdfs._expand_path(hdfs_path))
    headers = {
        constants.HTTP_CONFIG.HTTP_CONTENT_TYPE:
        constants.HTTP_CONFIG.HTTP_APPLICATION_JSON
    }
    method = constants.HTTP_CONFIG.HTTP_GET
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hdfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_XATTR_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hdfs_path
    if xattr_name is not None:
        resource_url += constants.DELIMITERS.QUESTION_MARK_DELIMITER + constants.XATTRS.XATTRS_PARAM_NAME + \
                        constants.DELIMITERS.JDBC_CONNECTION_STRING_VALUE_DELIMITER + xattr_name

    response = util.send_request(method, resource_url, headers=headers)
    response_object = response.json()
    if response.status_code >= 400:
        error_code, error_msg, user_msg = util._parse_rest_error(
            response_object)
        raise RestAPIError("Could not get extened attributes attached to a path (url: {}), server response: \n " \
                           "HTTP code: {}, HTTP reason: {}, error code: {}, error msg: {}, user msg: {}".format(
            resource_url, response.status_code, response.reason, error_code, error_msg, user_msg))

    results = {}
    for item in response_object["items"]:
        results[item["name"]] = item["value"]
    return results
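A sketch of both call patterns (path and attribute name illustrative):

# All extended attributes on the path, as a {name: value} dict
all_attrs = get_xattr("Resources/mydata.csv")
# A single attribute, still returned as a one-entry dict
quality = get_xattr("Resources/mydata.csv", xattr_name="quality")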
Example #13
def read_excel(hdfs_filename, **kwds):
    """
      Reads an Excel file from HDFS into a Pandas DataFrame
      Args:
         :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS)
         :**kwds: You can add any additional args found in pandas.read_excel(...)

      Returns:
        A pandas dataframe

      Raises:
        IOError: If the file does not exist
    """
    hdfs_path = hdfs._expand_path(hdfs_filename)
    h = hdfs.get_fs()
    # Excel files are binary, so the file is opened in binary mode
    with h.open_file(hdfs_path, "rb") as f:
        data = pd.read_excel(f, **kwds)
    return data
Example #14
def create_or_update(
        serving_name,
        model_path,
        model_version=1,
        artifact_version=None,
        transformer=None,
        model_server=None,
        kfserving=None,
        batching_enabled=False,
        topic_name="CREATE",
        num_partitions=1,
        num_replicas=1,
        inference_logging=constants.MODEL_SERVING.INFERENCE_LOGGING_ALL,
        instances=1,
        transformer_instances=None,
        predictor_resource_config=None):
    """
    Creates a serving in Hopsworks if it does not exist, otherwise updates the existing one.
    If the model server is not specified, it is inferred from the artifact files.
    If a transformer is specified, KFServing is enabled by default.

    Example use-case:

    >>> from hops import serving
    >>> serving.create_or_update("mnist", "/Models/mnist")

    Args:
        :serving_name: name of the serving to create
        :model_path: path to the artifact to serve (tf model dir or python script implementing the Predict class)
        :model_version: version of the model to serve
        :artifact_version: version of the artifact to serve (Kubernetes only), e.g "CREATE", "MODEL-ONLY" or version number.
        :transformer: path to the transformer script (python script implementing the Transformer class).
        :model_server: name of the model server to deploy, e.g "TENSORFLOW_SERVING" or "FLASK"
        :kfserving: boolean flag whether to serve the model using KFServing serving tool
        :batching_enabled: boolean flag whether to enable batching for the inference requests
        :topic_name: name of the kafka topic for inference logging, e.g "CREATE" to create a new one, "NONE" to not use a kafka topic, or the name of an existing topic
        :num_partitions: if a new kafka topic is to be created, number of partitions of the new topic
        :num_replicas: if a new kafka topic is to be created, replication factor of the new topic
        :inference_logging: inference data to log into the Kafka topic, e.g "MODEL_INPUTS", "PREDICTIONS" or "ALL"
        :instances: the number of serving instances (the more instances, the more inference requests can
        be served in parallel)
        :transformer_instances: the number of transformer instances (the more instances, the more inference requests can
        be served in parallel)
        :predictor_resource_config: dict for setting resource configuration parameters required to serve the model, for
        example {'memory': 2048, 'cores': 1, 'gpus': 0}. Currently only supported if Hopsworks is deployed with Kubernetes installed.

    Returns:
          None
    """
    model_path = hdfs._expand_path(model_path)
    if model_server is None:
        model_server = _detect_model_server(model_path)
    if transformer is not None and kfserving is None:
        kfserving = True

    _validate_user_serving_input(
        serving_name, model_path, model_version, artifact_version, transformer,
        model_server, kfserving, batching_enabled, topic_name, num_partitions,
        num_replicas, inference_logging, instances, transformer_instances,
        predictor_resource_config)
    model_path = hdfs.get_plain_path(model_path)
    serving_id = get_id(serving_name)
    print("Creating serving {} for artifact {} ...".format(
        serving_name, model_path))
    _create_or_update_serving_rest(
        serving_id, serving_name, model_path, model_version, artifact_version,
        transformer, model_server, kfserving, batching_enabled, topic_name,
        num_partitions, num_replicas, inference_logging, instances,
        transformer_instances, predictor_resource_config)
    print("Serving {} successfully created".format(serving_name))
Example #15
def _copyHdfsToLocalOverwrite(hdfs_filename):
    hdfs_path = hdfs._expand_path(hdfs_filename)
    local_path = hdfs.copy_to_local(hdfs_path)
    return local_path
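A sketch of the helper in use (path illustrative): it stages the HDFS file into the local working directory, overwriting any prior copy, and returns the local path:

local_path = _copyHdfsToLocalOverwrite("Resources/weights.npy")
with open(local_path, "rb") as f:
    payload = f.read()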