コード例 #1
0
def _do_get_training_dataset_path(training_dataset_name, featurestore_metadata, training_dataset_version=1):
    """
    Resolves the absolute HDFS path of a given training dataset (name + version) in a featurestore

    The dataset is looked up in the supplied featurestore metadata; the path is then built from
    the dataset's HopsFS store path, its name and a suffix that depends on its data format.

    Args:
        :training_dataset_name: name of the training dataset
        :featurestore_metadata: metadata of the featurestore (its training_datasets are searched)
        :training_dataset_version: version of the training dataset

    Returns:
        The HDFS path to the training dataset, as returned by util.abspath
    """
    fs_constants = constants.FEATURE_STORE
    dataset = query_planner._find_training_dataset(
        featurestore_metadata.training_datasets,
        training_dataset_name,
        training_dataset_version,
    )
    store_root = dataset.hopsfs_training_dataset.hdfs_store_path

    # Default layout: <store path>/<dataset name>
    location = store_root + constants.DELIMITERS.SLASH_DELIMITER + dataset.name

    fmt = dataset.data_format
    # npy and hdf5 datasets carry a format-specific suffix after the dataset name
    if fmt == fs_constants.TRAINING_DATASET_NPY_FORMAT:
        location += fs_constants.TRAINING_DATASET_NPY_SUFFIX
    if fmt == fs_constants.TRAINING_DATASET_HDF5_FORMAT:
        location += fs_constants.TRAINING_DATASET_HDF5_SUFFIX
    # Image datasets resolve to the store path itself; the dataset name is not appended
    if fmt == fs_constants.TRAINING_DATASET_IMAGE_FORMAT:
        location = store_root

    # abspath means that "hdfs://namenode:port/" is prepended
    return util.abspath(location)
コード例 #2
0
def _get_hopsfs_training_dataset_path(training_dataset_name, hdfs_store_path, data_format):
    """
    Builds the absolute HopsFS path of a training dataset in the feature store

    Args:
        :training_dataset_name: name of the training dataset
        :hdfs_store_path: the hdfs path to the dataset where all the training datasets are stored
        :data_format: data format of the training dataset

    Returns:
        the hdfs path to the training dataset, as returned by util.abspath
    """
    # Default layout: <store path>/<dataset name>
    named_location = hdfs_store_path + constants.DELIMITERS.SLASH_DELIMITER + training_dataset_name

    # Image datasets resolve to the store path itself; the dataset name is not appended.
    # NOTE(review): no format-specific suffix is added for any other format here - confirm intended.
    is_image_dataset = data_format == constants.FEATURE_STORE.TRAINING_DATASET_IMAGE_FORMAT
    location = hdfs_store_path if is_image_dataset else named_location

    # abspath means that "hdfs://namenode:port/" is prepended
    return util.abspath(location)