import os

from hops import hdfs


def _copyToHdfsOverwrite(hdfs_filename):
    """Copies the file named by hdfs_filename's basename from the current
    working directory into HDFS, overwriting any existing copy."""
    local_file = os.path.basename(hdfs_filename)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if hdfs_path.endswith(local_file):
        # copy_to_hdfs expects the directory to copy to, excluding the file name
        hdfs_path = hdfs_path[:-len(local_file)]
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)
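
A minimal usage sketch; the file name and project-relative HDFS path below are hypothetical. The file must already exist in the current working directory, since only the basename of the argument is copied:

# Hypothetical example: 'report.csv' already exists in the current
# working directory; 'Resources/report.csv' is project-relative.
_copyToHdfsOverwrite('Resources/report.csv')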
Example #2
import os

from hops import constants, hdfs


def _export_local_model(local_model_path, model_dir_hdfs, overwrite):
    """
    Exports a local directory or file of model files to the Hopsworks "Models" dataset.

    Args:
        :local_model_path: the path to the local model files
        :model_dir_hdfs: path to the directory in HDFS to put the model files
        :overwrite: boolean flag whether to overwrite existing model files

    Returns:
        the path to the exported model files in HDFS
    """
    if os.path.isdir(local_model_path):
        if not local_model_path.endswith(constants.DELIMITERS.SLASH_DELIMITER):
            local_model_path = local_model_path + constants.DELIMITERS.SLASH_DELIMITER
        for filename in os.listdir(local_model_path):
            hdfs.copy_to_hdfs(local_model_path + filename,
                              model_dir_hdfs,
                              overwrite=overwrite)
    elif os.path.isfile(local_model_path):
        hdfs.copy_to_hdfs(local_model_path,
                          model_dir_hdfs,
                          overwrite=overwrite)

    return model_dir_hdfs
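
A minimal usage sketch; the local directory and HDFS path below are hypothetical:

# Hypothetical example: copy every file under ./mnist_model into the
# Models dataset, replacing any files that are already there.
exported_path = _export_local_model('mnist_model', 'Models/mnist/1', overwrite=True)
print(exported_path)  # 'Models/mnist/1'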
Example #3
import os
import shutil

import numpy as np

from hops import hdfs


def save(hdfs_filename, data):
    """
    Saves a numpy array to a file in HopsFS.

    Args:
        :hdfs_filename: a full HDFS pathname or a relative one (relative to your Project's path in HDFS); should end in ".npy", since np.save appends that suffix locally when it is missing
        :data: numpy array

    Raises:
        IOError: If the local file does not exist.
    """
    local_file = os.path.basename(hdfs_filename)
    np.save(local_file, data)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if hdfs_path.endswith(local_file):
        # copy_to_hdfs expects the directory to copy to, excluding the file name
        hdfs_path = hdfs_path[:-len(local_file)]
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)
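
A minimal usage sketch; the project-relative path below is hypothetical. The ".npy" suffix matters because np.save appends it to the local file when missing:

import numpy as np

# Hypothetical example: store a 2x3 array in the project's Resources dataset.
save('Resources/weights.npy', np.random.rand(2, 3))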
def _store_local_tensorboard(local_tb_path, hdfs_exec_logdir):
    """
    Copies the contents of a local TensorBoard log directory to the
    execution's log directory in HDFS, then removes the local directory.

    Args:
        :local_tb_path: path to the local TensorBoard log directory
        :hdfs_exec_logdir: path to the execution's log directory in HDFS
    """
    if os.path.exists(local_tb_path):
        tb_contents = os.listdir(local_tb_path)
        for entry in tb_contents:
            hdfs.copy_to_hdfs(local_tb_path + '/' + entry, hdfs_exec_logdir)
        try:
            shutil.rmtree(local_tb_path)
        except OSError:
            # best-effort cleanup; ignore failures to remove the local copy
            pass
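
A minimal usage sketch; both paths below are hypothetical:

# Hypothetical example: mirror local TensorBoard event files into the
# execution's HDFS log directory, then delete the local copies.
_store_local_tensorboard('/tmp/tensorboard_logs', 'Experiments/application_1_0001/run_1')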
Example #5
import os

import numpy as np

from hops import hdfs


def save(hdfs_filename, data):
    """
    Saves a numpy array to a file in HDFS.

    Args:
        :hdfs_filename: a full HDFS pathname or a relative one (relative to your Project's path in HDFS); should end in ".npy", since np.save appends that suffix locally when it is missing
        :data: numpy array

    Raises:
        IOError: If the local file does not exist.
    """
    local_file = os.path.basename(hdfs_filename)
    np.save(local_file, data)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if hdfs_path.endswith(local_file):
        # copy_to_hdfs expects the directory to copy to, excluding the file name
        hdfs_path = hdfs_path[:-len(local_file)]
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)
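
Because the helper passes overwrite=True to copy_to_hdfs, repeated saves to the same (hypothetical) path simply replace the earlier copy:

import numpy as np

# Hypothetical path: the second call overwrites the array stored by the first.
save('Resources/embeddings.npy', np.zeros((4, 4)))
save('Resources/embeddings.npy', np.ones((4, 4)))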
def _upload_file_output(retval, hdfs_exec_logdir):
    """
    Uploads any local files referenced as values in the retval dict to the
    execution's log directory in HDFS, and rewrites each uploaded value as a
    project-relative HDFS path.
    """
    if isinstance(retval, dict):
        for metric_key in retval.keys():
            value = str(retval[metric_key])
            if '/' in value or os.path.exists(os.getcwd() + '/' + value):
                if os.path.exists(value):  # absolute path
                    if hdfs.exists(hdfs_exec_logdir + '/' +
                                   value.split('/')[-1]):
                        hdfs.delete(hdfs_exec_logdir + '/' +
                                    value.split('/')[-1],
                                    recursive=False)
                    hdfs.copy_to_hdfs(value, hdfs_exec_logdir)
                    os.remove(value)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    # replace the local path with a project-relative HDFS path
                    retval[metric_key] = hdfs_exec_logdir[
                        len(hdfs.abs_path(hdfs.project_path())
                            ):] + '/' + value.split('/')[-1]
                elif os.path.exists(os.getcwd() + '/' +
                                    value):  # path relative to the working directory
                    output_file = os.getcwd() + '/' + value
                    if hdfs.exists(hdfs_exec_logdir + '/' + value):
                        hdfs.delete(hdfs_exec_logdir + '/' + value,
                                    recursive=False)
                    hdfs.copy_to_hdfs(value, hdfs_exec_logdir)
                    os.remove(output_file)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[
                        len(hdfs.abs_path(hdfs.project_path())
                            ):] + '/' + output_file.split('/')[-1]
                elif value.startswith('Experiments') and value.endswith(
                        'output.log'):
                    # the execution's output.log is already in HDFS; skip it
                    continue
                elif value.startswith('Experiments') and hdfs.exists(
                        hdfs.project_path() + '/' + value):
                    # already a project-relative HDFS path; copy within HDFS
                    hdfs.cp(hdfs.project_path() + '/' + value,
                            hdfs_exec_logdir)
                else:
                    raise Exception(
                        'Could not find file or directory on path ' +
                        str(value))
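
A sketch of how this helper might be driven; the dict contents and log directory below are hypothetical. Values naming local files are shipped to HDFS, while plain metric values are left alone:

# Hypothetical example: 'confusion.png' exists in the working directory;
# 'accuracy' is a plain metric and is left untouched.
results = {'accuracy': '0.93', 'confusion_matrix': 'confusion.png'}
_upload_file_output(results, 'Experiments/application_1_0001/run_1')
# results['confusion_matrix'] now holds a project-relative HDFS path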