import os
import shutil

import numpy as np

# Assumption: these helpers live alongside the Hopsworks "hops" library,
# which provides the hdfs and constants modules used below.
from hops import constants
from hops import hdfs


def _copyToHdfsOverwrite(hdfs_filename):
    """
    Copies the local file with the same base name as hdfs_filename to HDFS,
    overwriting any file already at the target path.

    Args:
        :hdfs_filename: a full or project-relative HDFS path to copy to
    """
    local_file = os.path.basename(hdfs_filename)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if local_file in hdfs_path:
        # copy_to_hdfs expects the directory to copy to, excluding the file name
        hdfs_path = hdfs_path.replace(local_file, "")
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)

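# A minimal usage sketch for _copyToHdfsOverwrite(), assuming it runs inside
# a Hopsworks job, that "report.csv" exists in the current working directory,
# and that the project has a "Resources" dataset (both names are hypothetical):
#
#   _copyToHdfsOverwrite("Resources/report.csv")
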
def _export_local_model(local_model_path, model_dir_hdfs, overwrite):
    """
    Exports a local directory of model files to the Hopsworks "Models" dataset

    Args:
        :local_model_path: the path to the local model files
        :model_dir_hdfs: path to the directory in HDFS to put the model files
        :overwrite: boolean flag whether to overwrite existing model files

    Returns:
        the path to the exported model files in HDFS
    """
    if os.path.isdir(local_model_path):
        if not local_model_path.endswith(constants.DELIMITERS.SLASH_DELIMITER):
            local_model_path = local_model_path + constants.DELIMITERS.SLASH_DELIMITER
        # copy each entry of the model directory into the target HDFS directory
        for filename in os.listdir(local_model_path):
            hdfs.copy_to_hdfs(local_model_path + filename, model_dir_hdfs, overwrite=overwrite)
    elif os.path.isfile(local_model_path):
        hdfs.copy_to_hdfs(local_model_path, model_dir_hdfs, overwrite=overwrite)
    return model_dir_hdfs

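# A minimal usage sketch for _export_local_model(), assuming a hypothetical
# local directory "mnist_model/" of model files and a version directory under
# the project's "Models" dataset as the target:
#
#   exported = _export_local_model("mnist_model/", "Models/mnist/1", overwrite=True)
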
def _store_local_tensorboard(local_tb_path, hdfs_exec_logdir):
    """
    Copies the contents of a local TensorBoard log directory to the
    experiment's log directory in HDFS, then removes the local directory.

    Args:
        :local_tb_path: local directory containing the TensorBoard event files
        :hdfs_exec_logdir: HDFS log directory of the experiment to copy the files to
    """
    if os.path.exists(local_tb_path):
        tb_contents = os.listdir(local_tb_path)
        for entry in tb_contents:
            hdfs.copy_to_hdfs(local_tb_path + '/' + entry, hdfs_exec_logdir)
        try:
            shutil.rmtree(local_tb_path)
        except OSError:
            # best-effort cleanup; leaving the local files behind is harmless
            pass

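# A minimal usage sketch for _store_local_tensorboard(), assuming training
# wrote TensorBoard event files to a local logdir; both paths are hypothetical:
#
#   _store_local_tensorboard("/tmp/tensorboard_logs",
#                            "Experiments/application_1_1/run_1/tensorboard")
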
def save(hdfs_filename, data):
    """
    Saves a numpy array to a file in HDFS

    Args:
        :hdfs_filename: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS)
        :data: numpy array

    Raises:
        IOError: If the local file does not exist
    """
    local_file = os.path.basename(hdfs_filename)
    np.save(local_file, data)
    hdfs_path = hdfs._expand_path(hdfs_filename, exists=False)
    if local_file in hdfs_path:
        # copy_to_hdfs expects the directory to copy to, excluding the file name
        hdfs_path = hdfs_path.replace(local_file, "")
    hdfs.copy_to_hdfs(local_file, hdfs_path, overwrite=True)

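# A minimal usage sketch for save(), assuming it runs inside a Hopsworks
# project; "Resources/weights.npy" is a hypothetical project-relative path:
#
#   import numpy as np
#   weights = np.arange(10)
#   save("Resources/weights.npy", weights)
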
def _upload_file_output(retval, hdfs_exec_logdir):
    """
    Uploads any local files referenced in an experiment's return dict to the
    experiment's HDFS log directory and rewrites the dict entries to
    project-relative HDFS paths.

    Args:
        :retval: the dict returned by the experiment function
        :hdfs_exec_logdir: HDFS log directory of the experiment

    Raises:
        Exception: If a referenced path can be resolved neither locally nor in HDFS
    """
    if type(retval) is dict:
        for metric_key in retval.keys():
            value = str(retval[metric_key])
            if '/' in value or os.path.exists(os.getcwd() + '/' + value):
                if os.path.exists(value):  # absolute local path
                    if hdfs.exists(hdfs_exec_logdir + '/' + value.split('/')[-1]):
                        hdfs.delete(hdfs_exec_logdir + '/' + value.split('/')[-1], recursive=False)
                    hdfs.copy_to_hdfs(value, hdfs_exec_logdir)
                    os.remove(value)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    # strip the project prefix so the stored path is project-relative
                    retval[metric_key] = hdfs_exec_logdir[len(hdfs.abs_path(hdfs.project_path())):] \
                        + '/' + value.split('/')[-1]
                elif os.path.exists(os.getcwd() + '/' + value):  # path relative to the working directory
                    output_file = os.getcwd() + '/' + value
                    if hdfs.exists(hdfs_exec_logdir + '/' + value):
                        hdfs.delete(hdfs_exec_logdir + '/' + value, recursive=False)
                    hdfs.copy_to_hdfs(value, hdfs_exec_logdir)
                    os.remove(output_file)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[len(hdfs.abs_path(hdfs.project_path())):] \
                        + '/' + output_file.split('/')[-1]
                elif value.startswith('Experiments') and value.endswith('output.log'):
                    # the experiment log is already in place, nothing to upload
                    continue
                elif value.startswith('Experiments') and hdfs.exists(hdfs.project_path() + '/' + value):
                    # the value already points at a file in the project's HDFS space
                    hdfs.cp(hdfs.project_path() + '/' + value, hdfs_exec_logdir)
                else:
                    raise Exception('Could not find file or directory on path ' + str(value))

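# A minimal usage sketch for _upload_file_output(), assuming the experiment
# function returned a metrics dict referencing a local artifact; all paths
# are hypothetical:
#
#   results = {'accuracy': 0.93, 'confusion_matrix': 'confusion_matrix.png'}
#   _upload_file_output(results, hdfs.project_path() + 'Experiments/run_1')
#   # results['confusion_matrix'] now holds a project-relative HDFS path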