def __get_log_file_path_list(self, url, handler):
    """ Returns a list of log file paths for this job """
    job = self.__load_job()

    # Get the directory that contains all the logs for this job.
    log_file_path = job.get_log_file_path()

    path_list = []
    # List the directory to get the full path to each log file,
    # dispatching on whether the log directory lives in S3, HDFS,
    # or on the local file system.
    if _file_util.is_s3_path(log_file_path):
        ec2_log_list = _file_util.list_s3(
            log_file_path, job.environment.get_credentials())
        if ec2_log_list:
            path_list.extend([log['path'] for log in ec2_log_list])
    elif _file_util.is_hdfs_path(log_file_path):
        hdfs_log_list = _file_util.list_hdfs(
            log_file_path, job.environment.hadoop_conf_dir)
        if hdfs_log_list:
            path_list.extend([log['path'] for log in hdfs_log_list])
    else:
        # Local path: return the directory itself.
        path_list.append(log_file_path)

    handler.write({'log_file_list': path_list})

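# A minimal client-side sketch (not part of the original module) of consuming
# the response written by __get_log_file_path_list above. The transport and
# endpoint are deployment-specific, so this only shows the expected JSON
# shape; `response_body` is a placeholder for whatever the monitoring
# endpoint actually returns.
import json

def _parse_log_file_list(response_body):
    # The handler writes a body of the form {'log_file_list': [...]};
    # return the list of paths, or an empty list if the key is missing.
    return json.loads(response_body).get('log_file_list', [])

# Example:
#   _parse_log_file_list('{"log_file_list": ["hdfs://nn/job/logs/commander.log"]}')
#   -> ['hdfs://nn/job/logs/commander.log']
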
def show_available_packages(turi_dist_path, hadoop_conf_dir=None):
    '''
    Show all available packages in a Hadoop Turi Distributed installation.

    Parameters
    ----------
    turi_dist_path : str
        The location where Turi Distributed is installed. This usually comes
        from your Hadoop administrator. This path must be a valid HDFS path.

    hadoop_conf_dir : str, optional
        Hadoop configuration directory where the Hadoop configuration files
        are stored. If not given, the configuration files are automatically
        searched for in your CLASSPATH. hadoop_conf_dir must be a local file
        path.

    Returns
    -------
    out : dict
        Dict of two lists: default_packages, in the format
        "rsa==3.1.4", "scikit-learn==0.16.1", "scipy==0.15.1", and
        user_packages, additional PyPI packages that have been uploaded to
        the Turi Distributed installation. user_packages has the format:
        "names-0.3.0.tar.gz", "boto-2.33.0-py2.py3-none-any.whl", ...
    '''
    hadoop_conf_dir = _file_util.expand_full_path(
        hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_turi_distr_param(turi_dist_path, hadoop_conf_dir)

    conda_list = turi_dist_path + HadoopCluster._DIST_CONDA_LIST
    user_list = turi_dist_path + HadoopCluster._DIST_USER_PKG

    packages = _file_util.read_file_to_string_hdfs(
        conda_list, hadoop_conf_dir=hadoop_conf_dir)
    if packages is None:
        raise RuntimeError(
            "It seems like you do not have a valid Turi Distributed"
            " installation. Please contact your Hadoop administrator.")

    # Each line of the conda package listing has the form
    # "<name> <version> <build>"; convert it to a pip-style
    # requirement string, "<name>==<version>".
    output_lines = []
    for line in packages.split(_os.linesep):
        parts = line.split()
        if len(parts) == 3:
            output_lines.append('%s==%s' % (parts[0], parts[1]))

    result = {'default_packages': output_lines}

    # User packages are reported by the file names they were uploaded under.
    user_add = _file_util.list_hdfs(user_list, hadoop_conf_dir=hadoop_conf_dir)
    result['user_packages'] = [_os.path.basename(x['path']) for x in user_add]

    return result

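# Usage sketch for show_available_packages (illustrative only): the HDFS path
# and hadoop_conf_dir below are placeholders, not values from this codebase;
# substitute the ones provided by your Hadoop administrator.
if __name__ == '__main__':
    pkgs = show_available_packages(
        'hdfs://namenode:8020/user/turi/turi_distributed',
        hadoop_conf_dir='/etc/hadoop/conf')
    print('Default packages: %s' % ', '.join(pkgs['default_packages']))
    print('User packages: %s' % ', '.join(pkgs['user_packages']))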