def __get_log_file_content(self, url, handler):
    """Read a log file (local, S3, or HDFS) and write its content to the
    HTTP response.

    Parameters
    ----------
    url : str
        Request URL (unused here; part of the handler dispatch signature).
    handler
        Tornado-style request handler; provides get_argument, set_status,
        set_header and write.

    Behavior
    --------
    The file path comes from the 'log' query argument. Files larger than
    1 MB are rejected with HTTP 413; a path that is neither local, S3 nor
    HDFS yields HTTP 404. A missing 'log' argument writes an empty body
    with HTTP 200.
    """
    # Do NOT wrap in str(): str(None) == "None" is truthy, which made the
    # missing-argument case fall through into the path checks below with a
    # bogus path of "None".
    log_file_path = handler.get_argument("log", None)
    job = self.__load_job()
    content = ""
    max_size = 1048576  # max size is 1 MB; plain int works on both Py2 and Py3
    status_code = 200
    if log_file_path:
        try:
            if _file_util.is_local_path(log_file_path):
                if _os.path.getsize(log_file_path) > max_size:
                    raise RuntimeError(
                        "Cannot read file greater than max size.")
                else:
                    content = self.__load_local_log_file(log_file_path)
            elif _file_util.is_s3_path(log_file_path):
                content = _file_util.read_file_to_string_s3(
                    log_file_path, max_size,
                    job.environment.get_credentials())
            elif _file_util.is_hdfs_path(log_file_path):
                content = _file_util.read_file_to_string_hdfs(
                    log_file_path, max_size,
                    job.environment.hadoop_conf_dir)
            else:
                status_code = 404
                content = "Log file path (%s) is not valid." % log_file_path
        except RuntimeError:
            # Raised above for oversized local files; the S3/HDFS readers
            # presumably raise it for the same reason — TODO confirm.
            status_code = 413
            content = ("File size too large. Please load log file manually"
                       " at %s." % log_file_path)
    handler.set_status(status_code)
    handler.set_header("Content-Type", "text/plain")
    handler.write(content)
def __get_log_file_content(self, url, handler):
    """Fetch a log file (local, S3, or HDFS) and stream it back as the
    plain-text HTTP response.

    Parameters
    ----------
    url : str
        Request URL (unused; kept for the handler dispatch signature).
    handler
        Tornado-style request handler; provides get_argument, set_status,
        set_header and write.

    Status codes: 200 on success (or missing 'log' argument, with an empty
    body), 404 for an unrecognized path scheme, 413 when the file exceeds
    the 1 MB limit.
    """
    # Bug fix: str(handler.get_argument(...)) converted a missing argument
    # (None) into the truthy string "None", defeating the guard below.
    log_file_path = handler.get_argument("log", None)
    job = self.__load_job()
    content = ""
    # Bug fix: 1048576L is a Python-2-only long literal (SyntaxError on
    # Python 3); a plain int is equivalent on both.
    max_size = 1048576  # max size is 1 MB
    status_code = 200
    if log_file_path:
        try:
            if _file_util.is_local_path(log_file_path):
                if _os.path.getsize(log_file_path) > max_size:
                    raise RuntimeError(
                        "Cannot read file greater than max size.")
                else:
                    content = self.__load_local_log_file(log_file_path)
            elif _file_util.is_s3_path(log_file_path):
                content = _file_util.read_file_to_string_s3(
                    log_file_path, max_size,
                    job.environment.get_credentials())
            elif _file_util.is_hdfs_path(log_file_path):
                content = _file_util.read_file_to_string_hdfs(
                    log_file_path, max_size,
                    job.environment.hadoop_conf_dir)
            else:
                status_code = 404
                content = "Log file path (%s) is not valid." % log_file_path
        except RuntimeError:
            status_code = 413
            content = ("File size too large. Please load log file manually"
                       " at %s." % log_file_path)
    handler.set_status(status_code)
    handler.set_header("Content-Type", "text/plain")
    handler.write(content)
def show_available_packages(turi_dist_path, hadoop_conf_dir=None):
    '''
    Show all available packages in a Hadoop Turi Distributed installation.

    turi_dist_path : str
        The location where Turi Distributed is installed. This usually comes
        from your Hadoop Administrator. This path must be a valid HDFS path.

    hadoop_conf_dir : str, optional
        Hadoop configure directory where Hadoop configuration files are
        stored. If not given, the configuration file is automatically
        searched in your CLASSPATH. hadoop_conf_dir must be a local file
        path.

    Returns
    -------
    out : dict
        Dict of two lists, default_packages in the format:
        "rsa==3.1.4", "scikit-learn==0.16.1", "scipy==0.15.1"
        and user_packages, additional PyPi packages which have been uploaded
        to the Turi Distributed installation. user_packages has the format:
        "names-0.3.0.tar.gz", "boto-2.33.0-py2.py3-none-any.whl", ...

    Raises
    ------
    RuntimeError
        If the conda package list cannot be read from turi_dist_path.
    '''
    hadoop_conf_dir = _file_util.expand_full_path(
        hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_turi_distr_param(turi_dist_path, hadoop_conf_dir)

    conda_list = turi_dist_path + HadoopCluster._DIST_CONDA_LIST
    user_list = turi_dist_path + HadoopCluster._DIST_USER_PKG
    packages = _file_util.read_file_to_string_hdfs(
        conda_list, hadoop_conf_dir=hadoop_conf_dir)
    if packages is None:
        raise RuntimeError(
            "It seems like you do not have a valid Turi Distributed"
            " installation. Please contact your Hadoop administrator.")

    # Bug fix: split(_os.linesep) misses '\n'-separated HDFS content when
    # the local OS separator is '\r\n'; splitlines() handles both.
    output_lines = []
    for line in packages.splitlines():
        fields = line.split()  # conda list rows: name version build
        if len(fields) == 3:
            output_lines.append('%s==%s' % (fields[0], fields[1]))

    result = {'default_packages': output_lines}
    user_add = _file_util.list_hdfs(user_list, hadoop_conf_dir=hadoop_conf_dir)
    # Robustness: treat a None/empty listing (e.g. missing user-package
    # directory) as "no user packages" instead of raising TypeError.
    user = [_os.path.basename(x['path']) for x in (user_add or [])]
    result['user_packages'] = user
    return result
def show_available_packages(dato_dist_path, hadoop_conf_dir=None):
    '''
    Show all available packages in a Hadoop Dato Distributed installation.

    dato_dist_path : str
        The location where Dato Distributed is installed. This usually comes
        from your Hadoop Administrator. This path must be a valid HDFS path.

    hadoop_conf_dir : str, optional
        Hadoop configure directory where Hadoop configuration files are
        stored. If not given, the configuration file is automatically
        searched in your CLASSPATH. hadoop_conf_dir must be a local file
        path.

    Returns
    -------
    out : dict
        Dict of two lists, default_packages in the format:
        "rsa==3.1.4", "scikit-learn==0.16.1", "scipy==0.15.1"
        and user_packages, additional PyPi packages which have been uploaded
        to the Dato Distributed installation. user_packages has the format:
        "names-0.3.0.tar.gz", "boto-2.33.0-py2.py3-none-any.whl", ...

    Raises
    ------
    RuntimeError
        If the conda package list cannot be read from dato_dist_path.
    '''
    hadoop_conf_dir = _file_util.expand_full_path(
        hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_dato_distr_param(dato_dist_path, hadoop_conf_dir)

    conda_list = dato_dist_path + HadoopCluster._DIST_CONDA_LIST
    user_list = dato_dist_path + HadoopCluster._DIST_USER_PKG
    packages = _file_util.read_file_to_string_hdfs(
        conda_list, hadoop_conf_dir=hadoop_conf_dir)
    if packages is None:
        raise RuntimeError(
            "It seems like you do not have a valid Dato Distributed"
            " installation. Please contact your Hadoop administrator.")

    # Bug fix: split(_os.linesep) misses '\n'-separated HDFS content when
    # the local OS separator is '\r\n'; splitlines() handles both.
    output_lines = []
    for line in packages.splitlines():
        fields = line.split()  # conda list rows: name version build
        if len(fields) == 3:
            output_lines.append('%s==%s' % (fields[0], fields[1]))

    result = {'default_packages': output_lines}
    user_add = _file_util.list_hdfs(user_list, hadoop_conf_dir=hadoop_conf_dir)
    # Robustness: a None/empty listing means no user packages, not a crash.
    user = [_os.path.basename(x['path']) for x in (user_add or [])]
    result['user_packages'] = user
    return result