Example #1
    def __get_log_file_path_list(self, url, handler):
        """
        Returns a list of log file path for this job
        """
        job = self.__load_job()

        # get the directory that contains all the logs
        log_file_path = job.get_log_file_path()
        path_list = []

        # list the directory to get the full path to each log
        if _file_util.is_s3_path(log_file_path):
            ec2_log_list = _file_util.list_s3(
                log_file_path, job.environment.get_credentials())
            if ec2_log_list:
                path_list.extend([log['path'] for log in ec2_log_list])
        elif _file_util.is_hdfs_path(log_file_path):
            hdfs_log_list = _file_util.list_hdfs(
                log_file_path, job.environment.hadoop_conf_dir)
            if hdfs_log_list:
                path_list.extend([log['path'] for log in hdfs_log_list])
        else:
            path_list.append(log_file_path)

        handler.write({'log_file_list': path_list})
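
The branching above is easy to exercise in isolation. Below is a minimal, self-contained sketch of the same path-resolution idea with the storage helpers passed in as plain callables; resolve_log_paths and the lambda stubs are illustrative names, not part of the original module.

def resolve_log_paths(log_dir, is_s3, is_hdfs, list_s3, list_hdfs):
    """Return the full path of every log found under log_dir."""
    paths = []
    if is_s3(log_dir):
        # S3 directory: each listing entry carries its full path
        paths.extend(entry['path'] for entry in (list_s3(log_dir) or []))
    elif is_hdfs(log_dir):
        # HDFS directory: listing entries have the same shape
        paths.extend(entry['path'] for entry in (list_hdfs(log_dir) or []))
    else:
        # Local execution: the path already points at a single log file
        paths.append(log_dir)
    return paths

# Stubbed usage, standing in for _file_util and the job environment:
print(resolve_log_paths(
    's3://bucket/job-42/logs/',
    is_s3=lambda p: p.startswith('s3://'),
    is_hdfs=lambda p: p.startswith('hdfs://'),
    list_s3=lambda p: [{'path': p + 'commander.log'}, {'path': p + 'worker-0.log'}],
    list_hdfs=lambda p: [],
))
# ['s3://bucket/job-42/logs/commander.log', 's3://bucket/job-42/logs/worker-0.log']
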
Example #2
def show_available_packages(turi_dist_path, hadoop_conf_dir=None):
    '''
    Show all available packages in a Hadoop Turi Distributed installation.

    Parameters
    ----------
    turi_dist_path : str
        The location where Turi Distributed is installed. This usually comes from
        your Hadoop administrator. This path must be a valid HDFS path.

    hadoop_conf_dir : str, optional
        Directory where the Hadoop configuration files are stored. If not given,
        the configuration files are searched for automatically on your CLASSPATH.
        hadoop_conf_dir must be a local file path.

    Returns
    -------
    out : dict
        Dict of two lists. default_packages holds pip-style requirement strings
        in the format:

            "rsa==3.1.4",
            "scikit-learn==0.16.1",
            "scipy==0.15.1"

        user_packages holds additional PyPI packages that have been uploaded to
        the Turi Distributed installation, in the format:

            "names-0.3.0.tar.gz",
            "boto-2.33.0-py2.py3-none-any.whl",
            ...

    '''
    hadoop_conf_dir = _file_util.expand_full_path(
        hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_turi_distr_param(turi_dist_path, hadoop_conf_dir)

    conda_list = turi_dist_path + HadoopCluster._DIST_CONDA_LIST
    user_list = turi_dist_path + HadoopCluster._DIST_USER_PKG
    packages = _file_util.read_file_to_string_hdfs(
        conda_list, hadoop_conf_dir=hadoop_conf_dir)
    if packages is None:
        raise RuntimeError(
            "It seems like you do not have a valid Turi Distributed"
            " installation. Please contact your Hadoop administrator.")

    lines = packages.split(_os.linesep)
    output_lines = []
    for line in lines:
        # Keep rows that look like "name version build" and pin them pip-style.
        parts = line.split()
        if len(parts) == 3:
            output_lines.append('%s==%s' % (parts[0], parts[1]))

    result = {'default_packages': output_lines}
    user_add = _file_util.list_hdfs(user_list, hadoop_conf_dir=hadoop_conf_dir)
    user = [_os.path.basename(x['path']) for x in user_add]
    result['user_packages'] = user
    return result
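
The parsing loop above appears to assume that the file behind _DIST_CONDA_LIST looks like conda list output, i.e. whitespace-separated name / version / build columns. Here is a small, self-contained sketch of just that step, using made-up sample text:

import os

# Made-up sample in the three-column layout the loop above expects.
sample = os.linesep.join([
    "# packages in environment at /opt/turi:",
    "rsa                3.1.4            py27_0",
    "scikit-learn       0.16.1           np19py27_0",
    "scipy              0.15.1           np19py27_0",
])

default_packages = []
for line in sample.split(os.linesep):
    parts = line.split()
    if len(parts) == 3:  # name, version, build -> pip-style pin
        default_packages.append('%s==%s' % (parts[0], parts[1]))

print(default_packages)
# ['rsa==3.1.4', 'scikit-learn==0.16.1', 'scipy==0.15.1']
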
Example #3

def show_available_packages(dato_dist_path, hadoop_conf_dir=None):
    '''
    Show all available packages in a Hadoop Dato Distributed installation.

    Parameters
    ----------
    dato_dist_path : str
        The location where Dato Distributed is installed. This usually comes from
        your Hadoop administrator. This path must be a valid HDFS path.

    hadoop_conf_dir : str, optional
        Directory where the Hadoop configuration files are stored. If not given,
        the configuration files are searched for automatically on your CLASSPATH.
        hadoop_conf_dir must be a local file path.

    Returns
    -------
    out : dict
        Dict of two lists. default_packages holds pip-style requirement strings
        in the format:

            "rsa==3.1.4",
            "scikit-learn==0.16.1",
            "scipy==0.15.1"

        user_packages holds additional PyPI packages that have been uploaded to
        the Dato Distributed installation, in the format:

            "names-0.3.0.tar.gz",
            "boto-2.33.0-py2.py3-none-any.whl",
            ...

    '''
    hadoop_conf_dir = _file_util.expand_full_path(hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_dato_distr_param(dato_dist_path, hadoop_conf_dir)

    conda_list = dato_dist_path + HadoopCluster._DIST_CONDA_LIST
    user_list = dato_dist_path + HadoopCluster._DIST_USER_PKG
    packages = _file_util.read_file_to_string_hdfs(conda_list, hadoop_conf_dir=hadoop_conf_dir)
    if packages is None:
        raise RuntimeError(
            "It seems like you do not have a valid Dato Distributed"
            " installation. Please contact your Hadoop administrator.")

    lines = packages.split(_os.linesep)
    output_lines = []
    for line in lines:
        # Keep rows that look like "name version build" and pin them pip-style.
        parts = line.split()
        if len(parts) == 3:
            output_lines.append('%s==%s' % (parts[0], parts[1]))

    result = {'default_packages': output_lines}
    user_add = _file_util.list_hdfs(user_list, hadoop_conf_dir=hadoop_conf_dir)
    user = [_os.path.basename(x['path']) for x in user_add]
    result['user_packages'] = user
    return result
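
For orientation, a hypothetical call to either variant of show_available_packages; the HDFS path and configuration directory below are placeholders, not defaults shipped with the library:

# Both the HDFS path and the local config directory are placeholders.
packages = show_available_packages(
    'hdfs://namenode:8020/turi_dist',      # hypothetical installation root
    hadoop_conf_dir='/etc/hadoop/conf')    # hypothetical local config dir
print(packages['default_packages'][:3])    # e.g. ['rsa==3.1.4', ...]
print(packages['user_packages'][:3])       # e.g. ['names-0.3.0.tar.gz', ...]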