Example #1
def upload_packages(turi_dist_path,
                    filename_or_dir,
                    hadoop_conf_dir=None,
                    force=False):
    '''
    Upload a package to the available packages for this Hadoop Turi Distributed
    installation.  Each file must be a valid PyPI package.  You may download
    packages from PyPI with the commands

        >>> mkdir <directory_name>
        >>> pip install --download <directory_name> <package-name>

        then

        >>> graphlab.deploy.hadoop_cluster.upload_packages(<turi_dist_path>, <path_to_directory>)

    These packages will be available for future work on the cluster.

    Parameters
    -----------
    turi_dist_path : str
        The location where Turi Distributed is installed. This usually comes from
        your Hadoop Administrator. This path must be a valid HDFS path.

    filename_or_dir : str
        A file to upload, or a directory containing files to upload. Each file
        must be a valid package for the operating system of the target hosts in
        your Hadoop setup.

    hadoop_conf_dir : str, optional
        Directory where the Hadoop configuration files are stored. If not given,
        the configuration files are searched for automatically on your CLASSPATH.
        hadoop_conf_dir must be a local file path.

    force : bool, optional
        Whether to overwrite the destination file if it already exists.

    Returns
    -------
    '''
    hadoop_conf_dir = _file_util.expand_full_path(
        hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_turi_distr_param(turi_dist_path, hadoop_conf_dir)

    dest = turi_dist_path + HadoopCluster._DIST_USER_PKG
    if _os.path.isdir(filename_or_dir):
        for root, directories, filenames in _os.walk(filename_or_dir):
            for f in filenames:
                full = _os.path.join(root, f)
                _file_util.upload_to_hdfs(full,
                                          dest,
                                          hadoop_conf_dir=hadoop_conf_dir,
                                          force=force)
    else:
        _file_util.upload_to_hdfs(filename_or_dir,
                                  dest,
                                  hadoop_conf_dir=hadoop_conf_dir,
                                  force=force)
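A minimal sketch of how the function above might be invoked; the HDFS path, local package directory, and configuration directory below are illustrative placeholders, not real locations.

# Hypothetical call to the upload_packages example above. The Turi Distributed
# path and the local directory of pip-downloaded packages are placeholders.
upload_packages(turi_dist_path='hdfs://namenode:8020/turi_distributed',
                filename_or_dir='/tmp/pypi_packages',
                hadoop_conf_dir='/etc/hadoop/conf',
                force=True)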

Example #2

def upload_packages(dato_dist_path,
                    filename_or_dir,
                    hadoop_conf_dir=None,
                    force=False):
    '''
    Upload a package to the available packages for this Hadoop Dato Distributed
    installation.  Each file must be a valid PyPI package.  You may download
    packages from PyPI with the commands

        >>> mkdir <directory_name>
        >>> pip install --download <directory_name> <package-name>

        then

        >>> graphlab.deploy.hadoop_cluster.upload_packages(<dato_dist_path>, <path_to_directory>)

    These packages will be available for future work on the cluster.

    Parameters
    -----------
    dato_dist_path : str
        The location where Dato Distributed is installed. This usually comes from
        your Hadoop Administrator. This path must be a valid HDFS path.

    filename_or_dir : str
        A file to upload, or a directory containing files to upload. Each file
        must be a valid package for the operating system of the target hosts in
        your Hadoop setup.

    hadoop_conf_dir : str, optional
        Directory where the Hadoop configuration files are stored. If not given,
        the configuration files are searched for automatically on your CLASSPATH.
        hadoop_conf_dir must be a local file path.

    force : bool, optional
        Whether to overwrite the destination file if it already exists.

    Returns
    -------
    '''
    hadoop_conf_dir = _file_util.expand_full_path(hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_dato_distr_param(dato_dist_path, hadoop_conf_dir)

    dest = dato_dist_path + HadoopCluster._DIST_USER_PKG
    if _os.path.isdir(filename_or_dir):
        for root, directories, filenames in _os.walk(filename_or_dir):
            for f in filenames:
                full = _os.path.join(root, f)
                _file_util.upload_to_hdfs(full, dest,
                                          hadoop_conf_dir=hadoop_conf_dir,
                                          force=force)
    else:
        _file_util.upload_to_hdfs(filename_or_dir, dest,
                                  hadoop_conf_dir=hadoop_conf_dir,
                                  force=force)
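The directory branch above flattens a package directory with os.walk before uploading each file; a self-contained sketch of that traversal, independent of the HDFS helpers, is shown below.

import os

# Standalone sketch of the directory walk used in the example above: collect
# every file beneath a directory so each one can be uploaded individually.
def collect_files(directory):
    files = []
    for root, _dirs, filenames in os.walk(directory):
        for name in filenames:
            files.append(os.path.join(root, name))
    return files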

Example #3
    def _save_remote(self, path, aws_credentials):
        '''Save the current predictive policy to S3 or HDFS.'''
        tempdir = _gl.util._make_temp_filename(prefix='predictive_policy_')

        try:
            self._save_local(tempdir)
            if _file_util.is_s3_path(path):
                _file_util.upload_to_s3(tempdir, path, is_dir=True,
                                        aws_credentials=aws_credentials)
            elif _file_util.is_hdfs_path(path):
                _file_util.hdfs_mkdir(path)
                _file_util.upload_to_hdfs(tempdir + '/*', path)
        finally:
            _shutil.rmtree(tempdir)
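The method above follows a save-locally-then-upload pattern; the sketch below restates that pattern in a self-contained form, with the S3/HDFS helpers abstracted into callables since _file_util is not part of this snippet.

import shutil
import tempfile

# Generic sketch of the _save_remote pattern: serialize into a temporary
# directory, hand that directory to an uploader, and always remove the
# temporary directory, even if either step raises.
def save_via_tempdir(save_local, upload, prefix='predictive_policy_'):
    tempdir = tempfile.mkdtemp(prefix=prefix)
    try:
        save_local(tempdir)   # write the object's files into tempdir
        upload(tempdir)       # push the whole directory to remote storage
    finally:
        shutil.rmtree(tempdir)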

Example #4

    def _save_remote(self, path, aws_credentials):
        '''Save the current predictive object to S3 or HDFS.
        '''
        tempdir = _tempfile.mkdtemp(prefix='predictive_object_')
        try:
            self._save_local(tempdir)
            if fu.is_s3_path(path):
                fu.upload_to_s3(tempdir, path, is_dir=True,
                                aws_credentials=aws_credentials)
            elif fu.is_hdfs_path(path):
                fu.hdfs_mkdir(path)
                fu.upload_to_hdfs(tempdir + '/*', path)
        finally:
            shutil.rmtree(tempdir)

Example #5

def _copy_predictive_object_files(source_path, target_path, is_dir,
                                  src_credentials, tgt_credentials):
    '''
    Copy a file or a folder from the source location to the target location.
    '''
    # Clean up the existing target path, if one already exists
    if _file_util.is_local_path(target_path) and _os.path.exists(target_path):
        _shutil.rmtree(target_path)

    if _file_util.is_s3_path(source_path) and _file_util.is_s3_path(target_path):

        # compare credentials
        _check_aws_credentials(src_credentials, tgt_credentials, source_path)

        # intra s3 copy model
        _file_util.intra_s3_copy_model(source_path, target_path, is_dir, tgt_credentials)
    elif _file_util.is_local_path(source_path):

        _file_util.copy_from_local(source_path, target_path, is_dir=is_dir)

    else:
        tmp_dir = _tempfile.mkdtemp(prefix='copy_predictive_object')
        try:
            # download to local first
            local_path = _os.path.join(tmp_dir, 'temp_po_file')
            if _file_util.is_s3_path(source_path):
                _file_util.download_from_s3(
                    source_path,
                    local_path,
                    is_dir=is_dir,
                    aws_credentials=src_credentials,
                    silent=False)
            elif _file_util.is_hdfs_path(source_path):
                _file_util.download_from_hdfs(source_path, local_path, is_dir=False)
            else:
                raise RuntimeError('Unsupported file system type: %s' % source_path)

            # upload from local to remote
            if _file_util.is_s3_path(target_path):
                _file_util.upload_to_s3(local_path, target_path, is_dir=is_dir,
                                        aws_credentials=tgt_credentials, silent=False)
            elif _file_util.is_hdfs_path(target_path):
                _file_util.hdfs_mkdir(target_path)
                _file_util.upload_to_hdfs(local_path, target_path, force=True, silent=False)
            else:
                _file_util.upload_to_local(local_path, target_path, is_dir=is_dir, silent=False)

        finally:
            _shutil.rmtree(tmp_dir)
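
When neither endpoint is local, the function above stages the object through a temporary directory; the sketch below captures that download-then-upload flow in a self-contained form, with the storage-specific helpers passed in as callables because _file_util is not part of this snippet.

import os
import shutil
import tempfile

# Sketch of the remote-to-remote fallback above: download to a scratch file,
# upload that file to the target, and remove the scratch directory in all
# cases. `download` and `upload` stand in for the S3/HDFS helper functions.
def copy_via_tempdir(source_path, target_path, download, upload):
    tmp_dir = tempfile.mkdtemp(prefix='copy_predictive_object')
    try:
        local_path = os.path.join(tmp_dir, 'temp_po_file')
        download(source_path, local_path)
        upload(local_path, target_path)
    finally:
        shutil.rmtree(tmp_dir)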