def upload_packages(turi_dist_path, filename_or_dir, hadoop_conf_dir=None,
                    force=False):
    '''
    Upload a package to the available packages for this Hadoop Turi
    Distributed installation. Each file must be a valid PyPI package.

    You may download packages from PyPI with the commands

    >>> mkdir <directory_name>
    >>> pip install --download <directory_name> <package-name>

    and then upload them with

    >>> graphlab.deploy.hadoop_cluster.upload_packages(<turi_dist_path>, <path_to_directory>)

    These packages will be available for future work on the cluster.

    Parameters
    ----------
    turi_dist_path : str
        The location where Turi Distributed is installed. This usually comes
        from your Hadoop administrator. This path must be a valid HDFS path.

    filename_or_dir : str
        A file, or a directory containing files, to upload. The file(s) must
        be correct packages for the target host's operating system in your
        Hadoop setup.

    hadoop_conf_dir : str, optional
        Hadoop configuration directory where the Hadoop configuration files
        are stored. If not given, the configuration files are searched for
        automatically in your CLASSPATH. hadoop_conf_dir must be a local
        file path.

    force : bool, optional
        Whether to force overwrite if the file already exists.
    '''
    hadoop_conf_dir = _file_util.expand_full_path(hadoop_conf_dir) \
        if hadoop_conf_dir else None
    _validate_turi_distr_param(turi_dist_path, hadoop_conf_dir)

    dest = turi_dist_path + HadoopCluster._DIST_USER_PKG
    if _os.path.isdir(filename_or_dir):
        # Upload every file found under the directory, recursively.
        for root, directories, filenames in _os.walk(filename_or_dir):
            for f in filenames:
                full = _os.path.join(root, f)
                _file_util.upload_to_hdfs(full, dest,
                                          hadoop_conf_dir=hadoop_conf_dir,
                                          force=force)
    else:
        _file_util.upload_to_hdfs(filename_or_dir, dest,
                                  hadoop_conf_dir=hadoop_conf_dir,
                                  force=force)
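# A minimal usage sketch, assuming a GraphLab Create installation; the HDFS
# path, local staging directory, and package name below are illustrative,
# not real defaults:
#
# >>> import graphlab
# >>> # Stage a package locally first:
# >>> #   mkdir /tmp/pkg_staging
# >>> #   pip install --download /tmp/pkg_staging requests
# >>> graphlab.deploy.hadoop_cluster.upload_packages(
# ...     'hdfs://namenode:8020/user/turi/turi_dist', '/tmp/pkg_staging',
# ...     hadoop_conf_dir='/etc/hadoop/conf', force=True)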
def _save_remote(self, path, aws_credentials):
    '''Save the current predictive policy to S3 or HDFS.'''
    tempdir = _gl.util._make_temp_filename(prefix='predictive_policy_')
    try:
        # Serialize locally first, then push the whole directory to the
        # remote file system.
        self._save_local(tempdir)
        if _file_util.is_s3_path(path):
            _file_util.upload_to_s3(tempdir, path, is_dir=True,
                                    aws_credentials=aws_credentials)
        elif _file_util.is_hdfs_path(path):
            _file_util.hdfs_mkdir(path)
            _file_util.upload_to_hdfs(tempdir + '/*', path)
    finally:
        _shutil.rmtree(tempdir)
def _save_remote(self, path, aws_credentials):
    '''Save the current predictive object to S3 or HDFS.'''
    tempdir = _tempfile.mkdtemp(prefix='predictive_object_')
    try:
        # Serialize locally first, then push the whole directory to the
        # remote file system.
        self._save_local(tempdir)
        if fu.is_s3_path(path):
            fu.upload_to_s3(tempdir, path, is_dir=True,
                            aws_credentials=aws_credentials)
        elif fu.is_hdfs_path(path):
            fu.hdfs_mkdir(path)
            fu.upload_to_hdfs(tempdir + '/*', path)
    finally:
        shutil.rmtree(tempdir)
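# Both _save_remote variants above follow the same save-local-then-upload
# pattern: serialize into a temporary directory, push that directory to the
# remote file system, and always clean up. A self-contained, stdlib-only
# sketch of the pattern; upload_fn is a stand-in for upload_to_s3 or
# upload_to_hdfs and is not part of the original code:

import shutil
import tempfile

def _save_via_tempdir(save_local_fn, upload_fn, remote_path):
    '''Serialize with save_local_fn into a fresh temp directory, hand that
    directory to upload_fn, and remove it whether or not the upload works.'''
    tempdir = tempfile.mkdtemp(prefix='save_remote_sketch_')
    try:
        save_local_fn(tempdir)           # write artifacts locally
        upload_fn(tempdir, remote_path)  # push the whole directory
    finally:
        shutil.rmtree(tempdir)           # never leak the staging directory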
def _copy_predictive_object_files(source_path, target_path, is_dir,
                                  src_credentials, tgt_credentials):
    '''
    Copy either a file or a folder from the source location to the target
    location.
    '''
    # Clean up the existing target path if it exists locally.
    if _file_util.is_local_path(target_path) and _os.path.exists(target_path):
        _shutil.rmtree(target_path)

    if _file_util.is_s3_path(source_path) and _file_util.is_s3_path(target_path):
        # Compare credentials
        _check_aws_credentials(src_credentials, tgt_credentials, source_path)

        # Intra-S3 copy model
        _file_util.intra_s3_copy_model(source_path, target_path, is_dir,
                                       tgt_credentials)

    elif _file_util.is_local_path(source_path):
        _file_util.copy_from_local(source_path, target_path, is_dir=is_dir)

    else:
        tmp_dir = _tempfile.mkdtemp(prefix='copy_predictive_object')
        try:
            # Download to local first
            local_path = _os.path.join(tmp_dir, 'temp_po_file')
            if _file_util.is_s3_path(source_path):
                _file_util.download_from_s3(
                    source_path, local_path, is_dir=is_dir,
                    aws_credentials=src_credentials, silent=False)
            elif _file_util.is_hdfs_path(source_path):
                _file_util.download_from_hdfs(source_path, local_path,
                                              is_dir=False)
            else:
                raise RuntimeError('Unsupported file system type: %s' % source_path)

            # Upload from local to remote
            if _file_util.is_s3_path(target_path):
                _file_util.upload_to_s3(local_path, target_path, is_dir=is_dir,
                                        aws_credentials=tgt_credentials,
                                        silent=False)
            elif _file_util.is_hdfs_path(target_path):
                _file_util.hdfs_mkdir(target_path)
                _file_util.upload_to_hdfs(local_path, target_path, force=True,
                                          silent=False)
            else:
                _file_util.upload_to_local(local_path, target_path,
                                           is_dir=is_dir, silent=False)
        finally:
            _shutil.rmtree(tmp_dir)
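# The copy routine above dispatches on the path scheme through _file_util
# predicates. A minimal sketch of such predicates, assuming (as the paths in
# this file suggest) that remote schemes are encoded as URL prefixes; the
# real _file_util helpers may be more elaborate, and the _sketch_ names are
# hypothetical, not part of the library:

def _sketch_is_s3_path(path):
    return path.startswith('s3://')

def _sketch_is_hdfs_path(path):
    return path.startswith('hdfs://')

def _sketch_is_local_path(path):
    # Anything without a recognized remote scheme is treated as local.
    return not (_sketch_is_s3_path(path) or _sketch_is_hdfs_path(path))

# >>> _sketch_is_hdfs_path('hdfs://namenode/models/m1')   # True
# >>> _sketch_is_local_path('/tmp/models/m1')             # True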