def transfer(src, dst, creds, upstream=True,
             tries=3, include=None, parallelism=10, extract=False):
    """
    Copy every file found under a source directory to a destination
    directory using rsync over ssh, running one rsync command per file.

    @src, @dst: source and destination directories
    @creds: dict or Bunch of credentials; `user`, `host` and either `key`
            or `key_filename` (a path or a list of paths) are read here
    @upstream(default=True): True copies local files to the remote host,
            False copies remote files to the local machine
    @tries(default=3): passed through to executor._local for each command
    @include: passed through to executor.find_files
    @parallelism(default=10): number of parallel processes to use
    @extract: boolean - whether to extract tar or zip files after transfer

    Returns None; prints a message and returns early when no source files
    are found.
    """
    if isinstance(creds, dict):
        creds = Bunch(creds)
    if 'key' in creds:
        creds.key = os.path.expanduser(creds.key)
    if 'key_filename' in creds:
        # key_filename may be a single path or a sequence of paths. Indexing
        # a plain string with [0] would yield only its first character.
        key_path = creds.key_filename
        if isinstance(key_path, (list, tuple)):
            key_path = key_path[0]
        creds.key = os.path.expanduser(key_path)

    # The source side is searched without creds when pushing upstream and
    # with creds when pulling.
    find_creds = None if upstream else creds
    srcs = executor.find_files(src, find_creds, include=include)
    if not srcs:
        print('No source files found to transfer.')
        return

    # Map each source path to its destination by swapping the src prefix
    # (and the separator that follows it) for dst.
    dests = []
    for path in srcs:
        if path[:len(src)].endswith('/'):
            relative = path[len(src):]
        else:
            relative = path[len(src) + 1:]
        dests.append(os.path.join(dst, relative))

    # Create exactly the directories rsync is going to write into. Deriving
    # them from the destination paths (rather than str.replace on the source
    # directories) avoids rewriting a later occurrence of src inside a path
    # and keeps working when src carries a trailing slash.
    dst_dirs = sorted(set(os.path.dirname(path) for path in dests))
    dst_dirs = [path for path in dst_dirs if path not in ('', '/')]
    if upstream:
        executor.make_dirs(dst_dirs, creds=creds)
    else:
        executor.make_dirs(dst_dirs)

    rsync = ("rsync -raz -e 'ssh"
             " -o StrictHostKeyChecking=no"
             " -o ServerAliveInterval=100"
             " -i {}'").format(creds.key)
    cmds = []
    for path, dest in zip(srcs, dests):
        if upstream:
            cmd = "{} {} {}@{}:{}".format(rsync, path, creds.user,
                                          creds.host, dest)
        else:
            cmd = "{} {}@{}:{} {}".format(rsync, creds.user, creds.host,
                                          path, dest)
        cmds.append(cmd)

    pool = Pool(parallelism, init_worker)
    func = partial(executor._local, None, tries)
    pool.map(func, cmds)
    pool.close()
    pool.join()

    if extract:
        # NOTE(review): creds are passed even when upstream is False and dst
        # is a local path - confirm compression.extract expects that.
        compression.extract(dst, creds=creds)
def extract(target_path, creds=None):
    """
    Unzip or untar a single file, or every matching archive found under a
    directory, either locally or on a remote host.

    @target_path: string - directory or file path
    @creds: a dictionary or Bunch object used for remote execution

    Logs a warning and returns without running anything when target_path
    does not exist. Files for which get_unzip_cmd returns None are skipped.
    """
    if not executor.path_exists(target_path, creds):
        logging.warning("Invalid path: %s", target_path)
        return

    if executor.is_file(target_path, creds):
        archives = [target_path]
    else:  # directory
        archives = executor.find_files(target_path, creds,
                                       include=["*.gz", "*.zip"])

    cmds = []
    for path in archives:
        target_dir, filename = os.path.split(path)
        # Look the command up once per file and reuse the result.
        unzip_cmd = get_unzip_cmd(filename)
        if unzip_cmd is not None:
            cmds.append("cd {}; {} {}".format(target_dir, unzip_cmd, filename))
    executor.run(cmds, creds)
def extract(target_path, creds=None):
    """
    Unzip or untar a single file, or every matching archive found under a
    directory, either locally or on a remote host.

    @target_path: string - directory or file path
    @creds: a dictionary or Bunch object used for remote execution

    Logs a warning and returns without running anything when target_path
    does not exist. Files for which get_unzip_cmd returns None are skipped.
    """
    if not executor.path_exists(target_path, creds):
        logging.warning('Invalid path: %s', target_path)
        return

    if executor.is_file(target_path, creds):
        candidates = [target_path]
    else:  # directory
        candidates = executor.find_files(target_path, creds,
                                         include=['*.gz', '*.zip'])

    cmds = []
    for candidate in candidates:
        target_dir, filename = os.path.split(candidate)
        # One lookup per file; the result is reused when building the command.
        unzip_cmd = get_unzip_cmd(filename)
        if unzip_cmd is None:
            continue
        cmds.append('cd {}; {} {}'.format(target_dir, unzip_cmd, filename))
    executor.run(cmds, creds)
def transfer(src, dst, creds, upstream=True,
             tries=3, include=None, exclude=None, parallelism=10,
             extract=False, validate=False, additional_params='-c'):
    """
    Find the files under a source directory, pair each one with its path
    under the destination directory, and hand the pairs to transfer_paths.

    @src, @dst: source and destination directories
    @creds: dict of credentials; `key` or `key_filename` (a path or a list
            of paths) is expanded with os.path.expanduser into creds.key
    @upstream(default=True): when True the source files are searched without
            creds, otherwise with creds; also passed on to transfer_paths
    @tries(default=3): passed on to transfer_paths
    @include, @exclude: passed on to executor.find_files; None means an
            empty list
    @parallelism(default=10): number of parallel processes to use
    @extract: boolean - whether to extract tar or zip files after transfer
    @validate: passed on to transfer_paths
    @additional_params: str - additional parameters to pass on to rsync

    Returns None; logs a warning and returns early when no source files
    are found.
    """
    # Fresh lists per call instead of shared mutable defaults.
    include = [] if include is None else include
    exclude = [] if exclude is None else exclude

    if isinstance(creds, dict):
        creds = Bunch(creds)
    if 'key' in creds:
        creds.key = os.path.expanduser(creds.key)
    if 'key_filename' in creds:
        key_path = creds.key_filename
        if isinstance(key_path, (list, tuple)):
            key_path = key_path[0]
        creds.key = os.path.expanduser(key_path)

    find_creds = None if upstream else creds
    srcs = executor.find_files(src, find_creds,
                               include=include, exclude=exclude)
    if not srcs:
        logging.warning('No source files found to transfer.')
        return

    # Normalise dst once, outside the loop. A bare '/' is left alone:
    # stripping it would give '' and make every destination path relative.
    if len(dst) > 1 and dst.endswith('/'):
        dst = dst[:-1]

    paths = []
    for path in srcs:
        # Path of the file relative to src, without a leading separator.
        relative = path[len(src):]
        if relative.startswith('/'):
            relative = relative[1:]
        paths.append((path, os.path.join(dst, relative)))

    transfer_paths(paths, creds, upstream,
                   tries=tries, parallelism=parallelism, extract=extract,
                   validate=validate, additional_params=additional_params)