def take_action(self, parsed_args):
    """Download the output files of a Tapis job to a local directory.

    Resolves the job UUID from *parsed_args*, optionally creates a
    per-job destination folder, runs the bulk ``download`` helper, and
    returns a (headers, data) tuple for the CLI formatter.
    """
    parsed_args = self.preprocess_args(parsed_args)
    identifier = JobsUUID.get_identifier(self, parsed_args)
    self.requests_client.setup(API_NAME, SERVICE_VERSION)
    self.update_payload(parsed_args)
    # Optionally disable creation and use of a job folder
    dest_dir = './{0}'.format(identifier)
    if parsed_args.use_cwd:
        dest_dir = '.'
    else:
        makedirs(dest_dir, exist_ok=True)
    # NOTE(review): this value is overwritten by the literal header list
    # below and never used — dead store unless render_headers() has side
    # effects. Confirm and remove if safe.
    headers = self.render_headers(File, parsed_args)
    downloaded, skipped, exceptions, elapsed = download(
        parsed_args.file_path,
        identifier,
        destination=dest_dir,
        excludes=parsed_args.exclude_files,
        # includes=parsed_args.include_files,
        force=parsed_args.overwrite,
        sync=parsed_args.sync,
        progress=parsed_args.progress,
        atomic=False,
        agave=self.tapis_client)
    headers = ['downloaded', 'skipped', 'messages', 'elapsed_sec']
    # Structured formatters get full lists; tabular output gets counts only.
    if parsed_args.formatter in ('json', 'yaml'):
        data = [downloaded, skipped, [str(e) for e in exceptions], elapsed]
    else:
        data = [len(downloaded), len(skipped), len(exceptions), elapsed]
    return (tuple(headers), tuple(data))
def download(source,
             system_id,
             destination='.',
             includes=None,
             excludes=None,
             force=False,
             sync=False,
             atomic=False,
             progress=False,
             agave=None):
    """Download a file or directory tree from a Tapis storage system.

    Walks ``source`` on ``system_id``, mirrors the directory layout under
    ``destination``, then fetches each file via ``_download``.

    Args:
        source (str): Remote path (file or directory) to download.
        system_id (str): Tapis storage system identifier.
        destination (str): Local directory to download into.
        includes (list, optional): Filename patterns to include.
        excludes (list, optional): Filename patterns to exclude.
        force (bool): Overwrite existing local files.
        sync (bool): Skip files that are already up to date.
        atomic (bool): Accepted for interface parity; per-file downloads
            are issued with ``atomic=False`` regardless.
        progress (bool): Print progress messages to stderr.
        agave: Active Tapis/Agave client.

    Returns:
        tuple: (downloaded, skipped, errors, dl_bytes, elapsed_seconds)
    """
    downloaded, skipped, errors, dl_bytes = [], [], [], 0
    if excludes is None:
        excludes = []
    if includes is None:
        includes = []
    if progress:
        print_stderr('Walking remote resource...')
    start_time = seconds()
    all_targets = walk(source,
                       system_id=system_id,
                       dotfiles=True,
                       recurse=True,
                       agave=agave)
    elapsed_walk = seconds() - start_time
    msg = 'Found {0} file(s) in {1}s'.format(len(all_targets), elapsed_walk)
    logger.debug(msg)
    if progress:
        print_stderr(msg)
    # Remote metadata for every file discovered by the walk
    abs_names = [f['path'] for f in all_targets]
    sizes = [f['length'] for f in all_targets]
    mods = [datestring_to_epoch(f['lastModified']) for f in all_targets]
    # Create local destination paths
    dirs = [os.path.dirname(p) for p in abs_names]
    if not isfile(source, system_id=system_id, agave=agave):
        if source.endswith('/'):
            sub_root = source
        else:
            sub_root = os.path.dirname(source)
        sub_root = re.sub('([/]+)$', '', sub_root)
        # BUGFIX: escape the path and anchor the pattern. The previous code
        # passed sub_root to re.sub unescaped and unanchored, so paths
        # containing regex metacharacters raised/matched incorrectly and a
        # sub_root occurring mid-path was also stripped.
        sub_root_pattern = '^' + re.escape(sub_root)
        dirs = [re.sub(sub_root_pattern, '', d) for d in dirs]
        dest_names = [
            os.path.join(destination, relpath(re.sub(sub_root_pattern, '', f)))
            for f in abs_names
        ]
        dirs = [d for d in dirs if d != sub_root]
        make_dirs = [os.path.join(destination, relpath(p)) for p in dirs]
        # Create destinations (dir_path avoids shadowing the dir() builtin)
        for dir_path in make_dirs:
            makedirs(dir_path, exist_ok=True)
    else:
        # Single-file source: download straight into destination
        sub_root = os.path.dirname(source)
        dest_names = [os.path.join(destination, os.path.basename(source))]
    # Do the downloads
    downloads = [list(a) for a in zip(abs_names, sizes, mods, dest_names)]
    start_time_all = seconds()
    for src, size, mod, dest in downloads:
        if progress:
            print_stderr('Downloading {0}...'.format(os.path.basename(src)))
        try:
            _download(src,
                      system_id,
                      size=size,
                      timestamp=mod,
                      dest=dest,
                      includes=includes,
                      excludes=excludes,
                      force=force,
                      sync=sync,
                      atomic=False,
                      agave=agave)
            downloaded.append(src)
            # Track cumulative data size
            dl_bytes = dl_bytes + size
        except (FileExistsError, FileExcludedError) as fxerr:
            # Existing/excluded files count as "skipped" only under
            # sync/force semantics; the error is always recorded.
            if sync or force:
                skipped.append(src)
            errors.append(fxerr)
        except Exception as exc:
            errors.append(exc)
    elapsed_download = seconds() - start_time_all
    msg = 'Downloaded {0} files in {1}s'.format(len(abs_names),
                                                elapsed_download)
    logger.debug(msg)
    if progress:
        print_stderr(msg)
    return downloaded, skipped, errors, dl_bytes, elapsed_walk + elapsed_download
def download(source,
             job_uuid,
             destination=None,
             excludes=None,
             includes=None,
             force=False,
             sync=False,
             atomic=False,
             progress=False,
             agave=None):
    """Download the output tree of a Tapis job.

    Walks the job's remote output listing, recreates the directory layout
    under ``destination`` (default: a folder named after the job UUID),
    then fetches each file via ``_download``.

    Args:
        source (str): Remote path within the job outputs to download.
        job_uuid (str): UUID of the Tapis job.
        destination (str, optional): Local directory; defaults to the UUID.
        excludes (list, optional): Filename patterns to exclude.
        includes (list, optional): Filename patterns to include.
        force (bool): Overwrite existing local files.
        sync (bool): Skip files that are already up to date.
        atomic (bool): Download to a temp name and rename on completion.
        progress (bool): Print progress messages to stderr.
        agave: Active Tapis/Agave client.

    Returns:
        tuple: (downloaded, skipped, errors, elapsed_seconds)
    """
    downloaded, skipped, errors = [], [], []
    if destination is None:
        dest_dir = str(job_uuid)
    else:
        dest_dir = destination
    if progress:
        print_stderr('Walking remote resource...')
    start_time = seconds()
    # Try to avoid timeouts since walk is already pretty slow
    agave.refresh()
    all_targets = walk(source,
                       job_uuid=job_uuid,
                       dotfiles=True,
                       recurse=True,
                       agave=agave)
    elapsed_walk = seconds() - start_time
    msg = 'Found {0} file(s) in {1}s'.format(len(all_targets), elapsed_walk)
    logger.debug(msg)
    if progress:
        print_stderr(msg)
    # Extract absolute names. Under jobs, paths all begin with /
    paths = [f['path'] for f in all_targets]
    # Tapis Jobs returns a spurious "null/" at the start of each file's
    # path (temporary workaround). BUGFIX: anchor the pattern — the
    # previous unanchored re.sub rewrote every "null/" occurrence, which
    # would mangle any real directory named "null" deeper in the tree.
    paths = [re.sub('^null/', '/', p) for p in paths]
    sizes = [f['length'] for f in all_targets]
    mods = [datestring_to_epoch(f['lastModified']) for f in all_targets]
    # Create local destination paths
    dirs = [os.path.dirname(p) for p in paths]
    make_dirs = [
        os.path.join(dest_dir, relpath(p)) for p in dirs
        if p not in ('/', './')
    ]
    # Create destinations (dir_path avoids shadowing the dir() builtin)
    for dir_path in make_dirs:
        makedirs(dir_path, exist_ok=True)
    # Local filenames including destination directory
    rel_paths = [os.path.join(dest_dir, relpath(p)) for p in paths]
    downloads = [list(a) for a in zip(paths, sizes, mods, rel_paths)]
    start_time_all = seconds()
    # Try to avoid timeouts since walk is already pretty slow
    agave.refresh()
    for src, size, mod, dest in downloads:
        if progress:
            print_stderr('Downloading {0}...'.format(os.path.basename(src)))
        try:
            # TODO - refresh token if size > threshold
            _download(src,
                      job_uuid,
                      size=size,
                      timestamp=mod,
                      dest=dest,
                      includes=includes,
                      excludes=excludes,
                      atomic=atomic,
                      force=force,
                      sync=sync,
                      agave=agave)
            downloaded.append(src)
        except FileExcludedError as fexc:
            # Excluded files are skipped but still reported as messages
            errors.append(fexc)
            skipped.append(src)
        except OutputFileExistsError as ofe:
            # Matches the files variant: existing files count as "skipped"
            # only under sync/force; the error is always recorded.
            if sync or force:
                skipped.append(src)
            errors.append(ofe)
        except Exception as exc:
            errors.append(exc)
    elapsed_download = seconds() - start_time_all
    msg = 'Downloaded {0} files in {1}s'.format(len(paths), elapsed_download)
    logger.debug(msg)
    if progress:
        print_stderr(msg)
    return downloaded, skipped, errors, elapsed_walk + elapsed_download