コード例 #1
0
    def take_action(self, parsed_args):
        """Download files for the identified job and summarize the outcome.

        The download target is a folder named after the job identifier
        unless ``parsed_args.use_cwd`` is set, in which case files go into
        the current working directory and no folder is created.

        Returns:
            A ``(headers, data)`` pair of tuples. For the ``json`` and
            ``yaml`` formatters ``data`` holds the full lists (exceptions
            rendered with ``str``); for every other formatter it holds
            counts. The elapsed time is passed through unchanged.
        """
        parsed_args = self.preprocess_args(parsed_args)
        identifier = JobsUUID.get_identifier(self, parsed_args)

        self.requests_client.setup(API_NAME, SERVICE_VERSION)
        self.update_payload(parsed_args)

        # A per-job folder is only created when the user did not ask to
        # download straight into the current working directory.
        if parsed_args.use_cwd:
            dest_dir = '.'
        else:
            dest_dir = './{0}'.format(identifier)
            makedirs(dest_dir, exist_ok=True)

        # The return value is not used for the response columns (they are
        # fixed below); the call itself is retained.
        self.render_headers(File, parsed_args)

        # Include filters are not forwarded to download(); only excludes are.
        result = download(parsed_args.file_path,
                          identifier,
                          destination=dest_dir,
                          excludes=parsed_args.exclude_files,
                          force=parsed_args.overwrite,
                          sync=parsed_args.sync,
                          progress=parsed_args.progress,
                          atomic=False,
                          agave=self.tapis_client)
        downloaded, skipped, exceptions, elapsed = result

        columns = ('downloaded', 'skipped', 'messages', 'elapsed_sec')
        wants_detail = parsed_args.formatter in ('json', 'yaml')
        if wants_detail:
            messages = [str(e) for e in exceptions]
            row = (downloaded, skipped, messages, elapsed)
        else:
            row = (len(downloaded), len(skipped), len(exceptions), elapsed)
        return (columns, row)
コード例 #2
0
ファイル: sync.py プロジェクト: TACC-Cloud/tapis-cli
def download(source,
             system_id,
             destination='.',
             includes=None,
             excludes=None,
             force=False,
             sync=False,
             atomic=False,
             progress=False,
             agave=None):
    """Download a remote file or directory tree into a local directory.

    Walks ``source`` on ``system_id``, creates the required local
    directories beneath ``destination`` and calls ``_download`` once for
    every remote file that the walk returned.

    Args:
        source: Remote path of the file or directory to download.
        system_id: Identifier of the system that holds ``source``.
        destination: Local directory that receives the files.
        includes: Filters forwarded to ``_download``; ``None`` becomes ``[]``.
        excludes: Filters forwarded to ``_download``; ``None`` becomes ``[]``.
        force: Forwarded to ``_download``. When set, files rejected with
            ``FileExistsError``/``FileExcludedError`` are listed as skipped.
        sync: Forwarded to ``_download``. Same effect on skipped files as
            ``force``.
        atomic: Accepted for interface compatibility.
            NOTE(review): this value is not forwarded; ``_download`` is
            always called with ``atomic=False`` - confirm this is intended.
        progress: When true, progress messages are written with
            ``print_stderr``.
        agave: Client object forwarded to ``walk``, ``isfile`` and
            ``_download``.

    Returns:
        A 5-tuple ``(downloaded, skipped, errors, dl_bytes, elapsed)``:
        remote paths that were downloaded, remote paths that were skipped,
        the exception objects that were collected, the summed ``length`` of
        the downloaded files, and the combined walk + download time as
        measured with ``seconds()``.
    """
    downloaded, skipped, errors, dl_bytes = [], [], [], 0

    if excludes is None:
        excludes = []
    if includes is None:
        includes = []

    if progress:
        print_stderr('Walking remote resource...')
    start_time = seconds()
    all_targets = walk(source,
                       system_id=system_id,
                       dotfiles=True,
                       recurse=True,
                       agave=agave)
    elapsed_walk = seconds() - start_time

    msg = 'Found {0} file(s) in {1}s'.format(len(all_targets), elapsed_walk)
    logger.debug(msg)
    if progress:
        print_stderr(msg)

    # Per-file attributes, kept in lockstep with all_targets
    abs_names = [f['path'] for f in all_targets]
    sizes = [f['length'] for f in all_targets]
    mods = [datestring_to_epoch(f['lastModified']) for f in all_targets]

    # Create local destination paths
    if not isfile(source, system_id=system_id, agave=agave):
        if source.endswith('/'):
            sub_root = source
        else:
            sub_root = os.path.dirname(source)
        sub_root = re.sub('([/]+)$', '', sub_root)

        # Remove the remote root as a literal string, first occurrence
        # only. Treating it as a regular expression breaks on paths that
        # contain metacharacters (e.g. "c++") and would also delete later
        # path components that merely repeat the root.
        dirs = [
            os.path.dirname(p).replace(sub_root, '', 1) for p in abs_names
        ]
        dest_names = [
            os.path.join(destination, relpath(f.replace(sub_root, '', 1)))
            for f in abs_names
        ]
        dirs = [d for d in dirs if d != sub_root]
        make_dirs = [os.path.join(destination, relpath(p)) for p in dirs]
        # Create destinations
        for local_dir in make_dirs:
            makedirs(local_dir, exist_ok=True)
    else:
        # A single file lands directly inside the destination directory
        dest_names = [os.path.join(destination, os.path.basename(source))]

    # Do the downloads
    start_time_all = seconds()
    for src, size, mod, dest in zip(abs_names, sizes, mods, dest_names):
        if progress:
            print_stderr('Downloading {0}...'.format(os.path.basename(src)))
        try:
            _download(src,
                      system_id,
                      size=size,
                      timestamp=mod,
                      dest=dest,
                      includes=includes,
                      excludes=excludes,
                      force=force,
                      sync=sync,
                      atomic=False,
                      agave=agave)
            downloaded.append(src)
            # Track cumulative data size
            dl_bytes = dl_bytes + size
        except (FileExistsError, FileExcludedError) as fxerr:
            # Only counted as "skipped" when the caller asked for sync or
            # overwrite semantics; the exception is recorded either way.
            if sync or force:
                skipped.append(src)
            errors.append(fxerr)
        except Exception as exc:
            # Best effort: one failed file must not abort the whole batch
            errors.append(exc)

    elapsed_download = seconds() - start_time_all
    # Report what was actually fetched, not how many files the walk found
    msg = 'Downloaded {0} files in {1}s'.format(len(downloaded),
                                                elapsed_download)
    logger.debug(msg)
    if progress:
        print_stderr(msg)

    return downloaded, skipped, errors, dl_bytes, elapsed_walk + elapsed_download
コード例 #3
0
ファイル: sync.py プロジェクト: TACC-Cloud/tapis-cli
def download(source,
             job_uuid,
             destination=None,
             excludes=None,
             includes=None,
             force=False,
             sync=False,
             atomic=False,
             progress=False,
             agave=None):
    """Download the files found under ``source`` for a job.

    Walks ``source`` for ``job_uuid``, recreates the directory layout under
    the destination directory and calls ``_download`` once for every file
    that the walk returned.

    Args:
        source: Path to walk and download.
        job_uuid: Identifier of the job; also used as the destination
            directory name when ``destination`` is ``None``.
        destination: Local directory that receives the files.
        excludes: Filters forwarded unchanged to ``_download``.
        includes: Filters forwarded unchanged to ``_download``.
        force: Forwarded to ``_download``. When set, files rejected with
            ``OutputFileExistsError`` are listed as skipped.
        sync: Forwarded to ``_download``. Same effect on skipped files as
            ``force``.
        atomic: Forwarded to ``_download``.
        progress: When true, progress messages are written with
            ``print_stderr``.
        agave: Client object; ``refresh()`` is called on it before the walk
            and again before the downloads, and it is forwarded to ``walk``
            and ``_download``.

    Returns:
        A 4-tuple ``(downloaded, skipped, errors, elapsed)``: paths that
        were downloaded, paths that were skipped, the exception objects
        that were collected, and the combined walk + download time as
        measured with ``seconds()``.
    """
    downloaded, skipped, errors = [], [], []

    if destination is None:
        dest_dir = str(job_uuid)
    else:
        dest_dir = destination

    if progress:
        print_stderr('Walking remote resource...')
    start_time = seconds()
    # Try to avoid timeouts since walk is already pretty slow
    agave.refresh()
    all_targets = walk(source,
                       job_uuid=job_uuid,
                       dotfiles=True,
                       recurse=True,
                       agave=agave)
    elapsed_walk = seconds() - start_time

    msg = 'Found {0} file(s) in {1}s'.format(len(all_targets), elapsed_walk)
    logger.debug(msg)
    if progress:
        print_stderr(msg)

    # Extract absolute names
    # Under jobs, paths all begin with /
    #
    # Tapis Jobs returns a spurious "null/" at the start of each file's
    # path. This is a temporary workaround. The pattern is anchored so that
    # a genuine "null/" component deeper in the path is left untouched.
    paths = [re.sub(r'^null/', '/', f['path']) for f in all_targets]
    sizes = [f['length'] for f in all_targets]
    mods = [datestring_to_epoch(f['lastModified']) for f in all_targets]

    # Create local destination paths
    dirs = [os.path.dirname(p) for p in paths]
    make_dirs = [
        os.path.join(dest_dir, relpath(p)) for p in dirs
        if p not in ('/', './')
    ]
    # Create destinations
    for local_dir in make_dirs:
        makedirs(local_dir, exist_ok=True)

    # Local filenames including destination directory
    rel_paths = [os.path.join(dest_dir, relpath(p)) for p in paths]

    start_time_all = seconds()
    # Try to avoid timeouts since walk is already pretty slow
    agave.refresh()
    for src, size, mod, dest in zip(paths, sizes, mods, rel_paths):
        if progress:
            print_stderr('Downloading {0}...'.format(os.path.basename(src)))
        try:
            # TODO - refresh token if size > threshold
            _download(src,
                      job_uuid,
                      size=size,
                      timestamp=mod,
                      dest=dest,
                      includes=includes,
                      excludes=excludes,
                      atomic=atomic,
                      force=force,
                      sync=sync,
                      agave=agave)
            downloaded.append(src)
        except FileExcludedError as fexc:
            # Excluded files always count as skipped
            errors.append(fexc)
            skipped.append(src)
        except OutputFileExistsError as ofe:
            # Existing files only count as skipped under sync/overwrite
            if sync or force:
                skipped.append(src)
            errors.append(ofe)
        except Exception as exc:
            # Best effort: one failed file must not abort the whole batch
            errors.append(exc)

    elapsed_download = seconds() - start_time_all
    # Report what was actually fetched, not how many files the walk found
    msg = 'Downloaded {0} files in {1}s'.format(len(downloaded),
                                                elapsed_download)
    logger.debug(msg)
    if progress:
        print_stderr(msg)

    return downloaded, skipped, errors, elapsed_walk + elapsed_download