Ejemplo n.º 1
0
 def extract_files(result):
     """Return the 'files' entries of ``result`` with a 'path' key set on each.

     Every entry is updated in place. Its path is the parent folder's path,
     followed by the entry name with '/' replaced by '_', followed by a
     trailing '/' when ``is_folder`` reports the entry as a folder.

     ``folder`` is not defined in this block; presumably it comes from the
     enclosing scope -- verify against the surrounding code.

     :param result: mapping that may contain a 'files' list.
     :return: the same list object found under 'files', or a new empty list
         when the key is missing.
     """
     entries = result.get('files', [])
     for entry in entries:
         parent_path = folder['path']
         safe_name = entry['name'].replace('/', '_')
         suffix = '/' if is_folder(entry) else ''
         entry['path'] = '{}{}{}'.format(parent_path, safe_name, suffix)
     return entries
Ejemplo n.º 2
0
 def recurse(file):
     """Build the stream for ``file``, descending into it when it is a folder.

     A non-folder yields ``of([file])``. A folder yields ``of([file])``
     concatenated with ``list_folder_recursively(file)``.
     """
     if not is_folder(file):
         return of([file])

     own_entry = of([file])
     descendants = list_folder_recursively(file)
     return concat(own_entry, descendants)
Ejemplo n.º 3
0
 def recurse(file):
     """Build the stream for ``file``, descending into it when it is a folder.

     A non-folder yields ``of([file])``. A folder yields ``of([file])``
     concatenated with
     ``list_folder_recursively(credentials, file, retries)``.

     ``credentials`` and ``retries`` are not defined in this block;
     presumably they come from the enclosing scope -- verify against the
     surrounding code.
     """
     if not is_folder(file):
         return of([file])

     own_entry = of([file])
     descendants = list_folder_recursively(credentials, file, retries)
     return concat(own_entry, descendants)
Ejemplo n.º 4
0
def download_directory(file: dict,
                       destination: str,
                       matching: str = None,
                       delete_after_download: bool = False) -> Observable:
    """Download ``file`` to ``destination``, recursing when it is a folder.

    For a folder, the folder is listed recursively, folders are dropped and
    the remaining files are filtered by ``matching``. Each kept file is
    downloaded with ``download_file`` to ``destination`` plus the file's path
    relative to ``file['path']``. Every download event is folded into a
    running statistics dict with the keys 'progress', 'total_files',
    'total_bytes', 'downloaded_files' and 'downloaded_bytes'.

    For a non-folder, the result of ``download_file(file, destination)`` is
    returned unchanged.

    :param file: file or folder description; 'path' is read for folders.
    :param destination: target path; made absolute before use.
    :param matching: optional ``fnmatch`` pattern applied to each file's
        'path'. A falsy value keeps every file.
    :param delete_after_download: when true, ``delete_file`` is called with
        the 'file' entry of each download event.
    :return: the observable built as described above.
    """
    destination = os.path.abspath(destination)

    def get_destination(f):
        # Strip the root folder's path prefix to get the path relative to it.
        relative_path = f['path'][len(file['path']):]
        next_destination = '{}{}{}'.format(
            destination, '' if destination[-1] == '/' else '/', relative_path)
        return next_destination

    def initial_stats(files):
        return {
            'progress': 0,
            'total_files': len(files),
            # Entries without a 'size' count as zero bytes.
            'total_bytes': sum([int(f.get('size', 0)) for f in files]),
            'downloaded_files': 0,
            'downloaded_bytes': 0
        }

    def update_stats(stats, download):
        # A download event counts as a finished file once its progress
        # reaches 1.
        downloaded_files = stats['downloaded_files'] + (
            0 if download['progress'] < 1 else 1)
        # NOTE(review): assumes each event reports the bytes added since the
        # previous event rather than a running total -- confirm against
        # download_file.
        downloaded_bytes = stats['downloaded_bytes'] + download[
            'downloaded_bytes']
        total_bytes = stats['total_bytes']
        if total_bytes:
            progress = downloaded_bytes / total_bytes
        else:
            # All selected files are zero-sized (or carry no 'size'), so a
            # byte ratio is undefined; use the share of finished files
            # instead of raising ZeroDivisionError.
            total_files = stats['total_files']
            progress = downloaded_files / total_files if total_files else 0
        return {
            'progress': progress,
            'total_files': stats['total_files'],
            'total_bytes': total_bytes,
            'downloaded_files': downloaded_files,
            'downloaded_bytes': downloaded_bytes
        }

    def is_file_matching(f):
        return not matching or fnmatch.fnmatch(f['path'], matching)

    def delete_downloaded(downloaded):
        if delete_after_download:
            # Re-emit the download event once the delete has emitted.
            return delete_file(downloaded['file']).pipe(
                map(lambda _: downloaded))
        else:
            return of(downloaded)

    def filter_files(files):
        return [f for f in files if not is_folder(f) and is_file_matching(f)]

    if is_folder(file):
        return list_folder_recursively(file).pipe(
            map(lambda files: filter_files(files)),
            flat_map(lambda files: of(True).pipe(
                flat_map(lambda _: of(*files).pipe(
                    flat_map(lambda f: download_file(f, get_destination(f))),
                    flat_map(delete_downloaded))),
                scan(update_stats, initial_stats(files)))))
    else:
        return download_file(file, destination)
Ejemplo n.º 5
0
    def filter_files(files):
        """Drop duplicate ids, folders and non-matching entries from ``files``.

        Only the first entry seen for each 'id' is kept. Of those, an entry
        is returned when ``is_folder`` is false for it and
        ``is_file_matching`` accepts it. Input order is preserved.
        """
        # dicts keep insertion order, so setdefault retains the first entry
        # per id in the order the ids were first encountered.
        first_by_id = {}
        for candidate in files:
            first_by_id.setdefault(candidate['id'], candidate)

        return [
            candidate for candidate in first_by_id.values()
            if not is_folder(candidate) and is_file_matching(candidate)
        ]
Ejemplo n.º 6
0
def download(credentials,
             file: dict,
             destination: str,
             matching: str = None,
             delete_after_download: bool = False,
             retries: int = 5) -> Observable:
    """Download ``file`` to ``destination``, recursing when it is a folder.

    For a folder, the folder is listed recursively, entries are de-duplicated
    by 'id', folders are dropped and the rest is filtered by ``matching``.
    Every remaining file is downloaded to ``destination`` plus its path
    relative to ``file['path']``. The per-file progress streams are combined
    and passed to an aggregate ``progress(...)`` call. A folder with no
    matching files yields ``empty()``.

    For a non-folder, ``file`` is downloaded directly to ``destination``.

    :param credentials: passed through to ``get_service``, ``touch``,
        ``enqueue``, ``delete_file`` and ``list_folder_recursively``.
    :param file: file or folder description; 'path' is read for folders,
        'id' and 'size' for files.
    :param destination: target path; made absolute before use.
    :param matching: optional ``fnmatch`` pattern applied to each file's
        'path'. A falsy value keeps every file.
    :param delete_after_download: when true, ``delete_file`` is invoked for
        each file after its download stream.
    :param retries: forwarded to ``enqueue`` for each file download.
    :return: the observable built as described above.
    """
    logging.debug('downloading {} to {}'.format(file, destination))
    destination = os.path.abspath(destination)

    def get_file_destination(f):
        # Strip the root folder's path prefix to get the path relative to it.
        relative_path = f['path'][len(file['path']):]
        next_destination = '{}{}{}'.format(
            destination, '' if destination[-1] == '/' else '/', relative_path)
        return next_destination

    def is_file_matching(f):
        return not matching or fnmatch.fnmatch(f['path'], matching)

    def delete_downloaded(f):
        if delete_after_download:
            # NOTE(review): this try/except only catches an HttpError raised
            # synchronously while the stream is built; whether delete_file
            # can instead report failures through the stream is not visible
            # here -- confirm.
            try:
                return delete_file(credentials,
                                   f).pipe(flat_map(lambda _: empty()))
            except HttpError:
                # Report the file whose deletion failed, not the root
                # ``file`` passed to download().
                logging.warning(
                    'Failed to delete downloaded file {}'.format(f))
                return empty()
        else:
            return empty()

    def filter_files(files):
        # Keep only the first entry seen for each id.
        seen_file_ids = set()
        unique_files = []
        for f in files:
            file_id = f['id']
            if file_id not in seen_file_ids:
                seen_file_ids.add(file_id)
                unique_files.append(f)

        filtered = [
            f for f in unique_files if not is_folder(f) and is_file_matching(f)
        ]
        return filtered

    def download_file(f, dest):
        total_bytes = int(f['size'])

        def next_chunk(downloader):
            status, done = downloader.next_chunk()
            # Log the file and path actually being written, matching the
            # queue description built further down.
            logging.debug('downloaded chunk from {} to {}: {}'.format(
                f, dest, status))
            return 1.0 if done else status.progress()

        def download_from_drive(destination_file):
            def create_downloader():
                request = get_service(credentials).files().get_media(
                    fileId=f['id'])
                return MediaIoBaseDownload(fd=destination_file,
                                           request=request,
                                           chunksize=CHUNK_SIZE)

            downloader = create_downloader()
            # Pull chunks until the reported fraction reaches 1; the final
            # value is still emitted because of inclusive=True.
            return forever().pipe(
                map(lambda _: next_chunk(downloader)),
                take_while(lambda p: p < 1, inclusive=True),
                flat_map(lambda p: progress(
                    default_message=
                    'Downloaded {downloaded_files} of {total_files} files ({downloaded} of {total})',
                    message_key='tasks.drive.download_folder',
                    downloaded_bytes=int(total_bytes * p),
                    downloaded=format_bytes(int(total_bytes * p)),
                    total_bytes=total_bytes,
                    total=format_bytes(total_bytes),
                    file=f)))

        def action():
            return using_file(file=dest,
                              mode='wb',
                              to_observable=download_from_drive)

        # Make sure the parent directory exists before the file is opened.
        os.makedirs(os.path.dirname(dest), exist_ok=True)

        initial_progress = progress(
            default_message=
            'Downloaded {downloaded_files} of {total_files} files ({downloaded} of {total})',
            message_key='tasks.drive.download_folder',
            downloaded_bytes=0,
            downloaded='0 bytes',
            total_bytes=total_bytes,
            total=format_bytes(total_bytes),
            file=f)
        touch_stream = interval(TOUCH_PERIOD).pipe(
            flat_map(lambda _: touch(credentials, f)))
        download_stream = enqueue(credentials,
                                  queue=_drive_downloads,
                                  action=action,
                                  retries=retries,
                                  description='Download {} to {}'.format(
                                      f, dest)).pipe(aside(touch_stream))

        return concat(initial_progress, download_stream, delete_downloaded(f))

    def download_folder(folder):
        def aggregate_progress(progresses: list):
            total_files = len(progresses)
            total_bytes = sum([int(p.file['size']) for p in progresses])
            # A file counts as downloaded once its byte count equals its size.
            downloaded_files = len(
                [p for p in progresses if p.downloaded_bytes == p.total_bytes])
            downloaded_bytes = sum([p.downloaded_bytes for p in progresses])

            return progress(
                default_message=
                'Downloaded {downloaded_files} of {total_files} files ({downloaded} of {total})',
                message_key='tasks.drive.download_folder',
                downloaded_files=downloaded_files,
                downloaded_bytes=downloaded_bytes,
                downloaded=format_bytes(downloaded_bytes),
                total_files=total_files,
                total_bytes=total_bytes,
                total=format_bytes(total_bytes))

        # combine_latest is only built when there is at least one file.
        return list_folder_recursively(credentials, folder).pipe(
            map(lambda files: filter_files(files)),
            flat_map(lambda files: combine_latest(
                *[download_file(f, get_file_destination(f)) for f in files])
                     if files else empty()), flat_map(aggregate_progress))

    if is_folder(file):
        return download_folder(file)
    else:
        return download_file(file, destination)
Ejemplo n.º 7
0
 def filter_files(files):
     """Return the entries of ``files`` that are matching non-folders.

     An entry is kept when ``is_folder`` is false for it and
     ``is_file_matching`` accepts it; ``is_file_matching`` is not called for
     folders. Input order is preserved.
     """
     kept = []
     for candidate in files:
         if is_folder(candidate):
             continue
         if is_file_matching(candidate):
             kept.append(candidate)
     return kept
Ejemplo n.º 8
0
def download(
    credentials,
    file: dict,
    destination: str,
    matching: str = None,
    delete_after_download: bool = False,
) -> Observable:
    """Download ``file`` to ``destination``, recursing when it is a folder.

    For a folder, the folder is listed recursively, folders are dropped and
    the remaining files are filtered by ``matching``. Every kept file is
    downloaded to ``destination`` plus its path relative to ``file['path']``.
    The per-file progress dicts are combined and summarised into a dict with
    the keys 'downloaded_files', 'downloaded_bytes', 'total_files' and
    'total_bytes'. A folder with no matching files yields ``empty()``.

    For a non-folder, ``file`` is downloaded directly to ``destination`` and
    the stream carries per-file progress dicts with the keys
    'downloaded_bytes', 'total_bytes' and 'file'.

    :param credentials: passed through to ``get_service``, ``touch``,
        ``enqueue``, ``delete_file`` and ``list_folder_recursively``.
    :param file: file or folder description; 'path' is read for folders,
        'id' and 'size' for files.
    :param destination: target path; made absolute before use.
    :param matching: optional ``fnmatch`` pattern applied to each file's
        'path'. A falsy value keeps every file.
    :param delete_after_download: when true, ``delete_file`` is invoked for
        each file after its download stream.
    :return: the observable built as described above.
    """
    logging.debug('downloading {} to {}'.format(file, destination))
    destination = os.path.abspath(destination)

    def get_file_destination(f):
        # Strip the root folder's path prefix to get the path relative to it.
        relative_path = f['path'][len(file['path']):]
        next_destination = '{}{}{}'.format(
            destination, '' if destination[-1] == '/' else '/', relative_path)
        return next_destination

    def is_file_matching(f):
        return not matching or fnmatch.fnmatch(f['path'], matching)

    def delete_downloaded(f):
        if delete_after_download:
            # Whatever delete_file emits is discarded; nothing is forwarded.
            return delete_file(credentials,
                               f).pipe(flat_map(lambda _: empty()))
        else:
            return empty()

    def filter_files(files):
        return [f for f in files if not is_folder(f) and is_file_matching(f)]

    def download_file(f, dest):
        total_bytes = int(f['size'])

        def next_chunk(downloader):
            status, done = downloader.next_chunk()
            # Log the file and path actually being written, matching the
            # queue description built further down.
            logging.debug('downloaded chunk from {} to {}: {}'.format(
                f, dest, status))
            return 1.0 if done else status.progress()

        def download_from_drive(destination_file):
            def create_downloader():
                request = get_service(credentials).files().get_media(
                    fileId=f['id'])
                return MediaIoBaseDownload(fd=destination_file,
                                           request=request,
                                           chunksize=CHUNK_SIZE)

            downloader = create_downloader()
            # Pull chunks until the reported fraction reaches 1; the final
            # value is still emitted because of inclusive=True.
            return forever().pipe(
                map(lambda _: next_chunk(downloader)),
                take_while(lambda progress: progress < 1, inclusive=True),
                map(
                    lambda progress: {
                        'downloaded_bytes': int(total_bytes * progress),
                        'total_bytes': total_bytes,
                        'file': f
                    }))

        def action():
            return using_file(file=dest,
                              mode='wb',
                              to_observable=download_from_drive)

        # Make sure the parent directory exists before the file is opened.
        os.makedirs(os.path.dirname(dest), exist_ok=True)

        initial_progress_stream = of({
            'downloaded_bytes': 0,
            'total_bytes': total_bytes,
            'file': f
        })
        touch_stream = interval(TOUCH_PERIOD).pipe(
            flat_map(lambda _: touch(credentials, f)))
        download_stream = enqueue(credentials,
                                  queue=_drive_downloads,
                                  action=action,
                                  retries=0,
                                  description='Download {} to {}'.format(
                                      f, dest)).pipe(aside(touch_stream))

        return concat(initial_progress_stream, download_stream,
                      delete_downloaded(f))

    def download_folder(folder):
        def aggregate_progress(progresses: list):
            total_files = len(progresses)
            total_bytes = sum([int(p['file']['size']) for p in progresses])
            # A file counts as downloaded once its byte count equals its size.
            downloaded_files = len([
                p for p in progresses
                if p['downloaded_bytes'] == p['total_bytes']
            ])
            downloaded_bytes = sum([p['downloaded_bytes'] for p in progresses])

            return {
                'downloaded_files': downloaded_files,
                'downloaded_bytes': downloaded_bytes,
                'total_files': total_files,
                'total_bytes': total_bytes
            }

        # combine_latest is only built when there is at least one file; an
        # empty or fully filtered-out folder completes without emitting.
        return list_folder_recursively(credentials, folder).pipe(
            map(lambda files: filter_files(files)),
            flat_map(lambda files: combine_latest(
                *[download_file(f, get_file_destination(f)) for f in files])
                     if files else empty()),
            map(aggregate_progress))

    if is_folder(file):
        return download_folder(file)
    else:
        return download_file(file, destination)