# NOTE: this excerpt assumes the surrounding module already imports os, datetime,
# and run/PIPE/DEVNULL (from subprocess, or a project wrapper with the same
# signature), and defines the config constants (YOUTUBEDL_BINARY, CURL_BINARY,
# WGET_BINARY, TIMEOUT, MEDIA_TIMEOUT, GIT_SHA, CHECK_SSL_VALIDITY, FETCH_WARC,
# WGET_AUTO_COMPRESSION, FETCH_WGET_REQUISITES, WGET_USER_AGENT, COOKIES_FILE)
# and the helpers (TimedProgress, chmod_file, ArchiveError,
# parse_archive_dot_org_response, wget_output_path) referenced below.

def fetch_media(link_dir, link, timeout=MEDIA_TIMEOUT):
    """Download playlists or individual video, audio, and subtitles using youtube-dl"""

    output = 'media'
    output_path = os.path.join(link_dir, 'media')
    os.makedirs(output_path, exist_ok=True)
    cmd = [
        YOUTUBEDL_BINARY,
        '--write-description',
        '--write-info-json',
        '--write-annotations',
        '--yes-playlist',
        '--write-thumbnail',
        '--no-call-home',
        # note: a bare '--user-agent' flag was dropped here; youtube-dl expects a
        # value after it, so leaving it in would swallow the next flag as the UA
        '--all-subs',
        '--extract-audio',
        '--keep-video',
        '--ignore-errors',
        '--geo-bypass',
        '--audio-format', 'mp3',
        '--audio-quality', '320K',
        '--embed-thumbnail',
        '--add-metadata',
        # SSL checks are only skipped when CHECK_SSL_VALIDITY is off (the
        # unconditional duplicate of this flag was removed)
        *(() if CHECK_SSL_VALIDITY else ('--no-check-certificate',)),
        link['url'],
    ]
    status = 'succeeded'
    timer = TimedProgress(timeout, prefix=' ')
    try:
        result = run(cmd, stdout=PIPE, stderr=PIPE, cwd=output_path, timeout=timeout + 1)
        chmod_file(output, cwd=link_dir)
        if result.returncode:
            if (b'ERROR: Unsupported URL' in result.stderr
                    or b'HTTP Error 404' in result.stderr
                    or b'HTTP Error 403' in result.stderr
                    or b'URL could be a direct video link' in result.stderr
                    or b'Unable to extract container ID' in result.stderr):
                # These happen too frequently on non-media pages to warrant printing to console
                pass
            else:
                hints = (
                    'Got youtube-dl response code: {}.'.format(result.returncode),
                    *result.stderr.decode().split('\n'),
                )
                raise ArchiveError('Failed to download media', hints)
    except Exception as err:
        status = 'failed'
        output = err
    finally:
        timer.end()

    return {
        'cmd': cmd,
        'pwd': link_dir,
        'output': output,
        'status': status,
        **timer.stats,
    }
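
# ArchiveError is raised above with an optional tuple of hint lines but is not
# defined in this excerpt. A minimal sketch of a compatible exception class,
# assuming it only needs to carry the message plus the hints for the caller to
# display (the real class may differ):
class ArchiveError(Exception):
    def __init__(self, message, hints=None):
        super().__init__(message)
        # extra context lines (e.g. tool exit code and stderr tail) for error output
        self.hints = hints
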
def archive_dot_org(link_dir, link, timeout=TIMEOUT):
    """submit site to archive.org for archiving via their service, save returned archive url"""

    output = 'archive.org.txt'
    archive_org_url = None
    submit_url = 'https://web.archive.org/save/{}'.format(link['url'])
    cmd = [
        CURL_BINARY,
        '--location',
        '--head',
        # be nice to the Archive.org people and show them where all this ArchiveBox traffic is coming from
        '--user-agent', 'ArchiveBox/{} (+https://github.com/pirate/ArchiveBox/)'.format(GIT_SHA),
        '--max-time', str(timeout),
        *(() if CHECK_SSL_VALIDITY else ('--insecure',)),
        submit_url,
    ]
    status = 'succeeded'
    timer = TimedProgress(timeout, prefix=' ')
    try:
        result = run(cmd, stdout=PIPE, stderr=DEVNULL, cwd=link_dir, timeout=timeout)
        content_location, errors = parse_archive_dot_org_response(result.stdout)
        if content_location:
            archive_org_url = 'https://web.archive.org{}'.format(content_location[0])
        elif len(errors) == 1 and 'RobotAccessControlException' in errors[0]:
            archive_org_url = None
            # raise ArchiveError('Archive.org denied by {}/robots.txt'.format(domain(link['url'])))
        elif errors:
            raise ArchiveError(', '.join(errors))
        else:
            raise ArchiveError('Failed to find "content-location" URL header in Archive.org response.')
    except Exception as err:
        status = 'failed'
        output = err
    finally:
        timer.end()

    if not isinstance(output, Exception):
        # If archive.org rejected the url, write the submit url instead of None so
        # that clicking the saved link re-submits it to archive.org, which will
        # either re-archive it or show its nicer error message explaining why the
        # url was rejected.
        archive_org_url = archive_org_url or submit_url
        with open(os.path.join(link_dir, output), 'w', encoding='utf-8') as f:
            f.write(archive_org_url)
        chmod_file('archive.org.txt', cwd=link_dir)
        output = archive_org_url

    return {
        'cmd': cmd,
        'pwd': link_dir,
        'output': output,
        'status': status,
        **timer.stats,
    }
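
# parse_archive_dot_org_response() is not shown in this excerpt. Since the curl
# command above uses --head, its stdout is a block of raw HTTP response headers;
# below is a minimal sketch of a compatible parser, assuming the archive path is
# reported in a "content-location" header and errors in an
# "x-archive-wayback-runtime-error" header (both header names are assumptions
# based on how the calling code uses the return values):
from collections import defaultdict

def parse_archive_dot_org_response(response):
    # collect header values keyed by lowercased header name
    headers = defaultdict(list)
    for header in response.splitlines():
        if b':' not in header:
            continue
        name, val = header.decode().split(':', 1)
        headers[name.lower().strip()].append(val.strip())

    # path of the saved snapshot (if any) and any runtime errors reported by archive.org
    content_location = headers['content-location']
    errors = headers['x-archive-wayback-runtime-error']
    return content_location, errors
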
def fetch_wget(link_dir, link, timeout=TIMEOUT):
    """download full site using wget"""

    if FETCH_WARC:
        warc_dir = os.path.join(link_dir, 'warc')
        os.makedirs(warc_dir, exist_ok=True)
        warc_path = os.path.join('warc', str(int(datetime.now().timestamp())))

    # WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html
    output = None
    cmd = [
        WGET_BINARY,
        # '--server-response',  # print headers for better error parsing
        '--no-verbose',
        '--adjust-extension',
        '--convert-links',
        '--force-directories',
        '--backup-converted',
        '--span-hosts',
        '--no-parent',
        '-e', 'robots=off',
        '--restrict-file-names=unix',
        '--timeout={}'.format(timeout),
        *(('--compression=auto',) if WGET_AUTO_COMPRESSION else ()),
        *(() if FETCH_WARC else ('--timestamping',)),
        *(('--warc-file={}'.format(warc_path),) if FETCH_WARC else ()),
        *(('--page-requisites',) if FETCH_WGET_REQUISITES else ()),
        *(('--user-agent={}'.format(WGET_USER_AGENT),) if WGET_USER_AGENT else ()),
        *(('--load-cookies', COOKIES_FILE) if COOKIES_FILE else ()),
        *(() if CHECK_SSL_VALIDITY else ('--no-check-certificate', '--no-hsts')),
        link['url'],
    ]
    status = 'succeeded'
    timer = TimedProgress(timeout, prefix=' ')
    try:
        result = run(cmd, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout)
        output = wget_output_path(link)

        # parse out number of files downloaded from last line of stderr:
        # "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
        output_tail = [
            line.strip()
            for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
            if line.strip()
        ]
        files_downloaded = (
            int(output_tail[-1].strip().split(' ', 2)[1] or 0)
            if 'Downloaded:' in output_tail[-1]
            else 0
        )

        # Check for common failure cases
        if result.returncode > 0 and files_downloaded < 1:
            hints = (
                'Got wget response code: {}.'.format(result.returncode),
                *output_tail,
            )
            if b'403: Forbidden' in result.stderr:
                raise ArchiveError('403 Forbidden (try changing WGET_USER_AGENT)', hints)
            if b'404: Not Found' in result.stderr:
                raise ArchiveError('404 Not Found', hints)
            if b'ERROR 500: Internal Server Error' in result.stderr:
                raise ArchiveError('500 Internal Server Error', hints)
            raise ArchiveError('Got an error from the server', hints)
    except Exception as err:
        status = 'failed'
        output = err
    finally:
        timer.end()

    return {
        'cmd': cmd,
        'pwd': link_dir,
        'output': output,
        'status': status,
        **timer.stats,
    }
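
# All three fetchers above return the same result-dict shape ('cmd', 'pwd',
# 'output', 'status', plus the TimedProgress timing stats), so a caller can run
# them uniformly. A minimal sketch of such a dispatcher; the function name,
# method ordering, and the printed message are hypothetical and not part of
# this excerpt:
def archive_methods_sketch(link_dir, link):
    results = {}
    for name, method in (('wget', fetch_wget),
                         ('media', fetch_media),
                         ('archive_org', archive_dot_org)):
        result = method(link_dir, link)
        results[name] = result
        if result['status'] == 'failed':
            # on failure, 'output' holds the exception raised by the method
            print('    [!] {} failed: {}'.format(name, result['output']))
    return results
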