Beispiel #1
0
def _format_download_uri(etextno, mirror=None, prefer_ascii=False):
    """Locate the plain-text download URI of a text on a Gutenberg mirror.

    Candidate file names are probed one after another and the first one that
    exists is returned.

    Args:
        etextno: Identifier of the text to look up.
        mirror: Mirror to query; ``_GUTENBERG_MIRROR`` is used when this is
            empty or ``None``.
        prefer_ascii: When true, the us-ascii ``.txt`` file is tried first,
            then UTF-8, then 8-bit. When false (the default), UTF-8 is tried
            first, then 8-bit, then us-ascii.

    Returns:
        The first candidate URI that exists on the mirror.

    Raises:
        UnknownDownloadUriException: If the mirror cannot be reached, or if
            none of the candidate files exists on it.
    """
    if not mirror:
        mirror = _GUTENBERG_MIRROR

    if not _does_mirror_exist(mirror):
        unreachable = (
            'Could not reach Gutenberg mirror "{:s}". Try setting a '
            'different mirror (https://www.gutenberg.org/MIRRORS.ALL) for '
            '--mirror flag or GUTENBERG_MIRROR environment variable.')
        raise UnknownDownloadUriException(unreachable.format(mirror))

    # File-name suffixes, see https://www.gutenberg.org/files/ :
    #   '.txt'   -> plaintext us-ascii
    #   '-8.txt' -> 8-bit plaintext, multiple encodings
    #   '-0.txt' -> UTF-8
    if prefer_ascii:
        suffixes = ('.txt', '-0.txt', '-8.txt')
    else:
        suffixes = ('-0.txt', '-8.txt', '.txt')

    for suffix in suffixes:
        candidate = _format_download_uri_for_extension(
            etextno, suffix, mirror)
        if not _does_uri_exist(candidate):
            continue
        return candidate

    not_found = (
        'Failed to find a textual download candidate for {} on {}. '
        'Either the book does not exist or it is only available in '
        'non-textual formats.')
    raise UnknownDownloadUriException(not_found.format(etextno, mirror))
Beispiel #2
0
def _format_download_uri(etextno, mirror=None):
    """Return the download location on the Project Gutenberg servers for a
    given text.

    Args:
        etextno: Identifier of the text; used both to derive the
            subdirectory on the mirror and as the file name stem.
        mirror: Root URI of the mirror to query. ``_GUTENBERG_MIRROR`` is
            used when this is empty or ``None``. Surrounding whitespace and
            trailing slashes are removed before use.

    Returns:
        The first candidate URI whose HEAD request yields an OK response.
        Candidates are tried in the order ``.txt``, ``-8.txt``, ``-0.txt``.

    Raises:
        UnknownDownloadUriException: If none of the candidate URIs responds
            successfully. The preceding mirror check may raise it as well.
    """
    uri_root = (mirror or _GUTENBERG_MIRROR).strip().rstrip('/')
    _check_mirror_exists(uri_root)

    # The subdirectory depends only on etextno, so it is computed once
    # instead of once per candidate extension.
    path = _etextno_to_uri_subdirectory(etextno)

    extensions = ('.txt', '-8.txt', '-0.txt')
    for extension in extensions:
        uri = '{root}/{path}/{etextno}{extension}'.format(
            root=uri_root,
            path=path,
            etextno=etextno,
            extension=extension)
        # NOTE(review): no timeout is passed, so a stalled mirror can block
        # this call indefinitely -- consider adding one.
        response = requests.head(uri)
        if response.ok:
            return uri

    raise UnknownDownloadUriException('Failed to find {0} on {1}.'
                                      .format(etextno, uri_root))
Beispiel #3
0
def _check_mirror_exists(mirror):
    """Verify that a Gutenberg mirror answers a HEAD request.

    Args:
        mirror: URI of the mirror to probe.

    Raises:
        UnknownDownloadUriException: If the HEAD request does not yield an
            OK response.
    """
    if requests.head(mirror).ok:
        return

    message = (
        'Could not reach Gutenberg mirror "{0:s}". Try setting a '
        'different mirror (https://www.gutenberg.org/MIRRORS.ALL) for '
        '--mirror flag or GUTENBERG_MIRROR environment variable.')
    raise UnknownDownloadUriException(message.format(mirror))