예제 #1
0
def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
    """Return the body of a Project Gutenberg text as a unicode string.

    The first request for a text downloads it and stores it gzip-compressed
    (UTF-8 encoded) below ``_TEXT_CACHE``. The returned value is always read
    back from that local cache file.

    Args:
        etextno: Identifier of the text; normalised by ``validate_etextno``.
        refresh_cache: When true, ``remove`` is called on the cache file
            before the existence check so the text is fetched again.
        mirror: Forwarded unchanged to ``_format_download_uri``.
        prefer_ascii: Forwarded unchanged to ``_format_download_uri``.

    Returns:
        The cache file's contents decoded as UTF-8.

    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cache_path)

    already_cached = os.path.exists(cache_path)
    if not already_cached:
        makedirs(os.path.dirname(cache_path))
        uri = _format_download_uri(etextno, mirror, prefer_ascii)
        reply = requests.get(uri)
        # Some ebooks or mirrors advertise an encoding that does not match
        # the payload, which would corrupt the cached copy and break
        # downstream usages. Example: #55517 from aleph.gutenberg.org is
        # announced as 'ISO-8859-1' although the file is UTF-8:
        #
        #     from gutenberg.acquire import load_etext
        #     print(load_etext(55517, refresh_cache=True)[0:1000])
        #
        # So trust the detected encoding whenever the two disagree.
        detected = reply.apparent_encoding
        if reply.encoding != detected:
            reply.encoding = detected
        body = reply.text
        with closing(gzip.open(cache_path, 'w')) as sink:
            sink.write(body.encode('utf-8'))

    with closing(gzip.open(cache_path, 'r')) as source:
        raw = source.read()
    return raw.decode('utf-8')
예제 #2
0
def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
    """Fetch a Project Gutenberg text and return it as a unicode string.

    A text that is not yet cached is downloaded once and written as a
    gzip-compressed, UTF-8 encoded file under ``_TEXT_CACHE``. Every call
    then returns what it reads back from that file.

    Args:
        etextno: Text identifier, passed through ``validate_etextno``.
        refresh_cache: If true, ``remove`` is applied to the cache file
            first, so the existence check that follows triggers a download.
        mirror: Handed to ``_format_download_uri`` as-is.
        prefer_ascii: Handed to ``_format_download_uri`` as-is.

    Returns:
        The UTF-8 decoded contents of the cache file.
    """
    etextno = validate_etextno(etextno)
    gz_path = os.path.join(_TEXT_CACHE, '{}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(gz_path)

    must_download = not os.path.exists(gz_path)
    if must_download:
        makedirs(os.path.dirname(gz_path))
        source_uri = _format_download_uri(etextno, mirror, prefer_ascii)
        http_response = requests.get(source_uri)
        # Guard against servers that declare the wrong charset, which would
        # break downstream usages. For instance, #55517 served by
        # aleph.gutenberg.org reports 'ISO-8859-1' while the file itself is
        # UTF-8:
        #
        #     from gutenberg.acquire import load_etext
        #     print(load_etext(55517, refresh_cache=True)[0:1000])
        #
        # When declared and detected encodings differ, use the detected one.
        guessed_encoding = http_response.apparent_encoding
        if http_response.encoding != guessed_encoding:
            http_response.encoding = guessed_encoding
        downloaded = http_response.text
        with closing(gzip.open(gz_path, 'w')) as gz_out:
            gz_out.write(downloaded.encode('utf-8'))

    with closing(gzip.open(gz_path, 'r')) as gz_in:
        stored_bytes = gz_in.read()
    return stored_bytes.decode('utf-8')


# def _main():
#     """Command line interface to the module.
#     """
#     from argparse import ArgumentParser, FileType
#     from gutenberg import Error
#     from gutenberg._util.os import reopen_encoded

#     parser = ArgumentParser(description='Download a Project Gutenberg text')
#     parser.add_argument('etextno', type=int)
#     parser.add_argument('outfile', type=FileType('w'))
#     parser.add_argument('--mirror', '-m', type=str, default=None)
#     parser.add_argument('--prefer-ascii', '-a', type=bool, default=False)
#     args = parser.parse_args()

#     try:
#         text = load_etext(args.etextno,
#                           mirror=args.mirror,
#                           prefer_ascii=args.prefer_ascii)
#         with reopen_encoded(args.outfile, 'w', 'utf8') as outfile:
#             outfile.write(text)
#     except Error as error:
#         parser.error(str(error))

# if __name__ == '__main__':
#     _main()
예제 #3
0
    def _populate_setup(self):
        """Write the local cache marker file and open the graph.

        Only a marker is created locally; per the original note, the actual
        database is expected to already exist on the Fuseki server. The
        marker's parent directory is created first, then ``self.cache_uri``
        is written into the marker as UTF-8 text.

        """
        marker_path = self._cache_marker
        marker_dir = os.path.dirname(marker_path)
        makedirs(marker_dir)

        marker = codecs.open(marker_path, 'w', encoding='utf-8')
        with marker:
            marker.write(self.cache_uri)

        self.graph.open(self.cache_uri)
예제 #4
0
파일: metadata.py 프로젝트: c-w/Gutenberg
    def _populate_setup(self):
        """Create the local marker file, then open the graph on the cache URI.

        The real database is expected to have been created on the Fuseki
        server already (as stated by the original note), so the only local
        artefact is a UTF-8 marker file containing ``self.cache_uri``.

        """
        marker_file = self._cache_marker
        makedirs(os.path.dirname(marker_file))

        handle = codecs.open(marker_file, 'w', encoding='utf-8')
        with handle:
            handle.write(self.cache_uri)

        self.graph.open(self.cache_uri)
예제 #5
0
def _open_or_create_metadata_graph():
    """Open the persistent RDF graph, building it first when it is missing.

    A new graph object is stored in the module-level
    ``_METADATA_DATABASE_SINGLETON``. If ``_METADATA_CACHE`` does not exist
    on disk, the directory is created and the graph is populated before it
    is opened with ``create=False``.

    Returns:
        Whatever ``_add_namespaces`` returns for the opened graph.

    """
    global _METADATA_DATABASE_SINGLETON
    graph = _create_metadata_graph()
    _METADATA_DATABASE_SINGLETON = graph

    cache_missing = not os.path.exists(_METADATA_CACHE)
    if cache_missing:
        makedirs(_METADATA_CACHE)
        _populate_metadata_graph(graph)

    graph.open(_METADATA_CACHE, create=False)
    return _add_namespaces(graph)
예제 #6
0
def _open_or_create_metadata_graph():
    """Connect to the on-disk RDF graph, creating it on first use.

    The freshly created graph replaces ``_METADATA_DATABASE_SINGLETON``.
    When the ``_METADATA_CACHE`` path is absent, it is created and the graph
    is populated; in every case the graph is then opened on that path with
    ``create=False``.

    Returns:
        The result of ``_add_namespaces`` applied to the opened graph.

    """
    global _METADATA_DATABASE_SINGLETON
    metadata_graph = _create_metadata_graph()
    _METADATA_DATABASE_SINGLETON = metadata_graph

    if os.path.exists(_METADATA_CACHE):
        pass  # cache directory already present: nothing to build
    else:
        makedirs(_METADATA_CACHE)
        _populate_metadata_graph(metadata_graph)

    metadata_graph.open(_METADATA_CACHE, create=False)
    return _add_namespaces(metadata_graph)
예제 #7
0
def load_etext(etextno, refresh_cache=False):
    """Return the body of a Project Gutenberg text as a unicode string.

    A text that is not cached yet is downloaded once and saved as a
    gzip-compressed, UTF-8 encoded file under ``_TEXT_CACHE``. The result is
    always read back from that file.

    Args:
        etextno: Text identifier, normalised by ``validate_etextno``.
        refresh_cache: When true, ``remove`` is called on the cache file
            before the existence check so the text is fetched again.

    Returns:
        The cache file's contents decoded as UTF-8.

    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{0}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cache_path)

    is_cached = os.path.exists(cache_path)
    if not is_cached:
        makedirs(os.path.dirname(cache_path))
        reply = requests.get(_format_download_uri(etextno))
        body = reply.text
        with closing(gzip.open(cache_path, 'w')) as sink:
            sink.write(body.encode('utf-8'))

    with closing(gzip.open(cache_path, 'r')) as source:
        stored = source.read()
    return stored.decode('utf-8')
예제 #8
0
def load_etext(etextno, refresh_cache=False):
    """Return the body of a Project Gutenberg text as a unicode string.

    A cached text is read from its gzip file under ``_TEXT_CACHE``.
    Otherwise the text is downloaded, decoded as UTF-8 regardless of the
    encoding the server announced, written to the cache, and returned
    directly without re-reading the file.

    Args:
        etextno: Text identifier, normalised by ``validate_etextno``.
        refresh_cache: When true, ``remove`` is called on the cache file
            before the existence check so the text is fetched again.

    Returns:
        The text as a unicode string.

    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{0}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cache_path)

    if os.path.exists(cache_path):
        # Cache hit: serve the stored copy.
        with contextlib.closing(gzip.open(cache_path, 'r')) as source:
            text = source.read().decode('utf-8')
    else:
        # Cache miss: download, persist, and return the fresh text.
        makedirs(os.path.dirname(cache_path))
        reply = requests.get(_format_download_uri(etextno))
        reply.encoding = 'utf-8'
        text = reply.text
        with contextlib.closing(gzip.open(cache_path, 'w')) as sink:
            sink.write(text.encode('utf-8'))
    return text
예제 #9
0
 def test_makedirs(self):
     """makedirs creates a nested path that did not exist beforehand."""
     segments = ('foo', 'bar', 'baz')
     target = os.path.join(self.temporary_directory, *segments)

     existed_before = os.path.exists(target)
     self.assertFalse(existed_before)

     makedirs(target)

     exists_after = os.path.exists(target)
     self.assertTrue(exists_after)
예제 #10
0
 def _populate_setup(self):
     """Create the cache location, then open the graph there in create mode."""
     uri = self.cache_uri
     makedirs(uri)
     self.graph.open(uri, create=True)
예제 #11
0
 def _populate_setup(self):
     """Ensure the path named by ``self.cache_uri`` exists via ``makedirs``."""
     cache_location = self.cache_uri
     makedirs(cache_location)
예제 #12
0
 def test_makedirs(self):
     """A missing nested directory exists once makedirs has been called."""
     nested_parts = ('foo', 'bar', 'baz')
     nested_dir = os.path.join(self.temporary_directory, *nested_parts)

     present_initially = os.path.exists(nested_dir)
     self.assertFalse(present_initially)

     makedirs(nested_dir)

     present_afterwards = os.path.exists(nested_dir)
     self.assertTrue(present_afterwards)
예제 #13
0
파일: metadata.py 프로젝트: c-w/Gutenberg
 def _populate_setup(self):
     """Make the cache path, then open the graph on it with ``create=True``."""
     location = self.cache_uri
     makedirs(location)
     self.graph.open(location, create=True)
예제 #14
0
 def test_makedirs_does_not_swallow_exception(self):
     """An OSError raised by ``os.makedirs`` must propagate out of makedirs.

     ``os.makedirs`` is temporarily replaced by a stub that always raises
     ``OSError``; the wrapper under test is expected to let it through.
     """
     original_makedirs = os.makedirs
     os.makedirs = always_throw(OSError)
     try:
         with self.assertRaises(OSError):
             makedirs('/some/path')
     finally:
         # Restore the real function even when the assertion fails, so the
         # stub cannot leak into tests that run afterwards.
         os.makedirs = original_makedirs
예제 #15
0
 def _populate_setup(self):
     """Call ``makedirs`` on the location given by ``self.cache_uri``."""
     target = self.cache_uri
     makedirs(target)
예제 #16
0
파일: test_util.py 프로젝트: c-w/Gutenberg
 def test_makedirs_does_not_swallow_exception(self):
     """makedirs must re-raise an ``OSError`` coming from ``os.makedirs``.

     The real ``os.makedirs`` is swapped for a stub that always raises
     ``OSError`` for the duration of the check.
     """
     original_makedirs = os.makedirs
     os.makedirs = always_throw(OSError)
     try:
         with self.assertRaises(OSError):
             makedirs('/some/path')
     finally:
         # Always undo the monkeypatch; without this a failing assertion
         # would leave os.makedirs broken for the rest of the test run.
         os.makedirs = original_makedirs