def export_skeleton(static_folder, dev_mode=False, languages=[], formats=[], only_books=[]):
    """Populate ``static_folder`` with static assets and the rendered entry pages.

    The function creates ``static_folder`` (``mkdir_p``), copies a fixed list
    of asset entries from the template directory returned by ``tmpl_path()``,
    then renders ``Home.html``, ``js/tools.js`` and ``js/l10n.js`` through
    ``jinja_env`` and saves them with ``save_bs_output``.

    :param static_folder: destination directory of the export
    :param dev_mode: value exposed to the templates as ``dev_mode``
    :param languages: language filter forwarded to the book/project helpers
    :param formats: format filter forwarded to the book/project helpers
    :param only_books: book filter forwarded to the book/project helpers
    """
    # the destination must exist before anything is copied into it
    path(static_folder).mkdir_p()

    selection = dict(languages=languages, formats=formats, only_books=only_books)
    project_id = get_project_id(**selection)
    books = get_list_of_filtered_books(**selection)

    # mirror CSS/JS/* from the template directory into static_folder
    assets_root = tmpl_path()
    asset_names = ('css', 'js', 'jquery', 'favicon.ico', 'favicon.png',
                   'jquery-ui', 'datatables', 'fonts')
    for asset in asset_names:
        origin = os.path.join(assets_root, asset)
        target = os.path.join(static_folder, asset)
        if path(asset).ext:
            # names carrying an extension are copied as single files
            path(origin).copyfile(target)
            continue
        # extension-less names are copied as trees; the previous copy is
        # dropped first (rmtree_p) before copytree recreates it
        path(target).rmtree_p()
        path(origin).copytree(target)

    # render the homepage and its companion scripts
    context = get_default_context(project_id, books=books)
    context.update({'show_books': True, 'dev_mode': dev_mode})
    for tpl_path in ('Home.html', 'js/tools.js', 'js/l10n.js'):
        output_file = os.path.join(static_folder, tpl_path)
        rendered = jinja_env.get_template(tpl_path).render(**context)
        save_bs_output(rendered, output_file, UTF8)
def build_zimfile(static_folder, zim_path=None, languages=[], formats=[],
                  title=None, description=None, only_books=[],
                  create_index=True, force=False):
    """Package ``static_folder`` into a ZIM file by running ``zimwriterfs``.

    The skeleton is re-exported first (``dev_mode=False``), then title,
    description and output path receive defaults when not supplied, and
    finally the ``zimwriterfs`` command line is built and executed through
    ``exec_cmd``.

    :param static_folder: directory holding the exported content
    :param zim_path: output file; defaults to ``"<project_id>.zim"``
    :param languages: language codes; an empty value is replaced by ``['mul']``
    :param formats: formats, used for the default title and the project id
    :param title: ZIM title; generated from languages/formats when ``None``
    :param description: ZIM description; a fixed default is used when ``None``
    :param only_books: book filter forwarded to the helpers
    :param create_index: when true, ``--withFullTextIndex`` is passed
    :param force: when false, an already existing ``zim_path`` aborts the build
    :return: ``None``; the outcome is only reported through ``logger``
    """
    # revert HTML/JS/CSS to zim-compatible versions
    export_skeleton(static_folder=static_folder, dev_mode=False,
                    languages=languages, formats=formats,
                    only_books=only_books)

    # Use sorted *copies*: sorting in place would silently reorder the lists
    # owned by the caller (and the shared mutable default arguments).
    languages = sorted(languages) if languages else ['mul']
    formats = sorted(formats)

    if title is None:
        if len(languages) > 5:
            # too many languages to enumerate them in the title
            title = ("Project Gutenberg Library with {formats}"
                     .format(formats=",".join(formats)))
        else:
            title = ("Project Gutenberg Library ({langs}) with {formats}"
                     .format(langs=",".join(languages),
                             formats=",".join(formats)))

    logger.info("\tWritting ZIM for {}".format(title))

    if description is None:
        description = "The first producer of free ebooks"

    project_id = get_project_id(languages, formats, only_books)

    if zim_path is None:
        zim_path = "{}.zim".format(project_id)

    if path(zim_path).exists() and not force:
        logger.info("ZIM file `{}` already exist.".format(zim_path))
        return

    # translate codes through ISO_MATRIX; unknown codes are kept unchanged
    languages = sorted(ISO_MATRIX.get(lang, lang) for lang in languages)

    cmd = ['zimwriterfs',
           '--welcome', "Home.html",
           '--favicon', "favicon.png",
           '--language', ','.join(languages),
           '--name', project_id,
           '--title', title,
           '--description', description,
           '--creator', "gutenberg.org",
           '--publisher', "Kiwix",
           static_folder, zim_path]

    if create_index:
        # option goes right after the executable name
        cmd.insert(1, '--withFullTextIndex')

    if exec_cmd(cmd) == 0:
        logger.info("Successfuly created ZIM file at {}".format(zim_path))
    else:
        logger.error("Unable to create ZIM file :(")
def export_skeleton(
    static_folder=None,
    dev_mode=False,
    languages=[],
    formats=[],
    only_books=[],
    title_search=False,
    add_bookshelves=False,
):
    """Populate ``static_folder`` with static assets and the rendered entry pages.

    Steps, in order: create ``static_folder``; resolve the project id and the
    filtered book list; copy a fixed set of asset entries from ``tmpl_path()``;
    render ``Home.html``, ``js/tools.js`` and ``js/l10n.js`` with
    ``jinja_env`` and store them through ``save_bs_output``.

    :param static_folder: destination directory of the export
    :param dev_mode: exposed to the templates as ``dev_mode``
    :param languages: language filter forwarded to the book/project helpers
    :param formats: format filter forwarded to the book/project helpers
    :param only_books: book filter forwarded to the book/project helpers
    :param title_search: exposed to the templates as ``title_search``
    :param add_bookshelves: exposed to the templates as ``add_bookshelves``
    """
    # the destination must exist before anything is copied into it
    path(static_folder).mkdir_p()

    filters = {"languages": languages, "formats": formats, "only_books": only_books}
    project_id = get_project_id(**filters)
    books = get_list_of_filtered_books(**filters)

    # mirror CSS/JS/* from the template directory into static_folder
    templates_root = tmpl_path()
    for entry in (
        "css",
        "js",
        "jquery",
        "favicon.ico",
        "favicon.png",
        "jquery-ui",
        "datatables",
        "fonts",
    ):
        source = os.path.join(templates_root, entry)
        destination = os.path.join(static_folder, entry)
        if path(entry).ext:
            # an extension marks a single file
            path(source).copyfile(destination)
        else:
            # no extension: copied as a tree, replacing any previous copy
            path(destination).rmtree_p()
            path(source).copytree(destination)

    # render the homepage and its companion scripts
    context = get_default_context(project_id, books=books)
    template_flags = {
        "show_books": True,
        "dev_mode": dev_mode,
        "title_search": title_search,
        "add_bookshelves": add_bookshelves,
    }
    context.update(template_flags)
    for tpl_path in ("Home.html", "js/tools.js", "js/l10n.js"):
        page = jinja_env.get_template(tpl_path).render(**context)
        save_bs_output(page, os.path.join(static_folder, tpl_path), UTF8)
def build_zimfile(static_folder,
                  zim_path=None,
                  languages=[],
                  formats=[],
                  title=None,
                  description=None,
                  only_books=[],
                  create_index=True,
                  force=False):
    """Package ``static_folder`` into a ZIM file by running ``zimwriterfs``.

    Re-exports the skeleton with ``dev_mode=False``, fills in defaults for
    title, description and output path, then builds the ``zimwriterfs``
    command line and runs it through ``exec_cmd``.

    :param static_folder: directory holding the exported content
    :param zim_path: output file; defaults to ``"<project_id>.zim"``
    :param languages: language codes; an empty value is replaced by ``['mul']``
    :param formats: formats, used for the default title and the project id
    :param title: ZIM title; generated from languages/formats when ``None``
    :param description: ZIM description; a fixed default is used when ``None``
    :param only_books: book filter forwarded to the helpers
    :param create_index: when true, ``--withFullTextIndex`` is passed
    :param force: when false, an already existing ``zim_path`` aborts the build
    :return: ``None``; the outcome is only reported through ``logger``
    """
    # revert HTML/JS/CSS to zim-compatible versions
    export_skeleton(static_folder=static_folder,
                    dev_mode=False,
                    languages=languages,
                    formats=formats,
                    only_books=only_books)

    # Never sort the received lists in place: they belong to the caller (or
    # are the shared default arguments). Sorted copies are used instead.
    if not languages:
        languages = ['mul']
    languages = sorted(languages)
    formats = sorted(formats)

    if title is None:
        joined_formats = ",".join(formats)
        if len(languages) > 5:
            # too many languages to enumerate them in the title
            title = "Project Gutenberg Library with {formats}".format(
                formats=joined_formats)
        else:
            title = (
                "Project Gutenberg Library ({langs}) with {formats}".format(
                    langs=",".join(languages), formats=joined_formats))

    logger.info("\tWritting ZIM for {}".format(title))

    if description is None:
        description = "The first producer of free ebooks"

    project_id = get_project_id(languages, formats, only_books)

    if zim_path is None:
        zim_path = "{}.zim".format(project_id)

    if path(zim_path).exists() and not force:
        logger.info("ZIM file `{}` already exist.".format(zim_path))
        return

    # translate codes through ISO_MATRIX; unknown codes are kept unchanged
    languages = sorted(ISO_MATRIX.get(lang, lang) for lang in languages)

    cmd = [
        'zimwriterfs',
        '--welcome', "Home.html",
        '--favicon', "favicon.png",
        '--language', ','.join(languages),
        '--name', project_id,
        '--title', title,
        '--description', description,
        '--creator', "gutenberg.org",
        '--publisher', "Kiwix",
        static_folder,
        zim_path,
    ]

    if create_index:
        # option goes right after the executable name
        cmd.insert(1, '--withFullTextIndex')

    if exec_cmd(cmd) == 0:
        logger.info("Successfuly created ZIM file at {}".format(zim_path))
    else:
        logger.error("Unable to create ZIM file :(")
def build_zimfile(
    static_folder,
    output_folder,
    zim_name=None,
    languages=[],
    formats=[],
    title=None,
    description=None,
    only_books=[],
    create_index=True,
    force=False,
    title_search=False,
    add_bookshelves=False,
):
    """Package ``static_folder`` into ``output_folder/zim_name`` with ``zimwriterfs``.

    Re-exports the skeleton with ``dev_mode=False``, fills in defaults for
    title, description and file name, then builds the ``zimwriterfs`` command
    line and runs it through ``exec_cmd``.

    :param static_folder: directory holding the exported content
    :param output_folder: object providing ``joinpath``; the ZIM file is
        written inside it
    :param zim_name: file name of the ZIM; defaults to ``"<project_id>.zim"``
    :param languages: language codes; an empty value is replaced by ``["mul"]``
    :param formats: formats, used for the default title and the project id
    :param title: ZIM title; generated when ``None``
    :param description: ZIM description; a fixed default is used when ``None``
    :param only_books: book filter forwarded to the helpers
    :param create_index: when false, ``--withoutFTIndex`` is passed
    :param force: when false, an already existing target file aborts the build
    :param title_search: forwarded to ``export_skeleton``
    :param add_bookshelves: forwarded to ``export_skeleton``
    :return: ``None``; the outcome is only reported through ``logger``
    """
    # revert HTML/JS/CSS to zim-compatible versions
    export_skeleton(
        static_folder=static_folder,
        dev_mode=False,
        languages=languages,
        formats=formats,
        only_books=only_books,
        title_search=title_search,
        add_bookshelves=add_bookshelves,
    )

    # Never sort the received lists in place: they belong to the caller (or
    # are the shared default arguments). Sorted copies are used instead.
    if not languages:
        languages = ["mul"]
    languages = sorted(languages)
    formats = sorted(formats)

    if title is None:
        if len(languages) > 5:
            # too many languages to enumerate them in the title
            title = "Project Gutenberg Library"
        else:
            title = "Project Gutenberg Library ({langs})".format(
                langs=",".join(languages)
            )

        # formats are only spelled out when they are a subset of FORMAT_MATRIX
        if len(formats) < len(FORMAT_MATRIX):
            title += " with {formats}".format(formats=",".join(formats))

    logger.info("\tWritting ZIM for {}".format(title))

    if description is None:
        description = "The first producer of free ebooks"

    project_id = get_project_id(languages, formats, only_books)

    if zim_name is None:
        zim_name = "{}.zim".format(project_id)
    zim_path = output_folder.joinpath(zim_name)

    # The file is created at zim_path (inside output_folder), so that is the
    # location to test; testing the bare zim_name would look in the current
    # working directory and miss an existing file.
    if path(six.text_type(zim_path)).exists() and not force:
        logger.info("ZIM file `{}` already exist.".format(zim_name))
        return

    # translate codes through ISO_MATRIX; unknown codes are kept unchanged
    languages = sorted(ISO_MATRIX.get(lang, lang) for lang in languages)

    cmd = [
        "zimwriterfs",
        "--welcome",
        "Home.html",
        "--favicon",
        "favicon.png",
        "--language",
        ",".join(languages),
        "--name",
        project_id,
        "--title",
        title,
        "--description",
        description,
        "--creator",
        "gutenberg.org",
        "--tags",
        "gutenberg",
        "--publisher",
        "Kiwix",
        "--scraper",
        # NOTE(review): "gutengergtozim" looks like a typo of the scraper
        # name; kept as-is because it ends up in the ZIM metadata - confirm
        # before changing.
        "gutengergtozim-{v}".format(v=VERSION),
        static_folder,
        six.text_type(zim_path),
    ]

    if not create_index:
        # option goes right after the executable name
        cmd.insert(1, "--withoutFTIndex")

    if exec_cmd(cmd) == 0:
        logger.info("Successfuly created ZIM file at {}".format(zim_path))
    else:
        logger.error("Unable to create ZIM file :(")
def export_all_books(
    static_folder=None,
    download_cache=None,
    concurrency=None,
    languages=[],
    formats=[],
    only_books=[],
    force=False,
    title_search=False,
    add_bookshelves=False,
    s3_storage=None,
    optimizer_version=None,
):
    """Export the skeleton, the JSON helpers and every filtered book.

    After resolving the filtered book list, the function logs per-format
    counts, writes the JSON helpers and the static skeleton, assigns a
    ``popularity`` star count to every book from its ``downloads`` value and
    finally runs ``export_book`` for each book on a ``Pool`` of
    ``concurrency`` workers.

    :param static_folder: destination directory of the export
    :param download_cache: directory under which each book has a
        sub-directory named after its id
    :param concurrency: size handed to ``Pool``
    :param languages: language filter
    :param formats: format filter
    :param only_books: book filter
    :param force: forwarded to ``export_book``
    :param title_search: forwarded to the export helpers
    :param add_bookshelves: forwarded to the export helpers
    :param s3_storage: forwarded to ``export_book``
    :param optimizer_version: forwarded to ``export_book``
    """
    project_id = get_project_id(
        languages=languages, formats=formats, only_books=only_books
    )

    # ensure dir exist
    path(static_folder).mkdir_p()

    books = get_list_of_filtered_books(
        languages=languages, formats=formats, only_books=only_books
    )

    if not len(get_langs_with_count(books=books)):
        critical_error(
            "Unable to proceed. Combination of lamguages, "
            "books and formats has no result."
        )

    def nb_by_fmt(fmt):
        """Count the filtered books having at least one file of format ``fmt``."""
        return sum(
            [
                1
                for book in books
                if BookFormat.select(BookFormat, Book, Format)
                .join(Book)
                .switch(BookFormat)
                .join(Format)
                .where(Book.id == book.id)
                .where(Format.mime == FORMAT_MATRIX.get(fmt))
                .count()
            ]
        )

    logger.debug("\tFiltered book collection, PDF: {}".format(nb_by_fmt("pdf")))
    logger.debug("\tFiltered book collection, ePUB: {}".format(nb_by_fmt("epub")))
    logger.debug("\tFiltered book collection, HTML: {}".format(nb_by_fmt("html")))

    # export to JSON helpers
    export_to_json_helpers(
        books=books,
        static_folder=static_folder,
        languages=languages,
        formats=formats,
        project_id=project_id,
        title_search=title_search,
        add_bookshelves=add_bookshelves,
    )

    # export HTML index and other static files
    export_skeleton(
        static_folder=static_folder,
        dev_mode=False,
        languages=languages,
        formats=formats,
        only_books=only_books,
        title_search=title_search,
        add_bookshelves=add_bookshelves,
    )

    # Compute popularity: walk the books from most to least downloaded and
    # record, each time a further 1/NB_POPULARITY_STARS share of the
    # collection has been passed, the download count required for that star.
    popbooks = books.order_by(Book.downloads.desc())
    popbooks_count = popbooks.count()
    stars_limits = [0] * NB_POPULARITY_STARS
    stars = NB_POPULARITY_STARS
    nb_downloads = popbooks[0].downloads
    # reuse the count computed above instead of issuing a second count()
    for ibook in range(popbooks_count):
        if (
            ibook
            > float(NB_POPULARITY_STARS - stars + 1)
            / NB_POPULARITY_STARS
            * popbooks_count
            and popbooks[ibook].downloads < nb_downloads
        ):
            stars_limits[stars - 1] = nb_downloads
            stars = stars - 1
        # remember the previous book's downloads for the next comparison
        nb_downloads = popbooks[ibook].downloads

    for book in books:
        book.popularity = sum(
            [int(book.downloads >= stars_limits[i]) for i in range(NB_POPULARITY_STARS)]
        )

    def dlb(b):
        """Export a single book with the settings of this run."""
        return export_book(
            b,
            static_folder=pathlib.Path(static_folder),
            book_dir=pathlib.Path(download_cache).joinpath(str(b.id)),
            languages=languages,
            formats=formats,
            books=books,
            project_id=project_id,
            force=force,
            title_search=title_search,
            add_bookshelves=add_bookshelves,
            s3_storage=s3_storage,
            optimizer_version=optimizer_version,
        )

    # Release the workers once the (blocking) map is over, also on error.
    # Assumes Pool follows the multiprocessing pool API (close/join).
    pool = Pool(concurrency)
    try:
        pool.map(dlb, books)
    finally:
        pool.close()
        pool.join()
def export_all_books(static_folder, download_cache, concurrency,
                     languages=[], formats=[], only_books=[], force=False):
    """Export the skeleton, the JSON helpers and every filtered book.

    After resolving the filtered book list, the function logs per-format
    counts, writes the JSON helpers and the static skeleton, assigns a
    ``popularity`` star count to every book from its ``downloads`` value and
    finally runs ``export_book_to`` for each book on a ``Pool`` of
    ``concurrency`` workers.

    :param static_folder: destination directory of the export
    :param download_cache: directory whose listing is handed to the per-book
        export as ``cached_files``
    :param concurrency: size handed to ``Pool``
    :param languages: language filter
    :param formats: format filter
    :param only_books: book filter
    :param force: forwarded to ``export_book_to``
    """
    project_id = get_project_id(languages=languages, formats=formats,
                                only_books=only_books)

    # ensure dir exist
    path(static_folder).mkdir_p()

    books = get_list_of_filtered_books(languages=languages,
                                       formats=formats,
                                       only_books=only_books)

    if not len(get_langs_with_count(books=books)):
        critical_error("Unable to proceed. Combination of lamguages, "
                       "books and formats has no result.")

    def nb_by_fmt(fmt):
        """Count the filtered books having at least one file of format ``fmt``."""
        return sum([1 for book in books
                    if BookFormat.select(BookFormat, Book, Format)
                                 .join(Book).switch(BookFormat)
                                 .join(Format)
                                 .where(Book.id == book.id)
                                 .where(Format.mime == FORMAT_MATRIX.get(fmt))
                                 .count()])

    logger.debug("\tFiltered book collection, PDF: {}"
                 .format(nb_by_fmt('pdf')))
    logger.debug("\tFiltered book collection, ePUB: {}"
                 .format(nb_by_fmt('epub')))
    logger.debug("\tFiltered book collection, HTML: {}"
                 .format(nb_by_fmt('html')))

    # export to JSON helpers
    export_to_json_helpers(books=books,
                           static_folder=static_folder,
                           languages=languages,
                           formats=formats,
                           project_id=project_id)

    # export HTML index and other static files
    export_skeleton(static_folder=static_folder, dev_mode=False,
                    languages=languages, formats=formats,
                    only_books=only_books)

    # Compute popularity: walk the books from most to least downloaded and
    # record, each time a further 1/NB_POPULARITY_STARS share of the
    # collection has been passed, the download count required for that star.
    popbooks = books.order_by(Book.downloads.desc())
    popbooks_count = popbooks.count()
    stars_limits = [0] * NB_POPULARITY_STARS
    stars = NB_POPULARITY_STARS
    nb_downloads = popbooks[0].downloads
    # reuse the count computed above instead of issuing a second count()
    for ibook in range(popbooks_count):
        if ibook > float(NB_POPULARITY_STARS-stars+1)/NB_POPULARITY_STARS*popbooks_count \
           and popbooks[ibook].downloads < nb_downloads:
            stars_limits[stars-1] = nb_downloads
            stars = stars - 1
        # remember the previous book's downloads for the next comparison
        nb_downloads = popbooks[ibook].downloads

    # export to HTML
    cached_files = os.listdir(download_cache)

    for book in books:
        book.popularity = sum(
            [int(book.downloads >= stars_limits[i])
             for i in range(NB_POPULARITY_STARS)])

    def dlb(b):
        """Export a single book with the settings of this run."""
        return export_book_to(b,
                              static_folder=static_folder,
                              download_cache=download_cache,
                              cached_files=cached_files,
                              languages=languages,
                              formats=formats,
                              books=books,
                              project_id=project_id,
                              force=force)

    # Release the workers once the (blocking) map is over, also on error.
    # Assumes Pool follows the multiprocessing pool API (close/join).
    pool = Pool(concurrency)
    try:
        pool.map(dlb, books)
    finally:
        pool.close()
        pool.join()
def export_all_books(static_folder,
                     download_cache,
                     concurrency,
                     languages=[],
                     formats=[],
                     only_books=[],
                     force=False):
    """Export the skeleton, the JSON helpers and every filtered book.

    Resolves the filtered book list, logs per-format counts, writes the JSON
    helpers and the static skeleton, gives every book a ``popularity`` star
    count derived from its ``downloads`` value, then runs ``export_book_to``
    for each book on a ``Pool`` of ``concurrency`` workers.

    :param static_folder: destination directory of the export
    :param download_cache: directory whose listing is handed to the per-book
        export as ``cached_files``
    :param concurrency: size handed to ``Pool``
    :param languages: language filter
    :param formats: format filter
    :param only_books: book filter
    :param force: forwarded to ``export_book_to``
    """
    project_id = get_project_id(languages=languages,
                                formats=formats,
                                only_books=only_books)

    # ensure dir exist
    path(static_folder).mkdir_p()

    books = get_list_of_filtered_books(languages=languages,
                                       formats=formats,
                                       only_books=only_books)

    if not len(get_langs_with_count(books=books)):
        critical_error("Unable to proceed. Combination of lamguages, "
                       "books and formats has no result.")

    def nb_by_fmt(fmt):
        """Count the filtered books having at least one file of format ``fmt``."""
        return sum([
            1 for book in books
            if BookFormat.select(BookFormat, Book, Format).join(Book).switch(
                BookFormat).join(Format).where(Book.id == book.id).where(
                    Format.mime == FORMAT_MATRIX.get(fmt)).count()
        ])

    logger.debug("\tFiltered book collection, PDF: {}".format(
        nb_by_fmt('pdf')))
    logger.debug("\tFiltered book collection, ePUB: {}".format(
        nb_by_fmt('epub')))
    logger.debug("\tFiltered book collection, HTML: {}".format(
        nb_by_fmt('html')))

    # export to JSON helpers
    export_to_json_helpers(books=books,
                           static_folder=static_folder,
                           languages=languages,
                           formats=formats,
                           project_id=project_id)

    # export HTML index and other static files
    export_skeleton(static_folder=static_folder,
                    dev_mode=False,
                    languages=languages,
                    formats=formats,
                    only_books=only_books)

    # Compute popularity: books are visited from most to least downloaded;
    # whenever a further 1/NB_POPULARITY_STARS share of the collection has
    # been passed, the download count required for that star is recorded.
    popbooks = books.order_by(Book.downloads.desc())
    popbooks_count = popbooks.count()
    stars_limits = [0] * NB_POPULARITY_STARS
    stars = NB_POPULARITY_STARS
    nb_downloads = popbooks[0].downloads
    # reuse the count computed above instead of issuing a second count()
    for ibook in range(popbooks_count):
        if ibook > float(NB_POPULARITY_STARS-stars+1)/NB_POPULARITY_STARS*popbooks_count \
           and popbooks[ibook].downloads < nb_downloads:
            stars_limits[stars - 1] = nb_downloads
            stars = stars - 1
        # remember the previous book's downloads for the next comparison
        nb_downloads = popbooks[ibook].downloads

    # export to HTML
    cached_files = os.listdir(download_cache)

    for book in books:
        book.popularity = sum([
            int(book.downloads >= stars_limits[i])
            for i in range(NB_POPULARITY_STARS)
        ])

    def dlb(b):
        """Export a single book with the settings of this run."""
        return export_book_to(b,
                              static_folder=static_folder,
                              download_cache=download_cache,
                              cached_files=cached_files,
                              languages=languages,
                              formats=formats,
                              books=books,
                              project_id=project_id,
                              force=force)

    # Release the workers once the (blocking) map is over, also on error.
    # Assumes Pool follows the multiprocessing pool API (close/join).
    pool = Pool(concurrency)
    try:
        pool.map(dlb, books)
    finally:
        pool.close()
        pool.join()