def setup_urls():
    """Dump the rsync file listing to disk, trim it, and hand it to ``load_csv``.

    The listing of ``UrlBuilder.RSYNC`` is written to
    ``tmp/file_on_<UrlBuilder.SERVER_NAME>``, rewritten in place with sed so
    that each line keeps only the text after its last space, and then passed
    to ``load_csv`` for ``Url`` with a single ``url`` field.
    """
    file_with_url = os.path.join(
        "tmp", "file_on_{}".format(UrlBuilder.SERVER_NAME))

    # ``bash -c`` is used because the listing is captured via shell redirection.
    cmd = [
        "bash", "-c",
        "rsync -a --list-only {} > {}".format(UrlBuilder.RSYNC, file_with_url),
    ]
    exec_cmd(cmd)

    # Raw string: the sed group/back-reference syntax (\( \) \1) must not be
    # interpreted as Python escapes. The previous non-raw literal relied on the
    # invalid escape "\(", which triggers a warning on modern interpreters; the
    # value handed to sed is unchanged.
    cmd = ["sed", "-i", r"s#.* \(.*\)$#\1#", file_with_url]
    exec_cmd(cmd)

    field_names = ["url"]
    load_csv(Url, file_with_url, field_names=field_names)
def setup_urls():
    """Build the URL list file from an rsync listing and load it via ``load_csv``.

    Steps:
      1. ``rsync -a --list-only`` of ``UrlBuilder.RSYNC`` is redirected into
         ``tmp/file_on_<UrlBuilder.SERVER_NAME>``.
      2. sed edits that file in place, replacing every line by the part that
         follows its last space.
      3. The file is passed to ``load_csv`` for ``Url`` with the field list
         ``['url']``.
    """
    file_with_url = os.path.join(
        "tmp", "file_on_{}".format(UrlBuilder.SERVER_NAME))

    # Redirection (">") requires a shell, hence the ``bash -c`` wrapper.
    rsync_cmd = [
        "bash",
        "-c",
        "rsync -a --list-only {} > {}".format(UrlBuilder.RSYNC, file_with_url),
    ]
    exec_cmd(rsync_cmd)

    # The sed script is written as a raw string so that "\(" is no longer an
    # invalid Python escape sequence (warning today, error in the future).
    # sed receives exactly the same text as before: s#.* \(.*\)$#\1#
    sed_cmd = ["sed", "-i", r"s#.* \(.*\)$#\1#", file_with_url]
    exec_cmd(sed_cmd)

    load_csv(Url, file_with_url, field_names=["url"])
def setup_urls():
    """Dump the rsync file listing to disk, trim it, and hand it to ``load_csv``.

    The listing of ``UrlBuilder.RSYNC`` is redirected into
    ``tmp/file_on_<UrlBuilder.SERVER_NAME>``; sed then rewrites the file in
    place so each line keeps only the text after its last space, and the
    result is passed to ``load_csv`` for ``Url`` with a single ``url`` field.
    """
    file_with_url = os.path.join(
        "tmp", "file_on_{}".format(UrlBuilder.SERVER_NAME))

    # ``bash -c`` is used because the listing is captured via shell redirection.
    cmd = [
        "bash", "-c",
        "rsync -a --list-only {} > {}".format(UrlBuilder.RSYNC, file_with_url),
    ]
    exec_cmd(cmd)

    # On Darwin an explicit backup suffix is passed as a separate argument
    # after ``-i``; elsewhere ``-i`` is used on its own.
    in_place_opt = ["-i", ".bak"] if platform.system() == "Darwin" else ["-i"]

    # In a raw string the back-reference must be written with ONE backslash.
    # The previous r"...#\\1#" handed sed an escaped backslash followed by a
    # literal "1", so every line was replaced by the two characters "\1"
    # instead of the captured path.
    cmd = ["sed"] + in_place_opt + [r"s#.* \(.*\)$#\1#", file_with_url]
    exec_cmd(cmd)

    field_names = ["url"]
    load_csv(Url, file_with_url, field_names=field_names)
def extract_rdf_files(rdf_tarball, rdf_path, force=False):
    """Unpack ``rdf_tarball`` into ``rdf_path`` using ``tar``.

    Nothing is done (apart from logging) when ``rdf_path`` already exists and
    ``force`` is false. Otherwise the destination is created with
    ``mkdir_p`` and ``tar`` is run with ``--strip-components 2``.
    """
    destination = path(rdf_path)

    skip_extraction = destination.exists() and not force
    if skip_extraction:
        logger.info(
            "\tRDF-files folder already exists in {}".format(rdf_path))
        return

    logger.info("\tExtracting {} into {}".format(rdf_tarball, rdf_path))

    # make sure the destination directory is there before tar writes into it
    destination.mkdir_p()

    tar_command = ["tar", "-C", rdf_path]
    tar_command += ["--strip-components", "2"]
    tar_command += ["-x", "-f", rdf_tarball]
    exec_cmd(tar_command)
def build_zimfile(static_folder, zim_path=None,
                  languages=None, formats=None,
                  title=None, description=None,
                  only_books=None,
                  create_index=True, force=False):
    """Export the static skeleton and package it into a ZIM file.

    Args:
        static_folder: folder given to ``export_skeleton`` and to
            ``zimwriterfs`` as the content directory.
        zim_path: output file; defaults to ``<project_id>.zim``.
        languages: language codes; an empty/missing value becomes ``['mul']``
            after the skeleton export.
        formats: format names, used for the default title and project id.
        title: ZIM title; a default is derived from languages and formats.
        description: ZIM description; a fixed default is used when omitted.
        only_books: forwarded to ``export_skeleton`` and ``get_project_id``.
        create_index: when true, ``--withFullTextIndex`` is passed.
        force: when false, an existing ``zim_path`` is left untouched.

    Returns:
        None. The outcome is only reported through ``logger``.
    """
    # None replaces the former mutable ``[]`` defaults, which were shared
    # between calls.
    languages = [] if languages is None else languages
    formats = [] if formats is None else formats
    only_books = [] if only_books is None else only_books

    # revert HTML/JS/CSS to zim-compatible versions
    export_skeleton(static_folder=static_folder, dev_mode=False,
                    languages=languages, formats=formats,
                    only_books=only_books)

    # sorted() builds new lists: the caller's arguments are no longer
    # reordered in place as a side effect of building a ZIM.
    languages = sorted(languages) if languages else ['mul']
    formats = sorted(formats)

    if title is None:
        if len(languages) > 5:
            title = ("Project Gutenberg Library with {formats}"
                     .format(formats=",".join(formats)))
        else:
            title = ("Project Gutenberg Library ({langs}) with {formats}"
                     .format(langs=",".join(languages),
                             formats=",".join(formats)))

    logger.info("\tWritting ZIM for {}".format(title))

    if description is None:
        description = "The first producer of free ebooks"

    project_id = get_project_id(languages, formats, only_books)

    if zim_path is None:
        zim_path = "{}.zim".format(project_id)

    if path(zim_path).exists() and not force:
        logger.info("ZIM file `{}` already exist.".format(zim_path))
        return

    # codes without an ISO_MATRIX entry are passed through unchanged
    languages = sorted(ISO_MATRIX.get(lang, lang) for lang in languages)

    cmd = ['zimwriterfs',
           '--welcome', "Home.html",
           '--favicon', "favicon.png",
           '--language', ','.join(languages),
           '--name', project_id,
           '--title', title,
           '--description', description,
           '--creator', "gutenberg.org",
           '--publisher', "Kiwix",
           static_folder, zim_path]

    if create_index:
        # option goes right after the program name, before the other options
        cmd.insert(1, '--withFullTextIndex')

    if exec_cmd(cmd) == 0:
        logger.info("Successfuly created ZIM file at {}".format(zim_path))
    else:
        logger.error("Unable to create ZIM file :(")
def setup_urls():
    """Produce the URL list from an rsync listing and pass it to ``load_csv``.

    The raw listing of ``UrlBuilder.RSYNC`` is written to
    ``tmp/file_on_<UrlBuilder.SERVER_NAME>`` and copied to a ``.bak`` sibling.
    The main file is then rewritten from the copy, keeping for every line of
    at least 47 characters only what starts at offset 46; shorter lines are
    dropped. Finally the file is handed to ``load_csv`` for ``Url``.
    """
    listing_file = os.path.join(
        "tmp", "file_on_{}".format(UrlBuilder.SERVER_NAME))
    raw_copy = listing_file + ".bak"

    shell_line = "rsync -a --list-only {} > {}".format(
        UrlBuilder.RSYNC, listing_file)
    exec_cmd(["bash", "-c", shell_line])

    # keep rsync's untouched output next to the stripped file
    shutil.copyfile(listing_file, raw_copy)

    # rewrite the listing so that it only holds the relative paths
    with open(raw_copy, "r") as src:
        with open(listing_file, "w") as dest:
            for entry in src:
                if len(entry) < 47:
                    continue
                dest.write(entry[46:])

    load_csv(Url, listing_file, field_names=["url"])
def build_zimfile(
    static_folder,
    output_folder,
    zim_name=None,
    languages=None,
    formats=None,
    title=None,
    description=None,
    only_books=None,
    create_index=True,
    force=False,
    title_search=False,
    add_bookshelves=False,
):
    """Export the static skeleton and package it into a ZIM file.

    Args:
        static_folder: folder given to ``export_skeleton`` and to
            ``zimwriterfs`` as the content directory.
        output_folder: path object (must provide ``joinpath``) in which the
            ZIM file is created.
        zim_name: file name of the ZIM; defaults to ``<project_id>.zim``.
        languages: language codes; an empty/missing value becomes ``["mul"]``
            after the skeleton export.
        formats: format names, used for the default title and project id.
        title: ZIM title; a default is derived from languages and formats.
        description: ZIM description; a fixed default is used when omitted.
        only_books: forwarded to ``export_skeleton`` and ``get_project_id``.
        create_index: when false, ``--withoutFTIndex`` is passed.
        force: when false, an existing output file is left untouched.
        title_search: forwarded to ``export_skeleton``.
        add_bookshelves: forwarded to ``export_skeleton``.

    Returns:
        None. The outcome is only reported through ``logger``.
    """
    # None replaces the former mutable ``[]`` defaults, which were shared
    # between calls.
    languages = [] if languages is None else languages
    formats = [] if formats is None else formats
    only_books = [] if only_books is None else only_books

    # revert HTML/JS/CSS to zim-compatible versions
    export_skeleton(
        static_folder=static_folder,
        dev_mode=False,
        languages=languages,
        formats=formats,
        only_books=only_books,
        title_search=title_search,
        add_bookshelves=add_bookshelves,
    )

    # sorted() builds new lists: the caller's arguments are no longer
    # reordered in place as a side effect of building a ZIM.
    languages = sorted(languages) if languages else ["mul"]
    formats = sorted(formats)

    if title is None:
        if len(languages) > 5:
            title = "Project Gutenberg Library"
        else:
            title = "Project Gutenberg Library ({langs})".format(
                langs=",".join(languages)
            )

        # the format list is only spelled out when it is a strict subset
        if len(formats) < len(FORMAT_MATRIX):
            title += " with {formats}".format(formats=",".join(formats))

    logger.info("\tWritting ZIM for {}".format(title))

    if description is None:
        description = "The first producer of free ebooks"

    project_id = get_project_id(languages, formats, only_books)

    if zim_name is None:
        zim_name = "{}.zim".format(project_id)
    zim_path = output_folder.joinpath(zim_name)

    # Check the real destination. The bare ``zim_name`` was tested before,
    # i.e. a file relative to the current directory, so an existing ZIM in
    # ``output_folder`` was never detected.
    if zim_path.exists() and not force:
        logger.info("ZIM file `{}` already exist.".format(zim_path))
        return

    # codes without an ISO_MATRIX entry are passed through unchanged
    languages = sorted(ISO_MATRIX.get(lang, lang) for lang in languages)

    cmd = [
        "zimwriterfs",
        "--welcome",
        "Home.html",
        "--favicon",
        "favicon.png",
        "--language",
        ",".join(languages),
        "--name",
        project_id,
        "--title",
        title,
        "--description",
        description,
        "--creator",
        "gutenberg.org",
        "--tags",
        "gutenberg",
        "--publisher",
        "Kiwix",
        "--scraper",
        # NOTE(review): "gutengerg" looks like a typo for "gutenberg"; left
        # as-is because the value ends up in the ZIM metadata - confirm
        # before changing.
        "gutengergtozim-{v}".format(v=VERSION),
        static_folder,
        six.text_type(zim_path),
    ]

    if not create_index:
        # option goes right after the program name, before the other options
        cmd.insert(1, "--withoutFTIndex")

    if exec_cmd(cmd) == 0:
        logger.info("Successfuly created ZIM file at {}".format(zim_path))
    else:
        logger.error("Unable to create ZIM file :(")
def optimize_jpeg(src, dst):
    """Run ``jpegoptim --strip-all -m50`` on ``dst``.

    ``src`` is first copied to ``dst`` with ``copy_file`` unless both are
    equal, since the tool is only ever pointed at ``dst``.
    """
    needs_copy = src != dst
    if needs_copy:
        copy_file(src, dst)

    command = ["jpegoptim", "--strip-all", "-m50", dst]
    exec_cmd(command)
def optimize_png(src, dst):
    """Write ``src`` to ``dst`` through ``pngquant``, then run ``advdef`` on ``dst``."""
    quantize = ["pngquant", "--nofs", "--force", "--output", dst, src]
    recompress = ["advdef", "-z", "-4", "-i", "5", dst]

    # order matters: advdef works on the file pngquant has just produced
    for command in (quantize, recompress):
        exec_cmd(command)
def optimize_gif(src, dst):
    """Run ``gifsicle -O3`` on ``src`` with ``dst`` as the ``-o`` output."""
    command = ["gifsicle", "-O3", src]
    command += ["-o", dst]
    exec_cmd(command)
def optimize_jpeg(src, dst):
    """Bring ``src`` to ``dst`` via ``copy_from_cache``, then run ``jpegoptim`` on ``dst``."""
    copy_from_cache(src, dst)

    # jpegoptim is only given ``dst``, hence the copy above
    jpegoptim_args = ["--strip-all", "-m50", dst]
    exec_cmd(["jpegoptim"] + jpegoptim_args)
def optimize_gif(src, dst):
    """Invoke ``gifsicle`` at ``-O3`` reading ``src`` and writing ``dst``."""
    gifsicle_cmd = ["gifsicle", "-O3", src, "-o", dst]
    exec_cmd(gifsicle_cmd)
def optimize_jpeg(src, dst):
    """Copy ``src`` to ``dst`` with ``copy_from_cache`` and run ``jpegoptim`` on the copy.

    The tool is called with ``--strip-all`` and ``-m50`` and only receives
    ``dst``.
    """
    copy_from_cache(src, dst)

    command = ["jpegoptim"]
    command.extend(["--strip-all", "-m50"])
    command.append(dst)
    exec_cmd(command)
def optimize_png(src, dst):
    """Produce ``dst`` from ``src`` with ``pngquant`` and post-process it with ``advdef``."""
    # step 1: pngquant reads ``src`` and writes ``dst``
    pngquant_cmd = ["pngquant", "--nofs", "--force"]
    pngquant_cmd += ["--output", dst, src]
    exec_cmd(pngquant_cmd)

    # step 2: advdef is run on the freshly written ``dst``
    advdef_cmd = ["advdef", "-z", "-4"]
    advdef_cmd += ["-i", "5", dst]
    exec_cmd(advdef_cmd)
def optimize_gif(src, dst):
    """Call ``gifsicle -O3 <src> -o <dst>`` through ``exec_cmd``."""
    source_args = ["-O3", src]
    output_args = ["-o", dst]
    exec_cmd(["gifsicle"] + source_args + output_args)
def build_zimfile(static_folder,
                  zim_path=None,
                  languages=None,
                  formats=None,
                  title=None,
                  description=None,
                  only_books=None,
                  create_index=True,
                  force=False):
    """Export the static skeleton, then build a ZIM file with ``zimwriterfs``.

    Args:
        static_folder: content folder, passed to ``export_skeleton`` and to
            ``zimwriterfs``.
        zim_path: destination file; ``<project_id>.zim`` when omitted.
        languages: language codes; replaced by ``['mul']`` when empty or
            missing (after the skeleton export).
        formats: format names, used in the default title and the project id.
        title: ZIM title; derived from languages and formats when omitted.
        description: ZIM description; a fixed default is used when omitted.
        only_books: forwarded to ``export_skeleton`` and ``get_project_id``.
        create_index: when true, ``--withFullTextIndex`` is passed.
        force: when false, an already existing ``zim_path`` is kept as is.

    Returns:
        None. Success or failure is only logged.
    """
    # ``None`` defaults instead of ``[]``: a list literal in the signature is
    # created once and shared by every call.
    if languages is None:
        languages = []
    if formats is None:
        formats = []
    if only_books is None:
        only_books = []

    # revert HTML/JS/CSS to zim-compatible versions
    export_skeleton(
        static_folder=static_folder,
        dev_mode=False,
        languages=languages,
        formats=formats,
        only_books=only_books)

    # Sort into new lists so that the lists owned by the caller keep their
    # original order (in-place ``.sort()`` used to reorder them).
    languages = sorted(languages) if languages else ['mul']
    formats = sorted(formats)

    if title is None:
        if len(languages) > 5:
            title = ("Project Gutenberg Library with {formats}".format(
                formats=",".join(formats)))
        else:
            title = (
                "Project Gutenberg Library ({langs}) with {formats}".format(
                    langs=",".join(languages), formats=",".join(formats)))

    logger.info("\tWritting ZIM for {}".format(title))

    if description is None:
        description = "The first producer of free ebooks"

    project_id = get_project_id(languages, formats, only_books)

    if zim_path is None:
        zim_path = "{}.zim".format(project_id)

    if path(zim_path).exists() and not force:
        logger.info("ZIM file `{}` already exist.".format(zim_path))
        return

    # codes missing from ISO_MATRIX are kept unchanged
    languages = sorted(ISO_MATRIX.get(lang, lang) for lang in languages)

    cmd = [
        'zimwriterfs',
        '--welcome', "Home.html",
        '--favicon', "favicon.png",
        '--language', ','.join(languages),
        '--name', project_id,
        '--title', title,
        '--description', description,
        '--creator', "gutenberg.org",
        '--publisher', "Kiwix",
        static_folder,
        zim_path,
    ]

    if create_index:
        # inserted directly after the program name
        cmd.insert(1, '--withFullTextIndex')

    if exec_cmd(cmd) == 0:
        logger.info("Successfuly created ZIM file at {}".format(zim_path))
    else:
        logger.error("Unable to create ZIM file :(")
def optimize_png(src, dst):
    """Run ``pngquant`` from ``src`` to ``dst``, followed by ``advdef`` on ``dst``."""
    pipeline = [
        ['pngquant', '--nofs', '--force', '--output', dst, src],
        ['advdef', '-z', '-4', '-i', '5', dst],
    ]
    # the second tool operates on the output of the first one
    for step in pipeline:
        exec_cmd(step)
def optimize_jpeg(src, dst):
    """Fetch ``src`` into ``dst`` with ``copy_from_cache`` and run ``jpegoptim`` on it."""
    copy_from_cache(src, dst)

    tool = "jpegoptim"
    options = ["--strip-all", "-m50"]
    exec_cmd([tool] + options + [dst])