def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
    """Build a single PDF document from the book described by ``opf_path``.

    The book is loaded into a ``Container``, its markup is adjusted, every
    margin file is converted to PDF through a ``RenderManager`` and the
    resulting documents are appended to each other in order. Afterwards the
    links, outline, headers/footers, fonts, images, per-page offset, cover
    and metadata are processed according to ``opts``.

    :param opf_path: Path handed to ``Container``.
    :param opts: Options object; the ``pdf_hyphenate``, ``pdf_add_toc``,
        ``pdf_mark_links``, ``pdf_odd_even_offset`` and ``uncompressed_pdf``
        attributes are read here, the rest is forwarded to helpers.
    :param metadata: Passed to ``PDFMetadata``; the PDF metadata is only
        updated when this is not ``None``.
    :param output_path: When ``None`` the serialized PDF is returned,
        otherwise it is written to this path in binary mode.
    :param log: Callable used for diagnostic messages.
    :param cover_data: When truthy, forwarded to ``add_cover``.
    :param report_progress: Callable invoked as ``(fraction, message)``.
    :return: The PDF data if ``output_path`` is ``None``, else ``None``.
    :raises SystemExit: If the conversion result for a margin file is not
        a ``bytes`` object; the result itself is used as the exit payload.
    """
    # --- Stage 1: markup transformation -----------------------------------
    container = Container(opf_path, log)
    fix_markup(container)
    report_progress(0.05, _('Parsed all content for markup transformation'))
    if opts.pdf_hyphenate:
        from calibre.ebooks.oeb.polish.hyphenation import add_soft_hyphens
        add_soft_hyphens(container)
    has_maths = add_maths_script(container)
    fix_fullscreen_images(container)

    name_anchor_map = make_anchors_unique(container, log)
    margin_files = tuple(create_margin_files(container))
    toc = get_toc(container, verify_destinations=False)
    has_toc = toc and len(toc)
    links_page_uuid = add_all_links(container, margin_files)
    container.commit()
    report_progress(0.1, _('Completed markup transformation'))

    # --- Stage 2: convert every margin file and stitch the results --------
    manager = RenderManager(opts, log, container.root)
    page_layout = get_page_layout(opts)
    render_jobs = [
        job_for_name(container, mf.name, mf.margins, page_layout)
        for mf in margin_files
    ]
    rendered = manager.convert_html_files(render_jobs, settle_time=1, has_maths=has_maths)

    pdf_doc = None
    anchor_locations = {}
    page_margins_map = []
    total_pages = 0
    for mf in margin_files:
        payload = rendered[mf.name]
        if not isinstance(payload, bytes):
            # Anything other than bytes is used as the exit payload.
            raise SystemExit(payload)
        part = data_as_pdf_doc(payload)
        # Anchors are located relative to the first page this part will
        # occupy in the combined document.
        anchor_locations.update(
            get_anchor_locations(mf.name, part, total_pages + 1, links_page_uuid, log))
        part_pages = part.page_count()
        page_margins_map.extend(
            repeat(resolve_margins(mf.margins, page_layout), part_pages))
        total_pages += part_pages

        if pdf_doc is None:
            pdf_doc = part
        else:
            pdf_doc.append(part)

    page_number_display_map = get_page_number_display_map(manager, opts, total_pages, log)

    # --- Stage 3: optional table of contents pages ------------------------
    if has_toc:
        annotate_toc(toc, anchor_locations, name_anchor_map, log)
        if opts.pdf_add_toc:
            tocname = create_skeleton(container)
            toc_root = container.parsed(tocname)
            add_pagenum_toc(toc_root, toc, opts, page_number_display_map)
            container.commit()
            toc_results = manager.convert_html_files(
                [job_for_name(container, tocname, None, page_layout)], settle_time=1)
            tocdoc = data_as_pdf_doc(toc_results[tocname])
            page_margins_map.extend(
                repeat(resolve_margins(None, page_layout), tocdoc.page_count()))
            pdf_doc.append(tocdoc)

    report_progress(0.7, _('Rendered all HTML as PDF'))

    # --- Stage 4: links, outline, headers and footers ---------------------
    fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
    if toc and len(toc):
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))

    pdf_metadata = PDFMetadata(metadata)
    add_header_footer(
        manager, opts, pdf_doc, container,
        page_number_display_map, page_layout, page_margins_map,
        pdf_metadata, report_progress, toc if has_toc else None)

    # --- Stage 5: font and image clean-up ---------------------------------
    removed = remove_unused_fonts(pdf_doc)
    if removed:
        log('Removed', removed, 'unused fonts')

    merge_fonts(pdf_doc, log)
    removed = dedup_type3_fonts(pdf_doc)
    if removed:
        log('Removed', removed, 'duplicated Type3 glyphs')

    removed = pdf_doc.dedup_images()
    if removed:
        log('Removed', removed, 'duplicate images')

    # --- Stage 6: odd/even page offset ------------------------------------
    offset = opts.pdf_odd_even_offset
    if offset:
        # Index 0 is deliberately not visited; the sign of the shift
        # alternates with the parity of the page index.
        for page_index in range(1, pdf_doc.page_count()):
            margins = page_margins_map[page_index]
            sign = -1 if page_index % 2 else 1
            if abs(offset) < min(margins.left, margins.right):
                crop_box = list(pdf_doc.get_page_box("CropBox", page_index))
                crop_box[0] += offset * sign
                pdf_doc.set_page_box("CropBox", page_index, *crop_box)

    # --- Stage 7: cover, metadata and serialization -----------------------
    if cover_data:
        add_cover(pdf_doc, cover_data, page_layout, opts)

    if metadata is not None:
        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))

    if opts.uncompressed_pdf:
        pdf_doc.uncompress()

    pdf_data = pdf_doc.write()
    if output_path is not None:
        with open(output_path, 'wb') as out:
            out.write(pdf_data)
        return None
    return pdf_data
def polish_one(ebook, opts, report, customization=None):
    """Run the polishing actions selected in ``opts`` on ``ebook``.

    Each enabled action announces itself through ``report`` with a
    ``### `` heading and then reports its own outcome.

    :param ebook: The book object handed to every polishing helper.
    :param opts: Options object; each truthy attribute (``opf``, ``cover``,
        ``jacket``, ``remove_jacket``, ``smarten_punctuation``, ``embed``,
        ``subset``, ``remove_unused_css``, ``compress_images``,
        ``upgrade_book``, ``remove_soft_hyphens``, ``add_soft_hyphens``)
        enables the corresponding step.
    :param report: Callable receiving human readable progress text.
    :param customization: Mapping of fine-tuning switches; when falsy a
        copy of ``CUSTOMIZATION`` is used instead.
    :return: ``True`` if at least one step flagged the book as changed,
        otherwise ``False``.
    """
    def heading(title):
        return report('\n### ' + title)

    if not customization:
        customization = CUSTOMIZATION.copy()

    changed = False
    jacket = None
    # Only the existence of one subsettable font matters, so stop at the
    # first item produced by the iterator.
    has_subsettable_fonts = any(True for font in iter_subsettable_fonts(ebook))

    # Statistics are only gathered when a later step will consume them.
    if opts.embed or (opts.subset and has_subsettable_fonts):
        stats = StatsCollector(ebook, do_embed=opts.embed)

    if opts.opf:
        heading(_('Updating metadata'))
        changed = True
        update_metadata(ebook, opts.opf)
        jacket = find_existing_jacket(ebook)
        if jacket is not None:
            replace_jacket(ebook, jacket)
            report(_('Updated metadata jacket'))
        report(_('Metadata updated\n'))

    if opts.cover:
        heading(_('Setting cover'))
        changed = True
        set_cover(ebook, opts.cover, report)
        report('')

    if opts.jacket:
        heading(_('Inserting metadata jacket'))
        changed = True
        # A jacket found during the metadata step above was already
        # replaced there, so only the outcome is reported in that case.
        if jacket is not None:
            report(_('Existing metadata jacket replaced'))
        elif add_or_replace_jacket(ebook):
            report(_('Existing metadata jacket replaced'))
        else:
            report(_('Metadata jacket inserted'))
        report('')

    if opts.remove_jacket:
        heading(_('Removing metadata jacket'))
        if remove_jacket(ebook):
            report(_('Metadata jacket removed'))
            changed = True
        else:
            report(_('No metadata jacket found'))
        report('')

    if opts.smarten_punctuation:
        heading(_('Smartening punctuation'))
        if smarten_punctuation(ebook, report):
            changed = True
        report('')

    if opts.embed:
        heading(_('Embedding referenced fonts'))
        if embed_all_fonts(ebook, stats, report):
            # Freshly embedded fonts make the subsetting step applicable.
            has_subsettable_fonts = True
            changed = True
        report('')

    if opts.subset:
        if not has_subsettable_fonts:
            heading(_('No embedded fonts to subset'))
        else:
            heading(_('Subsetting embedded fonts'))
            if subset_all_fonts(ebook, stats.font_stats, report):
                changed = True
        report('')

    if opts.remove_unused_css:
        heading(_('Removing unused CSS rules'))
        css_changed = remove_unused_css(
            ebook,
            report,
            remove_unused_classes=customization['remove_unused_classes'],
            merge_rules=customization['merge_identical_selectors'],
            merge_rules_with_identical_properties=customization['merge_rules_with_identical_properties'],
            remove_unreferenced_sheets=customization['remove_unreferenced_sheets'],
        )
        if css_changed:
            changed = True
        report('')

    if opts.compress_images:
        heading(_('Losslessly compressing images'))
        # Only the first element of the returned sequence is inspected.
        if compress_images(ebook, report)[0]:
            changed = True
        report('')

    if opts.upgrade_book:
        heading(_('Upgrading book, if possible'))
        if upgrade_book(ebook, report, remove_ncx=customization['remove_ncx']):
            changed = True
        report('')

    # Removing and adding soft hyphens are mutually exclusive; removal wins
    # when both options are set.
    if opts.remove_soft_hyphens:
        heading(_('Removing soft hyphens'))
        remove_soft_hyphens(ebook, report)
        changed = True
    elif opts.add_soft_hyphens:
        heading(_('Adding soft hyphens'))
        add_soft_hyphens(ebook, report)
        changed = True

    return changed