Exemple #1
0
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
    container = Container(opf_path, log)
    report_progress(0.05, _('Parsed all content for markup transformation'))
    has_maths = add_maths_script(container)
    fix_fullscreen_images(container)

    name_anchor_map = make_anchors_unique(container, log)
    margin_files = tuple(create_margin_files(container))
    toc = get_toc(container, verify_destinations=False)
    has_toc = toc and len(toc)
    links_page_uuid = add_all_links(container, margin_files)
    container.commit()
    report_progress(0.1, _('Completed markup transformation'))

    manager = RenderManager(opts, log, container.root)
    page_layout = get_page_layout(opts)
    pdf_doc = None
    anchor_locations = {}
    jobs = []
    for margin_file in margin_files:
        jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout))
    results = manager.convert_html_files(jobs, settle_time=1, has_maths=has_maths)
    num_pages = 0
    page_margins_map = []
    for margin_file in margin_files:
        name = margin_file.name
        data = results[name]
        if not isinstance(data, bytes):
            raise SystemExit(data)
        doc = data_as_pdf_doc(data)
        anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid))
        doc_pages = doc.page_count()
        page_margins_map.extend(repeat(resolve_margins(margin_file.margins, page_layout), doc_pages))
        num_pages += doc_pages

        if pdf_doc is None:
            pdf_doc = doc
        else:
            pdf_doc.append(doc)

    page_number_display_map = get_page_number_display_map(manager, opts, num_pages, log)

    if has_toc:
        annotate_toc(toc, anchor_locations, name_anchor_map, log)
        if opts.pdf_add_toc:
            tocname = create_skeleton(container)
            root = container.parsed(tocname)
            add_pagenum_toc(root, toc, opts, page_number_display_map)
            container.commit()
            jobs = [job_for_name(container, tocname, None, page_layout)]
            results = manager.convert_html_files(jobs, settle_time=1)
            tocdoc = data_as_pdf_doc(results[tocname])
            page_margins_map.extend(repeat(resolve_margins(None, page_layout), tocdoc.page_count()))
            pdf_doc.append(tocdoc)

    report_progress(0.7, _('Rendered all HTML as PDF'))

    fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
    if toc and len(toc):
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))

    pdf_metadata = PDFMetadata(metadata)
    add_header_footer(
        manager, opts, pdf_doc, container,
        page_number_display_map, page_layout, page_margins_map,
        pdf_metadata, report_progress, toc if has_toc else None)

    merge_fonts(pdf_doc)
    num_removed = dedup_type3_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'duplicated Type3 glyphs')

    num_removed = remove_unused_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'unused fonts')

    num_removed = pdf_doc.dedup_images()
    if num_removed:
        log('Removed', num_removed, 'duplicate images')

    if cover_data:
        add_cover(pdf_doc, cover_data, page_layout, opts)

    if metadata is not None:
        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))

    if opts.uncompressed_pdf:
        pdf_doc.uncompress()

    pdf_data = pdf_doc.write()
    if output_path is None:
        return pdf_data
    with open(output_path, 'wb') as f:
        f.write(pdf_data)
Exemple #2
0
def convert(opf_path,
            opts,
            metadata=None,
            output_path=None,
            log=default_log,
            cover_data=None,
            report_progress=lambda x, y: None):
    container = Container(opf_path, log)
    fix_markup(container)
    report_progress(0.05, _('Parsed all content for markup transformation'))
    if opts.pdf_hyphenate:
        from calibre.ebooks.oeb.polish.hyphenation import add_soft_hyphens
        add_soft_hyphens(container)
    has_maths = add_maths_script(container)
    fix_fullscreen_images(container)

    name_anchor_map = make_anchors_unique(container, log)
    margin_files = tuple(create_margin_files(container))
    toc = get_toc(container, verify_destinations=False)
    has_toc = toc and len(toc)
    links_page_uuid = add_all_links(container, margin_files)
    container.commit()
    report_progress(0.1, _('Completed markup transformation'))

    manager = RenderManager(opts, log, container.root)
    page_layout = get_page_layout(opts)
    pdf_doc = None
    anchor_locations = {}
    jobs = []
    for margin_file in margin_files:
        jobs.append(
            job_for_name(container, margin_file.name, margin_file.margins,
                         page_layout))
    results = manager.convert_html_files(jobs,
                                         settle_time=1,
                                         has_maths=has_maths)
    num_pages = 0
    page_margins_map = []
    for margin_file in margin_files:
        name = margin_file.name
        data = results[name]
        if not isinstance(data, bytes):
            raise SystemExit(data)
        doc = data_as_pdf_doc(data)
        anchor_locations.update(
            get_anchor_locations(name, doc, num_pages + 1, links_page_uuid,
                                 log))
        doc_pages = doc.page_count()
        page_margins_map.extend(
            repeat(resolve_margins(margin_file.margins, page_layout),
                   doc_pages))
        num_pages += doc_pages

        if pdf_doc is None:
            pdf_doc = doc
        else:
            pdf_doc.append(doc)

    page_number_display_map = get_page_number_display_map(
        manager, opts, num_pages, log)

    if has_toc:
        annotate_toc(toc, anchor_locations, name_anchor_map, log)
        if opts.pdf_add_toc:
            tocname = create_skeleton(container)
            root = container.parsed(tocname)
            add_pagenum_toc(root, toc, opts, page_number_display_map)
            container.commit()
            jobs = [job_for_name(container, tocname, None, page_layout)]
            results = manager.convert_html_files(jobs, settle_time=1)
            tocdoc = data_as_pdf_doc(results[tocname])
            page_margins_map.extend(
                repeat(resolve_margins(None, page_layout),
                       tocdoc.page_count()))
            pdf_doc.append(tocdoc)

    report_progress(0.7, _('Rendered all HTML as PDF'))

    fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links,
              log)
    if toc and len(toc):
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))

    pdf_metadata = PDFMetadata(metadata)
    add_header_footer(manager, opts, pdf_doc, container,
                      page_number_display_map, page_layout, page_margins_map,
                      pdf_metadata, report_progress, toc if has_toc else None)

    merge_fonts(pdf_doc, log)
    num_removed = dedup_type3_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'duplicated Type3 glyphs')

    num_removed = remove_unused_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'unused fonts')

    # Needed because of https://bugreports.qt.io/browse/QTBUG-88976
    subset_fonts(pdf_doc, log)

    num_removed = pdf_doc.dedup_images()
    if num_removed:
        log('Removed', num_removed, 'duplicate images')

    if opts.pdf_odd_even_offset:
        for i in range(1, pdf_doc.page_count()):
            margins = page_margins_map[i]
            mult = -1 if i % 2 else 1
            val = opts.pdf_odd_even_offset
            if abs(val) < min(margins.left, margins.right):
                box = list(pdf_doc.get_page_box("CropBox", i))
                box[0] += val * mult
                pdf_doc.set_page_box("CropBox", i, *box)

    if cover_data:
        add_cover(pdf_doc, cover_data, page_layout, opts)

    if metadata is not None:
        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))

    if opts.uncompressed_pdf:
        pdf_doc.uncompress()

    pdf_data = pdf_doc.write()
    if output_path is None:
        return pdf_data
    with open(output_path, 'wb') as f:
        f.write(pdf_data)