def make_page(page, pdf_page, psem): # Prepare everything in parallel @asyncio.coroutine def get_pdf_thumbnail(psem): if page.thumbnail is None: return None return (yield from page.thumbnail.pdf_thumbnail(psem)) @asyncio.coroutine def get_pdf_background(psem): if page.background is None: return None return (yield from page.background.pdf_image(psem)) @asyncio.coroutine def get_pdf_mask(foreground, psem): if foreground.color is not None: return None return (yield from foreground.pdf_mask(psem)) pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = ( yield from asyncio.gather( get_pdf_thumbnail(psem), get_pdf_background(psem), asyncio.gather(*[fg.pdf_image(psem) for fg in page.foreground]), asyncio.gather(*[get_pdf_mask(fg, psem) for fg in page.foreground]))) pdf_page.MediaBox = PdfArray([0, 0, PdfNumber(page.width), PdfNumber(page.height)]) pdf_page.Group = pdf_group pdf_resources = PdfDict() pdf_xobject = PdfDict() if pdf_thumbnail is not None: pdf_page.Thumb = pdf_thumbnail im_index = 0 # Save graphics state and scale unity rectangle to page size matrix = TransformationMatrix() matrix.scale(page.width, page.height) before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf()) after_graphics = "\nQ\n" contents = "" graphics = "" current_color = None if page.color != self._factory.WHITE: if current_color != page.color: current_color = page.color graphics += page.color.to_pdf() + " rg " graphics += ("0 0 1 1 re " + "f\n") if pdf_background is not None: pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background graphics += "/Im%d Do\n" % im_index im_index += 1 for foreground, pdf_foreground, pdf_mask in zip( page.foreground, pdf_foregrounds, pdf_masks): if pdf_mask is not None: pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask im_index += 1 pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground if (foreground.color is not None and current_color != foreground.color): current_color = foreground.color graphics += foreground.color.to_pdf() + " rg " graphics += "/Im%d Do\n" % im_index im_index += 1 if graphics: contents += (before_graphics + graphics.rstrip(" \n") + after_graphics) current_color = None before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n") after_text = "\nET\n" text = "" pdf_annots = [] for t in page.text: if t.text: matrix = TransformationMatrix() # Glyph size is 0.5 x 1 matrix.scale(2 / len(t.text), 1) matrix.translate(-0.5, -0.5) if t.direction == "ltr": pass elif t.direction == "rtl": matrix.translate(0, -1) elif t.direction == "ttb": matrix.rotate(90) matrix.rotate(-t.rotation) matrix.translate(0.5, 0.5) matrix.scale(t.width, t.height) matrix.translate(t.x, t.y) text += "%s Tm %s Tj\n" % ( matrix.to_pdf(), PdfString().from_bytes( t.text.encode("utf-16-be"), bytes_encoding="hex")) if t.external_link is not None or t.internal_link is not None: pdf_annot = PdfDict() pdf_annots.append(pdf_annot) pdf_annot.Type = PdfName.Annot pdf_annot.Subtype = PdfName.Link pdf_annot.Border = [0, 0, 0] pdf_annot.Rect = [PdfNumber(t.x), PdfNumber(t.y), PdfNumber(t.x + t.width), PdfNumber(t.y + t.height)] if t.external_link is not None: pdf_a = PdfDict() pdf_annot.A = pdf_a pdf_a.Type = PdfName.Action pdf_a.S = PdfName.URI pdf_a.URI = t.external_link.decode("latin-1") if t.internal_link is not None: pdf_target_page = pdf_pages[t.internal_link[0]] target_x, target_y = t.internal_link[1] pdf_annot.Dest = [ pdf_target_page, PdfName.XYZ, PdfNumber(target_x), PdfNumber(target_y), 0] text = text.rstrip(" \n") if text: pdf_resources.Font = pdf_font_mapping contents += (before_text + text + after_text) contents = contents.rstrip(" \n") if contents: pdf_contents = PdfDict() pdf_contents.indirect = True pdf_page.Contents = pdf_contents if COMPRESS_PAGE_CONTENTS: pdf_contents.Filter = [PdfName.FlateDecode] pdf_contents.stream = zlib.compress( contents.encode("latin-1"), 9).decode("latin-1") else: pdf_contents.stream = contents if pdf_annots: pdf_page.Annots = pdf_annots if pdf_xobject: pdf_resources.XObject = pdf_xobject if pdf_resources: pdf_page.Resources = pdf_resources # Report progress nonlocal finished_pages finished_pages += 1 if progress_cb: progress_cb(finished_pages / len(self._pages))
def make_page(page, pdf_page, psem): # Prepare everything in parallel @asyncio.coroutine def get_pdf_thumbnail(psem): if page.thumbnail is None: return None return (yield from page.thumbnail.pdf_thumbnail(psem)) @asyncio.coroutine def get_pdf_background(psem): if page.background is None: return None return (yield from page.background.pdf_image(psem)) @asyncio.coroutine def get_pdf_mask(foreground, psem): if foreground.color is not None: return None return (yield from foreground.pdf_mask(psem)) pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = ( yield from asyncio.gather( get_pdf_thumbnail(psem), get_pdf_background(psem), asyncio.gather( *[fg.pdf_image(psem) for fg in page.foreground]), asyncio.gather( *[get_pdf_mask(fg, psem) for fg in page.foreground]))) pdf_page.MediaBox = PdfArray( [0, 0, PdfNumber(page.width), PdfNumber(page.height)]) pdf_page.Group = pdf_group pdf_resources = PdfDict() pdf_colorspace = PdfDict() pdf_colorspace.DefaultRGB = default_rgb_colorspace pdf_resources.ColorSpace = pdf_colorspace pdf_xobject = PdfDict() if pdf_thumbnail is not None: pdf_page.Thumb = pdf_thumbnail im_index = 0 # Save graphics state and scale unity rectangle to page size matrix = TransformationMatrix() matrix.scale(page.width, page.height) before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf()) after_graphics = "\nQ\n" contents = "" graphics = "" current_color = None if page.color != self._factory.WHITE: if current_color != page.color: current_color = page.color graphics += page.color.to_pdf() + " rg " graphics += ("0 0 1 1 re " + "f\n") if pdf_background is not None: pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background graphics += "/Im%d Do\n" % im_index im_index += 1 for foreground, pdf_foreground, pdf_mask in zip( page.foreground, pdf_foregrounds, pdf_masks): if pdf_mask is not None: pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask im_index += 1 pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground if (foreground.color is not None and current_color != foreground.color): current_color = foreground.color graphics += foreground.color.to_pdf() + " rg " graphics += "/Im%d Do\n" % im_index im_index += 1 if graphics: contents += (before_graphics + graphics.rstrip(" \n") + after_graphics) current_color = None before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n") after_text = "\nET\n" text = "" pdf_annots = [] for t in page.text: if t.text: matrix = TransformationMatrix() # Glyph size is 0.5 x 1 matrix.scale(2 / len(t.text), 1) matrix.translate(-0.5, -0.5) if t.direction == "ltr": pass elif t.direction == "rtl": matrix.translate(0, -1) elif t.direction == "ttb": matrix.rotate(90) matrix.rotate(-t.rotation) matrix.translate(0.5, 0.5) matrix.scale(t.width, t.height) matrix.translate(t.x, t.y) text += "%s Tm %s Tj\n" % ( matrix.to_pdf(), PdfString().from_bytes( t.text.encode("utf-16-be"), bytes_encoding="hex")) if t.external_link is not None or t.internal_link is not None: pdf_annot = PdfDict() pdf_annots.append(pdf_annot) pdf_annot.Type = PdfName.Annot pdf_annot.Subtype = PdfName.Link pdf_annot.Border = [0, 0, 0] pdf_annot.Rect = [ PdfNumber(t.x), PdfNumber(t.y), PdfNumber(t.x + t.width), PdfNumber(t.y + t.height) ] if t.external_link is not None: pdf_a = PdfDict() pdf_annot.A = pdf_a pdf_a.Type = PdfName.Action pdf_a.S = PdfName.URI pdf_a.URI = t.external_link.decode("latin-1") if t.internal_link is not None: pdf_target_page = pdf_pages[t.internal_link[0]] target_x, target_y = t.internal_link[1] pdf_annot.Dest = [ pdf_target_page, PdfName.XYZ, PdfNumber(target_x), PdfNumber(target_y), 0 ] text = text.rstrip(" \n") if text: pdf_resources.Font = pdf_font_mapping contents += (before_text + text + after_text) contents = contents.rstrip(" \n") if contents: pdf_contents = PdfDict() pdf_contents.indirect = True pdf_page.Contents = pdf_contents if COMPRESS_PAGE_CONTENTS: pdf_contents.Filter = [PdfName.FlateDecode] pdf_contents.stream = zlib.compress( contents.encode("latin-1"), 9).decode("latin-1") else: pdf_contents.stream = contents if pdf_annots: pdf_page.Annots = pdf_annots if pdf_xobject: pdf_resources.XObject = pdf_xobject if pdf_resources: pdf_page.Resources = pdf_resources # Report progress nonlocal finished_pages finished_pages += 1 if progress_cb: progress_cb(finished_pages / len(self._pages))