def _convert_pdf(self, pdf_data): """ Converts all pages of PDF in A4 format if communication is printed. :param pdf_data: binary data of original pdf :return: binary data of converted pdf """ if self.send_mode != 'physical': return pdf_data pdf = PdfFileReader(BytesIO(base64.b64decode(pdf_data))) convert = PdfFileWriter() a4_width = 594.48 a4_height = 844.32 # A4 units in PyPDF for i in xrange(0, pdf.numPages): # translation coordinates tx = 0 ty = 0 page = pdf.getPage(i) corner = [float(x) for x in page.mediaBox.getUpperRight()] if corner[0] > a4_width or corner[1] > a4_height: page.scaleBy(max(a4_width / corner[0], a4_height / corner[1])) elif corner[0] < a4_width or corner[1] < a4_height: tx = (a4_width - corner[0]) / 2 ty = (a4_height - corner[1]) / 2 convert.addBlankPage(a4_width, a4_height) convert.getPage(i).mergeTranslatedPage(page, tx, ty) output_stream = BytesIO() convert.write(output_stream) output_stream.seek(0) return base64.b64encode(output_stream.read())
def test_two_on_one_page(self):
    """Upload a two-page PDF and combine it with two pages per sheet.

    The downloaded result must contain a single page carrying the text
    of both source pages.
    """
    # Build a document with two pages
    pdf = PdfFileReader(self.get_pdf_stream())
    output = PdfFileWriter()
    output.addPage(pdf.getPage(0))
    output.addPage(pdf.getPage(0))
    # unittest assertions instead of bare ``assert`` so the sanity checks
    # are not stripped when Python runs with -O.
    self.assertEqual(output.getNumPages(), 2)
    self.assertEqual(output.getPage(0).extractText().count('Test'), 1)
    buf = StringIO()
    output.write(buf)
    buf.seek(0)

    rv = self.app.get('/')
    self.assertEqual(rv.status_code, 200)
    self.app.post('/handleform', data={'file': (buf, 'test.pdf')})
    try:
        rv = self.combine_and_download(pages_sheet='2')
        pdf_download = PdfFileReader(StringIO(rv.data))
        self.assertEqual(
            pdf_download.getPage(0).extractText().count('Test'), 2)
        self.assertEqual(pdf_download.getNumPages(), 1)
    finally:
        # Run the cleanup even when an assertion above fails, so a failing
        # test does not leave the upload behind.
        self.clean_up()
def test_two_on_one_page(self):
    """Upload a two-page PDF and combine it with two pages per sheet.

    The downloaded result must contain a single page carrying the text
    of both source pages.
    """
    # Build a document with two pages
    pdf = PdfFileReader(self.get_pdf_stream())
    output = PdfFileWriter()
    output.addPage(pdf.getPage(0))
    output.addPage(pdf.getPage(0))
    # unittest assertions instead of bare ``assert`` so the sanity checks
    # are not stripped when Python runs with -O.
    self.assertEqual(output.getNumPages(), 2)
    self.assertEqual(output.getPage(0).extractText().count('Test'), 1)
    buf = StringIO()
    output.write(buf)
    buf.seek(0)

    rv = self.app.get('/')
    self.assertEqual(rv.status_code, 200)
    self.app.post('/handleform', data={'file': (buf, 'test.pdf')})
    try:
        rv = self.combine_and_download(pages_sheet='2')
        pdf_download = PdfFileReader(StringIO(rv.data))
        self.assertEqual(
            pdf_download.getPage(0).extractText().count('Test'), 2)
        self.assertEqual(pdf_download.getNumPages(), 1)
    finally:
        # Run the cleanup even when an assertion above fails, so a failing
        # test does not leave the upload behind.
        self.clean_up()
def compose_letter_image(self):
    """
    Puts the translated text of a letter inside the original image given
    the child letter layout.

    The base64 content of ``letter_image`` is read as a PDF. For each page,
    the page text (taken from ``page_ids``) is split on BOX_SEPARATOR and
    rendered into the layout boxes of that page; text that does not fit is
    carried over to the next box and finally onto additional pages appended
    at the end. The composed PDF is written back, base64-encoded, into
    ``letter_image``.

    :return: True if the composition succeeded, False otherwise (no text,
             no layout, or more text parts than boxes on a page that has
             more than one box)
    """
    self.ensure_one()
    layout = self.b2s_layout_id
    image_data = base64.b64decode(self.letter_image)
    # Only used as a guard here: the per-page text is read from page_ids
    # inside the loop below.
    text = self.translated_text or self.english_text
    if not text or not layout:
        return False

    # Read the existing PDF of the letter
    existing_pdf = PdfFileReader(BytesIO(image_data))
    # Prepare a new composed PDF
    final_pdf = PdfFileWriter()
    # Holds the text that cannot fit in the box
    remaining_text = ''
    # Label put in front of the text of pages that have no translation box
    additional_pages_header = 'Page '
    if self.partner_id.lang == 'de_DE':
        additional_pages_header = 'Seite '
    elif self.partner_id.lang == 'it_IT':
        additional_pages_header = 'Pagina '

    def get_chars(t):
        # Keep only the ASCII letters of t (digits, punctuation, spaces and
        # accented characters are dropped).
        return "".join(re.findall("[a-zA-Z]+", t))

    for i in xrange(0, existing_pdf.numPages):
        text = ''
        if len(self.page_ids) > i:
            page = self.page_ids[i]
            text = page.translated_text or page.english_translated_text \
                or ''
        if len(get_chars(remaining_text + text)) < 3:
            # Page with less than 3 characters are not considered valid
            # for translation. Just keep the original page.
            final_pdf.addPage(existing_pdf.getPage(i))
            continue

        # Take the boxes depending on which page we handle
        boxes = False
        if i == 0:
            boxes = layout.page_1_box_ids
        elif i == 1:
            boxes = layout.page_2_box_ids
        if not boxes:
            # For subsequent pages, translation will go at the end of pdf.
            final_pdf.addPage(existing_pdf.getPage(i))
            if remaining_text:
                remaining_text += '\n\n' + additional_pages_header +\
                    str(i + 1) + ':\n' + text
            else:
                remaining_text = additional_pages_header + str(i + 1) +\
                    ':\n' + text
            continue

        box_texts = text.split(BOX_SEPARATOR)
        if len(box_texts) > len(boxes):
            # There should never be more text than expected by the
            # layout. Try with only one text.
            if len(boxes) == 1:
                box_texts = [text.replace(BOX_SEPARATOR, '\n\n')]
            else:
                return False

        # Construct new PDF for the current page
        page_output = PdfFileWriter()
        page_output.addPage(existing_pdf.getPage(i))

        # Compose the text for each box inside the page
        for j in xrange(0, len(box_texts)):
            # Text left over from the previous box/page goes first
            text = remaining_text + box_texts[j]
            box = boxes[j]
            # get_pdf returns a PDF whose first page is merged below, plus
            # the part of the text that was not rendered.
            # NOTE(review): presumably the leftover is what did not fit in
            # the box - confirm against the box model.
            translation_pdf, remaining_text = box.get_pdf(text)

            # Check that the text can fit in the box
            if remaining_text:
                # Add a return to separate remaining text from following
                remaining_text += '\n\n'
                # Log when text is too long to see if that happens a lot
                self.message_post(
                    _('Translation went out of the translation box'),
                    _('Translation too long'))

            # Merge the translation on the existing page
            # (from here on ``page`` is a PDF page, no longer the
            # page_ids record read at the top of the outer loop)
            page = page_output.getPage(j)
            page.mergePage(translation_pdf.getPage(0))
            # Compress page
            page.compressContentStreams()
            # Appended again so that getPage(j + 1) exists for the next box
            page_output.addPage(page)

        # Write the last version of the page into final pdf
        # (j still holds the index of the last box handled above)
        final_pdf.addPage(page_output.getPage(j))

    # Add pages if there is remaining text
    # NOTE(review): this loop only ends if get_pdf returns less and less
    # leftover text - confirm it always consumes some input.
    while remaining_text:
        box = layout.additional_page_box_id
        # NOTE(review): meaning of the second positional argument (True) is
        # not visible here - check box.get_pdf.
        translation_pdf, remaining_text = box.get_pdf(remaining_text, True)
        final_pdf.addPage(translation_pdf.getPage(0))

    # Finally write the pdf back into letter_image
    output_stream = BytesIO()
    final_pdf.write(output_stream)
    output_stream.seek(0)
    self.letter_image = base64.b64encode(output_stream.read())
    return True
class PDFMonthlyTimeSheet(object):
    """Fill a monthly time-sheet PDF template.

    The values are drawn on an in-memory ReportLab canvas (``fill_pdf``,
    one canvas page per call) and each canvas page is then merged onto the
    first page of a template PDF (``write_pdf``).
    """

    def __init__(self, topic, name, number, template="muster.pdf"):
        """
        :param topic: text drawn in the large topic field
        :param name: text drawn in the large name field
        :param number: text drawn in the large number field
        :param template: kept for backward compatibility; it is not used
                         here, the template path is given to ``write_pdf``
        """
        self.__packet = StringIO.StringIO()
        # create a new PDF with Reportlab
        self.__can = canvas.Canvas(self.__packet, pagesize=A4)
        # x position of the column for day 0 and width of one day column
        self.s = 187
        self.offset = 18
        self.topic = topic
        self.name = name
        self.number = number
        self.output = PdfFileWriter()

    def __put_hours(self, values, y_offset):
        """Draw one row of values; each key must expose a ``day`` attribute
        that selects the column."""
        # .items() instead of the Python-2-only .iteritems()
        for key, value in values.items():
            x = self.s + self.offset * int(key.day)
            self.__can.drawString(x, y_offset, str(value))

    def __put_total_hours(self, total_hours, y_offset):
        """Draw the total of a row in column 32, right after the days."""
        self.__can.drawString(self.s + self.offset * 32, y_offset,
                              str(total_hours))

    def __put_large_string(self, string, x, y):
        """Draw ``string`` in 18pt, then switch back to the 8pt cell font."""
        self.__can.setFont("Helvetica", 18)
        self.__can.drawString(x, y, string)
        self.__can.setFont("Helvetica", 8)

    def fill_pdf(self, values, month):
        """Draw one time-sheet page on the canvas.

        :param values: dict with the per-day dicts 'project_hours',
                       'other_hours', 'productive_hours', 'absence_hours'
                       and the matching 'total_*' entries
        :param month: month passed to ``get_month_name``
        """
        self.__can.setFont("Helvetica", 12)
        # The sheet is drawn rotated, hence the negative y coordinates
        self.__can.rotate(90)
        self.__can.setFont("Helvetica", 8)
        rows = (
            ('project_hours', -325),
            ('other_hours', -343),
            ('productive_hours', -360),
            ('absence_hours', -400),
        )
        for key, y_offset in rows:
            self.__put_hours(values[key], y_offset)
        for key, y_offset in rows:
            self.__put_total_hours(values['total_' + key], y_offset)
        self.__put_large_string(self.topic, 68, -190)
        self.__put_large_string(self.name, 345, -235)
        self.__put_large_string(self.number, 640, -70)
        self.__put_large_string(get_month_name(month, "de_DE.UTF-8"),
                                68, -235)
        self.__can.showPage()

    def write_pdf(self, template="muster.pdf", outputname="destination.pdf"):
        """Merge every drawn page onto the template and write the result.

        :param template: path of the template PDF (its first page is used)
        :param outputname: path of the PDF file to write
        """
        self.__can.save()
        self.__packet.seek(0)
        self.new_pdf = PdfFileReader(self.__packet)

        # Read the template once and close the file right away, instead of
        # opening (and never closing) it again for every page.
        with open(template, "rb") as template_file:
            template_data = template_file.read()

        for page in range(self.new_pdf.numPages):
            # A fresh reader per page, as before, so that every output page
            # starts from its own copy of the template page.
            existing_pdf = PdfFileReader(StringIO.StringIO(template_data))
            self.page = existing_pdf.getPage(0)
            self.output.addPage(self.page)
            self.output.getPage(page).mergePage(self.new_pdf.getPage(page))

        # The context manager closes the output even if writing fails
        with open(outputname, "wb") as output_stream:
            self.output.write(output_stream)
# Add an outline page under every page of the input document and write the
# result to options.output_filename.
filename = args[0]
# The input stream is kept open until the output has been written, because
# the pages merged below still belong to this reader.
with open(filename, "rb") as input_stream:
    document = PdfFileReader(input_stream)
    output = PdfFileWriter()

    for page_num in range(document.getNumPages()):
        # Get the page dimensions
        page = document.getPage(page_num)
        box = page.mediaBox
        # PDF dimensions are in points
        width = round(float(box[2]) / MM_TO_PT)
        height = round(float(box[3]) / MM_TO_PT)

        # Create the outline
        outline_creator = OutlineCreator(
            width,
            height,
            bleed=(0 if options.no_bleed else options.bleed_margin),
            crop=options.crop_margin
        )
        outline = outline_creator.create()

        # Merge the outline with the current page and add it to the output
        output.addPage(PdfFileReader(outline).getPage(0))
        # Shift the original page by the size of the print marks
        offset = outline_creator.print_marks * MM_TO_PT
        output.getPage(page_num).mergeTranslatedPage(page, offset, offset)

    # The context manager closes the output even if writing fails
    with open(options.output_filename, "wb") as outputStream:
        output.write(outputStream)