def update_generated_pdf(self):
    """
    Regenerate ``self.generatedPdf`` unless a custom pdf exists.

    If there isn't a custom pdf version of the review, generate the
    pdf from an Office document file (anything supported by abiword).
    If there isn't an Office file then generate the pdf from the
    contents of the review text (html).

    Nothing is returned; on success the result is stored as a blob in
    ``self.generatedPdf``.

    Error handling:

    * ``SubprocessException`` anywhere in the process is logged and
      swallowed; ``self.generatedPdf`` is left untouched.
    * ``RuntimeError`` from tidy is logged and the untidied html is
      used instead.
    * ``RuntimeError`` from abiword while converting the html is
      logged and a placeholder "Could not create PDF" document is
      converted instead.  A ``RuntimeError`` from that second attempt,
      or from converting an Office document, propagates to the caller.
    """
    has_custom_pdf = hasattr(self, "pdf") and self.pdf.get_size() > 0
    if has_custom_pdf:
        return

    # Generate the pdf file and save it as a blob
    pdf_blob = Blob()
    try:
        create_pdf = RunSubprocess(
            "abiword",
            input_params="--plugin=AbiCommand -t pdf",
            output_params="-o")
        create_pdf.create_tmp_ouput()
        try:
            doc = self.getDoc() if hasattr(self, "doc") else None
            if doc:
                # The blob is opened only to learn its path on disk;
                # abiword then reads the file from that path itself.
                open_blob = doc.blob.open("r")
                blob_path = open_blob.name
                open_blob.close()
                create_pdf.run(input_path=blob_path)
            else:
                review = self.getReview()
                # Insert the review into a template so we have a
                # valid html file
                pdf_template = SimpleZpt(
                    "../browser/templates/htmltopdf.pt")
                data = pdf_template(
                    context={"review": review}).encode("utf-8")
                with NamedTemporaryFile() as tmp_input:
                    with NamedTemporaryFile() as tmp_output:
                        tmp_input.write(data)
                        tmp_input.flush()
                        try:
                            SimpleSubprocess(
                                '/usr/bin/tidy',
                                '-utf8',
                                '-numeric',
                                '-o',
                                tmp_output.name,
                                tmp_input.name,
                                exitcodes=[0, 1],
                            )
                            tmp_output.seek(0)
                            data = tmp_output.read()
                        except RuntimeError:
                            # Best effort: keep the untidied html.
                            log.error(
                                "Tidy was unable to tidy the html for %s",
                                self.absolute_url(),
                                exc_info=True,
                            )
                create_pdf.create_tmp_input(suffix=".html", data=data)
                try:
                    create_pdf.run()
                except RuntimeError:
                    log.error(
                        "Abiword was unable to generate a pdf for %s and created an error pdf",
                        self.absolute_url(),
                        exc_info=True,
                    )
                    create_pdf.create_tmp_input(
                        suffix=".html", data="Could not create PDF")
                    create_pdf.run()

            # A pdf is binary data, so read it in binary mode.
            with open(create_pdf.output_path, "rb") as pdf_file:
                pdf_data = pdf_file.read()

            # Write the data in one call and close the blob handle
            # explicitly instead of leaving it to garbage collection.
            blob_file = pdf_blob.open("w")
            try:
                blob_file.write(pdf_data)
            finally:
                blob_file.close()
        finally:
            # Remove the temporary files even when the conversion failed.
            create_pdf.clean_up()

        self.generatedPdf = pdf_blob
    except SubprocessException:
        log.error("The application Abiword does not seem to be available", exc_info=True)
def _getAllPageImages(context, size=(320,452)): """ Generate the preview images for a pdf """ pdf = context.get_review_pdf() # import pdb; pdb.set_trace() if pdf: pdf_data = pdf["blob"].open().read() if not pdf or not pdf_data: return "%s has no pdf" %( context.absolute_url()), None else: # Split the pdf, one file per page try: split_pdf_pages = RunSubprocess( "pdftk", output_params="burst output") except SubprocessException, e: return e split_pdf_pages.create_tmp_input(suffix=".pdf", data=pdf_data) split_pdf_pages.create_tmp_output_dir() split_pdf_pages.output_path = os.path.join( split_pdf_pages.tmp_output_dir, "%04d.pdf") split_pdf_pages.run() msg = tuple() if split_pdf_pages.errors != "": msg += ("Message from split_pdf_pages:" "\n%s\n" % split_pdf_pages.errors,) # Convert the pages to .gifs # rewritten to have one converter step per page as we have seen process # sizes larger than 2GB for 60 pages in a batch for filename in glob.glob(split_pdf_pages.tmp_output_dir+"/*.pdf"): pdf_to_image = RunSubprocess( "convert", input_params="-density 250", input_path=filename, output_params= "-resize %sx%s -background white -flatten" % ( size[0], size[1])) outputname = '.'.join(filename.split("/")[-1].split('.')[:-1])+'.gif' pdf_to_image.output_path = os.path.join( split_pdf_pages.tmp_output_dir, outputname) pdf_to_image.run() if pdf_to_image.errors != "": msg += ("Message from pdfs_to_images:" "\n%s\n" % pdf_to_image.errors,) pdf_to_image.clean_up() imgfiles = [gif for gif in os.listdir(split_pdf_pages.tmp_output_dir) if os.path.splitext(gif)[1] == '.gif'] imgfiles.sort() pages = [] for img in imgfiles: img = open(os.path.join( split_pdf_pages.tmp_output_dir, img), "r") img_data = img.read() pages.append(img_data) img.close() # Remove temporary files split_pdf_pages.clean_up() if pages: imgfields = [] for img in pages: IF = ImageField() IF.set(context, img) imgfields.append(IF) setattr(context, 'pagePictures', imgfields) return msg or "Successfully converted %s pages" % 
len(pages)