def setUp(self):
    """Prepare an Image with a stored thumbnail plus the two preview views.

    Pulls ``app``, ``portal`` and ``request`` from the test layer, creates
    an ``Image`` named ``test_image`` in the portal while acting as
    Manager, stores a single ``ImageField`` thumbnail in the image's
    annotations under ``THUMBNAIL_KEY`` and looks up the
    ``document_preview`` view for both the image and the portal.
    """
    layer = self.layer
    self.app = layer['app']
    self.portal = layer['portal']
    self.request = layer['request']

    # The image is created while the Manager role is adopted.
    with api.env.adopt_roles(['Manager']):
        self.image = api.content.create(
            container=self.portal,
            type='Image',
            id='test_image',
            image=dummy_image(),
        )

    # Store one thumbnail built from the dummy image data.
    stored_thumbnail = ImageField()
    stored_thumbnail.set(self.image, dummy_image().data)
    IAnnotations(self.image)[THUMBNAIL_KEY] = [stored_thumbnail]

    def preview_view(context):
        # Same view name and request for every context under test.
        return api.content.get_view(
            name='document_preview',
            context=context,
            request=self.request,
        )

    self.image_view = preview_view(self.image)
    self.portal_view = preview_view(self.portal)
def __call__(self):
    """Wrap legacy ``pagePictures`` entries of catalogued objects in ImageFields.

    Queries the portal catalog for all objects of the portal types in
    ``TYPES`` (any language) and, for each object whose ``pagePictures``
    are not yet ``ImageField`` instances, replaces the list with one
    ``ImageField`` per original picture. Progress is written through the
    logger returned by ``self.mklog()``.

    Objects are skipped when:

    * ``getObject()`` raises ``AttributeError``,
    * ``pagePictures`` is missing or empty (logged as ``NOTFOUND``),
    * the first picture already is an ``ImageField`` (already converted).

    Side effect: sets ``helperutilities.RUN_SHELL_COMMANDS`` to ``True``.
    """
    helperutilities.RUN_SHELL_COMMANDS = True
    log = self.mklog()
    log('converting preview images...')
    catalog = self.context.portal_catalog
    results = catalog(Language='all', portal_type=TYPES, b_size=100000)
    log(str(len(results)) + '\n')

    for result in results:
        try:
            ob = result.getObject()
        except AttributeError:
            # getObject() failed for this catalog entry; skip it silently,
            # exactly as before.
            continue

        # One lookup covers "attribute missing", "None" and "empty".
        pictures = getattr(ob, 'pagePictures', None)
        if not pictures:
            log("NOTFOUND: %s has no pagePictures\n" % result.getPath())
            continue

        # isinstance() instead of comparing type() objects: no throwaway
        # ImageField instance is needed and subclasses are recognised too.
        if isinstance(pictures[0], ImageField):
            continue

        # wrap as blobfield
        newpics = []
        for pic in pictures:
            field = ImageField()
            field.set(ob, pic)
            newpics.append(field)
        ob.pagePictures = newpics
        log("Success: %s converted\n" % result.getPath())

    # Disabled code, kept for reference only:
    # Code to kill old pagePictures from history
    # PH = portal.portal_historiesstorage
    # REP = PH._getZVCRepo()
    # cnt = 0
    # for zvh in REP.objectValues():
    #     for vid in zvh.getVersionIds():
    #         data = zvh.getVersionById(vid)._data
    #         wob = data.getWrappedObject()
    #         if not hasattr(wob, 'object'):
    #             continue
    #         rob = wob.object
    #         if hasattr(rob, 'pagePictures'):
    #             pp = rob.pagePictures
    #             if len(pp)>0 and type(pp[0]) == type(''):
    #                 print "found one"
    #                 rob.pagePictures = []
    #                 rob._p_changed = 1
def __call__(self):
    """Convert the context's payload and store the results as annotations.

    The payload and mimetype come from ``self.getPayload()``. If the
    major mimetype (the part before ``/``) is in ``EXCLUDE_TYPES``, only
    a message is stored under ``PREVIEW_MESSAGE_KEY`` and nothing is
    converted.

    Conversion is tried via ``self.convert_on_server``:

    * ``ServerError`` stores an explanatory message under
      ``PREVIEW_MESSAGE_KEY`` and aborts,
    * ``ConfigError`` falls back to ``self.convert_locally``.

    On success the first converted PDF is stored as a ``FileField`` under
    ``PDF_VERSION_KEY`` and at most 20 previews / 20 thumbnails are
    stored as lists of ``ImageField`` under ``PREVIEW_IMAGES_KEY`` and
    ``THUMBNAIL_KEY``.
    """
    annotations = IAnnotations(self.context)

    # get the contents of the context
    mimetype, payload = self.getPayload()
    basetype = mimetype.split('/')[0] if mimetype else None

    if basetype in EXCLUDE_TYPES:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(
            'Type {0} is in excluded types, '
            'skipping {1}'.format(basetype, self.context.getId())
        )
        annotations[PREVIEW_MESSAGE_KEY] = ('There is no preview for this '
                                            'file type')
        return

    try:
        converted = self.convert_on_server(payload, mimetype)
    except ServerError as e:
        if e.args and e.args[0].startswith("Error connecting"):
            annotations[PREVIEW_MESSAGE_KEY] = (
                'Could not contact conversion server')
        else:
            annotations[PREVIEW_MESSAGE_KEY] = (
                'Sorry, this document type cannot be converted. There is '
                'no preview available.')
        return
    except ConfigError:
        converted = self.convert_locally(payload, mimetype)

    # Upper bound on stored page images, shared by previews and thumbnails.
    max_pages = 20

    def as_image_fields(blobs):
        # Wrap the first ``max_pages`` raw images in ImageFields.
        fields = []
        for blob in blobs[:max_pages]:
            field = ImageField()
            field.set(self.context, blob)
            fields.append(field)
        return fields

    pdfdata = FileField()
    pdfdata.set(self.context, converted['pdfs'][0])
    previewdata = as_image_fields(converted['previews'])
    thumbdata = as_image_fields(converted['thumbnails'])

    # Write the annotations only after every field was built successfully.
    annotations[PDF_VERSION_KEY] = pdfdata
    annotations[PREVIEW_IMAGES_KEY] = previewdata
    annotations[THUMBNAIL_KEY] = thumbdata
def run_conversion(self):
    """Render the HTML dump to PDF and store PDF, previews and thumbnails.

    ``self.html_dump('dump.html')`` provides the input file; if it
    returns a false value nothing happens. Otherwise docsplit converts
    the dump to ``dump.pdf`` inside ``self.storage_dir`` and renders GIF
    page images in the sizes taken from ``self.gsettings``.

    Stored on the context's annotations:

    * ``PDF_VERSION_KEY``: the raw content of the generated PDF,
    * ``PREVIEW_IMAGES_KEY``: up to 20 ``ImageField`` from ``large/``,
    * ``THUMBNAIL_KEY``: up to 20 ``ImageField`` from ``small/``.

    NOTE(review): the PDF is stored as plain file content rather than
    wrapped in a field like the images -- confirm consumers expect that.
    """
    file_id = 'dump'
    inputfilepath = self.html_dump(file_id + '.html')
    if not inputfilepath:
        return

    docsplit._run_command([
        docsplit.binary, 'pdf', inputfilepath,
        '--output', self.storage_dir,
    ])
    pdf_path = os.path.join(self.storage_dir, file_id + '.pdf')

    annotations = IAnnotations(self.context)
    gsettings = self.gsettings
    sizes = (
        ('large', gsettings.large_size),
        ('normal', gsettings.normal_size),
        ('small', gsettings.thumb_size),
    )

    # PDF data is binary: read it in 'rb' mode so no newline translation
    # or text decoding is applied to it.
    with open(pdf_path, 'rb') as pdf_file:
        annotations[PDF_VERSION_KEY] = pdf_file.read()

    docsplit.dump_images(pdf_path, self.storage_dir, sizes=sizes,
                         format='gif')

    def load_images(subdir):
        # Wrap the first 20 files (sorted by name) of a size directory.
        directory = os.path.join(self.storage_dir, subdir)
        fields = []
        for name in sorted(os.listdir(directory))[:20]:
            field = ImageField()
            # GIF data is binary as well.
            with open(os.path.join(directory, name), 'rb') as img:
                field.set(self.context, img.read())
            fields.append(field)
        return fields

    # Build both lists before touching the annotations so a failure while
    # reading thumbnails does not leave previews stored without them.
    previewdata = load_images('large')
    thumbdata = load_images('small')
    annotations[PREVIEW_IMAGES_KEY] = previewdata
    annotations[THUMBNAIL_KEY] = thumbdata
def _getAllPageImages(context, size=(320, 452)):
    """Generate the preview images for a pdf.

    The PDF returned by ``context.get_review_pdf()`` is split into single
    pages with ``pdftk``; every page is rendered to a GIF with ``convert``
    (one process per page) and the results are stored on the context as
    ``pagePictures``, a list of ``ImageField``.

    :param context: object providing ``get_review_pdf()`` and
        ``absolute_url()``; receives the ``pagePictures`` attribute.
    :param size: ``(width, height)`` passed to ``convert -resize``.
    :returns: one of (kept as-is for backward compatibility)

        * ``("<url> has no pdf", None)`` when there is no PDF or it is
          empty,
        * the ``SubprocessException`` instance when ``pdftk`` cannot be
          set up,
        * a tuple of error messages reported by the subprocesses, or
        * ``"Successfully converted <n> pages"`` when none were reported.
    """
    pdf = context.get_review_pdf()
    pdf_data = None
    if pdf:
        # Close the blob handle instead of leaving it to the garbage
        # collector.
        blob_file = pdf["blob"].open()
        try:
            pdf_data = blob_file.read()
        finally:
            blob_file.close()
    if not pdf or not pdf_data:
        return "%s has no pdf" % (context.absolute_url()), None

    # Split the pdf, one file per page
    try:
        split_pdf_pages = RunSubprocess(
            "pdftk",
            output_params="burst output")
    except SubprocessException as e:
        return e

    split_pdf_pages.create_tmp_input(suffix=".pdf", data=pdf_data)
    split_pdf_pages.create_tmp_output_dir()
    tmp_dir = split_pdf_pages.tmp_output_dir

    msg = tuple()
    pages = []
    try:
        split_pdf_pages.output_path = os.path.join(tmp_dir, "%04d.pdf")
        split_pdf_pages.run()
        if split_pdf_pages.errors != "":
            msg += ("Message from split_pdf_pages:"
                    "\n%s\n" % split_pdf_pages.errors,)

        # Convert the pages to .gifs
        # rewritten to have one converter step per page as we have seen
        # process sizes larger than 2GB for 60 pages in a batch
        for filename in glob.glob(tmp_dir + "/*.pdf"):
            pdf_to_image = RunSubprocess(
                "convert",
                input_params="-density 250",
                input_path=filename,
                output_params="-resize %sx%s -background white -flatten" % (
                    size[0], size[1]))
            outputname = (
                os.path.splitext(os.path.basename(filename))[0] + '.gif')
            pdf_to_image.output_path = os.path.join(tmp_dir, outputname)
            pdf_to_image.run()
            if pdf_to_image.errors != "":
                msg += ("Message from pdfs_to_images:"
                        "\n%s\n" % pdf_to_image.errors,)
            pdf_to_image.clean_up()

        # Sorted names keep the pages in order (pdftk writes zero-padded
        # page numbers).
        imgfiles = sorted(
            name for name in os.listdir(tmp_dir)
            if os.path.splitext(name)[1] == '.gif')
        for name in imgfiles:
            # GIF data is binary; 'rb' avoids newline translation/decoding.
            with open(os.path.join(tmp_dir, name), "rb") as img:
                pages.append(img.read())
    finally:
        # Remove temporary files, also when a conversion step failed
        split_pdf_pages.clean_up()

    if pages:
        imgfields = []
        for img_data in pages:
            field = ImageField()
            field.set(context, img_data)
            imgfields.append(field)
        context.pagePictures = imgfields
    return msg or "Successfully converted %s pages" % len(pages)