def handle_work_item(processor, item):
    """
    Pick up a (possibly) multipage PDF upload and turn it into a document
    having (possibly) multiple individual pages.

    The PDF at ``item['Local-Path']`` is split into per-page PDF files that
    are written next to it using a ``page-`` file name prefix.  When the
    upload asset ``item['Asset-Instance']`` has no PAGE_ORIGINAL children
    yet, a new document is created and every page file becomes a new page
    plus a PAGE_ORIGINAL asset.  Otherwise the page files are uploaded onto
    the already existing child assets.

    Returns the list of assets that were created or refreshed.  When a new
    document was created, its binary DOCUMENT asset is appended as the last
    element.
    """
    asset = item['Asset-Instance']
    local_path = item['Local-Path']
    work_dir = os.path.dirname(local_path)
    page_prefix = os.path.join(work_dir, 'page-')

    pdf.split_pages(local_path, page_prefix)

    if asset.get_children(models.AssetClass.PAGE_ORIGINAL).count() == 0:
        document = operations.create_document(
            asset.owner,
            title='Uploaded on %s (%s)' % (
                asset.date_created,
                asset.producer.process))
    else:
        # Pages already exist for this upload; only their files get replaced.
        document = None

    # NOTE(review): the page order relies on a plain string sort of the file
    # names; this presumably requires pdf.split_pages to zero-pad the page
    # numbers -- confirm.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    asset_list = []
    for position, page_pdf_path in enumerate(all_page_files, 1):
        if document is not None:
            # No trailing comma after the call: it used to turn page_asset
            # into a 1-tuple, so tuples ended up in the returned list.
            page_asset = operations.create_asset_from_file(
                owner=document.owner,
                producer=processor,
                asset_class=models.AssetClass.PAGE_ORIGINAL,
                file_name=page_pdf_path,
                related_page=operations.create_page(document, position),
                parent=asset,
                child_number=position,
                mime_type=models.MimeType.PDF)
        else:
            # NOTE(review): new assets are keyed by child_number above, but
            # this lookup filters on position and not on asset class --
            # verify against the Asset model.
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)
        asset_list.append(page_asset)

    # Only a freshly created document is available here; dereferencing the
    # None placeholder used to raise AttributeError on re-processing.
    if document is not None:
        asset_list.append(
            document.assets.get(
                asset_class__name=models.AssetClass.DOCUMENT,
                mime_type__name=models.MimeType.BINARY))

    return asset_list
def handle_work_item(processor, item):
    """
    Pick up a (possibly) multipage PDF upload and turn it into a document
    having (possibly) multiple individual pages.

    The PDF found at ``item['Local-Path']`` is split into one PDF file per
    page; the page files are written into the same directory with a
    ``page-`` prefix.  If ``item['Asset-Instance']`` has no PAGE_ORIGINAL
    children, a document is created and each page file yields a new page and
    a new PAGE_ORIGINAL asset.  If it already has such children, each page
    file is uploaded onto the matching existing child asset instead.

    Returns a list holding the per-page assets, followed by the binary
    DOCUMENT asset of the document when one was created by this call.
    """
    asset = item['Asset-Instance']
    local_path = item['Local-Path']
    page_prefix = os.path.join(os.path.dirname(local_path), 'page-')

    pdf.split_pages(local_path, page_prefix)

    is_first_run = (
        asset.get_children(models.AssetClass.PAGE_ORIGINAL).count() == 0)
    if is_first_run:
        document = operations.create_document(
            asset.owner,
            title='Uploaded on %s (%s)' % (asset.date_created,
                                           asset.producer.process))
    else:
        document = None

    # NOTE(review): lexicographic ordering of the page files is assumed to
    # match page order (i.e. zero-padded numbering) -- confirm against
    # pdf.split_pages.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    asset_list = []
    for position, page_pdf_path in enumerate(all_page_files, 1):
        if is_first_run:
            # The call must not be followed by a comma; a stray one used to
            # wrap the new asset in a 1-tuple.
            page_asset = operations.create_asset_from_file(
                owner=document.owner,
                producer=processor,
                asset_class=models.AssetClass.PAGE_ORIGINAL,
                file_name=page_pdf_path,
                related_page=operations.create_page(document, position),
                parent=asset,
                child_number=position,
                mime_type=models.MimeType.PDF)
        else:
            # NOTE(review): assets are created with child_number, yet this
            # lookup uses position and ignores the asset class -- verify
            # against the Asset model before relying on this branch.
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)
        asset_list.append(page_asset)

    # There is no document object on a re-run, so the document asset can
    # only be reported when it was created above (this used to raise
    # AttributeError on None).
    if document is not None:
        asset_list.append(
            document.assets.get(
                asset_class__name=models.AssetClass.DOCUMENT,
                mime_type__name=models.MimeType.BINARY))

    return asset_list
def redo_page(processor, parent_asset, tiff_original_path, position):
    """
    Re-convert the given TIFF file (representing a single page) to a JPEG
    and a thumbnail.

    The page's PAGE_ORIGINAL, PAGE_IMAGE and PAGE_THUMBNAIL assets are looked
    up among the children of ``parent_asset`` by ``position``.  The TIFF is
    converted to RGBA with the external ``tiff2rgba`` tool, saved as a JPEG
    and as a thumbnail JPEG of ``settings.THUMBNAIL_SIZE``, and all three
    files are uploaded onto their assets.

    Returns the list ``[original, image, thumbnail]`` of refreshed assets,
    or None when one of the assets no longer exists.
    """
    # Imported locally because this block cannot see the file's import list.
    import subprocess

    try:
        # All three lookups go through the children relation; the image and
        # thumbnail lookups previously called .get() on parent_asset itself.
        # NOTE(review): confirm that Asset has no custom get() method that
        # was intended here.
        children = parent_asset.children
        original = children.get(
            child_number=position,
            asset_class=models.AssetClass.PAGE_ORIGINAL)
        # Named *_asset so the image helper module used below is not
        # shadowed by a local variable.
        image_asset = children.get(
            child_number=position,
            asset_class=models.AssetClass.PAGE_IMAGE)
        thumbnail_asset = children.get(
            child_number=position,
            asset_class=models.AssetClass.PAGE_THUMBNAIL)
    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return None

    # Stuff we'll need later
    base_name = os.path.splitext(tiff_original_path)[0]
    rgba_path = '%s.rgba' % base_name
    jpeg_path = '%s.jpeg' % base_name
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA.  An argument list is used instead of a
    # shell string so that unusual characters in the paths cannot be
    # interpreted by a shell.
    # TODO use convert instead of tiff2rgba
    status = subprocess.call(['tiff2rgba', tiff_original_path, rgba_path])
    if status != 0:
        logging.warning(
            "tiff2rgba exited with status %s for %r",
            status, tiff_original_path)

    # Save the original as JPEG
    image.save(image.load(rgba_path), jpeg_path)

    # Save the thumbnail as JPEG
    image.save(
        image.thumbnail(image.load(rgba_path), settings.THUMBNAIL_SIZE),
        thumb_path)

    # Upload the new asset files (the original is the TIFF passed in; the
    # previous code referenced an undefined pdf_orig_path here).
    operations.upload_asset_file(original, tiff_original_path)
    operations.upload_asset_file(image_asset, jpeg_path)
    operations.upload_asset_file(thumbnail_asset, thumb_path)

    # Put the assets into the work queue
    return [original, image_asset, thumbnail_asset]
def redo_page(processor, parent_asset, pdf_orig_path, position):
    """
    Re-convert the given PDF file (representing a single page) to a JPEG
    and a thumbnail.

    The page's PAGE_ORIGINAL, PAGE_IMAGE and PAGE_THUMBNAIL assets are looked
    up among the children of ``parent_asset`` by ``position``.  The PDF is
    converted to a JPEG via ``pdf.convert``, a thumbnail of
    ``settings.THUMBNAIL_SIZE`` is derived from that JPEG, and all three
    files are uploaded onto their assets.

    Returns the list ``[original, image, thumbnail]`` of refreshed assets,
    or None when one of the assets no longer exists.
    """
    try:
        # All three lookups go through the children relation; the image and
        # thumbnail lookups previously called .get() on parent_asset itself.
        # NOTE(review): confirm that Asset has no custom get() method that
        # was intended here.
        children = parent_asset.children
        original = children.get(
            child_number=position,
            asset_class=models.AssetClass.PAGE_ORIGINAL)
        page_image = children.get(
            child_number=position,
            asset_class=models.AssetClass.PAGE_IMAGE)
        thumbnail = children.get(
            child_number=position,
            asset_class=models.AssetClass.PAGE_THUMBNAIL)
    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return None

    # Stuff we'll need later
    base_name = os.path.splitext(pdf_orig_path)[0]
    jpeg_path = pdf.convert(pdf_orig_path, 'jpeg')
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Save the re-converted JPEG as a new thumbnail JPEG
    image.save(
        image.thumbnail(image.load(jpeg_path), settings.THUMBNAIL_SIZE),
        thumb_path)

    # Upload the new asset files
    operations.upload_asset_file(original, pdf_orig_path)
    operations.upload_asset_file(page_image, jpeg_path)
    operations.upload_asset_file(thumbnail, thumb_path)

    # Put the assets into the work queue.  An explicit list keeps the order
    # fixed; dict.values() gave no ordering guarantee and is not a list on
    # Python 3.
    return [original, page_image, thumbnail]
def redo_page(processor, parent_asset, tiff_original_path, position):
    """
    Re-convert the given TIFF file (representing a single page) to a JPEG
    and a thumbnail.

    Looks up the PAGE_ORIGINAL, PAGE_IMAGE and PAGE_THUMBNAIL child assets of
    ``parent_asset`` for ``position``, converts the TIFF to RGBA using the
    external ``tiff2rgba`` tool, writes a JPEG and a thumbnail JPEG of
    ``settings.THUMBNAIL_SIZE`` next to the TIFF, and uploads the three files
    onto their assets.

    Returns ``[original, image, thumbnail]`` (the refreshed assets), or None
    if any of the three assets does not exist any more.
    """
    # Imported locally because this block cannot see the file's import list.
    import subprocess

    page_assets = []
    try:
        # Every lookup uses the children relation; two of them used to call
        # .get() on parent_asset directly.
        # NOTE(review): confirm that Asset has no custom get() method that
        # was intended here.
        for asset_class in (models.AssetClass.PAGE_ORIGINAL,
                            models.AssetClass.PAGE_IMAGE,
                            models.AssetClass.PAGE_THUMBNAIL):
            page_assets.append(
                parent_asset.children.get(
                    child_number=position,
                    asset_class=asset_class))
    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return None

    # Distinct names keep the image helper module usable below; a local
    # called "image" used to shadow it.
    original, image_asset, thumbnail_asset = page_assets

    # Stuff we'll need later
    base_name = os.path.splitext(tiff_original_path)[0]
    rgba_path = '%s.rgba' % base_name
    jpeg_path = '%s.jpeg' % base_name
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA.  Passing an argument list avoids
    # handing the paths to a shell.
    # TODO use convert instead of tiff2rgba
    status = subprocess.call(['tiff2rgba', tiff_original_path, rgba_path])
    if status != 0:
        logging.warning(
            "tiff2rgba exited with status %s for %r",
            status, tiff_original_path)

    # Save the original as JPEG
    image.save(image.load(rgba_path), jpeg_path)

    # Save the thumbnail as JPEG
    image.save(
        image.thumbnail(image.load(rgba_path), settings.THUMBNAIL_SIZE),
        thumb_path)

    # Upload the new asset files (the original is the TIFF that was passed
    # in; an undefined pdf_orig_path was referenced here before).
    operations.upload_asset_file(original, tiff_original_path)
    operations.upload_asset_file(image_asset, jpeg_path)
    operations.upload_asset_file(thumbnail_asset, thumb_path)

    # Put the assets into the work queue
    return [original, image_asset, thumbnail_asset]