def create_page( processor, parent_asset, document, pdf_orig_path, position ):
    """
    Create a page of ``document`` at ``position`` from a single-page PDF.

    The PDF at ``pdf_orig_path`` is converted to a JPEG, a thumbnail of
    that JPEG is written next to it as ``<base>-thumbnail.jpeg``, and one
    asset is created for each of the three files (original PDF, full-size
    JPEG, thumbnail JPEG).  Every asset is owned by the document's owner,
    produced by ``processor``, attached to the new page, and parented to
    ``parent_asset`` with the page's position as its child number.

    Returns the list of created assets in the order: original, image,
    thumbnail.

    """

    page = operations.create_page(document, position)

    # Paths of the derived files
    jpeg_path  = pdf.convert(pdf_orig_path, 'jpeg')
    thumb_path = '%s-thumbnail.jpeg' % os.path.splitext(pdf_orig_path)[0]

    # Scale the converted JPEG down and store the result as the thumbnail
    full_size = image.load(jpeg_path)
    reduced   = image.thumbnail(full_size, settings.THUMBNAIL_SIZE)
    image.save(reduced, thumb_path)

    def page_asset( asset_class, file_name, mime_type ):
        """ Create one asset for this page; only these three fields vary.
        """
        return operations.create_asset_from_file(
            owner        = document.owner,
            producer     = processor,
            asset_class  = asset_class,
            file_name    = file_name,
            related_page = page,
            parent       = parent_asset,
            child_number = page.position,
            mime_type    = mime_type )

    # Put the assets into the work queue
    return [
        # The original full-res page as a PDF
        page_asset(
            models.AssetClass.PAGE_ORIGINAL,
            pdf_orig_path,
            models.MimeType.PDF ),

        # The full-res page as a JPEG
        page_asset(
            models.AssetClass.PAGE_IMAGE,
            jpeg_path,
            models.MimeType.JPEG ),

        # The thumbnail as a JPEG
        page_asset(
            models.AssetClass.PAGE_THUMBNAIL,
            thumb_path,
            models.MimeType.JPEG ),
        ]
def test_split_pages(self):
    """ Split a multi-page PDF and convert every resulting page.

    The directory returned by ``pdf.split_pages`` is removed even when a
    conversion raises or the assertion fails, so a failing run does not
    leave split pages behind on disk.

    """
    from donomo.archive.utils import pdf

    source_file = os.path.join(
        os.path.dirname(__file__),
        'data',
        '2008_06_26_15_57_07.pdf')

    output_dir = pdf.split_pages(source_file)
    try:
        input_files = glob(os.path.join(output_dir, '*.pdf'))

        # NOTE(review): the length check below always holds for a list
        # comprehension; the effective check is that no conversion raises.
        output_files = [pdf.convert(f) for f in input_files]
        self.assertEqual(len(input_files), len(output_files))
    finally:
        # Always clean up the split pages, pass or fail
        shutil.rmtree(output_dir)
def redo_page( processor, parent_asset, pdf_orig_path, position ):
    """
    Re-convert the given PDF file (representing a single page) to a JPEG
    and a thumbnail, and upload the new files over the page's existing
    assets.

    The existing original, image and thumbnail assets are looked up among
    the children of ``parent_asset`` by ``position``.  If any of the three
    is missing the page is treated as deleted: nothing is converted or
    uploaded and ``None`` is returned.

    ``processor`` is not used by this function.

    Returns a list of the three updated assets (in no particular order),
    or ``None`` when the page was skipped.

    """

    # All three lookups must go through the ``children`` manager; the
    # parent asset object itself has no ``get`` to query with.
    try:
        asset = {
            'original' : parent_asset.children.get(
                child_number = position,
                asset_class  = models.AssetClass.PAGE_ORIGINAL),

            'image' : parent_asset.children.get(
                child_number = position,
                asset_class  = models.AssetClass.PAGE_IMAGE),

            'thumbnail' : parent_asset.children.get(
                child_number = position,
                asset_class  = models.AssetClass.PAGE_THUMBNAIL),
            }
    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return

    # Stuff we'll need later
    base_name  = os.path.splitext(pdf_orig_path)[0]
    jpeg_path  = pdf.convert(pdf_orig_path, 'jpeg')
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Save the re-converted JPEG as a new thumbnail JPEG
    image.save(
        image.thumbnail(
            image.load(jpeg_path),
            settings.THUMBNAIL_SIZE),
        thumb_path)

    # Upload the new asset files
    operations.upload_asset_file( asset['original'], pdf_orig_path )
    operations.upload_asset_file( asset['image'], jpeg_path )
    operations.upload_asset_file( asset['thumbnail'], thumb_path )

    # Put the assets into the work queue (as a concrete list, so callers
    # get the same type regardless of the Python version)
    return list(asset.values())
def test_split_pages(self):
    """ Split pages, then convert each split page.

    Cleanup of the split directory runs in a ``finally`` clause so that a
    failed conversion or assertion does not leak the directory.

    """
    from donomo.archive.utils import pdf

    source_file = os.path.join(
        os.path.dirname(__file__),
        'data',
        '2008_06_26_15_57_07.pdf' )

    output_dir = pdf.split_pages(source_file)
    try:
        input_files  = glob(os.path.join(output_dir, '*.pdf'))

        # NOTE(review): one output per input is guaranteed by the list
        # comprehension itself; what this really exercises is that
        # pdf.convert succeeds for every page.
        output_files = [ pdf.convert(f) for f in input_files ]
        self.assertEqual(len(input_files), len(output_files))
    finally:
        # Remove the split pages whether or not the checks passed
        shutil.rmtree(output_dir)