def test_merge_documents_3(self):
    """
    Insert doc2 into the middle of doc1 (after its third page) and check
    that the merged document holds all ten pages in the expected order
    and that doc2 no longer exists.
    """
    doc1 = operations.create_document(owner=self.user)
    doc2 = operations.create_document(owner=self.user)

    # pages[0:5] are created in doc1, pages[5:10] in doc2.
    pages = (
        [operations.create_page(doc1) for _ in range(5)] +
        [operations.create_page(doc2) for _ in range(5)])

    self.assertEqual(doc1.num_pages, 5)
    self.assertEqual(doc2.num_pages, 5)

    operations.merge_documents(doc1, doc2, 3)

    # The merged-in document must be gone, and doc1 must own every page.
    self.assertEqual(manager(Document).filter(pk=doc2.pk).count(), 0)
    self.assertEqual(doc1.num_pages, 10)

    # Expected order of the 10-pager:
    #   - the first three pages of doc1 stay in front,
    #   - all five pages of doc2 follow, starting at the 4th position,
    #   - the last two pages of doc1 become the last two pages.
    expected = pages[:3] + pages[5:] + pages[3:5]
    for index, page in enumerate(expected):
        # Page positions are 1-based.
        self.assertEqual(
            page.pk, doc1.pages.get(position=index + 1).pk)
def create_page(processor, parent_asset, document, pdf_orig_path, position):
    """
    Turn a PDF file holding a single page into a page of ``document``.

    A page record is created at ``position``, the PDF is converted to a
    JPEG and a thumbnail of that JPEG is written next to the original
    (``<name>-thumbnail.jpeg``).

    Returns a list with the three assets created for the page, in this
    order: the original PDF, the full-size JPEG and the JPEG thumbnail.
    All three are produced by ``processor``, owned by the document's
    owner and attached to ``parent_asset``.
    """
    page = operations.create_page(document, position)

    # Derive the output file names from the PDF's name.
    stem, _ = os.path.splitext(pdf_orig_path)
    jpeg_path = pdf.convert(pdf_orig_path, 'jpeg')
    thumb_path = '%s-thumbnail.jpeg' % stem

    # Scale the converted JPEG down and store it as the thumbnail.
    full_size = image.load(jpeg_path)
    reduced = image.thumbnail(full_size, settings.THUMBNAIL_SIZE)
    image.save(reduced, thumb_path)

    def register(asset_class, file_name, mime_type):
        # Every asset of this page shares owner, producer, page and parent;
        # only the class, the file and the mime type vary.
        return operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=asset_class,
            file_name=file_name,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=mime_type)

    # Put the assets into the work queue
    return [
        # The original full-res page as a PDF
        register(models.AssetClass.PAGE_ORIGINAL,
                 pdf_orig_path,
                 models.MimeType.PDF),
        # The full-res page as a JPEG
        register(models.AssetClass.PAGE_IMAGE,
                 jpeg_path,
                 models.MimeType.JPEG),
        # The thumbnail as a JPEG
        register(models.AssetClass.PAGE_THUMBNAIL,
                 thumb_path,
                 models.MimeType.JPEG),
    ]
def test_merge_documents_2(self):
    """
    Prepend doc2 to the beginning of doc1 (merge at offset 0) and check
    that doc2's pages come first, doc1's pages follow, and doc2 no
    longer exists.
    """
    doc1 = operations.create_document(owner=self.user)
    doc2 = operations.create_document(owner=self.user)

    # pages[0:5] are created in doc1, pages[5:10] in doc2.
    pages = (
        [operations.create_page(doc1) for _ in range(5)] +
        [operations.create_page(doc2) for _ in range(5)])

    self.assertEqual(doc1.num_pages, 5)
    self.assertEqual(doc2.num_pages, 5)

    operations.merge_documents(doc1, doc2, 0)

    # The merged-in document must be gone, and doc1 must own every page.
    self.assertEqual(manager(Document).filter(pk=doc2.pk).count(), 0)
    self.assertEqual(doc1.num_pages, 10)

    # doc2's pages occupy positions 1-5, doc1's pages positions 6-10.
    expected = pages[5:] + pages[:5]
    for index, page in enumerate(expected):
        # Page positions are 1-based.
        self.assertEqual(
            page.pk, doc1.pages.get(position=index + 1).pk)
def handle_work_item(processor, item):
    """
    Pick up a (possibly) multipage PDF upload and turn it into a document
    having (possibly) multiple individual pages.

    ``item`` must provide the uploaded asset under ``'Asset-Instance'``
    and the path of its local copy under ``'Local-Path'``.  The PDF is
    split into one file per page (``page-*.pdf`` next to the local copy).

    If the asset has no PAGE_ORIGINAL children yet, a new document is
    created and one page plus one PAGE_ORIGINAL asset is created per
    split file.  Otherwise the existing child assets are looked up by
    position and the freshly split files are uploaded to them.

    Returns the list of page assets, followed by the new document's
    binary DOCUMENT asset when a document was created.
    """
    asset = item['Asset-Instance']
    local_path = item['Local-Path']
    work_dir = os.path.dirname(local_path)
    page_prefix = os.path.join(work_dir, 'page-')
    asset_list = []

    pdf.split_pages(local_path, page_prefix)

    if asset.get_children(models.AssetClass.PAGE_ORIGINAL).count() == 0:
        document = operations.create_document(
            asset.owner,
            title='Uploaded on %s (%s)' % (
                asset.date_created,
                asset.producer.process))
    else:
        # Pages were already created for this asset; only refresh files.
        document = None

    # NOTE(review): the page files are ordered lexicographically; this
    # assumes pdf.split_pages zero-pads the page numbers -- confirm.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    for index, page_pdf_path in enumerate(all_page_files):
        position = index + 1  # positions are 1-based
        if document:
            # No trailing comma here: the asset itself, not a 1-tuple
            # wrapping it, has to end up in the returned list.
            page_asset = operations.create_asset_from_file(
                owner=document.owner,
                producer=processor,
                asset_class=models.AssetClass.PAGE_ORIGINAL,
                file_name=page_pdf_path,
                related_page=operations.create_page(document, position),
                parent=asset,
                child_number=position,
                mime_type=models.MimeType.PDF)
        else:
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)
        asset_list.append(page_asset)

    if document:
        asset_list.append(
            document.assets.get(
                asset_class__name=models.AssetClass.DOCUMENT,
                mime_type__name=models.MimeType.BINARY))
    # NOTE(review): when the pages already existed no document is known
    # here, so no document asset is queued (this used to fail with an
    # AttributeError on None).  Confirm whether the existing document
    # should be looked up and queued instead.

    return asset_list
def handle_work_item(processor, item):
    """
    Pick up a (possibly) multipage PDF upload and turn it into a document
    having (possibly) multiple individual pages.

    ``item`` must provide the uploaded asset under ``'Asset-Instance'``
    and the path of its local copy under ``'Local-Path'``.  The PDF is
    split into one file per page (``page-*.pdf`` next to the local copy).

    If the asset has no PAGE_ORIGINAL children yet, a new document is
    created and one page plus one PAGE_ORIGINAL asset is created per
    split file.  Otherwise the existing child assets are looked up by
    position and the freshly split files are uploaded to them.

    Returns the list of page assets, followed by the new document's
    binary DOCUMENT asset when a document was created.
    """
    asset = item['Asset-Instance']
    local_path = item['Local-Path']
    work_dir = os.path.dirname(local_path)
    page_prefix = os.path.join(work_dir, 'page-')
    asset_list = []

    pdf.split_pages(local_path, page_prefix)

    if asset.get_children(models.AssetClass.PAGE_ORIGINAL).count() == 0:
        document = operations.create_document(
            asset.owner,
            title='Uploaded on %s (%s)' % (
                asset.date_created,
                asset.producer.process))
    else:
        # Pages were already created for this asset; only refresh files.
        document = None

    # NOTE(review): the page files are ordered lexicographically; this
    # assumes pdf.split_pages zero-pads the page numbers -- confirm.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    for index, page_pdf_path in enumerate(all_page_files):
        position = index + 1  # positions are 1-based
        if document:
            # No trailing comma here: the asset itself, not a 1-tuple
            # wrapping it, has to end up in the returned list.
            page_asset = operations.create_asset_from_file(
                owner=document.owner,
                producer=processor,
                asset_class=models.AssetClass.PAGE_ORIGINAL,
                file_name=page_pdf_path,
                related_page=operations.create_page(document, position),
                parent=asset,
                child_number=position,
                mime_type=models.MimeType.PDF)
        else:
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)
        asset_list.append(page_asset)

    if document:
        asset_list.append(
            document.assets.get(
                asset_class__name=models.AssetClass.DOCUMENT,
                mime_type__name=models.MimeType.BINARY))
    # NOTE(review): when the pages already existed no document is known
    # here, so no document asset is queued (this used to fail with an
    # AttributeError on None).  Confirm whether the existing document
    # should be looked up and queued instead.

    return asset_list
def test_split_document(self):
    """
    Split a ten-page document after its fifth page and check that both
    resulting documents hold five pages each, in their original order.
    """
    doc1 = operations.create_document(owner=self.user)
    pages = [operations.create_page(doc1) for _ in range(10)]
    self.assertEqual(doc1.num_pages, 10)

    doc2 = operations.split_document(doc1, 5)

    self.assertEqual(doc1.num_pages, 5)
    self.assertEqual(doc2.num_pages, 5)

    # doc1 keeps the first five pages; doc2 receives the last five,
    # renumbered from position 1.
    for document, expected in ((doc1, pages[:5]), (doc2, pages[5:])):
        for index, page in enumerate(expected):
            # Page positions are 1-based.
            self.assertEqual(
                page.pk, document.pages.get(position=index + 1).pk)
def handle_page(processor, parent_asset, document, tiff_original_path,
                position):
    """
    Convert the given TIFF file (representing a single page) to a JPEG
    (via RGBA) and create a thumbnail of it.

    A page record is created in ``document`` at ``position``.  The RGBA,
    JPEG and thumbnail files are written next to the original TIFF,
    sharing its base name.

    Returns a list with the three assets created for the page, in this
    order: the original TIFF, the full-size JPEG and the JPEG thumbnail.

    Raises OSError if the ``tiff2rgba`` executable cannot be started.
    """
    # Imported locally so this function does not depend on a module-level
    # import that the rest of the file may not have.
    import subprocess

    # Stuff we'll need later
    page = operations.create_page(document, position)
    base_name = os.path.splitext(tiff_original_path)[0]
    rgba_path = '%s.rgba' % base_name
    jpeg_path = '%s.jpeg' % base_name
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA
    # TODO use convert instead of tiff2rgba
    # The paths are passed as an argument list instead of being pasted
    # into a shell command with %r: Python repr quoting is not shell
    # quoting and breaks on unicode paths (u'...') or on paths that
    # contain quotes or shell metacharacters.
    # NOTE(review): as before, the exit status is not checked -- confirm
    # whether a failed conversion should raise here.
    subprocess.call(['tiff2rgba', tiff_original_path, rgba_path])

    # Save the original as JPEG
    image.save(image.load(rgba_path), jpeg_path)

    # Save the thumbnail as JPEG
    image.save(
        image.thumbnail(image.load(rgba_path), settings.THUMBNAIL_SIZE),
        thumb_path)

    # Put the assets into the work queue
    return [
        # The original full-res page as a TIFF
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_ORIGINAL,
            file_name=tiff_original_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.TIFF),

        # The full-res page as a JPEG
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_IMAGE,
            file_name=jpeg_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.JPEG),

        # The thumbnail as a JPEG
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_THUMBNAIL,
            file_name=thumb_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.JPEG),
    ]
def handle_page(processor, parent_asset, document, tiff_original_path,
                position):
    """
    Convert the given TIFF file (representing a single page) to a JPEG
    (via RGBA) and create a thumbnail of it.

    A page record is created in ``document`` at ``position``.  The RGBA,
    JPEG and thumbnail files are written next to the original TIFF,
    sharing its base name.

    Returns a list with the three assets created for the page, in this
    order: the original TIFF, the full-size JPEG and the JPEG thumbnail.

    Raises OSError if the ``tiff2rgba`` executable cannot be started.
    """
    # Imported locally so this function does not depend on a module-level
    # import that the rest of the file may not have.
    import subprocess

    # Stuff we'll need later
    page = operations.create_page(document, position)
    base_name = os.path.splitext(tiff_original_path)[0]
    rgba_path = '%s.rgba' % base_name
    jpeg_path = '%s.jpeg' % base_name
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA
    # TODO use convert instead of tiff2rgba
    # The paths are passed as an argument list instead of being pasted
    # into a shell command with %r: Python repr quoting is not shell
    # quoting and breaks on unicode paths (u'...') or on paths that
    # contain quotes or shell metacharacters.
    # NOTE(review): as before, the exit status is not checked -- confirm
    # whether a failed conversion should raise here.
    subprocess.call(['tiff2rgba', tiff_original_path, rgba_path])

    # Save the original as JPEG
    image.save(image.load(rgba_path), jpeg_path)

    # Save the thumbnail as JPEG
    image.save(
        image.thumbnail(image.load(rgba_path), settings.THUMBNAIL_SIZE),
        thumb_path)

    # Put the assets into the work queue
    return [
        # The original full-res page as a TIFF
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_ORIGINAL,
            file_name=tiff_original_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.TIFF),

        # The full-res page as a JPEG
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_IMAGE,
            file_name=jpeg_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.JPEG),

        # The thumbnail as a JPEG
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_THUMBNAIL,
            file_name=thumb_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.JPEG),
    ]