Example #1
0
    def test_merge_documents_3(self):
        """ Merge doc2 into doc1 at offset 3 and check that doc2's pages
            end up in the middle of doc1 and that doc2 no longer exists.
        """
        # Insert doc2 into middle of doc 1
        doc1 = operations.create_document( owner = self.user )
        doc2 = operations.create_document( owner = self.user )

        # pages[0:5] were created in doc1, pages[5:10] in doc2
        pages = ( [ operations.create_page(doc1) for _ in xrange(5) ]
                  + [ operations.create_page(doc2) for _ in xrange(5) ] )

        self.assertEqual( doc1.num_pages, 5 )
        self.assertEqual( doc2.num_pages, 5 )

        operations.merge_documents(doc1, doc2, 3)

        self.assertEqual(
            manager(Document).filter( pk = doc2.pk ).count(), 0 )
        self.assertEqual( doc1.num_pages, 10 )

        # First 3 pages of doc1 stay the first pages of the 10 pager
        for i in xrange(0, 3):
            self.assertEqual(pages[i].pk, doc1.pages.get(position=i+1).pk)

        # All pages from doc2 now start at the 4th page of the 10 pager
        for i in xrange(3, 8):
            self.assertEqual(pages[i+2].pk, doc1.pages.get(position=i+1).pk)

        # Last two pages of doc1 are now the last two pages of the 10 pager
        for i in xrange(8, 10):
            self.assertEqual(pages[i-5].pk, doc1.pages.get(position=i+1).pk)
Example #2
0
def create_page(
    processor,
    parent_asset,
    document,
    pdf_orig_path,
    position ):

    """ Add a page to the document from a single-page PDF file, deriving
        a JPEG and a JPEG thumbnail from that PDF.

        Returns a list holding the results of three
        operations.create_asset_from_file calls, in this order: the
        original PDF, the converted JPEG and the thumbnail.
    """

    new_page  = operations.create_page(document, position)
    stem      = os.path.splitext(pdf_orig_path)[0]
    full_jpeg = pdf.convert(pdf_orig_path, 'jpeg')
    thumbnail = '%s-thumbnail.jpeg' % stem

    # Derive the thumbnail from the converted JPEG and write it to disk
    converted = image.load(full_jpeg)
    shrunk    = image.thumbnail(converted, settings.THUMBNAIL_SIZE)
    image.save(shrunk, thumbnail)

    # One entry per asset: (asset class, backing file, mime type)
    asset_specs = (
        (models.AssetClass.PAGE_ORIGINAL,  pdf_orig_path, models.MimeType.PDF),
        (models.AssetClass.PAGE_IMAGE,     full_jpeg,     models.MimeType.JPEG),
        (models.AssetClass.PAGE_THUMBNAIL, thumbnail,     models.MimeType.JPEG),
        )

    # Every asset hangs off the same page and parent asset
    return [
        operations.create_asset_from_file(
            owner        = document.owner,
            producer     = processor,
            asset_class  = kind,
            file_name    = path,
            related_page = new_page,
            parent       = parent_asset,
            child_number = new_page.position,
            mime_type    = mime )
        for kind, path, mime in asset_specs ]
Example #3
0
    def test_merge_documents_2(self):
        """ Merge doc2 into doc1 at offset 0 and check that doc2's pages
            come first, doc1's pages follow, and doc2 no longer exists.
        """
        # Prepend doc2 to beginning of doc 1
        doc1 = operations.create_document( owner = self.user )
        doc2 = operations.create_document( owner = self.user )

        # pages[0:5] were created in doc1, pages[5:10] in doc2
        pages = ( [ operations.create_page(doc1) for _ in xrange(5) ]
                  + [ operations.create_page(doc2) for _ in xrange(5) ] )

        self.assertEqual( doc1.num_pages, 5 )
        self.assertEqual( doc2.num_pages, 5 )

        operations.merge_documents(doc1, doc2, 0)

        self.assertEqual(
            manager(Document).filter( pk = doc2.pk ).count(), 0 )
        self.assertEqual( doc1.num_pages, 10 )

        for i in xrange(5):
            # doc1's original pages are pushed back to positions 6..10
            self.assertEqual(pages[i].pk, doc1.pages.get(position=i+6).pk)
            # doc2's pages now occupy positions 1..5
            self.assertEqual(pages[i+5].pk, doc1.pages.get(position=i+1).pk)
Example #4
0
def handle_work_item(processor, item):

    """ Pick up a (possibly) multipage PDF upload and turn it into a
        document having (possibly) multiple individual pages.

        item['Asset-Instance'] is the uploaded asset and
        item['Local-Path'] is the path of its PDF file.  The PDF is
        split via pdf.split_pages and every 'page-*.pdf' file found next
        to the upload is handled in sorted order, numbered from 1.

        If the upload asset has no PAGE_ORIGINAL children yet, a new
        document is created and each page file becomes a new page with
        its own PAGE_ORIGINAL asset.  Otherwise each page file is
        uploaded into the existing child asset at the same position.

        Returns the list of page assets, followed by the document's
        binary DOCUMENT asset when a document was created by this call.

    """

    asset       = item['Asset-Instance']
    local_path  = item['Local-Path']
    work_dir    = os.path.dirname(local_path)
    page_prefix = os.path.join(work_dir, 'page-')
    asset_list  = []

    pdf.split_pages( local_path, page_prefix )

    if asset.get_children(models.AssetClass.PAGE_ORIGINAL).count() == 0:
        document = operations.create_document(
            asset.owner,
            title = 'Uploaded on %s (%s)' % (
                asset.date_created,
                asset.producer.process ))
    else:
        # Pages already exist for this upload; only refresh their files
        document = None

    # NOTE(review): the sort is lexicographic, so page order is only
    # right if split_pages zero-pads the page numbers -- confirm.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    for position, page_pdf_path in enumerate(all_page_files, 1):
        if document is not None:
            # No trailing comma after the call: it used to turn
            # page_asset into a 1-tuple.
            page_asset = operations.create_asset_from_file(
                owner        = document.owner,
                producer     = processor,
                asset_class  = models.AssetClass.PAGE_ORIGINAL,
                file_name    = page_pdf_path,
                related_page = operations.create_page(document, position),
                parent       = asset,
                child_number = position,
                mime_type    = models.MimeType.PDF )
        else:
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)

        asset_list.append(page_asset)

    # Only a freshly created document is available here; dereferencing
    # None used to raise AttributeError on the re-processing path.
    # NOTE(review): confirm whether the existing document's asset should
    # also be returned when re-processing.
    if document is not None:
        asset_list.append(
            document.assets.get(
                asset_class__name = models.AssetClass.DOCUMENT,
                mime_type__name   = models.MimeType.BINARY ))

    return asset_list
Example #5
0
def handle_work_item(processor, item):
    """ Pick up a (possibly) multipage PDF upload and turn it into a
        document having (possibly) multiple individual pages.

        item['Asset-Instance'] is the uploaded asset and
        item['Local-Path'] is the path of its PDF file.  The PDF is
        split via pdf.split_pages and every 'page-*.pdf' file found next
        to the upload is handled in sorted order, numbered from 1.

        If the upload asset has no PAGE_ORIGINAL children yet, a new
        document is created and each page file becomes a new page with
        its own PAGE_ORIGINAL asset.  Otherwise each page file is
        uploaded into the existing child asset at the same position.

        Returns the list of page assets, followed by the document's
        binary DOCUMENT asset when a document was created by this call.

    """

    asset = item['Asset-Instance']
    local_path = item['Local-Path']
    work_dir = os.path.dirname(local_path)
    page_prefix = os.path.join(work_dir, 'page-')
    asset_list = []

    pdf.split_pages(local_path, page_prefix)

    if asset.get_children(models.AssetClass.PAGE_ORIGINAL).count() == 0:
        document = operations.create_document(
            asset.owner,
            title='Uploaded on %s (%s)' %
            (asset.date_created, asset.producer.process))
    else:
        # Pages already exist for this upload; only refresh their files
        document = None

    # NOTE(review): the sort is lexicographic, so page order is only
    # right if split_pages zero-pads the page numbers -- confirm.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    for position, page_pdf_path in enumerate(all_page_files, 1):
        if document is not None:
            # No trailing comma after the call: it used to turn
            # page_asset into a 1-tuple.
            page_asset = operations.create_asset_from_file(
                owner=document.owner,
                producer=processor,
                asset_class=models.AssetClass.PAGE_ORIGINAL,
                file_name=page_pdf_path,
                related_page=operations.create_page(document, position),
                parent=asset,
                child_number=position,
                mime_type=models.MimeType.PDF)
        else:
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)

        asset_list.append(page_asset)

    # Only a freshly created document is available here; dereferencing
    # None used to raise AttributeError on the re-processing path.
    # NOTE(review): confirm whether the existing document's asset should
    # also be returned when re-processing.
    if document is not None:
        asset_list.append(
            document.assets.get(asset_class__name=models.AssetClass.DOCUMENT,
                                mime_type__name=models.MimeType.BINARY))

    return asset_list
Example #6
0
    def test_split_document(self):
        """ Split a 10 page document at offset 5 and check that each
            resulting document holds five of the original pages, in
            their original order.
        """
        doc1 = operations.create_document( owner = self.user )

        pages = [ operations.create_page(doc1) for _ in xrange(10) ]

        self.assertEqual( doc1.num_pages, 10 )

        doc2 = operations.split_document(doc1, 5)

        self.assertEqual( doc1.num_pages, 5 )
        self.assertEqual( doc2.num_pages, 5 )

        for i in xrange(5):
            # First half stays in doc1 at positions 1..5
            self.assertEqual(pages[i].pk, doc1.pages.get(position=i+1).pk)
            # Second half moves to doc2, renumbered from position 1
            self.assertEqual(pages[i+5].pk, doc2.pages.get(position=i+1).pk)
Example #7
0
def handle_page(processor, parent_asset, document, tiff_original_path,
                position):
    """ Convert the given TIFF file (representing a single page) whose path
        is given to a JPEG (via RGBA).  Also create a JPEG thumbnail.

        A page is created in ``document`` at ``position``.  The return
        value is a list holding the results of three
        ``operations.create_asset_from_file`` calls, in this order: the
        original TIFF, the full-size JPEG and the thumbnail JPEG.

        Raises ``subprocess.CalledProcessError`` if ``tiff2rgba`` exits
        with a non-zero status.

    """
    # Imported here so the function does not rely on a module-level import
    import subprocess

    # Stuff we'll need later
    page = operations.create_page(document, position)
    base_name = os.path.splitext(tiff_original_path)[0]
    rgba_path = '%s.rgba' % base_name
    jpeg_path = '%s.jpeg' % base_name
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA
    # TODO use convert instead of tiff2rgba
    # Pass the paths as an argument list instead of through a shell:
    # %r is not shell quoting (unicode paths render as u'...') and the
    # exit status used to be ignored.
    subprocess.check_call(['tiff2rgba', tiff_original_path, rgba_path])

    # Save the original as JPEG
    image.save(image.load(rgba_path), jpeg_path)

    # Save the thumbnail as JPEG
    image.save(image.thumbnail(image.load(rgba_path), settings.THUMBNAIL_SIZE),
               thumb_path)

    # Hand back one asset per file produced for this page
    return [

        # The original full-res page as a TIFF
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_ORIGINAL,
            file_name=tiff_original_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.TIFF),

        # The full-res page as a JPEG
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_IMAGE,
            file_name=jpeg_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.JPEG),

        # The thumbnail as a JPEG
        operations.create_asset_from_file(
            owner=document.owner,
            producer=processor,
            asset_class=models.AssetClass.PAGE_THUMBNAIL,
            file_name=thumb_path,
            related_page=page,
            parent=parent_asset,
            child_number=page.position,
            mime_type=models.MimeType.JPEG),
    ]
Example #8
0
def handle_page(
    processor,
    parent_asset,
    document,
    tiff_original_path,
    position ):


    """ Convert the given TIFF file (representing a single page) whose path
        is given to a JPEG (via RGBA).  Also create a JPEG thumbnail.

        A page is created in the document at the given position.  The
        return value is a list holding the results of three
        operations.create_asset_from_file calls, in this order: the
        original TIFF, the full-size JPEG and the thumbnail JPEG.

        Raises subprocess.CalledProcessError if tiff2rgba exits with a
        non-zero status.

    """

    # Imported here so the function does not rely on a module-level import
    import subprocess

    # Stuff we'll need later
    page         = operations.create_page(document, position)
    base_name    = os.path.splitext(tiff_original_path)[0]
    rgba_path    = '%s.rgba' % base_name
    jpeg_path    = '%s.jpeg' % base_name
    thumb_path   = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA
    # TODO use convert instead of tiff2rgba
    # Pass the paths as an argument list instead of through a shell:
    # %r is not shell quoting (unicode paths render as u'...') and the
    # exit status used to be ignored.
    subprocess.check_call(['tiff2rgba', tiff_original_path, rgba_path])

    # Save the original as JPEG
    image.save(
        image.load(rgba_path),
        jpeg_path)

    # Save the thumbnail as JPEG
    image.save(
        image.thumbnail(
            image.load(rgba_path),
            settings.THUMBNAIL_SIZE),
        thumb_path)

    # Hand back one asset per file produced for this page
    return [

        # The original full-res page as a TIFF
        operations.create_asset_from_file(
            owner        = document.owner,
            producer     = processor,
            asset_class  = models.AssetClass.PAGE_ORIGINAL,
            file_name    = tiff_original_path,
            related_page = page,
            parent       = parent_asset,
            child_number = page.position,
            mime_type    = models.MimeType.TIFF ),

        # The full-res page as a JPEG
        operations.create_asset_from_file(
            owner        = document.owner,
            producer     = processor,
            asset_class  = models.AssetClass.PAGE_IMAGE,
            file_name    = jpeg_path,
            related_page = page,
            parent       = parent_asset,
            child_number = page.position,
            mime_type    = models.MimeType.JPEG ),

        # The thumbnail as a JPEG
        operations.create_asset_from_file(
            owner        = document.owner,
            producer     = processor,
            asset_class  = models.AssetClass.PAGE_THUMBNAIL,
            file_name    = thumb_path,
            related_page = page,
            parent       = parent_asset,
            child_number = page.position,
            mime_type    = models.MimeType.JPEG ),
        ]