Esempio n. 1
0
def create_page(
    processor,
    parent_asset,
    document,
    pdf_orig_path,
    position ):

    """ Create a page in the document from a single-page PDF file.

        The PDF is converted to a JPEG, a thumbnail of that JPEG is saved
        next to the PDF as '<pdf path without extension>-thumbnail.jpeg',
        and one asset is created from each of the three files.

        Returns a list of the created assets in this order: the original
        PDF, the full-size JPEG, the thumbnail JPEG.
    """

    # Create the page record and work out the file names involved
    page       = operations.create_page(document, position)
    path_stem  = os.path.splitext(pdf_orig_path)[0]
    jpeg_path  = pdf.convert(pdf_orig_path, 'jpeg')
    thumb_path = '%s-thumbnail.jpeg' % path_stem

    # Scale the converted JPEG down and store the result as the thumbnail
    full_size = image.load(jpeg_path)
    reduced   = image.thumbnail(full_size, settings.THUMBNAIL_SIZE)
    image.save(reduced, thumb_path)

    def new_page_asset(asset_class, file_name, mime_type):
        """ Create one asset for this page; only the class, the file and
            the MIME type differ between the three assets.
        """
        return operations.create_asset_from_file(
            owner        = document.owner,
            producer     = processor,
            asset_class  = asset_class,
            file_name    = file_name,
            related_page = page,
            parent       = parent_asset,
            child_number = page.position,
            mime_type    = mime_type )

    # The original full-res page as a PDF
    original_asset = new_page_asset(
        models.AssetClass.PAGE_ORIGINAL,
        pdf_orig_path,
        models.MimeType.PDF )

    # The full-res page as a JPEG
    image_asset = new_page_asset(
        models.AssetClass.PAGE_IMAGE,
        jpeg_path,
        models.MimeType.JPEG )

    # The thumbnail as a JPEG
    thumbnail_asset = new_page_asset(
        models.AssetClass.PAGE_THUMBNAIL,
        thumb_path,
        models.MimeType.JPEG )

    # Hand the new assets back to the caller
    return [ original_asset, image_asset, thumbnail_asset ]
Esempio n. 2
0
    def test_split_pages(self):
        """
        Split a sample PDF into pages and convert every resulting page.

        The directory returned by pdf.split_pages is removed even when a
        conversion raises or the assertion fails, so a failing run does
        not leave files behind.

        """
        from donomo.archive.utils import pdf

        source_file = os.path.join(os.path.dirname(__file__), 'data',
                                   '2008_06_26_15_57_07.pdf')

        output_dir = pdf.split_pages(source_file)
        try:
            input_files = glob(os.path.join(output_dir, '*.pdf'))
            output_files = [pdf.convert(f) for f in input_files]

            # NOTE: the lengths match by construction; the effective check
            # is that pdf.convert succeeds for every split page.
            self.assertEqual(len(input_files), len(output_files))
        finally:
            # Clean up the split output regardless of the test outcome
            shutil.rmtree(output_dir)
Esempio n. 3
0
def redo_page(
    processor,
    parent_asset,
    pdf_orig_path,
    position ):

    """ Re-convert the given PDF file (representing a single page) to a
        JPEG and a thumbnail, and upload the fresh files to the page's
        existing assets.

        The original, image and thumbnail assets are looked up among the
        children of parent_asset by child number (position) and asset
        class.  If any of them no longer exists the page is skipped and
        None is returned.  Otherwise the three assets are returned as a
        list in the order: original, image, thumbnail.

        The processor argument is not used by this function.
    """
    try:
        # All three lookups go through the parent's children; the parent
        # asset itself is not the place to query for child assets.
        # NOTE(review): assumes `children` is the related manager for
        # child assets, as the 'original' lookup implies - confirm.
        children = parent_asset.children
        asset = {
            'original' : children.get(
                child_number = position,
                asset_class = models.AssetClass.PAGE_ORIGINAL),

            'image' : children.get(
                child_number = position,
                asset_class = models.AssetClass.PAGE_IMAGE),

            'thumbnail' : children.get(
                child_number = position,
                asset_class = models.AssetClass.PAGE_THUMBNAIL),
            }

    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return

    # Stuff we'll need later
    base_name    = os.path.splitext(pdf_orig_path)[0]
    jpeg_path    = pdf.convert(pdf_orig_path, 'jpeg')
    thumb_path   = '%s-thumbnail.jpeg' % base_name

    # Save the re-converted JPEG as a new thumbnail JPEG
    image.save(
        image.thumbnail(
            image.load(jpeg_path),
            settings.THUMBNAIL_SIZE),
        thumb_path)

    # Upload the new asset files
    operations.upload_asset_file( asset['original'],  pdf_orig_path )
    operations.upload_asset_file( asset['image'],     jpeg_path     )
    operations.upload_asset_file( asset['thumbnail'], thumb_path    )

    # Return a real list in a fixed order rather than dict.values(),
    # whose order (and, on Python 3, type) is not a plain ordered list.
    return [ asset['original'], asset['image'], asset['thumbnail'] ]
Esempio n. 4
0
    def test_split_pages(self):
        """
        Split a sample PDF into pages and convert every resulting page.

        The directory returned by pdf.split_pages is removed even when a
        conversion raises or the assertion fails, so a failing run does
        not leave files behind.

        """
        from donomo.archive.utils import pdf

        source_file = os.path.join(
            os.path.dirname(__file__),
            'data',
            '2008_06_26_15_57_07.pdf' )

        output_dir = pdf.split_pages(source_file)
        try:
            input_files  = glob(os.path.join(output_dir, '*.pdf'))
            output_files = [ pdf.convert(f) for f in input_files ]

            # NOTE: the lengths match by construction; the effective check
            # is that pdf.convert succeeds for every split page.
            self.assertEqual(len(input_files), len(output_files))
        finally:
            # Clean up the split output regardless of the test outcome
            shutil.rmtree(output_dir)