예제 #1
0
def pdf_info(original):
    """
    Extract dictionary of pdf information by running ``pdfinfo``. This could
    use a library instead of a process.

    Note: I'm assuming pdfinfo output is sanitized (integers where integers are
    expected, etc.) - if this is wrong then an exception will be raised and caught
    leading to the dreaded error page. It seems a safe assumption. Date fields
    are the exception: a value that cannot be parsed is logged and skipped.

    :param original: path of the pdf file handed to ``pdfinfo``.
    :returns: dict that may contain ``pdf_mod_date``, ``pdf_creation_date``
        and ``pdf_pages`` (only when pdfinfo reported them), and always
        contains ``pdf_page_size_width``, ``pdf_page_size_height``,
        ``pdf_keywords``, ``pdf_creator``, ``pdf_producer``, ``pdf_author``,
        ``pdf_title`` (``None`` when absent), ``pdf_version_major`` and
        ``pdf_version_minor``.
    :raises BadMediaFail: if pdfinfo cannot be started, or its output has no
        "Page size" line.
    """
    ret_dict = {}
    pdfinfo = where('pdfinfo')
    try:
        proc = Popen(executable=pdfinfo, args=[pdfinfo, original], stdout=PIPE)
        # communicate() reads stdout to EOF, closes the pipe and waits for the
        # child, so no process or file descriptor is left behind.
        output = proc.communicate()[0]
    except OSError:
        _log.debug('pdfinfo could not read the pdf file.')
        raise BadMediaFail()

    lines = [line.decode('utf-8', 'replace') for line in output.split(b'\n')]
    # Each "Key:   value" line becomes one entry; only the first ':' separates
    # key from value, so values containing colons (dates) stay intact.
    info_dict = dict(
        [part.strip() for part in line.strip().split(':', 1)]
        for line in lines if ':' in line)

    if 'Page size' not in info_dict:
        # TODO - message is for the user, not debug, but BadMediaFail not taking an argument, fix that.
        _log.debug(
            'Missing "Page size" key in returned pdf - conversion failed?')
        raise BadMediaFail()

    for db_key, date_key in [('pdf_mod_date', 'ModDate'),
                             ('pdf_creation_date', 'CreationDate')]:
        if date_key not in info_dict:
            continue
        try:
            ret_dict[db_key] = dateutil.parser.parse(info_dict[date_key])
        except (ValueError, OverflowError):
            # A date we cannot interpret should not fail the whole upload.
            _log.debug('Could not parse pdf %s value %r, skipping it.',
                       date_key, info_dict[date_key])
    for db_key, int_key in [('pdf_pages', 'Pages')]:
        if int_key in info_dict:
            ret_dict[db_key] = int(info_dict[int_key])

    # parse 'PageSize' field: 595 x 842 pts (A4)
    page_size_parts = info_dict['Page size'].split()
    ret_dict['pdf_page_size_width'] = float(page_size_parts[0])
    ret_dict['pdf_page_size_height'] = float(page_size_parts[2])

    for db_key, str_key in [('pdf_keywords', 'Keywords'),
                            ('pdf_creator', 'Creator'),
                            ('pdf_producer', 'Producer'),
                            ('pdf_author', 'Author'), ('pdf_title', 'Title')]:
        ret_dict[db_key] = info_dict.get(str_key, None)
    ret_dict['pdf_version_major'], ret_dict['pdf_version_minor'] = \
        map(int, info_dict['PDF version'].split('.'))

    return ret_dict
예제 #2
0
def resize_tool(entry,
                force,
                keyname,
                orig_file,
                target_name,
                conversions_subdir,
                exif_tags,
                quality,
                filter,
                new_size=None):
    """
    Open ``orig_file`` and hand it to ``resize_image`` when a resize is due.

    A resize is due when ``force`` is truthy, when the image is wider or
    taller than ``new_size``, or when ``exif_image_needs_rotation`` says so
    for ``exif_tags``. When ``new_size`` is not given, the ``max_width`` /
    ``max_height`` configured under ``media:<keyname>`` are used.

    Raises BadMediaFail if the image cannot be opened (IOError).
    """
    if not new_size:
        # Fall back to the size configured for this kind of derivative.
        size_config = mgg.global_config['media:' + keyname]
        new_size = (size_config['max_width'], size_config['max_height'])

    try:
        image = Image.open(orig_file)
    except IOError:
        raise BadMediaFail()

    # Evaluated left to right with short-circuiting, so the rotation check
    # only runs when neither forcing nor the dimensions already decide.
    resize_needed = (
        force
        or image.size[0] > new_size[0]
        or image.size[1] > new_size[1]
        or exif_image_needs_rotation(exif_tags))
    if not resize_needed:
        return

    resize_image(entry, image, unicode(keyname), target_name, tuple(new_size),
                 exif_tags, conversions_subdir, quality, filter)
예제 #3
0
    def extract_metadata(self, file):
        """
        Store location, exif and size metadata gathered for the image.

        GPS data taken from ``self.exif_tags`` (if any) becomes a Location on
        the entry, cleaned exif data (if any) is passed to the entry's
        ``media_data_init``, and the pixel width and height of
        ``self.process_filename`` are recorded as file metadata for ``file``.

        Raises BadMediaFail if the image cannot be opened (IOError).
        """
        # GPS position -> Location
        position = get_gps_data(self.exif_tags)
        if len(position) > 0:
            Location.create({"position": position}, self.entry)

        # Cleaned exif tags -> media data
        cleaned_exif = clean_exif(self.exif_tags)
        if len(cleaned_exif) > 0:
            self.entry.media_data_init(exif_all=cleaned_exif)

        # Image dimensions -> file metadata
        try:
            image = Image.open(self.process_filename)
        except IOError:
            raise BadMediaFail()

        self.entry.set_file_metadata(
            file, width=image.size[0], height=image.size[1])
예제 #4
0
def extract_exif(filename):
    """
    Open the file at ``filename`` in binary mode and return the result of
    ``process_file`` (called with ``details=False``) for it.

    Raises BadMediaFail if an IOError occurs while opening or processing.
    """
    try:
        with open(filename, 'rb') as image_file:
            exif_tags = process_file(image_file, details=False)
    except IOError:
        raise BadMediaFail(_('Could not read the image file.'))
    return exif_tags
예제 #5
0
    def __on_discovered(self, data, is_media):
        """
        Handle the result of media discovery.

        :param data: the discovery result; stored on ``self._discovery_data``
            when discovery succeeded.
        :param is_media: falsy when the source could not be discovered as
            media. In that case a BadMediaFail is recorded on
            ``self._failed`` and nothing else is stored.

        Either way the main loop is halted exactly once.
        """
        if not is_media:
            self._failed = BadMediaFail()
            _log.error('Could not discover {0}'.format(self._src_path))
            self.halt()
            # Stop here: falling through would log a bogus "Discovered"
            # message, dereference ``data`` and halt a second time.
            return

        _log.debug('Discovered: {0}'.format(data.__dict__))

        self._discovery_data = data

        # Gracefully shut down MainLoop
        self.halt()
예제 #6
0
def process_pdf(proc_state):
    """Code to process a pdf file. Will be run by celery.

    A Workbench() represents a local temporary dir. It is automatically
    cleaned up when this function exits.

    Steps: copy the queued original to its destination, convert it to pdf
    with unoconv when its extension is not ``pdf``, collect the pdf
    information, create and store ``thumb`` and ``medium`` png previews,
    delete the queue file and save the collected information on the entry.

    :param proc_state: processing state; this function uses its ``entry``,
        ``workbench``, ``get_queued_filename``, ``copy_original``,
        ``store_public`` and ``delete_queue_file``.
    :raises BadMediaFail: if the unoconv conversion or the creation of a
        preview image leaves no output file behind (``pdf_info`` may raise
        it as well).
    """
    entry = proc_state.entry
    workbench = proc_state.workbench

    queued_filename = proc_state.get_queued_filename()
    name_builder = FilenameBuilder(queued_filename)

    # Copy our queued local workbench to its final destination
    original_dest = name_builder.fill('{basename}{ext}')
    proc_state.copy_original(original_dest)

    # Create a pdf if this is a different doc, store pdf for viewer
    ext = queued_filename.rsplit('.', 1)[-1].lower()
    if ext == 'pdf':
        pdf_filename = queued_filename
    else:
        pdf_filename = queued_filename.rsplit('.', 1)[0] + '.pdf'
        unoconv = where('unoconv')
        Popen(executable=unoconv,
              args=[unoconv, '-v', '-f', 'pdf', queued_filename]).wait()
        # The exit status is not inspected; a missing output file is the
        # failure signal.
        if not os.path.exists(pdf_filename):
            _log.debug('unoconv failed to convert file to pdf')
            raise BadMediaFail()
        proc_state.store_public(keyname=u'pdf', local_file=pdf_filename)

    pdf_info_dict = pdf_info(pdf_filename)

    for name in (u'thumb', u'medium'):
        size_config = mgg.global_config['media:' + name]
        filename = name_builder.fill('{basename}.%s.png' % name)
        path = workbench.joinpath(filename)
        create_pdf_thumb(pdf_filename, path,
                         size_config['max_width'], size_config['max_height'])
        # Explicit check rather than ``assert``: assertions are stripped when
        # Python runs with -O, which would let a missing preview slip through.
        if not os.path.exists(path):
            _log.debug('Failed to create %s image for pdf', name)
            raise BadMediaFail()
        proc_state.store_public(keyname=name, local_file=path)

    proc_state.delete_queue_file()

    entry.media_data_init(**pdf_info_dict)
    entry.save()
예제 #7
0
    def _generate_pdf(self):
        """
        Store the pdf. If the file is not a pdf, make it a pdf

        Runs unoconv on ``self.process_filename``, stores the resulting pdf
        publicly under the ``pdf`` key as ``{basename}.pdf`` and returns a
        workbench-localized copy of that stored file.

        :raises BadMediaFail: if no pdf exists after the conversion.
        """
        # The converted file is expected next to the input, with the
        # extension replaced by ``.pdf``. Checking the input path itself
        # would always succeed and store the unconverted source as the pdf.
        tmp_pdf = os.path.splitext(self.process_filename)[0] + '.pdf'

        unoconv = where('unoconv')
        Popen(executable=unoconv,
              args=[unoconv, '-v', '-f', 'pdf', self.process_filename]).wait()

        if not os.path.exists(tmp_pdf):
            _log.debug('unoconv failed to convert file to pdf')
            raise BadMediaFail()

        store_public(self.entry, 'pdf', tmp_pdf,
                     self.name_builder.fill('{basename}.pdf'))

        return self.workbench.localized_file(
            mgg.public_store, self.entry.media_files['pdf'])
예제 #8
0
def resize_tool(proc_state, force, keyname, target_name, conversions_subdir,
                exif_tags):
    """
    Open the queued original image and hand it to ``resize_image`` when a
    resize is due.

    A resize is due when ``force`` is truthy, when the image exceeds the
    ``max_width`` / ``max_height`` configured under ``media:<keyname>``, or
    when ``exif_image_needs_rotation`` says so for ``exif_tags``.

    Raises BadMediaFail if the image cannot be opened (IOError).
    """
    # Path of the original image being resized
    source_path = proc_state.get_queued_filename()
    size_config = mgg.global_config['media:' + keyname]
    width_limit = size_config['max_width']
    height_limit = size_config['max_height']

    try:
        image = Image.open(source_path)
    except IOError:
        raise BadMediaFail()

    # Evaluated left to right with short-circuiting, so the rotation check
    # only runs when neither forcing nor the dimensions already decide.
    resize_needed = (
        force
        or image.size[0] > width_limit
        or image.size[1] > height_limit
        or exif_image_needs_rotation(exif_tags))
    if not resize_needed:
        return

    resize_image(proc_state, image, unicode(keyname), target_name,
                 (width_limit, height_limit), exif_tags, conversions_subdir)
예제 #9
0
def resize_tool(entry,
                force,
                keyname,
                orig_file,
                target_name,
                conversions_subdir,
                exif_tags,
                quality,
                filter,
                new_size=None):
    """
    Open ``orig_file`` and hand it to ``resize_image`` when a resize is due.

    Nothing is done when ``_skip_resizing`` reports that a ``keyname`` file
    of the same size and quality is already in use. Otherwise a resize is
    due when ``force`` is truthy, when the image is wider or taller than
    ``new_size``, or when ``exif_image_needs_rotation`` says so for
    ``exif_tags``. When ``new_size`` is not given, the ``max_width`` /
    ``max_height`` configured under ``media:<keyname>`` are used.

    Raises BadMediaFail if the image cannot be opened (IOError).
    """
    if not new_size:
        # Fall back to the size configured for this kind of derivative.
        size_config = mgg.global_config['media:' + keyname]
        new_size = (size_config['max_width'], size_config['max_height'])

    # An identical derivative already exists: no need to reprocess.
    if _skip_resizing(entry, keyname, new_size, quality, filter):
        _log.info('{0} of same size and quality already in use, skipping '
                  'resizing of media {1}.'.format(keyname, entry.id))
        return

    try:
        image = Image.open(orig_file)
    except IOError:
        raise BadMediaFail()

    # Evaluated left to right with short-circuiting, so the rotation check
    # only runs when neither forcing nor the dimensions already decide.
    resize_needed = (
        force
        or image.size[0] > new_size[0]
        or image.size[1] > new_size[1]
        or exif_image_needs_rotation(exif_tags))
    if not resize_needed:
        return

    resize_image(entry, image, six.text_type(keyname), target_name,
                 tuple(new_size), exif_tags, conversions_subdir, quality,
                 filter)