def pdf_info(original):
    """
    Extract dictionary of pdf information. This could use a library instead
    of a process.

    Note: I'm assuming pdfinfo output is sanitized (integers where integers
    are expected, etc.) - if this is wrong then an exception will be raised
    and caught leading to the dreaded error page. It seems a safe assumption.

    :param original: path of the pdf file to inspect
    :returns: dict of media_data keys parsed from pdfinfo's output
    :raises BadMediaFail: if pdfinfo cannot read the file, or its output
        has no "Page size" entry (conversion probably failed)
    """
    ret_dict = {}
    pdfinfo = where('pdfinfo')
    try:
        proc = Popen(executable=pdfinfo, args=[pdfinfo, original],
                     stdout=PIPE)
        lines = proc.stdout.readlines()
    except OSError:
        _log.debug('pdfinfo could not read the pdf file.')
        raise BadMediaFail()

    lines = [l.decode('utf-8', 'replace') for l in lines]

    # pdfinfo emits "Key: value" pairs, one per line.
    info_dict = dict([[part.strip() for part in l.strip().split(':', 1)]
                      for l in lines if ':' in l])

    if 'Page size' not in info_dict:
        # TODO - message is for the user, not debug, but BadMediaFail not
        # taking an argument, fix that.
        _log.debug(
            'Missing "Page size" key in returned pdf - conversion failed?')
        raise BadMediaFail()

    # BUG FIX: membership/lookup must use the pdfinfo key ('ModDate' /
    # 'CreationDate') and store under the db key.  Previously the whole
    # (db_key, pdf_key) tuple was tested against info_dict, which never
    # matched, so the dates were silently dropped.
    for db_key, date_key in [('pdf_mod_date', 'ModDate'),
                             ('pdf_creation_date', 'CreationDate')]:
        if date_key in info_dict:
            ret_dict[db_key] = dateutil.parser.parse(info_dict[date_key])

    for db_key, int_key in [('pdf_pages', 'Pages')]:
        if int_key in info_dict:
            ret_dict[db_key] = int(info_dict[int_key])

    # parse 'Page size' field, e.g.: "595 x 842 pts (A4)"
    page_size_parts = info_dict['Page size'].split()
    ret_dict['pdf_page_size_width'] = float(page_size_parts[0])
    ret_dict['pdf_page_size_height'] = float(page_size_parts[2])

    for db_key, str_key in [('pdf_keywords', 'Keywords'),
                            ('pdf_creator', 'Creator'),
                            ('pdf_producer', 'Producer'),
                            ('pdf_author', 'Author'),
                            ('pdf_title', 'Title')]:
        ret_dict[db_key] = info_dict.get(str_key, None)

    ret_dict['pdf_version_major'], ret_dict['pdf_version_minor'] = \
        map(int, info_dict['PDF version'].split('.'))

    return ret_dict
def resize_tool(entry, force, keyname, orig_file, target_name,
                conversions_subdir, exif_tags, quality, filter,
                new_size=None):
    """Produce a resized rendition of ``orig_file`` as ``target_name``.

    When ``new_size`` is omitted, the maximum width/height configured for
    ``keyname`` are used.  A resized file is only written when the source
    exceeds the target box, needs EXIF-based rotation, or ``force`` is set.

    :raises BadMediaFail: if PIL cannot open ``orig_file``
    """
    # Fall back to the configured maximum dimensions for this key.
    if not new_size:
        conf = mgg.global_config['media:' + keyname]
        new_size = (conf['max_width'], conf['max_height'])

    try:
        im = Image.open(orig_file)
    except IOError:
        raise BadMediaFail()

    # Create the target_name file only when the original is larger than
    # the requested box, must be rotated, or when explicitly forced.
    if (force
            or im.size[0] > new_size[0]
            or im.size[1] > new_size[1]
            or exif_image_needs_rotation(exif_tags)):
        resize_image(entry, im, unicode(keyname), target_name,
                     tuple(new_size), exif_tags, conversions_subdir,
                     quality, filter)
def extract_metadata(self, file):
    """ Extract all the metadata from the image and store """
    # GPS coordinates become a Location record attached to the entry.
    gps_data = get_gps_data(self.exif_tags)
    if len(gps_data):
        Location.create({"position": gps_data}, self.entry)

    # Remaining EXIF tags go into the entry's media_data.
    exif_all = clean_exif(self.exif_tags)
    if len(exif_all):
        self.entry.media_data_init(exif_all=exif_all)

    # Record the pixel dimensions as file metadata.
    try:
        im = Image.open(self.process_filename)
    except IOError:
        raise BadMediaFail()

    width, height = im.size
    self.entry.set_file_metadata(file, width=width, height=height)
def extract_exif(filename):
    """ Returns EXIF tags found in file at ``filename`` """
    try:
        with open(filename, 'rb') as image_file:
            # details=False skips thumbnail/makernote extraction.
            return process_file(image_file, details=False)
    except IOError:
        raise BadMediaFail(_('Could not read the image file.'))
def __on_discovered(self, data, is_media):
    """Discovery callback: record the result and stop the main loop.

    :param data: discovery result object supplied by the discoverer
    :param is_media: whether the source was recognized as media
    """
    if not is_media:
        # Remember the failure so the caller can raise it later.
        self._failed = BadMediaFail()
        _log.error('Could not discover {0}'.format(self._src_path))
        self.halt()
        # BUG FIX: bail out here.  Previously execution fell through,
        # logging `data.__dict__` (not meaningful on failure), storing it
        # as discovery data and calling halt() a second time.
        return

    _log.debug('Discovered: {0}'.format(data.__dict__))
    self._discovery_data = data

    # Gracefully shut down MainLoop
    self.halt()
def process_pdf(proc_state):
    """Code to process a pdf file. Will be run by celery.

    A Workbench() represents a local tempory dir. It is automatically
    cleaned up when this function exits.
    """
    entry = proc_state.entry
    workbench = proc_state.workbench

    queued_filename = proc_state.get_queued_filename()
    name_builder = FilenameBuilder(queued_filename)

    # Copy our queued local workbench to its final destination
    proc_state.copy_original(name_builder.fill('{basename}{ext}'))

    # Create a pdf if this is a different doc, store pdf for viewer
    ext = queued_filename.rsplit('.', 1)[-1].lower()
    if ext == 'pdf':
        pdf_filename = queued_filename
    else:
        # unoconv writes its output next to the input as <basename>.pdf
        pdf_filename = queued_filename.rsplit('.', 1)[0] + '.pdf'
        unoconv = where('unoconv')
        Popen(executable=unoconv,
              args=[unoconv, '-v', '-f', 'pdf', queued_filename]).wait()
        if not os.path.exists(pdf_filename):
            _log.debug('unoconv failed to convert file to pdf')
            raise BadMediaFail()
    proc_state.store_public(keyname=u'pdf', local_file=pdf_filename)

    pdf_info_dict = pdf_info(pdf_filename)

    # Render thumb and medium previews of the first page as PNGs.
    for name in (u'thumb', u'medium'):
        conf = mgg.global_config['media:' + name]
        filename = name_builder.fill('{basename}.%s.png' % name)
        path = workbench.joinpath(filename)
        create_pdf_thumb(pdf_filename, path,
                         conf['max_width'], conf['max_height'])
        assert (os.path.exists(path))
        proc_state.store_public(keyname=name, local_file=path)

    proc_state.delete_queue_file()

    entry.media_data_init(**pdf_info_dict)
    entry.save()
def _generate_pdf(self):
    """
    Store the pdf. If the file is not a pdf, make it a pdf

    :returns: locally cached path of the stored pdf
    :raises BadMediaFail: if unoconv did not produce the pdf
    """
    # BUG FIX: unoconv writes its output next to the input file as
    # <basename>.pdf (see process_pdf).  Previously tmp_pdf was set to
    # self.process_filename itself, so the existence check below always
    # passed (the source file exists) and the unconverted original was
    # stored under the 'pdf' key.
    tmp_pdf = self.process_filename.rsplit('.', 1)[0] + '.pdf'

    unoconv = where('unoconv')
    Popen(executable=unoconv,
          args=[unoconv, '-v', '-f', 'pdf', self.process_filename]).wait()

    if not os.path.exists(tmp_pdf):
        _log.debug('unoconv failed to convert file to pdf')
        raise BadMediaFail()

    store_public(self.entry, 'pdf', tmp_pdf,
                 self.name_builder.fill('{basename}.pdf'))

    return self.workbench.localized_file(
        mgg.public_store, self.entry.media_files['pdf'])
def resize_tool(proc_state, force, keyname, target_name,
                conversions_subdir, exif_tags):
    """Create a resized rendition of the queued image when needed.

    The maximum dimensions for ``keyname`` come from the global config.
    A ``target_name`` file is created only when the original exceeds
    that box, needs EXIF-based rotation, or ``force`` is set.

    :raises BadMediaFail: if PIL cannot open the queued file
    """
    # filename -- the filename of the original image being resized
    filename = proc_state.get_queued_filename()
    conf = mgg.global_config['media:' + keyname]
    max_width = conf['max_width']
    max_height = conf['max_height']

    try:
        im = Image.open(filename)
    except IOError:
        raise BadMediaFail()

    if (force
            or im.size[0] > max_width
            or im.size[1] > max_height
            or exif_image_needs_rotation(exif_tags)):
        resize_image(proc_state, im, unicode(keyname), target_name,
                     (max_width, max_height), exif_tags,
                     conversions_subdir)
def resize_tool(entry, force, keyname, orig_file, target_name,
                conversions_subdir, exif_tags, quality, filter,
                new_size=None):
    """Produce a resized rendition of ``orig_file`` as ``target_name``.

    When ``new_size`` is omitted, the maximum width/height configured for
    ``keyname`` are used.  Resizing is skipped entirely when an equivalent
    rendition (same size, quality and filter) is already in use.

    :raises BadMediaFail: if PIL cannot open ``orig_file``
    """
    # Fall back to the configured maximum dimensions for this key.
    if not new_size:
        conf = mgg.global_config['media:' + keyname]
        new_size = (conf['max_width'], conf['max_height'])

    # If thumb or medium is already the same quality and size, then don't
    # reprocess
    if _skip_resizing(entry, keyname, new_size, quality, filter):
        _log.info('{0} of same size and quality already in use, skipping '
                  'resizing of media {1}.'.format(keyname, entry.id))
        return

    try:
        im = Image.open(orig_file)
    except IOError:
        raise BadMediaFail()

    # Create the target_name file only when the original is larger than
    # the requested box, must be rotated, or when explicitly forced.
    if (force
            or im.size[0] > new_size[0]
            or im.size[1] > new_size[1]
            or exif_image_needs_rotation(exif_tags)):
        resize_image(entry, im, six.text_type(keyname), target_name,
                     tuple(new_size), exif_tags, conversions_subdir,
                     quality, filter)