def __import_original__(self, original_path):
    """
    Safe-copy an image from original_path into the package and set all metadata.

    The source path is passed through validate_path, then copied with
    safe_copy to ``original.<ext>`` in the package directory, where ``<ext>``
    is looked up in EXTENSIONS using the lower-cased source extension. The
    value returned by safe_copy is stored in the manifest under that name.
    Metadata reported by exiftool for the copy is written to
    ``original-exif.json``, whose hash is stored in the manifest as well.
    Both steps are recorded in the package history.

    :param original_path: path of the image file to import
    :raises KeyError: if the source extension has no entry in EXTENSIONS
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)

    # verify and copy the original image
    source_path = validate_path(original_path, 'file')
    suffix = os.path.splitext(source_path)[1]
    # fix up filename extensions: the stored name always uses the canonical one
    canonical_ext = EXTENSIONS[suffix[1:].lower()]
    self.original = '.'.join(['original', canonical_ext])
    target_path = os.path.join(self.path, self.original)
    original_hash = safe_copy(source_path, target_path)
    copy_note = 'copied original file from {src} to {dest}'.format(
        src=source_path, dest=target_path)
    self.__append_event__(copy_note)
    self.manifest.set(self.original, original_hash)

    # capture and store metadata from the original file using exiftool
    # note this could be optimized by re-using a single exiftool instance,
    # but code refactoring will have to happen
    with exiftool.ExifTool() as tool:
        records = tool.get_metadata_batch([target_path])
    exif_path = os.path.join(self.path, 'original-exif.json')
    with open(exif_path, 'w') as exif_file:
        for record in records:
            json.dump(record, exif_file, sort_keys=True, indent=4)
            logger.debug(
                'wrote exiftool metadata for original file on {exif_path}'.format(
                    exif_path=exif_path))
            for tag, value in record.items():
                logger.debug(
                    "exiftool found: {key}='{value}'".format(key=tag, value=value))
    exif_hash = hash_of_file(exif_path)
    exif_note = (
        'wrote exif extracted from original file in json format on {exif_path}'
    ).format(exif_path=exif_path)
    self.__append_event__(exif_note)
    self.manifest.set('original-exif.json', exif_hash)

    # capture and store technical metadata using jhove (TBD)
    logger.warning('no jhove metadata is created')
def __append_event__(self, msg):
    """
    Append an event notice to the package history.

    The message is logged at debug level, written to ``history.txt`` in the
    package directory as one line prefixed with the current US/Eastern
    ISO-8601 timestamp, and the manifest entry for ``history.txt`` is then
    refreshed with the hash of the updated file.

    :param msg: text of the event to record
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)
    logger.debug(msg)

    eastern = pytz.timezone('US/Eastern')
    timestamp = datetime.datetime.now(eastern).isoformat()
    entry = '{stamp} {message}\n'.format(stamp=timestamp, message=msg)

    history_path = os.path.join(self.path, 'history.txt')
    with open(history_path, 'a') as history_file:
        history_file.write(entry)

    # the history file just changed, so its manifest hash must be updated too
    self.manifest.set('history.txt', hash_of_file(history_path))
def __import_original__(self, original_path):
    """
    Safe-copy an image from original_path into the package and set all metadata.

    Steps, in order:

    1. validate the source path and copy it into the package directory as
       ``original.<ext>`` (extension normalised through EXTENSIONS);
    2. record the copy in the history and its hash in the manifest;
    3. dump the exiftool metadata of the copy to ``original-exif.json``;
    4. record that file in the history and its hash in the manifest.

    :param original_path: path of the image file to import
    :raises KeyError: if the source extension has no entry in EXTENSIONS
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)

    # --- verify and copy the original image ---
    checked_path = validate_path(original_path, 'file')
    _, raw_extension = os.path.splitext(checked_path)
    extension_key = raw_extension[1:].lower()
    # fix up filename extensions
    self.original = '.'.join(('original', EXTENSIONS[extension_key]))
    package_copy = os.path.join(self.path, self.original)
    copy_hash = safe_copy(checked_path, package_copy)
    self.__append_event__(
        'copied original file from {src} to {dest}'.format(
            src=checked_path, dest=package_copy))
    self.manifest.set(self.original, copy_hash)

    # --- capture and store metadata from the original file using exiftool ---
    # note this could be optimized by re-using a single exiftool instance,
    # but code refactoring will have to happen
    with exiftool.ExifTool() as et:
        exif_batch = et.get_metadata_batch([package_copy])

    json_path = os.path.join(self.path, 'original-exif.json')
    with open(json_path, 'w') as json_file:
        for tags in exif_batch:
            json.dump(tags, json_file, sort_keys=True, indent=4)
            logger.debug(
                'wrote exiftool metadata for original file on {exif_path}'.format(
                    exif_path=json_path))
            for name in tags:
                logger.debug(
                    "exiftool found: {key}='{value}'".format(
                        key=name, value=tags[name]))

    json_hash = hash_of_file(json_path)
    self.__append_event__(
        'wrote exif extracted from original file in json format on {exif_path}'.format(
            exif_path=json_path))
    self.manifest.set('original-exif.json', json_hash)

    # --- capture and store technical metadata using jhove (TBD) ---
    logger.warning('no jhove metadata is created')
def __generate_master__(self):
    """
    Create a master file from the original already in the package and set all metadata.

    The original image is opened and its embedded ICC profile is read; when
    it has none, the bundled ``sRGB_IEC61966-2-1_black_scaled.icc`` profile is
    used as the source profile instead. The image is then converted to the
    bundled ``sRGB_v4_ICC_preference.icc`` profile and saved as
    ``master.tif`` with that target profile embedded. The new file is noted
    in the package history and its hash is stored in the manifest.
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)

    # both bundled profiles live in the 'icc' directory next to this module
    icc_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'icc')
    profile_srgb2 = os.path.join(icc_dir, 'sRGB_IEC61966-2-1_black_scaled.icc')
    profile_srgb4 = os.path.join(icc_dir, 'sRGB_v4_ICC_preference.icc')

    source_path = os.path.join(self.path, self.original)
    source_image = Image.open(source_path)

    # capture the existing ICC profile, or fall back to sRGB v2 if there is none
    if 'icc_profile' in source_image.info:
        profile_bytes = source_image.info['icc_profile']
        logger.debug(
            'detected internal ICC color profile in {original}'.format(
                original=self.original))
    else:
        profile_bytes = getOpenProfile(profile_srgb2).tobytes()
        logger.warning(
            '{original} does not have an internal ICC color profile'.format(
                original=self.original))

    source_profile = getOpenProfile(BytesIO(profile_bytes))
    source_profile_name = getProfileName(source_profile).strip()
    target_profile = getOpenProfile(profile_srgb4)
    target_profile_name = getProfileName(target_profile).strip()
    logger.debug(
        'attempting to convert from "{original}" to "{target}"'.format(
            original=source_profile_name, target=target_profile_name))
    master_image = profileToProfile(source_image, source_profile, target_profile)

    # embed the target profile in the TIFF through an explicit tag directory
    master_path = os.path.join(self.path, 'master.tif')
    directory = TiffImagePlugin.ImageFileDirectory()
    directory[TiffImagePlugin.ICCPROFILE] = target_profile.tobytes()
    # byte according to TiffTags.TYPES
    directory.tagtype[TiffImagePlugin.ICCPROFILE] = 1
    master_image.DEBUG = True
    master_image.save(master_path, tiffinfo=directory)

    master_hash = hash_of_file(master_path)
    logger.debug(
        'saved converted master image to {master}'.format(master=master_path))
    self.__append_event__(
        'created master.tif file at {master}'.format(master=master_path))
    self.manifest.set('master.tif', master_hash)
def validate(self):
    """
    Verify completeness and fixity of the current package.

    Completeness: the manifest must list ``master.tif`` and the original
    file named by ``self.original``. Fixity: for every manifest entry, the
    stored checksum must equal the hash of the corresponding file in the
    package directory. All problems found are logged; the checks keep going
    after a failure so that one run reports everything that is wrong.

    :returns: True when every check passes, otherwise False. False is also
        returned when ``path``, ``manifest`` or ``original`` has not been
        set on the package yet.
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)

    try:
        path = self.path
    except AttributeError:
        logger.warning('Package.validate() was called before Package.path was set.')
        return False
    try:
        manifest = self.manifest
    except AttributeError:
        logger.warning('Package.validate() was called before Package.manifest was set.')
        return False

    result = True

    # make sure the minimally required components are present and have been
    # successfully opened
    filenames = manifest.get_all().keys()
    if 'master.tif' not in filenames:
        result = False
        logger.error("Validation failed to find 'master.tif' in manifest")
    try:
        original = self.original
    except AttributeError:
        # a package with no original is incomplete; report it as a failed
        # validation like the other unset attributes instead of raising
        result = False
        logger.error('Package.validate() was called before Package.original was set.')
    else:
        if original not in filenames:
            result = False
            logger.error("Validation failed to find '{0}' in manifest".format(original))

    # verify that checksums are valid for every item in the manifest
    for filename in filenames:
        checksum = manifest.get(filename)
        filepath = os.path.join(path, filename)
        # NOTE(review): validate_path presumably raises when the file is
        # missing, which would abort validation here - confirm
        real_filepath = validate_path(filepath, 'file')
        if checksum != hash_of_file(real_filepath):
            logger.error(
                "checksum verification FAILED on '{0}' in Package.validate()".format(
                    real_filepath))
            result = False

    return result
def make_derivatives(self, overwrite=False):
    """
    Create derivative images from master.tif.

    Three JPEG files are written to the package directory, each carrying the
    ICC profile read from the master:

    * ``maximum.jpg`` - same resolution as the master, quality 95
    * ``preview.jpg`` - shrunk to fit SIZEPREVIEW, progressive, quality 80
    * ``thumb.jpg``   - shrunk to fit SIZETHUMB, progressive, quality 80

    If a save at the requested quality raises IOError, the image is saved
    again without an explicit quality and a warning is logged. Each file is
    noted in the package history and its hash is stored in the manifest.

    :param overwrite: when False (the default), do nothing if derivatives
        were already made, i.e. if ``self.thumbnail`` is set
    :returns: False if derivatives already existed and overwrite is False,
        otherwise True
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)

    # self.thumbnail is the last attribute set below, so its presence means a
    # previous run completed
    try:
        self.thumbnail
    except AttributeError:
        pass
    else:
        if not overwrite:
            return False

    master_path = os.path.join(self.path, 'master.tif')
    master_image = Image.open(master_path)
    master_profile = master_image.info.get('icc_profile')

    def save_jpeg(image, path, label, progressive, quality):
        # Save image as JPEG at the given quality; on IOError retry without
        # an explicit quality and log which derivative fell back.
        options = {
            'optimize': True,
            'progressive': progressive,
            'icc_profile': master_profile,
        }
        try:
            save_image(image, path, 'JPEG', options=dict(options, quality=quality))
        except IOError:
            save_image(image, path, 'JPEG', options=dict(options))
            logger.warning(
                "{0} image could not be written at quality {1}; using defaults".format(
                    label, quality))

    # make and save "maximum" image, a jpeg same resolution as the master
    maximum_image = master_image.copy()
    maximum_path = os.path.join(self.path, 'maximum.jpg')
    save_jpeg(maximum_image, maximum_path, 'maximum', False, 95)
    self.maximum = True
    maximum_hash = hash_of_file(maximum_path)
    self.__append_event__("Wrote derivative 'maximum' jpeg file on {0}".format(maximum_path))
    self.manifest.set('maximum.jpg', maximum_hash)
    del maximum_image  # save RAM

    # make and save preview image
    # Note: high-quality resampling is expensive in terms of compute time, and
    # that expense grows with the size of the starting image relative to the
    # target. Consequently, if the starting image is significantly larger than
    # the desired down-sampled image, we make a first pass with the much less
    # expensive "nearest neighbor" algorithm to get an image that is only
    # twice the size of the target, then do the final resize on that. The
    # wisdom of the Internet seems to point to this as a time-saving step that
    # sacrifices little or nothing in quality. Caveat lector.
    preview_image = master_image.copy()
    del master_image  # save RAM
    size = preview_image.size
    logger.debug("master size: {0}, {1}".format(size[0], size[1]))
    if size[0] > 3 * SIZEPREVIEW[0] or size[1] > 3 * SIZEPREVIEW[1]:
        preview_image.thumbnail(tuple(s * 2 for s in SIZEPREVIEW), Image.NEAREST)
        logger.debug(
            "did nearest pre-shrink for preview, resulting size: {0}, {1}".format(
                preview_image.size[0], preview_image.size[1]))
    preview_image.thumbnail(SIZEPREVIEW)
    logger.debug(
        "resulting preview size: {0}, {1}".format(
            preview_image.size[0], preview_image.size[1]))
    preview_path = os.path.join(self.path, 'preview.jpg')
    save_jpeg(preview_image, preview_path, 'preview', True, 80)
    self.preview = True
    preview_hash = hash_of_file(preview_path)
    self.__append_event__("wrote derivative 'preview' jpeg file on {0}".format(preview_path))
    self.manifest.set('preview.jpg', preview_hash)

    # make and save thumbnail image
    # Note: start from the preview image, which is surely much smaller than
    # the master, so no pre-shrink pass is needed.
    thumbnail_image = preview_image.copy()
    del preview_image  # save RAM
    thumbnail_image.thumbnail(SIZETHUMB)
    thumbnail_path = os.path.join(self.path, 'thumb.jpg')
    save_jpeg(thumbnail_image, thumbnail_path, 'thumbnail', True, 80)
    self.thumbnail = True
    thumbnail_hash = hash_of_file(thumbnail_path)
    self.__append_event__("wrote derivative 'thumbnail' jpeg file on {0}".format(thumbnail_path))
    self.manifest.set('thumb.jpg', thumbnail_hash)
    del thumbnail_image  # probably unnecessary: gc would reclaim it anyway

    return True