def test_preserve_existing_labels(self):
    """Labelling keeps keywords already in the file and appends only new ones."""
    file_labeler = FileLabeler()

    # Seed the image with two keywords before the labeler touches it.
    seeded = IPTCInfo(self.jpg_file, force=True)
    seeded.keywords = ('cat', 'mammal')
    seeded.save()
    # Remove the '<file>~' sibling (presumably the backup left by save()).
    os.remove('%s~' % self.jpg_file)

    file_labeler.label(self.jpg_file, (u'dog', u'mammal'))

    reloaded = IPTCInfo(self.jpg_file)
    self.assertEqual(reloaded.keywords, ['cat', 'mammal', 'dog'])
def test_only_write_tags_once(self):
    """Labelling with keywords the file already carries must not duplicate them."""
    file_labeler = FileLabeler()

    # Pre-populate the image with the very keywords we are about to apply.
    before = IPTCInfo(self.jpg_file, force=True)
    before.keywords = ('cat', 'mammal')
    before.save()
    # Remove the '<file>~' sibling (presumably the backup left by save()).
    os.remove('%s~' % self.jpg_file)

    file_labeler.label(self.jpg_file, (u'cat', u'mammal'))

    after = IPTCInfo(self.jpg_file)
    self.assertEqual(after.keywords, ['cat', 'mammal'])
def test_skip_already_tagged_files(self):
    """Files carrying the tagged-photo marker are left untouched by the walker."""
    file_walker = FileWalker(FileLabeler(),
                             LabelServiceExecutor(TestServiceConnector()))
    untagged_path = '_testdir/2016/10/test1.jpg'
    tagged_path = '_testdir/2016/11/test2.jpg'

    # Create each month directory, then the test image inside it.
    for image_path in (untagged_path, tagged_path):
        os.makedirs(os.path.dirname(image_path))
        self._create_testfile(image_path)

    # Mark the second image as already processed.
    marker = IPTCInfo(tagged_path, force=True)
    marker.keywords = ('already', 'tagged')
    marker.data[TAGGED_PHOTO_KEY] = TAGGED_PHOTO_LABEL
    marker.save()

    file_walker.walk_and_tag('_testdir/2016')

    self.assertEqual(IPTCInfo(untagged_path).keywords,
                     ['cat', 'mammal', 'vertebrate', 'whiskers', 'animal'])
    self.assertEqual(IPTCInfo(tagged_path).keywords, ['already', 'tagged'])
def test_walk_files_and_tag_only_in_subdirectory(self):
    """Walking '_testdir/2016' tags images below it and leaves 2015 untouched."""
    # ``assertRaisesRegexp`` is the Python 2 spelling and was removed in
    # Python 3.12; use the modern name when the TestCase provides it.
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                           or self.assertRaisesRegexp)

    file_walker = FileWalker(FileLabeler(),
                             LabelServiceExecutor(TestServiceConnector()))
    os.makedirs('_testdir/2016/10')
    self._create_testfile('_testdir/2016/10/test1.jpg')
    os.makedirs('_testdir/2016/11')
    self._create_testfile('_testdir/2016/11/test2.jpg')
    os.makedirs('_testdir/2015/10')
    self._create_testfile('_testdir/2015/10/test3.jpg')

    file_walker.walk_and_tag('_testdir/2016')

    expected = ['cat', 'mammal', 'vertebrate', 'whiskers', 'animal']
    self.assertEqual(IPTCInfo('_testdir/2016/10/test1.jpg').keywords, expected)
    self.assertEqual(IPTCInfo('_testdir/2016/11/test2.jpg').keywords, expected)
    # file in 2015 has no tag set
    assert_raises_regex(Exception, 'No IPTC data found', IPTCInfo,
                        '_testdir/2015/10/test3.jpg')
def process_files(srcdir):
    """Give every allowed image in *srcdir* a caption if it has none.

    For each directory entry whose extension is in ``fileTypesAllowed``, the
    IPTC 'caption/abstract' field is read; when it is empty it is set to the
    file name without its extension. The file is only rewritten when the
    module-level ``dryrun`` flag is false.

    :param srcdir: directory whose entries are processed (not recursive)
    :return: None
    """
    from iptcinfo import IPTCInfo

    for name in os.listdir(srcdir):
        stem, extension = os.path.splitext(name)
        if extension not in fileTypesAllowed:
            logger.info('Ignoring %s' % name)
            continue

        filepath = os.path.join(srcdir, name)
        logger.info('Processing %s' % name)
        try:
            info = IPTCInfo(filepath, force=True)
            if info.data['caption/abstract']:
                logger.info('Found existing caption. Nothing to do')
                continue

            logger.info('Caption not found.')
            info.data['caption/abstract'] = stem
            logger.info('New caption -> %s' % info.data['caption/abstract'])
            if not dryrun:
                logger.info('Overwriting %s' % name)
                info.save()
        except Exception:
            # Best effort: one unreadable file must not stop the batch, but
            # the failure is logged instead of being swallowed silently, and
            # KeyboardInterrupt / SystemExit are no longer intercepted.
            logger.exception('Failed to process %s', name)
            continue
def get_photo_title_and_description(path):
    """Return ``(title, description)`` for the JPEG at *path*.

    The title is always the file's base name. The description is the IPTC
    'caption/abstract' field, or an empty string when that field is empty.

    Images are expected to have been edited with Picasa, which stores its
    captions as IPTC rather than standard EXIF metadata:
    http://productforums.google.com/forum/#!topic/picasa/fiNTD6432as

    :param path: path of the image file
    :return: tuple of (title, description)
    """
    info = IPTCInfo(path)
    title = os.path.basename(path)
    desc = info.data['caption/abstract']
    if not desc:
        # No metadata
        return title, ""

    # Picasa writes the caption as UTF-8 bytes:
    # https://groups.google.com/forum/?fromgroups=#!topic/picasawebalbums/CjeRCs402WA
    # Decode only raw bytes; text that is already decoded has no usable
    # ``decode`` and must be passed through unchanged.
    if isinstance(desc, bytes):
        desc = desc.decode("utf-8")
    return title, desc
def _process_new_files(current_folder):
    """Import the new items found under ``NEW_ITEM_PATH + current_folder``.

    Sub-directories are registered in the folder's config DOM (when they have
    no config directory yet) and processed recursively; files whose extension
    equals ``JPEG_EXT`` get sized images created and are appended to the DOM.
    The DOM is written back to the folder's config file at the end.

    :param current_folder: folder path relative to ``NEW_ITEM_PATH``; it is
        concatenated directly with item names, so it presumably ends with a
        path separator (the recursive call appends '/') -- TODO confirm
    """
    items = os.listdir(NEW_ITEM_PATH + current_folder)
    config_path = _get_config_file_path(current_folder)
    dom = minidom.parse(config_path)
    config_images = _get_config_images(dom)
    for item in items:
        filepath = NEW_ITEM_PATH + current_folder + item
        # '%' binds tighter than '+', so only current_folder is interpolated
        # and '/' + item is appended to the formatted string.
        __debug('processing %s' % current_folder + '/' + item)
        if os.path.isfile(filepath) and _get_extension(item) == '.JPG':
            # NOTE(review): the rename target is just the extension plus
            # JPEG_EXT, not a path derived from `filepath` -- this looks
            # unintended (probably meant to normalise '.JPG'); confirm.
            os.rename(filepath, _get_extension(item) + JPEG_EXT)
        if os.path.isdir(filepath):
            if not os.path.isdir(CONFIG_PATH + current_folder + item) and\
                    current_folder != HIDDEN:
                # _append_folder_to_config(config_path, item)
                _append_folder_to_dom(item, dom)
                __debug(' added %s to %s config' % (item, config_path))
                _setup_configs(current_folder, item)
                _setup_image_folders(current_folder + item)
            # Recurse into the sub-folder.
            _process_new_files(current_folder + item + '/')
            __debug('DONE folder')
        elif os.path.isfile(filepath) and _get_extension(item) == JPEG_EXT:
            iptc = IPTCInfo(filepath)
            exif = get_exif(filepath)
            # if _create_sized_images(item, current_folder):
            # _append_image_to_config(item, current_folder, metadata)
            _create_sized_images(item, current_folder)
            _append_image_to_dom(item, exif, iptc, dom, config_images,
                                 current_folder)
            __debug('DONE image')
    # Persist the accumulated DOM changes for this folder.
    _write_dom_to_xml(dom, config_path)
def print_iptcinfo(self,filename):
    """
    Print the IPTC information stored in the given file.

    Prints the keyword, supplemental-category and contact lists, the raw data
    mapping, and then one line per known IPTC tag name.

    :param filename: .jpg filename
    :return: none
    :raises Exception: carrying ``info.error`` when fewer than four data
        entries are present
    """
    iptc = IPTCInfo(filename)
    if len(iptc.data) < 4:
        raise Exception(iptc.error)

    # Summary lists first, then the raw mapping.
    summary_lines = (
        "Keywords:{}".format(iptc.keywords),
        "SupplementalCatagories:{}".format(iptc.supplementalCategories),
        "Contacts:{}".format(iptc.contacts),
        "Data:{}".format(iptc.data),
    )
    for line in summary_lines:
        print(line)

    tag_names = (
        'date created', 'digital creation date', 'reference number',
        'custom8', 'custom9', 'sub-location', 'object cycle', 'custom4',
        'custom5', 'custom6', 'custom7', 'custom1', 'custom2',
        'reference date', 'by-line title', 'local caption', 'keywords',
        'province/state', 'category', 'custom17', 'custom14',
        'digital creation time', 'custom12', 'custom13', 'custom10',
        'custom11', 'headline', 'custom18', 'custom19', 'source', 'contact',
        'by-line', 'object name', 'content location code',
        'language identifier', 'release date', 'expiration date',
        'reference service', 'custom16', 'original transmission reference',
        'originating program', 'subject reference', 'city',
        'supplemental category', 'content location name',
        'country/primary location code', 'editorial update', 'custom15',
        'fixture identifier', 'custom3', 'country/primary location name',
        'action advised', 'custom20', 'copyright notice', 'program version',
        'image orientation', 'edit status', 'expiration time', 'release time',
        'credit', 'time created', 'special instructions', 'writer/editor',
        'caption/abstract', 'urgency', 'image type',
    )
    for tag_name in tag_names:
        print(" {0}:{1}".format(tag_name, iptc.data[tag_name]))
def get_imageinfo(filepath):
    """
    Return EXIF and IPTC information found from image file in a dictionary.

    Keys always set: 'exif', 'gps', 'iptc', 'width', 'height', plus whatever
    ``exifparser.parse_datetime`` returns. 'lat'/'lon' are copied from the
    GPS data when present; 'caption', 'title', 'keywords' (comma-joined
    string) and 'tags' (list) are set only when the matching IPTC field is
    non-empty.
    """
    info = {}
    info['exif'] = exif = exifparser.read_exif(filepath)
    info.update(exifparser.parse_datetime(exif, 'EXIF DateTimeOriginal'))
    info['gps'] = gps = exifparser.parse_gps(exif)
    if 'lat' in gps:  # Backwards compatibility
        info['lat'], info['lon'] = gps['lat'], gps['lon']
    info['iptc'] = iptc = IPTCInfo(filepath, force=True)
    if iptc:  # TODO: this to own function
        if iptc.data['caption/abstract']:
            info['caption'] = iptc.data['caption/abstract']
        if iptc.data['object name']:
            info['title'] = iptc.data['object name']
        if iptc.data['keywords']:
            kw_str = ','.join(iptc.data['keywords'])
            info['keywords'] = kw_str
            info['tags'] = iptc.data['keywords']
    # Only existing keys are reassigned, so mutating the dict while iterating
    # over it is safe here. Uses the Python 2 ``unicode`` builtin.
    for key in info:  # Convert all str values to unicode
        if isinstance(info[key], str):
            info[key] = unicode(info[key], guess_encoding(info[key]))
    # Width/height are added after the conversion loop above.
    with open(str(filepath), 'rb') as f:
        im = ImagePIL.open(f)
        info['width'], info['height'] = im.size
        del im
    return info
def captioner(person, yesterday):
    """Caption one mugshot and save a copy under ./dailymugs/<yesterday>/.

    :param person: sequence of (filename, firstname, lastname, offense)
    :param yesterday: booking date text; used in the caption and as the name
        of the output directory
    """
    filename = person[0]
    firstname = person[1]
    lastname = person[2]
    offense = person[3]
    source_path = "mugs/%s" % (filename)

    # Echo what is being processed.
    for line in ("FILENAME:", filename, firstname, lastname, offense):
        print(line)

    info = IPTCInfo(source_path, force=True)
    # call the sql for the mug
    # note there needs to be a lot more caption stuff here
    caption_template = (
        "%s %s was booked into the Greene County jail on %s. "
        "Offenses as listed by the jail upon booking, starting with "
        "warrant number, level of offense, the offense and the bond "
        "amount: %s")
    info.data['caption/abstract'] = caption_template % (
        firstname, lastname, yesterday, offense)
    info.data['by-line'] = "Greene County Jail"

    # Make sure the per-day output directory exists.
    target_dir = "./dailymugs/%s" % (yesterday)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Write the captioned copy; the source file is left as it was.
    info.saveAs("%s/%s" % (target_dir, filename))
def get_imagegroups():
    """Return the optimized images grouped into pages of up to 10.

    Files in ``<THIS_DIR>/static/images/opt`` are sorted by name in
    descending order and chunked with ``grouper(10, ...)``.

    :return: list of pages; each page is a list of single-entry dicts
        ``{relative_path: {"date": <formatted str>, "caption": <str>}}``
    :raises OSError: when the optimized-images directory cannot be listed
    """
    path_to_optimized_images = os.path.join(THIS_DIR, "static", "images",
                                            "opt")
    # List the directory by path instead of chdir-ing into it, so a failure
    # can no longer leave the process stranded in the images directory.
    try:
        entries = os.listdir(path_to_optimized_images)
    except OSError:
        raise OSError(
            "Problem getting optimized images at {path}. Run the process-images.py script."
            .format(path=path_to_optimized_images))
    sorted_image_paths = sorted(
        (entry for entry in entries
         if os.path.isfile(os.path.join(path_to_optimized_images, entry))),
        reverse=True)

    # Kept from the original: the relative paths below are resolved against
    # THIS_DIR, and callers may rely on the working directory ending up here.
    os.chdir(THIS_DIR)

    pages = []
    for group in grouper(10, sorted_image_paths):
        page = []
        for name in group:
            # presumably grouper pads the last group with None -- skip those
            if name is None:
                continue
            path = "static/images/opt/%s" % name
            info = IPTCInfo(path, force=True)
            caption = info.data['caption/abstract'] or ""
            # The file name (minus ".jpg") is parsed as an integer timestamp.
            date = datetime.datetime.fromtimestamp(
                int(name.replace(".jpg", ""))).strftime('%B %d, %Y at %H:%M')
            page.append({path: {"date": date, "caption": caption}})
        pages.append(page)
    return pages
def getTitle(filepath, filename):
    """Return the IPTC title of an image, falling back to its file name.

    The fallback is *filename* cut at the first case-insensitive ".jpg"; a
    name without ".jpg" is returned whole (previously its last character was
    chopped off because ``find()`` returned -1).

    :param filepath: path of the image to read IPTC data from
    :param filename: display name used when no title can be read
    :return: IPTC dataset 5 (the title, e.g. as set by Lightroom) or the
        fallback name
    """
    cut = filename.lower().find(".jpg")
    if cut != -1:
        filename = filename[:cut]
    try:
        info = IPTCInfo(filepath)
        return info.data.get(5, filename)
    except Exception:
        # Best effort: any failure reading metadata falls back to the name.
        return filename
def write_info(self):
    """Write picasa album names and star rating to photo's IPTC keywords."""
    for path, details in self.photos.items():
        iptc = IPTCInfo(path, force=True)
        if "albums" not in details:
            continue
        # Merge existing keywords with the album names, dropping duplicates.
        merged = set(iptc.keywords + details["albums"])
        iptc.keywords = list(merged)
        print("Write: {}".format(iptc.keywords))
        iptc.save()
def _get_raw_metadata(self, path):
    """Return the parent's raw metadata, extended with IPTC data if possible.

    :param path: path of the image file
    :return: the mapping from the parent class, updated in place with the
        IPTC datasets when ``HAS_IPTC`` is true and the file can be read
    """
    data = super(Image, self)._get_raw_metadata(path)
    if HAS_IPTC:
        try:
            data.update(IPTCInfo(path).__dict__['_data'])
        except Exception:
            # IPTC data is optional: a file without it, or with unreadable
            # data, simply contributes nothing. Catching Exception rather
            # than everything lets KeyboardInterrupt/SystemExit through.
            pass
    return data
def read_iptc(abspath, charset='utf-8', new=False):
    '''Parse the IPTC metadata of a photo with iptcinfo.py.

    Returns the IPTCInfo object, or None (after printing a notice) when the
    file holds fewer than four data entries. ``new`` is accepted but unused.
    '''
    iptc = IPTCInfo(abspath, True, charset)
    if len(iptc.data) >= 4:
        return iptc
    print('IPTC is empty for %s' % abspath)
    return None
def read_list_from_jpg(self, filename):
    """
    Reads a list of data from the IPTC special_instructions field of a .JPG file

    The field is parsed with ``ast.literal_eval`` instead of ``eval``: file
    metadata is untrusted input, and ``literal_eval`` accepts only Python
    literals (lists, strings, numbers, ...) so it cannot execute code.

    :param filename: .jpg filename
    :return: list of items
    :raises Exception: carrying ``info.error`` when fewer than four data
        entries are present
    :raises ValueError, SyntaxError: when the field is not a Python literal
    """
    import ast

    info = IPTCInfo(filename)
    if len(info.data) < 4:
        raise Exception(info.error)
    s = info.data['special instructions']
    # literal_eval needs text; decode raw bytes first (no-op on Python 2,
    # where bytes is str).
    if isinstance(s, bytes) and not isinstance(s, str):
        s = s.decode('utf-8')
    return ast.literal_eval(s)
def write_info(self):
    """Write picasa album names and star rating to photo's IPTC keywords.

    For every photo in ``self.photos`` that has an "albums" entry, the album
    names are merged (without duplicates) into the file's IPTC keywords and
    the file is saved. A failed save does not stop the run: the exception
    type is recorded in ``self.errors`` under the file name.
    """
    for filename, info in self.photos.items():
        photo = IPTCInfo(filename, force=True)
        if "albums" in info:
            photo.keywords = list(set(photo.keywords + info["albums"]))
            print("Writing {}: {}".format(filename, photo.keywords))
            try:
                photo.save()
            except Exception:
                # Record the failure and carry on with the next photo.
                # Catching Exception rather than everything lets
                # KeyboardInterrupt/SystemExit abort the run.
                self.errors[filename] = sys.exc_info()[0]
def write_list_to_jpg(self, filename, items):
    """
    Store a list in the IPTC special_instructions field of a .JPG file.

    The list is stored as its ``str()`` representation, and the caption is
    set to a fixed marker text.

    :param filename: .jpg filename
    :param items: list of items to be written (specified using Python list syntax)
    :return: none
    :raises Exception: carrying ``info.error`` when fewer than four data
        entries are present
    """
    iptc = IPTCInfo(filename)
    if len(iptc.data) < 4:
        raise Exception(iptc.error)

    fields = iptc.data
    fields['caption/abstract'] = 'Contains Special Instructions'
    fields['special instructions'] = str(items)
    iptc.save()
def build_photo_sets(self, path, extensions):
    """Collect the local photos under *path*, grouped by directory.

    Hidden files and directories (leading '.') are ignored, files directly in
    ``cmd_args.sync_path`` are skipped with a warning, and when
    ``cmd_args.keyword`` is set only files whose IPTC keywords intersect it
    are kept.

    :param path: directory to walk (symlinks are followed)
    :param extensions: container of accepted lower-case extensions, no dot
    :return: dict mapping directory -> list of (file name, os.stat result)
    """
    photo_sets = {}
    root_skipped = []
    args = self.cmd_args
    keywords = set(args.keyword) if args.keyword else ()

    for current_dir, dirs, files in os.walk(path, followlinks=True):
        if args.starts_with:
            required_prefix = '{}{}'.format(args.sync_path, args.starts_with)
            if not current_dir.startswith(required_prefix):
                continue

        visible_files = [f for f in files if not f.startswith('.')]
        # Prune hidden directories in place so os.walk does not descend.
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        for name in visible_files:
            if name.lower().rsplit('.', 1)[-1] not in extensions:
                continue
            if current_dir == args.sync_path:
                root_skipped.append(name)
                continue

            full_path = os.path.join(current_dir, name)
            if keywords:
                # force=True: the file may or may not carry IPTC metadata.
                iptc = IPTCInfo(full_path, force=True)
                if not keywords.intersection(iptc.keywords):
                    # No matching keyword(s) found, skip file
                    logger.info(
                        'Skipped file [%s] because it does not match any keywords [%s].'
                        % (name, list(keywords)))
                    continue

            bucket = photo_sets.setdefault(current_dir, [])
            stat_result = os.stat(full_path)
            logger.info('appending %s to photo sets %s' % (name, current_dir))
            bucket.append((name, stat_result))

    if root_skipped:
        logger.warning(
            'Root photos are not synced to avoid disorganized flickr sets. Sync at the topmost level of your photos directory to avoid this warning.'
        )
        logger.warning('Skipped files: %s.' % root_skipped)
    return photo_sets
def write_encrypted_list_to_jpg(self, filename, password, items):
    """
    Store an encrypted list in the IPTC special_instructions field of a .JPG file.

    The list's ``str()`` representation is encrypted with
    ``self._fernet_encrypt`` and the resulting token is stored; the caption
    is set to a fixed marker text.

    :param filename: .jpg filename
    :param password: the password used for encryption
    :param items: list of items to be written (specified using Python list syntax)
    :return: none
    :raises Exception: carrying ``info.error`` when fewer than four data
        entries are present
    """
    iptc = IPTCInfo(filename)
    if len(iptc.data) < 4:
        raise Exception(iptc.error)

    encrypted = self._fernet_encrypt(str(items), password)
    fields = iptc.data
    fields['caption/abstract'] = 'Contains Special Instructions'
    fields['special instructions'] = encrypted
    iptc.save()
def read_encrypted_list_from_jpg(self, filename, password):
    """
    Reads a list of data encrypted in the IPTC special_instructions field of a .JPG file

    The decrypted text is parsed with ``ast.literal_eval`` instead of
    ``eval``: it accepts only Python literals, so a crafted payload cannot
    execute code.

    :param filename: .jpg filename
    :param password: the password used for encryption; also stored on
        ``self._password``
    :return: list of items
    :raises Exception: carrying ``info.error`` when fewer than four data
        entries are present
    :raises ValueError, SyntaxError: when the decrypted text is not a literal
    """
    import ast

    self._password = password
    info = IPTCInfo(filename)
    if len(info.data) < 4:
        raise Exception(info.error)
    token = info.data['special instructions']
    # NOTE(review): spelled "_fermet_" here while the writer calls
    # "_fernet_encrypt"; left unchanged because the method's real name is not
    # visible from here -- confirm against the class definition.
    s = self._fermet_decrypt(token, password)
    # literal_eval needs text; decode raw bytes first (no-op on Python 2,
    # where bytes is str).
    if isinstance(s, bytes) and not isinstance(s, str):
        s = s.decode('utf-8')
    return ast.literal_eval(s)
def test_skip_special_directories(self):
    """'@eaDir' and thumbnail directories named like images are not tagged."""
    # ``assertRaisesRegexp`` is the Python 2 spelling and was removed in
    # Python 3.12; use the modern name when the TestCase provides it.
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                           or self.assertRaisesRegexp)

    file_walker = FileWalker(FileLabeler(),
                             LabelServiceExecutor(TestServiceConnector()))
    os.makedirs('_testdir/2016/10')
    self._create_testfile('_testdir/2016/10/test1.jpg')
    os.makedirs('_testdir/2016/10/@eaDir')
    self._create_testfile('_testdir/2016/10/@eaDir/test1.jpg')
    # A directory whose name looks like an image file.
    os.makedirs('_testdir/2016/10/2006-07-21 12.45.16.jpg/')
    self._create_testfile(
        '_testdir/2016/10/2006-07-21 12.45.16.jpg/SYNOPHOTO_THUMB_XL.jpg')

    file_walker.walk_and_tag('_testdir/2016')

    self.assertEqual(IPTCInfo('_testdir/2016/10/test1.jpg').keywords,
                     ['cat', 'mammal', 'vertebrate', 'whiskers', 'animal'])
    # Files in the special directories must still have no IPTC data.
    assert_raises_regex(Exception, 'No IPTC data found.', IPTCInfo,
                        '_testdir/2016/10/@eaDir/test1.jpg')
    assert_raises_regex(
        Exception, 'No IPTC data found.', IPTCInfo,
        '_testdir/2016/10/2006-07-21 12.45.16.jpg/SYNOPHOTO_THUMB_XL.jpg')
def build_local_photo_sets(self, path, valid_extensions):
    """Collect the local photos under *path*, grouped by directory.

    Hidden files (leading '.') are ignored, files directly in
    ``parser_args.sync_path`` are skipped, and when ``parser_args.keywords``
    is set only files whose IPTC keywords intersect it are kept.

    ``os.stat`` is now called only for files that pass every filter, so
    skipped files cost no stat call and a dangling symlink with an
    irrelevant extension no longer aborts the walk.

    :param path: directory to walk (symlinks are followed)
    :param valid_extensions: container of accepted lower-case extensions,
        without the dot
    :return: dict mapping directory -> list of (file name, os.stat result)
    """
    local_photo_sets = {}
    keywords = set(
        self.parser_args.keywords) if self.parser_args.keywords else ()
    for root_dir, dirs, files in os.walk(path, followlinks=True):
        if self.parser_args.starts_with and not root_dir.startswith(
                '{}{}'.format(self.parser_args.sync_path,
                              self.parser_args.starts_with)):
            logger.debug(
                'skipping local directory "%s" (--starts-with="%s" not satisfied)',
                root_dir, self.parser_args.starts_with)
            continue
        files = [f for f in files if not f.startswith('.')]
        for file in files:
            file_extension = file.lower().split('.').pop()
            if file_extension not in valid_extensions:
                # Not logged: far too many files would be reported.
                continue
            file_path = os.path.join(root_dir, file)
            if root_dir == self.parser_args.sync_path:
                logger.info(
                    'skipping local file "%s" (files in --sync-path root are not synced to avoid disorganized flickr sets)',
                    file_path)
                continue
            if keywords:
                # use "force=True" if file may not have IPTC metadata
                file_info = IPTCInfo(file_path, force=True)
                if not keywords.intersection(file_info.keywords):
                    logger.debug(
                        'skipping local file "%s" (--keywords=%s not satisfied)',
                        file_path, list(keywords))
                    continue
            file_stat = os.stat(file_path)
            local_photo_sets.setdefault(root_dir, []).append(
                (file, file_stat))
    return local_photo_sets
def build_photo_sets(self, path, extensions):
    """Collect the local photos under *path*, grouped by directory.

    Hidden files and directories (leading '.') are ignored, files directly in
    ``cmd_args.sync_path`` are skipped with a warning, and when
    ``cmd_args.keyword`` is set only files whose IPTC keywords intersect it
    are kept.

    :param path: directory to walk (symlinks are followed)
    :param extensions: container of accepted lower-case extensions, no dot
    :return: dict mapping directory -> list of (file name, os.stat result)
    """
    photo_sets = {}
    skips_root = []
    keywords = set(self.cmd_args.keyword) if self.cmd_args.keyword else ()

    for r, dirs, files in os.walk(path, followlinks=True):
        if self.cmd_args.starts_with and not r.startswith('{}{}'.format(
                self.cmd_args.sync_path, self.cmd_args.starts_with)):
            continue

        # Hidden files are dropped here, so no per-file check is needed.
        files = [f for f in files if not f.startswith('.')]
        # Prune hidden directories in place so os.walk does not descend.
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        for file in files:
            ext = file.lower().split('.').pop()
            if ext not in extensions:
                continue
            if r == self.cmd_args.sync_path:
                skips_root.append(file)
                continue

            file_path = os.path.join(r, file)
            # filter by keywords
            if keywords:
                info = IPTCInfo(file_path, force=True)
                matches = keywords.intersection(info.keywords)
                if not matches:
                    # no matching keyword(s) found, skip file
                    logger.info(
                        'Skipped [%s] does not match any keyword %s' %
                        (file, list(keywords)))
                    continue

            photo_sets.setdefault(r, [])
            file_stat = os.stat(file_path)
            photo_sets[r].append((file, file_stat))

    if skips_root:
        # Logger.warn() is a deprecated alias removed in Python 3.13.
        logger.warning(
            'To avoid disorganization on flickr sets root photos are not synced, skipped these photos: %s'
            % skips_root)
        logger.warning(
            'Try to sync at top most level of your photos directory')
    return photo_sets
def photo_init(self, media):
    'Initialize photo metadata from the IPTC data of the media file.'
    self.filename = media.filename
    self.filepath = os.path.relpath(media.filepath, 'site_media')
    self.timestamp = media.timestamp

    # Start every text field empty; this is what remains if reading the
    # IPTC data below raises.
    for attribute in ('title', 'tags', 'author', 'city', 'sublocation',
                      'state', 'country', 'taxon', 'rights', 'caption',
                      'size', 'source', 'references', 'notes'):
        setattr(self, attribute, u'')

    # Create metadata object.
    iptc = IPTCInfo(media.filepath, True, 'utf-8')

    # Warn (but carry on) when the file appears to hold no IPTC data.
    if len(iptc.data) < 4:
        print(u'%s has no IPTC data!' % media.filename)

    # Attribute -> IPTC field name (dataset number in the trailing comment).
    iptc_fields = (
        ('title', 'object name'),                      # 5
        ('tags', 'keywords'),                          # 25
        ('author', 'by-line'),                         # 80
        ('city', 'city'),                              # 90
        ('sublocation', 'sub-location'),               # 92
        ('state', 'province/state'),                   # 95
        ('country', 'country/primary location name'),  # 101
        ('taxon', 'headline'),                         # 105
        ('rights', 'copyright notice'),                # 116
        ('caption', 'caption/abstract'),               # 120
        ('size', 'special instructions'),              # 40
        ('source', 'source'),                          # 115
        ('references', 'credit'),                      # 110
    )
    for attribute, field_name in iptc_fields:
        setattr(self, attribute, iptc.data[field_name])
    self.notes = u''
def create_meta(self, charset='utf-8', new=False):
    '''Build ``self.meta`` from the image's IPTC and EXIF metadata.

    Uses iptcinfo.py for the IPTC fields; per the original docstring, EXIF
    is read with pyexiv2 (through ``get_exif``). The image is then processed
    with ``self.process_photo()`` and a summary is printed.

    :param charset: character set passed to ``IPTCInfo``
    :param new: when true, the file is renamed on disk using
        ``rename_file(self.filename, author)`` and the old path is kept in
        ``meta['old_filepath']``
    :return: ``self.meta``, or None when ``process_photo()`` yields no web
        file path (corrupted file)
    '''
    def _without_site_media(filepath):
        # str.strip('site_media/') strips any of those *characters* from both
        # ends, mangling e.g. 'site_media/images/x.jpg' into 'ges/x.jpg'.
        # Only the literal leading prefix must be removed.
        prefix = 'site_media/'
        if filepath.startswith(prefix):
            return filepath[len(prefix):]
        return filepath

    logger.info('Lendo metadados de %s e criando objetos.', self.filename)

    # Create the metadata object.
    info = IPTCInfo(self.source_filepath, True, charset)
    # Check whether the file has IPTC data.
    if len(info.data) < 4:
        logger.warning('%s não tem dados IPTC!', self.filename)

    # Clear the metadata so it does not mix with the previous one.
    self.meta = {}
    self.meta = {
        'source_filepath': os.path.abspath(self.source_filepath),
        'title': info.data['object name'],  # 5
        'tags': info.data['keywords'],  # 25
        'author': info.data['by-line'],  # 80
        'city': info.data['city'],  # 90
        'sublocation': info.data['sub-location'],  # 92
        'state': info.data['province/state'],  # 95
        'country': info.data['country/primary location name'],  # 101
        'taxon': info.data['headline'],  # 105
        'rights': info.data['copyright notice'],  # 116
        'caption': info.data['caption/abstract'],  # 120
        'size': info.data['special instructions'],  # 40
        'source': info.data['source'],  # 115
        'references': info.data['credit'],  # 110
        'timestamp': self.timestamp,
        'notes': u'',
    }

    if new:
        # Add the old path to the metadata.
        self.meta['old_filepath'] = os.path.abspath(self.source_filepath)
        new_filename = rename_file(self.filename, self.meta['author'])
        # Update the media object.
        self.source_filepath = os.path.join(
            os.path.dirname(self.source_filepath), new_filename)
        self.filename = new_filename
        # Update the metadata.
        self.meta['source_filepath'] = os.path.abspath(
            self.source_filepath)
        os.rename(self.meta['old_filepath'], self.meta['source_filepath'])
    else:
        self.meta['source_filepath'] = os.path.abspath(
            self.source_filepath)

    # Prepare some fields for the database.
    self.meta = prepare_meta(self.meta)

    # Extract EXIF metadata, then the date and the geolocation from it.
    exif = get_exif(self.source_filepath)
    self.meta['date'] = get_date(exif)
    gps = get_gps(exif)
    self.meta.update(gps)

    # Process the image.
    web_filepath, thumb_filepath = self.process_photo()
    # If the file is corrupted, stop here.
    if not web_filepath:
        return None
    self.meta['web_filepath'] = _without_site_media(web_filepath)
    self.meta['thumb_filepath'] = _without_site_media(thumb_filepath)

    # Summary. print(x) with a single argument behaves the same as the
    # Python 2 print statement; print('') emits the blank line.
    print('')
    print(u'\tVariável\tMetadado')
    print(u'\t' + 40 * '-')
    print(u'\t' + self.meta['web_filepath'])
    print(u'\t' + self.meta['thumb_filepath'])
    print(u'\t' + 40 * '-')
    print(u'\tTítulo:\t\t%s' % self.meta['title'])
    print(u'\tDescrição:\t%s' % self.meta['caption'])
    print(u'\tTáxon:\t\t%s' % ', '.join(self.meta['taxon']))
    print(u'\tTags:\t\t%s' % '\n\t\t\t'.join(self.meta['tags']))
    print(u'\tTamanho:\t%s' % self.meta['size'])
    print(u'\tEspecialista:\t%s' % ', '.join(self.meta['source']))
    print(u'\tAutor:\t\t%s' % ', '.join(self.meta['author']))
    print(u'\tSublocal:\t%s' % self.meta['sublocation'])
    print(u'\tCidade:\t\t%s' % self.meta['city'])
    print(u'\tEstado:\t\t%s' % self.meta['state'])
    print(u'\tPaís:\t\t%s' % self.meta['country'])
    print(u'\tDireitos:\t%s' % self.meta['rights'])
    print(u'\tData:\t\t%s' % self.meta['date'])
    print('')
    print(u'\tGeolocalização:\t%s' % self.meta['geolocation'].decode(
        "utf8"))
    print(u'\tDecimal:\t%s, %s' % (self.meta['latitude'],
                                   self.meta['longitude']))
    print('')
    return self.meta
def test_create_empty_keywords(self):
    """Opening a file with force=True yields an empty keyword list."""
    forced = IPTCInfo(self.jpg_file, force=True)
    self.assertEqual(forced.keywords, [])
def test_write_keywords(self):
    """Keywords saved to the file can be read back as a list."""
    writer = IPTCInfo(self.jpg_file, force=True)
    writer.keywords = ('A', 'B')
    writer.save()

    # Re-open the file to check what was actually persisted.
    reader = IPTCInfo(self.jpg_file)
    self.assertEqual(reader.keywords, ['A', 'B'])
#!/usr/bin/env python # :mode=python:encoding=utf-8 # -*- coding: utf-8 -*- import sys sys.path.insert(0, '.') from iptcinfo import IPTCInfo, LOG, LOGDBG if __name__ == '__main__': import logging logging.basicConfig(level=logging.DEBUG) LOGDBG.setLevel(logging.DEBUG) if len(sys.argv) > 1: info = IPTCInfo(sys.argv[1], True) info.keywords = ['test'] info.supplementalCategories = [] info.contacts = [] print("info = %s\n%s" % (info, "=" * 30), file=sys.stderr) info.save()
def test_label_image(self):
    """Labelling a file stores the given labels as its IPTC keywords."""
    file_labeler = FileLabeler()
    file_labeler.label(self.jpg_file, (u'cat', u'mammal'))

    stored = IPTCInfo(self.jpg_file)
    self.assertEqual(stored.keywords, ['cat', 'mammal'])