def process_image(ifile: str, siteconfig, ofile: str = None):
    """Resize an image and restore the metadata lost by the resize.

    Parameters
    ----------
    ifile : path of the image to process.
    siteconfig : mapping read with ``.get``; ``'resize'`` holds the target
        size passed to ``Image.resize`` and ``'copyright_notice'`` the
        fallback copyright text.
    ofile : optional output path. It is only used when a resize is
        configured; otherwise the input file itself is the target.

    Returns
    -------
    ``(dest, exif, iptc)`` on success, where ``exif`` is the raw EXIF blob of
    the source (or ``None``) and ``iptc`` the IPTC values written to ``dest``.
    On any failure the error is reported through ``printfailure`` and
    ``(ifile, None, None)`` is returned.
    """
    try:
        size = siteconfig.get('resize')
        site_copyright_notice = siteconfig.get('copyright_notice')
        # Target the input file if the output file is not provided.
        dest = ofile if size and ofile else ifile

        # Read everything needed from the source and release the file handle
        # before anything is written (dest may be the input file itself).
        with Image.open(ifile) as im:
            im_xif = im.info.get('exif')
            resized = im.resize(size) if size else None

        # The IPTC values must be collected BEFORE saving: when dest is the
        # input file the resize overwrites it and the IPTC block is lost.
        im_iptc = retrieve_copyright_notice(ifile)

        # Image resizing with exif preservation.
        if resized is not None:
            params = {'exif': im_xif} if im_xif else {}
            resized.save(dest, **params)

        # IPTC data rewriting. If the input file contains a copyright notice
        # it wins over the siteconfig value. Never store None: previously a
        # missing notice was written as None, which is not a usable tag value
        # and made the IPTC save fail after the image was already resized.
        notice = im_iptc.get('copyright notice') or site_copyright_notice
        if notice:
            im_iptc['copyright notice'] = notice

        if im_iptc:
            iptc = IPTCInfo(dest)
            for key in im_iptc:
                iptc[key] = im_iptc[key]
            iptc.save()
        return (dest, im_xif, im_iptc)
    except Exception as e:
        # Deliberate best effort: report and hand back the untouched input.
        printfailure('could not process image ', ifile, ' traceback ', str(e))
        return (ifile, None, None)
def main():
    """Apply the IPTC values listed in ``iptcmeta.csv`` to the images.

    For every row of the CSV (columns ``fileName``, ``keywords``,
    ``obj_name`` and ``caption``) the matching JPEG under ``-image_path`` gets
    its keywords, object name and caption replaced. One line describing the
    old and new values is appended to ``changedIPTCmeta.txt`` per image.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-image_path", help='path to jpg images', type=str, required=True)
    args = parser.parse_args()
    data = pd.read_csv('iptcmeta.csv')

    # The log contains Cyrillic text, so the encoding is pinned instead of
    # depending on the platform default; the context manager guarantees the
    # file is closed even when one image fails.
    with open('changedIPTCmeta.txt', 'w', encoding='utf-8') as f:
        # enumerate replaces the hand-maintained row counter; like the
        # original it relies on the default 0..n-1 index of read_csv.
        for i, filename in enumerate(data['fileName']):
            info = IPTCInfo(join(args.image_path, filename))
            oldKeywords = info['keywords']
            oldObjName = info['object name']
            oldCaption = info['caption/abstract']

            newkey = data.at[i, 'keywords'].split(',')
            info['object name'] = data.at[i, 'obj_name']
            info['caption/abstract'] = data.at[i, 'caption']
            info['keywords'] = newkey

            f.write(
                'В файле ' + str(filename)
                + ' значения keywords изменились с ' + str(oldKeywords)
                + ' на ' + str(newkey)
                + ' значения object_name изменились с ' + str(oldObjName)
                + ' на ' + str(info['object name'])
                + ' значения caption изменились с ' + str(oldCaption)
                + ' на ' + str(info['caption/abstract'])
                + '\n'
            )
            info.save()
            print(filename)
def update_iptc(tmp_img, orig_iptc):
    """Copy a fixed set of IPTC tags from ``orig_iptc`` onto ``tmp_img``.

    ``orig_iptc`` is iterated for its keys and indexed for its values; only
    the tags named below are transferred. The image is then saved over itself.
    """
    copyable_tags = (
        'keywords',
        'caption/abstract',
        'country/primary location name',
        'city',
        'sub-location',
        'credit',
        'copyright notice',
        'writer/editor',
        'by-line',
        'headline',
    )
    target = IPTCInfo(tmp_img, force=True)
    for tag in orig_iptc:
        if tag not in copyable_tags:
            continue
        target[tag] = orig_iptc[tag]

    print('saving new copy with IPTC tags:', tmp_img)
    target.save_as(tmp_img, {'overwrite': True})
def test_save_as_saves_as_new_file_with_new_info():
    """A headline changed in memory must be present in the file written by save_as."""
    source_path = 'fixtures/Lenna.jpg'
    copy_path = 'fixtures/deleteme.jpg'

    # Start from a clean slate if an earlier run left the copy behind.
    if os.path.isfile(copy_path):  # pragma: no cover
        os.unlink(copy_path)

    headline = b'test headline %d' % random.randint(0, 100)
    original = IPTCInfo(source_path)
    original['headline'] = headline
    original.save_as(copy_path)

    reloaded = IPTCInfo(copy_path)
    assert reloaded['headline'] == headline
def mutate(self, info, input):
    """Set the IPTC keywords of the image at ``input["path"]``.

    The path is resolved against ``BASE_DIR``. The file is only rewritten
    when the stored keywords differ from ``input["keywords"]``.

    Raises:
        Exception: if the resolved path does not exist.
    """
    abs_path = Path(BASE_DIR, input["path"])
    if not abs_path.exists():
        raise Exception("Image not found")

    metadata = IPTCInfo(str(abs_path), inp_charset="utf_8")
    unchanged = metadata["keywords"] == input["keywords"]
    if not unchanged:
        metadata["keywords"] = input["keywords"]
        metadata.save()

    return SetKeywordsPayload(image={"path": abs_path})
def getImageDate(pathName, tags):
    """Return a date string for an image, trying several sources in order.

    1. The smallest (string comparison) of the ``printable`` values of the
       EXIF tags 'Image DateTime', 'EXIF DateTimeOriginal' and
       'EXIF DateTimeDigitized' found in ``tags``.
    2. The IPTC 'date created' value of ``pathName`` followed by '_000000'.
    3. The older of the file's ctime/mtime, as 'YYYYMMDD_HHMMSS'.
    """
    dateStr = tags['Image DateTime'].printable if 'Image DateTime' in tags else ''

    for exifKey in ('EXIF DateTimeOriginal', 'EXIF DateTimeDigitized'):
        if exifKey not in tags:
            continue
        candidate = tags[exifKey].printable
        if not dateStr or candidate < dateStr:
            dateStr = candidate

    if not dateStr:
        # See if there is an IPTC date
        created = IPTCInfo(pathName)['date created']
        if created and len(created) < 10:
            dateStr = bytesToString(created) + '_000000'

    if not dateStr:
        # There are no date tags, so use the file timestamp
        oldest = min(os.path.getctime(pathName), os.path.getmtime(pathName))
        ts = time.localtime(oldest)
        fields = (ts.tm_year, ts.tm_mon, ts.tm_mday, ts.tm_hour, ts.tm_min, ts.tm_sec)
        dateStr = '%04d%02d%02d_%02d%02d%02d' % fields
        print('Date from timestamp:', dateStr)

    return dateStr
def get_iptc(the_image_path):
    """Return the selected IPTC tags of an image as a plain dict.

    Every value is passed through ``fix_encoding``; 'keywords' is converted
    element by element into a list. Only tags for which ``tag in iptc_info``
    holds are included.
    """
    iptc_info = IPTCInfo(the_image_path)
    out = {}

    if 'keywords' in iptc_info:
        out['keywords'] = [fix_encoding(keyword) for keyword in iptc_info['keywords']]

    single_value_tags = (
        'city',
        'country/primary location name',
        'caption/abstract',
        'copyright notice',
        'writer/editor',
        'credit',
        'sub-location',
        'by-line',
        'headline',
    )
    for tag in single_value_tags:
        if tag in iptc_info:
            out[tag] = fix_encoding(iptc_info[tag])

    return out
def getAllTags(pathName):
    """Return the EXIF tags of ``pathName`` as produced by ``exifread``.

    The Windows 'XP*' tags have their ``values`` converted with
    ``bytesToString`` before the tag mapping is returned.
    """
    # Debug switch: set to True to also print every non-empty IPTC tag listed
    # in ``iptcKeys``. The collected values are not part of the return value.
    getAllIPTC = False
    if getAllIPTC:
        iptcTags = IPTCInfo(pathName)
        tags = {}
        for tag in iptcKeys:
            if iptcTags[tag]:
                tags[tag] = iptcTags[tag]
                print(tags[tag])

    # Open image file for reading (binary mode). The context manager closes
    # the handle once parsing is done; it used to be left open.
    with open(pathName, 'rb') as f:
        exifTags = exifread.process_file(f, details=True, strict=False)

    # Convert byte array to unicode
    for k in (
        'Image XPTitle',
        'Image XPComment',
        'Image XPAuthor',
        'Image XPKeywords',
        'Image XPSubject',
    ):
        if k in exifTags:
            exifTags[k].values = bytesToString(exifTags[k].values)

    return exifTags
def __init__(self, path):
    """Load the EXIF data, IPTC data and GPS position of the image at ``path``."""
    self.path = path
    self.exif = self.extract_exif(path)
    self.iptc = IPTCInfo(path)

    # decimal_coordinates is unpacked as a (latitude, longitude) pair.
    coordinates = GPS(path).decimal_coordinates
    self.latitude, self.longitude = coordinates
def label_extract(file):
    """Print a ``'title' | website | author`` label built from IPTC tags.

    The three parts come from the 'object name', 'copyright notice' and
    'by-line' tags of ``file``. Nothing is returned.
    """
    info = IPTCInfo(file)

    def as_text(value):
        """Turn a tag value into text; a missing tag becomes an empty string."""
        if value is None:
            return ''
        if isinstance(value, bytes):
            # Decode instead of slicing the "b'...'" repr, which showed
            # non-ASCII characters as \x escapes.
            return value.decode('utf-8', errors='replace')
        return str(value)

    title = as_text(info['object name'])
    website = as_text(info['copyright notice'])
    author = as_text(info['by-line'])

    # The title stays wrapped in single quotes: the previous repr-slicing
    # implementation printed it that way, so the label layout is unchanged.
    label = f"'{title}' | {website} | {author}"
    print(label)
def test_getitem_can_read_info():
    """Item access on IPTCInfo returns the values stored in the fixture image."""
    info = IPTCInfo('fixtures/Lenna.jpg')
    expected_values = {
        'keywords': [b'lenna', b'test'],
        'supplemental category': [b'supplemental category'],
        'caption/abstract': b'I am a caption',
    }

    assert len(info) >= 4
    for dataset, expected in expected_values.items():
        assert info[dataset] == expected
def __do_iptc_keywords(self):
    """Copy IPTC keywords, caption and title of the file into ``self.__tags``.

    Keywords are stored as one string in which every keyword is followed by
    a comma. Any failure (including a missing iptcinfo3 package) is logged as
    a warning and otherwise ignored.
    """
    try:
        from iptcinfo3 import IPTCInfo

        # turn off useless log infos
        logging.getLogger('iptcinfo').setLevel(logging.ERROR)

        # TODO put IPTC read in separate function
        with open(self.__filename, 'rb') as image_file:
            metadata = IPTCInfo(image_file, force=True, out_charset='utf-8')

        # tags
        found = metadata['keywords']
        if found is not None and len(found) > 0:
            # decode binary strings
            self.__tags['IPTC Keywords'] = ''.join(
                item.decode('utf-8') + ',' for item in metadata['keywords']
            )

        # caption
        found = metadata['caption/abstract']
        if found is not None and len(found) > 0:
            self.__tags['IPTC Caption/Abstract'] = metadata['caption/abstract'].decode('utf8')

        # title
        found = metadata['object name']
        if found is not None and len(found) > 0:
            self.__tags['IPTC Object Name'] = metadata['object name'].decode('utf-8')
    except Exception as e:
        self.__logger.warning(
            "IPTC loading has failed - if you want to use this you will need to install iptcinfo3 %s -> %s",
            self.__filename, e)
def get_imageinfo(filepath: str) -> dict:
    """
    Return EXIF and IPTC information found from image file in a dictionary.

    Keys always present: "exif", "gps", "iptc", "width", "height", plus
    whatever ``parse_datetime`` returns. "lat"/"lon" are added when the GPS
    data has them; "caption", "title", "keywords" (comma separated text) and
    "tags" (list) are added when the matching IPTC values are non-empty.
    """
    info = {}
    info["exif"] = exif = read_exif(filepath)
    info["gps"] = gps = parse_gps(exif)
    info.update(parse_datetime(exif, tag_name="EXIF DateTimeOriginal", gps=gps))
    if "lat" in gps:  # Backwards compatibility
        info["lat"], info["lon"] = gps["lat"], gps["lon"]
    info["iptc"] = iptc = IPTCInfo(filepath, force=True)

    def to_text(value):
        """Decode byte strings to text; return anything else unchanged."""
        # The old check was ``isinstance(value, str)`` followed by
        # ``str(value, encoding)``, a Python 2 idiom that raises TypeError on
        # Python 3 ("decoding str is not supported").
        # NOTE(review): assumes guess_encoding accepts bytes and returns a
        # codec name, as its previous call site implied; confirm.
        if isinstance(value, bytes):
            return str(value, guess_encoding(value))
        return value

    try:
        if iptc.data["caption/abstract"]:
            info["caption"] = iptc.data["caption/abstract"]
        if iptc.data["object name"]:
            info["title"] = iptc.data["object name"]
        if iptc.data["keywords"]:
            # Decode each keyword first: joining raw byte strings with a text
            # separator raises TypeError on Python 3.
            tags = [to_text(keyword) for keyword in iptc.data["keywords"]]
            info["keywords"] = ",".join(tags)
            info["tags"] = tags
        # Convert all top-level byte string values to text. Only existing
        # keys are reassigned, so iterating the dict meanwhile is safe.
        for key in info:
            info[key] = to_text(info[key])
    except AttributeError:
        # IPTC object without the expected ``data`` attribute: the IPTC
        # derived keys are simply left out.
        pass

    with open(str(filepath), "rb") as f:
        im = Image.open(f)
        info["width"], info["height"] = im.size
        del im
    return info
def iptckeys(filename, keys):
    """Return a tuple with the IPTC value of ``filename`` for each key in ``keys``.

    A key whose lookup raises ``KeyError`` contributes ``None``.
    """
    info = IPTCInfo(filename)
    values = []
    for key in keys:
        try:
            value = info[key]
        except KeyError:
            value = None
        values.append(value)
    return tuple(values)
def mangle(file: IPTCInfo) -> int:
    """Replace an 'FBMD' tracking value in 'special instructions' with random hex.

    Returns 1 when the tag started with ``b'FBMD'`` and was rewritten and
    saved, 0 when the tag is missing or does not start with that marker.
    """
    tracking = file['special instructions']
    if tracking is None or not tracking.startswith(b'FBMD'):
        return 0

    print(f'Old tracking: {file["special instructions"].decode("utf-8")}')

    # token_hex(k) yields 2*k hex digits, so the payload keeps (about) the
    # length of the part that followed the 4-byte marker.
    random_bytes = int((len(tracking) - 4) / 2)
    file['special instructions'] = f'FBMD{secrets.token_hex(random_bytes)}'

    print(f'New tracking: {file["special instructions"]}')
    file.save()
    return 1
def iptcinfo(file_path: str):
    """Print every non-empty IPTC dataset of ``file_path``.

    One tab separated line per dataset: the value mapped by
    ``c_datasets_r``, the dataset name, and the stored value.
    """
    from iptcinfo3 import IPTCInfo, c_datasets_r

    metadata = IPTCInfo(file_path, inp_charset="utf_8")
    for name, code in c_datasets_r.items():
        stored = metadata[name]
        if not stored:
            continue
        print(f"{code}\t{name}\t{stored}")
def retrieve_copyright_notice(fn:str) -> dict:
    """Collect the non-empty IPTC values of a file.

    Returns
    -------
    dict mapping each name in ``c_datasets.values()`` whose value in the file
    is truthy to that value (the copyright notice is one of them).
    """
    info = IPTCInfo(fn)
    populated = {}
    for key in c_datasets.values():
        value = info[key]
        if value:
            populated[key] = value
    return populated
def process_file(filename):
    """Return ``[filename, caption, [tags]]`` for an image.

    Raises:
        Exception: carrying ``info.error`` when fewer than four IPTC entries
            were read.
    """
    info = IPTCInfo(filename)
    if len(info.data) < 4:
        raise Exception(info.error)

    tags = [raw.decode("utf-8") for raw in info.keywords]
    caption = info.data["caption/abstract"]
    return [filename, caption, tags]
def test_save_as_saves_as_new_file_with_info():
    """save_as must write a copy with the same IPTC data and the same JPEG parts."""
    source_path = 'fixtures/Lenna.jpg'
    copy_path = 'fixtures/deleteme.jpg'

    if os.path.isfile(copy_path):  # pragma: no cover
        os.unlink(copy_path)

    original = IPTCInfo(source_path)
    original.save_as(copy_path)
    copied = IPTCInfo(copy_path)

    # The files won't be byte for byte exact, so filecmp won't work
    assert original._data == copied._data

    with open(source_path, 'rb') as source_fh, open(copy_path, 'rb') as copy_fh:
        start, end, adobe = jpeg_collect_file_parts(source_fh)
        start2, end2, adobe2 = jpeg_collect_file_parts(copy_fh)

    # But we can compare each section
    for section, copied_section in ((start, start2), (end, end2), (adobe, adobe2)):
        assert section == copied_section
def make_big_groups(photo_filenames):
    """Group image file names by IPTC keyword.

    Returns a dict mapping each UTF-8 decoded keyword to the list of file
    names carrying it, in input order.
    """
    grouped = {}
    for photo_filename in photo_filenames:
        raw_keywords = IPTCInfo(photo_filename)["keywords"]
        for raw in raw_keywords:
            keyword = raw.decode("utf-8")
            grouped.setdefault(keyword, []).append(photo_filename)
    return grouped
def get_metadata(file_path):
    """
    Load the IPTC metadata of a file and log it at debug level.

    :param file_path: full path to the file
    :return: iptcinfo3.IPTCInfo object
    """
    metadata = IPTCInfo(file_path)
    message = "File {} tags: {}".format(file_path, metadata)
    logging.getLogger(__name__).debug(message)
    return metadata
def process_file(filepath, random_replace=False, verbose=False):
    """Rewrite the 'FBMD' block inside the IPTC 'special instructions' of an image.

    Files that ``imghdr`` does not recognise, and images without an 'FBMD'
    marker, are left untouched. Otherwise the tag is replaced by the result of
    ``update_instructions``, the file is saved and ``filepath + "~"`` is removed.
    """
    img_type = imghdr.what(filepath)
    if img_type is None:
        if verbose:
            print("Skipping).")
        return
    if verbose:
        print(img_type, end='). ')

    info = IPTCInfo(filepath, force=True)
    instructions = info['special instructions']
    marker_at = -1 if instructions is None else instructions.find(b'FBMD')
    if marker_at < 0:
        if verbose:
            print("FBMD not found")
        return

    # Four hex digits starting six bytes after the marker give the length.
    length_start = marker_at + 6
    length_end = length_start + 4
    length = int(instructions[length_start:length_end], 16)

    info['special instructions'] = update_instructions(
        instructions, marker_at, length, random=random_replace)
    if verbose:
        print(instructions[marker_at:length_end + (length + 1) * 8])

    info.save()
    # NOTE(review): presumably the "~" file is a backup left by info.save(); confirm.
    os.remove(filepath + "~")
def update_index(filepath, filename):
    """Register ``filename`` in the stored trie under each of its IPTC keywords.

    The trie is loaded with ``load_obj("master_index_trie")``, updated for
    every keyword whose lookup yields a list not yet containing ``filename``,
    and written back with ``save_obj``.
    """
    info = IPTCInfo(filepath + "/" + filename, force=True)
    master_index_trie = load_obj("master_index_trie")
    pp(master_index_trie)

    # Decode everything up front so a bad keyword fails before the trie changes.
    keywords = [raw.decode("utf-8") for raw in info["keywords"]]

    for keyword in keywords:
        associated_pics = trie.find_prefix(master_index_trie, keyword)[2]
        if type(associated_pics) is not list:
            continue
        if filename in associated_pics:
            continue
        trie.add_value(master_index_trie, keyword, filename)

    save_obj(master_index_trie, "master_index_trie")
def get_img_data(sfile):
    """Collect title, description, date, keywords and camera data of an image.

    Returns a dict that always has "path", "filename", "keywords" and
    "shortcode"; "title", "description", "date" and the EXIF derived keys
    ("model", "shutter", "focal", "aperture", "iso", "lens") appear only when
    the source value exists.

    Raises:
        Exception: whatever ``IPTCInfo`` raises when the file cannot be read;
            the failure is logged first.
    """
    try:
        info = IPTCInfo(sfile)
    except Exception:
        # ``logging.ERROR`` is the numeric level, not a function: calling it
        # raised TypeError and hid the real problem. Log, then re-raise,
        # because nothing below can work without ``info``.
        logging.error("Could not open file: %s", sfile)
        raise

    data = dict()
    data["path"] = sfile
    data["filename"] = os.path.basename(sfile)

    title = info["object name"] or info["headline"]
    if title:
        data["title"] = title.decode(ENCODING)

    desc = info["caption/abstract"]
    if desc:
        data["description"] = desc.decode(ENCODING)

    date = info["date created"]
    if date:
        data["date"] = datetime.datetime.strptime(date.decode(ENCODING), '%Y%m%d')

    data["keywords"] = []
    if info["keywords"]:
        data["keywords"] = [key.decode(ENCODING) for key in info["keywords"]]

    # exif: tags that are copied verbatim as text, keyed by their output name
    copied_tags = {
        "Image Model": "model",
        "EXIF ExposureTime": "shutter",
        "EXIF FocalLength": "focal",
        "EXIF ISOSpeedRatings": "iso",
        "EXIF LensModel": "lens",
    }
    exif = get_exif(sfile)
    for tag in exif.keys():
        value = str(exif[tag])
        if tag in copied_tags:
            data[copied_tags[tag]] = value
        elif tag == "EXIF FNumber":
            data["aperture"] = str(parse_fstop(value))

    data["shortcode"] = get_shortcode(data)
    return data
def update_index(filepath, filename):
    """Register an image in the stored trie under each of its IPTC keywords.

    For every keyword the trie lookup is printed; when its fourth element is
    a dict that lacks the image's full path, the image is added together with
    its full path and complete keyword list. The trie is saved afterwards.
    """
    full_path = filepath + "/" + filename
    print(full_path)

    info = IPTCInfo(full_path, force=True)
    master_index_trie = load_obj("master_index_trie")

    # Decode everything up front so a bad keyword fails before the trie changes.
    keywords = [raw.decode("utf-8") for raw in info["keywords"]]

    for keyword in keywords:
        print(trie.find_prefix(master_index_trie, keyword))
        associated_pics = trie.find_prefix(master_index_trie, keyword)[3]
        if type(associated_pics) is not dict:
            continue
        if full_path in associated_pics.keys():
            continue
        trie.add_value(master_index_trie, keyword, filename, full_path, keywords)

    save_obj(master_index_trie, "master_index_trie")
def extractIptcKeywordTags(thisFile: str) -> list:
    """
    Return the IPTC keywords (= tags) of a jpeg file, sorted and decoded as UTF-8.

    ``thisFile`` must be an existing file (checked with an assert).
    """
    assert os.path.isfile(thisFile)
    raw_keywords = IPTCInfo(thisFile)["keywords"]
    if len(raw_keywords) == 0:
        return []
    # decode binary strings
    return [raw.decode("utf-8") for raw in sorted(raw_keywords)]
def mediaFolder2Data(path):
    """Build one data entry per '<name>.jpg' file found directly in ``path``.

    ``path`` is concatenated with the file name as is, so it has to end with
    a path separator. Each entry is filled from the image's IPTC keywords via
    ``conversionsRegex`` and ``autotranslate``, then from ``autoappend``.
    Entries are keyed by the lower-cased base name.
    """
    entries = {}
    for filename in sorted(os.listdir(path)):
        filePath = path + filename
        nameParts = filename.split(".")
        baseName = nameParts[0].lower()

        if os.path.isdir(filePath):
            continue
        # Only names of the exact form "<base>.jpg" (case-insensitive) qualify.
        if len(nameParts) != 2 or nameParts[1].lower() != "jpg":
            continue

        entry = {}
        info = IPTCInfo(filePath)
        info['keywords'] = [raw.decode('ascii') for raw in info['keywords']]

        for keyword in info['keywords']:
            for conversion in conversionsRegex:
                found = re.search(conversion['regex'], keyword, re.IGNORECASE)
                if found:
                    entry[conversion['index']] = found.group(1)

        for translation in autotranslate:
            if translation["search"] in info['keywords']:
                entry[translation["index"]] = translation["value"]

        for addition in autoappend:
            entry[addition["index"]] = addition["value"].format(filePath)

        entries[baseName] = entry
    return entries
def get_date(DATADIR_RAW, DATADIR_RAW_NEW):
    """Copy every image of a directory with its EXIF date stored as IPTC 'date created'.

    For each file in ``DATADIR_RAW`` the first ``yyyy:mm:dd`` pattern found in
    the raw EXIF block is written (as ``yyyymmdd``) into the IPTC
    'date created' field, and the result is saved as ``'new' + <name>`` in
    ``DATADIR_RAW_NEW``, which is created when missing. Images without an
    EXIF block or without a date in it are reported and skipped.
    """
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    date_pattern = re.compile(r'[\d]{4}:[\d]{2}:[\d]{2}')  # format yyyy:mm:dd

    # get the image name from the dir DATADIR_RAW
    for img in os.listdir(DATADIR_RAW):
        image_path = os.path.join(DATADIR_RAW, img)
        print(img)

        # Open the image and access the metadata; close the file right away.
        with Image.open(image_path) as image:
            exif_blob = image.info.get('exif')  # exif are mostly JPEG

        if isinstance(exif_blob, bytes):
            # latin-1 maps every byte to one character, so ASCII dates survive
            # without the escape sequences a bytes repr would introduce.
            txt = exif_blob.decode('latin-1')
        else:
            txt = str(exif_blob) if exif_blob else ''

        match = date_pattern.search(txt)
        if match is None:
            print(f'no EXIF date found in {img}, skipping')
            continue
        date = match.group().replace(':', '')
        print(f'date of picture: {date}')

        # Force open the IPTC protocol to insert date
        info = IPTCInfo(image_path, force=True)
        # Before. Shows whether or not the entry was empty before
        print(f"info before: {info['date created']}")
        info['date created'] = date
        # After. Shows the date extracted from the meta data
        print(f"info after: {info['date created']}")
        print('\n')

        # Create the new dir DATADIR_RAW_NEW if it does not already exist
        os.makedirs(DATADIR_RAW_NEW, exist_ok=True)

        # Create the new path for the image
        new_path = os.path.join(DATADIR_RAW_NEW, 'new' + img)

        # If that image is already in the path, delete it (best effort).
        try:
            os.remove(new_path)
        except OSError:
            pass

        # Save new image.
        info.save_as(new_path)
def downSize():
    """Create web-sized copies of the images and export their metadata as GeoJSON.

    Works on the module-level ``route2folderWithImages`` and ``timeStamp``:

    1. Every jpg returned by ``getFileWithExt`` is resized to 160x300 and
       saved into ``<folder>/smallImages_<timeStamp>``. The resized copies
       lose their EXIF and IPTC data; their only purpose is to have a decent
       size image to display on the web.
    2. EXIF date, GPS position and IPTC keywords of the original images are
       collected into a GeoJSON FeatureCollection written to
       ``<folder>/data_<timeStamp>``. Images for which any of these cannot be
       read are skipped.
    """

    def rational_to_float(value):
        """Convert an EXIF rational given as (numerator, denominator) or as a number."""
        # NOTE(review): newer Pillow releases appear to return rational
        # objects instead of pairs; both shapes are accepted. Confirm against
        # the installed version.
        try:
            numerator, denominator = value
        except TypeError:
            return float(value)
        return float(numerator) / float(denominator)

    def to_decimal_degrees(dms, ref, negative_ref):
        """Convert (degrees, minutes, seconds) to signed decimal degrees."""
        parts = [rational_to_float(part) for part in dms]
        decimal = parts[0] + parts[1] / 60 + parts[2] / 3600
        return -decimal if ref == negative_ref else decimal

    # processing images: reducing size
    smallImagestoday = ''.join(
        [route2folderWithImages, '/', 'smallImages_', timeStamp])
    if not os.path.isdir(smallImagestoday):
        os.mkdir(smallImagestoday)

    lisOfBigImages = getFileWithExt('jpg', route2folderWithImages)
    for i in lisOfBigImages:
        try:
            with Image.open(i) as big:
                # LANCZOS is the filter ANTIALIAS used to alias; ANTIALIAS no
                # longer exists in current Pillow, which made every resize
                # fail silently inside the former bare except.
                small = big.resize((160, 300), Image.LANCZOS)
            small.save(''.join([smallImagestoday, '/', i]),
                       optimize=True,
                       quality=95)  # From 1.9GB to 8.8MB ... nice!
            print(i + ' is done!')
        except Exception:
            # Best effort per image; no longer traps KeyboardInterrupt/SystemExit.
            print(i + '... is already done!!!')

    # extracting Exif and IPTC info from originals images
    lisOfBigImages = getFileWithExt('jpg', route2folderWithImages)
    toSaveData = ''.join([route2folderWithImages, '/', 'data_', timeStamp])
    if not os.path.isdir(toSaveData):
        os.mkdir(toSaveData)

    collection = []
    print('getting EXIF and IPTC info ...')
    for i in lisOfBigImages:
        try:
            with Image.open(i) as original:
                raw_exif = original._getexif()
            exif = {
                ExifTags.TAGS[k]: v
                for k, v in raw_exif.items() if k in ExifTags.TAGS
            }
            dateTime = exif['DateTime'][0:10].replace(':', '-').strip()

            # GPSInfo entries: 1/3 hold the hemisphere letters, 2/4 the
            # latitude/longitude triples.
            gps = exif['GPSInfo']
            lat = to_decimal_degrees(gps[2], gps[1], 'S')
            lon = to_decimal_degrees(gps[4], gps[3], 'W')

            info = IPTCInfo(i, force=True)
            kw = [x.decode("utf-8") for x in info['keywords']]

            collection.append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": [lon, lat]
                },
                "properties": {
                    "name": i,
                    "date": dateTime,
                    "tags": kw,
                    "image": ''.join([
                        'https://github.com/crishernandezmaps/situado_visualizacion/blob/gh-pages/',
                        smallImagestoday, '/', i, '?raw=true'
                    ]),
                    "author": "Made by Cris Hernandez for FONDECYT Nº 1171554, INVI - U. of Chile, 2018/9"
                }
            })
        except Exception:
            # Images without usable EXIF/GPS/IPTC data are left out on purpose.
            continue

    data = {"type": "FeatureCollection", "features": collection}

    # ### Saving GeoJson ###
    name = ''.join([
        toSaveData, '/',
        route2folderWithImages.split('/')[-1], '_', timeStamp, '.geojson'
    ])
    with open(name, 'w') as f:
        dump(data, f)

    print(data)
    print('All Done :)...')
def scan_file(file: Path) -> Optional[IPTCInfo]:
    """Return the IPTC data of ``file``, or ``None`` when it is not a candidate.

    A candidate exists, is a regular file and has a suffix listed in
    ``EXTENSIONS``.
    """
    is_candidate = file.exists() and file.is_file() and file.suffix in EXTENSIONS
    if not is_candidate:
        return None
    return IPTCInfo(file)