Esempio n. 1
0
def process_image(ifile:str, siteconfig, ofile:str=None):
    '''Resize an image (optionally) and put its EXIF/IPTC metadata back.

    Parameters
    ----------
    ifile : path of the image to read.
    siteconfig : mapping queried with ``.get`` for ``'resize'`` (the size
        passed to ``Image.resize``) and ``'copyright_notice'``.
    ofile : optional output path. It is only used when a resize is
        configured; otherwise the input file itself is the target.

    Returns
    -------
    ``(dest, exif, iptc)`` on success, or ``(ifile, None, None)`` when any
    step raised (the failure is reported through ``printfailure``).
    '''
    try:
        size = siteconfig.get('resize')
        site_copyright_notice = siteconfig.get('copyright_notice')

        # Release the source file handle before anything is written: the
        # destination may be the very same file.
        with Image.open(ifile) as im:
            im_xif = im.info.get('exif')
            resized = im.resize(size) if size else None
        im_iptc = retrieve_copyright_notice(ifile)

        # Target the input file if the output file is not provided
        dest = ofile if size and ofile else ifile

        # Image resizing with exif preservation
        if resized is not None:
            params = {'exif': im_xif} if im_xif else {}
            resized.save(dest, **params)

        # IPTC data is lost during the resize operation, so it is written
        # back manually. A notice found in the input file wins over the
        # siteconfig value; nothing is stored when neither provides one, so
        # a None value is never handed to the IPTC writer.
        if 'copyright notice' not in im_iptc and site_copyright_notice:
            im_iptc['copyright notice'] = site_copyright_notice

        if im_iptc:
            iptc = IPTCInfo(dest)
            for key, value in im_iptc.items():
                iptc[key] = value
            iptc.save()

        return (dest, im_xif, im_iptc)
    except Exception as e:
        printfailure('could not process image ', ifile, ' traceback ', str(e))
        return (ifile, None, None)
Esempio n. 2
0
def main():
    """Apply IPTC metadata listed in ``iptcmeta.csv`` to JPEG files.

    The ``-image_path`` command-line argument names the directory holding
    the images. For every row of the CSV (columns ``fileName``,
    ``keywords``, ``obj_name``, ``caption``) the matching file gets new
    keywords, object name and caption, and one line describing the change
    is appended to ``changedIPTCmeta.txt``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-image_path",
                        help='path to jpg images',
                        type=str,
                        required=True)
    args = parser.parse_args()

    data = pd.read_csv('iptcmeta.csv')

    # The log contains Cyrillic text, so the encoding is fixed explicitly
    # instead of depending on the platform default; ``with`` guarantees the
    # file is closed even if an image fails half-way through.
    with open('changedIPTCmeta.txt', 'w', encoding='utf-8') as f:
        for i, filename in enumerate(data['fileName']):
            info = IPTCInfo(join(args.image_path, filename))
            oldKeywords = info['keywords']
            oldObjName = info['object name']
            oldCaption = info['caption/abstract']
            newkey = data.at[i, 'keywords'].split(',')
            info['object name'] = data.at[i, 'obj_name']
            info['caption/abstract'] = data.at[i, 'caption']
            info['keywords'] = newkey
            f.write('В файле ' + str(filename) +
                    ' значения keywords изменились с ' + str(oldKeywords) +
                    ' на ' + str(newkey) + ' значения object_name изменились с ' +
                    str(oldObjName) + ' на ' + str(info['object name']) +
                    ' значения caption изменились с ' + str(oldCaption) + ' на ' +
                    str(info['caption/abstract']) + '\n')
            info.save()
            print(filename)
Esempio n. 3
0
def update_iptc(tmp_img, orig_iptc):
    """Copy a fixed set of IPTC fields from ``orig_iptc`` into ``tmp_img``.

    Only the fields named in ``allowed_fields`` are transferred; the image
    is then saved over itself.
    """
    allowed_fields = (
        'keywords', 'caption/abstract', 'country/primary location name',
        'city', 'sub-location', 'credit', 'copyright notice',
        'writer/editor', 'by-line', 'headline',
    )
    target = IPTCInfo(tmp_img, force=True)
    for field in orig_iptc:
        if field not in allowed_fields:
            continue
        target[field] = orig_iptc[field]
    print('saving new copy with IPTC tags:', tmp_img)
    target.save_as(tmp_img, {'overwrite': True})
Esempio n. 4
0
def test_save_as_saves_as_new_file_with_new_info():
    """A headline set before ``save_as`` must be readable from the new file."""
    target = 'fixtures/deleteme.jpg'
    if os.path.isfile(target):  # pragma: no cover
        os.unlink(target)

    new_headline = b'test headline %d' % random.randint(0, 100)
    source_info = IPTCInfo('fixtures/Lenna.jpg')
    source_info['headline'] = new_headline
    source_info.save_as(target)

    reloaded = IPTCInfo(target)
    assert reloaded['headline'] == new_headline
Esempio n. 5
0
    def mutate(self, info, input):
        """Store ``input["keywords"]`` as the IPTC keywords of an image.

        The image is located at ``input["path"]`` under ``BASE_DIR``. The
        file is only rewritten when the stored keywords differ from the
        requested ones. Raises ``Exception`` if the image does not exist.
        """
        abs_path = Path(BASE_DIR, input["path"])
        if not abs_path.exists():
            raise Exception("Image not found")

        iptc_info = IPTCInfo(str(abs_path), inp_charset="utf_8")
        requested = input["keywords"]
        if iptc_info["keywords"] != requested:
            iptc_info["keywords"] = requested
            iptc_info.save()

        return SetKeywordsPayload(image={"path": abs_path})
Esempio n. 6
0
def getImageDate(pathName, tags):
    """Return a date string for an image.

    Sources are tried in this order:
    1. the smallest of the ``Image DateTime``, ``EXIF DateTimeOriginal``
       and ``EXIF DateTimeDigitized`` tags present in ``tags``;
    2. the IPTC ``date created`` field (suffixed with ``_000000``);
    3. the older of the file's ctime/mtime, as ``YYYYMMDD_HHMMSS``.
    """
    dateStr = tags['Image DateTime'].printable if 'Image DateTime' in tags else ''

    for exifKey in ('EXIF DateTimeOriginal', 'EXIF DateTimeDigitized'):
        if exifKey not in tags:
            continue
        candidate = tags[exifKey].printable
        if candidate < dateStr or not dateStr:
            dateStr = candidate

    if not dateStr:
        # See if there is an IPTC date
        idate = IPTCInfo(pathName)['date created']
        if idate and len(idate) < 10:
            dateStr = bytesToString(idate) + '_000000'

    if dateStr:
        return dateStr

    # There are no date tags, so use the file timestamp
    oldest = min(os.path.getctime(pathName), os.path.getmtime(pathName))
    ts = time.localtime(oldest)
    dateStr = '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format(
        ts.tm_year, ts.tm_mon, ts.tm_mday, ts.tm_hour, ts.tm_min, ts.tm_sec)
    print('Date from timestamp:', dateStr)
    return dateStr
Esempio n. 7
0
def get_iptc(the_image_path):
    """Return selected IPTC fields of an image as a plain dict.

    Every value goes through ``fix_encoding``; ``keywords`` is returned as
    a list with each entry converted individually. Fields for which the
    ``in`` test on the ``IPTCInfo`` object fails are left out.
    """
    iptc_info = IPTCInfo(the_image_path)
    out = {}

    if 'keywords' in iptc_info:
        out['keywords'] = [fix_encoding(kw) for kw in iptc_info['keywords']]

    single_value_fields = (
        'city',
        'country/primary location name',
        'caption/abstract',
        'copyright notice',
        'writer/editor',
        'credit',
        'sub-location',
        'by-line',
        'headline',
    )
    for field in single_value_fields:
        if field in iptc_info:
            out[field] = fix_encoding(iptc_info[field])

    return out
Esempio n. 8
0
def getAllTags(pathName):
    """Return the EXIF tags of ``pathName`` as produced by ``exifread``.

    The Windows ``XP*`` tags are passed through ``bytesToString`` so their
    ``values`` are text instead of a byte array.
    """
    # Debug switch: set to True to also print every non-empty IPTC tag.
    getAllIPTC = False
    if getAllIPTC:
        iptcTags = IPTCInfo(pathName)
        for tag in iptcKeys:
            if iptcTags[tag]:
                print(iptcTags[tag])

    # Open image file for reading (binary mode); the context manager closes
    # the handle again once the tags have been parsed.
    with open(pathName, 'rb') as f:
        exifTags = exifread.process_file(f, details=True, strict=False)

    # Convert byte array to unicode
    for k in ('Image XPTitle', 'Image XPComment', 'Image XPAuthor',
              'Image XPKeywords', 'Image XPSubject'):
        if k in exifTags:
            exifTags[k].values = bytesToString(exifTags[k].values)

    return exifTags
Esempio n. 9
0
 def __init__(self, path):
     """Load EXIF data, IPTC data and decimal GPS coordinates for ``path``."""
     self.path = path
     self.exif = self.extract_exif(path)
     self.iptc = IPTCInfo(path)
     self.latitude, self.longitude = GPS(path).decimal_coordinates
Esempio n. 10
0
def label_extract(file):
    """Print a ``title | website | author`` label built from IPTC fields.

    The fields used are ``object name``, ``copyright notice`` and
    ``by-line``; each one is stringified and then sliced.
    """
    info = IPTCInfo(file)
    title, website, author = (
        str(info[field])
        for field in ('object name', 'copyright notice', 'by-line')
    )
    # NOTE(review): title is sliced [1:] while the other two use [2:-1];
    # kept as-is, confirm whether the title was meant to be stripped the
    # same way.
    label = ' | '.join((title[1:], website[2:-1], author[2:-1]))
    print(label)
Esempio n. 11
0
def test_getitem_can_read_info():
    """Item access on the Lenna fixture returns the expected IPTC values."""
    info = IPTCInfo('fixtures/Lenna.jpg')

    assert len(info) >= 4

    expected = {
        'keywords': [b'lenna', b'test'],
        'supplemental category': [b'supplemental category'],
        'caption/abstract': b'I am a caption',
    }
    for field, value in expected.items():
        assert info[field] == value
Esempio n. 12
0
 def __do_iptc_keywords(self):
     """Read IPTC keywords, caption and object name into ``self.__tags``.

     Keywords are stored as one string in which every keyword is followed
     by a comma. Any failure (including a missing ``iptcinfo3`` package) is
     logged as a warning and otherwise ignored.
     """

     def has_content(value):
         return value is not None and len(value) > 0

     try:
         from iptcinfo3 import IPTCInfo
         # turn off useless log infos
         logging.getLogger('iptcinfo').setLevel(logging.ERROR)
         with open(self.__filename, 'rb') as fh:
             # TODO put IPTC read in separate function
             iptc = IPTCInfo(fh, force=True, out_charset='utf-8')

             # tags
             if has_content(iptc['keywords']):
                 # decode binary strings
                 self.__tags['IPTC Keywords'] = ''.join(
                     key.decode('utf-8') + ',' for key in iptc['keywords'])

             # caption
             if has_content(iptc['caption/abstract']):
                 caption = iptc['caption/abstract']
                 self.__tags['IPTC Caption/Abstract'] = caption.decode('utf8')

             # title
             if has_content(iptc['object name']):
                 title = iptc['object name']
                 self.__tags['IPTC Object Name'] = title.decode('utf-8')
     except Exception as e:
         self.__logger.warning(
             "IPTC loading has failed - if you want to use this you will need to install iptcinfo3 %s -> %s",
             self.__filename, e)
Esempio n. 13
0
def get_imageinfo(filepath: str) -> dict:
    """
    Return EXIF and IPTC information found from image file in a dictionary.

    Keys always present: ``exif``, ``gps``, ``iptc``, ``width``, ``height``
    plus whatever ``parse_datetime`` contributes. ``lat``/``lon`` are added
    when the GPS data has them; ``caption``, ``title``, ``keywords``
    (comma-joined) and ``tags`` (list) are added when the IPTC data has
    them. Top-level byte-string values are decoded to ``str``.
    """
    info = {}
    info["exif"] = exif = read_exif(filepath)
    info["gps"] = gps = parse_gps(exif)
    info.update(parse_datetime(exif, tag_name="EXIF DateTimeOriginal",
                               gps=gps))
    if "lat" in gps:  # Backwards compatibility
        info["lat"], info["lon"] = gps["lat"], gps["lon"]
    info["iptc"] = iptc = IPTCInfo(filepath, force=True)
    try:
        if iptc.data["caption/abstract"]:
            info["caption"] = iptc.data["caption/abstract"]
        if iptc.data["object name"]:
            info["title"] = iptc.data["object name"]
        keywords = iptc.data["keywords"]
        if keywords:
            # Join with a separator of the same type as the keywords so
            # that byte-string keywords do not raise TypeError.
            separator = b"," if isinstance(keywords[0], bytes) else ","
            info["keywords"] = separator.join(keywords)
            info["tags"] = keywords
        # Decode byte strings to text. The previous check targeted ``str``,
        # for which ``str(value, encoding)`` always raises TypeError on
        # Python 3.
        # NOTE(review): assumes guess_encoding accepts bytes - confirm.
        for key, value in list(info.items()):
            if isinstance(value, bytes):
                info[key] = str(value, guess_encoding(value))
    except AttributeError:
        pass
    with open(str(filepath), "rb") as f:
        im = Image.open(f)
        info["width"], info["height"] = im.size
        del im
    return info
Esempio n. 14
0
def iptckeys(filename, keys):
    """Return a tuple with the IPTC value of each key in ``keys``.

    A key whose lookup raises ``KeyError`` yields ``None`` in its slot.
    """
    info = IPTCInfo(filename)
    values = []
    for key in keys:
        try:
            value = info[key]
        except KeyError:
            value = None
        values.append(value)
    return tuple(values)
Esempio n. 15
0
def mangle(file: IPTCInfo) -> int:
    """Replace an ``FBMD`` tracking value with random hex of equal size.

    Looks at the ``special instructions`` field. Returns 1 when the field
    started with ``FBMD`` and was rewritten and saved, otherwise 0.
    """
    tracking = file['special instructions']
    if tracking is None or not tracking.startswith(b'FBMD'):
        return 0

    print(f'Old tracking: {file["special instructions"].decode("utf-8")}')
    # token_hex(n) yields 2*n hex digits, hence half the payload length.
    random_byte_count = int((len(tracking) - 4) / 2)
    file['special instructions'] = f'FBMD{secrets.token_hex(random_byte_count)}'
    print(f'New tracking: {file["special instructions"]}')
    file.save()
    return 1
Esempio n. 16
0
def iptcinfo(file_path: str):
    """Print every non-empty IPTC dataset of ``file_path``.

    One tab-separated line is printed per entry of ``c_datasets_r``: the
    mapping's value, the mapping's key, and the value stored in the file.
    """
    from iptcinfo3 import IPTCInfo, c_datasets_r

    info = IPTCInfo(file_path, inp_charset="utf_8")
    for dataset_key, dataset_value in c_datasets_r.items():
        iptc_value = info[dataset_key]
        if not iptc_value:
            continue
        print(f"{dataset_value}\t{dataset_key}\t{iptc_value}")
Esempio n. 17
0
def retrieve_copyright_notice(fn:str) -> dict:
    '''Collect the non-empty IPTC datasets of the input file
    Returns
    -------
    dict mapping each key from ``c_datasets.values()`` to the value stored
    in the file; keys whose value is falsy are omitted
    '''
    info = IPTCInfo(fn)
    found = {}
    for key in c_datasets.values():
        value = info[key]
        if value:
            found[key] = value
    return found
Esempio n. 18
0
def process_file(filename):
    """ Return ``[filename, caption, [tags]]`` for an image.

    Raises ``Exception(info.error)`` when fewer than four IPTC entries were
    read from the file.
    """
    info = IPTCInfo(filename)
    if len(info.data) < 4:
        raise Exception(info.error)

    keywords = [keyword.decode("utf-8") for keyword in info.keywords]
    caption = info.data["caption/abstract"]
    return [filename, caption, keywords]
Esempio n. 19
0
def test_save_as_saves_as_new_file_with_info():
    """``save_as`` must produce a file with the same IPTC data and parts."""
    source_path = 'fixtures/Lenna.jpg'
    copy_path = 'fixtures/deleteme.jpg'
    if os.path.isfile(copy_path):  # pragma: no cover
        os.unlink(copy_path)

    info = IPTCInfo(source_path)
    info.save_as(copy_path)

    info2 = IPTCInfo(copy_path)

    # The files won't be byte for byte exact, so filecmp won't work
    assert info._data == info2._data

    with open(source_path, 'rb') as fh:
        start, end, adobe = jpeg_collect_file_parts(fh)
    with open(copy_path, 'rb') as fh2:
        start2, end2, adobe2 = jpeg_collect_file_parts(fh2)

    # But we can compare each section
    for original_part, copied_part in ((start, start2), (end, end2),
                                       (adobe, adobe2)):
        assert original_part == copied_part
Esempio n. 20
0
def make_big_groups(photo_filenames):
    """Group images by keyword.

    Returns a dict mapping each UTF-8 decoded IPTC keyword to the list of
    filenames carrying it, in input order.
    """
    grouped = {}
    for photo_filename in photo_filenames:
        raw_keywords = IPTCInfo(photo_filename)["keywords"]
        for raw_keyword in raw_keywords:
            keyword = raw_keyword.decode("utf-8")
            grouped.setdefault(keyword, []).append(photo_filename)
    return grouped
Esempio n. 21
0
def get_metadata(file_path):
    """
    Get IPTC metadata for a file.
    :param file_path: full path to the file
    :return: iptcinfo3.IPTCInfo object
    """

    logger = logging.getLogger(__name__)

    info = IPTCInfo(file_path)
    # Lazy %-style arguments: the message (and the string form of ``info``)
    # is only built when DEBUG logging is actually enabled.
    logger.debug("File %s tags: %s", file_path, info)

    return info
Esempio n. 22
0
def process_file(filepath, random_replace=False, verbose=False):
    """Rewrite the ``FBMD`` block in an image's IPTC special instructions.

    Files that ``imghdr`` does not recognise, or whose special instructions
    are missing or lack an ``FBMD`` marker, are left untouched. Otherwise
    the field is replaced with the result of ``update_instructions``, the
    image is saved and the ``filepath + "~"`` file is removed.
    """
    img_type = imghdr.what(filepath)
    if img_type is None:
        if verbose:
            print("Skipping).")
        return

    if verbose:
        print(img_type, end='). ')

    info = IPTCInfo(filepath, force=True)
    special_instructions = info['special instructions']

    if special_instructions is None:
        fbmd_index = -1
    else:
        fbmd_index = special_instructions.find(b'FBMD')
    if fbmd_index < 0:
        if verbose:
            print("FBMD not found")
        return

    # Four hex digits, starting six bytes after the marker, hold the length.
    length_start = fbmd_index + 6
    length_end = length_start + 4
    length = int(special_instructions[length_start:length_end], 16)

    info['special instructions'] = update_instructions(special_instructions,
                                                       fbmd_index,
                                                       length,
                                                       random=random_replace)

    if verbose:
        block_end = length_end + (length + 1) * 8
        print(special_instructions[fbmd_index:block_end])

    info.save()
    os.remove(filepath + "~")
Esempio n. 23
0
def update_index(filepath, filename):
    """Add ``filename`` to the master index trie under each IPTC keyword.

    The trie is loaded from ``"master_index_trie"``, pretty-printed, and
    saved again after every insertion. A keyword is only indexed when the
    trie lookup yields a list that does not already contain ``filename``.
    """
    info = IPTCInfo(filepath + "/" + filename, force=True)
    master_index_trie = load_obj("master_index_trie")
    pp(master_index_trie)

    keywords = [item.decode("utf-8") for item in info["keywords"]]

    for keyword in keywords:
        associated_pics = trie.find_prefix(master_index_trie, keyword)[2]
        if type(associated_pics) is not list:
            continue
        if filename in associated_pics:
            continue
        trie.add_value(master_index_trie, keyword, filename)
        save_obj(master_index_trie, "master_index_trie")
Esempio n. 24
0
def get_img_data(sfile):
    """Collect IPTC and EXIF details of an image into a dict.

    Keys always set: ``path``, ``filename``, ``keywords`` (list) and
    ``shortcode``. Set when available: ``title``, ``description``, ``date``
    (a ``datetime`` parsed from ``YYYYMMDD``), ``model``, ``shutter``,
    ``focal``, ``aperture``, ``iso`` and ``lens``.

    Raises whatever ``IPTCInfo`` raises when the file cannot be read; the
    failure is logged first.
    """
    try:
        info = IPTCInfo(sfile)
    except Exception:
        # ``logging.ERROR`` is an integer level constant, not a function;
        # log properly and re-raise because nothing below can work without
        # ``info``.
        logging.error("Could not open file: %s" % sfile)
        raise

    data = dict()

    data["path"] = sfile
    data["filename"] = os.path.basename(sfile)

    title = info["object name"] or info["headline"]
    if title:
        data["title"] = title.decode(ENCODING)
    desc = info["caption/abstract"]
    if desc:
        data["description"] = desc.decode(ENCODING)
    date = info["date created"]
    if date:
        data["date"] = datetime.datetime.strptime(date.decode(ENCODING),
                                                  '%Y%m%d')
    data["keywords"] = [key.decode(ENCODING)
                        for key in (info["keywords"] or [])]

    # exif: tags copied verbatim, keyed by their name in the result
    plain_tags = {
        "Image Model": "model",
        "EXIF ExposureTime": "shutter",
        "EXIF FocalLength": "focal",
        "EXIF ISOSpeedRatings": "iso",
        "EXIF LensModel": "lens",
    }
    exif = get_exif(sfile)
    for tag in exif.keys():
        value = str(exif[tag])
        if tag in plain_tags:
            data[plain_tags[tag]] = value
        elif tag == "EXIF FNumber":
            data["aperture"] = str(parse_fstop(value))

    data["shortcode"] = get_shortcode(data)

    return data
Esempio n. 25
0
def update_index(filepath, filename):
    """Add an image to the master index trie under each IPTC keyword.

    The trie is loaded from ``"master_index_trie"`` and saved again after
    every insertion. A keyword is only indexed when the trie lookup yields
    a dict that does not yet have the image's full path as a key.
    """
    full_path = filepath + "/" + filename
    print(full_path)
    info = IPTCInfo(full_path, force=True)
    master_index_trie = load_obj("master_index_trie")

    keywords = [item.decode("utf-8") for item in info["keywords"]]

    for keyword in keywords:
        print(trie.find_prefix(master_index_trie, keyword))
        associated_pics = trie.find_prefix(master_index_trie, keyword)[3]
        if type(associated_pics) is not dict:
            continue
        if full_path in associated_pics:
            continue
        trie.add_value(master_index_trie, keyword, filename, full_path,
                       keywords)
        save_obj(master_index_trie, "master_index_trie")
Esempio n. 26
0
def extractIptcKeywordTags(thisFile: str) -> list:
    """
    Return the IPTC keywords (=tags) of a jpeg file, sorted and decoded
    from UTF-8. ``thisFile`` must be an existing file.
    """
    assert os.path.isfile(thisFile)
    iptc = IPTCInfo(thisFile)

    if len(iptc["keywords"]) == 0:  # or supplementalCategories or contacts
        return []

    # decode binary strings
    return [raw.decode("utf-8") for raw in sorted(iptc["keywords"])]
Esempio n. 27
0
def mediaFolder2Data(path):
    """Build one data entry per ``*.jpg`` file found directly in ``path``.

    Only names of the form ``<base>.jpg`` (exactly one dot, extension
    compared case-insensitively) are processed; directories are skipped.
    For each file the IPTC keywords are matched against
    ``conversionsRegex`` and ``autotranslate``, and every ``autoappend``
    value is formatted with the file path.

    Returns a dict mapping the lower-cased base name to its entry dict.
    """
    entries = {}

    for filename in sorted(os.listdir(path)):
        # os.path.join works whether or not ``path`` ends with a separator;
        # plain concatenation silently produced a wrong path without one.
        filePath = os.path.join(path, filename)
        if os.path.isdir(filePath):
            continue

        filnameParts = filename.split(".")
        if len(filnameParts) != 2 or filnameParts[1].lower() != "jpg":
            continue

        info = IPTCInfo(filePath)
        keywords = [x.decode('ascii') for x in info['keywords']]

        entry = {}
        for keyword in keywords:
            for reg in conversionsRegex:
                reg_match = re.search(reg['regex'], keyword, re.IGNORECASE)
                if reg_match:
                    entry[reg['index']] = reg_match.group(1)

        for at in autotranslate:
            if at["search"] in keywords:
                entry[at["index"]] = at["value"]

        for ap in autoappend:
            entry[ap["index"]] = ap["value"].format(filePath)

        entries[filnameParts[0].lower()] = entry

    return entries
Esempio n. 28
0
def get_date(DATADIR_RAW, DATADIR_RAW_NEW):
    '''Copy every image of DATADIR_RAW to DATADIR_RAW_NEW with an IPTC date.

    For each file in DATADIR_RAW the first ``yyyy:mm:dd`` date found in the
    raw EXIF block is written to the IPTC ``date created`` field (as
    ``yyyymmdd``) and the result is saved as ``new<name>`` in
    DATADIR_RAW_NEW, which is created when missing. An existing file of
    that name is replaced.

    Raises KeyError when an image has no EXIF block and AttributeError when
    the EXIF block contains no date.
    '''
    date_pattern = re.compile(r'\d{4}:\d{2}:\d{2}')  # format yyyy:mm:dd

    # get the image name from the dir DATADIR_RAW
    for img in os.listdir(DATADIR_RAW):
        image_path = os.path.join(DATADIR_RAW, img)
        print(img)

        # Open the image and access the metadata; the handle is closed as
        # soon as the EXIF block has been read.
        with Image.open(image_path) as image:
            txt = str(image.info['exif'])  # exif are mostly JPEG
        date = date_pattern.search(txt).group().replace(':', '')
        print(f'date of picture: {date}')

        # Force open the IPTC protocol to insert date
        info = IPTCInfo(image_path, force=True)
        # Before. Shows whether or not the entry was empty before
        print(f"info before: {info['date created']}")
        info['date created'] = date
        # After. Shows the date extracted from the meta data
        print(f"info after: {info['date created']}")
        print('\n')

        # Create the new dir DATADIR_RAW_NEW if it does not already exist
        os.makedirs(DATADIR_RAW_NEW, exist_ok=True)

        # Create the new path for the image
        new_path = os.path.join(DATADIR_RAW_NEW, 'new' + img)

        # If that image is already in the path, delete it. Best effort:
        # a missing or undeletable file is left for save_as to deal with.
        try:
            os.remove(new_path)
        except OSError:
            pass

        # Save new image.
        info.save_as(new_path)
def downSize():
    """Create web-sized copies of the images and export a GeoJSON index.

    Step 1 writes a 160x300 copy of every jpg returned by
    ``getFileWithExt`` into ``smallImages_<timeStamp>`` (EXIF and IPTC data
    are lost in these copies; their only purpose is display on the web).
    Step 2 reads date, GPS position and IPTC keywords from the original
    images and dumps them as a GeoJSON FeatureCollection into
    ``data_<timeStamp>``. Images that fail in either step are skipped.
    """
    # processing images: reducing size #
    smallImagestoday = ''.join(
        [route2folderWithImages, '/', 'smallImages_', timeStamp])
    if not os.path.isdir(smallImagestoday):
        os.mkdir(smallImagestoday)

    lisOfBigImages = getFileWithExt('jpg', route2folderWithImages)
    for i in lisOfBigImages:
        try:
            # LANCZOS is the same filter as the ANTIALIAS alias, which
            # newer Pillow releases no longer provide.
            with Image.open(i) as original:
                reduced = original.resize((160, 300), Image.LANCZOS)
            reduced.save(''.join([smallImagestoday, '/', i]),
                         optimize=True,
                         quality=95)  # From 1.9GB to 8.8MB ... nice!
            print(i + ' is done!')
        except Exception:
            print(i + '... is already done!!!')

    # extracting Exif and IPTC info from originals images #
    lisOfBigImages = getFileWithExt('jpg', route2folderWithImages)

    toSaveData = ''.join([route2folderWithImages, '/', 'data_', timeStamp])
    if not os.path.isdir(toSaveData):
        os.mkdir(toSaveData)

    collection = []
    print('getting EXIF and IPTC info ...')

    for i in lisOfBigImages:
        try:
            with Image.open(i) as openSmall:
                exif = {
                    ExifTags.TAGS[k]: v
                    for k, v in openSmall._getexif().items()
                    if k in ExifTags.TAGS
                }
            dateTime = exif['DateTime'][0:10].replace(':', '-').strip()
            # GPS values are (numerator, denominator) pairs for degrees,
            # minutes and seconds.
            lat = [float(x) / float(y) for x, y in exif['GPSInfo'][2]]
            lon = [float(x) / float(y) for x, y in exif['GPSInfo'][4]]
            latref = exif['GPSInfo'][1]
            lonref = exif['GPSInfo'][3]
            lat = lat[0] + lat[1] / 60 + lat[2] / 3600
            lon = lon[0] + lon[1] / 60 + lon[2] / 3600
            if latref == 'S':
                lat = -lat
            if lonref == 'W':
                lon = -lon

            info = IPTCInfo(i, force=True)
            kw = [x.decode("utf-8") for x in info['keywords']]

            o = {
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": [lon, lat]
                },
                "properties": {
                    "name":
                    i,
                    "date":
                    dateTime,
                    "tags":
                    kw,
                    "image":
                    ''.join([
                        'https://github.com/crishernandezmaps/situado_visualizacion/blob/gh-pages/',
                        smallImagestoday, '/', i, '?raw=true'
                    ]),
                    "author":
                    "Made by Cris Hernandez for FONDECYT Nº 1171554, INVI - U. of Chile, 2018/9"
                }
            }

            collection.append(o)
        except Exception:
            # Best effort: an image without usable EXIF/GPS/IPTC data is
            # simply left out of the collection.
            continue

    data = {"type": "FeatureCollection", "features": collection}

    # ### Saving GeoJson ###
    name = ''.join([
        toSaveData, '/',
        route2folderWithImages.split('/')[-1], '_', timeStamp, '.geojson'
    ])
    with open(name, 'w') as f:
        dump(data, f)

    print(data)
    print('All Done :)...')
Esempio n. 30
0
def scan_file(file: Path) -> Optional[IPTCInfo]:
    """Return the IPTC data of ``file`` when it is an existing regular file
    whose suffix is listed in ``EXTENSIONS``."""
    is_supported_file = (file.exists() and file.is_file()
                         and file.suffix in EXTENSIONS)
    if is_supported_file:
        return IPTCInfo(file)