def execute(file_filter_include, file_filter_exclude):
    """Validate that every cached raw-library image still exists on disk.

    Walks the local raw library cache and, for each image that passes the
    include/exclude substring filters, checks that the file is still present.
    Exits the process with an error message if any cached path is missing,
    which means the local library cache is stale.

    Args:
        file_filter_include: substring that must appear in the image path
            for the image to be checked (falsy disables the filter).
        file_filter_exclude: substring that, when found in the image name,
            causes the image to be skipped (falsy disables the filter).
    """
    LocalLibrary.load_library('raw')

    # NOTE(review): dead locals (result, albums_seen, albums) removed — this
    # function only validates file existence; it produced no result.
    cache = LocalLibrary.cache_raw()
    images = cache['images']

    for image in images:
        image_name = image['name']
        image_path = image['path']

        # Apply name-exclude and path-include substring filters.
        if file_filter_exclude and image_name.find(file_filter_exclude) > -1:
            continue
        if file_filter_include and image_path.find(file_filter_include) < 0:
            continue

        # Cached path no longer on disk => local library cache must be rebuilt.
        if not os.path.exists(image_path):
            msg = "Local library not updated. Please rerun download_local_library again"
            sys.exit(msg)
def find(et, album_path_filter, file_filter_include, file_filter_exclude):
    """Collect the unique camera models used across the local raw library.

    Scans every cached image (subject to the filters), reads its EXIF
    "Model" tag via exiftool, and records each distinct value.  The result
    is saved as JSON in the gphoto cache dir under a name derived from the
    active filters.

    Result structure (dict used as an ordered set):
        { "model name": None, ... }

    Args:
        et: exiftool helper exposing get_tag(tag, path).
        album_path_filter: only consider images whose path starts with this
            prefix (falsy disables the filter).
        file_filter_include: substring required in the image path.
        file_filter_exclude: substring in the image name that skips the image.
    """
    LocalLibrary.load_library('raw')

    result = {}
    album_path_filter_leaf = None
    if album_path_filter:
        album_path_filter_leaf = os.path.basename(album_path_filter)

    # Walk each cached image and read its "Model" tag.
    cache = LocalLibrary.cache_raw()
    images = cache['images']

    for image in images:
        image_name = image['name']
        image_path = image['path']

        if file_filter_exclude and image_name.find(file_filter_exclude) > -1:
            continue
        if file_filter_include and image_path.find(file_filter_include) < 0:
            continue
        if album_path_filter and not image_path.startswith(album_path_filter):
            continue

        # Missing or unreadable metadata is recorded as a None model.
        try:
            value = et.get_tag("Model", image_path)
        except Exception:
            value = None

        result[value] = None

    # Build the destination file name from the active filters.
    saveto_filename = "get_unique_models"
    if album_path_filter_leaf:
        saveto_filename += '_d' + album_path_filter_leaf
    if file_filter_include is not None:
        saveto_filename += '_' + file_filter_include
    saveto_filename += '.json'

    saveto = os.path.join(gphoto.cache_dir(), saveto_filename)
    print(f"Saving to: '{saveto}'")
    with open(saveto, "w") as cache_file:
        json.dump(result, cache_file, indent=2)
def execute(album_path_filter):
    """Collect short words (3 characters or fewer) found in album names.

    Albums are expected to follow a standard naming convention; this scan
    gathers the small words (articles, acronyms, typos) so they can be
    reviewed.  The result is saved as JSON in the gphoto cache dir.

    Result structure (dict used as an ordered set):
        { "Word": None, ... }

    Args:
        album_path_filter: only consider albums whose path starts with this
            prefix (falsy disables the filter).
    """
    LocalLibrary.load_library('raw')

    album_path_filter_leaf = None
    if album_path_filter:
        album_path_filter_leaf = os.path.basename(album_path_filter)

    result = {}

    # NOTE(review): unused cache sections (album_paths, images, image_ids)
    # removed — only the album list is consulted.
    cache = LocalLibrary.cache_raw()
    albums = cache['albums']

    # Loop through each album and harvest the short words of its name.
    for album in albums:
        album_name = album['name']
        album_path = album['path']

        # Filter out albums outside the requested subtree.
        if album_path_filter and not album_path.startswith(album_path_filter):
            continue

        # Record every short word of the album name, capitalized.
        for word in album_name.split(' '):
            if len(word) <= 3:
                result[word.capitalize()] = None

    saveto_filename = "get_small_words_in_album_names"
    if album_path_filter_leaf:
        saveto_filename += '_d' + album_path_filter_leaf
    saveto_filename += '.json'

    saveto = os.path.join(gphoto.cache_dir(), saveto_filename)
    print(f"Saving to: '{saveto}'")
    with open(saveto, "w") as cache_file:
        json.dump(result, cache_file, indent=2)
def find(library_root="p:\\pics"):
    """Find duplicate image names in the local raw pics library.

    Rebuilds the local raw library cache from library_root, then groups
    image paths by file name and reports the names that map to more than
    one path.

    Args:
        library_root: root folder of the raw pics library.  Defaults to the
            historical hard-coded location so existing callers of find()
            are unaffected.

    Returns:
        list of {'name': image_name, 'paths': [image_path, ...]} records,
        one per duplicated image name.
    """
    gphoto.init()
    LocalLibrary.cache_raw_library(library_root)
    LocalLibrary.save_library('raw')

    # Group image paths by image name:  name -> [path, ...]
    name_to_paths = {}
    cache = LocalLibrary.cache_raw()
    for image in cache['images']:
        name_to_paths.setdefault(image['name'], []).append(image['path'])

    # Keep only names that resolve to multiple paths.
    return [
        {'name': name, 'paths': list(paths)}
        for name, paths in name_to_paths.items()
        if len(paths) > 1
    ]
def do_work(et, google_image_filter, album_folder_path, list_only):
    """Copy captions from google photos onto the files of one local album.

    The local album's files and the filtered google images are both sorted
    by name and then paired up index-by-index; each google description is
    written as the caption of the local file at the same position.

    Args:
        et: exiftool helper passed through to ImageUtils.set_caption.
        google_image_filter: substring a google image filename must contain.
        album_folder_path: key into the local library's album_paths index.
        list_only: when True, stop after listing both sides (no writes).
    """
    # Find the folder album in the local library cache.
    LocalLibrary.load_library('raw')
    local_library_cache = LocalLibrary.cache_raw()
    images = local_library_cache['images']
    albums = local_library_cache['albums']
    album_paths = local_library_cache['album_paths']

    album_idx = album_paths[album_folder_path]
    album = albums[album_idx]
    local_album_path = album['path']
    print(f"[INFO]: Found album '{local_album_path}'")

    # Collect and sort the local album's file paths.
    local_files_results = [images[image_idx]['path'] for image_idx in album['images']]
    # BUGFIX: sorted() returns a new list that was being discarded; sort in
    # place so the pairing below actually uses sorted order.
    local_files_results.sort()
    util.pprint(local_files_results)
    print(f"[INFO] Local files count '{len(local_files_results)}'")

    # Collect (filename, description) pairs from google photos, filtered by
    # filename substring, sorted by filename.
    google_images_results = []
    gphoto.init()
    GoogleImages.load_images()
    google_image_cache = GoogleImages.cache()
    for google_image in google_image_cache['list']:
        image_name = google_image['filename']
        if image_name.find(google_image_filter) < 0:
            continue
        google_images_results.append((image_name, google_image['description']))
    google_images_results = sorted(google_images_results, key=lambda record: record[0])
    util.pprint(google_images_results)
    print(f"[INFO] Google files count '{len(google_images_results)}'")

    # Basic validation: the two lists must pair up one-to-one.
    if len(local_files_results) != len(google_images_results):
        print(f"[ERROR]: Count mismatch local: '{len(local_files_results)}', google: '{len(google_images_results)}'. Aborting")
        # BUGFIX: actually abort; the original fell through and would index
        # past the end of the shorter list below.
        return

    if list_only:
        return

    # Apply each google caption to the local file at the same sorted index.
    for image_idx, local_image_path in enumerate(local_files_results):
        desc = google_images_results[image_idx][1]

        # Identify the file as an image or a video for caption handling.
        image_name = os.path.basename(local_image_path)
        image_ext = ImageUtils.get_file_extension(image_name)
        is_video = ImageUtils.is_ext_video(image_ext)

        ImageUtils.set_caption(et, local_image_path, desc, is_video)
def main_with_exiftool(et, file_filter_pattern):
    """Backfill missing date-shot metadata from iPhone-style file names.

    If a file has no date-shot tag and its name looks like
    "2015-02-17 19.30.28.jpg", the date and time are parsed out of the
    name and written back with the exiftool CLI (EXIF:DateTimeOriginal for
    images, QuickTime:CreateDate for videos).

    Args:
        et: exiftool helper exposing get_tag(tag, path).
        file_filter_pattern: substring required in the image path (falsy
            disables the filter).
    """
    LocalLibrary.load_library('raw')

    cache = LocalLibrary.cache_raw()
    images = cache['images']

    for image in images:
        image_name = image['name']
        image_path = image['path']
        image_ext = ImageUtils.get_file_extension(image_name)

        # Honor the path filter and skip files no longer on disk.
        if file_filter_pattern and image_path.find(file_filter_pattern) < 0:
            continue
        if not os.path.exists(image_path):
            continue

        is_video = image_ext in gphoto.core.VIDEO_EXTENSIONS

        # Skip files that already carry a date-shot tag.
        # BUGFIX: the helper method is get_tag (lowercase), as called
        # everywhere else in this codebase; get_Tag raised AttributeError.
        if not is_video:
            tag = et.get_tag("Exif:DateTimeOriginal", image_path)
        else:
            tag = et.get_tag("QuickTime:CreateDate", image_path)
        if tag is not None:
            continue

        # Date shot is missing: parse "YYYY-MM-DD HH.MM.SS.ext" out of the
        # file name; skip anything that doesn't match that shape.
        splits = image_name.split(' ')
        if len(splits) < 2:
            continue
        file_date = splits[0]
        file_time = splits[1]
        if file_date is None or file_time is None:
            continue

        file_date_splits = file_date.split('-')
        if len(file_date_splits) < 3:
            continue
        file_time_splits = file_time.split('.')
        # Expect HH.MM.SS plus the file extension => at least 4 pieces.
        if len(file_time_splits) < 4:
            continue

        # Recompose as exiftool's "YYYY:MM:DD HH:MM:SS" format.
        dateshot = ':'.join(file_date_splits) + ' ' + ':'.join(
            file_time_splits[0:3])

        # Write the tag back with the exiftool CLI (list form, no shell).
        if not is_video:
            ret = subprocess.run([
                "exiftool",
                f"-EXIF:DateTimeOriginal={dateshot}",
                "-overwrite_original",
                image_path
            ])
            print(f"Image Date Set: {image_path}")
        else:
            ret = subprocess.run([
                "exiftool",
                f"-QuickTime:CreateDate={dateshot}",
                "-overwrite_original",
                "-ext", "mov", "-ext", "mp4",
                image_path
            ])
            print(f"Video Date Set: {image_path}")

        print(f"retcode: {ret.returncode}, {dateshot}, {image_path}")
def execute(file_filter_include, file_filter_exclude):
    """Report file-name acronyms that appear in more than one place.

    Image names are expected to look like yyyymmdd_hhmmss_ACRONYM.ext.
    For every acronym, only the first matching image of each album is
    recorded; acronyms that end up with multiple paths are written out as
    JSON in the gphoto cache dir.
    """
    LocalLibrary.load_library('raw')

    # acronym -> list of image paths (first hit per album only)
    acronym_to_paths = {}
    # "<album name>__<acronym>" combinations already recorded
    seen_album_acronyms = {}

    cache = LocalLibrary.cache_raw()
    album_list = cache['albums']

    for entry in cache['images']:
        name = entry['name']
        path = entry['path']

        # Substring include/exclude filters.
        if file_filter_exclude and name.find(file_filter_exclude) > -1:
            continue
        if file_filter_include and path.find(file_filter_include) < 0:
            continue

        # A cached path missing on disk means the cache must be rebuilt.
        if not os.path.exists(path):
            msg = "Local library not updated. Please rerun download_local_library again"
            sys.exit(msg)

        # Check the yyyymmdd_hhmmss_XXXX naming convention.
        if len(name) < _FILEPREFIX_PATTERN_LEN:
            continue
        stem = os.path.splitext(name)[0]
        pieces = stem.split('_')
        if len(pieces) < 3:
            continue
        date_part, time_part = pieces[0], pieces[1]
        acronym = '_'.join(pieces[2:])
        if len(date_part) < 8 or len(time_part) < 6:
            continue

        # Resolve the owning album.
        parent_album = album_list[entry['parent']]
        parent_name = parent_album['name']
        parent_path = parent_album['path']

        # Record only the first image per (album, acronym) combination.
        combo = parent_name + '__' + acronym
        if combo in seen_album_acronyms:
            continue
        seen_album_acronyms[combo] = None

        acronym_to_paths.setdefault(acronym, []).append(path)

    # Keep only acronyms that collected more than one path.
    final_result = {
        acronym: paths
        for acronym, paths in acronym_to_paths.items()
        if len(paths) > 1
    }

    saveto_filename = "test_dup_file_acronym"
    if file_filter_include is not None:
        saveto_filename += '_' + file_filter_include
    saveto_filename += '.json'

    saveto = os.path.join(gphoto.cache_dir(), saveto_filename)
    print(f"Saving to: '{saveto}'")
    with open(saveto, "w") as cache_file:
        json.dump(final_result, cache_file, indent=2)
def check_album_readiness(et, album_path_filter_year, file_filter_include,
                          file_filter_exclude, test_missing_date_shot,
                          test_bad_date_shot, test_filename_FMT,
                          test_Tag_mismatch, test_missing_caption,
                          test_unique_caption, test_missing_caption_year,
                          test_missing_geotags):
    """Run a battery of readiness checks over the local raw library albums.

    Image names are expected to follow the format YYYYMMDD_HHmmSS...; a
    name that does not follow this format is an indication that the file
    name does not match the date shot.  Each test_* flag enables one check;
    failures are accumulated per reason and saved as JSON.

    The result the code actually builds is keyed reason-first:

        {
            "reason value": {
                "album_path": [image paths or (detail, image path) tuples],
                ...
            },
            ...
        }

    Args:
        et: exiftool helper exposing get_tag / get_tags.
        album_path_filter_year: year string; only albums whose path contains
            "\\<year>\\" are checked.
        file_filter_include / file_filter_exclude: path/name substring filters.
        test_*: booleans enabling the individual checks.  Several checks
        reuse state produced by earlier ones (see NOTEs below).
    """
    # Echo the effective arguments for the operator.
    print(f"-------------------- args --------------------------")
    print(f"album_path_filter_pattern = {album_path_filter_year}")
    print(f"file_filter_include = {file_filter_include}")
    print(f"file_filter_exclude = {file_filter_exclude}")
    print(f"test_missing_date_shot = {test_missing_date_shot}")
    print(f"test_bad_date_shot = {test_bad_date_shot}")
    print(f"test_filename_FMT = {test_filename_FMT}")
    print(f"test_Tag_mismatch = {test_Tag_mismatch}")
    print(f"test_missing_caption = {test_missing_caption}")
    print(f"test_unique_caption = {test_unique_caption}")
    print(f"test_missing_caption_year = {test_missing_caption_year}")
    print(f"test_missing_geotags = {test_missing_geotags}")
    print(f"----------------------------------------------------")

    # Reason keys used for the album-level (not per-image) findings.
    unique_caption_reason = "non-unique-captions"
    mismatch_album_image_caption_reason = "mismatch-album-image-captions"
    missing_geotags_reason = "missing-geotags"

    LocalLibrary.load_library('raw')
    result = {}

    # Albums are matched by containing "\<year>\" in their path.
    album_path_filter_pattern = f"\\{album_path_filter_year}\\"
    print(f"album_path_filter_pattern = {album_path_filter_pattern}")

    # Walk through each album/file, split file names for comparison, and
    # read date-shot metadata where needed.
    cache = LocalLibrary.cache_raw()
    images = cache.get('images')
    albums = cache.get('albums')

    for album in albums:
        album_name = album['name']
        album_path = album['path']

        if album_path_filter_pattern and album_path.find(
                album_path_filter_pattern) < 0:
            continue

        # Album caption convention: "<year> <rest of the album name>",
        # where the year is the leading date's year component.
        album_splits = album_name.split(' ')
        album_year = album_splits[0].split('-')[0]
        album_caption = album_year + ' ' + ' '.join(album_splits[1:])

        # Album-level results captured here: every candidate image caption
        # is hashed into this dict (used as a set) to detect duplicates.
        unique_caption_dict = {}

        album_images = album['images']
        for image_idx in album_images:
            image = images[image_idx]
            image_name = image['name']
            image_path = image['path']

            if file_filter_exclude and image_name.find(
                    file_filter_exclude) > -1:
                continue
            if file_filter_include and image_path.find(
                    file_filter_include) < 0:
                continue

            image_ext = ImageUtils.get_file_extension(image_name)
            is_video = ImageUtils.is_ext_video(image_ext)

            # A cached path missing on disk means the local library cache
            # needs to be rebuilt; abort the whole run.
            if not os.path.exists(image_path):
                msg = "Local library not updated. Please rerun download_local_library again"
                print(msg)
                sys.exit(msg)

            # Nothing is mismatched yet.  Each failed test appends either a
            # reason string or a ("reason", extra-info) tuple.
            mismatched = False
            test_results = []

            # --- missing-date-shot: try the usual tags in fallback order.
            # (Original note: for PNG use PNG:CreationTime.)
            tag = None
            if test_missing_date_shot:
                tag = et.get_tag("Exif:DateTimeOriginal", image_path)
                if tag is None or len(tag) <= 0:
                    tag = et.get_tag("Exif:CreateDate", image_path)
                    if tag is None or len(tag) <= 0:
                        tag = et.get_tag("QuickTime:CreateDate", image_path)
                        if tag is None or len(tag) <= 0:
                            mismatched = True
                            test_results.append("missing-date-shot")

            # --- bad-date-shot: a valid tag must split into date + time.
            tagsplit = None
            if test_missing_date_shot and test_bad_date_shot and not mismatched:
                tagsplit = tag.split(' ')
                if len(tagsplit) < 2:
                    mismatched = True
                    test_results.append(("bad-date-shot", tag))

            # --- filename-FMT: name must be long enough and contain '_'.
            mismatched_filename_format = False
            if test_filename_FMT:
                if len(image_name) < _IMAGE_PATTERN_LEN:
                    mismatched = True
                    mismatched_filename_format = True
                    test_results.append("filename-FMT")

            filedatetime = None
            if test_filename_FMT and not mismatched_filename_format:
                filedatetime = image_name.split('_')
                if len(filedatetime) < 2:
                    mismatched = True
                    mismatched_filename_format = True
                    test_results.append("filename-FMT")

            # --- tag-mismatch: filename date/time vs date-shot tag.
            # NOTE(review): relies on tagsplit/filedatetime produced above;
            # if test_missing_date_shot/test_bad_date_shot or
            # test_filename_FMT are off, tagsplit or filedatetime is None
            # here and this raises — flags are not independent. TODO confirm
            # intended flag combinations.
            # Only the first 3 chars of the time (HHm) are compared on both
            # sides, presumably to tolerate second-level drift.
            if test_Tag_mismatch and not mismatched_filename_format:
                file_date = filedatetime[0]
                file_time = filedatetime[1][0:3]
                tag_date = ''.join(tagsplit[0].split(':'))
                tag_time = ''.join(tagsplit[1].split(':'))[0:3]
                if tag_date != file_date or tag_time != file_time:
                    mismatched = True
                    test_results.append(("tag-mismatch", tag))

            # --- missing-caption / unique-caption: read the caption once;
            # captions whose 4-char prefix is not a decimal year are fed to
            # the album-level uniqueness set.
            caption = None
            if test_missing_caption:
                caption = ImageUtils.get_caption(et, image_path, is_video)
                if caption is None or len(caption) <= 0:
                    mismatched = True
                    test_results.append("missing-caption")
                elif test_unique_caption:
                    year = None
                    if len(caption) > 4:
                        year = caption[0:4]
                        if not year.isdecimal():
                            unique_caption_dict[caption] = None

            # --- missing-caption-year: caption must start with a decimal
            # year.  NOTE(review): when test_missing_caption is False,
            # caption is still None at this guard, so the whole block —
            # including the inner re-fetch — is skipped; the
            # "not test_missing_caption" branches below look unreachable.
            # TODO confirm.
            if test_missing_caption_year and caption is not None:
                if not test_missing_caption:
                    caption = ImageUtils.get_caption(et, image_path, is_video)
                if not test_missing_caption and (caption is None
                                                 or len(caption) <= 0):
                    mismatched = True
                    test_results.append("missing-caption")
                elif not test_missing_caption and len(caption) < 5:
                    mismatched = True
                    test_results.append("missing-caption")
                else:
                    caption_year = caption[0:4]
                    if not caption_year.isdecimal():
                        mismatched = True
                        test_results.append(("missing-caption-year", caption))

            # If the caption carries a full YYYY-MM-DD prefix then report it.
            if caption is not None and len(caption) > 11:
                caption_year = caption[0:4]
                first_dash = caption[4]
                second_dash = caption[7]
                if caption_year.isdecimal(
                ) and first_dash == '-' and second_dash == '-':
                    mismatched = True
                    test_results.append(("full-date-prefix", caption))

            # If the caption differs from the album caption, feed it to the
            # album-level uniqueness set.
            if caption is not None and caption != album_caption:
                unique_caption_dict[caption] = None

            # --- missing-geotags (images only): all four GPS tags required.
            if test_missing_geotags and not is_video:
                geotags = None
                try:
                    geotags = et.get_tags([
                        "GPSLatitude", "GPSLongitude", "GPSLatitudeRef",
                        "GPSLongitudeRef"
                    ], image_path)
                except Exception as e:
                    geotags = None
                if geotags is None or len(geotags) < 4:
                    mismatched = True
                    test_results.append(missing_geotags_reason)

            # Fold this image's failures into result[reason][album_path].
            # Plain-string results record just the path; tuple results
            # record (detail, path).
            if mismatched:
                for test_result in test_results:
                    mismatch_reason = None
                    mismatch_desc = None
                    if type(test_result) is not tuple:
                        mismatch_reason = test_result
                    else:
                        mismatch_reason = test_result[0]
                        mismatch_desc = test_result[1]

                    reason_result = None
                    if mismatch_reason not in result:
                        reason_result = {}
                        result[mismatch_reason] = reason_result
                    else:
                        reason_result = result[mismatch_reason]

                    album_result = None
                    if album_path not in reason_result:
                        album_result = []
                        reason_result[album_path] = album_result
                    else:
                        album_result = reason_result[album_path]

                    if type(test_result) is not tuple:
                        album_result.append(image_path)
                    else:
                        album_result.append((mismatch_desc, image_path))

        # Album level: more than one distinct flagged caption => report the
        # whole set under "non-unique-captions".
        if len(unique_caption_dict) > 1:
            unique_caption_result = None
            if unique_caption_reason not in result:
                unique_caption_result = {}
                result[unique_caption_reason] = unique_caption_result
            else:
                unique_caption_result = result[unique_caption_reason]
            unique_caption_result[album_path] = list(
                unique_caption_dict.keys())

        # Album level: exactly one caption shared by all images, but it
        # differs from the album caption => report the pair.
        if len(unique_caption_dict) == 1:
            image_caption = str(next(iter(unique_caption_dict)))

            # Strip the month and day from the album name; recompute the
            # "<year> <rest>" album caption for the comparison.
            splits = album_name.split(' ')
            album_date = splits[0]
            album_desc = splits[1:]
            album_year = album_date[0:4]
            album_caption = album_year + ' ' + ' '.join(album_desc)

            if album_caption != image_caption:
                mismatch_album_image_caption_result = None
                if mismatch_album_image_caption_reason not in result:
                    mismatch_album_image_caption_result = {}
                    result[
                        mismatch_album_image_caption_reason] = mismatch_album_image_caption_result
                else:
                    mismatch_album_image_caption_result = result[
                        mismatch_album_image_caption_reason]
                mismatch_album_image_caption_result[album_path] = {
                    'album_caption': album_caption,
                    'image_caption': image_caption
                }

    # Compose the output file name from the enabled tests/filters and save.
    saveto_filename = "check_album_readiness"
    if album_path_filter_year:
        saveto_filename += '_d' + album_path_filter_year
    if file_filter_include is not None:
        saveto_filename += '_' + file_filter_include
    if test_missing_date_shot or test_bad_date_shot:
        saveto_filename += "_dtshot"
    if test_filename_FMT:
        saveto_filename += "_ffmt"
    if test_Tag_mismatch:
        saveto_filename += "_Tagmm"
    if test_missing_caption:
        saveto_filename += "_miscap"
    if test_unique_caption:
        saveto_filename += "_dupcap"
    saveto_filename += '.json'

    saveto = os.path.join(gphoto.cache_dir(), saveto_filename)
    print(f"Saving to: '{saveto}'")
    with open(saveto, "w") as cache_file:
        json.dump(result, cache_file, indent=2)