def __init__(
    self,
    full_file_name: str,
    et_process: exiftool.ExifTool,
    file_type: Optional[FileType] = None,
) -> None:
    """
    Read photo and video metadata by way of ExifTool.

    :param full_file_name: the file from which to get metadata
    :param et_process: instance of the ExifTool class, which allows
     calling ExifTool without it exiting with each call
    :param file_type: photo or video. If not specified, it is determined
     from the file's extension
    """

    super().__init__()

    self.full_file_name = full_file_name
    self.ext = (
        None
        if full_file_name is None
        else fileformats.extract_extension(full_file_name)
    )
    self.metadata = {}
    self.metadata_string_format = {}
    self.et_process = et_process

    # Fall back on the file name when the caller did not say what kind of
    # file this is
    if full_file_name is not None and file_type is None:
        file_type = fileformats.file_type_from_splitext(file_name=full_file_name)
    assert file_type is not None
    self.file_type = file_type

    # Every preview image name we know about (there may well be others),
    # ordered by key.
    # Keep in step with the preview_smallest and preview256 dicts below.
    previews_by_key = sorted(_index_preview.items(), key=lambda entry: entry[0])
    self.index_preview = OrderedDict(previews_by_key)

    # An extension missing from preview_smallest means that the file format
    # always contains a "ThumbnailImage"
    self.preview_smallest = {
        "crw": (2,),
        "dng": (4, 3, 0),
        "fff": (3,),
        "iiq": (4,),
        "mrw": (0,),
        "nef": (4, 3),
        "raw": (2,),
        "3fr": (3, 4),
    }

    # Formats that might have a thumbnail, but might not
    self.may_have_thumbnail = ("crw", "mrw", "orf", "raw", "x3f")

    # Preview images that are at least 256 pixels big, according to
    # self.index_preview
    self.preview256 = {
        "arw": (0,),
        "cr2": (0,),
        "cr3": (0,),
        "crw": (2,),
        "dng": (0, 3),
        "fff": (3,),
        "iiq": (4,),
        "mrw": (0,),
        # along with DNG quite possibly the most inconsistent format
        "nef": (0, 4, 2, 3),
        "nrw": (0, 1),
        "orf": (0,),
        "pef": (0,),
        "raf": (0,),
        "raw": (2,),
        "rw2": (2,),
        "sr2": (0,),
        "srw": (0,),
        "x3f": (0, 2),
        "3fr": (3, 4),
    }

    self.ignore_tiff_preview_256 = ("cr2",)
def scan(
    folder: str,
    disk_cach_cleared: bool,
    scan_types: List[str],
    errors: bool,
    outfile: str,
    keep_file_names: bool,
    analyze_previews: bool,
) -> Tuple[List[PhotoAttributes], List[VideoAttributes]]:
    """
    Walk a folder and collect attributes for the photos and videos in it.

    Phase 1 decides which files can be tested: unless the disk cache is
    treated as cleared, a file for which vmtouch_output() reports cached
    bytes is left out and reported to the user, who is offered the chance
    to exit. Phase 2 builds an attributes object for every remaining file.

    :param folder: top level directory to walk
    :param disk_cach_cleared: if True, every matching file is tested
     without consulting vmtouch_output()
    :param scan_types: file extensions to include, compared against the
     value returned by extract_extension()
    :param errors: if True, run phase 2 inside show_errors() and without a
     progress bar; otherwise stderr is redirected to os.devnull
    :param outfile: if not None, path of a file into which the tuple
     (photos, videos) is pickled
    :param keep_file_names: if False, file_name is set to None on every
     result before it is pickled to outfile
    :param analyze_previews: passed through to the photo attribute
     classes; when True the disk cache is treated as cleared
    :return: the photo attributes and the video attributes gathered
    """

    global stop
    global kill

    # Exact names of files to skip. This has to be a collection of names and
    # not a bare string: `name in "some string"` is a substring test, which
    # also matched unrelated files such as "M8.DNG".
    problematic_files = ("RAW_LEICA_M8.DNG",)

    stop = kill = False

    pbs = progress_bar_scanning()
    pbs.start()

    test_files = []
    not_tested = []

    # Phase 1
    # Determine which files are safe to test i.e. are not cached

    if analyze_previews:
        disk_cach_cleared = True

    try:
        for dir_name, _subdirs, filenames in walk(folder):
            for filename in filenames:
                if filename in problematic_files:
                    continue
                ext = extract_extension(filename)
                if ext not in scan_types:
                    continue
                full_file_name = os.path.join(dir_name, filename)
                if disk_cach_cleared:
                    test_files.append((full_file_name, ext.upper()))
                    continue
                bytes_cached, _total, _in_memory = vmtouch_output(full_file_name)
                if bytes_cached == 0:
                    test_files.append((full_file_name, ext.upper()))
                else:
                    not_tested.append(full_file_name)
    finally:
        # Set the stop flag and wait for the scanning progress indicator
        # even when the walk fails, so that it is never left running
        stop = True
        pbs.join()

    if not_tested:
        print()
        if len(not_tested) > 20:
            for line in textwrap.wrap(
                "WARNING: {:,} files will not be analyzed because they are already in the "
                "kernel disk cache.".format(len(not_tested)),
                width=80,
            ):
                print(line)
        else:
            print(
                "WARNING: these files will not be analyzed because they are already in the "
                "kernel disk cache:"
            )
            for name in not_tested:
                print(name)
        print()
        for line in textwrap.wrap(
            "Run this script as super user and use command line option -c or --clear to safely "
            "clear the disk cache.",
            width=80,
        ):
            print(line)

        if confirm(prompt="\nDo you want to exit?", resp=True):
            sys.exit(0)

    photos = []
    videos = []

    # Stays None unless a progress bar is actually wanted and available
    bar = None
    if test_files:
        print("\nAnalyzing {:,} files:".format(len(test_files)))
        if have_progressbar and not errors:
            bar = pyprind.ProgBar(
                iterations=len(test_files), stream=1, track_time=False, width=80
            )
    else:
        print("\nNothing to analyze")

    # Phase 2
    # Get info from files

    if errors:
        context = show_errors()
    else:
        # Redirect stderr, hiding error output from exiv2
        context = stdchannel_redirected(sys.stderr, os.devnull)

    metadata_fail = []

    with context:
        with ExifTool() as exiftool_process:
            for full_file_name, ext in test_files:
                if ext.lower() in VIDEO_EXTENSIONS:
                    va = VideoAttributes(full_file_name, ext, exiftool_process)
                    videos.append(va)
                else:
                    # TODO think about how to handle HEIF files!
                    if use_exiftool_on_photo(
                        ext.lower(), preview_extraction_irrelevant=False
                    ):
                        pa = ExifToolPhotoAttributes(
                            full_file_name, ext, exiftool_process, analyze_previews
                        )
                        pa.process(analyze_previews)
                        photos.append(pa)
                    else:
                        try:
                            metadata = mp.MetaData(
                                full_file_name=full_file_name,
                                et_process=exiftool_process,
                            )
                        except Exception:
                            # Record the failure and carry on with the other
                            # files. Deliberately not a bare except, so that
                            # KeyboardInterrupt and SystemExit still propagate.
                            metadata_fail.append(full_file_name)
                        else:
                            pa = PhotoAttributes(
                                full_file_name, ext, exiftool_process, analyze_previews
                            )
                            pa.metadata = metadata
                            pa.process(analyze_previews)
                            photos.append(pa)

                if bar is not None:
                    bar.update()

    if metadata_fail:
        print()
        for full_file_name in metadata_fail:
            print("Could not read metadata from {}".format(full_file_name))

    if outfile is not None:
        if not keep_file_names:
            for pa in photos:
                pa.file_name = None
            for va in videos:
                va.file_name = None

        with open(outfile, "wb") as save_to:
            pickle.dump((photos, videos), save_to, pickle.HIGHEST_PROTOCOL)

    return photos, videos
def __init__(self, name: str,
             path: str,
             size: int,
             prev_full_name: Optional[str],
             prev_datetime: Optional[datetime],
             device_timestamp_type: DeviceTimestampTZ,
             mtime: float,
             mdatatime: float,
             thumbnail_cache_status: ThumbnailCacheDiskStatus,
             thm_full_name: Optional[str],
             audio_file_full_name: Optional[str],
             xmp_file_full_name: Optional[str],
             log_file_full_name: Optional[str],
             scan_id: bytes,
             from_camera: bool,
             never_read_mdatatime: bool,
             device_display_name: str,
             device_uri: str,
             camera_details: Optional[CameraDetails] = None,
             camera_memory_card_identifiers: Optional[List[int]] = None,
             raw_exif_bytes: Optional[bytes] = None,
             exif_source: Optional[ExifSource] = None,
             problem: Optional[Problem] = None) -> None:
    """
    Record everything known about a file found during a scan, and
    initialize the values that are filled in later during download.

    :param name: filename, including the extension, without its path
    :param path: path of the file
    :param size: file size
    :param prev_full_name: the name and path the file was previously
     downloaded with, else None
    :param prev_datetime: when the file was previously downloaded, else
     None
    :param device_timestamp_type: the method with which the device
     records timestamps.
    :param mtime: file modification time
    :param mdatatime: file time recorded in metadata
    :param thumbnail_cache_status: whether there is an entry in the
     thumbnail cache or not
    :param thm_full_name: name and path of any associated thumbnail file
    :param audio_file_full_name: name and path of any associated audio
     file
    :param xmp_file_full_name: name and path of any associated XMP file
    :param log_file_full_name: name and path of any associated LOG file
    :param scan_id: id of the scan
    :param from_camera: whether the file is being downloaded from a
     camera
    :param never_read_mdatatime: whether to ignore the metadata date time
     when determining a photo or video's creation time, and rely only on
     the file modification time
    :param device_display_name: display name of the device the file was
     found on
    :param device_uri: the uri of the device the file was found on
    :param camera_details: details about the camera, such as model name,
     port, etc.
    :param camera_memory_card_identifiers: if downloaded from a camera,
     and the camera has more than one memory card, a list of numeric
     identifiers (i.e. 1 or 2) identifying which memory card the file
     came from
    :param raw_exif_bytes: excerpt of the file's metadata in bytes format
    :param exif_source: source of photo metadata
    :param problem: any problems encountered
    """

    self.from_camera = from_camera
    self.camera_details = camera_details

    self.device_display_name = device_display_name
    self.device_uri = device_uri

    if camera_details is None:
        self.camera_model = None
        self.camera_port = None
        self.camera_display_name = None
        self.camera_storage_descriptions = None
        self.is_mtp_device = False
    else:
        self.camera_model = camera_details.model
        self.camera_port = camera_details.port
        self.camera_display_name = camera_details.display_name
        self.is_mtp_device = camera_details.is_mtp == True
        self.camera_storage_descriptions = camera_details.storage_desc

    self.path = path
    self.name = name

    self.prev_full_name = prev_full_name
    self.prev_datetime = prev_datetime
    self.previously_downloaded = prev_full_name is not None

    self.full_file_name = os.path.join(path, name)

    # Used in sample RPD files
    self.raw_exif_bytes = raw_exif_bytes
    self.exif_source = exif_source

    # Indicate whether file is a photo or video
    self._assign_file_type()

    # The extension with its period removed, in lower case
    self.extension = fileformats.extract_extension(name)
    # Classification of the file by its type e.g. jpeg, raw or tiff etc.
    self.extension_type = fileformats.extension_type(self.extension)

    self.mime_type = mimetypes.guess_type(name)[0]

    assert size > 0
    self.size = size

    # Cached version of call to metadata.date_time()
    self._datetime = None  # type: Optional[datetime]

    ############################
    # self._no_datetime_metadata
    ############################
    # True: an attempt to read the date time metadata was made, and failed
    # None: no attempt has been made yet
    # False: the value was obtained without problems (or it was assigned
    # from mtime when never_read_mdatatime is True)
    self._no_datetime_metadata = None  # type: Optional[bool]

    self.never_read_mdatatime = never_read_mdatatime
    assert not never_read_mdatatime or self.extension == 'dng'

    self.device_timestamp_type = device_timestamp_type

    ###########
    # self.ctime
    ###########
    #
    # self.ctime is the photo or video's creation time. Its value depends
    # on the values in self.modification_time and self.mdatatime, and it is
    # set by the setter functions for those values.
    #
    # Ideally the file's metadata contains the date/time that the file was
    # created. However the metadata may not have been read yet (it's a slow
    # operation), or it may not exist or be invalid. In that case the file
    # modification time, as reported by the file system or device, has to
    # serve as a proxy.
    #
    # That too can be misleading. On my Canon DSLR, for instance, if I'm in
    # the timezone UTC + 5 and I take a photo at 5pm, the time stamp on the
    # memory card shows the photo being taken at 10pm when I look at it on
    # the computer. With this camera the timestamp written to the memory
    # card should be read as datetime.utcfromtimestamp(mtime), which would
    # return a time zone naive value of 5pm. In other words, the timestamp
    # on the memory card is written as if it were always in UTC, regardless
    # of which timezone the photo was taken in.
    #
    # Yet this is not the case with a cellphone, where the file
    # modification time knows nothing about UTC and just saves it as a
    # naive local time.

    self.mdatatime_caused_ctime_change = False

    # file modification time
    self.modification_time = mtime
    # date time recorded in metadata (mtime stands in for it when the
    # metadata is never to be read)
    self.mdatatime = mtime if never_read_mdatatime else mdatatime
    self.mdatatime_caused_ctime_change = False

    # If a camera has more than one memory card, store a simple numeric
    # identifier to indicate which memory card it came from
    self.camera_memory_card_identifiers = camera_memory_card_identifiers

    # full path and name of thumbnail file that is associated with some
    # videos
    self.thm_full_name = thm_full_name

    # full path and name of audio file that is associated with some photos
    # and maybe one day videos, e.g. found with the Canon 1D series of
    # cameras
    self.audio_file_full_name = audio_file_full_name

    self.xmp_file_full_name = xmp_file_full_name
    # log files: see https://wiki.magiclantern.fm/userguide#movie_logging
    self.log_file_full_name = log_file_full_name

    self.status = DownloadStatus.not_downloaded
    self.problem = problem

    self.scan_id = int(scan_id)
    self.uid = uuid.uuid4().bytes

    self.job_code = None

    # freedesktop.org cache thumbnails
    # http://specifications.freedesktop.org/thumbnail-spec/thumbnail-spec-latest.html
    self.thumbnail_status = ThumbnailCacheStatus.not_ready  # type: ThumbnailCacheStatus
    self.fdo_thumbnail_128_name = self.fdo_thumbnail_256_name = ''
    # PNG data > 128x128 <= 256x256
    self.fdo_thumbnail_256 = None  # type: Optional[bytes]

    # The status of the file in the Rapid Photo Downloader thumbnail cache
    self.thumbnail_cache_status = thumbnail_cache_status

    # generated values

    self.cache_full_file_name = ''
    # temporary file used only for video metadata extraction:
    self.temp_sample_full_file_name = None  # type: Optional[str]
    # if True, the file is a complete copy of the original
    self.temp_sample_is_complete_file = False
    self.temp_full_file_name = self.temp_thm_full_name = ''
    self.temp_audio_full_name = self.temp_xmp_full_name = ''
    self.temp_log_full_name = self.temp_cache_full_file_chunk = ''

    self.download_start_time = None

    self.download_folder = self.download_subfolder = ''
    # os.path.join(download_folder, download_subfolder)
    self.download_path = ''
    self.download_name = ''
    # filename with path
    self.download_full_file_name = ''
    # filename with path but no extension
    self.download_full_base_name = ''
    # name of THM (thumbnail) file with path
    self.download_thm_full_name = ''
    # name of XMP sidecar with path
    self.download_xmp_full_name = ''
    # name of LOG associate file with path
    self.download_log_full_name = ''
    # name of the WAV or MP3 audio file with path
    self.download_audio_full_name = ''

    self.thm_extension = self.audio_extension = ''
    self.xmp_extension = self.log_extension = ''

    self.metadata = None  # type: Optional[Union[metadataphoto.MetaData, metadatavideo.MetaData, metadataexiftool.MetadataExiftool]]
    self.metadata_failure = False  # type: bool

    # User preference values used for name generation
    self.subfolder_pref_list = []  # type: List[str]
    self.name_pref_list = []  # type: List[str]
    self.generate_extension_case = ''  # type: str

    self.modified_via_daemon_process = False

    # If true, there was a name generation problem
    self.name_generation_problem = False