def test_get_metadata(self, metadata, caplog):
    """Parsed exiftool output for a single file matches the expected value."""
    caplog.set_level(logging.DEBUG)
    tool = ExifTool(executable_="true")
    # feed canned exiftool output through a stubbed process
    tool._process = NopProcess(
        stdout_messages=(metadata["exiftool_output"].encode(), b"{ready}")
    )
    tool.running = True
    result = tool.get_metadata(filename=metadata["filename"])
    assert result == metadata["value"]
def test_pyexiftool_get_tag(datafiles, tag, caplog):
    """get_tag and get_tags agree on a single tag read from a real file."""
    caplog.set_level(logging.DEBUG)
    print(datafiles.listdir())
    with ExifTool() as exiftool:
        path = str(datafiles / tag["filename"])
        assert exiftool.get_tag(tag=tag["tag"], filename=path) == tag["value"]
        tags_result = exiftool.get_tags(tags=[tag["tag"]], filename=path)
        print(tags_result)
        assert tags_result == {"SourceFile": path, tag["tag"]: tag["value"]}
def test_photofile_from_file(datafiles):
    """PhotoFile.from_file reproduces each expected PhotoFile for the test files."""
    with ExifTool():
        for expected in photofile_expected_results:
            # round-trip through a dict so the shared expected result is not mutated
            expected = PhotoFile.from_dict(expected.to_dict())
            expected.src = str(datafiles / expected.src)
            actual = PhotoFile.from_file(
                expected.src,
                tz_default=timezone(timedelta(seconds=expected.tzo)),
            )
            assert actual == expected
def test_execute_json(self, caplog):
    """
    execute_json logs a warning if a string is empty and raises a
    JsonDecodeError if it cannot decode
    """
    exiftool = ExifTool()

    # empty response -> [] plus a logged warning
    exiftool.execute = lambda *args: b"\n"
    assert exiftool.execute_json() == []
    assert any(rec.levelname == "WARNING" for rec in caplog.records)
    assert any("empty string" in rec.message for rec in caplog.records)
    caplog.records.clear()

    # valid JSON -> decoded list, no warning
    exiftool.execute = lambda *args: b'[{"a":"b"},{"c":"d"}]'
    assert exiftool.execute_json() == [{"a": "b"}, {"c": "d"}]
    assert not any(rec.levelname == "WARNING" for rec in caplog.records)
    caplog.records.clear()

    # malformed JSON -> decode error propagates
    exiftool.execute = lambda *args: b"[{]"
    with pytest.raises(orjson.JSONDecodeError):
        exiftool.execute_json()
def test_get_metadata_batch(self, caplog):
    """get_metadata_batch parses one dict per file; bad argument types raise."""
    caplog.set_level(logging.DEBUG)
    tool = ExifTool(executable_="true")
    # stub the subprocess with a canned two-file JSON response
    tool._process = NopProcess(
        stdout_messages=(
            b'[{"SourceFile":"img1.jpg"},{"SourceFile":"img2.jpg"}]',
            b"{ready}",
        )
    )
    tool.running = True
    expected = [{"SourceFile": "img1.jpg"}, {"SourceFile": "img2.jpg"}]
    assert tool.get_metadata_batch(filenames=["img1.jpg", "img2.jpg"]) == expected
    # tags must be a sequence of strings, filenames must not be None
    with pytest.raises(TypeError):
        tool.get_tags_batch(None, "img1.jpg")
    with pytest.raises(TypeError):
        tool.get_tags_batch("EXIF:DateTimeOriginal", None)
def test_parse_get_tag_batch(self):
    """
    get_tag_batch returns the first value in each dict that isn't for key
    SourceFile, or None if there are no values.
    """
    tool = ExifTool()

    tool.get_tags_batch = lambda *args: []
    assert tool.get_tag_batch("E", ("a.jpg",)) == []

    # only SourceFile present -> no tag value for that file
    tool.get_tags_batch = lambda *args: [{"SourceFile": "a.jpg"}]
    assert tool.get_tag_batch("E", ("a.jpg",)) == [None]

    # extra keys beyond the first tag are ignored
    tool.get_tags_batch = lambda *args: [
        {"SourceFile": "a.jpg", "E": "c"},
        {"SourceFile": "b.jpg", "E": "f", "G": "h"},
    ]
    assert tool.get_tag_batch("E", ("a.jpg", "b.jpg")) == ["c", "f"]
def test_parse_get_best_datetime(self):
    """get_best_datetime is get_best_datetime_batch's first element, or None."""
    tool = ExifTool()
    tool.get_best_datetime_batch = lambda *args: []
    assert tool.get_best_datetime(("a.jpg", "b.jpg")) is None
    tool.get_best_datetime_batch = lambda *args: ["2020:05:20", "2021:02:08"]
    assert tool.get_best_datetime(("a.jpg", "b.jpg")) == "2020:05:20"
def test_parse_get_tag(self):
    """get_tag is get_tag_batch's first element, or None."""
    tool = ExifTool()
    tool.get_tag_batch = lambda *args: []
    assert tool.get_tag("E", "a.jpg") is None
    tool.get_tag_batch = lambda *args: ["c", "f"]
    assert tool.get_tag("E", "a.jpg") == "c"
def test_parse_get_tags(self):
    """get_tags is get_tags_batch's first element, or {}."""
    tool = ExifTool()
    tool.get_tags_batch = lambda *args: []
    assert tool.get_tags(("E",), "a.jpg") == {}
    tool.get_tags_batch = lambda *args: [{"a": "b"}, {"c": "d"}]
    assert tool.get_tags(("E",), "a.jpg") == {"a": "b"}
def test_parse_get_metadata(self):
    """get_metadata is execute_json's first element, or {}."""
    tool = ExifTool()
    tool.execute_json = lambda *args: []
    assert tool.get_metadata("a.jpg") == {}
    tool.execute_json = lambda *args: [{"a": "b"}, {"c": "d"}]
    assert tool.get_metadata("a.jpg") == {"a": "b"}
def test_pyexiftool_nonexistent_file(tmpdir, caplog):
    """Reading a missing file yields None/[] and logs warnings about bad responses."""
    caplog.set_level(logging.DEBUG)
    with ExifTool() as exiftool:
        # starting an already-started instance warns
        with pytest.warns(UserWarning):
            exiftool.start()
        missing = str(tmpdir / "asdf.jpg")

        result = exiftool.get_tag(tag="EXIF:DateTimeOriginal", filename=missing)
        assert result is None
        assert any(rec.levelname == "WARNING" for rec in caplog.records)
        assert any("empty string" in rec.message for rec in caplog.records)
        caplog.clear()

        batch = exiftool.get_tag_batch(
            tag="EXIF:DateTimeOriginal", filenames=[missing]
        )
        assert batch == []
        assert any(rec.levelname == "WARNING" for rec in caplog.records)
        assert any("empty string" in rec.message for rec in caplog.records)
        assert any("bad response" in rec.message for rec in caplog.records)
        assert not exiftool.running
def test_parse_get_tags_batch(self, caplog):
    """
    get_tags_batch logs a warning if execute_json returns the wrong
    number of arguments
    """
    tool = ExifTool()

    # zero results for one filename: bad response warning
    tool.execute_json = lambda *args: []
    assert tool.get_tags_batch((), ("a.jpg",)) == []
    assert any(rec.levelname == "WARNING" for rec in caplog.records)
    assert any("bad response" in rec.message for rec in caplog.records)
    caplog.records.clear()

    # two results for one filename: still a bad response warning
    tool.execute_json = lambda *args: [{"a": "b"}, {"c": "d"}]
    assert tool.get_tags_batch((), ("a.jpg",)) == [{"a": "b"}, {"c": "d"}]
    assert any(rec.levelname == "WARNING" for rec in caplog.records)
    assert any("bad response" in rec.message for rec in caplog.records)
    caplog.records.clear()

    # exactly one result for one filename: no warning
    tool.execute_json = lambda *args: [{"a": "b"}]
    assert tool.get_tags_batch((), ("a.jpg",)) == [{"a": "b"}]
    assert not any(rec.levelname == "WARNING" for rec in caplog.records)
    assert not any("bad response" in rec.message for rec in caplog.records)
    caplog.records.clear()
def test_parse_get_best_datetime_batch(self):
    """
    get_best_datetime_batch gets the best datetime from get_tags_batch
    """
    tool = ExifTool()

    tool.get_tags_batch = lambda *args: []
    assert tool.get_best_datetime_batch(("a.jpg", "b.jpg")) == []

    tool.get_tags_batch = lambda *args: [
        {
            "SourceFile": "/images/img8.MP4",
            "File:FileCreateDate": "2020:05:20 12:39:39-04:00",
            "File:FileModifyDate": "2020:05:20 12:39:39-04:00",
        },
        {
            "SourceFile": "/images/img7.HEIC",
            "EXIF:CreationDate": "2021:02:08 21:45:02",
            "XMP:CreateDate": "2021:02:08 21:45:01",
            "File:FileCreateDate": "2021:02:08 23:19:05-05:00",
            "File:FileModifyDate": "2021:02:08 23:19:05-05:00",
        },
    ]
    result = tool.get_best_datetime_batch(
        ("/images/img8.MP4", "/images/img7.HEIC")
    )
    # EXIF:CreationDate is preferred over the File: fallback dates
    assert result == ["2020:05:20 12:39:39-04:00", "2021:02:08 21:45:02"]
def get_media_datetime(path: Union[str, PathLike]) -> str:
    """Gets the best known datetime string for a file.

    Delegates to the ExifTool singleton's get_best_datetime.
    """
    exiftool = ExifTool()
    return exiftool.get_best_datetime(path)
def test_pyexiftool_terminate(caplog):
    """terminate stops the process; the singleton survives deletion until cleared."""
    caplog.set_level(logging.DEBUG)
    exiftool = ExifTool()
    exiftool.start()
    exiftool.terminate(wait_timeout=0)
    assert not exiftool.running
    # terminating an already-stopped instance is harmless
    exiftool.terminate()
    # executing while stopped raises
    with pytest.raises(ValueError):
        exiftool.execute()
    assert not exiftool.running
    exiftool.start()
    assert exiftool.running
    # deleting the local name does not stop the singleton instance
    del exiftool
    assert ExifTool.instance.running
    Singleton.clear(ExifTool)
    assert ExifTool.instance is None
def index_photos(
    files: Iterable[Union[str, PathLike]],
    priority: int = 10,
    hash_algorithm: HashAlgorithm = DEFAULT_HASH_ALGO,
    tz_default: Optional[tzinfo] = None,
    storage_type: str = "HDD",
) -> list[Optional[PhotoFile]]:
    """
    Indexes photo files

    :param files: the photo file paths to index
    :param priority: the photos' priority
    :param hash_algorithm: The hashing algorithm to use for file checksums
    :param tz_default: The time zone to use if none is set (defaults to local time)
    :param storage_type: the storage type being indexed (uses more async if SSD)
    :return: a list of PhotoFiles, with None entries for errors
    """
    logger = logging.getLogger(__name__)
    # files is consumed three times (hashing, datetime collection, indexing);
    # materialize it once so a one-shot iterator is not silently exhausted.
    files = list(files)
    if storage_type in ("SSD", "RAID"):
        async_hashes = True
        async_exif = cpu_count()
    else:
        # concurrent reads of sequential files can lead to thrashing
        async_hashes = False
        # exiftool is partially CPU-bound and benefits from async
        async_exif = min(4, cpu_count())
    logger.info("Collecting media hashes")
    checksum_cache = AsyncFileHasher(
        algorithm=hash_algorithm, use_async=async_hashes
    ).check_files(files, pbar_unit="B")
    logger.info("Collecting media dates and times")
    datetime_cache = AsyncExifTool(
        num_workers=async_exif
    ).get_best_datetime_batch(files)
    logger.info("Indexing media")
    photos = []
    exiftool = ExifTool()
    exiftool.start()
    try:
        for current_file in tqdm(files):
            if logger.isEnabledFor(logging.DEBUG):
                tqdm.write(f"Indexing {current_file}")
            try:
                pf = PhotoFile.from_file_cached(
                    current_file,
                    checksum_cache=checksum_cache,
                    datetime_cache=datetime_cache,
                    algorithm=hash_algorithm,
                    tz_default=tz_default,
                    priority=priority,
                )
                photos.append(pf)
            except Exception as e:
                tqdm.write(f"Error indexing {current_file}", file=sys.stderr)
                # Positional arguments: the etype=/value=/tb= keywords were
                # removed from traceback.format_exception in Python 3.10.
                tb_str = "".join(
                    traceback.format_exception(type(e), e, e.__traceback__)
                )
                tqdm.write(tb_str, file=sys.stderr)
                photos.append(None)
    finally:
        # always stop the exiftool subprocess, even if indexing raises
        exiftool.terminate()
    return photos