Esempio n. 1
0
 def test_copy_photos(self, datafiles, caplog):
     """
     copy_photos copies the supplied PhotoFiles into the destination
     folder, using the explicit relative store path when one is given
     and falling back to PhotoFile.sto otherwise; missing source files
     are counted as errors.
     """
     caplog.set_level(logging.DEBUG)
     # (PhotoFile constructor kwargs, explicit relative store path or None)
     photo_specs = [
         (
             dict(
                 chk="deadbeef",
                 src="B/img2.jpg",
                 sto="2015/08/2015-08-01_img2.jpg",
                 dt="2015:08:01 18:28:36.99",
                 ts=1438468116.99,
                 fsz=789,
                 tzo=-14400.0,
             ),
             None,
         ),
         (
             dict(
                 chk="deadbeef",
                 src="B/img4.jpg",
                 dt="2018:08:01 20:28:36",
                 ts=1533169716.0,
                 fsz=777,
                 tzo=-14400.0,
             ),
             "2018/08/2018-08-01_img4.jpg",
         ),
         (
             dict(
                 chk="deadbeef",
                 src="B/img_missing.jpg",
                 sto="2018/08/2018-08-08_img_missing.jpg",
                 dt="2018:08:01 20:28:36",
                 ts=1533169716.0,
                 fsz=777,
                 tzo=-14400.0,
             ),
             None,
         ),
     ]
     photos_to_copy = []
     for kwargs, rel_store_path in photo_specs:
         pf = PhotoFile(**kwargs)
         # make the source path absolute within the test's data directory
         pf.src = str(datafiles / pf.src)
         photos_to_copy.append((pf, rel_store_path))
     num_copied_photos, total_copy_size, num_error_photos = fileops.copy_photos(
         datafiles / "dest", photos_to_copy
     )
     print(num_copied_photos, total_copy_size, num_error_photos)
     assert num_copied_photos == 2
     assert total_copy_size == 789 + 777
     assert num_error_photos == 1
     assert os.listdir(datafiles / "dest/2015/08") == ["2015-08-01_img2.jpg"]
     assert os.listdir(datafiles / "dest/2018/08") == ["2018-08-01_img4.jpg"]
Esempio n. 2
0
def test_photofile_from_file(datafiles):
    """PhotoFile.from_file reproduces every expected PhotoFile exactly."""
    with ExifTool():
        for expected in photofile_expected_results:
            # work on a copy so the shared fixture list is not mutated
            expected = PhotoFile.from_dict(expected.to_dict())
            expected.src = str(datafiles / expected.src)
            tz = timezone(timedelta(seconds=expected.tzo))
            indexed = PhotoFile.from_file(expected.src, tz_default=tz)
            assert indexed == expected
Esempio n. 3
0
def generate_test_database(num_uids=10000, r_seed=42):
    """
    Build a Database populated with pseudo-random photo entries.

    :param num_uids: number of uids to generate (each gets 1-3 photos)
    :param r_seed: seed for the random module, for reproducible output
    :return: the populated Database
    """
    # NOTE: the order of random.* calls below must not change, or the
    # generated database will differ for the same seed.
    random.seed(r_seed, version=2)
    database = Database()
    for _ in range(num_uids):
        uid = "".join(random.choices(database.UID_ALPHABET, k=8))
        photos = database.photo_db[uid] = []
        for _ in range(random.randint(1, 3)):
            checksum = "".join(random.choices(string.hexdigits, k=64))
            timestamp = random.randint(1037750179000000, 1637750179000000) / 1000000
            tz = datetime.timezone(
                datetime.timedelta(hours=random.randint(-12, 12))
            )
            dt = datetime.datetime.fromtimestamp(timestamp).astimezone(tz)
            ts_str = dt.strftime("%Y-%m-%d %H:%M:%S%z")
            img_num = random.randint(0, 9999)
            source_path = f"/path/to/photo/{dt.year}/IMG_{img_num:04d}.JPG"
            # about half the photos get an empty store path
            if random.randint(0, 1):
                store_path = ""
            else:
                store_path = f"{dt.year}/{source_path.rsplit('/', 1)[-1]}"
            filesize = random.randint(100000, 100000000)
            photos.append(
                PhotoFile(
                    chk=checksum,
                    src=source_path,
                    ts=timestamp,
                    dt=ts_str,
                    fsz=filesize,
                    sto=store_path,
                )
            )
    return database
Esempio n. 4
0
def test_database_add_photo_same_source_new_checksum(caplog):
    """
    Adding a photo whose source_path is already indexed but whose checksum
    differs still adds the photo, and a warning is logged.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    changed_photo = PhotoFile(
        chk="not_a_match",
        src="/a/b/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.add_photo(changed_photo, uid="uid1")
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid == "uid1"
    # both the old and the new checksum now map to the same uid
    assert db.hash_to_uid["not_a_match"] == "uid1"
    assert db.hash_to_uid["deadbeef"] == "uid1"
    print(db.photo_db["uid1"])
    assert len(db.photo_db["uid1"]) == 2
    print([(r.levelname, r) for r in caplog.records])
    assert any(r.levelname == "WARNING" for r in caplog.records)
    assert any(
        "Checksum of previously-indexed source photo has changed" in r.msg
        for r in caplog.records
    )
Esempio n. 5
0
    def db(self, db: dict):
        """
        Set the Database parameters from a dict.

        Migrates legacy databases in place before storing them:
        missing top-level keys are filled with legacy defaults, pre-v3
        photo entries are re-keyed through NAME_MAP_ENC, and the
        checksum/timestamp lookup indexes are rebuilt from the photos.

        :param db: the database dict to load; mutated during migration
        :raises DatabaseException: if the dict's version is newer than
            this build's supported VERSION
        """
        db.setdefault("version", 1)  # legacy dbs are version 1
        db.setdefault("hash_algorithm", "sha256")  # legacy dbs use sha256
        db.setdefault("timezone_default", "local")  # legacy dbs are in local time

        db["version"] = int(db["version"])
        if db["version"] > self.VERSION:
            raise DatabaseException(
                "Database version too new for this version of PhotoManager."
            )
        if db["version"] < 3:
            # pre-v3 photo dicts use different field names; re-key each
            # entry through NAME_MAP_ENC before PhotoFile.from_dict below
            for uid in db["photo_db"].keys():
                photos = db["photo_db"][uid]
                for i in range(len(photos)):
                    photos[i] = {NAME_MAP_ENC[k]: v for k, v in photos[i].items()}

        # rebuild the dict in canonical key order, dropping unknown keys
        db = {k: db[k] for k in self.DB_KEY_ORDER}
        db["hash_algorithm"] = HashAlgorithm(db["hash_algorithm"])
        for uid in db["photo_db"].keys():
            db["photo_db"][uid] = [PhotoFile.from_dict(d) for d in db["photo_db"][uid]]

        # the stored db is always stamped with the current version
        db["version"] = self.VERSION
        self._db = db

        # rebuild the reverse indexes: checksum -> uid and
        # timestamp -> ordered set of uids (dict used as an ordered set)
        for uid, photos in self.photo_db.items():
            for photo in photos:
                self.hash_to_uid[photo.chk] = uid
                if photo.ts in self.timestamp_to_uids:
                    self.timestamp_to_uids[photo.ts][uid] = None
                else:
                    self.timestamp_to_uids[photo.ts] = {uid: None}

        self.reset_saved()
Esempio n. 6
0
def test_database_add_photo_sort(caplog):
    """
    Photos added under the same uid end up in sorted order, interleaving
    correctly with the fixture's pre-existing /a/b/c.jpg entry.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    shared = dict(
        chk="deadbeef",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
    )
    uid = db.add_photo(PhotoFile(src="/x/y/c.jpg", prio=20, **shared), uid=None)
    db.add_photo(PhotoFile(src="/z/y/c.jpg", prio=11, **shared), uid=None)
    db.add_photo(PhotoFile(src="/0/1/c.jpg", prio=10, **shared), uid=None)
    assert [p.src for p in db.photo_db[uid]] == [
        "/0/1/c.jpg",
        "/a/b/c.jpg",
        "/z/y/c.jpg",
        "/x/y/c.jpg",
    ]
Esempio n. 7
0
 def test_remove_photos(self, datafiles, caplog):
     """
     remove_photos deletes each supplied PhotoFile that exists and
     counts the ones that are missing.
     """
     caplog.set_level(logging.DEBUG)
     # (relative source path, datetime string, timestamp, file size)
     photo_specs = [
         ("B/img2.jpg", "2015:08:01 18:28:36.99", 1438468116.99, 789),
         ("B/img4.jpg", "2018:08:01 20:28:36", 1533169716.0, 777),
         ("B/img_missing.jpg", "2018:08:01 20:28:36", 1533169716.0, 777),
     ]
     photos_to_remove = []
     for src, dt, ts, fsz in photo_specs:
         pf = PhotoFile(
             chk="deadbeef",
             src=src,
             dt=dt,
             ts=ts,
             fsz=fsz,
             tzo=-14400.0,
         )
         # store path is the absolute location inside the data directory
         pf.sto = str(datafiles / pf.src)
         photos_to_remove.append(pf)
     num_removed_photos, num_missing_photos = fileops.remove_photos(
         directory=datafiles, photos=photos_to_remove
     )
     assert num_removed_photos == 2
     assert num_missing_photos == 1
     assert os.listdir(datafiles / "B") == ["img1.jpg"]
Esempio n. 8
0
def test_database_add_photo_already_present(caplog):
    """
    When a photo is already in the database, add_photo does not add it
    again and returns None.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    duplicate = PhotoFile(
        chk="deadbeef",
        src="/a/b/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.add_photo(duplicate, uid="uid1")
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid is None
Esempio n. 9
0
def test_database_add_photo_wrong_uid(caplog):
    """
    When a photo's checksum already belongs to a different uid,
    add_photo refuses to add it and returns None.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    mismatched = PhotoFile(
        chk="deadbeef",
        src="/x/y/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.add_photo(mismatched, uid="uid2")
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid is None
Esempio n. 10
0
def test_database_find_photo_ambiguous(caplog):
    """
    With no checksum match and an ambiguous timestamp+source-name match,
    find_photo warns and returns the first match.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    query = PhotoFile(
        chk="not_a_match",
        src="/x/y/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.find_photo(query)
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert any(r.levelname == "WARNING" for r in caplog.records)
    assert any(
        "ambiguous timestamp+name match" in r.msg for r in caplog.records
    )
    assert uid == "uid1"
Esempio n. 11
0
 def test_hash_stored_photos(self, datafiles, caplog):
     """
     hash_stored_photos returns a filename:hash dict for a list of
     stored photos, leaving out files that do not exist on disk.
     """
     caplog.set_level(logging.DEBUG)
     # (checksum, relative path, datetime string, timestamp, file size)
     photo_data = [
         (
             "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb",
             "A/img1.jpg", "2015:08:01 18:28:36.90", 1438468116.9, 771,
         ),
         (
             "1e10df2e3abe4c810551525b6cb2eb805886de240e04cc7c13c58ae208cabfb9",
             "A/img1.png", "2015:08:01 18:28:36.90", 1438468116.9, 382,
         ),
         (
             "3b39f47d51f63e54c76417ee6e04c34bd3ff5ac47696824426dca9e200f03666",
             "A/img2.jpg", "2015:08:01 18:28:36.99", 1438468116.99, 771,
         ),
         (
             "79ac4a89fb3d81ab1245b21b11ff7512495debca60f6abf9afbb1e1fbfe9d98c",
             "A/img4.jpg", "2018:08:01 20:28:36", 1533169716.0, 759,
         ),
         (
             "79ac4a89fb3d81ab1245b21b11ff7512495debca60f6abf9afbb1e1fbfe9d98c",
             "A/img_nonexistent.jpg", "2018:08:01 20:28:36", 1533169716.0, 759,
         ),
     ]
     stored_photos = [
         PhotoFile(
             chk=chk,
             src=str(datafiles / rel_path),
             dt=dt,
             ts=ts,
             fsz=fsz,
             sto=rel_path,
             prio=11,
             tzo=None,
         )
         for chk, rel_path, dt, ts, fsz in photo_data
     ]
     # the nonexistent file must not appear in the result
     expected_hashes = {
         pf.src: pf.chk for pf in stored_photos if "nonexistent" not in pf.src
     }
     photos = fileops.hash_stored_photos(
         photos=stored_photos,
         directory=datafiles,
         hash_algorithm=HashAlgorithm.BLAKE2B_256,
     )
     print(photos)
     assert photos == expected_hashes
Esempio n. 12
0
 def test_index_photos(self, datafiles, caplog):
     """
     index_photos builds a PhotoFile for each supplied path and yields
     None entries for paths that do not exist.
     """
     caplog.set_level(logging.DEBUG)
     files = [
         str(datafiles / "A" / "img1.jpg"),
         Path(datafiles / "A" / "img1.png"),
         str(datafiles / "A" / "img_nonexistent.jpg"),
         str(datafiles / "A" / "img4.jpg"),
         str(datafiles / "A" / "img2.jpg"),
     ]
     # (checksum, file name, datetime string, timestamp, file size),
     # or None for a file expected to fail indexing
     expected_data = [
         (
             "d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb",
             "img1.jpg", "2015:08:01 18:28:36.90", 1438468116.9, 771,
         ),
         (
             "1e10df2e3abe4c810551525b6cb2eb805886de240e04cc7c13c58ae208cabfb9",
             "img1.png", "2015:08:01 18:28:36.90", 1438468116.9, 382,
         ),
         None,
         (
             "79ac4a89fb3d81ab1245b21b11ff7512495debca60f6abf9afbb1e1fbfe9d98c",
             "img4.jpg", "2018:08:01 20:28:36", 1533169716.0, 759,
         ),
         (
             "3b39f47d51f63e54c76417ee6e04c34bd3ff5ac47696824426dca9e200f03666",
             "img2.jpg", "2015:08:01 18:28:36.99", 1438468116.99, 771,
         ),
     ]
     expected_photos = [
         None
         if item is None
         else PhotoFile(
             chk=item[0],
             src=str(datafiles / "A" / item[1]),
             dt=item[2],
             ts=item[3],
             fsz=item[4],
             sto="",
             prio=11,
             tzo=-14400.0,
         )
         for item in expected_data
     ]
     photos = fileops.index_photos(
         files=files,
         priority=11,
         tz_default=timezone(timedelta(seconds=-14400.0)),
     )
     print(photos)
     print(len(photos))
     assert photos == expected_photos
Esempio n. 13
0
def test_database_load_version_1():
    """
    A version-1 database (long photo field names, no timezone_default)
    loads successfully and is migrated to the current Database.VERSION.
    """
    json_data = b"""{
"version": 1,
"hash_algorithm": "sha256",
"photo_db": {
    "d239210f00534b76a2b215e073f75832": [
        {
            "checksum": "deadbeef",
            "source_path": "/a/b/c.jpg",
            "datetime": "2015:08:27 04:09:36.50",
            "timestamp": 1440662976.5,
            "file_size": 1024,
            "store_path": "/d/e/f.jpg",
            "priority": 11
        },
        {
            "checksum": "deadbeef",
            "source_path": "/g/b/c.jpg",
            "datetime": "2015:08:27 04:09:36.50",
            "timestamp": 1440662976.5,
            "file_size": 1024,
            "store_path": "",
            "priority": 20,
            "tz_offset": -14400
        }
    ]
},
"command_history": {
    "2021-03-08_23-56-00Z": "photomanager create --db test.json",
    "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg"
}
}"""
    db = Database.from_json(json_data)
    print(db.db)
    assert db.version == Database.VERSION
    assert db.hash_algorithm == HashAlgorithm.SHA256
    assert db.db["timezone_default"] == "local"
    assert db.timezone_default is None
    first_photo = PhotoFile.from_dict(
        {
            "chk": "deadbeef",
            "src": "/a/b/c.jpg",
            "dt": "2015:08:27 04:09:36.50",
            "ts": 1440662976.5,
            "fsz": 1024,
            "sto": "/d/e/f.jpg",
            "prio": 11,
        }
    )
    second_photo = PhotoFile.from_dict(
        {
            "chk": "deadbeef",
            "src": "/g/b/c.jpg",
            "dt": "2015:08:27 04:09:36.50",
            "ts": 1440662976.5,
            "fsz": 1024,
            "sto": "",
            "prio": 20,
            "tzo": -14400,
        }
    )
    photo_db_expected = {
        "d239210f00534b76a2b215e073f75832": [first_photo, second_photo]
    }
    command_history_expected = {
        "2021-03-08_23-56-00Z": "photomanager create --db test.json",
        "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg",
    }
    db_expected = {
        "version": Database.VERSION,
        "hash_algorithm": HashAlgorithm.SHA256,
        "timezone_default": "local",
        "photo_db": photo_db_expected,
        "command_history": command_history_expected,
    }
    assert db.photo_db == photo_db_expected
    assert db.command_history == command_history_expected
    # serialization differs from the input because the db was migrated
    assert orjson.loads(db.json) != orjson.loads(json_data)
    assert db.db == db_expected
    assert db == Database.from_dict(orjson.loads(json_data))
    assert db.get_stats() == (1, 2, 1, 1024)
Esempio n. 14
0
def test_database_load_version_3():
    """
    A version-3 database (current short photo field names) loads
    correctly and is stamped with the current Database.VERSION.
    """
    # NOTE: a vestigial ``.replace(b"VERSION", ...)`` call on the payload
    # was removed — the literal hardcodes ``"version": 3`` and contains no
    # ``VERSION`` placeholder, so the call was a no-op left over from a
    # templated test.
    json_data = b"""{
"version": 3,
"hash_algorithm": "sha256",
"photo_db": {
    "QKEsTn2X": [
        {
            "chk": "deadbeef",
            "src": "/a/b/c.jpg",
            "dt": "2015:08:27 04:09:36.50",
            "ts": 1440662976.5,
            "fsz": 1024,
            "sto": "/d/e/f.jpg",
            "prio": 11,
            "tzo": null
        },
        {
            "chk": "deadbeef",
            "src": "/g/b/c.jpg",
            "dt": "2015:08:27 04:09:36.50",
            "ts": 1440662976.5,
            "fsz": 1024,
            "sto": "",
            "prio": 20,
            "tzo": -14400
        }
    ]
},
"command_history": {
    "2021-03-08_23-56-00Z": "photomanager create --db test.json",
    "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg"
}
}"""
    db = Database.from_json(json_data)
    print(db.db)
    assert db.version == Database.VERSION
    assert db.hash_algorithm == HashAlgorithm.SHA256
    assert db.db["timezone_default"] == "local"
    assert db.timezone_default is None
    photo_db_expected = {
        "QKEsTn2X": [
            PhotoFile.from_dict({
                "chk": "deadbeef",
                "src": "/a/b/c.jpg",
                "dt": "2015:08:27 04:09:36.50",
                "ts": 1440662976.5,
                "fsz": 1024,
                "sto": "/d/e/f.jpg",
                "prio": 11,
            }),
            PhotoFile.from_dict({
                "chk": "deadbeef",
                "src": "/g/b/c.jpg",
                "dt": "2015:08:27 04:09:36.50",
                "ts": 1440662976.5,
                "fsz": 1024,
                "sto": "",
                "prio": 20,
                "tzo": -14400,
            }),
        ]
    }
    command_history_expected = {
        "2021-03-08_23-56-00Z": "photomanager create --db test.json",
        "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg",
    }
    db_expected = {
        "version": Database.VERSION,
        "hash_algorithm": HashAlgorithm.SHA256,
        "timezone_default": "local",
        "photo_db": photo_db_expected,
        "command_history": command_history_expected,
    }
    assert db.photo_db == photo_db_expected
    assert db.command_history == command_history_expected
    # serialization differs from the input (e.g. timezone_default added)
    assert orjson.loads(db.json) != orjson.loads(json_data)
    assert db.db == db_expected
    assert db == Database.from_dict(orjson.loads(json_data))
    assert db.get_stats() == (1, 2, 1, 1024)
Esempio n. 15
0
def index_photos(
    files: Iterable[Union[str, PathLike]],
    priority: int = 10,
    hash_algorithm: HashAlgorithm = DEFAULT_HASH_ALGO,
    tz_default: Optional[tzinfo] = None,
    storage_type: str = "HDD",
) -> list[Optional[PhotoFile]]:
    """
    Indexes photo files

    :param files: the photo file paths to index
    :param priority: the photos' priority
    :param hash_algorithm: The hashing algorithm to use for file checksums
    :param tz_default: The time zone to use if none is set
        (defaults to local time)
    :param storage_type: the storage type being indexed (uses more async if SSD)
    :return: a list of PhotoFiles, with None entries for errors
    """
    logger = logging.getLogger(__name__)
    # files is iterated three times (hashing, EXIF batch, indexing loop);
    # materialize it so generators and other one-shot iterables work.
    files = list(files)
    if storage_type in ("SSD", "RAID"):
        async_hashes = True
        async_exif = cpu_count()
    else:
        # concurrent reads of sequential files can lead to thrashing
        async_hashes = False
        # exiftool is partially CPU-bound and benefits from async
        async_exif = min(4, cpu_count())
    logger.info("Collecting media hashes")
    checksum_cache = AsyncFileHasher(
        algorithm=hash_algorithm, use_async=async_hashes
    ).check_files(files, pbar_unit="B")
    logger.info("Collecting media dates and times")
    datetime_cache = AsyncExifTool(num_workers=async_exif).get_best_datetime_batch(
        files
    )

    logger.info("Indexing media")
    photos = []
    exiftool = ExifTool()
    exiftool.start()
    try:
        for current_file in tqdm(files):
            if logger.isEnabledFor(logging.DEBUG):
                tqdm.write(f"Indexing {current_file}")
            try:
                pf = PhotoFile.from_file_cached(
                    current_file,
                    checksum_cache=checksum_cache,
                    datetime_cache=datetime_cache,
                    algorithm=hash_algorithm,
                    tz_default=tz_default,
                    priority=priority,
                )
                photos.append(pf)
            except Exception as e:
                # an error indexing one file must not abort the batch
                tqdm.write(f"Error indexing {current_file}", file=sys.stderr)
                # format_exception's etype/value/tb keyword names were removed
                # in Python 3.10; pass positionally for cross-version support
                tb_str = "".join(
                    traceback.format_exception(type(e), e, e.__traceback__)
                )
                tqdm.write(tb_str, file=sys.stderr)
                photos.append(None)
    finally:
        # always stop the exiftool subprocess, even if indexing raises
        exiftool.terminate()
    return photos
Esempio n. 16
0
import pytest
from photomanager.pyexiftool import ExifTool
from photomanager.photofile import PhotoFile

# Directory holding the on-disk image fixtures (../test_files relative
# to this test module).
FIXTURE_DIR = Path(__file__).resolve().parent.parent / "test_files"
# datafiles marker (pytest-datafiles plugin — TODO confirm) that copies
# the A, B, and C fixture folders into the test's temporary directory,
# keeping the top-level folder names.
ALL_IMG_DIRS = pytest.mark.datafiles(
    FIXTURE_DIR / "A",
    FIXTURE_DIR / "B",
    FIXTURE_DIR / "C",
    keep_top_dir=True,
)
photofile_expected_results = [
    PhotoFile(
        chk="d090ce7023b57925e7e94fc80372e3434fb1897e00b4452a25930dd1b83648fb",
        src="A/img1.jpg",
        dt="2015:08:01 18:28:36.90",
        ts=1438468116.9,
        fsz=771,
        tzo=-14400.0,
    ),
    PhotoFile(
        chk="3b39f47d51f63e54c76417ee6e04c34bd3ff5ac47696824426dca9e200f03666",
        src="A/img2.jpg",
        dt="2015:08:01 18:28:36.99",
        ts=1438450116.99,
        fsz=771,
        tzo=3600.0,
    ),
    PhotoFile(
        chk="1e10df2e3abe4c810551525b6cb2eb805886de240e04cc7c13c58ae208cabfb9",
        src="A/img1.png",
        dt="2015:08:01 18:28:36.90",