예제 #1
0
def generate_test_database(num_uids=10000, r_seed=42):
    """
    Build a Database filled with pseudo-random photo entries.

    The module-level ``random`` generator is seeded with ``r_seed``, so the
    same arguments produce the same sequence of random draws.

    :param num_uids: number of uids to generate (each gets 1 to 3 photos)
    :param r_seed: seed passed to ``random.seed``
    :return: the populated Database
    """
    random.seed(r_seed, version=2)
    database = Database()
    for _ in range(num_uids):
        uid = "".join(random.choices(database.UID_ALPHABET, k=8))
        database.photo_db[uid] = []
        num_photos = random.randint(1, 3)
        for _ in range(num_photos):
            # NOTE: the order of the random draws below determines the
            # generated data for a given seed; do not reorder them.
            checksum = "".join(random.choices(string.hexdigits, k=64))
            # microsecond-resolution integer converted to float seconds
            timestamp = random.randint(1037750179000000, 1637750179000000) / 1000000
            local_dt = datetime.datetime.fromtimestamp(timestamp)
            utc_offset = datetime.timedelta(hours=random.randint(-12, 12))
            dt = local_dt.astimezone(datetime.timezone(utc_offset))
            ts_str = dt.strftime("%Y-%m-%d %H:%M:%S%z")
            file_name = f"IMG_{random.randint(0, 9999):04d}.JPG"
            source_path = f"/path/to/photo/{dt.year}/{file_name}"
            # a draw of 1 leaves the photo without a store path
            if random.randint(0, 1):
                store_path = ""
            else:
                store_path = f"{dt.year}/{file_name}"
            filesize = random.randint(100000, 100000000)
            database.photo_db[uid].append(
                PhotoFile(
                    chk=checksum,
                    src=source_path,
                    ts=timestamp,
                    dt=ts_str,
                    fsz=filesize,
                    sto=store_path,
                )
            )
    return database
예제 #2
0
def test_database_init_version_too_high():
    """
    Loading JSON whose version is greater than Database.VERSION
    raises DatabaseException
    """
    unsupported_version = str(Database.VERSION + 1).encode()
    json_template = b"""{
  "version": VERSION,
  "hash_algorithm": "sha256",
  "timezone_default": "-0400",
  "photo_db": {},
  "command_history": {}
}"""
    # substitute the placeholder with a version one above the current one
    json_data = json_template.replace(b"VERSION", unsupported_version)
    with pytest.raises(DatabaseException):
        Database.from_json(json_data)
예제 #3
0
def test_database_add_photo_same_source_new_checksum(caplog):
    """
    Adding a photo whose source_path is already in the database but whose
    checksum differs still adds the photo, and a warning is logged.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    changed_photo = PhotoFile(
        chk="not_a_match",
        src="/a/b/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.add_photo(changed_photo, uid="uid1")
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid == "uid1"
    # both the new and the previously-indexed checksum map to the same uid
    for checksum in ("not_a_match", "deadbeef"):
        assert db.hash_to_uid[checksum] == "uid1"
    print(db.photo_db["uid1"])
    assert len(db.photo_db["uid1"]) == 2
    print([(r.levelname, r) for r in caplog.records])
    logged_levels = {r.levelname for r in caplog.records}
    assert "WARNING" in logged_levels
    expected_text = "Checksum of previously-indexed source photo has changed"
    assert any(expected_text in r.msg for r in caplog.records)
예제 #4
0
def clean(
    database: Database,
    destination: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    dry_run: bool = False,
) -> dict[str, int]:
    """
    Remove the photos that the database selects for removal from destination.

    :param database: the Database
    :param destination: the photo storage directory
    :param subdir: passed to Database.get_photos_to_remove as ``subdirectory``
    :param dry_run: passed on to the database query and to
        fileops.remove_photos
    :return: dict with keys num_removed_photos, total_file_size
        and num_missing_photos
    """
    logger = logging.getLogger(__name__)
    photos_to_remove = database.get_photos_to_remove(
        destination,
        subdirectory=subdir,
        dry_run=dry_run,
    )
    total_file_size = sum(photo.fsz for photo in photos_to_remove)
    logger.info(f"Identified {len(photos_to_remove)} lower-priority items for removal")
    logger.info(f"Total file size: {sizeof_fmt(total_file_size)}")

    removal_counts = fileops.remove_photos(
        destination, photos_to_remove, dry_run=dry_run
    )
    num_removed_photos, num_missing_photos = removal_counts
    logger.info(
        f"{'Found' if dry_run else 'Removed'} {num_removed_photos} items "
        f"and skipped {num_missing_photos} missing items"
    )
    return {
        "num_removed_photos": num_removed_photos,
        "total_file_size": total_file_size,
        "num_missing_photos": num_missing_photos,
    }
예제 #5
0
def _stats(db: Union[str, PathLike]):
    """
    Load the database file and print its summary statistics.

    :param db: path of the database file
    """
    config_logging()
    stats = Database.from_file(db).get_stats()
    # get_stats() yields (uids, photos, stored photos, total size) in that order
    num_uids, num_photos, num_stored_photos, total_file_size = stats
    print(f"Total items:        {num_photos}")
    print(f"Total unique items: {num_uids}")
    print(f"Total stored items: {num_stored_photos}")
    print(f"Total file size:    {sizeof_fmt(total_file_size)}")
예제 #6
0
def test_database_save_not_modified(tmpdir, caplog):
    """
    Database.save() writes nothing when the database has not changed
    since it was loaded
    """
    caplog.set_level(logging.DEBUG)
    db_path = tmpdir / "photos.json"
    db = Database.from_json(example_database_json_data3)
    db.save(db_path, ["photomanager", "test"])
    skip_message = "The database was not modified and will not be saved"
    assert skip_message in caplog.messages
    assert not db_path.exists()
예제 #7
0
def test_database_get_photos_to_collect_same_checksum_same_priority(
        caplog, tmpdir):
    """
    Photos sharing a checksum and a priority are not collected again
    """
    caplog.set_level(logging.DEBUG)

    def photo_entry(source_dir, store_path):
        # the two entries differ only in their source directory and store path
        return {
            "checksum": "deadbeef",
            "source_path": str(tmpdir / source_dir / "a.jpg"),
            "datetime": "2015:08:27 04:09:36.50",
            "timestamp": 1440662976.5,
            "file_size": 1024,
            "store_path": store_path,
            "priority": 11,
        }

    example_database = {
        "version": 1,
        "hash_algorithm": "sha256",
        "photo_db": {
            "uid1": [
                photo_entry("source1", "a.jpg"),
                photo_entry("source2", ""),
            ]
        },
        "command_history": {
            "2021-03-08_23-56-00Z": "photomanager create --db test.json"
        },
    }
    dir_names = ("source1", "source2", "store")
    for dir_name in dir_names:
        os.makedirs(tmpdir / dir_name)
    for dir_name in dir_names:
        Path(tmpdir / dir_name / "a.jpg").touch()

    db = Database.from_dict(example_database)
    photos_to_copy, counts = db.get_photos_to_collect(tmpdir / "store")
    num_copied, num_added, num_missed, num_stored = counts
    print(photos_to_copy)
    print(num_copied, num_added, num_missed, num_stored)
    assert len(photos_to_copy) == 0
    assert num_copied == 0
    assert num_added == 0
    assert num_missed == 0
    assert num_stored == 2
예제 #8
0
def test_database_list_sources(caplog):
    """
    Database.sources yields every src path stored in the database
    """
    caplog.set_level(logging.DEBUG)
    expected_sources = {"/a/b/c.jpg", "/o/b/c.jpg", "/a/c/e.jpg"}
    db = Database.from_json(example_database_json_data3)
    assert set(db.sources) == expected_sources
예제 #9
0
def test_database_clean_verify_absolute_subdir(tmpdir, caplog):
    """
    Passing an absolute path as the subdirectory raises DatabaseException,
    and verify_indexed_photos raises NotImplementedError
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    destination = tmpdir / "a"
    absolute_subdir = tmpdir / "b"
    with pytest.raises(DatabaseException):
        db.get_photos_to_remove(destination, subdirectory=absolute_subdir)
    with pytest.raises(DatabaseException):
        db.get_stored_photos(subdirectory=absolute_subdir)
    with pytest.raises(NotImplementedError):
        db.verify_indexed_photos()
예제 #10
0
def index(
    database: Database,
    files: Iterable[Union[str, PathLike]],
    priority: int = 10,
    timezone_default: Optional[str] = None,
    storage_type: str = "HDD",
) -> dict[str, Union[int, list[str]]]:
    """
    Index photo files and add them to the database.

    :param database: the Database
    :param files: an iterable of paths to the photos
    :param priority: priority of indexed photos (lower is preferred)
    :param timezone_default: timezone to use when indexing timezone-naive
        photos; when None, the database's timezone_default is used
    :param storage_type: class of storage medium (HDD, SSD, RAID)
    :return: dict with keys changed_uids, num_added_photos,
        num_merged_photos, num_skipped_photos and num_error_photos
    """
    logger = logging.getLogger(__name__)
    if timezone_default is None:
        tz_default = database.timezone_default
    else:
        tz_default = tz_str_to_tzinfo(timezone_default)

    photos = fileops.index_photos(
        files=files,
        priority=priority,
        storage_type=storage_type,
        hash_algorithm=database.hash_algorithm,
        tz_default=tz_default,
    )
    # a None entry is counted as a file that could not be indexed
    num_error_photos = sum(1 for pf in photos if pf is None)
    indexed_photos = (pf for pf in photos if pf is not None)
    add_result = database.add_photos(indexed_photos)
    changed_uids, num_added_photos, num_merged_photos, num_skipped_photos = add_result

    logger.info(f"Indexed {num_added_photos+num_merged_photos}/{len(photos)} items")
    logger.info(
        f"Added {num_added_photos} new items and merged {num_merged_photos} items"
    )
    if num_skipped_photos:
        logger.info(f"Skipped {num_skipped_photos} items")
    if num_error_photos:  # pragma: no cover
        logger.info(f"Encountered an error on {num_error_photos} items")
    return {
        "changed_uids": changed_uids,
        "num_added_photos": num_added_photos,
        "num_merged_photos": num_merged_photos,
        "num_skipped_photos": num_skipped_photos,
        "num_error_photos": num_error_photos,
    }
예제 #11
0
def test_database_is_modified(caplog):
    """
    Database.is_modified() reports True after the database changes
    and False again after reset_saved()
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data3)
    assert not db.is_modified()

    # adding a command counts as a modification
    db.add_command("test")
    assert db.is_modified()

    db.reset_saved()
    assert not db.is_modified()

    # editing a stored PhotoFile in place counts as a modification
    second_photo = db.photo_db["uid1"][1]
    second_photo.sto = "/path/to/sto.jpg"
    assert db.is_modified()
예제 #12
0
def test_database_save_modified(tmpdir, caplog):
    """
    Database.save() writes the file when the database has been modified
    """
    caplog.set_level(logging.DEBUG)
    db_path = tmpdir / "photos.json"
    db = Database.from_json(example_database_json_data3)
    second_photo = db.photo_db["uid1"][1]
    second_photo.sto = "/path/to/sto.jpg"
    db.save(db_path, ["photomanager", "test"])
    skip_message = "The database was not modified and will not be saved"
    assert skip_message not in caplog.messages
    assert db_path.exists()
    with open(db_path, "rb") as f:
        saved = orjson.loads(f.read())
    assert len(saved["command_history"]) == 2
예제 #13
0
def test_database_init_update_version_1():
    """
    A version 1 database is upgraded to the current version when loaded
    """
    json_data = b"""{
  "version": 1,
  "hash_algorithm": "sha256",
  "timezone_default": "-0400",
  "photo_db": {
    "d239210f00534b76a2b215e073f75832": [
      {
        "checksum": "deadbeef",
        "source_path": "/a/b/c.jpg",
        "datetime": "2015:08:27 04:09:36.50",
        "timestamp": 1440662976.5,
        "file_size": 1024,
        "store_path": "/d/e/f.jpg",
        "priority": 11,
        "tz_offset": null
      },
      {
        "checksum": "deadbeef",
        "source_path": "/g/b/c.jpg",
        "datetime": "2015:08:27 04:09:36.50",
        "timestamp": 1440662976.5,
        "file_size": 1024,
        "store_path": "",
        "priority": 20,
        "tz_offset": -14400
      }
    ]
  },
  "command_history": {
    "2021-03-08_23-56-00Z": "photomanager create --db test.json",
    "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg"
  }
}"""
    # Derive the expected output: bump the version number and rename every
    # quoted key according to NAME_MAP_ENC.
    current_version = f'"version": {Database.VERSION}'.encode()
    expected_json = json_data.replace(b'"version": 1', current_version)
    for old_key, new_key in NAME_MAP_ENC.items():
        expected_json = expected_json.replace(
            b'"%b"' % old_key.encode(),
            b'"%b"' % new_key.encode(),
        )
    db = Database.from_json(json_data)
    print(db.db)
    assert db.db["timezone_default"] == "-0400"
    # UTC-04:00, the same offset as timedelta(days=-1, seconds=72000)
    assert db.timezone_default == timezone(timedelta(hours=-4))
    assert orjson.loads(db.json) == orjson.loads(expected_json)
    assert db.to_json(pretty=True) == expected_json
예제 #14
0
def test_database_save(tmpdir, caplog):
    """
    A database written with to_file() and read back with from_file()
    compares equal to the original for .json, .json.gz and .json.zst paths
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)

    json_path = tmpdir / "test.json"
    db.to_file(json_path)
    reloaded = db.from_file(json_path)
    print(db.db, reloaded.db, sep="\n")
    assert db == reloaded

    gzip_path = tmpdir / "test.json.gz"
    db.to_file(gzip_path)
    reloaded = db.from_file(gzip_path)
    print(reloaded.db)
    assert db == reloaded

    zstd_path = tmpdir / "test.json.zst"
    db.to_file(zstd_path)
    reloaded = db.from_file(zstd_path)
    print(reloaded.db)
    assert db == reloaded
예제 #15
0
def _create(
    db: Union[str, PathLike],
    hash_algorithm: str = DEFAULT_HASH_ALGO,
    timezone_default: str = "local",
    debug: bool = False,
):
    """
    Create the database file, or update the settings of an existing one.

    :param db: path of the database file
    :param hash_algorithm: name of the hash algorithm to store in the database
    :param timezone_default: value stored under the "timezone_default" key
    :param debug: passed to config_logging
    """
    config_logging(debug=debug)
    try:
        database = Database.from_file(db)
    except FileNotFoundError:
        # nothing on disk yet: start from an empty database
        database = Database()
    database.hash_algorithm = HashAlgorithm(hash_algorithm)
    database.db["timezone_default"] = timezone_default
    # force=True is passed so save() is asked to write unconditionally
    database.save(
        path=db,
        argv=sys.argv,
        force=True,
    )
예제 #16
0
def _import(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    source: Optional[Union[str, PathLike]] = None,
    file: Optional[Union[str, PathLike]] = None,
    paths: Iterable[Union[str, PathLike]] = tuple(),
    exclude: Iterable[str] = tuple(),
    skip_existing: bool = False,
    debug: bool = False,
    dry_run: bool = False,
    priority: int = 10,
    timezone_default: Optional[str] = None,
    storage_type: str = "HDD",
    collect_db: bool = False,
):
    """
    Index photos into the database, then collect them to destination.

    The database is saved unless dry_run is set. The process exits through
    click_exit with status 1 when indexing reported errors or collecting
    reported missed or errored photos, and with status 0 otherwise.

    :param db: path of the database file (opened with create_new=True)
    :param destination: the photo storage directory
    :param source: passed to fileops.list_files
    :param file: passed to fileops.list_files
    :param paths: passed to fileops.list_files
    :param exclude: passed to fileops.list_files
    :param skip_existing: exclude files whose paths are already
        database sources
    :param debug: passed to config_logging
    :param dry_run: passed to actions.collect; also skips saving the database
    :param priority: priority given to the indexed photos
    :param timezone_default: passed to actions.index
    :param storage_type: passed to actions.index
    :param collect_db: passed to Database.save
    """
    config_logging(debug=debug)
    database = Database.from_file(db, create_new=True)
    existing_sources = set(database.sources) if skip_existing else set()
    filtered_files = fileops.list_files(
        source=source,
        file=file,
        exclude=exclude,
        exclude_files=existing_sources,
        paths=paths,
    )
    index_result = actions.index(
        database=database,
        files=filtered_files,
        priority=priority,
        timezone_default=timezone_default,
        storage_type=storage_type,
    )
    # The filter depends on the *set* being non-empty, not on the flag:
    # with no known sources, no uid filter is applied.
    filter_uids = index_result["changed_uids"] if existing_sources else None
    collect_result = actions.collect(
        database=database,
        destination=destination,
        dry_run=dry_run,
        filter_uids=filter_uids,
    )
    if not dry_run:
        database.save(
            path=db,
            argv=sys.argv,
            collect_db=collect_db,
            destination=destination,
        )
    failed = (
        index_result["num_error_photos"]
        or collect_result["num_missed_photos"]
        or collect_result["num_error_photos"]
    )
    click_exit(1 if failed else 0)
예제 #17
0
def test_database_add_photo_sort(caplog):
    """
    Photos added under one uid are kept in the order asserted below,
    in which the added entries appear by ascending prio
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    # (src, prio) of the photos to add, in insertion order
    additions = (
        ("/x/y/c.jpg", 20),
        ("/z/y/c.jpg", 11),
        ("/0/1/c.jpg", 10),
    )
    returned_uids = []
    for src, prio in additions:
        photo = PhotoFile(
            chk="deadbeef",
            src=src,
            dt="2015:08:27 04:09:36.50",
            ts=1440662976.5,
            fsz=1024,
            sto="",
            prio=prio,
        )
        returned_uids.append(db.add_photo(photo, uid=None))
    # the uid returned for the first added photo is the one inspected
    uid = returned_uids[0]
    stored_sources = [p.src for p in db.photo_db[uid]]
    assert stored_sources == [
        "/0/1/c.jpg",
        "/a/b/c.jpg",
        "/z/y/c.jpg",
        "/x/y/c.jpg",
    ]
예제 #18
0
def collect(
    database: Database,
    destination: Union[str, PathLike],
    filter_uids: Optional[Container[str]] = None,
    dry_run: bool = False,
) -> dict[str, int]:
    """
    Collect the database's highest-priority photos to destination.

    :param database: the Database
    :param destination: the photo storage directory
    :param filter_uids: optional, only collect the specified photo uids
    :param dry_run: perform a dry run that makes no changes
    :return: dict with keys num_copied_photos, num_added_photos,
        num_missed_photos, num_stored_photos, total_copied_photos,
        total_copy_size and num_error_photos
    """
    logger = logging.getLogger(__name__)
    photos_to_copy, collect_counts = database.get_photos_to_collect(
        destination, filter_uids=filter_uids
    )
    (
        num_copied_photos,
        num_added_photos,
        num_missed_photos,
        num_stored_photos,
    ) = collect_counts
    copy_result = fileops.copy_photos(destination, photos_to_copy, dry_run=dry_run)
    total_copied_photos, total_copy_size, num_error_photos = copy_result

    logger.info(
        f"{'Would copy' if dry_run else 'Copied'} {total_copied_photos} items, "
        f"total size: {sizeof_fmt(total_copy_size)}: "
        f"{num_added_photos} new items and {num_copied_photos} "
        f"items marked as stored elsewhere"
    )
    if num_stored_photos or num_missed_photos:
        logger.info(
            f"Skipped {num_stored_photos} items already stored "
            f"and {num_missed_photos} missing items"
        )
    if num_error_photos:  # pragma: no cover
        logger.warning(f"Encountered errors copying {num_error_photos} items")
    return {
        "num_copied_photos": num_copied_photos,
        "num_added_photos": num_added_photos,
        "num_missed_photos": num_missed_photos,
        "num_stored_photos": num_stored_photos,
        "total_copied_photos": total_copied_photos,
        "total_copy_size": total_copy_size,
        "num_error_photos": num_error_photos,
    }
예제 #19
0
def _clean(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    debug: bool = False,
    dry_run: bool = False,
):
    """
    Run actions.clean on the database file and exit with its status.

    The database is saved unless dry_run is set. The exit status passed to
    click_exit is 1 when any photo was reported missing, otherwise 0.

    :param db: path of the database file
    :param destination: the photo storage directory
    :param subdir: passed to actions.clean
    :param debug: passed to config_logging
    :param dry_run: passed to actions.clean; also skips saving the database
    """
    config_logging(debug=debug)
    database = Database.from_file(db)
    result = actions.clean(
        database=database,
        destination=destination,
        subdir=subdir,
        dry_run=dry_run,
    )
    if not dry_run:
        database.save(path=db, argv=sys.argv)
    exit_code = 1 if result["num_missing_photos"] else 0
    click_exit(exit_code)
예제 #20
0
def test_database_load_zstd_checksum_error(tmpdir, monkeypatch, caplog):
    """
    Loading a corrupted .zst database fails: a bit flipped in the file
    raises zstandard.ZstdError, and a patched decompress that returns
    altered JSON raises DatabaseException
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    zst_path = tmpdir / "test.json.zst"
    db.to_file(zst_path)
    # flip the lowest bit of the byte at offset 4 of the compressed file
    with open(zst_path, "r+b") as f:
        f.seek(4)
        c = f.read(1)
        flipped = bytes([ord(c) ^ 0b1])
        f.seek(4)
        f.write(flipped)
    with pytest.raises(zstandard.ZstdError):
        db.from_file(zst_path)

    def altered_json(_):
        # ignore the compressed input; return the database's JSON with every
        # occurrence of the original byte swapped for the flipped one
        return db.to_json(pretty=True).replace(c, flipped)

    monkeypatch.setattr(zstandard, "decompress", altered_json)
    with pytest.raises(DatabaseException):
        db.from_file(zst_path)
예제 #21
0
def _collect(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    debug: bool = False,
    dry_run: bool = False,
    collect_db: bool = False,
):
    """
    Run actions.collect on the database file and exit with its status.

    The database is saved unless dry_run is set. The exit status passed to
    click_exit is 1 when photos were missed or failed, otherwise 0.

    :param db: path of the database file
    :param destination: the photo storage directory
    :param debug: passed to config_logging
    :param dry_run: passed to actions.collect; also skips saving the database
    :param collect_db: passed to Database.save
    """
    config_logging(debug=debug)
    database = Database.from_file(db)
    collect_result = actions.collect(
        database=database,
        destination=destination,
        dry_run=dry_run,
    )
    if not dry_run:
        database.save(
            path=db,
            argv=sys.argv,
            collect_db=collect_db,
            destination=destination,
        )
    failed = (
        collect_result["num_missed_photos"]
        or collect_result["num_error_photos"]
    )
    click_exit(1 if failed else 0)
예제 #22
0
def _verify(
    db: Union[str, PathLike],
    destination: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    storage_type: str = "HDD",
    random_fraction: Optional[float] = None,
    debug: bool = False,
):
    """
    Run actions.verify on the database file and exit with its status.

    The exit status passed to click_exit is 1 when any photo was reported
    incorrect or missing, otherwise 0.

    :param db: path of the database file
    :param destination: the photo storage directory (passed as ``directory``)
    :param subdir: passed to actions.verify
    :param storage_type: passed to actions.verify
    :param random_fraction: passed to actions.verify
    :param debug: passed to config_logging
    """
    config_logging(debug=debug)
    database = Database.from_file(db)
    result = actions.verify(
        database=database,
        directory=destination,
        subdir=subdir,
        storage_type=storage_type,
        random_fraction=random_fraction,
    )
    found_problems = (
        result["num_incorrect_photos"] or result["num_missing_photos"]
    )
    click_exit(1 if found_problems else 0)
예제 #23
0
def test_database_add_photo_wrong_uid(caplog):
    """
    Adding a photo whose checksum matches a different uid than the one
    given does not add the photo; add_photo returns None.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    photo = PhotoFile(
        chk="deadbeef",
        src="/x/y/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.add_photo(photo, uid="uid2")
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid is None
예제 #24
0
def test_database_overwrite_error(tmpdir, caplog):
    """
    When the timestamped sibling name of an existing database file is
    occupied by a non-empty directory, to_file() results in the next free
    numbered file (test_1.json, test_2.json, test_3.json)
    """

    def occupy_timestamped_name(db_file):
        # Create a non-empty directory named
        # <name without suffixes>_<mtime as Y-m-d_H-M-S><suffixes>.
        # NOTE(review): presumably this is the name to_file() would give the
        # existing file when moving it aside -- confirm against Database.
        base_path = db_file
        for _ in db_file.suffixes:
            base_path = base_path.with_suffix("")
        modified = datetime.fromtimestamp(db_file.stat().st_mtime)
        timestamp_str = modified.strftime("%Y-%m-%d_%H-%M-%S")
        all_suffixes = "".join(db_file.suffixes)
        blocker = base_path.with_name(
            f"{base_path.name}_{timestamp_str}").with_suffix(all_suffixes)
        os.makedirs(blocker)
        (blocker / "file.txt").touch()

    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data)
    path = Path(tmpdir / "test.json")
    db.to_file(path)
    occupy_timestamped_name(path)
    db.to_file(path)
    print(tmpdir.listdir())
    assert (tmpdir / "test_1.json").exists()

    # test_0.json and test_a.json exist before the next write
    for other_name in ("test_0.json", "test_a.json"):
        Path(tmpdir / other_name).touch()
    db.to_file(path)
    print(tmpdir.listdir())
    assert (tmpdir / "test_2.json").exists()

    path = Path(tmpdir / "test_2.json")
    occupy_timestamped_name(path)
    db.to_file(path)
    print(tmpdir.listdir())
    assert (tmpdir / "test_3.json").exists()
예제 #25
0
def test_database_add_photo_already_present(caplog):
    """
    Adding a photo that the database already contains does not add it
    again; add_photo returns None.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    duplicate = PhotoFile(
        chk="deadbeef",
        src="/a/b/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.add_photo(duplicate, uid="uid1")
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    assert uid is None
예제 #26
0
def test_database_find_photo_ambiguous(caplog):
    """
    With no checksum match and an ambiguous timestamp+source match,
    find_photo logs a warning and returns the first match.
    """
    caplog.set_level(logging.DEBUG)
    db = Database.from_json(example_database_json_data2)
    query = PhotoFile(
        chk="not_a_match",
        src="/x/y/c.jpg",
        dt="2015:08:27 04:09:36.50",
        ts=1440662976.5,
        fsz=1024,
        sto="",
        prio=10,
    )
    uid = db.find_photo(query)
    print([(r.levelname, r) for r in caplog.records])
    print(uid)
    logged_levels = {r.levelname for r in caplog.records}
    assert "WARNING" in logged_levels
    expected_text = "ambiguous timestamp+name match"
    assert any(expected_text in r.msg for r in caplog.records)
    assert uid == "uid1"
예제 #27
0
def _index(
    db: Union[str, PathLike],
    source: Optional[Union[str, PathLike]] = None,
    file: Optional[Union[str, PathLike]] = None,
    paths: Iterable[Union[str, PathLike]] = tuple(),
    exclude: Iterable[str] = tuple(),
    skip_existing: bool = False,
    debug: bool = False,
    dry_run: bool = False,
    priority: int = 10,
    timezone_default: Optional[str] = None,
    storage_type: str = "HDD",
):
    """
    Index photos into the database file and exit with the result status.

    When none of source, file and paths is given, the command help is
    printed and click_exit(1) is called. The database is saved unless
    dry_run is set. The final exit status is 1 when indexing reported
    errors, otherwise 0.

    :param db: path of the database file (opened with create_new=True)
    :param source: passed to fileops.list_files
    :param file: passed to fileops.list_files
    :param paths: passed to fileops.list_files
    :param exclude: passed to fileops.list_files
    :param skip_existing: exclude files whose paths are already
        database sources
    :param debug: passed to config_logging
    :param dry_run: skip saving the database
    :param priority: priority given to the indexed photos
    :param timezone_default: passed to actions.index
    :param storage_type: passed to actions.index
    """
    if not (source or file or paths):
        print("Nothing to index")
        print(click.get_current_context().get_help())
        click_exit(1)
    config_logging(debug=debug)
    database = Database.from_file(db, create_new=True)
    existing_sources = set(database.sources) if skip_existing else set()
    filtered_files = fileops.list_files(
        source=source,
        file=file,
        exclude=exclude,
        exclude_files=existing_sources,
        paths=paths,
    )
    index_result = actions.index(
        database=database,
        files=filtered_files,
        priority=priority,
        timezone_default=timezone_default,
        storage_type=storage_type,
    )
    if not dry_run:
        database.save(path=db, argv=sys.argv)
    exit_code = 1 if index_result["num_error_photos"] else 0
    click_exit(exit_code)
예제 #28
0
def test_verify_random_sample(tmpdir, caplog):
    """
    The random_fraction parameter in actions.verify will verify
    the specified fraction of the stored photos
    (rounded to the nearest integer)
    """
    caplog.set_level(logging.DEBUG)

    def photo_entry(checksum, source_dir, file_name):
        # one stored photo; the file name doubles as its store path
        return {
            "checksum": checksum,
            "source_path": str(tmpdir / source_dir / file_name),
            "datetime": "2015:08:27 04:09:36.50",
            "timestamp": 1440662976.5,
            "file_size": 1024,
            "store_path": file_name,
            "priority": 11,
        }

    example_database = {
        "version": 1,
        "hash_algorithm": "sha256",
        "photo_db": {
            "uid1": [photo_entry("deadbeef", "source1", "a.jpg")],
            "uid2": [photo_entry("asdf", "source2", "b.jpg")],
            "uid3": [photo_entry("ffff", "source1", "c.jpg")],
            "uid4": [photo_entry("beef", "source2", "d.jpg")],
        },
        "command_history": {
            "2021-03-08_23-56-00Z": "photomanager create --db test.json"
        },
    }
    store_dir = tmpdir / "store"
    os.makedirs(store_dir)
    db = Database.from_dict(example_database)
    assert len(db.get_stored_photos()) == 4

    # the store is empty, so the one sampled photo (33% of 4) is missing
    result = actions.verify(
        database=db,
        directory=store_dir,
        random_fraction=0.33,
    )
    print("\nVERIFY 33% (missing photos)")
    print(result)
    assert result["num_correct_photos"] == 0
    assert result["num_incorrect_photos"] == 0
    assert result["num_missing_photos"] == 1

    # empty files exist now; the two sampled photos (50% of 4) are
    # reported as incorrect
    for file_name in ("a.jpg", "b.jpg", "c.jpg", "d.jpg"):
        Path(store_dir / file_name).touch()
    result = actions.verify(
        database=db,
        directory=store_dir,
        random_fraction=0.5,
    )
    print("\nVERIFY 50% (incorrect photos)")
    print(result)
    assert result["num_correct_photos"] == 0
    assert result["num_incorrect_photos"] == 2
    assert result["num_missing_photos"] == 0
예제 #29
0
def test_database_load_version_1():
    """
    A version 1 database without "timezone_default" loads with the current
    version, the "local" timezone default and the expected photo entries
    """
    json_data = b"""{
"version": 1,
"hash_algorithm": "sha256",
"photo_db": {
    "d239210f00534b76a2b215e073f75832": [
        {
            "checksum": "deadbeef",
            "source_path": "/a/b/c.jpg",
            "datetime": "2015:08:27 04:09:36.50",
            "timestamp": 1440662976.5,
            "file_size": 1024,
            "store_path": "/d/e/f.jpg",
            "priority": 11
        },
        {
            "checksum": "deadbeef",
            "source_path": "/g/b/c.jpg",
            "datetime": "2015:08:27 04:09:36.50",
            "timestamp": 1440662976.5,
            "file_size": 1024,
            "store_path": "",
            "priority": 20,
            "tz_offset": -14400
        }
    ]
},
"command_history": {
    "2021-03-08_23-56-00Z": "photomanager create --db test.json",
    "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg"
}
}"""
    db = Database.from_json(json_data)
    print(db.db)

    # top-level settings after loading
    assert db.version == Database.VERSION
    assert db.hash_algorithm == HashAlgorithm.SHA256
    assert db.db["timezone_default"] == "local"
    assert db.timezone_default is None

    stored_photo = PhotoFile.from_dict({
        "chk": "deadbeef",
        "src": "/a/b/c.jpg",
        "dt": "2015:08:27 04:09:36.50",
        "ts": 1440662976.5,
        "fsz": 1024,
        "sto": "/d/e/f.jpg",
        "prio": 11,
    })
    unstored_photo = PhotoFile.from_dict({
        "chk": "deadbeef",
        "src": "/g/b/c.jpg",
        "dt": "2015:08:27 04:09:36.50",
        "ts": 1440662976.5,
        "fsz": 1024,
        "sto": "",
        "prio": 20,
        "tzo": -14400,
    })
    photo_db_expected = {
        "d239210f00534b76a2b215e073f75832": [stored_photo, unstored_photo],
    }
    command_history_expected = {
        "2021-03-08_23-56-00Z": "photomanager create --db test.json",
        "2021-03-08_23-57-00Z": "photomanager import --db test.json test.jpg",
    }
    db_expected = {
        "version": Database.VERSION,
        "hash_algorithm": HashAlgorithm.SHA256,
        "timezone_default": "local",
        "photo_db": photo_db_expected,
        "command_history": command_history_expected,
    }
    assert db.photo_db == photo_db_expected
    assert db.command_history == command_history_expected
    # the serialized form differs from the version 1 input
    assert orjson.loads(db.json) != orjson.loads(json_data)
    assert db.db == db_expected
    assert db == Database.from_dict(orjson.loads(json_data))
    assert db.get_stats() == (1, 2, 1, 1024)
예제 #30
0
def verify(
    database: Database,
    directory: Union[str, PathLike],
    subdir: Union[str, PathLike] = "",
    storage_type: str = "HDD",
    random_fraction: Optional[float] = None,
) -> dict[str, int]:
    """
    Check the files stored in directory against checksums in the database.

    :param database: the Database
    :param directory: the photo storage directory
    :param subdir: verify only photos within subdirectory
    :param storage_type: the type of media the photos are stored on
        (passed to fileops.hash_stored_photos)
    :param random_fraction: verify a randomly sampled fraction of the photos
    :return: dict with keys num_correct_photos, num_incorrect_photos
        and num_missing_photos
    """
    logger = logging.getLogger(__name__)
    destination = Path(directory).expanduser().resolve()
    stored_photos = database.get_stored_photos(subdir)
    if random_fraction is not None:
        num_stored = len(stored_photos)
        # clamp the rounded sample size to the range [0, num_stored]
        sample_size = min(max(round(random_fraction * num_stored), 0), num_stored)
        stored_photos = random.sample(stored_photos, k=sample_size)
    total_file_size = sum(photo.fsz for photo in stored_photos)
    logger.info(f"Verifying {len(stored_photos)} items")
    logger.info(f"Total file size: {sizeof_fmt(total_file_size)}")

    logger.info("Collecting media hashes")
    checksum_cache = fileops.hash_stored_photos(
        photos=stored_photos,
        directory=directory,
        hash_algorithm=database.hash_algorithm,
        storage_type=storage_type,
    )

    num_correct_photos = 0
    num_incorrect_photos = 0
    num_missing_photos = 0
    for photo in tqdm(stored_photos):
        # the cache is looked up by the absolute path of the stored file
        abs_store_path = str(destination / photo.sto)
        if abs_store_path not in checksum_cache:
            tqdm.write(f"Missing photo: {abs_store_path}", file=sys.stderr)
            num_missing_photos += 1
            continue
        if checksum_cache[abs_store_path] == photo.chk:
            num_correct_photos += 1
            continue
        tqdm.write(f"Incorrect checksum: {abs_store_path}", file=sys.stderr)
        num_incorrect_photos += 1

    num_checked = num_correct_photos + num_incorrect_photos + num_missing_photos
    logger.info(f"Checked {num_checked} items")
    if num_incorrect_photos or num_missing_photos:
        logger.warning(
            f"Found {num_incorrect_photos} incorrect and "
            f"{num_missing_photos} missing items"
        )
    else:
        logger.info("No errors found")
    return {
        "num_correct_photos": num_correct_photos,
        "num_incorrect_photos": num_incorrect_photos,
        "num_missing_photos": num_missing_photos,
    }