Exemple #1
0
def test_progress_bar(tmp_path, no_empty, fast, progress):
    """
    populate() drives a caller-supplied progress bar factory such as tqdm.
    """
    file_size = 123
    subdir, files, dupe_groups = tempfiles(tmp_path, file_size)
    dirinfo = DirInfo(subdir)
    # The factory under test may be the real tqdm: checking that the
    # integration with tqdm works matters more than UI details such as how
    # helpful the descriptions are. It may also be DummyBar, which backs up
    # the claim that DummyBar shows the interface a user-defined progress
    # bar has to offer.
    created = []

    def recording_factory(*args, **kwargs):
        # Wrap the factory so every bar that populate() requests is kept.
        print(args, kwargs)
        new_bar = progress(*args, **kwargs)
        created.append(new_bar)
        return new_bar

    dirinfo.populate(progress=recording_factory, no_empty=no_empty, fast=fast)
    # Two bars prove the factory was really used: the first covers reading
    # the files, the second covers hashing them.
    assert len(created) == 2
    if progress is tqdm:
        reading_bar, hashing_bar = created
        # The first bar counts every file, not only the ones hashed.
        assert reading_bar.n == len(files)
        # The one-second ceiling is fairly arbitrary and could be loosened.
        assert 0 <= time.time() - reading_bar.start_t <= 1
        assert 0 <= time.time() - hashing_bar.start_t <= 1
    check_everything(
        file_size, subdir, files, dupe_groups, dirinfo, no_empty, fast,
    )
Exemple #2
0
def test_fileinfo(tmp_path):
    """
    Check the types of the records that hold filename/hash data.
    """
    file_size = 123
    subdir, files, dupe_groups = tempfiles(tmp_path, file_size)
    # Sizes of all the files that appear in some duplicate group.
    dupe_sizes = {
        len(files[name])
        for name in itertools.chain.from_iterable(dupe_groups)
    }
    dirinfo = DirInfo(subdir)
    dirinfo.populate(no_empty=True, fast=True)
    seen_hashed = seen_unhashed = False
    for name in files:
        info = dirinfo.get_relative(name)
        if info.size in dupe_sizes:
            seen_hashed = True
            # check_everything covers the field values; only the types
            # matter here.
            assert isinstance(info, FileStats)
            assert isinstance(info, FileInfo)
            continue
        seen_unhashed = True
        assert isinstance(info, FileStats)
        assert not isinstance(info, FileInfo)
        # Hash data can be attached to plain file stats: first with the
        # default timestamp...
        stamped_now = FileInfo.add_hash(info, b'hash')
        assert isinstance(stamped_now, FileInfo)
        assert stamped_now.hash == b'hash'
        now = datetime.now(tz=timezone.utc)
        latest = now + timedelta(1)
        earliest = now - timedelta(1)
        assert latest >= stamped_now.when >= earliest
        # ...and then with an explicit one.
        explicit_when = datetime(2000, 1, 1)
        stamped_then = FileInfo.add_hash(info, b'hash', explicit_when)
        assert isinstance(stamped_then, FileInfo)
        assert stamped_then.hash == b'hash'
        assert stamped_then.when == datetime(2000, 1, 1)
    # Both branches of the loop must have been exercised.
    assert seen_hashed
    assert seen_unhashed
Exemple #3
0
 def decorated(
     dirname: str,
     populate: bool,
     hash_empty: bool,
     fast: bool,
     resume: bool,
     threads: int,
 ):
     # Obtain a DirInfo for the absolute form of ``dirname``, populating and
     # saving it when needed, then pass it to the wrapped ``func``.
     dirname = os.path.abspath(dirname)
     start_fresh = populate and not resume
     if start_fresh:
         dirinfo = DirInfo(dirname)
     else:
         # Resuming, or populate was not requested: begin from the cached
         # data, and force a populate when the cache holds no files.
         dirinfo = DirInfo.cached(dirname)
         if dirinfo.file_count == 0:
             populate = True
     if not populate:
         return func(dirinfo)
     options = {
         'no_empty': not hash_empty,
         'fast': fast,
         'resume': resume,
         'threads': threads,
         'progress': tqdm,
     }
     dirinfo.populate(**options)
     filename = dirinfo.save()
     # tqdm.write is used for the message rather than a plain print.
     tqdm.write(f'Written {filename}')
     return func(dirinfo)
Exemple #4
0
 def set_up(path, fast=False):
     """
     Create ``path`` holding three small files, populate a DirInfo for it
     (passing ``fast`` through), save the DirInfo, and return ``path``.
     """
     path.mkdir()
     contents = {
         # Two files with identical bytes...
         'dupe1.txt': b'equal',
         'dupe2.txt': b'equal',
         # ...and one whose bytes are derived from the directory's own path.
         'unequal.txt': str(path).encode('utf8'),
     }
     for name, data in contents.items():
         (path / name).write_bytes(data)
     dirinfo = DirInfo(path)
     dirinfo.populate(fast=fast)
     dirinfo.save()
     return path
Exemple #5
0
def test_dir_info(tmp_path):
    """
    Populating a directory yields its files, each with suitable info.
    With no options given, everything is found and hashed.
    """
    size = 12345
    subdir, files, dupe_groups = tempfiles(tmp_path, size)
    info = DirInfo(subdir)
    # No arguments: exercise the default behaviour of populate().
    info.populate()
    check_everything(size, subdir, files, dupe_groups, info)
Exemple #6
0
def test_dir_info_optimisation(tmp_path, no_empty, fast):
    """
    Fewer hashes are computed when empty files are excluded and/or when
    hashing is limited to files whose size matches.
    """
    size = 123
    subdir, files, dupe_groups = tempfiles(tmp_path, size)
    info = DirInfo(subdir)
    info.populate(no_empty=no_empty, fast=fast)
    # The same flags go to the checker as were given to populate().
    check_everything(
        size, subdir, files, dupe_groups, info, no_empty, fast,
    )
Exemple #7
0
 def set_up(path, extra_files=()):
     """
     Create ``path`` holding three fixed files plus one file per name in
     ``extra_files``, populate and save a DirInfo for it, and return
     ``path``.
     """
     path.mkdir()
     fixed_files = (
         ('equal1.txt', b'equal1'),
         ('equal2.txt', b'equal2'),
         # Content derived from the directory's own path.
         ('unequal.txt', str(path).encode('utf8')),
     )
     for name, data in fixed_files:
         (path / name).write_bytes(data)
     # Extras are written after the fixed files and all share one payload.
     for name in extra_files:
         (path / name).write_bytes(b'extra')
     dirinfo = DirInfo(path)
     dirinfo.populate()
     dirinfo.save()
     return path
Exemple #8
0
def test_resume(tmp_path):
    """
    The 'resume' option to populate() avoids re-calculating hashes.

    Also checks that files deleted between runs disappear from the results
    of a resumed populate().
    """
    file_size = 123
    subdir, files, dupe_groups = tempfiles(tmp_path, file_size)
    empty_files = {
        file for file, content in files.items() if len(content) == 0
    }
    # The fixture must contain both empty and non-empty files, otherwise one
    # of the loops below would check nothing.
    assert 0 < len(empty_files) < len(files)
    non_empty_files = set(files) - empty_files
    dirinfo = DirInfo(subdir)
    dirinfo.populate(no_empty=True)
    # Remember the results
    first_results = {file: dirinfo.get_relative(file) for file in files}
    # Resume populating, with different params to include the empty file
    dirinfo.populate(resume=True)
    # Identity (not equality) is compared: a record that was left alone is
    # the very same object, a record that was redone is a new one.
    for file in empty_files:
        assert dirinfo.get_relative(file) is not first_results[file], file
    for file in non_empty_files:
        assert dirinfo.get_relative(file) is first_results[file], file
    # Now delete a file, and make sure it disappears. Cover the cases where
    # the deleted file has and hasn't been hashed.
    missing_files = ('big', 'unequal')
    dirinfo.populate(no_empty=True, fast=True)
    for filename in missing_files:
        (subdir / filename).unlink()
    dirinfo.populate(resume=True, fast=True)
    for filename in missing_files:
        # Only the lookup belongs here: the former trailing ", filename"
        # built a tuple that was thrown away and never acted as a message.
        with pytest.raises(KeyError):
            dirinfo.get_relative(filename)