Example #1
def compare(old_dirname: str, new_dirname: str, list_identical: bool,
            list_changed: bool, list_vanished: bool, list_added: bool,
            list_all: bool) -> None:
    """
    Summarise changes from OLD_DIRNAME to NEW_DIRNAME. Both directories must
    previously have been scanned (by the 'info' command or otherwise).

    The names OLD and NEW assume that you're comparing two snapshots of "the
    same" collection of files, for example two backups on different dates.
    """
    identical = set()
    changed = set()
    vanished = set()
    old_dir = DirInfo.load(old_dirname)
    new_dir = DirInfo.load(new_dirname)
    for old_file in old_dir:
        rel_str = old_file._rel_str
        try:
            new_file = new_dir.get_relative(rel_str)
        except KeyError:
            vanished.add(rel_str)
            continue
        if not isinstance(old_file, FileInfo):
            raise Exception(f'No hash for {old_file.fullpath}')
        if not isinstance(new_file, FileInfo):
            raise Exception(f'No hash for {new_file.fullpath}')
        if new_file.hash == old_file.hash:
            identical.add(rel_str)
        else:
            changed.add(rel_str)
    added = ({file._rel_str for file in new_dir}
             .difference(file._rel_str for file in old_dir))
    print('old:', old_dir.file_count)
    print('new:', new_dir.file_count)
    print('identical:', len(identical))
    print('changed: ', len(changed))
    print('vanished:', len(vanished))
    print('added:', len(added))
    if list_identical or list_all:
        print('\nidentical files:\n  ', end='')
        print('\n  '.join(sorted(identical)))
    if list_changed or list_all:
        print('\nchanged files:\n  ', end='')
        print('\n  '.join(sorted(changed)))
    if list_vanished or list_all:
        print('\nvanished files:\n  ', end='')
        print('\n  '.join(sorted(vanished)))
    if list_added or list_all:
        print('\nadded files:\n  ', end='')
        print('\n  '.join(sorted(added)))
Example #2
def test_serialisation(tmp_path):
    """
    Test failure modes. Success is tested in check_everything.
    """
    subdir = (tmp_path / 'sub')
    jsonfile = (tmp_path / 'sub.dirinfo.json')
    (subdir / 'dir').mkdir(parents=True)
    dirinfo = DirInfo(subdir)
    assert dirinfo.save() == os.fspath(jsonfile)
    # Not exactly a requirement, but for the tests to work we need this.
    assert jsonfile.exists()
    # If this fails, then testing that the bad cases fail is kind of pointless.
    assert DirInfo.load(subdir).base == os.fspath(subdir)
    # Make sure the encoder isn't accidentally used for something it can't handle.
    with pytest.raises(TypeError):
        json.dumps(object(), cls=Encoder)

    # Make sure bad json file contents are rejected
    def bad_jsonfile(jsondata):
        with open(jsonfile, 'w', encoding='utf8') as outfile:
            json.dump(jsondata, outfile)
        with pytest.raises(ValueError):
            DirInfo.load(subdir)

    bad_jsonfile({'foo': 'bar'})
    bad_jsonfile(None)

    # If the serialised base doesn't match the actual location, then something
    # is wrong and we should refuse to load it.
    assert dirinfo.save() == os.fspath(jsonfile)
    with open(jsonfile, 'r', encoding='utf8') as infile:
        jsondata = json.load(infile)
    jsondata['base'] += 'X'
    bad_jsonfile(jsondata)

    # If there's no data then load() fails, but cached() succeeds.
    jsonfile.unlink()
    with pytest.raises(FileNotFoundError):
        DirInfo.load(subdir)
    assert DirInfo.cached(subdir).base == subdir
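
The test above relies on json.dumps(object(), cls=Encoder) raising TypeError for objects the encoder doesn't know about. A minimal sketch of such an Encoder is shown below; the exact types it handles (Path and datetime here) are assumptions, the essential point is that anything unhandled falls through to json.JSONEncoder.default(), which raises TypeError.

# Sketch of an Encoder compatible with the TypeError expectation above.
# The handled types are assumed; unhandled objects defer to the base class.
import json
from datetime import datetime
from pathlib import PurePath

class Encoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, PurePath):
            return str(obj)
        if isinstance(obj, datetime):
            return obj.isoformat()
        # Anything else is unsupported: JSONEncoder.default() raises TypeError.
        return super().default(obj)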
Example #3
def check_everything(file_size,
                     subdir,
                     files,
                     dupes,
                     info,
                     no_empty=False,
                     fast=False,
                     is_copy=False):
    def skip_hash(duplicated_sizes, actual_size):
        # Optionally some files aren't hashed.
        return ((no_empty and actual_size == 0)
                or (fast and actual_size not in duplicated_sizes))

    assert info.file_count == len(files) > 0
    for rel_path_str, content in files.items():
        record = info.get_relative(rel_path_str)
        assert record.basepath == subdir
        assert isinstance(record.relpath, Path)
        assert os.fspath(record.relpath) == rel_path_str
        assert record.size == len(content)
        if skip_hash([file_size], record.size):
            assert not hasattr(record, 'hash'), rel_path_str
        else:
            assert record.hash == hashlib.sha256(content).digest()
            # 10 seconds is arbitrary, but it *shouldn't* be that slow.
            assert record.when >= datetime.now(tz=timezone.utc) - timedelta(
                seconds=10)
    assert sorted(files.keys()) == sorted(file._rel_str for file in info)
    # Must notice that two files have the same hash
    dupe_groups = tuple(info.dupe_groups())
    assert len(dupe_groups) == len(dupes)
    sets = tuple(
        set(os.fspath(stats.relpath) for stats in group)
        for group in dupe_groups)
    assert sets == dupes
    if not is_copy:
        info.save()
        clone = DirInfo.load(subdir)
        check_everything(file_size,
                         subdir,
                         files,
                         dupes,
                         clone,
                         no_empty,
                         fast,
                         is_copy=True)
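
check_everything() expects `files` to map relative path strings to byte contents and `dupes` to be a tuple of sets of relative paths that share a hash. The helper below sketches how a test might build matching fixtures on disk; it is hypothetical and for illustration only, since the real tests construct their data elsewhere.

# Hypothetical fixture builder for check_everything(); the file names and
# contents are made up, only the argument shapes match the function above.
import os

def example_test_data(tmp_path):
    subdir = tmp_path / 'sub'
    files = {
        'a.txt': b'hello',
        os.path.join('dir', 'b.txt'): b'hello',  # same content as a.txt
        'c.txt': b'something else',
    }
    for rel, content in files.items():
        path = subdir / rel
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(content)
    # Identical contents hash identically, so these two form one dupe group.
    dupes = ({'a.txt', os.path.join('dir', 'b.txt')},)
    return subdir, files, dupes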