def get_duplicates(self, fcomp=True):
    """Yield one ``DuplicateFileResult`` per group of files sharing a hash.

    Every hash that is referenced by more than one row in ``files`` forms a
    group. The first path of a group is always recorded as a duplicate and
    serves as the reference the remaining paths are checked against.

    :param fcomp: when true, each remaining path is compared to the first
        one with ``filecmp.cmp``; paths that do not compare equal are
        recorded with ``add_diff`` instead of ``add_duplicate``. When false,
        every path in the group is recorded as a duplicate without touching
        the file system.
    :return: a generator of ``DuplicateFileResult`` objects.

    NOTE: the paths of a group are joined with ``'|'`` in SQL and split
    again here, so a path that itself contains ``'|'`` is split into pieces.
    """
    cursor = self.connection.cursor()
    try:
        rows = cursor.execute("""
            SELECT GROUP_CONCAT(full_path , '|')
            FROM files f
            JOIN hashes h ON h.hash_id = f.hash_id
            GROUP BY h.hash_id
            HAVING COUNT(h.hash_id) > 1
        """).fetchall()
    finally:
        # fetchall() already loaded every row, so the cursor can be released
        # before the first result is yielded.
        cursor.close()

    for (joined_paths,) in rows:
        paths = joined_paths.split("|")
        first = paths[0]
        result = DuplicateFileResult()
        result.add_duplicate(first)
        for other in paths[1:]:
            # Without fcomp the hash match alone is trusted; the short-circuit
            # keeps filecmp from being called at all in that mode.
            if not fcomp or filecmp.cmp(first, other):
                result.add_duplicate(other)
            else:
                result.add_diff(other)
        yield result
def test_add_diff(self):
    """Paths added via add_diff show up in get_files in insertion order."""
    result = DuplicateFileResult()
    expected = []
    for path in ("x", "y"):
        result.add_diff(path)
        expected.append(path)
        # Check after every insertion, not only at the end.
        assert result.get_files() == expected
def test_duplicates_with_non_equal_duplicates(mocker):
    """The duplicates command prints the path and NOT EQUAL for a diff entry."""
    differing = DuplicateFileResult()
    differing.add_diff("x")

    # Stand-in indexer whose get_duplicates returns the single prepared result.
    fake_indexer = mocker.MagicMock()
    fake_indexer.get_duplicates.return_value = [differing]
    mocker.patch('hashdex.cli.Indexer', return_value=fake_indexer)

    runner = CliRunner()
    with runner.isolated_filesystem():
        outcome = runner.invoke(cli, ['duplicates', '--index', './index.db'])
        output = outcome.output
        assert 'x' in output
        assert 'NOT EQUAL' in output
def test_get_duplicates(self, mocker):
    """get_duplicates splits each '|'-joined row into one result per group."""
    rows = [("path1|path2",), ("path3|path4|path5",)]
    connection = mocker.MagicMock()
    fetchall = connection.cursor.return_value.execute.return_value.fetchall
    fetchall.return_value = rows

    # Every file comparison reports "equal", so all paths count as duplicates.
    mocker.patch("filecmp.cmp", return_value=True)

    expected = []
    for group in (("path1", "path2"), ("path3", "path4", "path5")):
        group_result = DuplicateFileResult()
        for path in group:
            group_result.add_duplicate(path)
        expected.append(group_result)

    indexer = Indexer(connection, mocker.Mock())
    assert list(indexer.get_duplicates()) == expected
def test_compare_duplicate_results(self):
    """Equality depends on which category a path is in, not on call order."""
    # Same two paths, but each one filed under the opposite category.
    original = DuplicateFileResult()
    original.add_duplicate("x")
    original.add_diff("y")
    swapped = DuplicateFileResult()
    swapped.add_diff("x")
    swapped.add_duplicate("y")
    assert original != swapped

    # Same categories, populated in a different order.
    dup_first = DuplicateFileResult()
    dup_first.add_duplicate("x")
    dup_first.add_diff("y")
    diff_first = DuplicateFileResult()
    diff_first.add_diff("y")
    diff_first.add_duplicate("x")
    assert dup_first == diff_first
def test_is_not_equal(self):
    """A result holding a diff entry reports is_equal() as False."""
    result = DuplicateFileResult()
    result.add_diff("x")
    verdict = result.is_equal()
    assert verdict is False
def test_is_equal(self):
    """A result holding only a duplicate entry reports is_equal() as True."""
    result = DuplicateFileResult()
    result.add_duplicate("x")
    verdict = result.is_equal()
    assert verdict is True