def test_file_names(self):
        """Index files whose names cover the code points 1 through 255.

        Four names are generated, each carrying one slice of the code point
        range.  Every name (with "/" removed) is used both for a directory
        below ``data`` and for a 1024-byte file inside that directory.  The
        tree is indexed twice; after each run the number of file ids in the
        database must equal the number of generated names.
        """
        with TempDir() as tmpdir:
            datadir = join(tmpdir.name, 'data')
            code_point_ranges = [(1, 80), (80, 160), (160, 240), (240, 256)]
            names = [
                "all_chars_part%d_%s" % (part, "".join([unichr(x) for x in range(lo, hi)]))
                for part, (lo, hi) in enumerate(code_point_ranges, 1)
            ]

            for name in names:
                # "/" is dropped before the name is used as a path component.
                name = name.replace("/", "")
                subdir = tmpdir.create_dir('data', name)
                write_binary(1024, join(subdir, name))

            db_fn = join(tmpdir.name, 'files.sdb')
            the_db = db.Database(db_fn, verbose=0)
            indexer = BitIndexer(the_db, DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, verbose_progress=1)
            with NoStderr():
                indexer.run([datadir])
            # assertEqual rather than assertTrue: assertTrue would take the
            # second argument as the failure message and never compare it.
            self.assertEqual(len(the_db.file.find_ids()), len(names))

            with NoStderr() as devnull:
                indexer.run([datadir])
                # Second run: one "[]" marker per generated name plus one more
                # (presumably for the top-level data directory).
                self.assertEqual(devnull.written(), "[]"*(1+len(names))+"\n")
            self.assertEqual(len(the_db.file.find_ids()), len(names))
 def test_2049_bytes_file(self):
     with TempDir() as tmpdir:
         tmpfn = os.path.join(tmpdir.name, 'input')
         write_binary(2049, tmpfn)
         hashs = get_sha1sums(tmpfn, os.path.getsize(tmpfn), 1024)
         self.assertEqual(hashs, ('5b00669c480d5cffbdfa8bdba99561160f2d1b77', '170751534f1a95fd80a7a25787ecad2b60368e0a',
                                  {2048L: 'f10ccfde60c17db26e7d85d35665c7661dbbeb2c'}, False))
         self.assertEqual(get_partial_sha1(tmpfn, 0, 1024), '5b00669c480d5cffbdfa8bdba99561160f2d1b77')
         self.assertEqual(get_partial_sha1(tmpfn, 0, 2048), 'f10ccfde60c17db26e7d85d35665c7661dbbeb2c')
         self.assertEqual(get_partial_sha1(tmpfn, 0, 2049), '170751534f1a95fd80a7a25787ecad2b60368e0a')
    def test_progress_for_coverage(self):
        """Run the indexer with ``verbose_progress=1`` on a one-file tree.

        The database and indexer are created and run inside a ``NoStderr``
        context; the test only checks that no exception was recorded.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir('data')
            write_binary(1024, join(datadir, 'input'))
            database_path = join(tmpdir.name, 'files.sdb')

            with NoStderr():
                database = db.Database(database_path, verbose=0)
                progress_indexer = BitIndexer(database, DO_NOT_MATCH_RE, DO_NOT_MATCH_RE,
                                              verbose_progress=1)
                progress_indexer.run([datadir])
            self.assert_no_exception()
    def test_unreadable_file(self):
        """Indexing a file passed through ``make_unreadable`` must write "[P]".

        The single input file is created, made unreadable, and the tree is
        indexed with progress output enabled; the text captured from stderr
        must be exactly ``"[P]\\n"``.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir('data')
            target = join(datadir, 'input')
            write_binary(1024, target)
            make_unreadable(target)

            database = db.Database(join(tmpdir.name, 'files.sdb'), verbose=0)
            indexer = BitIndexer(database, DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, verbose_progress=1)
            with NoStderr() as captured:
                indexer.run([datadir])
                self.assertEqual(captured.written(), "[P]\n")
    def test_for_coverage(self):
        """Index 1000 files with very small commit/stats/warning intervals.

        ``commit_every``, ``full_stats_every`` and ``dir_warn_threshold`` are
        all set to 0.001; the test only checks that no exception was recorded.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir("data")

            file_names = ['input_%03d' % number for number in range(1000)]
            for file_name in file_names:
                write_binary(1024, join(datadir, file_name))

            database = db.Database(join(tmpdir.name, 'files.sdb'), verbose=0)
            options = dict(verbose_progress=0, commit_every=0.001,
                           full_stats_every=0.001, dir_warn_threshold=0.001)
            indexer = BitIndexer(database, DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, **options)
            indexer.run([datadir])
            self.assert_no_exception()
    def test_with_subdirs(self):
        """Find equal-file pairs across a directory and two subdirectories.

        Four 1024-byte files written by ``write_binary`` are spread over
        ``data``, ``data/sub1`` and ``data/sub2``.  Searching from ``data``
        must report all six pairs; searching from ``sub1`` must report only
        the pair located inside ``sub1``.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir("data")
            subdir1 = tmpdir.create_dir("data", "sub1")
            subdir2 = tmpdir.create_dir("data", "sub2")

            file_a = join(subdir2, 'hash1_a')
            file_b = join(subdir1, 'hash1_b')
            file_c = join(subdir1, 'hash1_c')
            file_d = join(datadir, 'hash1_d')
            for path in (file_a, file_b, file_c, file_d):
                write_binary(1024, path)

            db_fn = join(tmpdir.name, 'files.sdb')
            the_db = db.Database(db_fn, verbose=0)
            BitIndexer(the_db, DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, verbose_progress=0).run([datadir])

            def equal_pairs(root):
                # Each query opens its own Database object on the same file.
                finder = BitEqualFinder(db.Database(db_fn, verbose=0), [root])
                return [(hit.size, hit.hardlinked, hit.path1, hit.path2)
                        for hit in finder.find()]

            self.assertEqual(equal_pairs(datadir), [
                (1024, False, file_d, file_b),
                (1024, False, file_d, file_c),
                (1024, False, file_d, file_a),
                (1024, False, file_b, file_c),
                (1024, False, file_b, file_a),
                (1024, False, file_c, file_a)])

            self.assertEqual(equal_pairs(subdir1), [
                (1024, False, file_b, file_c)])
    def test_other_io_error(self):
        """An IOError with errno EHOSTUNREACH during indexing must write "[E]".

        The indexer's ``get_or_insert_content`` is replaced by a stub that
        always raises such an error; the text captured from stderr must be
        exactly ``"[E]\\n"``.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir('data')
            write_binary(1024, join(datadir, 'input'))

            database = db.Database(join(tmpdir.name, 'files.sdb'), verbose=0)
            indexer = BitIndexer(database, DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, verbose_progress=1)

            def raise_host_unreachable(*_):
                # errno is assigned after construction, so the message stays
                # the only constructor argument.
                failure = IOError("dummy io error")
                failure.errno = errno.EHOSTUNREACH
                raise failure

            indexer.get_or_insert_content = raise_host_unreachable
            with NoStderr() as captured:
                indexer.run([datadir])
                self.assertEqual(captured.written(), "[E]\n")
    def test_content_changes(self):
        """Re-indexing a rewritten file must point it at a new content row.

        After the first run the file references content id 1.  The file is
        then rewritten with ``offset=1``, its timestamps are moved two seconds
        into the future, and the tree is indexed again; the file must then
        reference content id 2 with a different full SHA-1.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir('data')
            input_path = join(datadir, 'input')
            write_binary(1024, input_path)

            db_fn = join(tmpdir.name, 'files.sdb')
            indexer = BitIndexer(db.Database(db_fn, verbose=0), DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, verbose_progress=0)
            indexer.run([datadir])

            query = "select contentid,fullsha1 from file,content where file.contentid=content.id"
            conn = sqlite3.connect(db_fn)
            try:
                self.assertEqual(conn.execute(query).fetchall(),
                                 [(1, u'5b00669c480d5cffbdfa8bdba99561160f2d1b77')])

                new_mtime = time.time() + 2
                write_binary(1024, input_path, offset=1)
                # Set atime and mtime explicitly to a time in the future.
                os.utime(input_path, (new_mtime, new_mtime))
                indexer.run([datadir])

                self.assertEqual(conn.execute(query).fetchall(),
                                 [(2, u'b0f14f1c1d87185bcc46363860b84609d5a2169e')])
            finally:
                # Close the inspection connection even when an assertion
                # fails, before the temporary directory is cleaned up.
                conn.close()
    def test_simple(self):
        """Index a small tree and verify the raw database tables, twice.

        The tree holds a 100-byte file, three files of 1024/1025/1026 bytes,
        a symlink and a fifo.  The expected ``file`` rows contain only the
        three larger regular files.  A second indexing run must leave the
        ``dir``, ``file`` and ``content`` tables unchanged.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir("data")
            subdir1 = tmpdir.create_dir("data", "sub1")
            subdir2 = tmpdir.create_dir("data", "sub2")

            write_binary(100, join(subdir1, 'small_file'))
            write_binary(1024, join(subdir1, 'input1'))
            write_binary(1025, join(subdir1, 'input2'))
            write_binary(1026, join(subdir2, 'input3'))

            os.symlink("input1", join(subdir1, 'symlink'))
            os.mkfifo(join(subdir1, 'fifo'))

            db_fn = join(tmpdir.name, 'files.sdb')
            indexer = BitIndexer(db.Database(db_fn, verbose=0), DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, verbose_progress=0)

            expected_dirs = [(1, datadir), (2, subdir1), (3, subdir2)]
            expected_files = [(1, 2, u'input1', 1), (2, 2, u'input2', 2), (3, 3, u'input3', 3)]
            expected_content = [
                (1, u'5b00669c480d5cffbdfa8bdba99561160f2d1b77', u'5b00669c480d5cffbdfa8bdba99561160f2d1b77', 1024, u'', 0),
                (2, u'5b00669c480d5cffbdfa8bdba99561160f2d1b77', u'409c9978384c2832af4a98bafe453dfdaa8e8054', 1025, u'', 0),
                (3, u'5b00669c480d5cffbdfa8bdba99561160f2d1b77', u'76f936767b092576521501bdb344aa7a632b88b8', 1026, u'', 0)]

            def check_tables(conn):
                # Same three checks after the first and the second run.
                self.assertEqual(conn.execute("select * from dir").fetchall(), expected_dirs)
                self.assertEqual(conn.execute("select id,dirid,name,contentid from file").fetchall(),
                                 expected_files)
                self.assertEqual(conn.execute("select * from content").fetchall(), expected_content)

            indexer.run([datadir])

            conn = sqlite3.connect(db_fn)
            try:
                check_tables(conn)

                indexer.run([datadir])

                check_tables(conn)
            finally:
                # Close the inspection connection even when an assertion
                # fails, before the temporary directory is cleaned up.
                conn.close()
    def test_exclude(self):
        """Files and directories matching the given patterns must be skipped.

        The indexer is created with the patterns ``"^.*input2$"`` and
        ``"^sub2$"`` (presumably the file and directory exclusion regexes, in
        that order).  Of the six files in ``sub1``..``sub3`` only
        ``sub1_input1`` and ``sub3_input1`` may end up in the database.
        """
        with TempDir(0) as tmpdir:
            datadir = tmpdir.create_dir("data")
            subdir1 = tmpdir.create_dir("data", "sub1")
            subdir2 = tmpdir.create_dir("data", "sub2")
            subdir3 = tmpdir.create_dir("data", "sub3")

            for subdir, prefix in ((subdir1, 'sub1'), (subdir2, 'sub2'), (subdir3, 'sub3')):
                write_binary(1024, join(subdir, prefix + '_input1'))
                write_binary(1024, join(subdir, prefix + '_input2'))

            db_fn = join(tmpdir.name, 'files.sdb')
            indexer = BitIndexer(db.Database(db_fn, verbose=0), "^.*input2$", "^sub2$", verbose_progress=0)
            indexer.run([datadir])

            conn = sqlite3.connect(db_fn)
            try:
                rows = conn.execute("SELECT dir.name,file.name FROM dir,file WHERE dir.id=file.dirid").fetchall()
            finally:
                # Close the inspection connection before the temporary
                # directory is cleaned up, whether or not the query succeeded.
                conn.close()
            self.assertEqual(rows,
                             [(subdir1, "sub1_input1"),
                              (subdir3, "sub3_input1")])
    def test_bucket_finder(self):
        """Check the buckets reported by ``BitEqualBucketFinder``.

        Indexed tree: three 1024-byte ``hash1_*`` files, three 1100-byte
        ``hash2_*`` files, a single ``hash3`` file, and ``hash4_a`` with a
        hard link ``hash4_b``.  ``hash2_c`` is deleted after indexing.  The
        finder must return the two remaining ``hash2`` files first, then the
        three ``hash1`` files, and nothing else.
        """
        with TempDir() as tmpdir:
            datadir = tmpdir.create_dir('data')

            def in_data(file_name):
                return join(datadir, file_name)

            for suffix in 'abc':
                write_binary(1024, in_data('hash1_' + suffix))
            for suffix in 'abc':
                write_binary(1100, in_data('hash2_' + suffix), offset=1)
            write_binary(1024, in_data('hash3'), offset=2)
            write_binary(1024, in_data('hash4_a'), offset=3)
            os.link(in_data('hash4_a'), in_data('hash4_b'))

            db_fn = join(tmpdir.name, 'files.sdb')
            the_db = db.Database(db_fn, verbose=0)
            BitIndexer(the_db, DO_NOT_MATCH_RE, DO_NOT_MATCH_RE, verbose_progress=0).run([datadir])

            # Removed only after indexing, so the database still lists it.
            os.remove(in_data('hash2_c'))

            finder = BitEqualBucketFinder(db.Database(db_fn, verbose=0), [datadir])
            buckets = [(bucket.size, [entry.path for entry in bucket.files])
                       for bucket in finder.find()]

            self.assertEqual(buckets, [
                (1100, [in_data('hash2_a'), in_data('hash2_b')]),
                (1024, [in_data('hash1_a'), in_data('hash1_b'), in_data('hash1_c')])])
 def test_1025_bytes_file(self):
     """Check the tuple ``get_sha1sums`` returns for a 1025-byte file."""
     expected = ('5b00669c480d5cffbdfa8bdba99561160f2d1b77',
                 '409c9978384c2832af4a98bafe453dfdaa8e8054',
                 {},
                 False)
     with TempDir() as tmpdir:
         path = os.path.join(tmpdir.name, 'input')
         write_binary(1025, path)
         size = os.path.getsize(path)
         self.assertEqual(get_sha1sums(path, size, 1024), expected)
 def test_1024_file(self):
     """Check the tuple ``get_sha1sums`` returns for a 1024-byte file.

     Both digests in the expected tuple are the same value.
     """
     digest = '5b00669c480d5cffbdfa8bdba99561160f2d1b77'
     with TempDir() as tmpdir:
         path = os.path.join(tmpdir.name, 'input')
         write_binary(1024, path)
         size = os.path.getsize(path)
         self.assertEqual(get_sha1sums(path, size, 1024), (digest, digest, {}, False))
 def test_1023_file(self):
     """``get_sha1sums`` must raise AssertionError for a 1023-byte file."""
     with TempDir() as tmpdir:
         path = os.path.join(tmpdir.name, 'input')
         write_binary(1023, path)
         size = os.path.getsize(path)
         self.assertRaises(AssertionError, get_sha1sums, path, size, 1024)