Beispiel #1
0
    def test_gc_multiple_files(self):
        self.backend1.start()
        self._gen_data(1, prefix='group1_', loop_num=16 * 1024)
        #5M data file 1
        self._gen_data(2, prefix='group1_', loop_num=16 * 1024)
        print 'group1'

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        self._gen_data(1, prefix='group2_', loop_num=16 * 1024)
        self._gen_data(2, prefix='group2_', loop_num=16 * 1024)

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        #5M data file 2
        # data file 3
        self._gen_data(1, prefix='group3_', loop_num=512)
        self._gen_data(2, prefix='group3_', loop_num=512)

        self._gen_data(1, prefix='group4_', loop_num=16 * 1024, sector=1)
        self._gen_data(2, prefix='group4_', loop_num=16 * 1024, sector=1)

        self.assertEqual(self.backend1.item_count(), 32 * 1024 + 512 + 16 * 1024)

        sector0_exp = os.path.join(self.backend1.db_home, "0/*.data")

        print "sector0 files", glob.glob(sector0_exp)
        self.assertEqual(len(glob.glob(sector0_exp)), 3)

        self.backend1.stop()
        print "test append some junk data to file, simulate incomplete data file got gc"
        with open(os.path.join(self.backend1.db_home, "0/001.data"), 'a') as f:
            f.write("junkdatasdfsdfsdfdfdf")

        buckets_txt_path = os.path.join(self.backend1.db_home, "0/buckets.txt")
        if os.path.exists(buckets_txt_path):
            print "rm", buckets_txt_path
            os.remove(buckets_txt_path)

        self.backend1.start()
        def check_data():
            self._check_data(2, prefix='group1_', loop_num=16 * 1024)
            self._check_data(2, prefix='group2_', loop_num=16 * 1024)
            self._check_data(2, prefix='group3_', loop_num=512)

        time.sleep(1)
        self._start_gc(0, bucket="0")
        print "gc started"
        while True:
            status = self._gc_status()
            if status.find('running') >= 0:
                check_data()
                continue
            elif status == 'success':
                print "done gc"
                break
            elif status == 'fail':
                return self.fail("optimize_stat = fail")
            else:
                self.fail(status)

        self.assertEqual(self.backend1.item_count(), 32 * 1024 + 512 + 16 * 1024)
        check_data()
        for key in self.backend1.generate_key(prefix="group1_", count=2, sector=0):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/000.data"), key, ver_=2))
        print "group2 should be not in 000.data, but in 001.data"
        for key in self.backend1.generate_key(prefix="group2_", count=2, sector=0):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/000.data"), key))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/001.data"), key, ver_=2))
        print "group4 of bucket 1 should be untouched"
        for key in self.backend1.generate_key(prefix="group4_", count=2, sector=1):
            print key
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "1/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "1/000.data"), key, ver_=2))

        print "check data& hint"
        check_data_hint_integrity(self.backend1.db_home, db_depth=self.backend1.db_depth)
Beispiel #2
0
    def test_gc_multiple_files(self):
        self.backend1.start()
        self._gen_data(1, prefix="group1_", loop_num=16 * 1024)
        # 5M data file 1
        self._gen_data(2, prefix="group1_", loop_num=16 * 1024)
        print "group1"

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        self._gen_data(1, prefix="group2_", loop_num=16 * 1024)
        self._gen_data(2, prefix="group2_", loop_num=16 * 1024)

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        # 5M data file 2
        # data file 3
        self._gen_data(1, prefix="group3_", loop_num=512)
        self._gen_data(2, prefix="group3_", loop_num=512)

        self._gen_data(1, prefix="group4_", loop_num=16 * 1024, sector=1)
        self._gen_data(2, prefix="group4_", loop_num=16 * 1024, sector=1)

        self.assertEqual(self.backend1.item_count(), 32 * 1024 + 512 + 16 * 1024)

        sector0_exp = os.path.join(self.backend1.db_home, "0/*.data")

        print "sector0 files", glob.glob(sector0_exp)
        self.assertEqual(len(glob.glob(sector0_exp)), 3)

        self.backend1.stop()
        print "test append some junk data to file, simulate incomplete data file got gc"
        with open(os.path.join(self.backend1.db_home, "0/001.data"), "a") as f:
            f.write("junkdatasdfsdfsdfdfdf")

        buckets_txt_path = os.path.join(self.backend1.db_home, "0/buckets.txt")
        if os.path.exists(buckets_txt_path):
            print "rm", buckets_txt_path
            os.remove(buckets_txt_path)

        self.backend1.start()

        def check_data():
            self._check_data(2, prefix="group1_", loop_num=16 * 1024)
            self._check_data(2, prefix="group2_", loop_num=16 * 1024)
            self._check_data(2, prefix="group3_", loop_num=512)

        time.sleep(1)
        self._start_gc(0, bucket="0")
        print "gc started"
        while True:
            status = self._gc_status()
            if status.find("running") >= 0:
                check_data()
                continue
            elif status == "success":
                print "done gc"
                break
            elif status == "fail":
                return self.fail("optimize_stat = fail")
            else:
                self.fail(status)

        self.assertEqual(self.backend1.item_count(), 32 * 1024 + 512 + 16 * 1024)
        check_data()
        for key in self.backend1.generate_key(prefix="group1_", count=2, sector=0):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/000.data"), key, ver_=2))
        print "group2 should be not in 000.data, but in 001.data"
        for key in self.backend1.generate_key(prefix="group2_", count=2, sector=0):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/000.data"), key))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/001.data"), key, ver_=2))
        print "group4 of bucket 1 should be untouched"
        for key in self.backend1.generate_key(prefix="group4_", count=2, sector=1):
            print key
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "1/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "1/000.data"), key, ver_=2))

        print "check data& hint"
        check_data_hint_integrity(self.backend1.db_home, db_depth=self.backend1.db_depth)
Beispiel #3
0
    def test_gc_multiple_files(self):
        self.backend1.start()
        self._gen_data(1, prefix='group1_', loop_num=16 * 1024)
        #5M data file 1
        self._gen_data(2, prefix='group1_', loop_num=16 * 1024)
        print 'group1'

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        self._gen_data(1, prefix='group2_', loop_num=16 * 1024)
        self._gen_data(2, prefix='group2_', loop_num=16 * 1024)

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        #5M data file 2
        # data file 3
        self._gen_data(1, prefix='group3_', loop_num=512)
        self._gen_data(2, prefix='group3_', loop_num=512)

        self._gen_data(1, prefix='group4_', loop_num=16 * 1024, sector=(1, 0))
        self._gen_data(2, prefix='group4_', loop_num=16 * 1024, sector=(1, 0))

        self._gen_data(1, prefix='group5_', loop_num=16 * 1024, sector=(0, 1))
        self._gen_data(2, prefix='group5_', loop_num=16 * 1024, sector=(0, 1))

        self.assertEqual(self.backend1.item_count(), 64 * 1024 + 512)

        sector00_exp = os.path.join(self.backend1.db_home, "0/0/0*.data")
        sector01_exp = os.path.join(self.backend1.db_home, "0/1/0*.data")
        sector10_exp = os.path.join(self.backend1.db_home, "1/0/0*.data")

        self.assertEqual(len(glob.glob(sector00_exp)), 3)
        self.assertEqual(len(glob.glob(sector01_exp)), 1)
        self.assertEqual(len(glob.glob(sector10_exp)), 1)

        self.backend1.stop()
        def check_data():
            self._check_data(2, prefix='group1_', loop_num=16 * 1024)
            self._check_data(2, prefix='group2_', loop_num=16 * 1024)
            self._check_data(2, prefix='group3_', loop_num=512)



        self.backend1.start()
        self._start_gc(0, bucket="1")
        print "gc @1 started"
        while True:
            status = self._gc_status()
            if status.find('running') >= 0:
                check_data()
                continue
            elif status == 'success':
                print "done gc"
                break
            elif status == 'fail':
                return self.fail("optimize_stat = fail")
            else:
                self.fail(status)
        print "group4 got gc"
        for key in self.backend1.generate_key(prefix="group4_", count=2, sector=(1, 0)):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "1/0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "1/0/000.data"), key, ver_=2))
        print "group1 should be untouched"
        for key in self.backend1.generate_key(prefix="group1_", count=2, sector=(0, 0)):
            print key
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=2))


        time.sleep(1)
        self._start_gc(0, bucket="00")
        print "gc @00 started"
        while True:
            status = self._gc_status()
            if status.find('running') >= 0:
                check_data()
                continue
            elif status == 'success':
                print "done gc"
                break
            elif status == 'fail':
                return self.fail("optimize_stat = fail")
            else:
                self.fail(status)

        self.assertEqual(self.backend1.item_count(), 64 * 1024 + 512)
        check_data()
        print "group1 got gc"
        for key in self.backend1.generate_key(prefix="group1_", count=2, sector=(0, 0)):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=2))
        print "group2 should be not in 000.data, but in 001.data"
        for key in self.backend1.generate_key(prefix="group2_", count=2, sector=(0, 0)):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/001.data"), key, ver_=2))
        print "group5 of bucket 0/1 should be untouched"
        for key in self.backend1.generate_key(prefix="group5_", count=2, sector=(0, 1)):
            print key
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/1/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/1/000.data"), key, ver_=2))

        print "check data& hint"
        check_data_hint_integrity(self.backend1.db_home, db_depth=self.backend1.db_depth)
Beispiel #4
0
    def test_gc_multiple_files(self):
        self.backend1.start()
        self._gen_data(1, prefix="group1_", loop_num=16 * 1024)
        # 5M data file 1
        self._gen_data(2, prefix="group1_", loop_num=16 * 1024)
        print "group1"

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        self._gen_data(1, prefix="group2_", loop_num=16 * 1024)
        self._gen_data(2, prefix="group2_", loop_num=16 * 1024)

        self.backend1.stop()
        self.backend1.start()
        print "restarted"

        # 5M data file 2
        # data file 3
        self._gen_data(1, prefix="group3_", loop_num=512)
        self._gen_data(2, prefix="group3_", loop_num=512)

        self._gen_data(1, prefix="group4_", loop_num=16 * 1024, sector=(1, 0))
        self._gen_data(2, prefix="group4_", loop_num=16 * 1024, sector=(1, 0))

        self._gen_data(1, prefix="group5_", loop_num=16 * 1024, sector=(0, 1))
        self._gen_data(2, prefix="group5_", loop_num=16 * 1024, sector=(0, 1))

        self.assertEqual(self.backend1.item_count(), 64 * 1024 + 512)

        sector00_exp = os.path.join(self.backend1.db_home, "0/0/0*.data")
        sector01_exp = os.path.join(self.backend1.db_home, "0/1/0*.data")
        sector10_exp = os.path.join(self.backend1.db_home, "1/0/0*.data")

        self.assertEqual(len(glob.glob(sector00_exp)), 3)
        self.assertEqual(len(glob.glob(sector01_exp)), 1)
        self.assertEqual(len(glob.glob(sector10_exp)), 1)

        self.backend1.stop()

        def check_data():
            self._check_data(2, prefix="group1_", loop_num=16 * 1024)
            self._check_data(2, prefix="group2_", loop_num=16 * 1024)
            self._check_data(2, prefix="group3_", loop_num=512)

        self.backend1.start()
        self._start_gc(0, bucket="1")
        print "gc @1 started"
        while True:
            status = self._gc_status()
            if status.find("running") >= 0:
                check_data()
                continue
            elif status == "success":
                print "done gc"
                break
            elif status == "fail":
                return self.fail("optimize_stat = fail")
            else:
                self.fail(status)
        print "group4 got gc"
        for key in self.backend1.generate_key(prefix="group4_", count=2, sector=(1, 0)):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "1/0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "1/0/000.data"), key, ver_=2))
        print "group1 should be untouched"
        for key in self.backend1.generate_key(prefix="group1_", count=2, sector=(0, 0)):
            print key
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=2))

        time.sleep(1)
        self._start_gc(0, bucket="00")
        print "gc @00 started"
        while True:
            status = self._gc_status()
            if status.find("running") >= 0:
                check_data()
                continue
            elif status == "success":
                print "done gc"
                break
            elif status == "fail":
                return self.fail("optimize_stat = fail")
            else:
                self.fail(status)

        self.assertEqual(self.backend1.item_count(), 64 * 1024 + 512)
        check_data()
        print "group1 got gc"
        for key in self.backend1.generate_key(prefix="group1_", count=2, sector=(0, 0)):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key, ver_=2))
        print "group2 should be not in 000.data, but in 001.data"
        for key in self.backend1.generate_key(prefix="group2_", count=2, sector=(0, 0)):
            print key
            self.assert_(not check_data_with_key(os.path.join(self.backend1.db_home, "0/0/000.data"), key))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/0/001.data"), key, ver_=2))
        print "group5 of bucket 0/1 should be untouched"
        for key in self.backend1.generate_key(prefix="group5_", count=2, sector=(0, 1)):
            print key
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/1/000.data"), key, ver_=1))
            self.assert_(check_data_with_key(os.path.join(self.backend1.db_home, "0/1/000.data"), key, ver_=2))

        print "check data& hint"
        check_data_hint_integrity(self.backend1.db_home, db_depth=self.backend1.db_depth)