Example #1
0
class test_tiered05(wttest.WiredTigerTestCase):
    """Verify flush_tier returns an error when a tiered manager thread is configured."""
    storage_sources = [
        ('dir_store',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              bucket_prefix="pfx_",
              ss_name='dir_store')),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              bucket_prefix=generate_s3_prefix(),
              ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    uri = "table:test_tiered05"
    # Tiered manager wait interval (seconds); also reused as the thread start-up delay.
    wait = 2

    # Load the storage source extension for the current scenario.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def conn_config(self):
        # The directory store expects its bucket directory to exist before the
        # connection is opened.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
          'tiered_manager=(wait=%d),' % self.wait + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s,' % self.ss_name + \
          'object_target_size=20M)'

    # Test calling the flush_tier API with a tiered manager. Should get an error.
    def test_tiered(self):
        self.session.create(self.uri, 'key_format=S')
        # Allow time for the thread to start up.
        time.sleep(self.wait)
        msg = "/storage manager thread is configured/"
        # Note: assertEqual (not the deprecated assertEquals alias, removed in
        # Python 3.12) — flush_tier itself is expected to raise before it returns.
        self.assertRaisesWithMessage(
            wiredtiger.WiredTigerError,
            lambda: self.assertEqual(self.session.flush_tier(None), 0), msg)
Example #2
0
class test_tiered09(wttest.WiredTigerTestCase):
    """Verify objects flushed under different bucket prefixes all stay readable."""
    storage_sources = [
        ('dir_store',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              prefix1='1_',
              prefix2='2_',
              prefix3='3_',
              ss_name='dir_store')),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              prefix1=generate_s3_prefix(),
              prefix2=generate_s3_prefix(),
              prefix3=generate_s3_prefix(),
              ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered09-000000000'
    base2 = 'test_second09-000000000'
    obj1file = base + '1.wtobj'
    obj1second = base2 + '1.wtobj'
    obj2file = base + '2.wtobj'
    uri = "table:test_tiered09"
    uri2 = "table:test_second09"

    # Local retention (in whatever unit the connection config expects) kept small.
    retention = 1
    saved_conn = ''

    def conn_config(self):
        # The directory store expects its bucket directory to exist before open.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        # Save the base configuration string so the test can reopen the
        # connection later with only the bucket_prefix overridden.
        self.saved_conn = \
          'debug_mode=(flush_checkpoint=true),' + \
          'statistics=(all),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.prefix1 + \
          'local_retention=%d,' % self.retention + \
          'name=%s)' % self.ss_name
        return self.saved_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def check(self, tc, n):
        # Verify keys "0".."n-1" map to themselves and key "n" is absent.
        for i in range(0, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        # assertEqual: the deprecated assertEquals alias was removed in Python 3.12.
        self.assertEqual(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling the flush_tier API.
    def test_tiered(self):
        # Create a table. Add some data. Checkpoint and flush tier.
        # Close the connection. Then we want to reopen the connection
        # with a different bucket prefix and repeat. Then reopen the
        # connection with the original prefix. Then reopen and verify
        # we can read all the data.
        #
        # Verify the files are as we expect also. We expect:
        # 1_<tablename>-00000001.wtobj
        # 2_<tablename>-00000002.wtobj
        # 1_<tablename>-00000003.wtobj
        # but we can read and access all data in all objects.
        self.session.create(self.uri, 'key_format=S,value_format=S,')
        # Add first data. Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        self.session.checkpoint()
        self.session.flush_tier(None)
        self.close_conn()

        # For directory store, check that the expected files exist.
        if self.ss_name == 'dir_store':
            self.assertTrue(os.path.exists(self.obj2file))
            bucket_obj = os.path.join(self.bucket,
                                      self.prefix1 + self.obj1file)
            self.assertTrue(os.path.exists(bucket_obj))

        # Since we've closed and reopened the connection we lost the work units
        # to drop the local objects. Clean them up now to make sure we can open
        # the correct object in the bucket.
        localobj = './' + self.obj1file
        if os.path.exists(localobj):
            os.remove(localobj)

        # Reopen the connection with a different prefix this time.
        conn_params = self.saved_conn + ',tiered_storage=(bucket_prefix=%s)' % self.prefix2
        self.conn = self.wiredtiger_open('.', conn_params)
        self.session = self.conn.open_session()
        # Add a second table created while the second prefix is used for the connection.
        self.session.create(self.uri2, 'key_format=S,value_format=S,')
        # Add first data. Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri2)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        # Add more data to original table.
        # Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri)
        c["1"] = "1"
        self.check(c, 2)
        c.close()
        self.session.checkpoint()
        self.session.flush_tier(None)
        self.close_conn()

        # For directory store, Check each table was created with the correct prefix.
        if self.ss_name == 'dir_store':
            bucket_obj = os.path.join(self.bucket,
                                      self.prefix2 + self.obj1second)
            self.assertTrue(os.path.exists(bucket_obj))
            bucket_obj = os.path.join(self.bucket,
                                      self.prefix1 + self.obj2file)
            self.assertTrue(os.path.exists(bucket_obj))

        # Since we've closed and reopened the connection we lost the work units
        # to drop the local objects. Clean them up now to make sure we can open
        # the correct object in the bucket.
        localobj = './' + self.obj2file
        if os.path.exists(localobj):
            os.remove(localobj)
        localobj = './' + self.obj1second
        if os.path.exists(localobj):
            os.remove(localobj)

        # Reopen with the other prefix and check all data. Even though we're using the
        # other prefix, we should find all the data in the object with the original
        # prefix.
        conn_params = self.saved_conn + ',tiered_storage=(bucket_prefix=%s)' % self.prefix3
        self.conn = self.wiredtiger_open('.', conn_params)
        self.session = self.conn.open_session()
        c = self.session.open_cursor(self.uri)
        self.check(c, 2)
        c.close()
        c = self.session.open_cursor(self.uri2)
        self.check(c, 1)
        c.close()
Example #3
0
class test_tiered10(wttest.WiredTigerTestCase):
    """Verify two databases can share one bucket using different bucket prefixes."""
    storage_sources = [
        ('dir_store',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              prefix1='1_',
              prefix2='2_',
              ss_name='dir_store')),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              prefix1=generate_s3_prefix(),
              prefix2=generate_s3_prefix(),
              ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered10-000000000'
    obj1file = base + '1.wtobj'
    uri = "table:test_tiered10"

    # Each manually-opened connection gets its own home directory.
    conn1_dir = "first_dir"
    conn2_dir = "second_dir"
    retention = 1
    saved_conn = ''

    def conn_config(self):
        # The directory store expects its bucket directory to exist before open.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        # Fresh test home each run, so these directories should not yet exist.
        os.mkdir(self.conn1_dir)
        os.mkdir(self.conn2_dir)
        # Use this to create the directories and set up for the others.
        dummy_conn = 'create,statistics=(all),'

        # For directory store, the bucket is a directory one level up from database directories.
        bucket = ''
        if self.ss_name == 'dir_store':
            bucket = '../'
        bucket += self.bucket

        # Saved for the manual wiredtiger_open calls; only the bucket_prefix is
        # appended per connection in the test body.
        self.saved_conn = \
          'debug_mode=(flush_checkpoint=true),' + \
          'create,statistics=(all),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % bucket + \
          'local_retention=%d,' % self.retention + \
          'name=%s),' % self.ss_name
        return dummy_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            # config = '=(config=\"(verbose=[api:1,version,tiered:1])\")'
            extlist.skip_if_missing = True
        # if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def check(self, tc, base, n):
        # Verify keys "base".."n-1" map to themselves and key "n" is absent.
        # (Callers pass base > n to only probe that key "n" is missing.)
        for i in range(base, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        # assertEqual: the deprecated assertEquals alias was removed in Python 3.12.
        self.assertEqual(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling the flush_tier API.
    def test_tiered(self):
        # Have two connections running in different directories, but sharing
        # the same bucket with different prefixes. Each database creates an
        # identically named table with different data. Each then does a flush
        # tier testing that both databases can coexist in the same bucket
        # without conflict.
        #
        # Then reopen the connections and make sure we can read data correctly.
        #
        # We open two connections manually so that they both have the same relative
        # pathnames. The standard connection is just a dummy for this test.
        ext = self.extensionsConfig()
        conn1_params = self.saved_conn + ext + ',tiered_storage=(bucket_prefix=%s)' % self.prefix1
        conn1 = self.wiredtiger_open(self.conn1_dir, conn1_params)
        session1 = conn1.open_session()
        conn2_params = self.saved_conn + ext + ',tiered_storage=(bucket_prefix=%s)' % self.prefix2
        conn2 = self.wiredtiger_open(self.conn2_dir, conn2_params)
        session2 = conn2.open_session()

        session1.create(self.uri, 'key_format=S,value_format=S,')
        session2.create(self.uri, 'key_format=S,value_format=S,')

        # Add first data. Checkpoint, flush and close the connection.
        c1 = session1.open_cursor(self.uri)
        c2 = session2.open_cursor(self.uri)
        c1["0"] = "0"
        c2["20"] = "20"
        self.check(c1, 0, 1)
        self.check(c2, 20, 1)
        c1.close()
        c2.close()
        session1.checkpoint()
        session1.flush_tier(None)
        session2.checkpoint()
        session2.flush_tier(None)
        conn1_obj1 = os.path.join(self.bucket, self.prefix1 + self.obj1file)
        conn2_obj1 = os.path.join(self.bucket, self.prefix2 + self.obj1file)

        if self.ss_name == 'dir_store':
            self.assertTrue(os.path.exists(conn1_obj1))
            self.assertTrue(os.path.exists(conn2_obj1))

        conn1.close()
        conn2.close()

        # Remove the local copies of the objects before we reopen so that we force
        # the system to read from the bucket or bucket cache.
        local = self.conn1_dir + '/' + self.obj1file
        if os.path.exists(local):
            os.remove(local)
        local = self.conn2_dir + '/' + self.obj1file
        if os.path.exists(local):
            os.remove(local)

        conn1 = self.wiredtiger_open(self.conn1_dir, conn1_params)
        session1 = conn1.open_session()
        conn2 = self.wiredtiger_open(self.conn2_dir, conn2_params)
        session2 = conn2.open_session()

        c1 = session1.open_cursor(self.uri)
        c2 = session2.open_cursor(self.uri)
        self.check(c1, 0, 1)
        self.check(c2, 20, 1)
        c1.close()
        c2.close()
Example #4
0
class test_tiered14(wttest.WiredTigerTestCase):
    """Randomized tiered-storage workout: adds, updates, checkpoints, flushes, reopens."""
    uri = "table:test_tiered14-{}"  # format for subtests

    # FIXME-WT-7833: enable the commented scenarios and run the
    # test with the --long option.

    # The multiplier makes the size of keys and values progressively larger.
    # A multiplier of 0 makes the keys and values a single length.
    multiplier = [
        ('0', dict(multiplier=0)),
        ('S', dict(multiplier=1)),
        ('M', dict(multiplier=10)),
        #('L', dict(multiplier=100, long_only=True)),
        #('XL', dict(multiplier=1000, long_only=True)),
    ]
    keyfmt = [
        ('integer', dict(keyfmt='i')),
        ('string', dict(keyfmt='S')),
    ]
    dataset = [
        ('simple', dict(dataset='simple')),
        #('complex', dict(dataset='complex', long_only=True)),
    ]
    storage_sources = [
        ('dir_store',
         dict(
             auth_token=get_auth_token('dir_store'),
             bucket=get_bucket1_name('dir_store'),
             bucket_prefix="pfx_",
             num_ops=100,
             ss_name='dir_store',
         )),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              bucket_prefix=generate_s3_prefix(),
              num_ops=20,
              ss_name='s3_store')),
    ]
    scenarios = wtscenario.make_scenarios(multiplier, keyfmt, dataset,
                                          storage_sources)

    def conn_config(self):
        # The directory store expects its bucket directory to exist before open.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
          'debug_mode=(flush_checkpoint=true),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s),' % self.ss_name

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def progress(self, s):
        # Tag every progress message with the current subtest and op position
        # so a failure can be mapped back to the generated op sequence.
        outstr = "testnum {}, position {}: {}".format(self.testnum,
                                                      self.position, s)
        self.verbose(3, outstr)
        self.pr(outstr)

    # Run a sequence of operations, indicated by a string.
    #  a = add some number of keys
    #  u = update some number of keys
    #  c = checkpoint
    #  r = reopen
    #  f = flush_tier
    #  . = check to make sure all expected values are present
    #
    # We require a unique test number so we can generate a different uri from
    # previous runs.  A different approach is to drop the uri, but then we need to
    # remove the bucket and cache, which is specific to the storage source extension.
    def playback(self, testnum, ops):
        self.testnum = testnum
        self.position = -1

        uri = self.uri.format(testnum)
        self.progress('Running ops: {} using uri {}'.format(ops, uri))
        if self.dataset == 'simple':
            ds = TrackedSimpleDataSet(self,
                                      uri,
                                      self.multiplier,
                                      key_format=self.keyfmt)
        elif self.dataset == 'complex':
            ds = TrackedComplexDataSet(self,
                                       uri,
                                       self.multiplier,
                                       key_format=self.keyfmt)

        # Populate for a tracked data set is needed to create the uri.
        ds.populate()
        inserted = 0

        # At the end of the sequence of operations, do a final check ('.').
        for op in ops + '.':
            self.position += 1
            try:
                if op == 'f':
                    self.progress('flush_tier')
                    self.session.flush_tier(None)
                elif op == 'c':
                    self.progress('checkpoint')
                    self.session.checkpoint()
                elif op == 'r':
                    self.progress('reopen')
                    self.reopen_conn()
                elif op == 'a':
                    self.progress('add')
                    n = random.randrange(1, 101)  # 1 <= n <= 100
                    ds.store_range(inserted, n)
                    inserted += n
                elif op == 'u':
                    self.progress('update')
                    # only update the elements if enough have already been added.
                    n = random.randrange(1, 101)  # 1 <= n <= 100
                    if n < inserted:
                        pos = random.randrange(0, inserted - n)
                        ds.store_range(pos, n)
                elif op == '.':
                    self.progress('check')
                    ds.check()
            except Exception as e:
                # Bug fix: this previously formatted an undefined name 'idx',
                # so any failure raised a NameError that masked the original
                # exception. Use the tracked position instead.
                self.progress('Failed at position {} in {}: {}'.format(
                    self.position, ops, str(e)))
                # Bare raise preserves the original exception and traceback.
                raise

    # Test tiered storage with checkpoints and flush_tier calls.
    def test_tiered(self):
        # Seed the PRNG so the generated op sequences are reproducible.
        random.seed(0)

        # Get started with a fixed sequence of basic operations.
        # There's no particular reason to start with this sequence.
        testnum = 0
        self.playback(testnum,
                      "aaaaacaaa.uucrauaf.aauaac.auu.aacrauafa.uruua.")

        for i in range(0, 10):
            testnum += 1
            # Generate a sequence of operations that is heavy on additions and updates.
            s = ''.join(random.choices('aaaaauuuuufcr.', k=self.num_ops))
            self.playback(testnum, s)

        for i in range(0, 10):
            testnum += 1
            # Generate a sequence of operations that has a greater mix of 'operational' functions.
            s = ''.join(random.choices('aufcr.', k=self.num_ops))
            self.playback(testnum, s)
Example #5
0
class test_tiered07(wttest.WiredTigerTestCase):
    """Exercise schema APIs (create, drop, rename) against tiered tables."""
    storage_sources = [
        ('dir_store', dict(auth_token = get_auth_token('dir_store'),
            bucket = get_bucket1_name('dir_store'),
            bucket_prefix = "pfx_",
            ss_name = 'dir_store')),
        # FIXME-WT-8897 Disabled as S3 directory listing is interpreting a directory to end in a '/',
        # whereas the code in the tiered storage doesn't expect that. Enable when fixed.
        #('s3', dict(auth_token = get_auth_token('s3_store'),
        #    bucket = get_bucket1_name('s3_store'),
        #    bucket_prefix = generate_s3_prefix(),
        #    ss_name = 's3_store'))
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # Overlapping table names on purpose: exercise name-prefix handling.
    uri = "table:abc"
    uri2 = "table:ab"
    uri3 = "table:abcd"
    uri4 = "table:abcde"
    localuri = "table:local"
    newuri = "table:tier_new"

    # Load the storage source extension for the current scenario.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def conn_config(self):
        # The directory store expects its bucket directory to exist before open.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        #  'verbose=(tiered),' + \

        return \
          'debug_mode=(flush_checkpoint=true),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s,' % self.ss_name + \
          'object_target_size=20M)'

    def check(self, tc, n):
        # Verify keys "0".."n-1" map to themselves and key "n" is absent.
        for i in range(0, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        # assertEqual: the deprecated assertEquals alias was removed in Python 3.12.
        self.assertEqual(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling schema APIs with a tiered table.
    def test_tiered(self):
        # Create a new tiered table.
        self.pr('create table')
        self.session.create(self.uri, 'key_format=S,value_format=S')
        self.pr('create table 2')
        self.session.create(self.uri2, 'key_format=S,value_format=S')
        self.pr('create table 3')
        self.session.create(self.uri3, 'key_format=S,value_format=S')
        self.pr('create table local')
        self.session.create(self.localuri, 'key_format=S,value_format=S,tiered_storage=(name=none)')

        # Rename is not supported for tiered tables.
        msg = "/is not supported/"
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda:self.assertEqual(self.session.rename(self.uri, self.newuri, None), 0), msg)

        # Add some data and flush tier.
        self.pr('add one item to all tables')
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.uri2)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.uri3)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.localuri)
        c["0"] = "0"
        c.close()
        self.session.checkpoint()
        self.pr('After data, call flush_tier')
        self.session.flush_tier(None)

        # Drop table.
        self.pr('call drop')
        self.session.drop(self.localuri)
        self.session.drop(self.uri)

        # By default, the remove_files configuration for drop is true. This means that the
        # drop operation for tiered tables should both remove the files from the metadata
        # file and remove the corresponding local object files in the directory.
        self.assertFalse(os.path.isfile("abc-0000000001.wtobj"))
        self.assertFalse(os.path.isfile("abc-0000000002.wtobj"))

        # Dropping a table using the force setting should succeed even if the table does not exist.
        self.session.drop(self.localuri, 'force=true')
        self.session.drop(self.uri, 'force=true')

        # Dropping a table should not succeed if the table does not exist.
        # Test dropping a table that was previously dropped.
        self.assertRaises(wiredtiger.WiredTigerError,
            lambda: self.session.drop(self.localuri, None))
        # Test dropping a table that does not exist.
        self.assertRaises(wiredtiger.WiredTigerError,
            lambda: self.session.drop("table:random_non_existent", None))

        # Create new table with same name. This should error.
        msg = "/already exists/"
        self.pr('check cannot create with same name')
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda:self.assertEqual(self.session.create(self.uri, 'key_format=S'), 0), msg)

        # Make sure there was no problem with overlapping table names.
        self.pr('check original similarly named tables')
        c = self.session.open_cursor(self.uri2)
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.uri3)
        self.check(c, 1)
        c.close()

        # Create new table with new name.
        self.pr('create new table')
        self.session.create(self.newuri, 'key_format=S')

        # Test the drop operation without removing associated files.
        self.session.create(self.uri4, 'key_format=S,value_format=S')
        self.session.drop(self.uri4, 'remove_files=false')
        self.assertTrue(os.path.isfile("abcde-0000000001.wtobj"))
Example #6
0
class test_tiered02(wttest.WiredTigerTestCase):
    """Exercise tiered storage across checkpoints, flush_tier calls and reopens."""
    storage_sources = [
        ('dirstore',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              bucket_prefix="pfx_",
              ss_name='dir_store')),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              bucket_prefix=generate_s3_prefix(),
              ss_name='s3_store')),
    ]

    complex_dataset = [
        ('simple_ds', dict(complex_dataset=False)),

        # Commented out complex dataset that tests column groups and indexes because it crashes
        # in the middle of the test. FIXME: WT-9001
        #('complex_ds', dict(complex_dataset=True)),
    ]

    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources, complex_dataset)

    uri = "table:test_tiered02"

    def conn_config(self):
        # The directory store expects its bucket directory to exist before open.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
          'debug_mode=(flush_checkpoint=true),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s),' % self.ss_name

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def progress(self, s):
        # Emit progress both on the verbose channel and in the test log.
        self.verbose(3, s)
        self.pr(s)

    def confirm_flush(self, increase=True):
        # Compare the bucket's object count against the count recorded on the
        # previous call: expect growth when increase=True (after a flush_tier),
        # no change otherwise.
        # Without directly using the filesystem API, directory listing is only supported on
        # the directory store.  Limit this check to the directory store.
        if self.ss_name != 'dir_store':
            return

        got = sorted(list(os.listdir(self.bucket)))
        self.pr('Flushed objects: ' + str(got))
        if increase:
            # WT-7639: we know that this assertion sometimes fails,
            # we are collecting more data - we still want it to fail
            # so it is noticed.
            if len(got) <= self.flushed_objects:
                from time import sleep
                self.prout(
                    'directory items: {} is not greater than {}!'.format(
                        got, self.flushed_objects))
                self.prout('waiting to see if it resolves')
                for i in range(0, 10):
                    self.prout('checking again')
                    newgot = sorted(list(os.listdir(self.bucket)))
                    if len(newgot) > self.flushed_objects:
                        self.prout('resolved, now see: {}'.format(newgot))
                        break
                    sleep(i)
            # NOTE: 'got' is deliberately not refreshed from the retry loop, so
            # the assertion still fails even when the listing later resolved —
            # that keeps the original flakiness visible (see WT-7639 above).
            self.assertGreater(len(got), self.flushed_objects)
        else:
            self.assertEqual(len(got), self.flushed_objects)
        # Remember the count for the next call.
        self.flushed_objects = len(got)

    def get_dataset(self, rows):
        # Return the scenario's dataset type over our table with 'rows' rows.
        args = 'key_format=S'

        if self.complex_dataset:
            return ComplexDataSet(self, self.uri, rows, config=args)
        else:
            return SimpleDataSet(self, self.uri, rows, config=args)

    # Test tiered storage with checkpoints and flush_tier calls.
    def test_tiered(self):
        # Track the bucket object count across confirm_flush calls.
        self.flushed_objects = 0

        self.pr("create sys")
        self.progress('Create simple data set (10)')
        ds = self.get_dataset(10)
        self.progress('populate')
        ds.populate()
        ds.check()
        self.progress('checkpoint')
        self.session.checkpoint()
        self.progress('flush_tier')
        self.session.flush_tier(None)
        self.confirm_flush()
        ds.check()

        self.close_conn()
        self.progress('reopen_conn')
        self.reopen_conn()
        # Check what was there before.
        ds = self.get_dataset(10)
        ds.check()

        self.progress('Create simple data set (50)')
        ds = self.get_dataset(50)
        self.progress('populate')
        ds.populate()
        ds.check()
        self.progress('open extra cursor on ' + self.uri)
        cursor = self.session.open_cursor(self.uri, None, None)
        self.progress('checkpoint')
        self.session.checkpoint()

        self.progress('flush_tier')
        self.session.flush_tier(None)
        self.progress('flush_tier complete')
        self.confirm_flush()

        self.progress('Create simple data set (100)')
        ds = self.get_dataset(100)
        self.progress('populate')
        ds.populate()
        ds.check()
        self.progress('checkpoint')
        self.session.checkpoint()
        self.progress('flush_tier')
        self.session.flush_tier(None)
        self.confirm_flush()

        self.progress('Create simple data set (200)')
        ds = self.get_dataset(200)
        self.progress('populate')
        ds.populate()
        ds.check()
        cursor.close()
        self.progress('close_conn')
        self.close_conn()

        self.progress('reopen_conn')
        self.reopen_conn()

        # Check what was there before.
        ds = self.get_dataset(200)
        ds.check()

        # Now add some more.
        self.progress('Create simple data set (300)')
        ds = self.get_dataset(300)
        self.progress('populate')
        ds.populate()
        ds.check()

        # We haven't done a flush so there should be
        # nothing extra on the shared tier.
        self.confirm_flush(increase=False)
        self.progress('checkpoint')
        self.session.checkpoint()
        self.confirm_flush(increase=False)
        self.progress('END TEST')
Example #7
0
class test_tiered13(test_import_base):
    """Verify that importing tiered tables is unsupported and fails with clear errors."""
    storage_sources = [
        ('dir_store',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              bucket_prefix="pfx_",
              ss_name='dir_store')),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              bucket_prefix=generate_s3_prefix(),
              ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered13-000000000'
    fileuri_base = 'file:' + base
    file1uri = fileuri_base + '1.wtobj'
    file2 = base + '2.wtobj'
    file2uri = fileuri_base + '2.wtobj'
    otherfile = 'other.wt'
    otheruri = 'file:' + otherfile
    uri = "table:test_tiered13"

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        """Register the storage source extension for the current scenario."""
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def conn_config(self):
        """Build and return the tiered-storage connection configuration.

        The string is saved in self.saved_conn so the test can later open a
        second database directory with the identical configuration.
        """
        # The directory store needs the bucket to exist as a local directory.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        self.saved_conn = \
          'debug_mode=(flush_checkpoint=true),' + \
          'create,tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s,' % self.ss_name + \
          'object_target_size=20M),'
        return self.saved_conn

    def test_tiered13(self):
        """Attempt to import a tiered table in several ways; all must error."""
        # Create a new tiered table.
        self.session.create(self.uri, 'key_format=S,value_format=S,')
        # Add first data. Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        c.close()
        self.session.checkpoint()
        self.session.flush_tier(None)
        c = self.session.open_cursor(self.uri)
        c["1"] = "1"
        c.close()
        self.session.checkpoint()
        # We now have the second object existing, with data in it.

        # Set up for the test.
        # - Create the tiered table (above).
        # - Find the metadata for the current file: object.
        # - Set up a new database for importing.
        #
        # Testing import and tiered tables. All should error:
        # - Try to import via the table:uri.
        # - Try to import via the table:uri with the file object's metadata.
        # - Try to import via the file:uri.
        # - Try to import via the file:uri with the file object's metadata.
        # - Try to import via a renamed file:name.wt.
        # - Try to import via a renamed file:name.wt with the file object's metadata.

        # Export the metadata for the current file object 2.
        cursor = self.session.open_cursor('metadata:', None, None)
        for k, v in cursor:
            if k.startswith(self.file2uri):
                fileobj_config = cursor[k]
            if k.startswith(self.uri):
                table_config = cursor[k]
        cursor.close()
        self.close_conn()
        # Construct the config strings.
        import_enabled = 'import=(enabled,repair=true)'
        import_meta = 'import=(enabled,repair=false,file_metadata=(' + \
            fileobj_config + '))'
        table_import_meta = table_config + ',import=(enabled,repair=false,file_metadata=(' + \
            fileobj_config + '))'

        # Set up the import database.
        newdir = 'IMPORT_DB'
        shutil.rmtree(newdir, ignore_errors=True)
        os.mkdir(newdir)
        newbucket = os.path.join(newdir, self.bucket)
        if self.ss_name == 'dir_store':
            os.mkdir(newbucket)
        # It is tricky to work around the extension and connection bucket setup for
        # creating the new import directory that is tiered-enabled.
        ext = self.extensionsConfig()
        conn_params = self.saved_conn + ext
        self.conn = self.wiredtiger_open(newdir, conn_params)
        self.session = self.setUpSessionOpen(self.conn)

        # Copy the file to the file names we're going to test later.
        self.copy_file(self.file2, '.', newdir)
        copy_from = self.file2
        copy_to = os.path.join(newdir, self.otherfile)
        shutil.copy(copy_from, copy_to)

        msg = '/Operation not supported/'
        enoent = '/No such file/'
        # Try to import via the table:uri. This fails with ENOENT because
        # it is looking for the normal on-disk file name. It cannot tell it
        # is a tiered table in this case.
        self.assertRaisesWithMessage(
            wiredtiger.WiredTigerError,
            lambda: self.session.create(self.uri, import_enabled), enoent)
        # Try to import via the table:uri with file metadata.
        self.assertRaisesWithMessage(
            wiredtiger.WiredTigerError,
            lambda: self.session.create(self.uri, table_import_meta), msg)
        # Try to import via the file:uri.
        self.assertRaisesWithMessage(
            wiredtiger.WiredTigerError,
            lambda: self.session.create(self.file2uri, import_enabled), msg)
        # Try to import via the file:uri with file metadata.
        self.assertRaisesWithMessage(
            wiredtiger.WiredTigerError,
            lambda: self.session.create(self.file2uri, import_meta), msg)

        # Try to import via a renamed object. If we don't send in metadata,
        # we cannot tell it was a tiered table until we read in the root page.
        # Only test this in diagnostic mode which has an assertion.
        #
        # FIXME-8644 There is an error path bug in wt_bm_read preventing this from
        # working correctly although the code to return an error is in the code.
        # Uncomment these lines when that bug is fixed.

        #if wiredtiger.diagnostic_build():
        #    self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
        #        lambda: self.session.create(self.otheruri, import_enabled), msg)

        # Try to import via a renamed object with metadata.
        self.assertRaisesWithMessage(
            wiredtiger.WiredTigerError,
            lambda: self.session.create(self.otheruri, import_meta), msg)
Example #8
0
class test_tiered04(wttest.WiredTigerTestCase):
    """Test the flush_tier API, local retention and tiered-storage statistics."""
    storage_sources = [
        ('dir_store',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              bucket1=get_bucket2_name('dir_store'),
              prefix="pfx_",
              prefix1="pfx1_",
              ss_name='dir_store')),
        (
            's3',
            dict(
                auth_token=get_auth_token('s3_store'),
                bucket=get_bucket1_name('s3_store'),
                bucket1=get_bucket2_name('s3_store'),
                prefix=generate_s3_prefix(),
                # Test that object name with "/" are processed.
                prefix1=generate_s3_prefix() + "/s3/source/",
                ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered04-000000000'
    fileuri_base = 'file:' + base
    obj1file = base + '1.wtobj'
    obj2file = base + '2.wtobj'
    objuri = 'object:' + base + '1.wtobj'
    tiereduri = "tiered:test_tiered04"
    tieruri = "tier:test_tiered04"
    uri = "table:test_tiered04"

    uri1 = "table:test_other_tiered04"
    uri_none = "table:test_local04"
    file_none = "file:test_local04.wt"

    # System-wide and per-table object target sizes, and their byte values
    # for checking against statistics.
    object_sys = "9M"
    object_sys_val = 9 * 1024 * 1024
    object_uri = "15M"
    object_uri_val = 15 * 1024 * 1024
    # Local retention times (seconds): a short system default and a long
    # per-table/reconfigured value.
    retention = 3
    retention1 = 600

    def conn_config(self):
        """Create the bucket directories and return the tiered connection config.

        The configuration string is saved in self.saved_conn so the test can
        reopen the connection later without re-creating the buckets.
        """
        if self.ss_name == 'dir_store':
            os.mkdir(self.bucket)
            os.mkdir(self.bucket1)
        self.saved_conn = \
          'debug_mode=(flush_checkpoint=true),' + \
          'statistics=(all),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.prefix + \
          'local_retention=%d,' % self.retention + \
          'name=%s,' % self.ss_name + \
          'object_target_size=%s' % self.object_sys
        return self.saved_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        """Register the storage source extension for the current scenario."""
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    # Check for a specific string as part of the uri's metadata.
    def check_metadata(self, uri, val_str):
        """Assert that val_str appears within the metadata value for uri."""
        c = self.session.open_cursor('metadata:')
        val = c[uri]
        c.close()
        # assertIn gives a useful failure message showing both strings.
        self.assertIn(val_str, val)

    def get_stat(self, stat, uri):
        """Return a statistics value, connection-wide when uri is None,
        otherwise for the given data source uri.

        NOTE: the 'stat' parameter shadows the module-level 'stat' import;
        callers pass stat.conn.* constants, so this is harmless here.
        """
        # Identity comparison is the correct test against None.
        if uri is None:
            stat_cursor = self.session.open_cursor('statistics:')
        else:
            stat_cursor = self.session.open_cursor('statistics:' + uri)
        val = stat_cursor[stat][2]
        stat_cursor.close()
        return val

    def check(self, tc, n):
        """Verify cursor tc sees exactly the keys '0' .. str(n-1)."""
        for i in range(0, n):
            # assertEquals is a deprecated alias removed in Python 3.12.
            self.assertEqual(tc[str(i)], str(i))
        # The next key must not be present.
        tc.set_key(str(n))
        self.assertEqual(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling the flush_tier API.
    def test_tiered(self):
        """Exercise flush_tier: skips, switches, retention and reconfiguration."""
        # Create three tables. One using the system tiered storage, one
        # specifying its own bucket and object size and one using no
        # tiered storage. Use stats to verify correct setup.
        intl_page = 'internal_page_max=16K'
        base_create = 'key_format=S,value_format=S,' + intl_page
        self.pr("create sys")
        self.session.create(self.uri, base_create)
        conf = \
          ',tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket1 + \
          'bucket_prefix=%s,' % self.prefix1 + \
          'local_retention=%d,' % self.retention1 + \
          'name=%s,' % self.ss_name + \
          'object_target_size=%s)' % self.object_uri
        self.pr("create non-sys tiered")
        self.session.create(self.uri1, base_create + conf)
        conf = ',tiered_storage=(name=none)'
        self.pr("create non tiered/local")
        self.session.create(self.uri_none, base_create + conf)

        c = self.session.open_cursor(self.uri)
        c1 = self.session.open_cursor(self.uri1)
        cn = self.session.open_cursor(self.uri_none)
        c["0"] = "0"
        c1["0"] = "0"
        cn["0"] = "0"
        self.check(c, 1)
        self.check(c1, 1)
        self.check(cn, 1)
        c.close()

        flush = 0
        # Check the local retention. After a flush_tier call the object file should exist in
        # the local database. Then after sleeping long enough it should be removed.
        self.pr("flush tier no checkpoint")
        self.session.flush_tier(None)
        flush += 1
        # We should not have flushed either tiered table.
        skip = self.get_stat(stat.conn.flush_tier_skipped, None)
        self.assertEqual(skip, 2)

        self.session.checkpoint()
        self.session.flush_tier(None)
        # Now we should have switched both tables. The skip value should stay the same.
        skip = self.get_stat(stat.conn.flush_tier_skipped, None)
        self.assertEqual(skip, 2)
        switch = self.get_stat(stat.conn.flush_tier_switched, None)
        self.assertEqual(switch, 2)
        flush += 1
        self.pr("Check for ")
        self.pr(self.obj1file)
        self.assertTrue(os.path.exists(self.obj1file))
        self.assertTrue(os.path.exists(self.obj2file))

        remove1 = self.get_stat(stat.conn.local_objects_removed, None)
        time.sleep(self.retention + 1)
        # We call flush_tier here because otherwise the internal thread that
        # processes the work units won't run for a while. This call will signal
        # the internal thread to process the work units.
        self.session.flush_tier('force=true')
        flush += 1
        # We still sleep to give the internal thread a chance to run. Some slower
        # systems can fail here if we don't give them time.
        time.sleep(1)
        self.pr("Check removal of ")
        self.pr(self.obj1file)
        self.assertFalse(os.path.exists(self.obj1file))
        remove2 = self.get_stat(stat.conn.local_objects_removed, None)
        # assertGreater reports both values on failure.
        self.assertGreater(remove2, remove1)

        c = self.session.open_cursor(self.uri)
        c["1"] = "1"
        c1["1"] = "1"
        cn["1"] = "1"
        self.check(c, 2)
        c.close()

        c = self.session.open_cursor(self.uri)
        c["2"] = "2"
        c1["2"] = "2"
        cn["2"] = "2"
        self.check(c, 3)
        c1.close()
        cn.close()
        self.session.checkpoint()

        self.pr("flush tier again, holding open cursor")
        self.session.flush_tier(None)
        flush += 1

        c["3"] = "3"
        self.check(c, 4)
        c.close()

        calls = self.get_stat(stat.conn.flush_tier, None)
        self.assertEqual(calls, flush)
        obj = self.get_stat(stat.conn.tiered_object_size, None)
        self.assertEqual(obj, self.object_sys_val)

        # As we flush each object, the next object exists, but our first flush was a no-op.
        # So the value for the last file: object should be 'flush'.
        last = 'last=' + str(flush)
        # For now all earlier objects exist. So it is always 1 until garbage collection
        # starts removing them.
        oldest = 'oldest=1'
        fileuri = self.fileuri_base + str(flush) + '.wtobj'
        self.check_metadata(self.tiereduri, intl_page)
        self.check_metadata(self.tiereduri, last)
        self.check_metadata(self.tiereduri, oldest)
        self.check_metadata(fileuri, intl_page)
        self.check_metadata(self.objuri, intl_page)

        # Check for the correct tiered_object setting for both tiered and not tiered tables.
        tiered_false = 'tiered_object=false'
        tiered_true = 'tiered_object=true'
        self.check_metadata(fileuri, tiered_true)
        self.check_metadata(self.objuri, tiered_true)
        self.check_metadata(self.tieruri, tiered_true)

        self.check_metadata(self.file_none, tiered_false)

        # Now test some connection statistics with operations.
        retain = self.get_stat(stat.conn.tiered_retention, None)
        self.assertEqual(retain, self.retention)
        self.session.flush_tier(None)
        skip1 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch1 = self.get_stat(stat.conn.flush_tier_switched, None)
        # Make sure the last checkpoint and this flush tier are timed differently
        # so that we can specifically check the statistics and code paths in the test.
        # Sleep some to control the execution.
        time.sleep(2)
        self.session.flush_tier('force=true')
        skip2 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch2 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.assertGreater(switch2, switch1)

        self.assertEqual(skip1, skip2)
        flush += 2
        calls = self.get_stat(stat.conn.flush_tier, None)
        self.assertEqual(calls, flush)

        # Test reconfiguration.
        config = 'tiered_storage=(local_retention=%d)' % self.retention1
        self.pr("reconfigure")
        self.conn.reconfigure(config)
        retain = self.get_stat(stat.conn.tiered_retention, None)
        self.assertEqual(retain, self.retention1)

        # Call flush_tier with its various configuration arguments. It is difficult
        # to force a timeout or lock contention with a unit test. So just test the
        # call for now.
        #
        # There have been no data changes nor checkpoints since the last flush_tier with
        # force, above. The skip statistics should increase and the switched
        # statistics should stay the same.
        skip1 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch1 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.session.flush_tier('timeout=100')
        skip2 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch2 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.assertEqual(switch1, switch2)
        self.assertGreater(skip2, skip1)

        self.session.flush_tier('lock_wait=false')
        self.session.flush_tier('sync=off')
        flush += 3
        self.pr("reconfigure get stat")
        calls = self.get_stat(stat.conn.flush_tier, None)
        self.assertEqual(calls, flush)

        # Test that the checkpoint and flush times work across a connection restart.
        # Make modifications and then close the connection (which will checkpoint).
        # Reopen the connection and call flush_tier. Verify this flushes the object.
        c = self.session.open_cursor(self.uri)
        c["4"] = "4"
        self.check(c, 5)
        c.close()
        # Manually reopen the connection because the default function above tries to
        # make the bucket directories.
        self.reopen_conn(config=self.saved_conn)
        remove1 = self.get_stat(stat.conn.local_objects_removed, None)
        skip1 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch1 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.session.flush_tier(None)
        skip2 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch2 = self.get_stat(stat.conn.flush_tier_switched, None)

        # The first flush_tier after restart should have queued removal work units
        # for other objects. Sleep and then force a flush tier to signal the internal
        # thread and make sure that some objects were removed.
        time.sleep(self.retention + 1)
        self.session.flush_tier('force=true')

        # Sleep to give the internal thread time to run and process.
        time.sleep(1)
        self.assertFalse(os.path.exists(self.obj1file))
        remove2 = self.get_stat(stat.conn.local_objects_removed, None)
        self.assertGreater(remove2, remove1)
        #
        # Due to the above modification, we should skip the 'other' table while
        # switching the main tiered table. Therefore, both the skip and switch
        # values should increase by one.
        self.assertEqual(skip2, skip1 + 1)
        self.assertEqual(switch2, switch1 + 1)
Example #9
0
class test_tiered11(wttest.WiredTigerTestCase):
    """Verify that flush_tier records the correct flush timestamp in metadata."""
    storage_sources = [('dir_store',
                        dict(auth_token=get_auth_token('dir_store'),
                             bucket=get_bucket1_name('dir_store'),
                             bucket_prefix="pfx_",
                             ss_name='dir_store')),
                       ('s3',
                        dict(auth_token=get_auth_token('s3_store'),
                             bucket=get_bucket1_name('s3_store'),
                             bucket_prefix=generate_s3_prefix(),
                             ss_name='s3_store'))]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered11-000000000'
    # Number of entries inserted by each add_data call.
    nentries = 10
    objuri = 'object:' + base + '1.wtobj'
    tiereduri = "tiered:test_tiered11"
    uri = "table:test_tiered11"

    def conn_config(self):
        """Build and return the tiered-storage connection configuration.

        The string is saved in self.saved_conn for later reuse by the test.
        """
        # The directory store needs the bucket to exist as a local directory.
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        self.saved_conn = \
          'debug_mode=(flush_checkpoint=true),' + \
          'statistics=(all),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s)' % self.ss_name
        return self.saved_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        """Register the storage source extension for the current scenario."""
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    # Check for a specific string as part of the uri's metadata.
    def check_metadata(self, uri, val_str, match=True):
        """Assert that val_str is (match=True) or is not (match=False) part
        of the metadata value stored for uri."""
        #self.pr("Check_meta: uri: " + uri)
        c = self.session.open_cursor('metadata:')
        val = c[uri]
        c.close()
        #self.pr("Check_meta: metadata val: " + val)
        if match:
            #self.pr("Check_meta: Should see val_str: " + val_str)
            self.assertTrue(val_str in val)
        else:
            #self.pr("Check_meta: Should not see val_str: " + val_str)
            self.assertFalse(val_str in val)

    def add_data(self, start):
        """Insert nentries keys beginning at 'start', each committed at an
        increasing timestamp, then advance oldest/stable to the end.

        Returns the stable timestamp string for the last committed entry.
        """
        c = self.session.open_cursor(self.uri)
        # Begin by adding some data.
        end = start + self.nentries
        for i in range(start, end):
            self.session.begin_transaction()
            c[i] = i
            # Jump the commit TS to leave room for the stable TS separate from any commit TS.
            self.session.commit_transaction('commit_timestamp=' +
                                            self.timestamp_str(i * 2))
        # Set the oldest and stable timestamp to the end.
        end_ts = self.timestamp_str(end - 1)
        self.conn.set_timestamp('oldest_timestamp=' + end_ts +
                                ',stable_timestamp=' + end_ts)
        c.close()
        return end_ts

    # Test calling the flush_tier API.
    def test_tiered11(self):
        """Check that flush_tier records the checkpoint-time stable timestamp,
        not a later one, and that a nonzero flush time is saved."""
        # Create a tiered table and checkpoint. Make sure the recorded
        # timestamp is what we expect.
        intl_page = 'internal_page_max=16K'
        base_create = 'key_format=i,value_format=i,' + intl_page
        self.session.create(self.uri, base_create)

        end_ts = self.add_data(1)
        self.session.checkpoint()

        new_end_ts = self.add_data(self.nentries)
        # We have a new stable timestamp, but after the checkpoint. Make
        # sure the flush tier records the correct timestamp.
        self.session.flush_tier(None)
        # Make sure a new checkpoint doesn't change any of our timestamp info.
        self.session.checkpoint()

        flush_str = 'flush_timestamp="' + end_ts + '"'
        self.check_metadata(self.tiereduri, flush_str)
        self.check_metadata(self.objuri, flush_str)
        # Make sure some flush time was saved. We don't know what it is other
        # than it should not be zero.
        time_str = "flush_time=0"
        self.check_metadata(self.tiereduri, time_str, False)
        self.check_metadata(self.objuri, time_str, False)
Example #10
0
class test_tiered06(wttest.WiredTigerTestCase):
    """Exercise the storage source API directly: flush, read, caching, listing."""
    # Scenarios cover both the directory store and the S3 store, each with
    # two buckets and a per-scenario object-name prefix.
    storage_sources = [
        ('dir_store', dict(auth_token = get_auth_token('dir_store'),
            bucket1 = get_bucket1_name('dir_store'),
            bucket2 = get_bucket2_name('dir_store'),
            bucket_prefix_base = "pfx_",
            ss_name = 'dir_store')),
        ('s3', dict(auth_token = get_auth_token('s3_store'),
            bucket1 = get_bucket1_name('s3_store'),
            bucket2 = get_bucket2_name('s3_store'),
            bucket_prefix_base = generate_s3_prefix(),
            ss_name = 's3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # Load the storage source extension for the current scenario.
    def conn_extensions(self, extlist):
        """Register the configured storage source extension with the connection."""
        extra_config = ''
        # The S3 store is an optional loadable extension that not every test
        # environment builds, and Windows cannot dynamically load extension
        # libraries at all. In either situation, allow the test to be skipped
        # instead of failing when the extension is absent.
        if self.ss_name == 's3_store' or os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + extra_config)

    def breakpoint(self):
        """Drop into an interactive pdb session attached to the controlling terminal.

        The test harness redirects the standard streams, so reconnect stdin,
        stdout and stderr to the TTY before starting the debugger.
        """
        import pdb
        import sys
        tty = '/dev/tty'
        sys.stdin = open(tty, 'r')
        sys.stdout = open(tty, 'w')
        sys.stderr = open(tty, 'w')
        pdb.set_trace()

    def get_storage_source(self):
        """Return the connection's storage source for the current scenario."""
        source = self.conn.get_storage_source(self.ss_name)
        return source
    
    def get_fs_config(self, prefix = '', cache_dir = ''):
        """Build a file-system customization config string from the optional
        object-name prefix and cache directory."""
        parts = []
        if prefix:
            parts.append(',prefix=' + prefix)
        if cache_dir:
            parts.append(',cache_directory=' + cache_dir)
        return ''.join(parts)

    def test_ss_basic(self):
        """Call each supported storage source API method at least once.

        Covers existence checks, flushing a locally-created file into the
        store, directory listing, reads, sizes, locking, and the read-only
        restrictions on rename/remove.
        """
        # Test some basic functionality of the storage source API, calling
        # each supported method in the API at least once.

        session = self.session
        ss = self.get_storage_source()

        # Since this class has multiple tests, append test name to the prefix to
        # avoid namespace collision. 0th element on the stack is the current function.
        prefix = self.bucket_prefix_base + inspect.stack()[0][3] + '/'

        # The directory store needs the bucket created as a directory on the filesystem.
        if self.ss_name == 'dir_store':
            os.mkdir(self.bucket1)

        fs = ss.ss_customize_file_system(session, self.bucket1, self.auth_token,
            self.get_fs_config(prefix))

        # The object doesn't exist yet.
        if self.ss_name == 's3_store':
            with self.expectedStderrPattern('.*HTTP response code: 404.*'):
                self.assertFalse(fs.fs_exist(session, 'foobar'))
        else:
            self.assertFalse(fs.fs_exist(session, 'foobar'))

        # We cannot use the file system to create files, it is readonly.
        # So use python I/O to build up the file.
        f = open('foobar', 'wb')

        # The object still doesn't exist yet.
        if self.ss_name == 's3_store':
            with self.expectedStderrPattern('.*HTTP response code: 404.*'):
                self.assertFalse(fs.fs_exist(session, 'foobar'))
        else:
            self.assertFalse(fs.fs_exist(session, 'foobar'))

        outbytes = ('MORE THAN ENOUGH DATA\n'*100000).encode()
        f.write(outbytes)
        f.close()

        # Nothing is in the directory list until a flush.
        # Note: assertEquals is a deprecated alias removed in Python 3.12;
        # use assertEqual throughout.
        self.assertEqual(fs.fs_directory_list(session, '', ''), [])

        # Flushing copies the file into the file system.
        ss.ss_flush(session, fs, 'foobar', 'foobar', None)
        ss.ss_flush_finish(session, fs, 'foobar', 'foobar', None)

        # The object exists now.
        self.assertEqual(fs.fs_directory_list(session, '', ''), ['foobar'])
        self.assertTrue(fs.fs_exist(session, 'foobar'))

        fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_readonly)
        inbytes = bytes(1000000)         # An empty buffer with a million zero bytes.
        fh.fh_read(session, 0, inbytes)  # Read into the buffer.
        self.assertEqual(outbytes[0:1000000], inbytes)
        self.assertEqual(fs.fs_size(session, 'foobar'), len(outbytes))
        self.assertEqual(fh.fh_size(session), len(outbytes))
        fh.close(session)

        # The fh_lock call doesn't do anything in the directory and S3 store implementation.
        fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_readonly)
        fh.fh_lock(session, True)
        fh.fh_lock(session, False)
        fh.close(session)

        # Files that have been flushed cannot be manipulated.
        with self.expectedStderrPattern('foobar: rename of file not supported'):
            self.assertRaisesException(wiredtiger.WiredTigerError,
                lambda: fs.fs_rename(session, 'foobar', 'barfoo', 0))
        self.assertEqual(fs.fs_directory_list(session, '', ''), ['foobar'])

        # Files that have been flushed cannot be manipulated through the custom file system.
        with self.expectedStderrPattern('foobar: remove of file not supported'):
            self.assertRaisesException(wiredtiger.WiredTigerError,
                lambda: fs.fs_remove(session, 'foobar', 0))
        self.assertEqual(fs.fs_directory_list(session, '', ''), ['foobar'])

        fs.terminate(session)
        ss.terminate(session)

    def test_ss_write_read(self):
        """Write a file with a non-sequential block pattern, flush it into the
        store, then read it back (twice, re-fetching after clearing the cache)
        and verify every block."""
        # Write and read to a file non-sequentially.

        session = self.session
        ss = self.get_storage_source()

        # Since this class has multiple tests, append test name to the prefix to
        # avoid namespace collision. 0th element on the stack is the current function.
        prefix = self.bucket_prefix_base + inspect.stack()[0][3] + '/'

        cachedir = self.bucket1 + '_cache'
        os.mkdir(cachedir)

        # Directory store needs the bucket created as a directory on the filesystem.
        if self.ss_name == 'dir_store':
            os.mkdir(self.bucket1)

        fs = ss.ss_customize_file_system(session, self.bucket1, self.auth_token,
            self.get_fs_config(prefix, cachedir))

        # We call these 4K chunks of data "blocks" for this test, but that doesn't
        # necessarily relate to WT block sizing.
        nblocks = 1000
        block_size = 4096
        f = open('abc', 'wb')

        # Create some blocks filled with 'a', etc.
        a_block = ('a' * block_size).encode()
        b_block = ('b' * block_size).encode()
        c_block = ('c' * block_size).encode()
        file_size = nblocks * block_size

        # Write all blocks as 'a', but in reverse order.
        for pos in range(file_size - block_size, 0, -block_size):
            f.seek(pos)
            f.write(a_block)

        # Write the even blocks as 'b', forwards.
        for pos in range(0, file_size, block_size * 2):
            f.seek(pos)
            f.write(b_block)

        # Write every third block as 'c', backwards.
        for pos in range(file_size - block_size, 0, -block_size * 3):
            f.seek(pos)
            f.write(c_block)
        f.close()

        # Flushing copies the file into the file system.
        ss.ss_flush(session, fs, 'abc', 'abc', None)
        ss.ss_flush_finish(session, fs, 'abc', 'abc', None)

        # Use the file system to open and read the file.
        # We do this twice, and between iterations, we remove the cached file to make sure
        # it is copied back from the bucket.
        #
        # XXX: this uses knowledge of the implementation, but at the current time,
        # we don't have a way via the API to "age out" a file from the cache.
        for i in range(0, 2):
            in_block = bytes(block_size)
            fh = fs.fs_open_file(session, 'abc', FileSystem.open_file_type_data, FileSystem.open_readonly)

            # Do some spot checks, reading non-sequentially.
            # Note: assertEquals is a deprecated alias removed in Python 3.12;
            # use assertEqual throughout.
            fh.fh_read(session, 500 * block_size, in_block)  # divisible by 2, not 3
            self.assertEqual(in_block, b_block)
            fh.fh_read(session, 333 * block_size, in_block)  # divisible by 3, not 2
            self.assertEqual(in_block, c_block)
            fh.fh_read(session, 401 * block_size, in_block)  # not divisible by 2 or 3
            self.assertEqual(in_block, a_block)

            # Read the whole file, backwards checking to make sure
            # each block was written correctly.
            for block_num in range(nblocks - 1, 0, -1):
                pos = block_num * block_size
                fh.fh_read(session, pos, in_block)
                if block_num % 3 == 0:
                    self.assertEqual(in_block, c_block)
                elif block_num % 2 == 0:
                    self.assertEqual(in_block, b_block)
                else:
                    self.assertEqual(in_block, a_block)
            fh.close(session)
            os.remove(os.path.join(cachedir, 'abc'))

        ss.terminate(session)

    def create_with_fs(self, fs, fname):
        """Create a small local file with fixed placeholder content.

        The file system handle 'fs' is accepted for interface symmetry but is
        not used: the file is created directly on the local filesystem via
        the regular open() call, not through the custom file system.
        """
        # Use a context manager so the handle is closed even if the write fails.
        # (The original also fetched self.session into an unused local; removed.)
        with open(fname, 'wb') as f:
            f.write('some stuff'.encode())

    # Local cache directories, one per custom file system created by the
    # tests; created on demand in test_ss_file_systems.
    cachedir1 = "./cache1"
    cachedir2 = "./cache2"

    # Return a new list with '.<sfx>' appended to every name in 'lst'.
    def suffix(self, lst, sfx):
        return ['.'.join((name, sfx)) for name in lst]

    def check_dirlist(self, fs, prefix, expect):
        # Directory listings carry no ordering guarantee, so compare
        # sorted copies of the actual and expected object-name lists.
        actual = fs.fs_directory_list(self.session, '', prefix)
        expected = self.suffix(expect, 'wtobj')
        self.assertEquals(sorted(actual), sorted(expected))

    # Verify the set of data files present in the WiredTiger home directory.
    def check_home(self, expect):
        # Collect every .wt file in home, skipping WiredTiger's own metadata
        # files, then compare against the expected names in sorted order.
        entries = os.listdir(self.home)
        actual = sorted(e for e in entries
                        if e.endswith('.wt') and not e.startswith('WiredTiger'))
        self.assertEquals(actual, sorted(self.suffix(expect, 'wt')))

    # Check that objects are "in the cloud" after a flush.  Only the
    # directory store is locally inspectable: its "cloud" is simply the
    # bucket1/bucket2 directories on the local filesystem, so verify their
    # contents; for any other store there is nothing local to examine.
    def check_local_objects(self, expect1, expect2):
        if self.ss_name != 'dir_store':
            return

        pairs = ((self.bucket1, expect1), (self.bucket2, expect2))
        for bucket_dir, names in pairs:
            actual = sorted(os.listdir(bucket_dir))
            self.assertEquals(actual, sorted(self.suffix(names, 'wtobj')))

    # Check that the expected objects are present in each cache directory
    # (objects are copied there by flush_finish).
    def check_caches(self, expect1, expect2):
        pairs = ((self.cachedir1, expect1), (self.cachedir2, expect2))
        for cache_dir, names in pairs:
            actual = sorted(os.listdir(cache_dir))
            self.assertEquals(actual, sorted(self.suffix(names, 'wtobj')))

    # Create a minimal data file, "<name>.wt", containing placeholder text.
    def create_wt_file(self, name):
        fname = '{}.wt'.format(name)
        with open(fname, 'w') as out:
            out.write('hello')

    def test_ss_file_systems(self):
        """Exercise the storage source's custom file systems: creation
        error cases, flush/flush_finish into two independent file systems,
        prefix-based directory listing, and termination ordering."""

        # Test using various buckets, hosts.
        session = self.session
        ss = self.get_storage_source()

        # Since this class has multiple tests, append test name to the prefix to
        # avoid namespace collision. 0th element on the stack is the current function.
        prefix = self.bucket_prefix_base + inspect.stack()[0][3] + '/'

        # Directory store needs the bucket created as a directory on the filesystem.
        if self.ss_name == 'dir_store':
            os.mkdir(self.bucket1)
            os.mkdir(self.bucket2)

        os.mkdir(self.cachedir1)
        os.mkdir(self.cachedir2)
        # Deliberately nonexistent locations used in the error cases below.
        bad_bucket = "./objects_BAD"
        bad_cachedir = '/BAD'

        # Create file system objects. First try some error cases.
        # A nonexistent cache directory must be rejected.
        errmsg = '/No such /'
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: ss.ss_customize_file_system(session, self.bucket1, self.auth_token,
                self.get_fs_config(prefix, bad_cachedir)), errmsg)

        # S3 store expects a region with the bucket
        if self.ss_name == 's3_store':
            bad_bucket += ';us-east-2'

        # A nonexistent bucket must also be rejected.
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: ss.ss_customize_file_system(session, bad_bucket, self.auth_token,
                self.get_fs_config(prefix, self.cachedir1)), errmsg)

        # For directory store - Create an empty file, try to use it as a directory.
        if self.ss_name == 'dir_store':
            with open("some_file", "w"):
                pass
            errmsg = '/Invalid argument/'
            self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
                lambda: ss.ss_customize_file_system(
                    session, "some_file", self.auth_token, ',cache_directory=' + self.bucket1), errmsg)

        # Now create some file systems that should succeed.
        # Use either different bucket directories or different prefixes,
        # so activity that happens in the various file systems should be independent.
        fs1 = ss.ss_customize_file_system(session, self.bucket1, self.auth_token,
            self.get_fs_config(prefix, self.cachedir1))
        fs2 = ss.ss_customize_file_system(session, self.bucket2, self.auth_token,
            self.get_fs_config(prefix, self.cachedir2))

        # Create files in the wt home directory.
        for a in ['beagle', 'bird', 'bison', 'bat']:
            self.create_wt_file(a)
        for a in ['cat', 'cougar', 'coyote', 'cub']:
            self.create_wt_file(a)

        # Everything is in wt home, nothing in the file system yet.
        self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub'])
        self.check_dirlist(fs1, '', [])
        self.check_dirlist(fs2, '', [])
        self.check_caches([], [])
        self.check_local_objects([], [])

        # A flush copies to the cloud, nothing is removed.
        ss.ss_flush(session, fs1, 'beagle.wt', 'beagle.wtobj')
        self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub'])
        self.check_dirlist(fs1, '', ['beagle'])
        self.check_dirlist(fs2, '', [])
        self.check_caches([], [])
        self.check_local_objects(['beagle'], [])

        # Bad file to flush.
        errmsg = '/No such file/'
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: ss.ss_flush(session, fs1, 'bad.wt', 'bad.wtobj'), errmsg)

        # It's okay to flush again, nothing changes.
        ss.ss_flush(session, fs1, 'beagle.wt', 'beagle.wtobj')
        self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub'])
        self.check_dirlist(fs1, '', ['beagle'])
        self.check_dirlist(fs2, '', [])
        self.check_caches([], [])
        self.check_local_objects(['beagle'], [])

        # When we flush_finish, the local file will be in both the local and cache directory.
        ss.ss_flush_finish(session, fs1, 'beagle.wt', 'beagle.wtobj')
        self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub'])
        self.check_dirlist(fs1, '', ['beagle'])
        self.check_dirlist(fs2, '', [])
        self.check_caches(['beagle'], [])
        self.check_local_objects(['beagle'], [])

        # Do some more operations, interleaving flush and flush_finish
        # calls across the two file systems.
        ss.ss_flush(session, fs1, 'bison.wt', 'bison.wtobj')
        ss.ss_flush(session, fs2, 'cat.wt', 'cat.wtobj')
        ss.ss_flush(session, fs1, 'bat.wt', 'bat.wtobj')
        ss.ss_flush_finish(session, fs2, 'cat.wt', 'cat.wtobj')
        ss.ss_flush(session, fs2, 'cub.wt', 'cub.wtobj')
        ss.ss_flush_finish(session, fs1, 'bat.wt', 'bat.wtobj')

        # Only the flush_finish-ed objects appear in the caches.
        self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub'])
        self.check_dirlist(fs1, '', ['beagle', 'bat', 'bison'])
        self.check_dirlist(fs2, '', ['cat', 'cub'])
        self.check_caches(['beagle', 'bat'], ['cat'])
        self.check_local_objects(['beagle', 'bat', 'bison'], ['cat', 'cub'])

        # Test directory listing prefixes.
        self.check_dirlist(fs1, '', ['beagle', 'bat', 'bison'])
        self.check_dirlist(fs1, 'ba', ['bat'])
        self.check_dirlist(fs1, 'be', ['beagle'])
        self.check_dirlist(fs1, 'x', [])

        # Terminate just one of the custom file systems.
        # We should be able to terminate file systems, but we should
        # also be able to terminate the storage source without terminating
        # all the file systems we created.
        fs1.terminate(session)
        ss.terminate(session)
Example #11
0
class test_tiered08(wttest.WiredTigerTestCase):
    """Run checkpoint and flush_tier in background threads while the main
    thread inserts into a tiered table, then verify all inserted data."""

    # One scenario per cloud provider; each supplies the authentication
    # token, bucket name, bucket prefix and extension name.
    storage_sources = [('dir_store',
                        dict(auth_token=get_auth_token('dir_store'),
                             bucket=get_bucket1_name('dir_store'),
                             bucket_prefix="pfx_",
                             ss_name='dir_store')),
                       ('s3',
                        dict(auth_token=get_auth_token('s3_store'),
                             bucket=get_bucket1_name('s3_store'),
                             bucket_prefix=generate_s3_prefix(),
                             ss_name='s3_store'))]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # Number of keys inserted per populate iteration.
    batch_size = 100000

    # Keep inserting keys until we've done this many flush and checkpoint ops.
    ckpt_flush_target = 10

    uri = "table:test_tiered08"

    def conn_config(self):
        """Build the connection configuration string; for the directory
        store, create the bucket directory first if it does not exist."""
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
          'debug_mode=(flush_checkpoint=true),' + \
          'statistics=(fast),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s),tiered_manager=(wait=0)' % self.ss_name

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def get_stat(self, stat):
        """Return the current value of a connection-level statistic."""
        stat_cursor = self.session.open_cursor('statistics:')
        val = stat_cursor[stat][2]
        stat_cursor.close()
        return val

    def key_gen(self, i):
        """Generate a deterministic key for index i."""
        return 'KEY' + str(i)

    def value_gen(self, i):
        """Generate a deterministic value for index i; the repeated filler
        varies the value length across records."""
        return 'VALUE_' + 'filler' * (i % 12) + str(i)

    # Populate the test table.  Keep adding keys until the desired number of flush and
    # checkpoint operations have happened.
    def populate(self):
        ckpt_count = 0
        flush_count = 0
        nkeys = 0

        self.pr('Populating tiered table')
        c = self.session.open_cursor(self.uri, None, None)
        # Checkpoint and flush_tier run in background threads; keep writing
        # batches until both have executed at least ckpt_flush_target times.
        while ckpt_count < self.ckpt_flush_target or flush_count < self.ckpt_flush_target:
            for i in range(nkeys, nkeys + self.batch_size):
                c[self.key_gen(i)] = self.value_gen(i)
            nkeys += self.batch_size
            ckpt_count = self.get_stat(stat.conn.txn_checkpoint)
            flush_count = self.get_stat(stat.conn.flush_tier)
        c.close()
        return nkeys

    def verify(self, key_count):
        """Check that every key written by populate() has the expected
        generated value."""
        self.pr('Verifying tiered table')
        c = self.session.open_cursor(self.uri, None, None)
        for i in range(key_count):
            self.assertEqual(c[self.key_gen(i)], self.value_gen(i))
        c.close()

    def test_tiered08(self):

        # FIXME-WT-7833
        #     This test can trigger races in file handle access during flush_tier.
        #     We will re-enable it when that is fixed.
        self.skipTest(
            'Concurrent flush_tier and insert operations not supported yet.')

        cfg = self.conn_config()
        self.pr('Config is: ' + cfg)
        intl_page = 'internal_page_max=16K'
        base_create = 'key_format=S,value_format=S,' + intl_page
        self.session.create(self.uri, base_create)

        # Background threads drive checkpoint and flush_tier concurrently
        # with the inserts done by populate(); 'done' signals them to stop.
        done = threading.Event()
        ckpt = checkpoint_thread(self.conn, done)
        flush = flush_tier_thread(self.conn, done)

        # Start background threads and give them a chance to start.
        ckpt.start()
        flush.start()
        time.sleep(0.5)

        key_count = self.populate()

        # Stop and reap the background threads before verifying.
        done.set()
        flush.join()
        ckpt.join()

        self.verify(key_count)

        # Reopen the connection so subsequent reads come from the reloaded
        # (tiered) table, and verify again.
        self.close_conn()
        self.pr('Reopening tiered table')
        self.reopen_conn()

        self.verify(key_count)
Example #12
0
class test_tiered12(wttest.WiredTigerTestCase):
    """Check that flush_tier returns as soon as objects are copied to shared
    storage, without waiting for the internal (stress-delayed) thread that
    populates the local cache."""

    # One scenario per cloud provider; each supplies the authentication
    # token, bucket name, bucket prefix and extension name.
    storage_sources = [
        ('dir_store',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              bucket_prefix="pfx_",
              ss_name='dir_store')),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              bucket_prefix=generate_s3_prefix(),
              ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered12-000000000'
    # Name of the first flushed object file.
    obj1file = base + '1.wtobj'
    uri = "table:test_tiered12"

    # Local retention time, in seconds.
    retention = 1
    # Connection configuration string, saved by conn_config.
    saved_conn = ''

    def conn_config(self):
        """Build the connection configuration; the tiered_flush_finish timing
        stress delays the internal flush-finish work (see test_tiered)."""
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        self.saved_conn = \
          'debug_mode=(flush_checkpoint=true),' + \
          'statistics=(all),timing_stress_for_test=(tiered_flush_finish),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % self.bucket + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'local_retention=%d,' % self.retention + \
          'name=%s)' % self.ss_name
        return self.saved_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
        #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def check(self, tc, n):
        """Check keys 0..n-1 are present with identity values, and key n is
        absent."""
        for i in range(0, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        self.assertEquals(tc.search(), wiredtiger.WT_NOTFOUND)

    def test_tiered(self):
        # Default cache location is cache-<bucket-name>
        cache = "cache-" + self.bucket
        # The bucket format for the S3 store is the name and the region separated by a semi-colon.
        # Strip off the region to get the cache folder.
        if self.ss_name == 's3_store':
            cache = cache[:cache.find(';')]

        # Create a table. Add some data. Checkpoint and flush tier.
        # We have configured the timing stress for tiered caching which delays
        # the internal thread calling flush_finish for 1 second.
        # So after flush tier completes, check that the cached object does not
        # exist. Then sleep and check that it does exist.
        #
        # The idea is to make sure flush_tier is not waiting for unnecessary work
        # to be done, but returns as soon as the copying to shared storage completes.
        self.session.create(self.uri, 'key_format=S,value_format=S,')

        # Add data. Checkpoint and flush.
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        self.session.checkpoint()

        self.session.flush_tier(None)

        # On directory store, the bucket object should exist.
        if self.ss_name == 'dir_store':
            bucket_obj = os.path.join(self.bucket,
                                      self.bucket_prefix + self.obj1file)
            self.assertTrue(os.path.exists(bucket_obj))

        # Sleep more than the one second stress timing amount and give the thread time to run.
        time.sleep(2)
        # After sleeping, the internal thread should have created the cached object.
        cache_obj = os.path.join(cache, self.bucket_prefix + self.obj1file)
        self.assertTrue(os.path.exists(cache_obj))
Example #13
0
class test_tiered03(wttest.WiredTigerTestCase):
    """Test sharing data between a primary and a secondary connection."""

    # Size constants.
    K = 1024
    M = 1024 * K
    G = 1024 * M
    # TODO: tiered: change this to a table: URI, otherwise we are
    # not using tiered files.  The use of a second directory for
    # sharing would probably need to be reworked.
    uri = 'file:test_tiered03'

    # One scenario per cloud provider; each supplies the authentication
    # token, bucket name, bucket prefix and extension name.
    storage_sources = [
        ('dirstore',
         dict(auth_token=get_auth_token('dir_store'),
              bucket=get_bucket1_name('dir_store'),
              bucket_prefix="pfx_",
              ss_name='dir_store')),
        ('s3',
         dict(auth_token=get_auth_token('s3_store'),
              bucket=get_bucket1_name('s3_store'),
              bucket_prefix=generate_s3_prefix(),
              ss_name='s3_store')),
    ]
    # Occasionally add a lot of records to vary the amount of work flush does.
    record_count_scenarios = wtscenario.quick_scenarios(
        'nrecs', [10, 10000], [0.9, 0.1])
    scenarios = wtscenario.make_scenarios(storage_sources,
                                          record_count_scenarios,
                                          prune=100,
                                          prunelong=500)

    absolute_bucket_dir = None  # initialized in conn_config to an absolute path

    def conn_config(self):
        """Build the connection configuration string for this connection.

        For the directory store, the first call creates the shared bucket
        directory and records its absolute path so that every subsequent
        connection reuses the same bucket, while each connection keeps its
        own relative cache directory.
        """
        bucket_ret = self.bucket

        # The bucket format for the S3 store is the name and the region separated by a semi-colon.
        if self.ss_name == 's3_store':
            cache_dir = self.bucket[:self.bucket.find(';')] + '-cache'
        else:
            cache_dir = self.bucket + '-cache'

        # We have multiple connections that want to share a bucket.
        # For the directory store, the first time this function is called, we'll
        # establish the absolute path for the bucket, and always use that for
        # the bucket name.
        # The cache directory name is a relative one, so it won't be shared
        # between connections.
        if self.ss_name == 'dir_store':
            # Identity comparison ('is None') is the correct sentinel test,
            # not '== None'.
            if self.absolute_bucket_dir is None:
                self.absolute_bucket_dir = os.path.join(
                    os.getcwd(), self.bucket)
                os.mkdir(self.absolute_bucket_dir)
            bucket_ret = self.absolute_bucket_dir
        return \
          'debug_mode=(flush_checkpoint=true),' + \
          'tiered_storage=(auth_token=%s,' % self.auth_token + \
          'bucket=%s,' % bucket_ret  + \
          'cache_directory=%s,' % cache_dir + \
          'bucket_prefix=%s,' % self.bucket_prefix + \
          'name=%s)' % self.ss_name

    # Load the storage source extension appropriate to this scenario.
    def conn_extensions(self, extlist):
        config = ''
        # Debugging aids (uncomment to pass extra configuration):
        #   s3_store:  config = '=(config=\"(verbose=1)\")'
        #   dir_store: config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        #
        # S3 is an optional loadable extension that not every test
        # environment builds, and Windows cannot load dynamic extension
        # libraries at all; in either case tolerate a missing library.
        if self.ss_name == 's3_store' or os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    # Test sharing data between a primary and a secondary
    def test_sharing(self):
        # FIXME: WT-8235 Enable the test once file containing transaction ids is supported.
        self.skipTest(
            'Sharing the checkpoint file containing transaction ids is not supported'
        )

        ds = SimpleDataSet(self, self.uri, 10)
        ds.populate()
        ds.check()
        self.session.checkpoint()
        ds.check()

        # Create a secondary database
        dir2 = os.path.join(self.home, 'SECONDARY')
        os.mkdir(dir2)
        conn2 = self.setUpConnectionOpen(dir2)
        session2 = conn2.open_session()

        # Reference the tree from the secondary:
        metac = self.session.open_cursor('metadata:')
        metac2 = session2.open_cursor('metadata:', None, 'readonly=0')
        uri2 = self.uri[:5] + '../' + self.uri[5:]
        metac2[uri2] = metac[self.uri] + ",readonly=1"

        cursor2 = session2.open_cursor(uri2)
        ds.check_cursor(cursor2)
        cursor2.close()

        newds = SimpleDataSet(self, self.uri, 10000)
        newds.populate()
        newds.check()
        self.session.checkpoint()
        newds.check()

        # Check we can still read from the last checkpoint
        cursor2 = session2.open_cursor(uri2)
        ds.check_cursor(cursor2)
        cursor2.close()

        # Bump to new checkpoint
        origmeta = metac[self.uri]
        checkpoint = re.search(r',checkpoint=\(.+?\)\)', origmeta).group(0)[1:]
        self.pr('Orig checkpoint: ' + checkpoint)
        session2.alter(uri2, checkpoint)
        self.pr('New metadata on secondaery: ' + metac2[uri2])

        # Check that we can see the new data
        cursor2 = session2.open_cursor(uri2)
        newds.check_cursor(cursor2)