class DefaultTemplateMetaDataCompatibilityTest(NodeProvider, unittest.TestCase):
    """Checks dynamic string column detection after a version upgrade."""

    CLUSTER_ID = gen_id()

    CLUSTER_SETTINGS = {
        'cluster.name': CLUSTER_ID,
    }

    SUPPORTED_VERSIONS = (
        VersionDef('3.0.x', False, []),
        VersionDef('latest-nightly', False, []),
    )

    def test_metadata_compatibility(self):
        """Start the first supported version once, then verify each later one."""
        node_count = 3
        first_version = self.SUPPORTED_VERSIONS[0]
        later_versions = self.SUPPORTED_VERSIONS[1:]

        cluster = self._new_cluster(
            first_version.version, node_count, self.CLUSTER_SETTINGS)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            conn.cursor().execute("select 1")
        self._process_on_stop()

        for version_def in later_versions:
            self.assert_dynamic_string_detection(version_def, node_count)

    def assert_dynamic_string_detection(self, version_def, nodes):
        """
        Assert that a dynamically added string column is detected correctly.

        The default template is persisted in the cluster state, so a cluster
        that was first started with a lower CrateDB version must have that
        template upgraded as well, if needed. Re-creating tables alone would
        not help in that situation.
        """
        self._move_nodes_folder_if_needed()
        env = prepare_env(version_def.java_home)
        cluster = self._new_cluster(
            version_def.version, nodes, self.CLUSTER_SETTINGS, env)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cur = conn.cursor()
            cur.execute('CREATE TABLE t1 (o object)')
            cur.execute('''INSERT INTO t1 (o) VALUES ({"name" = 'foo'})''')
            self.assertEqual(cur.rowcount, 1)
            cur.execute('REFRESH TABLE t1')
            cur.execute("SELECT o['name'], count(*) FROM t1 GROUP BY 1")
            rows = cur.fetchall()
            self.assertEqual(['foo', 1], rows[0])
            cur.execute('DROP TABLE t1')
        self._process_on_stop()

    def _move_nodes_folder_if_needed(self):
        """Eliminates the cluster-id folder inside the data directory."""
        cluster_dir = os.path.join(self._path_data, self.CLUSTER_ID)
        if not os.path.exists(cluster_dir):
            return
        # Lift '<data>/<cluster-id>/nodes' up to '<data>/nodes', then drop
        # the now obsolete cluster-id directory.
        shutil.move(
            os.path.join(cluster_dir, 'nodes'),
            os.path.join(self._path_data, 'nodes'),
        )
        shutil.rmtree(cluster_dir)
class TableSettingsCompatibilityTest(NodeProvider, unittest.TestCase):
    """Checks that tables carrying outdated settings can still be altered."""

    CLUSTER_SETTINGS = {
        'cluster.name': gen_id(),
    }

    SUPPORTED_VERSIONS = (
        VersionDef('2.3.x', False, []),
        VersionDef('3.2.x', False, []),
    )

    def test_altering_tables_with_old_settings(self):
        """
        Tables created with a setting that is no longer supported must stay
        alterable when running with the latest version.

        Old settings have to be removed on upgrade, or at the latest when some
        table setting is changed. Before 3.1.2 the purging of old settings
        was not done correctly, which made altering such tables fail.
        """
        node_count = 3
        first_version = self.SUPPORTED_VERSIONS[0]
        later_versions = self.SUPPORTED_VERSIONS[1:]

        # "recovery.initial_shards" is only valid until version 2.3.x
        setup_statements = (
            ''' CREATE TABLE t1 (id int) clustered into 4 shards with ("recovery.initial_shards"=1, number_of_replicas=0); ''',
            ''' CREATE TABLE p1 (id int, p int) clustered into 4 shards partitioned by (p) with ("recovery.initial_shards"=1, number_of_replicas=0); ''',
            ''' INSERT INTO p1 (id, p) VALUES (1, 1); ''',
        )

        cluster = self._new_cluster(
            first_version.version, node_count, self.CLUSTER_SETTINGS)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cur = conn.cursor()
            for statement in setup_statements:
                cur.execute(statement)
        self._process_on_stop()

        for version_def in later_versions:
            self.start_cluster_and_alter_tables(version_def, node_count)

    def start_cluster_and_alter_tables(self, version_def, nodes):
        """Start a cluster on ``version_def`` and alter both test tables."""
        env = prepare_env(version_def.java_home)
        cluster = self._new_cluster(
            version_def.version, nodes, self.CLUSTER_SETTINGS, env)
        cluster.start()
        alter_statements = (
            ''' ALTER TABLE t1 SET (number_of_replicas=1) ''',
            ''' ALTER TABLE p1 SET (number_of_replicas=1) ''',
        )
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cur = conn.cursor()
            wait_for_active_shards(cur, 8)
            for statement in alter_statements:
                cur.execute(statement)
        self._process_on_stop()
def test_snapshot_compatibility(self):
    """Test snapshot compatibility when upgrading 3.3.x -> 4.x.x

    A Minio server acts as the S3 repository. The first cluster creates the
    repository and a table, inserts and selects some data, snapshots it and
    drops the table again. Every following cluster restores the data from
    the previous snapshot, runs the selects, and then takes a new snapshot
    before dropping the table.

    We are interested in the transition 3.3.x -> 4.x.x
    """
    with MinioServer() as minio:
        threading.Thread(target=minio.run, daemon=True).start()
        wait_until(lambda: _is_up('127.0.0.1', 9000))

        num_nodes = 3
        num_docs = 30
        path_data = 'data_test_snapshot_compatibility'
        cluster_settings = {
            'cluster.name': gen_id(),
            'path.data': path_data
        }
        shutil.rmtree(path_data, ignore_errors=True)

        previous = None
        for snapshot_no, version in enumerate(self.VERSION, start=1):
            cluster = self._new_cluster(
                version, num_nodes, settings=cluster_settings)
            cluster.start()
            with connect(cluster.node().http_url, error_trace=True) as conn:
                cur = conn.cursor()
                if previous:
                    # Later clusters start from the snapshot taken by the
                    # cluster that ran just before them.
                    restore_stmt = self.RESTORE_SNAPSHOT_TPT.format(
                        snapshot_no - 1)
                    cur.execute(restore_stmt)
                else:
                    cur.execute(self.CREATE_REPOSITORY)
                    cur.execute(CREATE_ANALYZER)
                    cur.execute(CREATE_DOC_TABLE)
                    insert_data(conn, 'doc', 't1', num_docs)
                cur.execute('SELECT COUNT(*) FROM t1')
                self.assertEqual(cur.fetchone()[0], num_docs)
                run_selects(cur, version)
                cur.execute(self.CREATE_SNAPSHOT_TPT.format(snapshot_no))
                cur.execute(self.DROP_DOC_TABLE)
            self._process_on_stop()
            previous = version
        shutil.rmtree(path_data, ignore_errors=True)
class SqlLogicTest(NodeProvider, unittest.TestCase):
    """Runs the whitelisted sqllogictest files against a single node."""

    CLUSTER_SETTINGS = {
        'cluster.name': gen_id(),
    }

    def test_sqllogic(self):
        """ Runs sqllogictests against latest CrateDB. """
        node, _ = self._new_node(self.CRATE_VERSION)
        node.start()
        psql_addr = node.addresses.psql
        logfiles = []
        try:
            with ProcessPoolExecutor() as executor:
                pending = []
                # The index counts every globbed file, including the ones
                # skipped by the whitelist; it names both the log file and
                # the schema.
                for idx, filename in enumerate(tests_path.glob('**/*.test')):
                    filepath = tests_path / filename
                    relpath = str(filepath.relative_to(tests_path))
                    if any(pattern.match(relpath) for pattern in FILE_WHITELIST):
                        logfile = os.path.join(
                            here,
                            f'sqllogic-{os.path.basename(relpath)}-{idx}.log')
                        logfiles.append(logfile)
                        pending.append(executor.submit(
                            run_file,
                            filename=str(filepath),
                            host='localhost',
                            port=str(psql_addr.port),
                            log_level=logging.WARNING,
                            log_file=logfile,
                            failfast=True,
                            schema=f'x{idx}',
                        ))
                # result() re-raises whatever exception a worker hit.
                for finished in as_completed(pending):
                    finished.result()
        finally:
            # Merge the many per-file logs into one file (which is in
            # gitignore) instead of keeping dozens of them around.
            merge_logfiles(logfiles)
def test_snapshot_restore_and_drop_in_parallel(self):
    """Run a snapshot restore and a snapshot drop in parallel.

    The restore and the drop target two different snapshots. The purpose is
    to validate that the snapshot mechanism of CrateDB can handle the two
    operations in parallel. Minio is used as the s3 backend for the
    repository here, but this should work on any other backend as well.
    """
    with MinioServer() as minio:
        threading.Thread(target=minio.run, daemon=True).start()
        wait_until(lambda: _is_up('127.0.0.1', 9000))

        num_nodes = random.randint(3, 5)
        number_of_shards = random.randint(1, 3)
        number_of_replicas = random.randint(0, 2)
        num_docs = random.randint(1, 100)

        cluster_settings = {
            'cluster.name': gen_id(),
            'path.data': self.PATH_DATA
        }
        shutil.rmtree(self.PATH_DATA, ignore_errors=True)
        cluster = self._new_cluster(
            'latest-nightly', num_nodes, settings=cluster_settings)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            cur = conn.cursor()
            wait_for_active_shards(cur)
            table_args = (number_of_shards, number_of_replicas,)
            cur.execute(
                ''' create table doc.test(x int) clustered into ? shards with( number_of_replicas =?) ''',
                table_args)
            insert_data(conn, 'doc', 'test', num_docs)

            cur.execute(''' CREATE REPOSITORY repo TYPE S3 WITH (access_key = 'minio', secret_key = 'miniostorage', bucket='backups', endpoint = '127.0.0.1:9000', protocol = 'http') ''')
            for create_snapshot in (
                'CREATE SNAPSHOT repo.snapshot1 TABLE doc.test WITH (wait_for_completion = true)',
                'CREATE SNAPSHOT repo.snapshot2 TABLE doc.test WITH (wait_for_completion = true)',
            ):
                cur.execute(create_snapshot)
            cur.execute('DROP TABLE doc.test')

            # The restore is issued without waiting for completion, so the
            # drop of snapshot2 below runs while snapshot1 is still being
            # restored.
            cur.execute(
                'RESTORE SNAPSHOT repo.snapshot1 ALL WITH (wait_for_completion = false)')
            try:
                cur.execute('DROP SNAPSHOT repo.snapshot2')
            except ProgrammingError:
                self.fail(
                    "Restore and Drop Snapshot operation should work in parallel"
                )

            assert_busy(lambda: self._assert_num_docs(conn, num_docs))

        cluster.stop()
class MetaDataCompatibilityTest(NodeProvider, unittest.TestCase):
    """Checks that users, privileges and UDFs survive version upgrades."""

    CLUSTER_SETTINGS = {
        'license.enterprise': 'true',
        'lang.js.enabled': 'true',
        'cluster.name': gen_id(),
    }

    SUPPORTED_VERSIONS = (
        VersionDef('2.3.x', False, []),
        VersionDef('3.3.x', False, []),
        VersionDef('latest-nightly', False, []),
    )

    def test_metadata_compatibility(self):
        """Create the metadata on the first version, verify it on the rest."""
        node_count = 3
        first_version = self.SUPPORTED_VERSIONS[0]
        later_versions = self.SUPPORTED_VERSIONS[1:]

        setup_statements = (
            ''' CREATE USER user_a; ''',
            ''' GRANT ALL PRIVILEGES ON SCHEMA doc TO user_a; ''',
            ''' CREATE FUNCTION fact(LONG) RETURNS LONG LANGUAGE JAVASCRIPT AS 'function fact(a) { return a < 2 ? 0 : a * (a - 1); }'; ''',
        )

        cluster = self._new_cluster(
            first_version.version, node_count, self.CLUSTER_SETTINGS)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cur = conn.cursor()
            for statement in setup_statements:
                cur.execute(statement)
        self._process_on_stop()

        for version_def in later_versions:
            self.assert_meta_data(version_def, node_count)

        # restart with latest version
        self.assert_meta_data(self.SUPPORTED_VERSIONS[-1], node_count)

    def assert_meta_data(self, version_def, nodes):
        """Start a cluster on ``version_def`` and check the stored metadata."""
        env = prepare_env(version_def.java_home)
        cluster = self._new_cluster(
            version_def.version, nodes, self.CLUSTER_SETTINGS, env)
        cluster.start()

        expected_privileges = [
            ['SCHEMA', 'user_a', 'doc', 'GRANT', 'DDL'],
            ['SCHEMA', 'user_a', 'doc', 'GRANT', 'DML'],
            ['SCHEMA', 'user_a', 'doc', 'GRANT', 'DQL'],
        ]

        with connect(cluster.node().http_url, error_trace=True) as conn:
            cur = conn.cursor()

            cur.execute(''' SELECT name, superuser FROM sys.users ORDER BY superuser, name; ''')
            users = cur.fetchall()
            self.assertEqual(['user_a', False], users[0])
            self.assertEqual(['crate', True], users[1])

            cur.execute(''' SELECT fact(100); ''')
            self.assertEqual(9900, cur.fetchone()[0])

            cur.execute(''' SELECT class, grantee, ident, state, type FROM sys.privileges ORDER BY class, grantee, ident, state, type ''')
            self.assertEqual(expected_privileges, cur.fetchall())

        self._process_on_stop()
class StorageCompatibilityTest(NodeProvider, unittest.TestCase):
    """Checks that stored table and blob data stays readable across upgrades."""

    CLUSTER_SETTINGS = {
        'cluster.name': gen_id(),
    }

    def test_upgrade_paths(self):
        """Run ``_test_upgrade_path`` for every path from ``get_test_paths()``."""
        for path in get_test_paths():
            # setUp/tearDown are invoked explicitly around every path, in
            # addition to the calls unittest makes around the test method.
            try:
                self.setUp()
                self._test_upgrade_path(path, nodes=3)
            finally:
                self.tearDown()

    def _upgrade(self, cursor, upgrade_segments, num_retries=3):
        """
        Performs the upgrade of the indices and retries in case of
        ProgrammingErrors.

        The retry was added because the wait_for_active shards check
        collects the shard information directly from the nodes. The
        internal ES code, however, retrieves the shard information from the
        ClusterState. A retry is necessary in case the shards are ready but
        the cluster state hasn't been updated yet.

        :param cursor: cursor used to run the OPTIMIZE statements
        :param upgrade_segments: nothing is executed when this is falsy
        :param num_retries: remaining retries on PrimaryMissingActionException
        :raises ProgrammingError: if the failure is of another kind or no
            retries are left
        """
        try:
            if upgrade_segments:
                cursor.execute(
                    'OPTIMIZE TABLE doc.t1 WITH (upgrade_segments = true)')
                cursor.execute(
                    'OPTIMIZE TABLE blob.b1 WITH (upgrade_segments = true)')
        except ProgrammingError as e:
            print(f'OPTIMIZE failed: {e.message} (num_retries={num_retries})')
            if num_retries > 0 and "PrimaryMissingActionException" in e.message:
                # The pause grows as the remaining retries shrink.
                time.sleep(1 / (num_retries + 1))
                self._upgrade(cursor, upgrade_segments, num_retries - 1)
            else:
                # Bare raise re-raises the active exception unchanged.
                raise

    def _test_upgrade_path(self, versions: Tuple[VersionDef, ...], nodes):
        """
        Test upgrade path across specified versions.

        Creates a blob and regular table in first version and inserts a
        record, then goes through all subsequent versions - each time
        verifying that a few simple selects work.

        :param versions: version definitions in upgrade order; the first
            entry creates the data, all later ones verify it
        :param nodes: number of nodes passed to ``_new_cluster``
        """
        version_def = versions[0]
        env = prepare_env(version_def.java_home)
        cluster = self._new_cluster(
            version_def.version, nodes, self.CLUSTER_SETTINGS, env)
        cluster.start()
        digest = None
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute(CREATE_ANALYZER)
            c.execute(CREATE_DOC_TABLE)
            c.execute(CREATE_PARTED_TABLE)
            c.execute(''' INSERT INTO t1 (id, text) VALUES (0, 'Phase queue is foo!') ''')
            insert_data(conn, 'doc', 't1', 10)
            c.execute(CREATE_BLOB_TABLE)
            run_selects(c, versions[0].version)
            container = conn.get_blob_container('b1')
            digest = container.put(BytesIO(b'sample data'))
            container.get(digest)
        self._process_on_stop()

        for version_def in versions[1:]:
            self.assert_data_persistence(version_def, nodes, digest)

        # restart with latest version
        version_def = versions[-1]
        self.assert_data_persistence(version_def, nodes, digest)

    def assert_data_persistence(self, version_def, nodes, digest):
        """
        Start a cluster on ``version_def`` and verify the stored data.

        :param version_def: version definition of the cluster to start
        :param nodes: number of nodes passed to ``_new_cluster``
        :param digest: digest of the blob that must still be retrievable
        """
        env = prepare_env(version_def.java_home)
        version = version_def.version
        cluster = self._new_cluster(version, nodes, self.CLUSTER_SETTINGS, env)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor, 0)
            self._upgrade(cursor, version_def.upgrade_segments)
            cursor.execute(
                'ALTER TABLE doc.t1 SET ("refresh_interval" = 4000)')
            run_selects(cursor, version_def.version)
            container = conn.get_blob_container('b1')
            container.get(digest)
            cursor.execute(
                'ALTER TABLE doc.t1 SET ("refresh_interval" = 2000)')

            # older versions had a bug that caused this to fail
            # NOTE(review): if version strings look like '3.2.x', the '3.2'
            # entry never matches -- confirm against get_test_paths().
            if version in ('latest-nightly', '3.2'):
                # Test that partition and dynamic columns can be created
                obj = {"t_" + version.replace('.', '_'): True}
                args = (str(uuid4()), version, obj)
                cursor.execute(
                    'INSERT INTO doc.parted (id, version, cols) values (?, ?, ?)',
                    args)
        self._process_on_stop()