def test_query_partitioned_table(self):
    """Verify a partitioned table with a generated column after a restart.

    Five rows are inserted, each with a timestamp one day further in the
    past. The table is partitioned by the generated column
    ``day__generated``. After a node restart, every row must still be
    returned in id order and its generated column must equal
    ``date_trunc('day', ts)``.
    """
    create_stmt = """ CREATE TABLE parted_table ( id long, ts timestamp, day__generated GENERATED ALWAYS AS date_trunc('day', ts) ) CLUSTERED INTO 1 SHARDS PARTITIONED BY (day__generated) WITH (number_of_replicas = 0) """
    insert_stmt = """ INSERT INTO parted_table (id, ts) VALUES (?, ?) """
    select_stmt = """ SELECT id, date_trunc('day', ts) = day__generated FROM parted_table order by 1 """

    node, _ = self._new_node(self.CRATE_VERSION)
    node.start()
    with connect(node.http_url, error_trace=True) as conn:
        cur = conn.cursor()
        cur.execute(create_stmt)
        # The row id doubles as the number of days the timestamp lies
        # in the past.
        for days_ago in range(5):
            ts = datetime.now() - timedelta(days=days_ago)
            cur.execute(insert_stmt, (days_ago, ts))

    # Restart the node before reading the data back.
    node.stop()
    node.start()

    with connect(node.http_url, error_trace=True) as conn:
        cur = conn.cursor()
        wait_for_active_shards(cur)
        cur.execute(select_stmt)
        rows = cur.fetchall()
        for expected_id, row in enumerate(rows):
            self.assertEqual(row[0], expected_id)
            self.assertTrue(row[1])
def test_blob_record(self):
    """Verify that an uploaded blob is still available after a restart.

    A blob is stored in the blob table ``myblobs`` and the node is
    restarted. The blob must then be visible through SQL, be reported as
    existing by the blob container, return its original payload, and be
    found as a file below ``self._path_data``.
    """
    payload = b'sample data'

    node, _ = self._new_node(self.CRATE_VERSION)
    node.start()
    with connect(node.http_url, error_trace=True) as conn:
        cur = conn.cursor()
        cur.execute(""" CREATE BLOB TABLE myblobs CLUSTERED INTO 1 shards WITH (number_of_replicas = 0) """)
        container = conn.get_blob_container('myblobs')
        digest = container.put(BytesIO(b'sample data'))

    # Restart the node before reading the blob back.
    node.stop()
    node.start()

    with connect(node.http_url, error_trace=True) as conn:
        cur = conn.cursor()
        wait_for_active_shards(cur)

        cur.execute("SELECT count(*) FROM blob.myblobs WHERE digest = ?", (digest,))
        count_row = cur.fetchone()
        self.assertEqual(count_row[0], 1)

        container = conn.get_blob_container('myblobs')
        chunks = container.get(digest)
        self.assertTrue(container.exists(digest))
        self.assertEqual(next(chunks), payload)

    # The blob file is looked up by its digest anywhere below the data path.
    matches = Path(self._path_data).glob(f'**/{digest}')
    blob_file = next(matches)
    self.assertTrue(blob_file.exists())
def test_blob_index(self):
    """Verify blob table metadata is reported correctly after a restart.

    Creates the blob table ``myblobs``, restarts the node, and checks the
    table name, shard count and replica setting listed in
    ``information_schema.tables`` for the ``blob`` schema.
    """
    node, _ = self._new_node(self.CRATE_VERSION)
    node.start()
    with connect(node.http_url, error_trace=True) as conn:
        conn.cursor().execute(""" CREATE BLOB TABLE myblobs CLUSTERED INTO 1 shards WITH (number_of_replicas = 0) """)

    node.stop()
    node.start()

    # Expected values, in the column order of the SELECT below.
    expected_columns = ('myblobs', 1, '0')

    with connect(node.http_url, error_trace=True) as conn:
        cur = conn.cursor()
        wait_for_active_shards(cur)
        cur.execute(""" SELECT table_name, number_of_shards, number_of_replicas FROM information_schema.tables WHERE table_schema = 'blob' """)
        row = cur.fetchone()
        for position, expected in enumerate(expected_columns):
            self.assertEqual(row[position], expected)
def assert_data_persistence(self, version_def, nodes, digest):
    """Start a cluster for ``version_def`` and check that existing data works.

    :param version_def: object providing ``version``, ``java_home`` and
        ``upgrade_segments``.
    :param nodes: node count handed to ``self._new_cluster``.
    :param digest: digest of the blob fetched from blob container ``b1``.

    After starting the cluster this runs the upgrade step, changes the
    ``refresh_interval`` of ``doc.t1`` twice, runs the shared selects and
    fetches the blob. For selected versions it also inserts into
    ``doc.parted``. Finally the registered on-stop handlers are processed.
    """
    java_env = prepare_env(version_def.java_home)
    version = version_def.version
    cluster = self._new_cluster(version, nodes, self.CLUSTER_SETTINGS, java_env)
    cluster.start()

    with connect(cluster.node().http_url, error_trace=True) as conn:
        cur = conn.cursor()
        wait_for_active_shards(cur, 0)
        self._upgrade(cur, version_def.upgrade_segments)

        cur.execute('ALTER TABLE doc.t1 SET ("refresh_interval" = 4000)')
        run_selects(cur, version_def.version)
        conn.get_blob_container('b1').get(digest)
        cur.execute('ALTER TABLE doc.t1 SET ("refresh_interval" = 2000)')

        # Older versions had a bug that made the insert below fail, so it
        # only runs for the versions listed here.
        if version in ('latest-nightly', '3.2'):
            # Check that a new partition and a dynamic column can be created.
            dynamic_column = "t_" + version.replace('.', '_')
            cols = {dynamic_column: True}
            params = (str(uuid4()), version, cols)
            cur.execute(
                'INSERT INTO doc.parted (id, version, cols) values (?, ?, ?)',
                params)

    self._process_on_stop()
def start_cluster_and_alter_tables(self, version_def, nodes):
    """Start a cluster for ``version_def`` and set one replica on ``t1`` and ``p1``.

    :param version_def: object providing ``version`` and ``java_home``.
    :param nodes: node count handed to ``self._new_cluster``.

    Waits for 8 active shards before altering the tables, then processes
    the registered on-stop handlers.
    """
    alter_statements = (
        ''' ALTER TABLE t1 SET (number_of_replicas=1) ''',
        ''' ALTER TABLE p1 SET (number_of_replicas=1) ''',
    )

    cluster = self._new_cluster(
        version_def.version,
        nodes,
        self.CLUSTER_SETTINGS,
        prepare_env(version_def.java_home),
    )
    cluster.start()

    with connect(cluster.node().http_url, error_trace=True) as conn:
        cur = conn.cursor()
        wait_for_active_shards(cur, 8)
        for statement in alter_statements:
            cur.execute(statement)

    self._process_on_stop()
def test_latest_testing_can_be_downgraded_within_hotfix_versions(self):
    """Step a node down through every hotfix version of its feature release.

    A two-node ``latest-testing`` cluster is started and one row is
    written. The node is then moved via ``self.upgrade_node`` to each
    hotfix version from its current one down to ``.0``. After every step
    the row must still be readable.
    """
    cluster = self._new_cluster('latest-testing', 2)
    cluster.start()
    node = cluster.node()

    with connect(node.http_url, error_trace=True) as conn:
        cur = conn.cursor()
        cur.execute('CREATE TABLE tbl (x int)')
        cur.execute('INSERT INTO tbl (x) values (?)', (10, ))

    major, feature, hotfix = node.version
    # Walk from the current hotfix number down to 0, inclusive.
    for patch in reversed(range(hotfix + 1)):
        new_version = (major, feature, patch)
        with self.subTest(version=new_version):
            version_string = '.'.join(str(part) for part in new_version)
            node = self.upgrade_node(node, version_string)
            with connect(node.http_url, error_trace=True) as conn:
                cur = conn.cursor()
                wait_for_active_shards(cur)
                cur.execute('SELECT x FROM tbl')
                values = [record[0] for record in cur.fetchall()]
                self.assertEqual(values, [10])
def _test_rolling_upgrade(self, path, nodes):
    """
    Run a rolling upgrade from ``path.from_version`` to ``path.to_version``.

    A cluster with ``nodes`` nodes is started on the source version and
    filled with data. Each node is then upgraded in turn. After every
    single upgrade a set of SQL statements that involve the whole cluster
    is executed to check that the nodes can still talk to each other, and
    a new partition is created by an insert.
    """
    shards = nodes
    replicas = 1
    active_shards_expected = shards + shards * replicas

    # Queries whose results are fetched but not inspected; they only have
    # to execute without an error.
    aggregation_probes = (
        ''' SELECT type, AVG(value) FROM doc.t1 GROUP BY type ''',
        # Aggregation with different intermediate input; this was a
        # regression for 4.1 <-> 4.2.
        ''' SELECT type, count(distinct value) FROM doc.t1 GROUP BY type ''',
    )

    cluster = self._new_cluster(path.from_version, nodes)
    cluster.start()

    with connect(cluster.node().http_url, error_trace=True) as conn:
        cursor = conn.cursor()
        cursor.execute(f''' CREATE TABLE doc.t1 ( type BYTE, value FLOAT ) CLUSTERED INTO {shards} SHARDS WITH (number_of_replicas={replicas}) ''')
        insert_data(conn, 'doc', 't1', 1000)

        cursor.execute(f''' CREATE TABLE doc.parted ( id INT, value INT ) CLUSTERED INTO {shards} SHARDS PARTITIONED BY (id) WITH (number_of_replicas=0, "write.wait_for_active_shards"=1) ''')
        cursor.execute("INSERT INTO doc.parted (id, value) VALUES (1, 1)")
        # Account for the primary shards of the partition just created.
        active_shards_expected += shards

    for position, old_node in enumerate(cluster):
        upgraded = self.upgrade_node(old_node, path.to_version)
        cluster[position] = upgraded

        with connect(upgraded.http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor, active_shards_expected)

            for statement in aggregation_probes:
                cursor.execute(statement)
                cursor.fetchall()

            # Scalar symbols must work across versions as well.
            cursor.execute(''' SELECT type, value + 1 FROM doc.t1 WHERE value > 1 LIMIT 1 ''')
            cursor.fetchone()

            # Inserts that create a new partition must work mid-upgrade.
            partition_id = position + 10
            cursor.execute("INSERT INTO doc.parted (id, value) VALUES (?, ?)",
                           [partition_id, partition_id])
            # Account for the primary shards of the partition just created.
            active_shards_expected += shards

    # Finally check that all shards (primaries and replicas) of all
    # partitions are started and that the writes made during the upgrade
    # were successful.
    with connect(cluster.node().http_url, error_trace=True) as conn:
        cursor = conn.cursor()
        wait_for_active_shards(cursor, active_shards_expected)
        cursor.execute(''' REFRESH TABLE doc.parted ''')
        cursor.execute(''' SELECT count(*) FROM doc.parted ''')
        count_row = cursor.fetchone()
        # One row from the initial insert plus one per upgraded node.
        self.assertEqual(count_row[0], nodes + 1)
def test_snapshot_restore_and_drop_in_parallel(self):
    """Restore one snapshot while dropping another one at the same time.

    Checks that the snapshot mechanism of CrateDB can run a restore and
    a drop on two different snapshots in parallel. Minio serves as the
    S3 backend of the repository here; other backends are expected to
    behave the same way.
    """
    snapshot_statements = (
        'CREATE SNAPSHOT repo.snapshot1 TABLE doc.test WITH (wait_for_completion = true)',
        'CREATE SNAPSHOT repo.snapshot2 TABLE doc.test WITH (wait_for_completion = true)',
    )

    def minio_is_up():
        return _is_up('127.0.0.1', 9000)

    with MinioServer() as minio:
        server_thread = threading.Thread(target=minio.run, daemon=True)
        server_thread.start()
        wait_until(minio_is_up)

        # Randomised cluster and table layout.
        num_nodes = random.randint(3, 5)
        number_of_shards = random.randint(1, 3)
        number_of_replicas = random.randint(0, 2)
        num_docs = random.randint(1, 100)

        cluster_settings = {
            'cluster.name': gen_id(),
            'path.data': self.PATH_DATA
        }
        # Remove anything left in the data path before starting the cluster.
        shutil.rmtree(self.PATH_DATA, ignore_errors=True)
        cluster = self._new_cluster('latest-nightly', num_nodes, settings=cluster_settings)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor)

            cursor.execute(
                ''' create table doc.test(x int) clustered into ? shards with( number_of_replicas =?) ''',
                (number_of_shards, number_of_replicas))
            insert_data(conn, 'doc', 'test', num_docs)

            cursor.execute(''' CREATE REPOSITORY repo TYPE S3 WITH (access_key = 'minio', secret_key = 'miniostorage', bucket='backups', endpoint = '127.0.0.1:9000', protocol = 'http') ''')
            for statement in snapshot_statements:
                cursor.execute(statement)
            cursor.execute('DROP TABLE doc.test')

            # Start restoring snapshot1 without waiting for completion,
            # then drop snapshot2 while that restore is still running.
            cursor.execute(
                'RESTORE SNAPSHOT repo.snapshot1 ALL WITH (wait_for_completion = false)')
            try:
                cursor.execute('DROP SNAPSHOT repo.snapshot2')
            except ProgrammingError:
                self.fail(
                    "Restore and Drop Snapshot operation should work in parallel")

            def restored_doc_count_matches():
                return self._assert_num_docs(conn, num_docs)

            assert_busy(restored_doc_count_matches)

        cluster.stop()