Example #1
    def test_query_partitioned_table(self):
        (node, _) = self._new_node(self.CRATE_VERSION)
        node.start()
        with connect(node.http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            cursor.execute("""
            CREATE TABLE parted_table (
                id long,
                ts timestamp,
                day__generated GENERATED ALWAYS AS date_trunc('day', ts)
            ) CLUSTERED INTO 1 SHARDS PARTITIONED BY (day__generated)
            WITH (number_of_replicas = 0)
            """)
            for x in range(5):
                cursor.execute(
                    """
                INSERT INTO parted_table (id, ts)
                VALUES (?, ?)
                """, (x, datetime.now() - timedelta(days=x)))
        node.stop()

        node.start()
        with connect(node.http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor)
            cursor.execute("""
            SELECT id, date_trunc('day', ts) = day__generated FROM parted_table ORDER BY 1
            """)
            for idx, result in enumerate(cursor.fetchall()):
                self.assertEqual(result[0], idx)
                self.assertTrue(result[1])
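Every example in this listing calls a wait_for_active_shards helper before querying a freshly (re)started node. The following is only a rough sketch of what such a helper could look like, assuming it polls CrateDB's sys.shards table; the actual crate-qa utility, its signature defaults, and its timeout behaviour may differ.

import time

def wait_for_active_shards(cursor, expected_count=0, timeout=60):
    """Hedged sketch: block until the cluster's shards are active.

    With expected_count > 0, wait until exactly that many shards report
    state 'STARTED'; with the default of 0, wait until no shard is left
    in a non-STARTED state. Timeout and polling interval are assumptions.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if expected_count:
            cursor.execute(
                "SELECT count(*) FROM sys.shards WHERE state = 'STARTED'")
            if cursor.fetchone()[0] == expected_count:
                return
        else:
            cursor.execute(
                "SELECT count(*) FROM sys.shards WHERE state != 'STARTED'")
            if cursor.fetchone()[0] == 0:
                return
        time.sleep(0.5)
    raise TimeoutError(f'shards did not become active within {timeout}s')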
Example #2
    def test_blob_record(self):
        (node, _) = self._new_node(self.CRATE_VERSION)
        node.start()
        digest = ''
        with connect(node.http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            cursor.execute("""
            CREATE BLOB TABLE myblobs
            CLUSTERED INTO 1 shards
            WITH (number_of_replicas = 0)
            """)
            blob_container = conn.get_blob_container('myblobs')
            digest = blob_container.put(BytesIO(b'sample data'))
        node.stop()

        node.start()
        with connect(node.http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor)
            cursor.execute("SELECT count(*) FROM blob.myblobs WHERE digest = ?", (digest,))
            result = cursor.fetchone()
            self.assertEqual(result[0], 1)

            blob_container = conn.get_blob_container('myblobs')
            result = blob_container.get(digest)
            self.assertTrue(blob_container.exists(digest))
            self.assertEqual(next(result), b'sample data')
            # the blob must also exist as a file in the node's data directory
            data_files = Path(self._path_data).glob(f'**/{digest}')
            self.assertTrue(next(data_files).exists())
Example #3
    def test_blob_index(self):
        (node, _) = self._new_node(self.CRATE_VERSION)
        node.start()
        with connect(node.http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            cursor.execute("""
            CREATE BLOB TABLE myblobs
            CLUSTERED INTO 1 shards
            WITH (number_of_replicas = 0)
            """)
        node.stop()

        node.start()
        with connect(node.http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor)
            cursor.execute("""
            SELECT table_name, number_of_shards, number_of_replicas
            FROM information_schema.tables
            WHERE table_schema = 'blob'
            """)
            result = cursor.fetchone()
            self.assertEqual(result[0], 'myblobs')
            self.assertEqual(result[1], 1)
            self.assertEqual(result[2], '0')
Example #4
    def assert_data_persistence(self, version_def, nodes, digest):
        env = prepare_env(version_def.java_home)
        version = version_def.version
        cluster = self._new_cluster(version, nodes, self.CLUSTER_SETTINGS, env)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor, 0)
            self._upgrade(cursor, version_def.upgrade_segments)
            cursor.execute(
                'ALTER TABLE doc.t1 SET ("refresh_interval" = 4000)')
            run_selects(cursor, version_def.version)
            container = conn.get_blob_container('b1')
            container.get(digest)
            cursor.execute(
                'ALTER TABLE doc.t1 SET ("refresh_interval" = 2000)')

            # older versions had a bug that caused this to fail
            if version in ('latest-nightly', '3.2'):
                # Test that partition and dynamic columns can be created
                obj = {"t_" + version.replace('.', '_'): True}
                args = (str(uuid4()), version, obj)
                cursor.execute(
                    'INSERT INTO doc.parted (id, version, cols) values (?, ?, ?)',
                    args)
        self._process_on_stop()
Example #5
    def start_cluster_and_alter_tables(self, version_def, nodes):
        cluster = self._new_cluster(version_def.version, nodes,
                                    self.CLUSTER_SETTINGS,
                                    prepare_env(version_def.java_home))
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            cursor = conn.cursor()
            wait_for_active_shards(cursor, 8)
            cursor.execute('''
                ALTER TABLE t1 SET (number_of_replicas=1)
            ''')
            cursor.execute('''
                ALTER TABLE p1 SET (number_of_replicas=1)
            ''')
        self._process_on_stop()
Example #6
    def test_latest_testing_can_be_downgraded_within_hotfix_versions(self):
        cluster = self._new_cluster('latest-testing', 2)
        cluster.start()
        node = cluster.node()
        with connect(node.http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('CREATE TABLE tbl (x int)')
            c.execute('INSERT INTO tbl (x) values (?)', (10, ))
        major, feature, hotfix = node.version
        for i in range(hotfix, -1, -1):
            new_version = (major, feature, i)
            with self.subTest(version=new_version):
                node = self.upgrade_node(node, '.'.join(map(str, new_version)))

                with connect(node.http_url, error_trace=True) as conn:
                    c = conn.cursor()
                    wait_for_active_shards(c)
                    c.execute('SELECT x FROM tbl')
                    xs = [row[0] for row in c.fetchall()]
                    self.assertEqual(xs, [10])
Example #7
    def _test_rolling_upgrade(self, path, nodes):
        """
        Test a rolling upgrade across given versions.
        An initial test cluster is started and then subsequently each node in
        the cluster is upgraded to the new version.
        After each upgraded node a SQL statement is executed that involves all
        nodes in the cluster, in order to check if communication between nodes
        is possible.
        """

        shards, replicas = (nodes, 1)
        expected_active_shards = shards + shards * replicas

        cluster = self._new_cluster(path.from_version, nodes)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute(f'''
                CREATE TABLE doc.t1 (
                    type BYTE,
                    value FLOAT
                ) CLUSTERED INTO {shards} SHARDS
                WITH (number_of_replicas={replicas})
            ''')
            insert_data(conn, 'doc', 't1', 1000)
            c.execute(f'''
                CREATE TABLE doc.parted (
                    id INT,
                    value INT
                ) CLUSTERED INTO {shards} SHARDS
                PARTITIONED BY (id)
                WITH (number_of_replicas=0, "write.wait_for_active_shards"=1)
            ''')
            c.execute("INSERT INTO doc.parted (id, value) VALUES (1, 1)")
            # Add the shards of the new partition primaries
            expected_active_shards += shards

        for idx, node in enumerate(cluster):
            new_node = self.upgrade_node(node, path.to_version)
            cluster[idx] = new_node
            with connect(new_node.http_url, error_trace=True) as conn:
                c = conn.cursor()
                wait_for_active_shards(c, expected_active_shards)
                c.execute('''
                    SELECT type, AVG(value)
                    FROM doc.t1
                    GROUP BY type
                ''')
                c.fetchall()
                # Ensure that aggregations with different intermediate input work;
                # this was a regression for 4.1 <-> 4.2
                c.execute('''
                    SELECT type, count(distinct value)
                    FROM doc.t1
                    GROUP BY type
                ''')
                c.fetchall()

                # Ensure scalar symbols are working across versions
                c.execute('''
                    SELECT type, value + 1
                    FROM doc.t1
                    WHERE value > 1
                    LIMIT 1
                ''')
                c.fetchone()

                # Ensure that inserts that create a new partition keep working
                # while the cluster is being upgraded
                c.execute("INSERT INTO doc.parted (id, value) VALUES (?, ?)",
                          [idx + 10, idx + 10])
                # Add the shards of the new partition primaries
                expected_active_shards += shards

        # Finally, validate that all shards (primaries and replicas) of all partitions are started
        # and that the writes into the partitioned table during the upgrade were successful
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            wait_for_active_shards(c, expected_active_shards)
            c.execute('''
                REFRESH TABLE doc.parted
            ''')
            c.execute('''
                SELECT count(*)
                FROM doc.parted
            ''')
            res = c.fetchone()
            self.assertEqual(res[0], nodes + 1)
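Example #7 (and Example #8 below) seed their tables through an insert_data helper. The sketch below is a hypothetical stand-in, assuming the (type, value) column layout of doc.t1 above; the real utility presumably derives the columns from the target table rather than hard-coding them.

def insert_data(conn, schema, table, num_rows):
    """Hedged sketch: bulk-insert num_rows generated rows and refresh.

    Assumes a (type, value) column layout as in doc.t1; the actual helper
    used by these tests may generate the rows differently.
    """
    cursor = conn.cursor()
    cursor.executemany(
        f'INSERT INTO "{schema}"."{table}" (type, value) VALUES (?, ?)',
        [(i % 10, float(i)) for i in range(num_rows)])
    cursor.execute(f'REFRESH TABLE "{schema}"."{table}"')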
Example #8
    def test_snapshot_restore_and_drop_in_parallel(self):
        """Test to run the drop and restore operation on two different
           snapshots in parallel.

        The purpose of this test is to validate that the snapshot mechanism
        of CrateDB can handle the two operations in parallel. Here, Minio is
        used as s3 backend for the repository, but this should work on any
        other backend as well.
        """
        with MinioServer() as minio:
            t = threading.Thread(target=minio.run)
            t.daemon = True
            t.start()
            wait_until(lambda: _is_up('127.0.0.1', 9000))

            num_nodes = random.randint(3, 5)
            number_of_shards = random.randint(1, 3)
            number_of_replicas = random.randint(0, 2)
            num_docs = random.randint(1, 100)

            cluster_settings = {
                'cluster.name': gen_id(),
                'path.data': self.PATH_DATA
            }
            shutil.rmtree(self.PATH_DATA, ignore_errors=True)
            cluster = self._new_cluster('latest-nightly',
                                        num_nodes,
                                        settings=cluster_settings)
            cluster.start()

            with connect(cluster.node().http_url, error_trace=True) as conn:
                c = conn.cursor()
                wait_for_active_shards(c)
                c.execute('''
                    CREATE TABLE doc.test (x INT)
                    CLUSTERED INTO ? SHARDS
                    WITH (number_of_replicas = ?)
                ''', (number_of_shards, number_of_replicas))

                insert_data(conn, 'doc', 'test', num_docs)

                c.execute('''
                    CREATE REPOSITORY repo TYPE S3
                    WITH (access_key = 'minio',
                          secret_key = 'miniostorage',
                          bucket = 'backups',
                          endpoint = '127.0.0.1:9000',
                          protocol = 'http')
                ''')

                c.execute(
                    'CREATE SNAPSHOT repo.snapshot1 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute(
                    'CREATE SNAPSHOT repo.snapshot2 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute('DROP TABLE doc.test')
                # Restore snapshot1 without waiting for completion, then drop
                # snapshot2 while the restore is still running
                c.execute(
                    'RESTORE SNAPSHOT repo.snapshot1 ALL WITH (wait_for_completion = false)'
                )
                try:
                    c.execute('DROP SNAPSHOT repo.snapshot2')
                except ProgrammingError:
                    self.fail(
                        "Restore and Drop Snapshot operation should work in parallel"
                    )

                assert_busy(lambda: self._assert_num_docs(conn, num_docs))

            cluster.stop()
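The assert_busy call above repeatedly invokes a _assert_num_docs helper until it stops raising. A plausible sketch of that method, assuming it simply refreshes the restored table and compares the row count, could look like this:

    def _assert_num_docs(self, conn, expected):
        # Hedged sketch: refresh the restored table and check that all
        # documents from the snapshot are back.
        c = conn.cursor()
        c.execute('REFRESH TABLE doc.test')
        c.execute('SELECT count(*) FROM doc.test')
        self.assertEqual(c.fetchone()[0], expected)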