Example #1
    def _test_turnoff_translog_retention_after_upgraded(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            number_of_replicas = random.randint(0, 2)
            c.execute(
                '''
                create table doc.test (x int) clustered into 1 shards
                with (
                    number_of_replicas = ?,
                    "soft_deletes.enabled" = true
                )
                ''', (number_of_replicas,))

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            insert_data(conn, 'doc', 'test', random.randint(100, 200))
            c.execute('refresh table doc.test')

            num_docs = random.randint(0, 100)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            c.execute('refresh table doc.test')
            self._assert_translog_is_empty(conn, 'doc', 'test')
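
Note: _assert_translog_is_empty is not shown in the snippet. A minimal sketch of what it might look like, assuming CrateDB exposes translog statistics via the translog_stats column of sys.shards (the actual helper lives on the test class):

    def _assert_translog_is_empty(self, conn, schema, table):
        # sketch: once translog retention is off, no shard of the table
        # should report any (uncommitted) translog operations
        c = conn.cursor()
        c.execute(
            '''select translog_stats['number_of_operations'],
                      translog_stats['uncommitted_operations']
               from sys.shards
               where schema_name = ? and table_name = ?''',
            (schema, table))
        for number_of_operations, uncommitted_operations in c.fetchall():
            self.assertEqual(number_of_operations, 0)
            self.assertEqual(uncommitted_operations, 0)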
Example #2
    def _test_upgrade_path(self, versions: Tuple[VersionDef, ...], nodes):
        """ Test the upgrade path across the specified versions.

        Creates a blob table and a regular table in the first version and
        inserts records, then goes through all subsequent versions, each time
        verifying that a few simple selects work.
        """
        version_def = versions[0]
        env = prepare_env(version_def.java_home)
        cluster = self._new_cluster(version_def.version, nodes,
                                    self.CLUSTER_SETTINGS, env)
        cluster.start()
        digest = None
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute(CREATE_ANALYZER)
            c.execute(CREATE_DOC_TABLE)
            c.execute(CREATE_PARTED_TABLE)
            c.execute('''
                INSERT INTO t1 (id, text) VALUES (0, 'Phase queue is foo!')
            ''')
            insert_data(conn, 'doc', 't1', 10)
            c.execute(CREATE_BLOB_TABLE)
            run_selects(c, versions[0].version)
            container = conn.get_blob_container('b1')
            digest = container.put(BytesIO(b'sample data'))
            container.get(digest)
        self._process_on_stop()

        for version_def in versions[1:]:
            self.assert_data_persistence(version_def, nodes, digest)

        # restart with latest version
        version_def = versions[-1]
        self.assert_data_persistence(version_def, nodes, digest)
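
Note: assert_busy, used throughout these examples, retries an assertion until it passes or a timeout expires. A rough sketch of such a helper (the real one ships with the test utilities; signature and backoff are assumptions):

import time

def assert_busy(assertion, timeout=60, interval=0.5):
    # sketch: re-run the assertion callable until it stops raising
    # AssertionError or the timeout is exceeded
    deadline = time.monotonic() + timeout
    while True:
        try:
            assertion()
            return
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(interval)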
Example #3
    def _test_recovery(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                create table doc.test (x int) clustered into 1 shards
                with (
                    number_of_replicas = 1,
                    "unassigned.node_left.delayed_timeout" = '100ms',
                    "allocation.max_retries" = '0'
                )
            ''')

            num_docs = random.randint(0, 10)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            if random.choice([True, False]):
                c.execute("refresh table doc.test")

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            if random.choice([True, False]):
                c.execute("refresh table doc.test")

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
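
Note: _assert_is_green presumably checks table health via CrateDB's sys.health table; a minimal sketch under that assumption:

    def _assert_is_green(self, conn, schema, table):
        # sketch: sys.health reports GREEN once all primaries and replicas
        # of the table are allocated and started
        c = conn.cursor()
        c.execute(
            'select health from sys.health where table_schema = ? and table_name = ?',
            (schema, table))
        rows = c.fetchall()
        self.assertTrue(rows)
        for (health,) in rows:
            self.assertEqual(health, 'GREEN')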
Example #4
    def _test_retention_leases_established_when_relocating_primary(self, path):
        number_of_nodes = 3
        cluster = self._new_cluster(path.from_version, number_of_nodes)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()

            number_of_shards = random.randint(1, 5)
            number_of_replicas = random.randint(0, 1)

            c.execute(
                '''create table doc.test(x int) clustered into ? shards with(
                    "number_of_replicas" = ?,
                    "soft_deletes.enabled" = false,
                    "allocation.max_retries" = 0,
                    "unassigned.node_left.delayed_timeout" = '100ms'
                    )''', (
                    number_of_shards,
                    number_of_replicas,
                ))

            number_of_docs = random.randint(0, 10)
            if number_of_docs > 0:
                insert_data(conn, 'doc', 'test', number_of_docs)

            if random.choice([True, False]):
                c.execute('refresh table doc.test')

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            self._upgrade_to_mixed_cluster(cluster, path.to_version)
            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            # trigger a primary relocation by excluding the primary's node from this table
            c.execute(
                '''select node['id'] from sys.shards where primary = true and table_name = 'test' '''
            )
            primary_node_ids = [row[0] for row in c.fetchall()]
            self.assertTrue(primary_node_ids)
            # the exclude setting accepts a comma-separated list of node ids
            c.execute(
                'alter table doc.test set ("routing.allocation.exclude._id" = ?)',
                (','.join(primary_node_ids),))

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            assert_busy(
                lambda: self._assert_ensure_peer_recovery_retention_leases_renewed_and_synced(
                    conn, 'doc', 'test'))

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version, number_of_nodes)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            assert_busy(
                lambda: self._assert_ensure_peer_recovery_retention_leases_renewed_and_synced(
                    conn, 'doc', 'test'))
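
Note: the retention-lease assertion is also not shown. Assuming sys.shards exposes seq_no_stats and retention_leases (available in recent CrateDB versions), a sketch could verify that every peer-recovery retention lease has advanced past the shard's global checkpoint:

    def _assert_ensure_peer_recovery_retention_leases_renewed_and_synced(
            self, conn, schema, table):
        # sketch: a synced lease retains operations starting right after
        # the global checkpoint
        c = conn.cursor()
        c.execute(
            '''select seq_no_stats['global_checkpoint'],
                      retention_leases['leases']
               from sys.shards
               where schema_name = ? and table_name = ?''',
            (schema, table))
        for global_checkpoint, leases in c.fetchall():
            self.assertTrue(leases)
            for lease in leases:
                self.assertGreaterEqual(
                    lease['retaining_seq_no'], global_checkpoint + 1)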
Example #5
    def test_snapshot_compatibility(self):
        """Test snapshot compatibility when upgrading 3.3.x -> 4.x.x

        Using Minio as a S3 repository, the first cluster that runs
        creates the repo, a table and inserts/selects some data, which
        then is snapshotted and deleted. The next cluster recovers the
        data from the last snapshot, performs further inserts/selects,
        to then snapshot the data and delete it.

        We are interested in the transition 3.3.x -> 4.x.x
        """
        with MinioServer() as minio:
            t = threading.Thread(target=minio.run)
            t.daemon = True
            t.start()
            wait_until(lambda: _is_up('127.0.0.1', 9000))

            num_nodes = 3
            num_docs = 30
            prev_version = None
            num_snapshot = 1
            path_data = 'data_test_snapshot_compatibility'
            cluster_settings = {
                'cluster.name': gen_id(),
                'path.data': path_data
            }
            shutil.rmtree(path_data, ignore_errors=True)
            for version in self.VERSION:
                cluster = self._new_cluster(version,
                                            num_nodes,
                                            settings=cluster_settings)
                cluster.start()
                with connect(cluster.node().http_url,
                             error_trace=True) as conn:
                    c = conn.cursor()
                    if not prev_version:
                        c.execute(self.CREATE_REPOSITORY)
                        c.execute(CREATE_ANALYZER)
                        c.execute(CREATE_DOC_TABLE)
                        insert_data(conn, 'doc', 't1', num_docs)
                    else:
                        c.execute(
                            self.RESTORE_SNAPSHOT_TPT.format(num_snapshot - 1))
                    c.execute('SELECT COUNT(*) FROM t1')
                    rowcount = c.fetchone()[0]
                    self.assertEqual(rowcount, num_docs)
                    run_selects(c, version)
                    c.execute(self.CREATE_SNAPSHOT_TPT.format(num_snapshot))
                    c.execute(self.DROP_DOC_TABLE)
                self._process_on_stop()
                prev_version = version
                num_snapshot += 1
            shutil.rmtree(path_data, ignore_errors=True)
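
Note: CREATE_REPOSITORY, CREATE_SNAPSHOT_TPT and RESTORE_SNAPSHOT_TPT are class constants that are not reproduced here. Plausible definitions, modeled on the repository statement in Example #9 (snapshot names and options are assumptions):

    CREATE_REPOSITORY = '''
        CREATE REPOSITORY repo TYPE S3
        WITH (access_key = 'minio',
              secret_key = 'miniostorage',
              bucket = 'backups',
              endpoint = '127.0.0.1:9000',
              protocol = 'http')
    '''
    CREATE_SNAPSHOT_TPT = \
        'CREATE SNAPSHOT repo.snapshot_{} ALL WITH (wait_for_completion = true)'
    RESTORE_SNAPSHOT_TPT = \
        'RESTORE SNAPSHOT repo.snapshot_{} ALL WITH (wait_for_completion = true)'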
Example #6
    def _test_recovery_with_concurrent_indexing(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                create table doc.test (x int) clustered into 1 shards
                with (
                    number_of_replicas = 2,
                    "unassigned.node_left.delayed_timeout" = '100ms',
                    "allocation.max_retries" = '0'
                )
            ''')

            # insert data into the initial homogeneous cluster
            insert_data(conn, 'doc', 'test', 10)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            # make sure that we can index while the replicas are recovering
            c.execute('''alter table doc.test set ("routing.allocation.enable"='primaries')''')

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            c.execute('''alter table doc.test set ("routing.allocation.enable"='all')''')
            # insert data into a mixed cluster
            insert_data(conn, 'doc', 'test', 50)
            c.execute('refresh table doc.test')
            # verify that the writes against the mixed cluster are visible
            c.execute('select count(*) from doc.test')
            self.assertEqual(c.fetchone()[0], 60)
            # check counts for each node individually
            c.execute('select id from sys.nodes')
            node_ids = c.fetchall()
            self.assertEqual(len(node_ids), self.NUMBER_OF_NODES)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            for node_id in node_ids:
                assert_busy(lambda: self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 60))

            c.execute('''alter table doc.test set ("routing.allocation.enable"='primaries')''')
            # upgrade the full cluster
            self._upgrade_cluster(cluster, path.to_version, self.NUMBER_OF_NODES)
            c.execute('''alter table doc.test set ("routing.allocation.enable"='all')''')

            insert_data(conn, 'doc', 'test', 45)
            c.execute('refresh table doc.test')
            c.execute('select count(*) from doc.test')
            res = c.fetchone()
            self.assertEqual(res[0], 105)

            c.execute('select id from sys.nodes')
            node_ids = c.fetchall()
            self.assertEqual(len(node_ids), self.NUMBER_OF_NODES)

            for node_id in node_ids:
                assert_busy(lambda: self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 105))
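
Note: _assert_num_docs_by_node_id can be sketched by summing per-shard document counts from sys.shards for the given node (an assumption about the helper's implementation):

    def _assert_num_docs_by_node_id(self, conn, schema, table, node_id, expected):
        # sketch: sum the document counts of all shards of the table that
        # are currently allocated on the given node
        c = conn.cursor()
        c.execute(
            '''select sum(num_docs) from sys.shards
               where schema_name = ? and table_name = ? and node['id'] = ?''',
            (schema, table, node_id))
        self.assertEqual(c.fetchone()[0], expected)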
Example #7
    def _test_operation_based_recovery(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                create table doc.test (x int) clustered into 1 shards
                with (
                    "number_of_replicas" = 2,
                    "soft_deletes.enabled" = true
                )
            ''')

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            insert_data(conn, 'doc', 'test', random.randint(100, 200))
            c.execute('refresh table doc.test')

            self._assert_ensure_checkpoints_are_synced(conn, 'doc', 'test')
            num_docs = random.randint(0, 3)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            num_docs = random.randint(0, 3)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)
            self._assert_ensure_checkpoints_are_synced(conn, 'doc', 'test')

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            num_docs = random.randint(0, 3)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            self._assert_ensure_checkpoints_are_synced(conn, 'doc', 'test')
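
Note: a sketch of _assert_ensure_checkpoints_are_synced, assuming the seq_no_stats column of sys.shards and reusing the assert_busy helper sketched above; all shard copies should have caught up to the highest sequence number:

    def _assert_ensure_checkpoints_are_synced(self, conn, schema, table):
        def checkpoints_synced():
            # sketch: max_seq_no, local_checkpoint and global_checkpoint
            # agree on every shard copy once all writes are fully synced
            c = conn.cursor()
            c.execute(
                '''select seq_no_stats['max_seq_no'],
                          seq_no_stats['local_checkpoint'],
                          seq_no_stats['global_checkpoint']
                   from sys.shards
                   where schema_name = ? and table_name = ?''',
                (schema, table))
            for max_seq_no, local_checkpoint, global_checkpoint in c.fetchall():
                self.assertEqual(max_seq_no, local_checkpoint)
                self.assertEqual(max_seq_no, global_checkpoint)
        assert_busy(checkpoints_synced)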
Example #8
    def _test_rolling_upgrade(self, path, nodes):
        """
        Test a rolling upgrade across given versions.
        An initial test cluster is started and then subsequently each node in
        the cluster is upgraded to the new version.
        After each upgraded node a SQL statement is executed that involves all
        nodes in the cluster, in order to check if communication between nodes
        is possible.
        """

        shards, replicas = (nodes, 1)
        expected_active_shards = shards + shards * replicas

        cluster = self._new_cluster(path.from_version, nodes)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute(f'''
                CREATE TABLE doc.t1 (
                    type BYTE,
                    value FLOAT
                ) CLUSTERED INTO {shards} SHARDS
                WITH (number_of_replicas={replicas})
            ''')
            insert_data(conn, 'doc', 't1', 1000)
            c.execute(f'''
                CREATE TABLE doc.parted (
                    id INT,
                    value INT
                ) CLUSTERED INTO {shards} SHARDS
                PARTITIONED BY (id)
                WITH (number_of_replicas=0, "write.wait_for_active_shards"=1)
            ''')
            c.execute("INSERT INTO doc.parted (id, value) VALUES (1, 1)")
            # Add the shards of the new partition primaries
            expected_active_shards += shards

        for idx, node in enumerate(cluster):
            new_node = self.upgrade_node(node, path.to_version)
            cluster[idx] = new_node
            with connect(new_node.http_url, error_trace=True) as conn:
                c = conn.cursor()
                wait_for_active_shards(c, expected_active_shards)
                c.execute('''
                    SELECT type, AVG(value)
                    FROM doc.t1
                    GROUP BY type
                ''')
                c.fetchall()
                # Ensure aggregation with different intermediate input works;
                # this was a regression between 4.1 and 4.2
                c.execute('''
                    SELECT type, count(distinct value)
                    FROM doc.t1
                    GROUP BY type
                ''')
                c.fetchall()

                # Ensure scalar symbols are working across versions
                c.execute('''
                    SELECT type, value + 1
                    FROM doc.t1
                    WHERE value > 1
                    LIMIT 1
                ''')
                c.fetchone()

                # Ensure that inserts that create a new partition work while upgrading
                c.execute("INSERT INTO doc.parted (id, value) VALUES (?, ?)",
                          [idx + 10, idx + 10])
                # Add the shards of the new partition primaries
                expected_active_shards += shards

        # Finally validate that all shards (primaries and replicas) of all partitions are started
        # and writes into the partitioned table while upgrading were successful
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            wait_for_active_shards(c, expected_active_shards)
            c.execute('''
                REFRESH TABLE doc.parted
            ''')
            c.execute('''
                SELECT count(*)
                FROM doc.parted
            ''')
            res = c.fetchone()
            self.assertEqual(res[0], nodes + 1)
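
Note: wait_for_active_shards blocks until the expected number of shards is started. A rough sketch against sys.shards (the real helper comes from the test utilities; calling it without an explicit count, as in Example #9, presumably waits for all shards to settle):

import time

def wait_for_active_shards(cursor, expected_active_shards, timeout=60):
    # sketch: poll sys.shards until enough shards report STARTED
    deadline = time.monotonic() + timeout
    while True:
        cursor.execute(
            "select count(*) from sys.shards where state = 'STARTED'")
        if cursor.fetchone()[0] >= expected_active_shards:
            return
        if time.monotonic() >= deadline:
            raise TimeoutError('expected shards did not become active in time')
        time.sleep(0.5)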
Example #9
    def test_snapshot_restore_and_drop_in_parallel(self):
        """Test to run the drop and restore operation on two different
           snapshots in parallel.

        The purpose of this test is to validate that the snapshot mechanism
        of CrateDB can handle the two operations in parallel. Here, Minio is
        used as s3 backend for the repository, but this should work on any
        other backend as well.
        """
        with MinioServer() as minio:
            t = threading.Thread(target=minio.run)
            t.daemon = True
            t.start()
            wait_until(lambda: _is_up('127.0.0.1', 9000))

            num_nodes = random.randint(3, 5)
            number_of_shards = random.randint(1, 3)
            number_of_replicas = random.randint(0, 2)
            num_docs = random.randint(1, 100)

            cluster_settings = {
                'cluster.name': gen_id(),
                'path.data': self.PATH_DATA
            }
            shutil.rmtree(self.PATH_DATA, ignore_errors=True)
            cluster = self._new_cluster('latest-nightly',
                                        num_nodes,
                                        settings=cluster_settings)
            cluster.start()

            with connect(cluster.node().http_url, error_trace=True) as conn:
                c = conn.cursor()
                wait_for_active_shards(c)
                c.execute(
                    '''
                    create table doc.test (x int) clustered into ? shards
                    with (number_of_replicas = ?)
                    ''', (
                        number_of_shards,
                        number_of_replicas,
                    ))

                insert_data(conn, 'doc', 'test', num_docs)

                c.execute('''
                    CREATE REPOSITORY repo TYPE S3
                    WITH (access_key = 'minio',
                          secret_key = 'miniostorage',
                          bucket = 'backups',
                          endpoint = '127.0.0.1:9000',
                          protocol = 'http')
                ''')

                c.execute(
                    'CREATE SNAPSHOT repo.snapshot1 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute(
                    'CREATE SNAPSHOT repo.snapshot2 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute('DROP TABLE doc.test')
                # Start restoring snapshot1 without waiting for completion, then
                # drop snapshot2 while the restore is still running
                c.execute(
                    'RESTORE SNAPSHOT repo.snapshot1 ALL WITH (wait_for_completion = false)'
                )
                try:
                    c.execute('DROP SNAPSHOT repo.snapshot2')
                except ProgrammingError:
                    self.fail(
                        "Restore and Drop Snapshot operation should work in parallel"
                    )

                assert_busy(lambda: self._assert_num_docs(conn, num_docs))

            cluster.stop()
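
Note: _assert_num_docs can be sketched as a plain count over the restored table (the refresh is an assumption to make the restored rows visible):

    def _assert_num_docs(self, conn, expected):
        # sketch: after the restore completes, the table contains exactly
        # the rows inserted before the snapshot was taken
        c = conn.cursor()
        c.execute('refresh table doc.test')
        c.execute('select count(*) from doc.test')
        self.assertEqual(c.fetchone()[0], expected)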
Example #10
    def _test_relocation_with_concurrent_indexing(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                create table doc.test (x int) clustered into 1 shards
                with (
                    "number_of_replicas" = 2,
                    "unassigned.node_left.delayed_timeout" = '100ms',
                    "allocation.max_retries" = '0'
                )
            ''')

            insert_data(conn, 'doc', 'test', 10)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            # make sure that no shards are allocated, so we can make sure the primary stays
            # on the old node (when one node stops, we lose the master too, so a replica
            # will not be promoted)
            c.execute(
                '''alter table doc.test set("routing.allocation.enable"='none')'''
            )

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            c.execute(
                '''select id from sys.nodes order by version['number'] desc limit 1'''
            )
            new_node_id = c.fetchone()[0]
            c.execute(
                '''select id from sys.nodes order by version['number'] asc limit 1'''
            )
            old_node_id = c.fetchone()[0]

            # remove the replicas and guarantee that the primary is placed on the old node
            c.execute(
                '''alter table doc.test set (
                        "number_of_replicas"=0,
                        "routing.allocation.enable"='all',
                        "routing.allocation.include._id"=?
                        )''', (old_node_id, ))

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            c.execute(
                '''alter table doc.test set ("routing.allocation.include._id"=?)''',
                (new_node_id, ))
            insert_data(conn, 'doc', 'test', 50)

            # ensure the relocation from the old node to the new node has occurred;
            # without this check the table can already report green even though the
            # shards haven't moved to the new node yet (allocation can be throttled)
            assert_busy(lambda: self._assert_shard_state(
                conn, 'doc', 'test', new_node_id, 'STARTED'))
            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            c.execute('refresh table doc.test')
            self._assert_num_docs_by_node_id(conn, 'doc', 'test', new_node_id,
                                             60)

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            c.execute('''alter table doc.test set("number_of_replicas"=2)''')
            c.execute(
                '''alter table doc.test reset("routing.allocation.include._id")'''
            )

            insert_data(conn, 'doc', 'test', 45)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            c.execute('refresh table doc.test')
            c.execute('select id from sys.nodes')
            node_ids = c.fetchall()
            self.assertEqual(len(node_ids), self.NUMBER_OF_NODES)

            for node_id in node_ids:
                self._assert_num_docs_by_node_id(conn, 'doc', 'test',
                                                 node_id[0], 105)
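
Note: _assert_shard_state is assumed to inspect the routing state of the table's shards on the given node via sys.shards:

    def _assert_shard_state(self, conn, schema, table, node_id, state):
        # sketch: at least one shard of the table must be allocated on the
        # node, and all of its shards there must have reached the state
        c = conn.cursor()
        c.execute(
            '''select routing_state from sys.shards
               where schema_name = ? and table_name = ? and node['id'] = ?''',
            (schema, table, node_id))
        rows = c.fetchall()
        self.assertTrue(rows)
        for (routing_state,) in rows:
            self.assertEqual(routing_state, state)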