Example 1
    def _test_turnoff_translog_retention_after_upgraded(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            number_of_replicas = random.randint(0, 2)
            c.execute(
                '''
                        create table doc.test(x int) clustered into 1 shards with( number_of_replicas =?,
                        "soft_deletes.enabled" = true)
                     ''', (number_of_replicas, ))

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            insert_data(conn, 'doc', 'test', random.randint(100, 200))
            c.execute('refresh table doc.test')

            num_docs = random.randint(0, 100)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            # upgrade the cluster to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            c.execute('refresh table doc.test')
            self._assert_translog_is_empty(conn, 'doc', 'test')
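
The helpers used throughout these examples (assert_busy, _assert_is_green, insert_data and the other _assert_* methods) are defined elsewhere in the test suite and not shown here. As a rough orientation only, a minimal standalone sketch of the retry helper and the health check could look like the following, assuming CrateDB's sys.health table and the crate Python client; the real implementations may differ.

import time

def assert_busy(assertion, timeout=60, interval=0.5):
    # Retry the assertion callable until it stops raising AssertionError
    # or the timeout elapses (the tests pass lambdas to this helper).
    deadline = time.time() + timeout
    while True:
        try:
            assertion()
            return
        except AssertionError:
            if time.time() >= deadline:
                raise
            time.sleep(interval)

def assert_is_green(conn, schema, table):
    # sys.health reports one row per table/partition with a GREEN/YELLOW/RED status.
    c = conn.cursor()
    c.execute(
        'select health from sys.health where table_schema = ? and table_name = ?',
        (schema, table))
    row = c.fetchone()
    assert row is not None and row[0] == 'GREEN', row
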
Example 2
    def _test_recovery(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                        create table doc.test(x int) clustered into 1 shards with( number_of_replicas = 1,
                         "unassigned.node_left.delayed_timeout" = '100ms', "allocation.max_retries" = '0')
                    ''')

            num_docs = random.randint(0, 10)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            if random.choice([True, False]):
                c.execute("refresh table doc.test")

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            if random.choice([True, False]):
                c.execute("refresh table doc.test")

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
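
insert_data is another shared helper; the examples only require that it inserts the given number of rows. A hypothetical stand-in, assuming the single integer column x used by these tables, could be:

def insert_data(conn, schema, table, num_docs):
    # Bulk-insert num_docs rows; the column name x matches the tables created in these examples.
    c = conn.cursor()
    c.executemany(
        f'insert into "{schema}"."{table}" (x) values (?)',
        [(i,) for i in range(num_docs)])
    # Rows only become visible to queries after a refresh, which is why the
    # tests call "refresh table" before counting.
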
Example 3
    def _test_closed_index_during_rolling_upgrade(self, path):
        """
        This test creates and closes a new table at every stage of the rolling
        upgrade. It then checks that the table is effectively closed and
        replicated.
        """

        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                        create table doc.old_cluster(x int) clustered into 1 shards with( number_of_replicas = 0)
                      ''')

            self._assert_is_green(conn, 'doc', 'old_cluster')
            c.execute('alter table doc.old_cluster close')
            self._assert_is_closed(conn, 'doc', 'old_cluster')

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            self._assert_is_closed(conn, 'doc', 'old_cluster')

            c.execute('''
                      create table doc.mixed_cluster(x int) clustered into 1 shards with( number_of_replicas = 0)
                      ''')

            assert_busy(
                lambda: self._assert_is_green(conn, 'doc', 'mixed_cluster'))
            c.execute('alter table doc.mixed_cluster close')

            self._assert_is_closed(conn, 'doc', 'mixed_cluster')

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            self._assert_is_closed(conn, 'doc', 'old_cluster')
            self._assert_is_closed(conn, 'doc', 'mixed_cluster')

            c.execute('''
                      create table doc.upgraded_cluster(x int) clustered into 1 shards with( number_of_replicas = 0)
                      ''')

            assert_busy(
                lambda: self._assert_is_green(conn, 'doc', 'upgraded_cluster'))
            c.execute('alter table doc.upgraded_cluster close')

            self._assert_is_closed(conn, 'doc', 'upgraded_cluster')
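
_assert_is_closed is not shown either. Since CrateDB exposes a closed flag per table in information_schema.tables, a plausible sketch (an assumption, not the suite's actual code) is:

def assert_is_closed(conn, schema, table):
    # information_schema.tables carries a boolean "closed" column for each table.
    c = conn.cursor()
    c.execute(
        'select closed from information_schema.tables '
        'where table_schema = ? and table_name = ?',
        (schema, table))
    row = c.fetchone()
    assert row is not None and row[0] is True, row
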
Example 4
    def _test_retention_leases_established_when_promoting_primary(self, path):
        number_of_nodes = 3
        cluster = self._new_cluster(path.from_version, number_of_nodes)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()

            number_of_shards = random.randint(1, 5)
            number_of_replicas = random.randint(0, 1)

            c.execute(
                '''create table doc.test(x int) clustered into ? shards with(
                    "number_of_replicas" = ?,
                    "soft_deletes.enabled" = false,
                    "allocation.max_retries" = 0,
                    "unassigned.node_left.delayed_timeout" = '100ms'
                    )''', (
                    number_of_shards,
                    number_of_replicas,
                ))

            number_of_docs = random.randint(0, 10)
            if number_of_docs > 0:
                insert_data(conn, 'doc', 'test', number_of_docs)

            if random.choice([True, False]):
                c.execute('refresh table doc.test')

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            assert_busy(
                lambda: self._assert_ensure_peer_recovery_retention_leases_renewed_and_synced(
                    conn, 'doc', 'test'))

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version, number_of_nodes)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            assert_busy(
                lambda: self._assert_ensure_peer_recovery_retention_leases_renewed_and_synced(
                    conn, 'doc', 'test'))
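
The retention-lease assertion is the most involved helper in this example and is not reproduced here. Newer CrateDB versions expose peer-recovery retention leases per shard in sys.shards (the retention_leases and seq_no_stats columns); under that assumption, a rough and heavily simplified sketch of such a check might be:

def assert_retention_leases_synced(conn, schema, table):
    # Assumption: every shard copy should hold a peer-recovery retention lease whose
    # retained sequence number has caught up with the shard's global checkpoint.
    c = conn.cursor()
    c.execute(
        '''select seq_no_stats['global_checkpoint'], retention_leases['leases']
           from sys.shards
           where schema_name = ? and table_name = ?''',
        (schema, table))
    for global_checkpoint, leases in c.fetchall():
        assert leases, 'expected at least one retention lease per shard'
        for lease in leases:
            # A renewed lease retains everything above the global checkpoint.
            assert lease['retaining_seq_no'] >= global_checkpoint + 1, (lease, global_checkpoint)
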
Example 5
    def _test_recovery_with_concurrent_indexing(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                        create table doc.test(x int) clustered into 1 shards with( number_of_replicas = 2,
                        "unassigned.node_left.delayed_timeout" = '100ms', "allocation.max_retries" = '0')
                    ''')

            # insert data into the initial homogeneous cluster
            insert_data(conn, 'doc', 'test', 10)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            # make sure that we can index while the replicas are recovering
            c.execute('''alter table doc.test set ("routing.allocation.enable"='primaries')''')

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            c.execute('''alter table doc.test set ("routing.allocation.enable"='all')''')
            # insert data into a mixed cluster
            insert_data(conn, 'doc', 'test', 50)
            c.execute('refresh table doc.test')
            # verify that the documents indexed while the replicas were recovering are visible
            c.execute('select count(*) from doc.test')
            self.assertEqual(c.fetchone()[0], 60)
            # check counts for each node individually
            c.execute('select id from sys.nodes')
            node_ids = c.fetchall()
            self.assertEqual(len(node_ids), self.NUMBER_OF_NODES)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            for node_id in node_ids:
                assert_busy(lambda: self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 60))

            c.execute('''alter table doc.test set ("routing.allocation.enable"='primaries')''')
            # upgrade the full cluster
            self._upgrade_cluster(cluster, path.to_version, self.NUMBER_OF_NODES)
            c.execute('''alter table doc.test set ("routing.allocation.enable"='all')''')

            insert_data(conn, 'doc', 'test', 45)
            c.execute('refresh table doc.test')
            c.execute('select count(*) from doc.test')
            res = c.fetchone()
            self.assertEqual(res[0], 105)

            c.execute('select id from sys.nodes')
            node_ids = c.fetchall()
            self.assertEqual(len(node_ids), self.NUMBER_OF_NODES)

            for node_id in node_ids:
                assert_busy(lambda: self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 105))
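
_assert_num_docs_by_node_id checks that every node holds a full copy of the data. sys.shards reports per-shard document counts together with the node a shard copy lives on, so a simplified version could be written as follows (an illustrative sketch, not the suite's implementation):

def assert_num_docs_by_node_id(conn, schema, table, node_id, expected):
    # Sum the documents of all shard copies located on the given node; with a
    # single-shard table this is simply that node's copy of the shard.
    c = conn.cursor()
    c.execute(
        '''select sum(num_docs) from sys.shards
           where schema_name = ? and table_name = ? and node['id'] = ?''',
        (schema, table, node_id))
    assert c.fetchone()[0] == expected
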
Example 6
    def _test_recovery_closed_index(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                        create table doc.test(x int) clustered into 1 shards with( number_of_replicas = 1,
                        "unassigned.node_left.delayed_timeout" = '100ms', "allocation.max_retries" = '0')
                      ''')

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            c.execute('alter table doc.test close')

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            self._assert_is_closed(conn, 'doc', 'test')

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version, self.NUMBER_OF_NODES)

            self._assert_is_closed(conn, 'doc', 'test')
Example 7
    def _test_update_docs(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()
        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                      create table doc.test(id int primary key, data text) clustered into 1 shards with(
                       "unassigned.node_left.delayed_timeout" = '100ms', "number_of_replicas" = 2)
                      ''')

            inserts = [(i, str(random.randint(0, 1000))) for i in range(0, 100)]
            c.executemany('''insert into doc.test(id, data) values (?, ?)''',
                          inserts)

            # Ensure all shards are active before upgrading a node; otherwise the cluster tries to allocate new
            # replicas if the upgraded node contained the primary, which fails due to node version allocation rules.
            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            if random.choice([True, False]):
                assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            # update the data in a mixed cluster
            updates = [(i, str(random.randint(0, 1000))) for i in range(0, 100)]

            res = c.executemany(
                'insert into doc.test(id, data) values(?, ?) on conflict(id) do update set data = excluded.data',
                updates)
            self.assertEqual(len(res), 100)
            for result in res:
                self.assertEqual(result['rowcount'], 1)

            if random.choice([True, False]):
                assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            updates = [(i, str(random.randint(0, 1000))) for i in range(0, 100)]
            res = c.executemany(
                'insert into doc.test(id, data) values(?, ?) on conflict(id) do update set data = excluded.data',
                updates)
            self.assertEqual(len(res), 100)
            for result in res:
                self.assertEqual(result['rowcount'], 1)
Example 8
    def _test_operation_based_recovery(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                        create table doc.test(x int) clustered into 1 shards with( "number_of_replicas" = 2,
                        "soft_deletes.enabled" = true)
                        ''')

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            insert_data(conn, 'doc', 'test', random.randint(100, 200))
            c.execute('refresh table doc.test')

            self._assert_ensure_checkpoints_are_synced(conn, 'doc', 'test')
            num_docs = random.randint(0, 3)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            num_docs = random.randint(0, 3)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)
            self._assert_ensure_checkpoints_are_synced(conn, 'doc', 'test')

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            num_docs = random.randint(0, 3)
            if num_docs > 0:
                insert_data(conn, 'doc', 'test', num_docs)

            self._assert_ensure_checkpoints_are_synced(conn, 'doc', 'test')
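
_assert_ensure_checkpoints_are_synced presumably verifies that primaries and replicas agree on their sequence-number checkpoints, which is what makes operation-based recovery possible. sys.shards exposes these numbers under seq_no_stats; a hedged sketch of such a check:

def assert_checkpoints_are_synced(conn, schema, table):
    # For every shard copy, the local and global checkpoints should have caught
    # up with the highest sequence number once indexing has quiesced.
    c = conn.cursor()
    c.execute(
        '''select seq_no_stats['max_seq_no'],
                  seq_no_stats['local_checkpoint'],
                  seq_no_stats['global_checkpoint']
           from sys.shards
           where schema_name = ? and table_name = ?''',
        (schema, table))
    for max_seq_no, local_checkpoint, global_checkpoint in c.fetchall():
        assert max_seq_no == local_checkpoint == global_checkpoint, \
            (max_seq_no, local_checkpoint, global_checkpoint)
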
Example 9
    def test_snapshot_restore_and_drop_in_parallel(self):
        """Test to run the drop and restore operation on two different
           snapshots in parallel.

        The purpose of this test is to validate that the snapshot mechanism
        of CrateDB can handle the two operations in parallel. Here, Minio is
        used as s3 backend for the repository, but this should work on any
        other backend as well.
        """
        with MinioServer() as minio:
            t = threading.Thread(target=minio.run)
            t.daemon = True
            t.start()
            wait_until(lambda: _is_up('127.0.0.1', 9000))

            num_nodes = random.randint(3, 5)
            number_of_shards = random.randint(1, 3)
            number_of_replicas = random.randint(0, 2)
            num_docs = random.randint(1, 100)

            cluster_settings = {
                'cluster.name': gen_id(),
                'path.data': self.PATH_DATA
            }
            shutil.rmtree(self.PATH_DATA, ignore_errors=True)
            cluster = self._new_cluster('latest-nightly',
                                        num_nodes,
                                        settings=cluster_settings)
            cluster.start()

            with connect(cluster.node().http_url, error_trace=True) as conn:
                c = conn.cursor()
                wait_for_active_shards(c)
                c.execute(
                    '''
                            create table doc.test(x int) clustered into ? shards with( number_of_replicas =?)
                         ''', (
                        number_of_shards,
                        number_of_replicas,
                    ))

                insert_data(conn, 'doc', 'test', num_docs)

                c.execute('''
                            CREATE REPOSITORY repo TYPE S3
                            WITH (access_key = 'minio',
                            secret_key = 'miniostorage',
                            bucket='backups',
                            endpoint = '127.0.0.1:9000',
                            protocol = 'http')
                        ''')

                c.execute(
                    'CREATE SNAPSHOT repo.snapshot1 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute(
                    'CREATE SNAPSHOT repo.snapshot2 TABLE doc.test WITH (wait_for_completion = true)'
                )
                c.execute('DROP TABLE doc.test')
                # Drop snapshot2 while the restore of snapshot1 is still running
                c.execute(
                    'RESTORE SNAPSHOT repo.snapshot1 ALL WITH (wait_for_completion = false)'
                )
                try:
                    c.execute('DROP SNAPSHOT repo.snapshot2')
                except ProgrammingError:
                    self.fail(
                        "Restore and Drop Snapshot operation should work in parallel"
                    )

                assert_busy(lambda: self._assert_num_docs(conn, num_docs))

            cluster.stop()
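
_assert_num_docs only needs to confirm that the restore brought back every snapshotted row; a minimal sketch against the doc.test table used above:

def assert_num_docs(conn, expected):
    # Refresh first so the restored rows are visible to the count.
    c = conn.cursor()
    c.execute('refresh table doc.test')
    c.execute('select count(*) from doc.test')
    assert c.fetchone()[0] == expected
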
Example 10
    def _test_auto_expand_indices_during_rolling_upgrade(self, path):
        number_of_nodes = 3
        cluster = self._new_cluster(path.from_version, number_of_nodes)
        cluster.start()

        # replicas on all nodes except the one holding the primary
        number_of_replicas = number_of_nodes - 1
        number_of_replicas_with_excluded_node = number_of_replicas - 1

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''select id from sys.nodes''')
            node_ids = c.fetchall()
            self.assertEqual(len(node_ids), number_of_nodes)

            c.execute(
                '''create table doc.test(x int) clustered into 1 shards with( "number_of_replicas" = ?)''',
                (f"0-{number_of_replicas}", ))
            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            # exclude one node from allocation, but this won't have any effect as all nodes are on the old version
            c.execute(
                'alter table doc.test set ("routing.allocation.exclude._id" = ?)',
                (random.choice(node_ids)[0], ))

            # check that the replicas expand automatically to all nodes, even though one node is excluded
            assert_busy(lambda: self._assert_number_of_replicas(
                conn, 'doc', 'test', number_of_replicas))

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            # health is yellow because the replicas are expanded, but one could not be allocated as the node
            # is excluded by allocation filtering
            assert_busy(lambda: self._assert_is_yellow(conn, 'doc', 'test'))

            # check that the replicas still expand automatically to all nodes, even though one node is excluded
            assert_busy(lambda: self._assert_number_of_replicas(
                conn, 'doc', 'test', number_of_replicas))

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version, number_of_nodes)

            # now that all nodes are on the new version, which includes the change to expand replicas
            # based on allocation filtering, only 1 replica is allocated and the health is green
            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            assert_busy(lambda: self._assert_number_of_replicas(
                conn, 'doc', 'test', number_of_replicas_with_excluded_node))
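
_assert_is_yellow mirrors the green check from the first example but expects a YELLOW health value, and _assert_number_of_replicas can read the table's effective replica setting. A sketch under the assumption that auto-expand writes the concrete value into information_schema.tables.number_of_replicas:

def assert_is_yellow(conn, schema, table):
    # Same query as the green check, only the expected health value differs.
    c = conn.cursor()
    c.execute(
        'select health from sys.health where table_schema = ? and table_name = ?',
        (schema, table))
    row = c.fetchone()
    assert row is not None and row[0] == 'YELLOW', row

def assert_number_of_replicas(conn, schema, table, expected):
    # Assumption: with "number_of_replicas" set to a range such as '0-2', auto-expand
    # keeps the effective value in the table metadata, where it can be compared.
    c = conn.cursor()
    c.execute(
        'select number_of_replicas from information_schema.tables '
        'where table_schema = ? and table_name = ?',
        (schema, table))
    assert int(c.fetchone()[0]) == expected
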
Example 11
    def _test_relocation_with_concurrent_indexing(self, path):
        cluster = self._new_cluster(path.from_version, self.NUMBER_OF_NODES)
        cluster.start()

        with connect(cluster.node().http_url, error_trace=True) as conn:
            c = conn.cursor()
            c.execute('''
                        create table doc.test(x int) clustered into 1 shards with( "number_of_replicas" = 2,
                        "unassigned.node_left.delayed_timeout" = '100ms', "allocation.max_retries" = '0')
                        ''')

            insert_data(conn, 'doc', 'test', 10)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            # disable allocation so that no shards move and the primary stays on the old node
            # (when one node stops, we lose the master too, so a replica will not be promoted)
            c.execute(
                '''alter table doc.test set("routing.allocation.enable"='none')'''
            )

            self._upgrade_to_mixed_cluster(cluster, path.to_version)

            c.execute(
                '''select id from sys.nodes order by version['number'] desc limit 1'''
            )
            new_node_id = c.fetchone()[0]
            c.execute(
                '''select id from sys.nodes order by version['number'] asc limit 1'''
            )
            old_node_id = c.fetchone()[0]

            # remove the replica and guarantee that the primary is placed on the old node
            c.execute(
                '''alter table doc.test set (
                        "number_of_replicas"=0,
                        "routing.allocation.enable"='all',
                        "routing.allocation.include._id"=?
                        )''', (old_node_id, ))

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            c.execute(
                '''alter table doc.test set ("routing.allocation.include._id"=?)''',
                (new_node_id, ))
            insert_data(conn, 'doc', 'test', 50)

            # ensure the relocation from old node to new node has occurred; otherwise the table is green
            # even though shards haven't moved to the new node yet (allocation was throttled).
            assert_busy(lambda: self._assert_shard_state(
                conn, 'doc', 'test', new_node_id, 'STARTED'))
            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))

            c.execute('refresh table doc.test')
            self._assert_num_docs_by_node_id(conn, 'doc', 'test', new_node_id,
                                             60)

            # upgrade fully to the new version
            self._upgrade_cluster(cluster, path.to_version,
                                  self.NUMBER_OF_NODES)

            c.execute('''alter table doc.test set("number_of_replicas"=2)''')
            c.execute(
                '''alter table doc.test reset("routing.allocation.include._id")'''
            )

            insert_data(conn, 'doc', 'test', 45)

            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
            c.execute('refresh table doc.test')
            c.execute('select id from sys.nodes')
            node_ids = c.fetchall()
            self.assertEqual(len(node_ids), self.NUMBER_OF_NODES)

            for node_id in node_ids:
                self._assert_num_docs_by_node_id(conn, 'doc', 'test',
                                                 node_id[0], 105)
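
_assert_shard_state, used above to make sure the relocation really reached the new node, can be approximated by looking at the routing state that sys.shards reports for the shard copy on a given node; again a sketch, not the suite's code:

def assert_shard_state(conn, schema, table, node_id, expected_state):
    # The routing state becomes STARTED once the copy on this node is fully relocated.
    c = conn.cursor()
    c.execute(
        '''select routing_state from sys.shards
           where schema_name = ? and table_name = ? and node['id'] = ?''',
        (schema, table, node_id))
    row = c.fetchone()
    assert row is not None and row[0] == expected_state, row
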