def inner(self, *args, **kwargs):
    """Wrap an upgrade step with truncate-entries validation.

    Before calling the wrapped ``func`` (only when ``self.truncate_entries_flag``
    is set) this truncates the simple test tables and validates the truncation
    entries. After ``func`` returns, if the node now reports Scylla >= 3.1, it
    re-validates the truncation entries, reads back the truncated tables and
    re-inserts the test rows.

    NOTE(review): ``func`` comes from the enclosing decorator scope (not visible
    in this chunk); ``args[0]`` is assumed to be the node being upgraded —
    confirm against the decorator's call sites.
    """
    # Perform validation of truncate entries in case the new version is 3.1 or more
    node = args[0]
    if self.truncate_entries_flag:
        base_version = self.params.get('scylla_version', default='')
        # System truncation entries are only expected on open-source Scylla >= 3.1
        system_truncated = bool(
            parse_version(base_version) >= parse_version('3.1')
            and not is_enterprise(base_version))
        with self.cql_connection_patient(node, keyspace='truncate_ks') as session:
            self.cql_truncate_simple_tables(session=session, rows=self.insert_rows)
            self.validate_truncated_entries_for_table(session=session, system_truncated=system_truncated)
    # Run the wrapped upgrade step itself
    func_result = func(self, *args, **kwargs)
    # Re-read the version actually running on the node after the upgrade
    result = node.remoter.run('scylla --version')
    new_version = result.stdout
    if new_version and parse_version(new_version) >= parse_version('3.1'):
        # re-new connection — presumably the pre-upgrade session is stale after
        # the node restart; TODO confirm
        with self.cql_connection_patient(node, keyspace='truncate_ks') as session:
            self.validate_truncated_entries_for_table(session=session, system_truncated=True)
            self.read_data_from_truncated_tables(session=session)
            self.cql_insert_data_to_simple_tables(session=session, rows=self.insert_rows)
    return func_result
def get_version_list(self):
    """
    Return all supported release versions, and the supported base versions
    of the upgrade test.

    :return: tuple ``(supported_versions, version_list)`` where
        ``supported_versions`` holds every repo version prefix that is
        still supported, and ``version_list`` the upgrade base versions
        derived from them via get_supported_scylla_base_versions().
    """
    supported_versions = []
    # Filter out the unsupported versions
    for version_prefix in self.repo_maps:  # only the keys are needed
        # can't find the major version from the version_prefix string
        # (dot escaped: the previous r'\d+.\d+' let any character stand
        # between the digit groups, so e.g. '123' slipped through)
        if not re.match(r'\d+\.\d+', version_prefix):
            continue
        # OSS: the major version is smaller than the start support version
        if self.oss_start_support_version and not is_enterprise(version_prefix) and \
                parse_version(version_prefix) < parse_version(self.oss_start_support_version):
            continue
        # Enterprise: the major version is smaller than the start support version
        if self.ent_start_support_version and is_enterprise(version_prefix) and \
                parse_version(version_prefix) < parse_version(self.ent_start_support_version):
            continue
        supported_versions.append(version_prefix)
    version_list = self.get_supported_scylla_base_versions(supported_versions)
    return supported_versions, version_list
def get_product_and_version(self, scylla_version: str = None):
    """
    Resolve the scylla product name and its major version.

    When *scylla_version* is given, only the product is derived from it
    (enterprise versions map to 'scylla-enterprise'). Otherwise both the
    product and the major version are parsed out of the scylla_repo url,
    which must point at an 'unstable/' repository.
    """
    if scylla_version is not None:
        # Explicit version: the product follows directly from it.
        product = 'scylla-enterprise' if is_enterprise(scylla_version) else 'scylla'
        return product, scylla_version

    # Derive both parts from the repo url: .../unstable/<product>/<branch>/...
    assert 'unstable/' in self.scylla_repo
    product, branch = self.scylla_repo.split('unstable/')[1].split('/')[0:2]
    scylla_version = branch.replace('branch-', '').replace('enterprise-', '')
    return product, scylla_version
def get_product_and_version(self, scylla_version: str = None) -> tuple[str, str]:
    """
    Return scylla product name and major version.

    If scylla_version isn't assigned, we will try to get the major version
    from the scylla_repo url.

    :param scylla_version: optional version string; when None, product and
        version are parsed from self.scylla_repo (must contain 'unstable/').
    :return: (product, scylla_version) — product is 'scylla' or
        'scylla-enterprise'.
    """
    LOGGER.info("Getting scylla product and major version for upgrade versions listing...")
    if scylla_version is None:
        assert 'unstable/' in self.scylla_repo, "Did not find 'unstable/' in scylla_repo. " \
                                                "Scylla repo: %s" % self.scylla_repo
        product, version = self.scylla_repo.split('unstable/')[1].split('/')[0:2]
        scylla_version = version.replace('branch-', '').replace('enterprise-', '')
    else:
        product = 'scylla-enterprise' if is_enterprise(scylla_version) else 'scylla'
    # BUG FIX: 'version' is bound only in the scylla_version-is-None branch;
    # logging it unconditionally raised NameError whenever a version was
    # passed in. Log the normalized scylla_version instead.
    LOGGER.info("Scylla product and major version used for upgrade versions listing: %s, %s",
                product, scylla_version)
    return product, scylla_version
def test_rolling_upgrade(self):  # pylint: disable=too-many-locals,too-many-statements
    """
    Rolling-upgrade scenario with background workloads.

    Upgrade half of nodes in the cluster, and start special read workload
    during the stage. Checksum method is changed to xxhash from Scylla 2.2,
    we want to use this case to verify the read (cl=ALL) workload works well,
    upgrade all nodes to new version in the end.

    Step outline: prepare keyspaces/data -> upgrade node 1 -> upgrade node 2
    -> rollback node 2 -> verify mixed cluster -> upgrade remaining nodes ->
    verify stress results -> upgradesstables + sstabledump -> post-upgrade
    stress -> count filtered schema-load errors.
    """
    # In case the target version >= 3.1 we need to perform test for truncate entries
    target_upgrade_version = self.params.get('target_upgrade_version', default='')
    self.truncate_entries_flag = False
    if target_upgrade_version and parse_version(target_upgrade_version) >= parse_version('3.1') and \
            not is_enterprise(target_upgrade_version):
        self.truncate_entries_flag = True

    with self.subTest('pre-test - prepare test keyspaces and tables'):
        # prepare test keyspaces and tables before upgrade to avoid schema change during mixed cluster.
        self.prepare_keyspaces_and_tables()
        self.fill_and_verify_db_data('BEFORE UPGRADE', pre_fill=True)

        # write workload during entire test
        self.log.info('Starting c-s write workload during entire test')
        write_stress_during_entire_test = self.params.get('write_stress_during_entire_test')
        entire_write_cs_thread_pool = self.run_stress_thread(stress_cmd=write_stress_during_entire_test)

        # Prepare keyspace and tables for truncate test
        if self.truncate_entries_flag:
            self.insert_rows = 10
            self.fill_db_data_for_truncate_test(insert_rows=self.insert_rows)

    with self.subTest('pre-test - Run stress workload before upgrade'):
        # complex workload: prepare write
        self.log.info('Starting c-s complex workload (5M) to prepare data')
        stress_cmd_complex_prepare = self.params.get('stress_cmd_complex_prepare')
        complex_cs_thread_pool = self.run_stress_thread(
            stress_cmd=stress_cmd_complex_prepare, profile='data_dir/complex_schema.yaml')

        # wait for the complex workload to finish
        self.verify_stress_thread(complex_cs_thread_pool)

    # generate random order to upgrade
    nodes_num = len(self.db_cluster.nodes)
    # prepare an array containing the indexes
    indexes = list(range(nodes_num))
    # shuffle it so we will upgrade the nodes in a random order
    random.shuffle(indexes)

    # prepare write workload
    self.log.info('Starting c-s prepare write workload (n=10000000)')
    prepare_write_stress = self.params.get('prepare_write_stress')
    prepare_write_cs_thread_pool = self.run_stress_thread(stress_cmd=prepare_write_stress)
    self.log.info('Sleeping for 60s to let cassandra-stress start before the upgrade...')
    time.sleep(60)

    # Schema-load noise is expected while the cluster runs mixed versions;
    # filter it here, but count occurrences again in Step9 below.
    with DbEventsFilter(type='DATABASE_ERROR', line='Failed to load schema'), \
            DbEventsFilter(type='SCHEMA_FAILURE', line='Failed to load schema'), \
            DbEventsFilter(type='DATABASE_ERROR', line='Failed to pull schema'), \
            DbEventsFilter(type='RUNTIME_ERROR', line='Failed to load schema'):

        step = 'Step1 - Upgrade First Node '
        with self.subTest(step):
            # upgrade first node
            self.db_cluster.node_to_upgrade = self.db_cluster.nodes[indexes[0]]
            self.log.info('Upgrade Node %s begin', self.db_cluster.node_to_upgrade.name)
            self.upgrade_node(self.db_cluster.node_to_upgrade)
            self.log.info('Upgrade Node %s ended', self.db_cluster.node_to_upgrade.name)
            self.db_cluster.node_to_upgrade.check_node_health()

            # wait for the prepare write workload to finish
            self.verify_stress_thread(prepare_write_cs_thread_pool)

            # read workload (cl=QUORUM)
            self.log.info('Starting c-s read workload (cl=QUORUM n=10000000)')
            stress_cmd_read_cl_quorum = self.params.get('stress_cmd_read_cl_quorum')
            read_stress_queue = self.run_stress_thread(stress_cmd=stress_cmd_read_cl_quorum)
            # wait for the read workload to finish
            self.verify_stress_thread(read_stress_queue)
            self.fill_and_verify_db_data('after upgraded one node')
            self.search_for_idx_token_error_after_upgrade(
                node=self.db_cluster.node_to_upgrade,
                step=step + ' - after upgraded one node')

            # read workload
            self.log.info('Starting c-s read workload for 10m')
            stress_cmd_read_10m = self.params.get('stress_cmd_read_10m')
            read_10m_cs_thread_pool = self.run_stress_thread(stress_cmd=stress_cmd_read_10m)

            self.log.info('Sleeping for 60s to let cassandra-stress start before the upgrade...')
            time.sleep(60)

        step = 'Step2 - Upgrade Second Node '
        with self.subTest(step):
            # upgrade second node
            self.db_cluster.node_to_upgrade = self.db_cluster.nodes[indexes[1]]
            self.log.info('Upgrade Node %s begin', self.db_cluster.node_to_upgrade.name)
            self.upgrade_node(self.db_cluster.node_to_upgrade)
            self.log.info('Upgrade Node %s ended', self.db_cluster.node_to_upgrade.name)
            self.db_cluster.node_to_upgrade.check_node_health()

            # wait for the 10m read workload to finish
            self.verify_stress_thread(read_10m_cs_thread_pool)
            self.fill_and_verify_db_data('after upgraded two nodes')
            self.search_for_idx_token_error_after_upgrade(
                node=self.db_cluster.node_to_upgrade,
                step=step + ' - after upgraded two nodes')

            # read workload (60m)
            self.log.info('Starting c-s read workload for 60m')
            stress_cmd_read_60m = self.params.get('stress_cmd_read_60m')
            read_60m_cs_thread_pool = self.run_stress_thread(stress_cmd=stress_cmd_read_60m)
            self.log.info('Sleeping for 60s to let cassandra-stress start before the rollback...')
            time.sleep(60)

    with self.subTest('Step3 - Rollback Second Node '):
        # rollback second node
        self.log.info('Rollback Node %s begin', self.db_cluster.nodes[indexes[1]].name)
        self.rollback_node(self.db_cluster.nodes[indexes[1]])
        self.log.info('Rollback Node %s ended', self.db_cluster.nodes[indexes[1]].name)
        self.db_cluster.nodes[indexes[1]].check_node_health()

    step = 'Step4 - Verify data during mixed cluster mode '
    with self.subTest(step):
        self.fill_and_verify_db_data('after rollback the second node')
        self.log.info('Repair the first upgraded Node')
        self.db_cluster.nodes[indexes[0]].run_nodetool(sub_cmd='repair')
        self.search_for_idx_token_error_after_upgrade(
            node=self.db_cluster.node_to_upgrade, step=step)

    # Same mixed-cluster schema noise filtering as above while the rest of
    # the nodes (including the rolled-back one) are upgraded.
    with DbEventsFilter(type='DATABASE_ERROR', line='Failed to load schema'), \
            DbEventsFilter(type='SCHEMA_FAILURE', line='Failed to load schema'), \
            DbEventsFilter(type='DATABASE_ERROR', line='Failed to pull schema'), \
            DbEventsFilter(type='RUNTIME_ERROR', line='Failed to load schema'):

        step = 'Step5 - Upgrade rest of the Nodes '
        with self.subTest(step):
            # indexes[1:] re-includes the rolled-back second node
            for i in indexes[1:]:
                self.db_cluster.node_to_upgrade = self.db_cluster.nodes[i]
                self.log.info('Upgrade Node %s begin', self.db_cluster.node_to_upgrade.name)
                self.upgrade_node(self.db_cluster.node_to_upgrade)
                self.log.info('Upgrade Node %s ended', self.db_cluster.node_to_upgrade.name)
                self.db_cluster.node_to_upgrade.check_node_health()
                self.fill_and_verify_db_data('after upgraded %s' % self.db_cluster.node_to_upgrade.name)
                self.search_for_idx_token_error_after_upgrade(
                    node=self.db_cluster.node_to_upgrade, step=step)

    with self.subTest('Step6 - Verify stress results after upgrade '):
        self.log.info('Waiting for stress threads to complete after upgrade')
        # wait for the 60m read workload to finish
        self.verify_stress_thread(read_60m_cs_thread_pool)
        self.verify_stress_thread(entire_write_cs_thread_pool)

    with self.subTest('Step7 - Upgrade sstables to latest supported version '):
        # figure out what is the last supported sstable version
        self.expected_sstable_format_version = self.get_highest_supported_sstable_version()

        # run 'nodetool upgradesstables' on all nodes and check/wait for all file to be upgraded
        upgradesstables = self.db_cluster.run_func_parallel(func=self.upgradesstables_if_command_available)

        # only check sstable format version if all nodes had 'nodetool upgradesstables' available
        if all(upgradesstables):
            self.log.info('Upgrading sstables if new version is available')
            tables_upgraded = self.db_cluster.run_func_parallel(func=self.wait_for_sstable_upgrade)
            assert all(tables_upgraded), "Failed to upgrade the sstable format {}".format(tables_upgraded)

        # Verify sstabledump
        self.log.info('Starting sstabledump to verify correctness of sstables')
        self.db_cluster.nodes[0].remoter.run(
            'for i in `sudo find /var/lib/scylla/data/keyspace_complex/ -type f |grep -v manifest.json |'
            'grep -v snapshots |head -n 1`; do echo $i; sudo sstabledump $i 1>/tmp/sstabledump.output || '
            'exit 1; done', verbose=True)

    with self.subTest('Step8 - Run stress and verify after upgrading entire cluster '):
        self.log.info('Starting verify_stress_after_cluster_upgrade')
        verify_stress_after_cluster_upgrade = self.params.get(  # pylint: disable=invalid-name
            'verify_stress_after_cluster_upgrade')
        verify_stress_cs_thread_pool = self.run_stress_thread(stress_cmd=verify_stress_after_cluster_upgrade)
        self.verify_stress_thread(verify_stress_cs_thread_pool)

        # complex workload: verify data by simple read cl=ALL
        self.log.info('Starting c-s complex workload to verify data by simple read')
        stress_cmd_complex_verify_read = self.params.get('stress_cmd_complex_verify_read')
        complex_cs_thread_pool = self.run_stress_thread(
            stress_cmd=stress_cmd_complex_verify_read, profile='data_dir/complex_schema.yaml')
        # wait for the read complex workload to finish
        self.verify_stress_thread(complex_cs_thread_pool)

        # After adjusted the workloads, there is a entire write workload, and it uses a fixed duration for catching
        # the data lose.
        # But the execute time of workloads are not exact, so let only use basic prepare write & read verify for
        # complex workloads,and comment two complex workloads.
        #
        # TODO: retest commented workloads and decide to enable or delete them.
        #
        # complex workload: verify data by multiple ops
        # self.log.info('Starting c-s complex workload to verify data by multiple ops')
        # stress_cmd_complex_verify_more = self.params.get('stress_cmd_complex_verify_more')
        # complex_cs_thread_pool = self.run_stress_thread(stress_cmd=stress_cmd_complex_verify_more,
        #                                                 profile='data_dir/complex_schema.yaml')
        # wait for the complex workload to finish
        # self.verify_stress_thread(complex_cs_thread_pool)

        # complex workload: verify data by delete 1/10 data
        # self.log.info('Starting c-s complex workload to verify data by delete')
        # stress_cmd_complex_verify_delete = self.params.get('stress_cmd_complex_verify_delete')
        # complex_cs_thread_pool = self.run_stress_thread(stress_cmd=stress_cmd_complex_verify_delete,
        #                                                 profile='data_dir/complex_schema.yaml')
        # wait for the complex workload to finish
        # self.verify_stress_thread(complex_cs_thread_pool)

    # During the test we filter and ignore some specific errors, but we want to allow only certain amount of them
    step = 'Step9 - Search for errors that we filter during the test '
    with self.subTest(step):
        self.log.info('Checking how many failed_to_load_schem errors happened during the test')
        error_factor = 3
        schema_load_error_num = 0
        for node in self.db_cluster.nodes:
            errors = node.search_database_log(search_pattern='Failed to load schema version',
                                              start_from_beginning=True, publish_events=False)
            schema_load_error_num += len(errors)
            self.search_for_idx_token_error_after_upgrade(node=node, step=step)
        self.log.info('schema_load_error_num: %d', schema_load_error_num)
        # NOTE(review): the '8' appears to stand for an assumed shards-per-host
        # count (see 'shards_num' in the message) — confirm and consider
        # deriving it from the node instead of hard-coding.
        assert schema_load_error_num <= error_factor * 8 * \
            len(self.db_cluster.nodes), 'Only allowing shards_num * %d schema load errors per host during the ' \
                                        'entire test, actual: %d' % (error_factor, schema_load_error_num)

    self.log.info('all nodes were upgraded, and last workaround is verified.')
def test_05_is_enterprise(self):
    """is_enterprise() flags year-based (enterprise) versions, rejects OSS ones."""
    expectations = [
        ('2019.1.1', True),
        ('2018', True),
        ('3.1', False),
        ('2.2', False),
        ('666.development', False),
    ]
    for version, expected in expectations:
        self.assertEqual(is_enterprise(version), expected)
def get_supported_scylla_base_versions(self, supported_versions):  # pylint: disable=too-many-branches
    """
    We have special base versions list for each release, and we don't support
    to upgraded from enterprise to opensource. This function is used to get
    the base versions list which will be used in the upgrade test.

    @supported_version: all scylla release versions, the base versions will be
                        filtered out from the supported_version
    """
    oss_base_version = []
    ent_base_version = []
    oss_release_list = [v for v in supported_versions if not is_enterprise(v)]
    ent_release_list = [v for v in supported_versions if is_enterprise(v)]

    # The major version of unstable scylla, eg: 4.6.dev, 4.5, 2021.2.dev, 2021.1
    version = self.scylla_version
    product = self.product

    if product == 'scylla-enterprise':
        if version in supported_versions:
            # The dest version is a released enterprise version
            idx = ent_release_list.index(version)
            # supported_src_oss maps an enterprise release to the OSS release
            # it can be upgraded from — presumably defined at module level;
            # .get() yields None when the mapping is missing (filtered below).
            oss_base_version.append(supported_src_oss.get(version))
            if idx == 0:
                # The dest version is a minor release of latest enterprise release
                ent_base_version.append(version)
            else:
                # Choose the last two releases as upgrade base
                ent_base_version += ent_release_list[idx - 1:][:2]
        elif version == 'enterprise' or parse_version(version) > parse_version(ent_release_list[0]):
            # NOTE(review): an enterprise version is appended to the *oss* list
            # here; harmless in effect since both lists are filtered and
            # concatenated identically below, but verify the intent.
            oss_base_version.append(ent_release_list[-1])
        elif re.match(r'\d+.\d+', version) and parse_version(version) >= parse_version(ent_release_list[0]):
            # NOTE(review): the previous branch already catches strictly
            # greater versions, so this '>=' can only match on equality —
            # confirm whether this branch is reachable as intended.
            oss_base_version.append(oss_release_list[-1])
            ent_base_version += ent_release_list[-2:]
    elif product == 'scylla':
        if version in supported_versions:
            # The dest version is a released opensource version
            idx = oss_release_list.index(version)
            if idx == 0:
                # The dest version is a minor release of latest enterprise release
                oss_base_version.append(version)
            else:
                # Choose the last two releases as upgrade base
                oss_base_version += oss_release_list[idx - 1:][:2]
        elif version == 'master' or parse_version(version) > parse_version(oss_release_list[0]):
            oss_base_version.append(oss_release_list[-1])
        elif re.match(r'\d+.\d+', version) and parse_version(version) < parse_version(oss_release_list[0]):
            # If dest version is smaller than the first supported opensource release,
            # it might be an invalid dest version
            oss_base_version.append(oss_release_list[-1])
    else:
        raise ValueError("Unsupported product %s" % product)

    # Filter out unsupported version
    # NOTE(review): r'\d+.\d+' above has an unescaped dot (any char matches);
    # and the code assumes a specific ordering of the release lists
    # (idx == 0 treated as 'latest' while [-1] is also used as a base) —
    # confirm the ordering contract of supported_versions with the caller.
    oss_base_version = [v for v in oss_base_version if v in supported_versions]
    ent_base_version = [v for v in ent_base_version if v in supported_versions]
    return oss_base_version + ent_base_version