def common_load(self, start_key, end_key):
    if self.get_context_variable('sbt_model_enabled'):
        # add transactional put into 1 cache 2k times total
        PiClientIgniteUtils.load_data_with_txput_sbt_model(
            self.config,
            self.ignite,
            self.get_client_config(),
            only_caches_batch=1,
            start_key=start_key,
            end_key=start_key + 1)

        # load normal data into 200 caches 2k times total
        PiClientIgniteUtils.load_data_with_txput_sbt_model(
            self.config,
            self.ignite,
            self.get_client_config(),
            only_caches_batch=200,
            start_key=start_key + 2,
            end_key=int(end_key * self.load_multiplier))
    else:
        PiClientIgniteUtils.load_data_with_putall(
            self.ignite,
            self.get_client_config(),
            start_key=start_key,
            end_key=int(end_key * self.load_multiplier))
def start_piclients():
    for _ in range(0, 3):
        try:
            PiClientIgniteUtils.load_data_with_putall(
                self.ignite,
                self.get_client_config(),
                value_type=ModelTypes.VALUE_ACCOUNT.value,
                nodes_num=24,
                end_key=1000)
        except Exception as err:
            print(err)
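# NOTE (sketch): the loop above reruns the bulk put up to three times and only prints
# failures. If the intent is "retry until one attempt succeeds", a small generic helper
# makes that explicit. Names here (`retry_call`, `attempts`, `delay_sec`) are illustrative
# and not part of the tiden/PiClient framework; `time.sleep` assumes an `import time`
# at module scope.
def retry_call(action, attempts=3, delay_sec=5):
    """Invoke `action` until it succeeds or `attempts` is exhausted; re-raise the last error."""
    last_err = None
    for attempt in range(1, attempts + 1):
        try:
            return action()
        except Exception as err:  # broad catch mirrors start_piclients() above
            last_err = err
            print('attempt %s/%s failed: %s' % (attempt, attempts, err))
            if attempt < attempts:
                time.sleep(delay_sec)
    raise last_err

# Possible usage, reusing the same call as start_piclients():
#     retry_call(lambda: PiClientIgniteUtils.load_data_with_putall(
#         self.ignite, self.get_client_config(),
#         value_type=ModelTypes.VALUE_ACCOUNT.value, nodes_num=24, end_key=1000))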
def _setup_with_context(self, context_name, **kwargs):
    self._start_grid_no_preload(context_name, **kwargs)

    if self.preloading_size > 0:
        if self.preloading_with_streamer:
            PiClientIgniteUtils.load_data_with_streamer(
                self.ignite,
                self.get_client_config(),
                end_key=self.preloading_size)
        else:
            PiClientIgniteUtils.load_data_with_putall(
                self.ignite,
                self.get_client_config(),
                end_key=self.preloading_size)

    self._wait_cluster_ready()
def test_rebalancing_with_ttl_caches(self):
    """
    IGN-13549 (IGNITE-11400) Rebalancing caches with TTL enabled can cause data corruption.

    :return:
    """
    with PiClient(self.ignite, self.get_client_config()):
        checksums = create_distributed_checksum_operation().evaluate()

    self.wait_for_running_clients_num(client_num=0, timeout=120)

    self.ignite.cu.control_utility('--cache idle_verify --dump --skip-zeros')

    log_print('Calculating checksums done: %s' % checksums)

    self.assert_nodes_alive()

    self.ignite.kill_node(2)

    PiClientIgniteUtils.load_data_with_putall(
        self.ignite,
        self.get_client_config(),
        start_key=self.preloading_size,
        end_key=self.preloading_size + 100000)

    util_sleep(5 * 60)

    self.ignite.start_node(2)

    self.ignite.jmx.wait_for_finish_rebalance(self.rebalance_timeout * 2, self.group_names)

    with PiClient(self.ignite, self.get_client_config()):  # , jvm_options=jvm_options):
        checksums = create_distributed_checksum_operation().evaluate()

    self.wait_for_running_clients_num(client_num=0, timeout=120)

    log_print('Calculating checksums done: %s' % checksums)

    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "# of AssertionError")
def test_during_loading(self):
    """
    Should be fully fixed in 8.5.8-p1

    Scenario:
        1. Start 3 server nodes
        2. Load 1000 keys into 120 TX caches
        3. Start 3 client nodes and start TX loading (PESSIMISTIC/REPEATABLE_READ, OPTIMISTIC/SERIALIZABLE)
           (12 transfer operations, 10 caches in each operation, 1000 ms between transactions,
           i.e. ~4 tx per second from each client)
        4. In clients try to destroy caches
        5. Interesting things happen

    Fixed in 8.5.8-p1
    https://ggsystems.atlassian.net/browse/GG-19179

    Issues that were found during this test:
    https://ggsystems.atlassian.net/browse/GG-19411
    https://ggsystems.atlassian.net/browse/GG-19383

    :return:
    """
    PiClient.read_timeout = 600

    ignite = self.start_ignite_grid(self.ignite_name)

    ignite.cu.activate(activate_on_particular_node=1)

    PiClientIgniteUtils.load_data_with_putall(ignite, self.client_config)

    def get_dumps():
        for node_id in ignite.nodes.keys():
            self.util_get_threads_from_jstack(ignite, node_id, 'END')

    try:
        with PiClient(ignite, self.client_config) as piclient:
            with TransactionalLoading(
                    self,
                    ignite=ignite,
                    config_file=self.client_config,
                    on_exit_action=get_dumps,
                    kill_transactions_on_exit=True,
                    with_exception=False,  # interrupt loading operation if something happens?
                    skip_consistency_check=True,  # we are destroying caches here
                    loading_profile=LoadingProfile(
                        delay=1000,
                        allowed_transactions=(
                            TxDescriptor(concurrency='OPTIMISTIC', isolation='SERIALIZABLE'),)
                    )):
                # allowed_transactions=(TxDescriptor(), ))):
                # )):
                node_id = piclient.get_node_id()
                client_ignite = piclient.get_ignite(node_id)

                cache_names = client_ignite.cacheNames().toArray()

                caches_to_kill_num = 50
                frags = 0

                for cache in cache_names:
                    node_id = piclient.get_node_id()
                    log_print('Destroying cache %s on node %s' % (cache, node_id), color='red')
                    piclient.get_ignite(node_id).cache(cache).destroy()
                    frags += 1

                    if frags >= caches_to_kill_num:
                        break
    finally:
        npe_errors = ignite.find_exception_in_logs(".*java.lang.NullPointerException.*")
        assertion_errors = ignite.find_exception_in_logs(".*java.lang.AssertionError.*")

        if npe_errors != 0 or assertion_errors != 0:
            assert False, "There are errors in logs: NPE - %s, AE - %s" % (npe_errors, assertion_errors)
def test_during_rebalance(self):
    ignite = self.start_ignite_grid(self.ignite_name)

    ignite.cu.activate(activate_on_particular_node=1)

    PiClientIgniteUtils.load_data_with_putall(ignite, self.client_config)

    util_sleep_for_a_while(30)

    with PiClient(ignite, self.client_config) as piclient:
        cache_to_test = 'test_cache_with_index'
        # self.create_cache_with_indexed_data(cache_to_test)
        client_ignite = piclient.get_ignite()
        gateway = piclient.get_gateway()

        cache_config = IgniteCacheConfig(gateway)
        cache_config.set_name('cache_1')
        cache_config.set_cache_mode('replicated')
        cache_config.set_atomicity_mode('transactional')
        cache_config.set_write_synchronization_mode('full_sync')
        cache_config.set_affinity(False, 32)
        cache_config.set_group_name('some_new_group')

        cache_config1 = IgniteCacheConfig(gateway)
        cache_config1.set_name(cache_to_test)
        cache_config1.set_cache_mode('replicated')
        cache_config1.set_atomicity_mode('transactional')
        cache_config1.set_write_synchronization_mode('full_sync')
        cache_config1.set_affinity(False, 32)
        cache_config1.set_group_name('some_new_group')

        # set query entities
        caches = gateway.jvm.java.util.ArrayList()
        caches.add(cache_config.get_config_object())
        caches.add(cache_config1.get_config_object())

        log_print("Creating caches", color='green')
        client_ignite.getOrCreateCaches(caches)

        cache_names = piclient.get_ignite().cacheNames().toArray()
        if cache_to_test not in cache_names:
            log_print("Could not find cache in %s" % cache_names, color='red')

        util_sleep_for_a_while(10)

        ignite.kill_node(2)

        log_print("Overwrite values in cache %s" % cache_to_test, color='green')
        operation = create_put_all_operation(
            cache_to_test, 1, 1001, 100,
            value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)
        operation.evaluate()

        util_sleep_for_a_while(15)

        ignite.start_node(2, skip_topology_check=True)

        util_sleep_for_a_while(5)

        client_ignite.cache(cache_to_test).destroy()

        ignite.update_starting_node_attrs()
        ignite.nodes[3]['status'] = NodeStatus.STARTED

        client_ignite.cache('cache_1').destroy()
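# NOTE (sketch): the two IgniteCacheConfig blocks in test_during_rebalance() differ only
# by cache name; a small factory keeps them in sync. Only setters already used above are
# called here; `make_replicated_tx_cache_config` is an illustrative name, not part of the
# framework.
def make_replicated_tx_cache_config(gateway, name, group='some_new_group'):
    """Build a replicated, transactional, full-sync cache config in the given group."""
    cfg = IgniteCacheConfig(gateway)
    cfg.set_name(name)
    cfg.set_cache_mode('replicated')
    cfg.set_atomicity_mode('transactional')
    cfg.set_write_synchronization_mode('full_sync')
    cfg.set_affinity(False, 32)
    cfg.set_group_name(group)
    return cfg

# Possible usage inside test_during_rebalance():
#     cache_config = make_replicated_tx_cache_config(gateway, 'cache_1')
#     cache_config1 = make_replicated_tx_cache_config(gateway, cache_to_test)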
def run_stress_restarts(self, cluster_id_to_restart, iterations, nodes_to_restart, time_to_sleep_range):
    client_config = self.preconfigure_cluster_0()

    with PiClient(self.clusters[0].grid, client_config, jvm_options=['-ea']) as piclient:
        ignite = piclient.get_ignite()

        self.start_dynamic_caches_with_node_filter(client_config)

        last_loaded_key = START_DATA_SIZE
        PiClientIgniteUtils.load_data_with_putall(
            self.clusters[0].grid,
            client_config,
            end_key=last_loaded_key,
            jvm_options=['-ea'],
            check_clients=False)

        util_sleep_for_a_while(MINI_DR_STABILIZATION_TIMEOUT)

        nodes_before = 6

        last_loaded_key += 1
        for i in range(0, iterations):
            log_print(f'Current iteration {i + 1} from {iterations}', color='debug')

            sleep_for_time = random.uniform(time_to_sleep_range[0], time_to_sleep_range[1])
            log_print(
                f'In this run we are going to sleep for {sleep_for_time} seconds after each node restart',
                color='green')

            log_print('Trying to load data into created/existing caches', color='yellow')

            self.start_dynamic_caches_with_node_filter(client_config)

            PiClientIgniteUtils.load_data_with_putall(
                self.clusters[0].grid,
                client_config,
                start_key=last_loaded_key,
                end_key=last_loaded_key + LOAD_DATA_SIZE,
                jvm_options=['-ea'],
                check_clients=False)
            last_loaded_key += LOAD_DATA_SIZE

            self.increment_atomic(ignite)

            log_print("Round restart")
            for node_id in nodes_to_restart:
                self.clusters[cluster_id_to_restart].grid.kill_node(node_id)
                self.clusters[cluster_id_to_restart].grid.start_node(node_id, skip_topology_check=True)
                sleep(sleep_for_time)

            log_print("Wait for topology messages")
            for node_id in nodes_to_restart:
                self.clusters[cluster_id_to_restart].grid.update_started_node_status(node_id)

            util_sleep_for_a_while(MINI_DR_STABILIZATION_TIMEOUT)

            last_loaded_key = self.verify_cluster(0, nodes_before, last_loaded_key)

    util_sleep_for_a_while(DR_STABILIZATION_TIMEOUT)

    checksum_master1, checksum_slave1 = self.calculate_checksum_and_validate(last_loaded_key)
    tiden_assert(checksum_master1 == checksum_slave1, 'Hash sums on master and slave do not match')

    self.put_data(self.clusters[1], 1, 'cluster_2_node_without_dr')

    util_sleep_for_a_while(MINI_DR_STABILIZATION_TIMEOUT)

    checksum_master2, checksum_slave2 = self.calculate_checksum_and_validate(last_loaded_key)
    tiden_assert(checksum_master2 == checksum_slave2, 'Hash sums on master and slave do not match')
def test_start_on_lfs(self):
    PiClient.read_timeout = 3600
    SshPool.default_timeout = 1200

    self.ignite_old_version = self.start_ignite_grid('old', activate=True)

    key_map = {
        'cache_group_1_001': ModelTypes.KEY_ALL_TYPES_MAPPED.value,
        'cache_group_1_002': 'java.lang.Long',
        'cache_group_1_003': ModelTypes.KEY_DEFAULT_TABLE.value,
        'cache_group_1_004': ModelTypes.KEY_ACCOUNT.value,
    }

    batch_size = 10000
    PiClientIgniteUtils.load_data_with_putall(
        self.ignite_old_version,
        Ignite.config_builder.get_config('client', config_set_name=self.config_name),
        start_key=0,
        end_key=250000,
        batch_size=batch_size,
        key_map=key_map,
        value_type=ModelTypes.VALUE_ALL_TYPES_4_INDEX.value)

    sleep(120)

    self.ignite_old_version.kill_nodes()

    # symlink each old node's work directory into the new version's Ignite home
    cmd = dict()
    for node_id, dscr in self.ignite_old_version.nodes.items():
        ln_to_work = 'ln -s %s/%s %s/%s' % (
            dscr['ignite_home'],
            'work',
            self.get_app(self.ignite_app_names['new']).nodes[node_id]['ignite_home'],
            'work',
        )
        if dscr['host'] in cmd:
            cmd[dscr['host']].append(ln_to_work)
        else:
            cmd[dscr['host']] = [ln_to_work, ]

    self.tiden.ssh.exec(cmd)

    sleep(10)

    self.ignite_old_version = self.start_ignite_grid('new', activate=True, replaced_name='old')

    sleep(120)

    self.ignite_old_version.cu.control_utility(
        '--cache validate_indexes',
        all_required=['no issues found'])

    PiClientIgniteUtils.load_data_with_putall(
        self.ignite_old_version,
        Ignite.config_builder.get_config('client', config_set_name=self.config_name),
        start_key=250001,
        end_key=500001,
        batch_size=batch_size,
        key_map=key_map,
        value_type=ModelTypes.VALUE_ALL_TYPES_4_INDEX.value)

    sleep(120)

    self.ignite_old_version.cu.control_utility('--cache', 'idle_verify')
def run(self, artifact_name):
    """
    Run scenario for defined artifact

    :param artifact_name: name from artifact configuration file
    """
    super().run(artifact_name)

    log_print("Running snapshot benchmark with config: %s" % self.config, color='green')

    version = self.test_class.tiden.config['artifacts'][artifact_name]['ignite_version']
    time_to_jfr = int(self.config.get('time_to_jfr', 0))
    now_plus_300 = datetime.datetime.now() + datetime.timedelta(seconds=1)
    try:
        self.test_class.create_app_config_set(
            Ignite, START_TIME_CONFIG_SET,
            caches_list_file='caches_%s.xml' % START_TIME_CONFIG_SET,
            deploy=True,
            snapshots_enabled=True,
            logger=False,
            wal_segment_size=self.test_class.consumption_config.get(
                'wal_segment_size', 1024 * 1024 * 1024),
            logger_path='%s/ignite-log4j2.xml' %
                        self.test_class.tiden.config['rt']['remote']['test_module_dir'],
            disabled_cache_configs=False,
            zookeeper_enabled=False,
            checkpoint_read_lock_timeout=self.read_lock_property_value(version),
            # caches related variables
            additional_configs=['caches.tmpl.xml', ],
            part_32=self.config.get('part_32', 10000),  # 100
            part_64=self.config.get('part_64', 10000),  # 100
            part_128=self.config.get('part_128', 10000),  # 100
            # artifact config variables
            **self.artifact_config_variables,
        )

        version, ignite = self.test_class.start_ignite_grid(
            artifact_name,
            activate=True,
            config_set=START_TIME_CONFIG_SET,
            jvm_options=self.artifact_jvm_properties)

        ignite.set_snapshot_timeout(1200)

        if time_to_jfr:
            now_plus_300 = datetime.datetime.now() + datetime.timedelta(seconds=350)
            ignite.make_cluster_jfr(300)

        PiClientIgniteUtils.load_data_with_putall(
            ignite,
            Ignite.config_builder.get_config('client', config_set_name=START_TIME_CONFIG_SET),
            # end_key=int(self.config.get('data_size'))
            end_key=10000)

        # kill nodes
        ignite.kill_nodes()

        sleep(120)

        self.start_probes(artifact_name)

        ignite.start_nodes()

        self.stop_probes()

        ignite.kill_nodes()
        ignite.delete_lfs()

        # do some calculations
    finally:
        # if time_to_jfr:
        #     pause.until(now_plus_300)

        # remove config set
        self.test_class.remove_app_config_set(Ignite, START_TIME_CONFIG_SET)
def run_snapshot(self, artifact_name, snapshot_type):
    """
    Run scenario for defined artifact

    :param artifact_name: name from artifact configuration file
    :param snapshot_type: inc/full
    """
    super().run(artifact_name)

    log_print("Running snapshot benchmark with config: %s" % self.config, color='green')

    version = self.test_class.tiden.config['artifacts'][artifact_name]['ignite_version']
    incremental_snapshot = True if snapshot_type == 'inc' else False
    try:
        self.test_class.create_app_config_set(
            Ignite, SNAPSHOT_CONFIG_SET,
            caches_list_file='caches_%s.xml' % SNAPSHOT_CONFIG_SET,
            deploy=True,
            snapshots_enabled=True,
            logger=False,
            wal_segment_size=self.test_class.consumption_config.get(
                'wal_segment_size', 64 * 1024 * 1024),
            logger_path='%s/ignite-log4j2.xml' %
                        self.test_class.tiden.config['rt']['remote']['test_module_dir'],
            disabled_cache_configs=False,
            zookeeper_enabled=False,
            checkpoint_read_lock_timeout=self.read_lock_property_value(version),
            # caches related variables
            additional_configs=['caches.tmpl.xml', ],
            part_32=self.config.get('part_32', 32),
            part_64=self.config.get('part_64', 64),
            part_128=self.config.get('part_128', 128),
            # artifact config variables
            **self.artifact_config_variables,
        )

        version, ignite = self.test_class.start_ignite_grid(
            artifact_name,
            activate=True,
            config_set=SNAPSHOT_CONFIG_SET,
            jvm_options=self.artifact_jvm_properties)

        time_results = list()
        directory_size = list()

        self.start_probes(artifact_name)

        client_config = Ignite.config_builder.get_config('client', config_set_name=SNAPSHOT_CONFIG_SET)
        PiClientIgniteUtils.load_data_with_putall(
            ignite, client_config, end_key=int(self.config.get('data_size')))

        if incremental_snapshot:
            ignite.su.snapshot_utility('snapshot', '-type=full')

        # default times to run
        # plus warmup times
        # plus rerun times
        warmup_runs, prod_runs = self._get_number_of_runs()

        log_print("Running {} iterations".format(warmup_runs + prod_runs))
        for i in range(0, warmup_runs + prod_runs):
            self.write_time_event('iteration_%s start' % i)

            warmup_iteration = False if warmup_runs == 0 else i < warmup_runs

            log_print("Running iteration %s, (%s)" % (i, 'warmup' if warmup_iteration else 'prod'))

            ignite.su.snapshot_utility('snapshot', f'-type={snapshot_type}')

            latest_snapshot_id = ignite.su.snapshots[-1]['id']
            dir_size = get_nodes_directory_size(
                ignite,
                self.test_class.ssh,
                'work/snapshot/%s' % list(
                    SnapshotScenario.util_find_snapshot_folders_on_fs(
                        ignite, latest_snapshot_id).values())[0])

            m = re.search(
                r'Command \[SNAPSHOT\] successfully finished in (\d*) seconds',
                ignite.su.latest_utility_output)

            if incremental_snapshot:
                # TODO: use remove operation after dr-master merge
                with PiClient(ignite, client_config) as piclient:
                    ignite_instance = piclient.get_ignite()
                    for cache_name in ignite_instance.cacheNames().toArray():
                        ignite_instance.cache(cache_name).removeAll()

                PiClientIgniteUtils.load_data_with_putall(
                    ignite, client_config, end_key=int(self.config.get('data_size')))

            # skip some operations as warmup
            if not warmup_iteration:
                assert m, 'Unable to get snapshot time execution'

                time_results.append(int(m.group(1)))
                directory_size.append(int(dir_size))

            self.write_time_event('iteration_%s stop' % i)

        ignite.cu.deactivate()

        self.stop_probes(time_results=time_results,
                         avg_snapshot_dir_size=directory_size,
                         seconds=True)

        self.results['evaluated'] = True

        ignite.kill_nodes()
        ignite.delete_lfs()

        log_put("Cleanup Ignite LFS ... ")
        commands = {}
        for node_idx in ignite.nodes.keys():
            host = ignite.nodes[node_idx]['host']
            if commands.get(host) is None:
                commands[host] = ['rm -rf %s/work/*' % ignite.nodes[node_idx]['ignite_home']]
            else:
                commands[host].append('rm -rf %s/work/*' % ignite.nodes[node_idx]['ignite_home'])

        results = self.test_class.tiden.ssh.exec(commands)
        print(results)
        log_put("Ignite LFS deleted.")
        log_print()
    finally:
        # remove config set
        self.test_class.remove_app_config_set(Ignite, SNAPSHOT_CONFIG_SET)
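# NOTE (sketch): the per-host command batching in run_snapshot() (and the `ln -s` batching
# in test_start_on_lfs()) follows the same "append or create list" pattern; dict.setdefault
# expresses it in one line. `build_cleanup_commands` is an illustrative helper name, not
# part of the framework.
def build_cleanup_commands(nodes):
    """Group 'rm -rf <ignite_home>/work/*' commands by host, one command list per host."""
    commands = {}
    for node in nodes.values():
        commands.setdefault(node['host'], []).append('rm -rf %s/work/*' % node['ignite_home'])
    return commands

# Possible usage inside run_snapshot():
#     results = self.test_class.tiden.ssh.exec(build_cleanup_commands(ignite.nodes))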