def test_client_hashes_sequence_values(self):
    """
    Verify atomic-sequence values survive a full snapshot restore and a grid
    restart.

    Flow: increment a sequence to 5, take a full snapshot, increment to 9,
    restore the snapshot, then check the restored value and the value after
    further increments and a grid restart.
    """
    test_seq = 'test_seq'
    with PiClient(self.ignite, self.get_client_config()) as piclient:
        seq = self.get_or_create_atomic_sequence(piclient, 'test_seq')
        seq.incrementAndGet()
        tiden_assert_equal(1, seq.get(), "sequence '%s' value" % test_seq)
        # range(1, 5) -> 4 more increments, value becomes 5
        for i in range(1, 5):
            seq.incrementAndGet()
        tiden_assert_equal(5, seq.get(), "sequence '%s' value" % test_seq)
        # create full snapshot
        full_snapshot_id = self.su.snapshot_utility(
            'snapshot', '-type=full')
        # increment sequence after snapshot
        for i in range(1, 5):
            seq.incrementAndGet()
        tiden_assert_equal(9, seq.get(), "sequence '%s' value" % test_seq)
        # restore full snapshot
        self.su.snapshot_utility('restore', '-id=%s' % full_snapshot_id)
    with PiClient(self.ignite, self.get_client_config()) as piclient:
        # create=False: the sequence must already exist after restore
        seq = self.get_or_create_atomic_sequence(piclient, 'test_seq',
                                                 create=False)
        tiden_assert_is_not_none(
            seq, "after restore from snapshot, sequence '%s'" % test_seq)
        # NOTE(review): 6 (not 5) after restore — presumably due to sequence
        # reservation on re-acquire; confirm against the sequence
        # implementation's reserve behavior.
        tiden_assert_equal(6, seq.get(), "sequence '%s' value" % test_seq)
        for i in range(1, 5):
            seq.incrementAndGet()
        tiden_assert_equal(10, seq.get(), "sequence '%s' value" % test_seq)
        self.restart_grid()
        seq = self.get_or_create_atomic_sequence(piclient, 'test_seq',
                                                 create=False)
        tiden_assert_is_not_none(
            seq, "after restore from snapshot, sequence '%s'" % test_seq)
        # NOTE(review): 12 (not 10) after restart — again presumably a
        # reservation jump; confirm intent.
        tiden_assert_equal(12, seq.get(), "sequence '%s' value" % test_seq)
def test_baseline_two_nodes_removed_one_added_with_loading(self):
    """
    Under transactional load: kill two baseline nodes, start one additional
    node, reset the baseline, then verify data and logs after rebalance.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self):
            new_server_num = len(self.ignite.get_alive_default_nodes()) - 2
            self.ignite.kill_node(2)
            self.ignite.kill_node(3)
            self.ignite.wait_for_topology_snapshot(
                server_num=new_server_num)
            self.ignite.add_additional_nodes(self.get_server_config(), 1)
            self.ignite.start_additional_nodes(
                self.ignite.get_all_additional_nodes())
            self._set_baseline_few_times()
    # wait for the loading client to disconnect before verification
    self.ignite.wait_for_topology_snapshot(client_num=0)
    util_sleep_for_a_while(self.rebalance_timeout)
    self.util_verify(save_lfs_on_exception=True)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "No AssertionError in logs")
def test_baseline_restart_node_add_one_additional_node(self):
    """
    Under transactional load: restart one node with a wiped LFS, start one
    additional node, reset the baseline several times, then verify.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self):
            self.ignite.kill_node(2)
            # wipe node 2's local persistence before restart
            self.delete_lfs(node_ids=[
                2,
            ])
            self.ignite.start_node(2)
            util_sleep_for_a_while(5)
            new_nodes = self.ignite.add_additional_nodes(
                self.get_server_config(), 1)
            self.ignite.start_additional_nodes(new_nodes)
            # self.cu.kill_transactions()
            self._set_baseline_few_times(5)
    self.util_verify(save_lfs_on_exception=True)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "No AssertionError in logs")
def test_blinking_node_with_extra_node_in_blt(self):
    """
    Repeatedly blink (kill/restart) node 2 while an extra node beyond the
    baseline topology is running; assert no AssertionError in logs.
    """
    # IGNITE-8893
    with PiClient(self.ignite, self.get_client_config()):
        self.ignite.start_additional_nodes(
            self.ignite.add_additional_nodes(self.get_server_config()))
        for iteration in range(0, self.iterations):
            log_print("Iteration {}/{}".format(str(iteration + 1),
                                               str(self.iterations)),
                      color='blue')
            self.assert_nodes_alive()
            self.ignite.kill_node(2)
            util_sleep(10)
            self.ignite.start_node(2)
            # each blink triggers a rebalance; wait for it to finish
            self.ignite.jmx.wait_for_finish_rebalance(
                self.rebalance_timeout, self.group_names)
        util_sleep(60)
        tiden_assert_equal(
            0,
            self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "# of AssertionError")
def test_ignite_8897(self): """ 1. create LRT 2. start PME 3. try to add new node to baseline :return: """ # start grid, remove one random node from baseline self.start_grid() self.ignite.cu.set_current_topology_as_baseline() grid_size = len(self.ignite.get_all_alive_nodes()) stopping_node_id = self.ignite.get_random_server_nodes()[0] stopping_node_consistent_id = self.ignite.get_node_consistent_id( stopping_node_id) self.kill_node(stopping_node_id) self.ignite.wait_for_topology_snapshot(server_num=grid_size - 1, client_num=None) self.ignite.cu.remove_node_from_baseline(stopping_node_consistent_id) util_sleep_for_a_while(5) # create a long running transaction with PiClient(self.ignite, self.get_client_config(), nodes_num=1, jvm_options=self.jvm_options): self.ignite.wait_for_topology_snapshot(None, 1) TestLrt.create_transactional_cache() lrt_operations = TestLrt.launch_transaction_operations() self.release_transactions(lrt_operations)
def zookeeper_fail_test(self, scenario, expecting_broken_cluster=False):
    """
    Run a ZooKeeper failure *scenario* under transactional load and check
    the cluster outcome.

    :param scenario: name of the zookeeper fault scenario to inject
    :param expecting_broken_cluster: when True, the scenario is expected to
        segment the whole cluster — every node must end up dead; when
        False, every node must stay alive.
    """
    node_connection = self.get_server_connections()
    try:
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self, skip_consistency_check=True):
                util_sleep_for_a_while(10, msg='Wait until load started')
                self.zookeeper_fail_scenario(scenario)
                self.su.snapshot_utility('SNAPSHOT', '-type=full')
                self.ignite.kill_node(2)
                util_sleep_for_a_while(60,
                                       msg='Wait after zookeeper issue')
                self.ignite.start_node(2)
                for node_id in node_connection.keys():
                    tiden_assert(
                        self.ignite.check_node_is_alive(node_id),
                        "Node {} is expected to be alive".format(node_id))
                # reaching this point with a broken-cluster expectation
                # means the scenario failed to break anything
                if expecting_broken_cluster:
                    tiden_assert(
                        False,
                        'split up all zookeeper host expected to broke cluster')
    except Exception as e:
        # broad catch is deliberate: a segmented cluster surfaces as
        # assorted client/utility failures
        if expecting_broken_cluster:
            util_sleep_for_a_while(60, msg='Wait all node segmented')
            for node_id in node_connection.keys():
                tiden_assert(
                    not self.ignite.check_node_is_alive(node_id),
                    "Node {} is expected to be dead".format(node_id))
        else:
            raise e
def check_replica_value(self, role='master'):
    """
    Check the DR cluster selected by *role* holds the replicated value:
    key 10000 of the published cache must equal 1.

    :param role: 'master' checks the master app, anything else the replica
    :raises AssertionError: when the value is missing or differs
    """
    app = self.ignite_master_app if role == 'master' else self.ignite_replica_app
    # Fix: pass the selected app's own control utility. The original always
    # passed self.ignite_replica_app.cu, so a 'master' check still went
    # through the replica's utility (cf. make_data_loading, which uses
    # cu=app.cu).
    with PiClient(app, self.get_client_config(role),
                  jvm_options=self.get_dr_jvm_options(role=role),
                  cu=app.cu) as piclient:
        cache = piclient.get_ignite().getOrCreateCache(
            'com.sbt.bm.ucp.published.api.retail.PublishedIndividual')
        assert 1 == cache.get(10000)
        log_print('CHECK CLUSTER HAVE VALUE')
def test_blinking_node_baseline(self):
    """
    Repeatedly blink node 2 and reset the baseline after every kill and
    every restart; assert no AssertionError in logs.
    """
    # IGNITE-8879
    with PiClient(self.ignite, self.get_client_config()):
        for iteration in range(0, self.iterations):
            log_print("Iteration {}/{}".format(str(iteration + 1),
                                               str(self.iterations)),
                      color='blue')
            self.assert_nodes_alive()
            self.ignite.kill_node(2)
            self.ignite.wait_for_topology_snapshot(
                server_num=len(self.ignite.get_alive_default_nodes()))
            # shrink baseline to the surviving nodes
            self.cu.set_current_topology_as_baseline()
            util_sleep(5)
            self.ignite.start_node(2)
            self.ignite.wait_for_topology_snapshot(
                server_num=len(self.ignite.get_alive_default_nodes()))
            # grow baseline back after the node rejoins
            self.cu.set_current_topology_as_baseline()
            self.ignite.jmx.wait_for_finish_rebalance(
                self.rebalance_timeout, self.group_names)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "# of AssertionError")
def test_baseline_adding_one_node_with_loading(self):
    """
    Under metered transactional load: start one additional node, reset the
    baseline, then plot tx metrics and verify caches/logs.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self,
                                  loading_profile=LoadingProfile(delay=1, transaction_timeout=50),
                                  skip_consistency_check=True,
                                  tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
            self.ignite.add_additional_nodes(self.get_server_config(), 1)
            self._sleep_and_custom_event(tx_loading, 'start nodes')
            self.ignite.start_additional_nodes(self.ignite.get_all_additional_nodes())
            self._sleep_and_custom_event(tx_loading, 'set blt')
            self._set_baseline_few_times()
            self._sleep_and_custom_event(tx_loading, 'sleep')
            self._sleep_and_custom_event(tx_loading, 'end loading')
            # capture metrics while the loading context is still alive
            metrics = tx_loading.metrics
    self.ignite.wait_for_topology_snapshot(client_num=0)
    self.create_loading_metrics_graph('test_baseline_adding_one_node_with_loading', metrics)
    util_sleep_for_a_while(self.rebalance_timeout)
    self.cu.control_utility('--cache', 'idle_verify')
    tiden_assert_equal(0,
                       self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                       "No AssertionError in logs"
                       )
def test_blinking_clients_clean_lfs(self):
    """
    IGN-9159 (IGNITE-7165) Re-balancing is canceled if client node joins.
    Re-balancing can take hours and each time when client node joins it starts again
    :return:
    """
    self.wait_for_running_clients_num(client_num=0, timeout=120)
    for iteration in range(0, self.iterations):
        log_print("Iteration {}/{}".format(str(iteration + 1),
                                           str(self.iterations)),
                  color='blue')
        self.assert_nodes_alive()
        # blink node 2 with a clean LFS to force a full rebalance
        self.ignite.kill_node(2)
        self.cleanup_lfs(2)
        self.start_node(2)
        # restart clients 4 times
        for restart_time in range(0, 4):
            # each join/leave of a client must not cancel the rebalance
            with PiClient(self.ignite, self.get_client_config()):
                pass
        self.ignite.jmx.wait_for_finish_rebalance(self.rebalance_timeout,
                                                  self.group_names)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "# of AssertionError")
def test_baseline_removing_two_nodes_with_loading(self):
    """
    Under metered transactional load: kill two baseline nodes, reset the
    baseline, then plot tx metrics and verify caches/logs.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self,
                                  loading_profile=LoadingProfile(delay=1, transaction_timeout=50),
                                  tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
            new_server_num = len(self.ignite.get_alive_default_nodes()) - 2
            self._sleep_and_custom_event(tx_loading, 'kill nodes')
            self.ignite.kill_node(2)
            self.ignite.kill_node(3)
            self.ignite.wait_for_topology_snapshot(server_num=new_server_num)
            self._sleep_and_custom_event(tx_loading, 'set blt')
            self._set_baseline_few_times()
            self._sleep_and_custom_event(tx_loading, 'sleep')
            self._sleep_and_custom_event(tx_loading, 'end loading')
            # capture metrics while the loading context is still alive
            metrics = tx_loading.metrics
    self.ignite.wait_for_topology_snapshot(client_num=0)
    self.create_loading_metrics_graph('test_baseline_removing_two_nodes_with_loading', metrics)
    util_sleep_for_a_while(self.rebalance_timeout)
    self.cu.control_utility('--cache', 'idle_verify')
    tiden_assert_equal(0,
                       self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                       "No AssertionError in logs"
                       )
def test_two_blinking_nodes_clean_lfs(self):
    """
    Repeatedly blink two nodes (node 2 with a wiped LFS, node 3 as-is),
    optionally while ZooKeeper nodes are being restarted, and assert no
    AssertionError in logs.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with ExitStack() as stack:
            # optionally restart zookeeper nodes in the background
            if is_enabled(self.config.get('zookeeper_enabled')) and \
                    is_enabled(self.config.get('zookeeper_nodes_restart')):
                stack.enter_context(ZkNodesRestart(self.zoo, 2))
            for iteration in range(0, self.iterations):
                log_print("Iteration {}/{}".format(str(iteration + 1),
                                                   str(self.iterations)),
                          color='blue')
                self.assert_nodes_alive()
                self.ignite.kill_node(2)
                self.cleanup_lfs(2)
                # self.ignite.start_node(2)
                self.start_node(2)
                self.ignite.kill_node(3)
                util_sleep(10)
                # self.ignite.start_node(3)
                self.start_node(3)
                self.ignite.jmx.wait_for_finish_rebalance(
                    self.rebalance_timeout, self.group_names)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "# of AssertionError")
def test_util_1_7_list_seq(self):
    """
    Test control.sh --cache seq shows correct information about sequences.
    :return:
    """
    from random import randint
    with PiClient(self.ignite, self.get_client_config(),
                  nodes_num=1) as piclient:
        seq_name = 'seq_name_%s'
        for i in range(1, 6):
            seq_ = self.util_get_or_create_atomic_sequence(
                piclient, seq_name % i)
            counter_num = randint(1, 100)
            log_print('Seq seq_%s will be incremented up to %s' %
                      (i, counter_num),
                      color='green')
            # NOTE(review): range(1, counter_num) performs counter_num - 1
            # increments (and zero when counter_num == 1), so the final
            # value is counter_num - 1, not counter_num as the log message
            # says — confirm which is intended.
            for j in range(1, counter_num):
                seq_.incrementAndGet()
        # read back and log every sequence's current value
        for i in range(1, 6):
            seq_ = self.util_get_or_create_atomic_sequence(
                piclient, seq_name % i)
            seq_val = seq_.get()
            log_print('Seq seq_%s has value %s' % (i, seq_val),
                      color='debug')
        # list the sequences a few times via control.sh
        for i in range(0, 3):
            self.cu.control_utility('--cache', 'list seq_name.* --seq')
            output = self.cu.latest_utility_output
            log_print(output)
        expected = ['idle_verify check has finished, no conflicts have been found.']
        self.cu.control_utility('--cache idle_verify',
                                all_required=expected)
        # self.su.snapshot_utility('idle_verify',
        #                          '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
        #                          all_required=expected)
        log_print(self.cu.latest_utility_output, color='debug')
def test_util_1_6_utility_control_cache_contention(self):
    """
    Test control.sh --cache contention shows correct information about keys with contention.
    :return:
    """
    try:
        LRT_TIMEOUT = 10
        with PiClient(self.ignite, self.get_server_config(),
                      nodes_num=1) as piclient:
            cache_name = 'test_cache_001'
            # baseline run with no contention yet
            self.cu.control_utility('--cache contention 1')
            lrt_operations = self.launch_transaction_operations(cache_name)
            # with long-running transactions held, probe several thresholds
            self.cu.control_utility('--cache contention 1')
            self.cu.control_utility('--cache contention 11')
            self.cu.control_utility('--cache contention 10')
            # log (not fail on) any held transaction missing from the output
            for operation in lrt_operations:
                transaction_xid = operation.getTransaction().xid().toString()
                if transaction_xid not in self.cu.latest_utility_output:
                    log_print(
                        "Transaction with XID %s is not in list. Key=%s, Value=%s"
                        % (transaction_xid, operation.getKey(),
                           operation.getValue()),
                        color='debug')
            self.release_transactions(lrt_operations)
            log_print("Sleep for %s seconds" % LRT_TIMEOUT)
            util_sleep_for_a_while(LRT_TIMEOUT)
            self.cu.control_utility('--cache contention 1')
    except TidenException as e:
        # best-effort: log utility failures instead of failing the test here
        log_print(e, color='red')
        pass
def test_util_1_8_counters_detection_during_PME_node_from_baseline(self):
    """
    Tests PME synchronise partition counters if some detected.

    Breaks a partition counter in one cache, confirms idle_verify reports
    the conflict, then triggers PME twice (add node + baseline change,
    kill node + baseline change) and re-checks idle_verify each time.
    :return:
    """
    self.load_data_with_streamer(end_key=1000,
                                 value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)
    with PiClient(self.ignite, self.get_client_config(),
                  nodes_num=1) as piclient:
        caches_before_lrt = []
        for cache_name in piclient.get_ignite().cacheNames().toArray():
            caches_before_lrt.append(cache_name)
        cache_under_test = caches_before_lrt[0]
        log_print('Cache under test: %s' % cache_under_test, color='blue')
        # corrupt the partition update counter for one entry
        operation = create_broke_data_entry_operation(cache_under_test, 1,
                                                      True, 'counter')
        operation.evaluate()
        expected = ['Conflict partition']
        self.cu.control_utility('--cache idle_verify',
                                all_required=expected)
        output = self.cu.latest_utility_output
        # extract the conflicting group/partition ids from the report
        grp_id, part_id = None, None
        for line in output.split('\n'):
            m = search('Conflict partition: (PartitionKey|PartitionKeyV2) \[grpId=(\d+),.*partId=(\d+)\]', line)
            if m:
                grp_id = m.group(2)
                part_id = m.group(3)
        tiden_assert(grp_id and part_id,
                     'Expecting to find conflicts in output\n{}'.format(
                         self.cu.latest_utility_output))
        # Start one more server node and change baseline to run PME
        log_print("Going to start additional node", color='green')
        self.ignite.add_additional_nodes(self.get_server_config(), 1)
        self.ignite.start_additional_nodes(
            self.ignite.get_all_additional_nodes())
        self.cu.control_utility('--baseline')
        self.cu.set_current_topology_as_baseline()
        self.cu.control_utility('--baseline')
        # PME must have logged the counter mismatch on node 1
        msg_in_log = self.find_in_node_log(
            'Partition states validation has failed for group: %s'
            % cache_under_test,
            node_id=1)
        assert msg_in_log != []
        # Check there are no conflicts after PME
        util_sleep_for_a_while(30)
        self.cu.control_utility('--cache', 'idle_verify')
        # Stop one more server node and change baseline to run PME
        self.ignite.kill_node(self.ignite.get_alive_additional_nodes()[0])
        util_sleep_for_a_while(30)
        self.cu.control_utility('--baseline')
        self.cu.set_current_topology_as_baseline()
        self.cu.control_utility('--baseline')
        # Check there are no conflicts after PME
        self.cu.control_utility('--cache', 'idle_verify')
def test_client_start_restart_grid_stop(self):
    """Keep a client alive across a full grid restart, then list caches."""
    with PiClient(self.ignite, self.get_client_config()) as piclient:
        log_print(piclient)
        # Bounce the entire grid while the client stays connected.
        self.stop_grid()
        self.start_grid()
        # The surviving client must still be able to enumerate caches.
        for cache_name in piclient.get_ignite().cacheNames().toArray():
            log_print(cache_name)
def test_rebalancing_with_ttl_caches(self):
    """
    IGN-13549 (IGNITE-11400)
    Rebalancing caches with TTL enabled can cause data corruption.
    :return:
    """
    with PiClient(self.ignite, self.get_client_config()):
        # checksum the whole grid before the blink
        checksums = create_distributed_checksum_operation().evaluate()
    self.wait_for_running_clients_num(client_num=0, timeout=120)
    self.ignite.cu.control_utility(
        '--cache idle_verify --dump --skip-zeros')
    log_print('Calculating checksums done: %s' % checksums)
    self.assert_nodes_alive()
    self.ignite.kill_node(2)
    # load extra data while node 2 is down so it must rebalance on return
    PiClientIgniteUtils.load_data_with_putall(
        self.ignite,
        self.get_client_config(),
        start_key=self.preloading_size,
        end_key=self.preloading_size + 100000)
    util_sleep(5 * 60)
    self.ignite.start_node(2)
    self.ignite.jmx.wait_for_finish_rebalance(self.rebalance_timeout * 2,
                                              self.group_names)
    with PiClient(self.ignite, self.get_client_config()):  # , jvm_options=jvm_options):
        # NOTE(review): the post-rebalance checksum overwrites the first
        # one and the two are only logged, never compared — confirm whether
        # an explicit equality assertion was intended.
        checksums = create_distributed_checksum_operation().evaluate()
    self.wait_for_running_clients_num(client_num=0, timeout=120)
    log_print('Calculating checksums done: %s' % checksums)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "# of AssertionError")
def test_cluster_stress_tolerance(self, node_under_test, other_node,
                                  fault_combination):
    """
    Run transactional loading while injecting the faults enabled in
    *fault_combination* (disc/network/cpu/ram load, SIGSTOP of a server or
    client process, packet loss/duplication/corruption) against the host
    of *node_under_test*, then build a loading-metrics graph and verify no
    AssertionError appears in the logs.

    :param node_under_test: node descriptor dict ('host', 'ignite_home')
    :param other_node: peer node descriptor used for network faults
    :param fault_combination: mapping fault name -> bool (enabled)
    """
    timeout = 15
    thread_timeout = 10
    take_a_rest_timeout = 10
    host_under_test = node_under_test.get('host')
    other_host = other_node.get('host')
    stress = StressT(self.ssh)
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self,
                                  loading_profile=LoadingProfile(
                                      delay=1,
                                      commit_possibility=1.0,
                                      transaction_timeout=5000),
                                  tx_metrics=['txCreated', 'txCommit',
                                              'txRollback', 'txFailed']) as tx_loading:
            util_sleep_for_a_while(take_a_rest_timeout,
                                   msg='Loading warm up for')
            self._custom_event(tx_loading, 'start')
            util_sleep_for_a_while(take_a_rest_timeout)
            for key, value in fault_combination.items():
                if not value:
                    continue
                self._sleep_and_custom_event(tx_loading, '%s start' % key)
                print_red('%s start' % key)
                # dispatch the enabled fault; the redundant 'and value'
                # guards were dropped — 'value' is already known truthy
                if key == 'disc load':
                    stress.load_disk(node_under_test['ignite_home'],
                                     host_under_test, timeout=timeout)
                if key == 'network load':
                    stress.load_network(host_under_test, other_host,
                                        timeout=timeout)
                if key == 'cpu load':
                    stress.load_cpu(host_under_test, timeout=timeout)
                if key == 'ram load':
                    stress.load_ram(host_under_test, timeout=timeout)
                if key in ['sigstop_server', 'sigstop_client']:
                    if key == 'sigstop_server':
                        pid = stress.get_random_server_pid(host_under_test)
                    else:
                        pid = stress.get_random_client_pid(host_under_test)
                    stress.sigstop_process(host_under_test, pid,
                                           timeout=thread_timeout)
                # Fix: normalize before matching — the original list held
                # ' packets duplicate' with a stray leading space, which a
                # clean key could never match.
                if key.strip() in ['packets loss', 'packets duplicate',
                                   'packets corrupt']:
                    stress.network_emulate_packet(host_under_test,
                                                  other_host,
                                                  lost_rate='5.0%',
                                                  timeout=timeout,
                                                  type=key.split()[-1])
                self._custom_event(tx_loading, ' ')
                print_red('%s stop' % key)
            # util_sleep_for_a_while(take_a_rest_timeout, msg='Rest between tests for')
            util_sleep_for_a_while(take_a_rest_timeout)
            self._custom_event(tx_loading, 'end')
            # capture metrics while the loading context is still alive
            metrics = tx_loading.metrics
    self.ignite.wait_for_topology_snapshot(client_num=0)
    # Fix: label the graph with this test's own name — it was a copy-paste
    # of 'test_baseline_adding_two_nodes_with_loading', overwriting that
    # test's artifacts.
    self.create_loading_metrics_graph('test_cluster_stress_tolerance',
                                      metrics)
    self.cu.list_transactions()
    self.cu.control_utility('--cache', 'idle_verify')
    tiden_assert_equal(0,
                       self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                       "No AssertionError in logs"
                       )
def test_mixed_cluster_load_caches_old_server(self):
    """
    1. start mixed cluster (new version servers + old version servers)
    2. activate from new version control.sh
    3. start old version server
    4. add it to baseline
    5. smoke check:
    5.1. create dynamic caches from old server node
    5.2. do some load from old server node
    """
    self.ignite_new_version.cu.activate()
    created_caches = []
    self.server_config = Ignite.config_builder.get_config(
        'server', config_set_name='base')
    ignite = self.ignite_old_version
    with PiClient(ignite, self.server_config, nodes_num=1) as piclient:
        ignite.cu.add_node_to_baseline(
            ignite.get_node_consistent_id(piclient.node_ids[0]))
        dynamic_caches_factory = DynamicCachesFactory()
        async_ops = []
        for method in dynamic_caches_factory.dynamic_cache_configs:
            cache_name = "cache_group_%s" % method
            log_print('Loading {}...'.format(cache_name), color='green')
            # NOTE(review): this rebinds 'ignite' from the old-version app
            # to the piclient gateway object, shadowing the outer variable
            # for the rest of the loop — confirm this is intentional.
            ignite = piclient.get_ignite()
            ignite.getOrCreateCache(
                getattr(dynamic_caches_factory, method)(cache_name))
            async_operation = create_async_operation(
                create_put_all_operation,
                cache_name, 1, 1001, 10,
                value_type=self.data_model)
            async_ops.append(async_operation)
            async_operation.evaluate()
            created_caches.append(cache_name)
        log_print('Waiting async results...', color='debug')
        # wait for streamer to complete
        for async_op in async_ops:
            async_op.getResult()
    with TransactionalLoading(MixedTestLoadingAdapter(self),
                              config_file=self.server_config,
                              loading_profile=LoadingProfile(
                                  delay=1,
                                  transaction_timeout=100000)):
        sleep(60)
def test_baseline_remove_and_back_one_node_with_additional_in_baseline(
        self):
    """
    Under transactional load: with two additional nodes in baseline, kill
    one baseline node and one additional node, remove the baseline node
    from the baseline, load more data, then bring it back and re-add it.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self,
                                  loading_profile=LoadingProfile(
                                      run_for_seconds=30, delay=10)):
            current_server_nodes = self.ignite.get_nodes_num('server')
            self.start_additional_nodes(
                self.ignite.add_additional_nodes(self.get_server_config(),
                                                 2))
            self.ignite.wait_for_topology_snapshot(
                server_num=current_server_nodes + 2)
            log_print('Kill one nodes: baseline and additional')
            self.ignite.kill_node(2)
            self.ignite.kill_node(
                self.ignite.get_alive_additional_nodes()[0])
            log_print('Remove node from baseline')
            self.ignite.wait_for_topology_snapshot(
                server_num=current_server_nodes)
            # self._set_baseline_few_times()
            self.cu.control_utility('--baseline')
            self.cu.remove_node_from_baseline(
                self.ignite.get_node_consistent_id(2))
            # load data while the node is out of the baseline
            self.load_data_with_streamer(1001, 1501)
            log_print('Start node again')
            self.ignite.start_node(2)
            self.ignite.wait_for_topology_snapshot(
                server_num=current_server_nodes + 1)
            # self._set_baseline_few_times()
            self.cu.add_node_to_baseline(
                self.ignite.get_node_consistent_id(2))
            self.ignite.wait_for_topology_snapshot(
                server_num=current_server_nodes + 1)
    print_red("AssertExceptions: %s" % str(
        self.ignite.find_exception_in_logs("java.lang.AssertionError")))
    self.util_verify(save_lfs_on_exception=True)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "No AssertionError in logs")
def test_baseline_auto_activation(self):
    """
    After loading data, restart every default node and verify the cluster
    auto-activates (baseline topology reached), then add an extra node and
    verify data and logs.
    """
    with PiClient(self.ignite, self.get_client_config()) as piclient:
        cache_names = piclient.get_ignite().cacheNames()
        async_operations = []
        for cache_name in cache_names.toArray():
            async_operation = create_async_operation(
                create_cross_cache_account_runner_operation,
                cache_name, 1, 1000, 0.5,
                delay=1,
                run_for_seconds=20)
            async_operations.append(async_operation)
            async_operation.evaluate()
        # wait operations to complete
        for async_op in async_operations:
            print(async_op.getResult())
    # full restart of the default topology: auto-activation must kick in
    alive_default_nodes = self.ignite.get_alive_default_nodes()
    for node_id in alive_default_nodes:
        self.ignite.kill_node(node_id)
    util_sleep_for_a_while(5)
    self.ignite.start_nodes()
    self.ignite.add_additional_nodes(self.get_server_config(), 1)
    self.ignite.start_additional_nodes(
        self.ignite.get_all_additional_nodes())
    print_red("AssertExceptions: %s" % str(
        self.ignite.find_exception_in_logs("java.lang.AssertionError")))
    self.cu.control_utility('--baseline')
    assert 'Cluster state: active' in self.cu.latest_utility_output, \
        'Cluster is inactive'
    util_sleep_for_a_while(self.rebalance_timeout)
    self.util_verify(save_lfs_on_exception=True)
    tiden_assert_equal(
        0,
        self.ignite.find_exception_in_logs('java.lang.AssertionError'),
        "No AssertionError in logs")
def test_master_master_master_blinking_blt(self):
    """
    On cluster 0 of a multi-master DR setup: under transactional load,
    repeatedly blink node 2 with baseline resets run in the background,
    then verify the cluster.
    """
    self.prepare_clusters()
    client_config = self.preconfigure_cluster_0()
    iterations = 10
    last_loaded_key = START_DATA_SIZE
    nodes_before = 6
    with PiClient(self.clusters[0].grid, client_config,
                  jvm_options=['-ea']) as piclient:
        PiClientIgniteUtils.load_data_with_streamer(
            self.clusters[0].grid,
            client_config,
            end_key=last_loaded_key,
            jvm_options=['-ea'],
            check_clients=False)
        sleep(60)
        with TransactionalLoading(self,
                                  ignite=self.clusters[0].grid,
                                  config_file=client_config,
                                  skip_consistency_check=True):
            for i in range(0, iterations):
                log_print(f'Current iteration {i + 1} from {iterations}',
                          color='debug')
                self.clusters[0].grid.kill_node(2)
                utility_baseline_log = 'control-utility-baseline.log'
                # baseline reset runs in the background while the node is
                # down and again after it rejoins
                self.clusters[0].grid.cu.set_current_topology_as_baseline(
                    background=True, log=utility_baseline_log)
                self.clusters[0].grid.start_node(2,
                                                 skip_topology_check=True)
                self.clusters[0].grid.wait_for_topology_snapshot(
                    server_num=6)
                self.clusters[0].grid.update_started_node_status(2)
                self.clusters[0].grid.cu.set_current_topology_as_baseline(
                    background=True, log=utility_baseline_log)
        self.verify_cluster(0, nodes_before, last_loaded_key)
def test_baseline_two_nodes_removed_one_added_with_cpu_loading(self):
    """
    Under metered transactional load plus an injected CPU-load burst:
    kill two baseline nodes, add one additional node, reset the baseline,
    then plot tx metrics and verify caches/logs.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self,
                                  loading_profile=LoadingProfile(delay=1,
                                                                 commit_possibility=0.2,
                                                                 transaction_timeout=1000),
                                  tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
            new_server_num = len(self.ignite.get_alive_default_nodes()) - 2
            self._sleep_and_custom_event(tx_loading, 'kill nodes')
            self.ignite.kill_node(2)
            self.ignite.kill_node(3)
            self.ignite.wait_for_topology_snapshot(server_num=new_server_num)
            self.ignite.add_additional_nodes(self.get_server_config(), 1)
            self._sleep_and_custom_event(tx_loading, 'add new nodes')
            self.ignite.start_additional_nodes(self.ignite.get_all_additional_nodes())
            self._sleep_and_custom_event(tx_loading, 'set blt')
            self._set_baseline_few_times()
            self._sleep_and_custom_event(tx_loading, 'sleep')
            # burst of synthetic CPU load while transactions keep running
            self._sleep_and_custom_event(tx_loading, 'cpu_load')
            cpu_load_operation = create_cpu_load_operation(1.0, 1.0, 2)
            cpu_load_operation.evaluate()
            self._sleep_and_custom_event(tx_loading, 'cpu_load_sleep_end')
            cpu_load_operation.interrupt()
            self._sleep_and_custom_event(tx_loading, 'end loading')
            # capture metrics while the loading context is still alive
            metrics = tx_loading.metrics
    self.ignite.wait_for_topology_snapshot(client_num=0)
    log_print(inspect.stack()[0].function)
    # Fix: the graph was labeled
    # 'test_baseline_two_nodes_removed_one_added_with_loading' (the non-CPU
    # sibling's name), overwriting that test's artifacts.
    self.create_loading_metrics_graph(
        'test_baseline_two_nodes_removed_one_added_with_cpu_loading',
        metrics)
    util_sleep_for_a_while(self.rebalance_timeout)
    self.cu.control_utility('--cache', 'idle_verify')
    tiden_assert_equal(0,
                       self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                       "No AssertionError in logs"
                       )
def test_loading_rest(self):
    """
    Pure-loading baseline test: run transactional load with no cluster
    changes, then plot tx metrics and verify caches/logs.
    """
    with PiClient(self.ignite, self.get_client_config()):
        with TransactionalLoading(self,
                                  loading_profile=LoadingProfile(delay=1,
                                                                 transaction_timeout=5000),
                                  tx_metrics=True) as tx_loading:
            self._sleep_and_custom_event(tx_loading, 'start')
            util_sleep_for_a_while(100)
            self._sleep_and_custom_event(tx_loading, 'end')
            # Fix: capture the metrics dict while the loading context is
            # alive — every sibling test passes tx_loading.metrics to
            # create_loading_metrics_graph, not the loading object itself.
            metrics = tx_loading.metrics
    self.ignite.wait_for_topology_snapshot(client_num=0)
    # Fix: graph was labeled with another test's name
    # ('test_baseline_adding_two_nodes_with_loading').
    self.create_loading_metrics_graph('test_loading_rest', metrics)
    self.cu.list_transactions()
    self.cu.control_utility('--cache', 'idle_verify')
    tiden_assert_equal(0,
                       self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                       "No AssertionError in logs"
                       )
def make_data_loading(self, duration, role='master', func_on_load=None):
    """
    Run transactional load against the cluster selected by *role* for
    *duration* seconds, optionally executing a DR action mid-load.

    :param duration: seconds to keep the load running after the action
    :param role: 'master' loads the master app, anything else the replica
    :param func_on_load: optional mid-load action: 'switch', 'bootstrap',
        'restart_on_load', or 'pitr'
    """
    app = self.ignite_master_app if role == 'master' else self.ignite_replica_app
    with PiClient(app, self.get_client_config(role),
                  jvm_options=self.get_dr_jvm_options(role),
                  cu=app.cu) as piclient:
        with TransactionalLoading(piclient,
                                  ignite=app,
                                  skip_consistency_check=True,
                                  cross_cache_batch=100,
                                  skip_atomic=True,
                                  config_file=self.get_client_config(role),
                                  wait_before_kill=False,
                                  loading_profile=LoadingProfile(
                                      delay=1,
                                      start_key=0,
                                      end_key=100,
                                      transaction_timeout=500,
                                      run_for_seconds=duration + 10),
                                  tx_metrics=['txCreated', 'txCommit',
                                              'txRollback',
                                              'txFailed']) as tx_loading:
            # let the load warm up before triggering the DR action
            sleep(20)
            if func_on_load == 'switch':
                self.ignite_master_app.ru.replication_utility('switch')
            elif func_on_load == 'bootstrap':
                # bootstrap master snapshot, then feed it to the replica
                self.ignite_master_app.ru.replication_utility(
                    'bootstrap',
                    '-role=master -archive=ZIP -single_copy -parallelism=4 -snapshot_folder=%s/snapshot' % self.dr_storage,
                    timeout=1200)
                self.ignite_replica_app.ru.replication_utility(
                    'bootstrap',
                    '-role=replica -snapshot_folder=%s/snapshot -snapshot_id=%s' % (
                        self.dr_storage,
                        self.ignite_master_app.ru.get_session_id_from_bootstrap_command()),
                    timeout=1200)
            elif func_on_load == 'restart_on_load':
                self.ignite_replica_app.ru.replication_utility('pause')
                sleep(10)
                self.restart_ignite_grid('replica')
                sleep(10)
                self.ignite_replica_app.ru.replication_utility('resume')
            elif func_on_load == 'pitr':
                # plant a marker value, then stop replication with recovery
                cache = piclient.get_ignite().getOrCreateCache(
                    'com.sbt.bm.ucp.published.api.retail.PublishedIndividual')
                cache.put(10000, 1)
                sleep(45)
                self.ignite_replica_app.ru.replication_utility('stop',
                                                               '-recovery')
            sleep(duration)
            log_print(tx_loading.metrics['txCommit'])
    app.wait_for_topology_snapshot(
        None, 0, ''
    )
    log_print('Loading done')
def calculate_sum_by_field(self, start_key=0, end_key=1001, field='balance',
                           config_file=None, jvm_options=None):
    """
    Sum *field* over the values of every cache and return per-cache totals.

    :param start_key: start key
    :param end_key: end key
    :param field: field that used in sum operation
    :param config_file: client config (defaults to self.client_config)
    :param jvm_options: jvm options
    :return: dict mapping cache name -> sum of *field*
    """
    log_print("Calculating sum over field '%s' in each cache value" % field)
    result = {}
    if not config_file:
        config_file = self.client_config
    with PiClient(self.current_client_ignite, config_file,
                  jvm_options=jvm_options, nodes_num=1) as piclient:
        # iterate caches in deterministic (sorted) order
        cache_names = sorted(piclient.get_ignite().cacheNames().toArray())
        log_print(cache_names, color='debug')
        for name in cache_names:
            log_print('Caches: {}'.format(name), color='debug')
            total = create_sum_operation(name, start_key, end_key,
                                         field).evaluate()
            result[name] = total
            log_print('Bal {}'.format(total), color='debug')
    log_print('Calculating over field done')
    return result
def check_read_only_cluster(self, role='master'):
    """
    Verify the cluster selected by *role* rejects writes (is read-only).

    Attempts a transactional put into the first cache; the put must fail
    with a Py4JJavaError, otherwise a TidenException is raised.

    :param role: 'master' checks the master app, anything else the replica
    :raises TidenException: when the write unexpectedly succeeds
    """
    app = self.ignite_master_app if role == 'master' else self.ignite_replica_app
    read_only = True
    with PiClient(app, self.get_client_config(role),
                  jvm_options=self.get_dr_jvm_options(role)) as piclient:
        cache = piclient.get_ignite().getOrCreateCache(
            (piclient.get_ignite().cacheNames().toArray())[0])
        try:
            with IgniteTransaction() as tx:
                cache.put(1, 1)
                tx.commit()
                # reached only when the write went through
                read_only = False
        except Py4JJavaError:
            # expected: writes must be rejected on a read-only cluster
            pass
        finally:
            if not read_only:
                raise TidenException("assert {} read-only".format(role))
    log_print('Check cluster {} in read-only success'.format(role))
def was_snapshots(self):
    """
    Check previous run for any snapshot tasks
    Wait for snapshot creation or restore ends

    Polls via a dedicated PiClient for at most ~4 minutes; returns early
    when the snapshot operation reports FINISHED with a truthy result.
    """
    end_time = time() + 60 * 4
    # nothing to wait for without at least two history entries
    if len(self.context['history']) < 2:
        return
    if [
        o for o in self.context['history'][-2]['operations']
        if 'snapshot' in o
    ]:
        while True:
            try:
                with PiClient(self.cluster,
                              self.client_config,
                              new_instance=True,
                              name='snapshot_wait',
                              exception_print=False,
                              read_timeout=60 * 4) as piclient:
                    op = create_async_operation(
                        wait_snapshot_operation,
                        100,
                        gateway=piclient.get_gateway())
                    op.evaluate()
                    # NOTE(review): this inner loop spins without sleeping
                    # until FINISHED-with-result or the deadline — confirm
                    # a short sleep was not intended here.
                    while True:
                        if op.getStatus().toString() == "FINISHED":
                            if bool(op.getResult()):
                                return
                        if time() > end_time:
                            log_print(
                                'failed to wait snapshot wait operation finished',
                                color='red')
                            return
            except:
                # deliberate best-effort: client start may fail while the
                # cluster is busy; retry until the deadline
                log_print('snapshot wait failed', color='red')
                sleep(5)
                if time() > end_time:
                    log_print(
                        'failed to wait piclient start to wait snapshot execution',
                        color='red')
                    return
def test_util_1_7_broke_index(self):
    """
    Tests control.sh --cache validate_indexes detects broken index for some cache.
    :return:
    """
    self.load_data_with_streamer(end_key=5000,
                                 value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)
    # create a secondary index to corrupt later
    update_table = ['!tables', '!index',
                    '\'create index example_idx on \"cache_group_1_028\".ALLTYPESINDEXED(STRINGCOL);\'',
                    '!index']
    sqlline = Sqlline(self.ignite)
    sqlline.run_sqlline(update_table)
    with PiClient(self.ignite, self.get_client_config(), nodes_num=1):
        cache_under_test = 'cache_group_1_028'
        log_print('Cache under test: %s' % cache_under_test, color='blue')
        # corrupt an index entry for the cache
        operation = create_broke_data_entry_operation(cache_under_test, 0,
                                                      True, 'index')
        operation.evaluate()
        util_sleep_for_a_while(10)
        self.cu.control_utility('--cache', 'validate_indexes')
        # keep validate_indexes output: it is scanned for the issue below
        output = self.cu.latest_utility_output
        expected = ['idle_verify failed', 'Idle verify failed']
        self.cu.control_utility('--cache idle_verify',
                                '-output=%s/idle_verify_output.txt'
                                % self.config['rt']['remote']['test_dir'],
                                all_required=expected)
        # self.su.snapshot_utility('idle_verify',
        #                          '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
        #                          all_required=expected)
        log_print(output, color='debug')
        found_broken_index = False
        for line in output.split('\n'):
            m = search('IndexValidationIssue.*cacheName=%s, idxName=EXAMPLE_IDX' % cache_under_test, line)
            if m:
                found_broken_index = True
                log_print('Index EXAMPLE_IDX is broken', color='green')
        if not found_broken_index:
            raise TidenException('Expecting index broken, but it\'s not!!!')
def test_old_cluster_load_caches_new_client(self):
    """
    1. start old version grid
    2. activate from old version control.sh
    3. start new version client
    4. smoke check:
    4.1. create dynamic caches
    4.2. do some load
    """
    created_caches = []
    self.ignite_old_version.cu.activate()
    with PiClient(self.ignite_new_version, self.client_config,
                  nodes_num=1) as piclient:
        dynamic_caches_factory = DynamicCachesFactory()
        async_ops = []
        for method in dynamic_caches_factory.dynamic_cache_configs:
            cache_name = "cache_group_%s" % method
            log_print('Loading {}...'.format(cache_name), color='green')
            # create the cache from the new-version client, then load it
            # asynchronously
            piclient.get_ignite().getOrCreateCache(
                getattr(dynamic_caches_factory, method)(cache_name))
            async_operation = create_async_operation(
                create_put_all_operation,
                cache_name, 1, 1001, 10,
                value_type=self.data_model)
            async_ops.append(async_operation)
            async_operation.evaluate()
            created_caches.append(cache_name)
        log_print('Waiting async results...', color='debug')
        # wait for streamer to complete
        for async_op in async_ops:
            async_op.getResult()
    with TransactionalLoading(MixedTestLoadingAdapter(self),
                              config_file=self.client_config,
                              loading_profile=LoadingProfile(
                                  delay=1,
                                  transaction_timeout=100000)):
        sleep(60)