def calculate_checksums(self, idle_verify=True):
    """Compute distributed cache checksums using the 'master' and 'replica' configs.

    :param idle_verify: when truthy, ``diff_idle_verify_dump()`` is called
        before the checksums are calculated and again afterwards with
        ``raise_flag=True``.
    :return: tuple ``(master_checksum, replica_checksum)`` as returned by
        ``PiClientIgniteUtils.calc_checksums_distributed``.
    """
    if idle_verify:
        self.diff_idle_verify_dump()

    # NOTE(review): both calculations are launched through
    # self.ignite_master_app; only the client config and JVM options differ
    # between 'master' and 'replica'. Confirm this is intentional.
    master_sum, replica_sum = [
        PiClientIgniteUtils.calc_checksums_distributed(
            self.ignite_master_app,
            config=self.get_client_config(cluster_name),
            jvm_options=self.get_dr_jvm_options(cluster_name),
        )
        for cluster_name in ('master', 'replica')
    ]

    if idle_verify:
        self.diff_idle_verify_dump(raise_flag=True)

    return master_sum, replica_sum
def verify_cluster(self, nodes_before, last_loaded_key=None):
    """Check the cluster after a disruptive step and try to bring it back.

    Steps, in order:

    1. If the number of server nodes differs from ``len(nodes_before)``,
       refresh node statuses and restart every alive default node whose
       status check fails.
    2. Run ``control_utility('--activate')`` and ``--baseline``, looking for
       the substring ``'Error'`` in the utility output (``--baseline`` is
       retried once after 60 seconds).
    3. If ``last_loaded_key`` is truthy, stream 500 more keys starting at it
       and print the distributed checksums.

    :param nodes_before: collection of nodes seen before the step; only its
        length is used.
    :param last_loaded_key: first key for the extra data load. Falsy values
        (``None`` and also ``0``) skip the load entirely.
    :return: ``last_loaded_key``, increased by 500 when the load ran.
    :raises AssertionError: if the server node count still does not match
        after the restart attempt, or if the data load / checksum step raises
        any exception (the original exception is attached as ``__cause__``).
    """
    expected_servers = len(nodes_before)

    if expected_servers != self.ignite.get_nodes_num('server'):
        log_print("There are missing nodes on cluster.", color='yellow')

        self.verify_no_assertion_errors()

        log_print("Wait for topology messages again.", color='yellow')
        for node_id in self.ignite.get_all_default_nodes():
            self.ignite.update_started_node_status(node_id)

        log_print("Missing nodes case confirmed. Trying to restart node.", color='red')
        if expected_servers != self.ignite.get_nodes_num('server'):
            nodes_to_start = []
            for node_id in self.ignite.get_alive_default_nodes():
                # assert that node is not dead otherwise kill/restart again
                if not self.ignite.check_node_status(node_id):
                    log_print("Restarting node %s" % node_id, color='yellow')
                    nodes_to_start.append(node_id)

            for node_id in nodes_to_start:
                self.ignite.start_node(node_id, skip_nodes_check=True, check_only_servers=True)

            if expected_servers != self.ignite.get_nodes_num('server'):
                # Collect thread dumps before failing so the hang can be analysed.
                for node_id in self.ignite.get_alive_default_nodes():
                    self.util_get_threads_from_jstack(node_id, "FAILED")

                # Explicit raise instead of `assert False`, which is removed
                # when Python runs with -O.
                raise AssertionError("Failed to restart node")

    self.cu.control_utility('--activate')
    self.verify_no_assertion_errors()

    activate_failed = False
    log_print('Check that there is no Error in activate logs', color='yellow')
    if 'Error' in self.cu.latest_utility_output:
        activate_failed = True
        log_print('Failed!', color='red')
    sleep(5)

    self.cu.control_utility('--baseline')
    self.verify_no_assertion_errors()
    log_print('Check that there is no Error in control.sh --baseline logs', color='yellow')

    if 'Error' in self.cu.latest_utility_output:
        log_print('Failed! Second try after sleep 60 seconds', color='red')
        sleep(60)

        self.cu.control_utility('--baseline')

        # NOTE(review): activate_failed is only consulted on this retry path;
        # confirm that is the intended nesting.
        if 'Error' in self.cu.latest_utility_output or activate_failed:
            log_print('Cluster looks hang.')

    log_print('Check that there is no AssertionError in logs', color='yellow')
    self.verify_no_assertion_errors()

    if last_loaded_key:
        try:
            log_print('Trying to load data into survivor caches', color='yellow')
            PiClientIgniteUtils.load_data_with_streamer(
                self.ignite,
                self.get_client_config(),
                start_key=last_loaded_key,
                end_key=last_loaded_key + 500,
                allow_overwrite=True,
                check_clients=False,
            )

            last_loaded_key += 500

            log_print('Printing checksums of existing caches', color='yellow')

            print(
                PiClientIgniteUtils.calc_checksums_distributed(
                    self.ignite,
                    self.get_client_config(),
                    check_clients=False))

            log_print('Check that there is no AssertionError in logs', color='yellow')
        except Exception as e:
            for node_id in self.ignite.get_alive_default_nodes():
                self.util_get_threads_from_jstack(node_id, "FAILED")

            # Keep the AssertionError type callers expect, but chain the real
            # cause and do not rely on `assert`, which -O would strip (turning
            # this handler into a silent swallow).
            raise AssertionError("Unable to connect client") from e
        finally:
            # Runs on both the success and the failure path.
            self.verify_no_assertion_errors()

    return last_loaded_key