Example #1
 def util_verify(self, save_lfs_on_exception=False):
     util_sleep_for_a_while(10, 'Sleep before IDLE_VERIFY')
     from pt.util import version_num
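     # idle_verify output changed in Ignite 2.5.0, so the expected messages are version-specific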
     if version_num(self.cu.get_ignite_version()) < version_num('2.5.0'):
         idle_verify_pass = [
             'Command \[IDLE_VERIFY.*\] started',
             'Partition verification finished, no conflicts have been found.',
             'Command \[IDLE_VERIFY\] successfully finished in [0-9\.]+ seconds.'
         ]
         self.cu.control_utility('--cache',
                                 'idle_verify',
                                 all_required=idle_verify_pass)
     else:
         idle_verify_pass = [
             'idle_verify check has finished, no conflicts have been found.'
         ]
         try:
             self.cu.control_utility('--cache',
                                     'idle_verify',
                                     all_required=idle_verify_pass)
         except TidenException as e:
             self.cu.control_utility('idle_verify', '--analyse')
             self.stop_grid(fail=False)
             if save_lfs_on_exception:
                 self.save_lfs('bug')
             raise e
Example #2
    def restart_empty_grid_with_nodes_count(self, nodes_count):
        self.cu.deactivate()
        util_sleep_for_a_while(5)
        current_nodes = self.ignite.get_alive_default_nodes()
        self.ignite.stop_nodes()
        util_sleep_for_a_while(5)
        self.delete_lfs()
        additional_nodes_count = nodes_count - len(current_nodes)
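        # A negative delta means the new topology needs fewer nodes than were running before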

        if additional_nodes_count < 0:
            print_blue('Going to remove nodes %s' %
                       current_nodes[additional_nodes_count:])
            for node_id in current_nodes[additional_nodes_count:]:
                current_nodes.remove(node_id)
                # if self.ignite.nodes.get(node_id):
                #     del self.ignite.nodes[node_id]

        log_print('Going to start nodes {}'.format(current_nodes))
        self.ignite.start_nodes(*current_nodes)

        if additional_nodes_count > 0:
            additional_nodes_count = nodes_count - len(current_nodes)
            print_blue('Starting %s additional nodes' % additional_nodes_count)
            node_id = list(
                self.ignite.add_additional_nodes(self.get_server_config(),
                                                 additional_nodes_count))
            self.ignite.start_additional_nodes(node_id)

        self.cu.activate()
Example #3
    def util_start_additional_nodes(self,
                                    node_type='server',
                                    add_to_baseline=False,
                                    nodes_ct=None):
        nodes_count = nodes_ct if nodes_ct else randint(1, 3)

        if node_type == 'server':
            config = self.get_server_config()
        else:
            config = self.get_client_config()

        additional_nodes = self.ignite.add_additional_nodes(
            config, nodes_count)
        self.start_additional_nodes(additional_nodes)

        baseline_msg = ''
        if add_to_baseline:
            self.cu.set_current_topology_as_baseline()
            baseline_msg = 'Server nodes added to baseline.'
            util_sleep_for_a_while(10)

        util_sleep_for_a_while(5)
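        # Print the current baseline topology for verification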
        self.cu.control_utility('--baseline')
        print_green('Started %s %s nodes. %s' %
                    (nodes_count, node_type,
                     '' if not add_to_baseline else baseline_msg))

        return additional_nodes
Example #4
 def restart_empty_inactive_grid(self):
     self.cu.deactivate()
     util_sleep_for_a_while(5)
     self.ignite.stop_nodes()
     util_sleep_for_a_while(5)
     self.cleanup_lfs()
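     # Bring the nodes back without activating the cluster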
     self.ignite.start_nodes()
Example #5
    def test_ignite_9398_deactivate(self):
        """
        https://ggsystems.atlassian.net/browse/IGN-11435
        https://issues.apache.org/jira/browse/IGNITE-9398

        Fixed in: 8.5.1-p14

        This ticket optimizes dispatching of Custom Discovery Messages by offloading their processing
        to a separate thread. If the fix is OK, then all nodes should process custom discovery messages faster.

        :return:
        """
        self.start_grid()

        util_sleep_for_a_while(3)
        jmx = JmxUtility(self.ignite)
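        # Deactivate via JMX on node 1 and measure how long the resulting exchange takes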
        jmx.deactivate(1)

        max_time = self._get_last_exchange_time()

        print_blue(
            "Max time diff between 'Started exchange init' and 'Finish exchange future' at all nodes: %s msec"
            % max_time)

        self._dump_exchange_time(max_time, "cluster deactivate")

        jmx.kill_utility()
Example #6
    def test_ignite_8897(self):
        """
        1. create LRT
        2. start PME
        3. try to add new node to baseline
        :return:
        """

        # start grid, remove one random node from baseline
        self.start_grid()
        self.ignite.cu.set_current_topology_as_baseline()

        grid_size = len(self.ignite.get_all_alive_nodes())
        stopping_node_id = self.ignite.get_random_server_nodes()[0]
        stopping_node_consistent_id = self.ignite.get_node_consistent_id(
            stopping_node_id)

        self.kill_node(stopping_node_id)
        self.ignite.wait_for_topology_snapshot(server_num=grid_size - 1,
                                               client_num=None)
        self.ignite.cu.remove_node_from_baseline(stopping_node_consistent_id)
        util_sleep_for_a_while(5)

        # create a long running transaction
        with PiClient(self.ignite,
                      self.get_client_config(),
                      nodes_num=1,
                      jvm_options=self.jvm_options):
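            # Wait for the client node to appear in the topology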

            self.ignite.wait_for_topology_snapshot(None, 1)

            TestLrt.create_transactional_cache()
            lrt_operations = TestLrt.launch_transaction_operations()

            self.release_transactions(lrt_operations)
    def test_util_1_6_utility_control_cache_contention(self):
        """
        Test that control.sh --cache contention shows correct information about keys with contention.
        :return:
        """
        try:
            LRT_TIMEOUT = 10
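            # Run '--cache contention' before, during and after the long-running transactions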
            with PiClient(self.ignite, self.get_server_config(), nodes_num=1) as piclient:

                cache_name = 'test_cache_001'

                self.cu.control_utility('--cache contention 1')

                lrt_operations = self.launch_transaction_operations(cache_name)

                self.cu.control_utility('--cache contention 1')
                self.cu.control_utility('--cache contention 11')
                self.cu.control_utility('--cache contention 10')

                for operation in lrt_operations:
                    transaction_xid = operation.getTransaction().xid().toString()
                    if transaction_xid not in self.cu.latest_utility_output:
                        log_print("Transaction with XID %s is not in list. Key=%s, Value=%s"
                                  % (transaction_xid, operation.getKey(), operation.getValue()), color='debug')

                self.release_transactions(lrt_operations)

                log_print("Sleep for %s seconds" % LRT_TIMEOUT)
                util_sleep_for_a_while(LRT_TIMEOUT)

                self.cu.control_utility('--cache contention 1')

        except TidenException as e:
            log_print(e, color='red')
            pass
    def test_host_ignite(self):
        self.start_grid('host')
        nodes_num = len(self.ignite.get_alive_default_nodes())
        self.ignite.cu.activate()
        self.ignite.cu.control_utility('--baseline')

        self.load_data_with_streamer(
            start_key=1,
            end_key=1000,
            value_type=
            'org.apache.ignite.piclient.model.values.AllTypesIndexed',
            ignite=self.ignite)
        self.ignite.start_node_inside(1)
        self.ignite.wait_for_topology_snapshot(server_num=nodes_num - 1)

        util_sleep_for_a_while(10)
        self.ignite.start_node_inside(1)

        self.ignite.wait_for_topology_snapshot(server_num=nodes_num)
        self.load_data_with_streamer(
            start_key=1,
            end_key=1000,
            allow_overwrite=True,
            value_type=
            'org.apache.ignite.piclient.model.values.AllTypesIndexed',
            ignite=self.ignite)
        log_print('Done')
    def test_util_1_8_counters_detection_during_PME_node_from_baseline(self):
        """
        Tests that PME synchronises partition counters if conflicts are detected.
        :return:
        """
        self.load_data_with_streamer(end_key=1000,
                                     value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)

        with PiClient(self.ignite, self.get_client_config(), nodes_num=1) as piclient:

            caches_before_lrt = []
            for cache_name in piclient.get_ignite().cacheNames().toArray():
                caches_before_lrt.append(cache_name)

            cache_under_test = caches_before_lrt[0]
            log_print('Cache under test: %s' % cache_under_test, color='blue')

            operation = create_broke_data_entry_operation(cache_under_test, 1, True, 'counter')
            operation.evaluate()

            expected = ['Conflict partition']
            self.cu.control_utility('--cache idle_verify', all_required=expected)

            output = self.cu.latest_utility_output

            grp_id, part_id = None, None
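            # Parse the conflicting group id and partition id from the idle_verify output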
            for line in output.split('\n'):
                m = search('Conflict partition: (PartitionKey|PartitionKeyV2) \[grpId=(\d+),.*partId=(\d+)\]', line)
                if m:
                    grp_id = m.group(2)
                    part_id = m.group(3)

            tiden_assert(grp_id and part_id,
                         'Expecting to find conflicts in output\n{}'.format(self.cu.latest_utility_output))

            # Start one more server node and change baseline to run PME
            log_print("Going to start additional node", color='green')
            self.ignite.add_additional_nodes(self.get_server_config(), 1)
            self.ignite.start_additional_nodes(self.ignite.get_all_additional_nodes())
            self.cu.control_utility('--baseline')
            self.cu.set_current_topology_as_baseline()
            self.cu.control_utility('--baseline')
            msg_in_log = self.find_in_node_log('Partition states validation has failed for group: %s'
                                               % cache_under_test, node_id=1)
            assert msg_in_log != []

            # Check there are no conflicts after PME
            util_sleep_for_a_while(30)
            self.cu.control_utility('--cache', 'idle_verify')

            # Stop one more server node and change baseline to run PME
            self.ignite.kill_node(self.ignite.get_alive_additional_nodes()[0])
            util_sleep_for_a_while(30)
            self.cu.control_utility('--baseline')
            self.cu.set_current_topology_as_baseline()
            self.cu.control_utility('--baseline')

            # Check there are no conflicts after PME
            self.cu.control_utility('--cache', 'idle_verify')
Example #10
 def restart_grid_with_deleted_wal(self):
     self.cu.deactivate()
     util_sleep_for_a_while(5)
     self.ignite.stop_nodes()
     util_sleep_for_a_while(5)
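     # Keep db, binary_meta, marshaller and snapshots; only the WAL is removed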
     self.delete_lfs(delete_db=False,
                     delete_binary_meta=False,
                     delete_marshaller=False,
                     delete_snapshots=False)
     self.ignite.start_nodes()
     self.cu.activate(activate_on_particular_node=1)
Example #11
    def util_get_restore_point(cls, seconds_ago=1):
        from datetime import datetime, timedelta
        from time import timezone

        util_sleep_for_a_while(2)
        time_format = "%Y-%m-%d-%H:%M:%S.%f"
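        # %f is microseconds; the [:-3] below trims the result to milliseconds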

        restore_point = (datetime.now() - timedelta(seconds=seconds_ago))
        # Hack to handle UTC timezone
        if int(timezone / -(60 * 60)) == 0:
            restore_point = restore_point + timedelta(hours=3)

        return restore_point.strftime(time_format)[:-3]
    def test_util_1_7_broke_index_from_utility(self):
        """
        This test just checks that the pmi-tool can break an index for some cache.
        """
        cache_under_test = 'cache_group_1_028'
        self.util_deploy_pmi_tool()
        custom_context = self.create_test_context('pmi_tool')
        custom_context.add_context_variables(
            fix_consistent_id=True
        )
        custom_context.set_client_result_config("client_pmi.xml")
        custom_context.set_server_result_config("server_pmi.xml")

        custom_context.build_and_deploy(self.ssh)

        self.load_data_with_streamer(end_key=5000,
                                     value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)

        update_table = ['!tables', '!index',
                        '\'create index example_idx on \"%s\".ALLTYPESINDEXED(STRINGCOL);\'' % cache_under_test,
                        '!index']

        sqlline = Sqlline(self.ignite)
        sqlline.run_sqlline(update_table)

        util_sleep_for_a_while(10)

        self.run_pmi_tool(cache_under_test)
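        # validate_indexes should now report the broken index and idle_verify should fail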

        self.cu.control_utility('--cache', 'validate_indexes')
        output = self.cu.latest_utility_output

        expected = ['idle_verify failed', 'Idle verify failed']
        self.cu.control_utility('--cache idle_verify',
                                '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
                                all_required=expected)
        #self.su.snapshot_utility('idle_verify',
        #                         '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
        #                         all_required=expected)

        log_print(output, color='debug')
        found_broken_index = False
        for line in output.split('\n'):
            m = search('IndexValidationIssue.*cacheName=%s, idxName=EXAMPLE_IDX' % cache_under_test, line)
            if m:
                found_broken_index = True
                log_print('Index EXAMPLE_IDX is broken', color='green')

        if not found_broken_index:
            raise TidenException('Expecting index broken, but it\'s not!!!')
Example #13
    def _set_baseline_few_times(self, times=2):
        topology_changed = False
        lst_output = ''
        utility_baseline_log = 'control-utility-baseline.log'

        util_sleep_for_a_while(20)

        for _ in range(0, times):
            self.cu.set_current_topology_as_baseline(background=True,
                                                     log=utility_baseline_log)

            check_command = {
                self.cu.latest_utility_host: [
                    'cat %s/%s' %
                    (self.ignite.client_ignite_home, utility_baseline_log)
                ]
            }

            timeout_counter = 0
            baseline_timeout = 120
            completed = False
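            # Poll the utility log until the baseline change completes or the timeout expires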
            while timeout_counter < baseline_timeout and not completed:
                lst_output = self.ignite.ssh.exec(check_command)[
                    self.cu.latest_utility_host][0]

                log_put('Waiting for topology changed %s/%s' %
                        (timeout_counter, baseline_timeout))

                if 'Connection to cluster failed.' in lst_output:
                    print_red('Utility unable to connect to cluster')
                    break

                if 'Number of baseline nodes: ' in lst_output:
                    completed = True
                    break

                util_sleep_for_a_while(5)
                timeout_counter += 5

            if completed:
                topology_changed = True
                break

            log_print()

        if not topology_changed:
            print_red(lst_output)
            raise TidenException('Unable to change grid topology')

        return topology_changed
Example #14
    def test_ignite_10128(self):
        """
        https://ggsystems.atlassian.net/browse/IGN-12187
        https://issues.apache.org/jira/browse/IGNITE-10128

        I/O race during read/write of cache configurations.
        :return:
        """
        self.start_grid_no_activate()
        max_iterations = 100
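        # Repeated activate/deactivate cycles to provoke the race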
        for i in range(0, max_iterations):
            self.cu.activate()
            util_sleep_for_a_while(3)
            self.cu.deactivate()
            util_sleep_for_a_while(3)
Example #15
    def sbt_case_upload_data_and_create_restore_point(self):
        # start_key = int(self.max_key * self.load_multiplier) + 1
        # self.sbt_like_load(start_key, 5000)
        self.upload_data_using_sbt_model()

        util_sleep_for_a_while(10)
        restore_point = self.util_get_restore_point()
        checksum = self.calc_checksums_distributed(
            config_file=self.get_client_config())

        # start_key = int(5000 * self.load_multiplier) + 1
        # self.sbt_like_load(start_key, 7000)
        self.upload_data_using_sbt_model(start_key=5000, end_key=7000)

        return restore_point, checksum
    def test_util_1_7_broke_index(self):
        """
        Tests that control.sh --cache validate_indexes detects a broken index for some cache.
        :return:
        """
        self.load_data_with_streamer(end_key=5000,
                                     value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)

        update_table = ['!tables', '!index',
                        '\'create index example_idx on \"cache_group_1_028\".ALLTYPESINDEXED(STRINGCOL);\'',
                        '!index']

        sqlline = Sqlline(self.ignite)
        sqlline.run_sqlline(update_table)

        with PiClient(self.ignite, self.get_client_config(), nodes_num=1):
            cache_under_test = 'cache_group_1_028'
            log_print('Cache under test: %s' % cache_under_test, color='blue')

            operation = create_broke_data_entry_operation(cache_under_test, 0, True, 'index')
            operation.evaluate()

            util_sleep_for_a_while(10)
            self.cu.control_utility('--cache', 'validate_indexes')
            output = self.cu.latest_utility_output

            expected = ['idle_verify failed', 'Idle verify failed']
            self.cu.control_utility('--cache idle_verify',
                                    '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
                                    all_required=expected)
            #self.su.snapshot_utility('idle_verify',
            #                         '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
            #                         all_required=expected)

            log_print(output, color='debug')
            found_broken_index = False
            for line in output.split('\n'):
                m = search('IndexValidationIssue.*cacheName=%s, idxName=EXAMPLE_IDX' % cache_under_test, line)
                if m:
                    found_broken_index = True
                    log_print('Index EXAMPLE_IDX is broken', color='green')

            if not found_broken_index:
                raise TidenException('Expecting index broken, but it\'s not!!!')
    def test_util_1_1_idle_verify(self):
        """
        Test that the idle_verify command detects a problem if some key is corrupted (there is a difference
        between the key on the primary partition and on the backup).
        """

        partitions_to_break = [1, 2]
        with PiClient(self.ignite, self.get_client_config(), nodes_num=1):
            cache_under_test = 'cache_group_1_028'
            log_print('Cache under test: %s' % cache_under_test, color='blue')

            operation = create_broke_data_entry_operation(cache_under_test, partitions_to_break[0], True,
                                                          'value', 'counter')
            log_print(operation.evaluate())
            operation = create_broke_data_entry_operation(cache_under_test, partitions_to_break[1], True, 'counter')
            log_print(operation.evaluate())

        util_sleep_for_a_while(10)

        expected = ['Conflict partition']
        self.cu.control_utility('--cache idle_verify', all_required=expected)

        log_print(self.cu.latest_utility_output)
        output = self.cu.latest_utility_output
        # m = search('See log for additional information. (.*)', self.cu.latest_utility_output)
        # if m:
        #     conflict_file = m.group(1)
        #     host = self.cu.latest_utility_host
        #     output = self.ssh.exec_on_host(host, ['cat {}'.format(conflict_file)])
        #     log_print(output, color='blue')
        # else:
        #     tiden_assert(False, 'Conflict file is not found in output:\n{}'.format(self.cu.latest_utility_output))

        grpId, partId = [], []
        for line in output.split('\n'):
            m = search('Conflict partition: (PartitionKey|PartitionKeyV2) \[grpId=(\d+),.*partId=(\d+)\]', line)
            if m:
                grpId.append(m.group(2))
                partId.append(int(m.group(3)))

        tiden_assert(grpId and partId, 'Could not find partition id in buffer %s' % output)
        tiden_assert(len(set(grpId)), 'Should be one group in output %s' % output)
        tiden_assert(set(partId) == set(partitions_to_break), 'Partition ids should match %s' % output)
Example #18
 def teardown_testcase(self):
     log_print('TestTeardown is called', color='green')
     if self.reusable_lfs and not self.lfs_stored:
         log_print('Going to stop nodes....')
         self.ignite.stop_nodes()
         util_sleep_for_a_while(5)
         self.save_lfs('snapshot_util', timeout=1200)
         self.lfs_stored = True
     else:
         self.stop_grid_hard()
     self.su.copy_utility_log()
     if self.get_context_variable('zookeeper_enabled'):
         self.zoo.stop()
     self.cleanup_lfs()
     self.remove_additional_nodes()
     self.ignite.set_node_option('*', 'config', self.get_server_config())
     self.set_current_context()
     self._reset_cluster()
     log_print(repr(self.ignite), color='debug')
Example #19
    def _change_grid_topology_and_set_baseline(self,
                                               disable_auto_baseline=True):
        """
        Restart empty grid, start one additional node and add it to baseline topology.

        :param disable_auto_baseline:
        :return:
        """

        self.restart_empty_grid()
        self.ignite.add_additional_nodes(self.get_server_config(), 1)
        self.ignite.start_additional_nodes(
            self.ignite.get_all_additional_nodes())

        util_sleep_for_a_while(5)

        if disable_auto_baseline and self.cu.is_baseline_autoajustment_supported(
        ):
            self.cu.disable_baseline_autoajustment()
            log_print("Baseline auto adjustment disabled", color='green')

        self._set_baseline_few_times()
 def start_and_config_jass_docker(self):
     jaas_remote_config_path = self.config['rt']['remote'][
         'test_module_dir']
     jaas_host = self.config['environment']['server_hosts'][0]
     # replace host in jaas configs
     self.ssh.exec([
         f'sed -i \'s/_HOST_/{jaas_host}/g\' {jaas_remote_config_path}/jaas.config'
     ])
     self.ssh.exec([
         f'sed -i \'s/_HOST_/{jaas_host}/g\' {jaas_remote_config_path}/jaas_ssl.config'
     ])
     # start the LDAP (ApacheDS) container in docker
     self.ssh.exec_on_host(jaas_host, [
         'docker run --name ldap -d -p 389:10389 -p 636:10636 openmicroscopy/apacheds'
     ])
     util_sleep_for_a_while(5, 'wait ldap docker start')
     # set up jaas users
     for auth_cred_name in self.auth_creds.values():
         user = auth_cred_name['user']
         password = auth_cred_name['pwd']
         description = auth_cred_name['description']
         log_print(
             f'Try to add user {user}:{password} with description {description}'
         )
         for file in ['new_user', 'add_description', 'set_password']:
             self.ssh.exec([
                 f'sed \'s/_USER_/{user}/g\' {jaas_remote_config_path}/{file}.ldif | '
                 f'sed \'s/_DESCRIPTION_/{description}/g\' | '
                 f'sed \'s/_PASSWORD_/{password}/g\' > {jaas_remote_config_path}/{file}_{user}.ldif'
             ])
             ldap_command = f"ldapmodify -a -D uid=admin,ou=system -w secret -h {jaas_host}:389 " \
                            f"-f {jaas_remote_config_path}/{file}_{user}.ldif"
             log_print(f'execute command - "{ldap_command}"')
             result = self.ssh.exec_on_host(jaas_host, [ldap_command])
             log_print(result)
         log_print(f'Successfully add user {user} to ldap', color='green')
Example #21
    def test_24_fitness_set_baseline_with_properties(self):
        """
        This test checks the cluster behaviour with option GG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR
        that could be set in different ways:
            1. Set at one of the server nodes.
            2. Set on some client node/nodes.
        """
        created_caches = []

        self.ignite_old_version.cu.activate()

        # Preloading
        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:

            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')
                piclient.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

        util_sleep_for_a_while(20)

        new_client_config = Ignite.config_builder.get_config(
            'client', config_set_name='24_fit_with_consist_id')
        jvm_options = self.ignite_new_version.get_jvm_options(1)
        jvm_options.append(
            '-DGG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR=true')
        # with PiClient(self.ignite_new_version, self.client_config, jvm_options=jvm_options, nodes_num=1) as piclient:
        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:
            for i in range(1, 5):
                self.ignite_old_version.cu.control_utility('--baseline')
                log_print('Stopping node {}'.format(i), color='green')

                jvm_options = self.ignite_new_version.get_jvm_options(i)
                jvm_options.append(
                    '-DGG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR=false'
                )
                self.ignite_new_version.set_node_option(
                    '*', 'config',
                    Ignite.config_builder.get_config(
                        'server', config_set_name='24_fit_with_consist_id'))
                log_print("Starting node {} with new consistent id".format(i),
                          color='debug')
                self.ignite_new_version.start_nodes(i,
                                                    already_nodes=4,
                                                    other_nodes=4,
                                                    timeout=240)
                log_print("Changing baseline", color='debug')
                self.ignite_old_version.cu.set_current_topology_as_baseline()
                util_sleep_for_a_while(60,
                                       msg='Wait for rebalance to complete')

        log_print('Test is done')
Example #22
    def test_24_fitness_rolling_upgrade(self):
        """
        This test checks the main rolling upgrade scenario under load:
            1. Old cluster up and running (consistent_id's are not set).
            2. First cycle (upgrade to the new version and set the property
                GG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR).
            3. Second cycle (set the correct consistent_id and add the nodes to the baseline topology).

        """
        created_caches = []

        self.ignite_old_version.cu.activate()

        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:

            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')
                piclient.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

        util_sleep_for_a_while(60)

        with PiClient(self.ignite_old_version, self.client_config,
                      nodes_num=4) as piclient:
            cache_names = piclient.get_ignite().cacheNames()

            # Start transaction loading for TTL caches
            with TransactionalLoading(MixedTestLoadingAdapter(self),
                                      config_file=self.client_config,
                                      loading_profile=LoadingProfile(
                                          delay=0,
                                          transaction_timeout=100000,
                                          run_for_seconds=600)):
                util_sleep_for_a_while(20)
                log_print('Rolling upgrade', color='green')
                async_ops = []
                for cache_name in [
                        cache_name for cache_name in cache_names.toArray()
                        if cache_name.startswith("M2_PRODUCT")
                ]:
                    async_operation = create_async_operation(
                        create_put_all_operation,
                        cache_name,
                        1001,
                        400001,
                        10,
                        value_type=ModelTypes.VALUE_ALL_TYPES.value)
                    async_ops.append(async_operation)
                    async_operation.evaluate()

                # First cycle: upgrade version and set property.
                for i in range(1, 5):
                    self.ignite_old_version.cu.control_utility('--baseline')
                    log_print('Stopping node {}'.format(i), color='green')
                    self.ignite_old_version.kill_nodes(i)

                    self.ignite_new_version.cleanup_work_dir(i)
                    folder = self.ignite_old_version.get_work_dir(i)
                    log_print(folder, color='debug')
                    self.ignite_new_version.copy_work_dir_from(i, folder)

                    jvm_options = self.ignite_new_version.get_jvm_options(i)
                    jvm_options.append(
                        '-DGG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR=true'
                    )

                    util_sleep_for_a_while(10)
                    self.ignite_new_version.start_nodes(i,
                                                        already_nodes=(4 - i),
                                                        other_nodes=(4 - i),
                                                        timeout=240)
                    self.ignite_new_version.cu.control_utility('--baseline')

                for async_op in async_ops:
                    async_op.getResult()

                util_sleep_for_a_while(30)
                log_print('Change consistent ID', color='green')

                self.ignite_new_version.set_node_option(
                    '*', 'config',
                    Ignite.config_builder.get_config(
                        'server', config_set_name='24_fit_with_consist_id'))

                # Second cycle - change consistent_id and add to baseline topology.
                for i in range(1, 5):
                    self.ignite_new_version.cu.control_utility('--baseline')
                    log_print('Stopping node {}'.format(i), color='green')
                    self.ignite_new_version.kill_nodes(i)
                    log_print(
                        "Starting node {} with new consistent id".format(i),
                        color='debug')
                    self.ignite_new_version.start_nodes(i, timeout=240)
                    log_print("Changing baseline", color='debug')
                    self.ignite_new_version.cu.set_current_topology_as_baseline(
                    )
                    util_sleep_for_a_while(
                        60, msg='Wait for rebalance to complete')

                log_print('Transactional loading done', color='green')

            # Just to check the client node can still interact with the cluster - calculate checksums from the client node.
            sorted_cache_names = []
            for cache_name in piclient.get_ignite().cacheNames().toArray():
                sorted_cache_names.append(cache_name)

            sorted_cache_names.sort()

            async_operations = []
            cache_operation = {}
            for cache_name in sorted_cache_names:
                async_operation = create_async_operation(
                    create_checksum_operation, cache_name, 1, 10000)
                async_operations.append(async_operation)
                cache_operation[async_operation] = cache_name
                async_operation.evaluate()

            checksums = ''
            cache_checksum = {}
            for async_operation in async_operations:
                result = str(async_operation.getResult())
                cache_checksum[cache_operation.get(async_operation)] = result
                checksums += result

            log_print('Calculating checksums done')
Example #23
    def test_ignite_8657(self):
        """
        This test is based on IGNITE-8657:
        1. start grid with EXCHANGE_HISTORY_SIZE smaller than N
        2. activate
        3. start simultaneously M > N clients
        4. all client nodes should start and be able to perform cache put/get operations and transactions

        NB: this test hangs with 2.5.1-p6, because piclient waits in Ignition.start() forever
        """
        self.start_grid()
        self.load_random_data_with_streamer(0, 1000, nodes_num=2)
        self.cu.set_current_topology_as_baseline()

        nodes_before = set(self.ignite.get_all_common_nodes())

        with PiClient(self.ignite,
                      self.get_client_config(),
                      nodes_num=10,
                      jvm_options=self.jvm_options,
                      read_timeout=300) as piclient:
            nodes_after = set(self.ignite.get_all_common_nodes())
            nodes_started = list(nodes_after - nodes_before)
            node_ids = deque(nodes_started)
            node_id = node_ids[0]
            node_ids.rotate()

            for i in range(1, 5):
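                # Run transactions with every concurrency/isolation combination, rotating over the started client nodes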
                gateway = piclient.get_gateway(node_id)
                ignite = piclient.get_ignite(node_id)
                tx = ignite.transactions().txStart()
                util_sleep_for_a_while(3)
                tx.commit()
                for concurrency in ['OPTIMISTIC', 'PESSIMISTIC']:
                    for isolation in [
                            'READ_COMMITTED', 'REPEATABLE_READ', 'SERIALIZABLE'
                    ]:
                        print_blue('Run transaction %s %s' %
                                   (concurrency, isolation))

                        node_id = node_ids[0]
                        node_ids.rotate()

                        gateway = piclient.get_gateway(node_id)
                        ignite = piclient.get_ignite(node_id)

                        concurrency_isolation_map = self._get_tx_type_map(
                            gateway)

                        cache_names = ignite.cacheNames().toArray()

                        tx = ignite.transactions().txStart(
                            concurrency_isolation_map.get(concurrency),
                            concurrency_isolation_map.get(isolation))

                        for cache_name in cache_names:
                            cache = ignite.getCache(cache_name)
                            val = cache.get(int(random() * 1000))
                            # log_print('got %s' % repr(val))
                            if val:
                                cache.put(int(random() * 1000), val)

                        tx.commit()
                node_id = node_ids[0]
                node_ids.rotate()

                ignite = piclient.get_ignite(node_id)
                async_ops = []
                for cache_name in ignite.cacheNames().toArray():
                    _async = create_async_operation(create_streamer_operation,
                                                    cache_name, 1002, 2000)
                    _async.evaluate()
                    async_ops.append(_async)

                for async_op in async_ops:
                    async_op.getResult()
Example #24
 def restart_grid_without_activation(self):
     self.cu.deactivate()
     util_sleep_for_a_while(5)
     self.ignite.stop_nodes()
     util_sleep_for_a_while(5)
     self.ignite.start_nodes()
Example #25
    def test_ignite_8855(self):
        """
         This test is based on IGNITE-8855:
         1. start grid with EXCHANGE_HISTORY_SIZE smaller than N
         2. activate
         3. start simultaneously M > N clients
         4. there should be throttling on client reconnects, i.e. clients dropped out of exchange history
         should not all try to reconnect at once.
         NB: this test fails on 8.5.1-p13
        """
        nodes_num = TestPmeRegress.MAX_CLIENT_NODES
        self.start_grid()
        self.load_random_data_with_streamer(0, 1000, nodes_num=2)
        self.cu.set_current_topology_as_baseline()

        pp = PrettyPrinter()

        try:
            with PiClient(self.ignite,
                          self.get_client_config(),
                          nodes_num=nodes_num,
                          jvm_options=self.jvm_options,
                          read_timeout=300
                          ):  # that's enough to start if bug was fixed
                self.ignite.wait_for_topology_snapshot(None, nodes_num)
                n_tries = 0
                res = None
                while n_tries < 3:
                    util_sleep_for_a_while(5)
                    self.ignite.get_data_from_log(
                        'server',
                        'Client node tries to connect but its exchange info is cleaned up from exchange history',
                        '(.*)', 'log_kickoff')
                    res = self._collect_msg('log_kickoff', 'server')
                    log_print('Client kick off messages: \n' + pp.pformat(res))
                    if not res:
                        n_tries = n_tries + 1
                        continue

                    assert res, "There should be client kick off messages"

                    util_sleep_for_a_while(5)

                    self.ignite.get_data_from_log(
                        'client',
                        'Client node reconnected',
                        '(.*)',
                        'reconnect'  # and now Jinn would appear!
                    )
                    res = self._collect_msg('reconnect')
                    if not res:
                        n_tries = n_tries + 1
                        continue

                    log_print('Reconnect attempts: \n' + pp.pformat(res))

                    self.ignite.wait_for_topology_snapshot(
                        None, nodes_num, "Ensure topology is stable")
                    break

        except PiClientException as e:
            log_print("Got client exception: %s" % str(e))
            assert False, "IGNITE-8855 reproduced"
    def util_break_partition_using_pmi_tool(self, break_value=True):
        cache_under_test = 'cache_group_1_028'
        self.util_deploy_pmi_tool()
        custom_context = self.create_test_context('pmi_tool')
        custom_context.add_context_variables(
            fix_consistent_id=True
        )
        custom_context.set_client_result_config("client_pmi.xml")
        custom_context.set_server_result_config("server_pmi.xml")

        custom_context.build_and_deploy(self.ssh)

        self.load_data_with_streamer(end_key=5000,
                                     value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)

        util_sleep_for_a_while(10)

        broke_type = 'counter'
        if break_value:
            broke_type = 'value'

        self.run_pmi_tool(cache_under_test, broke_type=broke_type)

        util_sleep_for_a_while(10)

        expected = ['idle_verify failed', 'Idle verify failed']
        self.cu.control_utility('--cache idle_verify',
                                '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
                                all_required=expected)
        # self.su.snapshot_utility('idle_verify',
        #                         '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
        #                         all_required=expected)
        log_print(self.su.latest_utility_output)

        output = self.util_get_info_from_conflicts_file()
        log_print(output, color='green')
        records_info = [line for line in output if 'Partition instances' in line]

        if len(records_info) > 0:
            records_info = records_info[0].split('],')
        else:
            records_info = []

        partition_hash_record = set()

        prim_part_hash, prim_update_cntr = None, None
        for line in records_info:
            m = search('PartitionHashRecord \[isPrimary=(.+), partHash=([\d,-]+), updateCntr=(\d+)', line)

            if m:
                if m.group(1) == 'true':
                    prim_part_hash = m.group(2)
                    prim_update_cntr = m.group(3)
                else:
                    partition_hash_record.add((m.group(1), m.group(2), m.group(3)))
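        # A broken value changes partHash but not updateCntr; a broken counter does the opposite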
        for isPrimary, partHash, updateCntr in partition_hash_record:
            if break_value:
                assert partHash != prim_part_hash
                assert updateCntr == prim_update_cntr
            else:
                assert partHash == prim_part_hash
                assert updateCntr != prim_update_cntr

        grpId, partId = None, None
        for line in output:
            m = search('Conflict partition: PartitionKey \[grpId=(\d+),.*partId=(\d+)\]', line)
            if m:
                grpId = m.group(1)
                partId = m.group(2)

        if grpId and partId:
            self.cu.control_utility('--cache idle_verify', '-analyze -grpId=%s -partId=%s' % (grpId, partId),
                                    all_required=expected)
            #self.su.snapshot_utility('idle_verify', '-analyze -grpId=%s -partId=%s' % (grpId, partId),
            #                         all_required=expected)

            output = self.util_get_info_from_conflicts_file(file_name='idle_verify_conflicts_2.txt')
            log_print(output, color='debug')
        else:
            log_print('Could not find partition id', color='red')
        log_print(self.su.latest_utility_output)
Example #27
    def _run_iteration(self, ignite, iteration):
        """
        One iteration of clients PME benchmark is as follows:

            1. start transactional loading at `loading_clients_hosts`, sleep `warmup_clients_delay` so the load stabilizes
            2.    start `num_clients_to_kill` clients at `clients_hosts` (different from `loading_clients_hosts`)
                  measure JOIN exchange time, sleep `stabilization_delay`
            3.    stop started additional clients, measure LEAVE exchange time, sleep `cooldown_delay`
        :param ignite:
        :param iteration:
        :return:
        """
        log_print("===> PME {} Clients(s) Left-Join Benchmark iteration {}/{} artifact started ".format(
            self.config['num_clients_to_kill'],
            iteration,
            self.config['iterations']
        ), color='green')

        loading_client_hosts = self._get_loading_client_hosts()
        client_hosts = self._get_client_hosts(loading_client_hosts)
        num_clients = self.config['num_clients_to_kill']

        metrics = None
        ex = None
        x1_join_time = None
        x1_leave_time = None

        try:
            # start loading clients
            with PiClient(
                    ignite,
                    self.test_class.client_config,
                    client_hosts=loading_client_hosts,
                    clients_per_host=self.config.get('loading_clients_per_host', 1)
            ):
                # initiate transactional loading
                with TransactionalLoading(
                        self.test_class,
                        ignite=ignite,
                        kill_transactions_on_exit=self.config['kill_transactions_on_exit'],
                        cross_cache_batch=self.config['cross_cache_batch'],
                        skip_atomic=self.config['skip_atomic'],
                        skip_consistency_check=not self.config['consistency_check_enabled'],
                        loading_profile=LoadingProfile(
                            delay=self.config['tx_delay'],
                            commit_possibility=self.config['commit_possibility'],
                            start_key=1,
                            end_key=self.config['load_factor'] - 1,
                            transaction_timeout=self.config['transaction_timeout']
                        ),
                        tx_metrics=['txCreated', 'txCommit', 'txFailed', 'txRollback']
                ) as tx_loading:
                    metrics = tx_loading.metrics

                    util_sleep_for_a_while(self.config['warmup_clients_delay'], "Before JOIN")

                    current_clients_num = ignite.get_nodes_num('client')
                    expected_total_clients_num = current_clients_num + num_clients

                    self.test_class._prepare_before_test(ignite, tx_loading, 'JOIN %d client(s)' % num_clients)

                    # start num_clients client nodes on 'flaky' hosts
                    with PiClient(
                            ignite,
                            self.test_class.client_config,
                            client_hosts=client_hosts,
                            clients_per_host=self.config.get('clients_per_host', 1),
                            nodes_num=num_clients,
                            new_instance=True,
                    ):

                        ignite.wait_for_topology_snapshot(client_num=expected_total_clients_num, timeout=600, check_only_servers=True, exclude_nodes_from_check=[])
                        tx_loading.metrics_thread.add_custom_event('%d client(s) joined' % num_clients)
                        new_topVer = self.test_class._get_new_top_after_test(ignite)
                        self.test_class._wait_exchange_finished(ignite, new_topVer)

                        x1_join_time, x2_time = self.test_class._measurements_after_test('JOIN %d client(s)' % num_clients, skip_exch=1)

                        util_sleep_for_a_while(self.config['stabilization_delay'])

                        # upon exit from with block, num_clients client nodes will be killed
                        self.test_class._prepare_before_test(ignite, tx_loading, 'LEAVE %d client(s)' % num_clients)

                    ignite.wait_for_topology_snapshot(client_num=current_clients_num, timeout=600, check_only_servers=True, exclude_nodes_from_check=[])
                    tx_loading.metrics_thread.add_custom_event('%d client(s) left' % num_clients)
                    new_topVer = self.test_class._get_new_top_after_test(ignite)
                    self.test_class._wait_exchange_finished(ignite, new_topVer)

                    x1_leave_time, x2_time = self.test_class._measurements_after_test('LEAVE %d client(s)' % num_clients, skip_exch=1)
                    util_sleep_for_a_while(self.config['cooldown_delay'])

            ignite.wait_for_topology_snapshot(client_num=0)
        except Exception as e:
            ex = e
        if metrics:
            self.test_class.create_loading_metrics_graph(
                'pme_%d_clients_left_join_%s_%d' % (num_clients, self.run_id, iteration),
                metrics,
                dpi_factor=0.75
            )
        if ex:
            raise ex

        return {
            'Exchange Client Join': x1_join_time,
            'Exchange Client Leave': x1_leave_time,
        }
Example #28
    def _run_iteration(self, ignite, iteration):
        """
        One iteration of server PME benchmark is as follows:

            1. start transactional loading, sleep `warmup_servers_delay` so that the load stabilizes
            2.   kill random N nodes, measure LEAVE exchange time, sleep `stabilization_delay`
            3.   restart killed nodes, measure JOIN exchange time, sleep `cooldown_delay`
            4. stop load

        :param ignite:
        :param iteration:
        :return:
        """
        log_print(
            "===> PME {} Server(s) Left-Join Benchmark iteration {}/{} started "
            .format(self.config['num_servers_to_kill'], iteration,
                    self.config['iterations']),
            color='green')

        # if debug:
        #     from pt.util import read_yaml_file
        #     from os.path import join
        #     base_path = 'pt/tests/res/exchanges'
        #     exch_test = iteration
        #     start_exch = read_yaml_file(join(base_path, 'start_exch.%d.yaml' % exch_test))
        #     finish_exch = read_yaml_file(join(base_path, 'finish_exch.%d.yaml' % exch_test))
        #     merge_exch = read_yaml_file(join(base_path, 'merge_exch.%d.yaml' % exch_test))
        #     self.test_class.exchanges = ExchangesCollection.create_from_log_data(start_exch, finish_exch, merge_exch)
        #     self.test_class.new_topVer = 5
        #     x1_leave_time, x2_time = self.test_class._measurements_after_test('test_leave', skip_exch=1)
        #     self.test_class.new_topVer = 6
        #     x1_join_time, x2_time = self.test_class._measurements_after_test('test_join', skip_exch=1)
        #
        #     return x1_leave_time, x1_join_time

        loading_client_hosts = self._get_loading_client_hosts()
        num_servers = self._get_num_server_nodes()
        num_servers_to_kill = self.config['num_servers_to_kill']
        kill_coordinator = self.config['kill_coordinator']

        metrics = None
        ex = None
        x1_join_time = None
        x1_leave_time = None

        try:
            # start loading clients ...
            with PiClient(ignite,
                          self.test_class.client_config,
                          client_hosts=loading_client_hosts,
                          clients_per_host=self.config.get(
                              'loading_clients_per_host', 1)):
                # ... and initiate transactional load
                with TransactionalLoading(
                        self.test_class,
                        ignite=ignite,
                        kill_transactions_on_exit=self.
                        config['kill_transactions_on_exit'],
                        cross_cache_batch=self.config['cross_cache_batch'],
                        skip_atomic=self.config['skip_atomic'],
                        skip_consistency_check=not self.
                        config['consistency_check_enabled'],
                        loading_profile=LoadingProfile(
                            delay=self.config['tx_delay'],
                            commit_possibility=self.
                            config['commit_possibility'],
                            start_key=1,
                            end_key=self.config['load_factor'] - 1,
                            transaction_timeout=self.
                            config['transaction_timeout']),
                        tx_metrics=[
                            'txCreated', 'txCommit', 'txFailed', 'txRollback'
                        ]) as tx_loading:
                    metrics = tx_loading.metrics

                    # pick random server nodes
                    node_ids = ignite.get_random_server_nodes(
                        num_servers_to_kill,
                        use_coordinator=kill_coordinator,
                        node_ids=self.test_class.server_node_ids,
                    )

                    expected_total_server_num = num_servers - len(node_ids)

                    # ... wait load stabilize
                    util_sleep_for_a_while(self.config['warmup_servers_delay'],
                                           "Before LEAVE")

                    if is_enabled(self.config.get('jfr_enabled', False)):
                        ignite.make_cluster_jfr(60)

                    util_sleep_for_a_while(2)
                    self.test_class._prepare_before_test(
                        ignite, tx_loading,
                        'LEAVE %d server(s)' % len(node_ids))

                    # ... kill selected random nodes
                    ignite.kill_nodes(*node_ids)
                    ignite.wait_for_topology_snapshot(
                        server_num=expected_total_server_num)
                    tx_loading.metrics_thread.add_custom_event(
                        '%d server(s) left' % len(node_ids))

                    new_topVer = self.test_class._get_new_top_after_test(
                        ignite)
                    self.test_class._wait_exchange_finished(ignite, new_topVer)

                    x1_leave_time, x2_time = self.test_class._measurements_after_test(
                        'LEAVE %d server(s)' % len(node_ids), skip_exch=1)

                    if is_enabled(self.config.get('heapdump_enabled', False)):
                        ignite.make_cluster_heapdump(
                            [1], 'after_%d_server_leave' % len(node_ids))

                    # ... wait exchange stabilize
                    util_sleep_for_a_while(self.config['stabilization_delay'],
                                           "After LEAVE, before JOIN")

                    if self.config['measure_restart_nodes']:
                        self.test_class._prepare_before_test(
                            ignite, tx_loading,
                            'JOIN %d server(s)' % len(node_ids))

                    # ... restart killed nodes
                    ignite.start_nodes(*node_ids)
                    ignite.wait_for_topology_snapshot(
                        server_num=expected_total_server_num + len(node_ids))

                    if self.config['measure_restart_nodes']:
                        tx_loading.metrics_thread.add_custom_event(
                            '%d server(s) joined' % len(node_ids))

                        new_topVer = self.test_class._get_new_top_after_test(
                            ignite)
                        self.test_class._wait_exchange_finished(
                            ignite, new_topVer)
                        x1_join_time, x2_time = self.test_class._measurements_after_test(
                            'JOIN %d server(s)' % len(node_ids), skip_exch=1)
                        # if is_enabled(self.config.get('heapdump_enabled', False)):
                        #     ignite.make_cluster_heapdump([1], 'after_%d_server_join' % len(node_ids))

                    # ... wait exchange cooldown
                    util_sleep_for_a_while(self.config['cooldown_delay'],
                                           "After JOIN")

            ignite.wait_for_topology_snapshot(client_num=0)
        except Exception as e:
            ex = e
        if metrics:
            self.test_class.create_loading_metrics_graph(
                'pme_%d_servers_left_join_%s_%d' %
                (num_servers_to_kill, self.run_id, iteration),
                metrics,
                dpi_factor=0.75)
        if ex:
            raise ex

        return {
            'Exchange Server Join': x1_join_time,
            'Exchange Server Leave': x1_leave_time,
        }
    def test_massive_index_rebuild(self):
        """
        1) 2 nodes, backupCnt = 1, persistenceEnabled
        2) Load (A, B) type into a cache with defined (A, B) types in index config
        3) Load new type of data into a cache (C,D)
        4) Kill one node
        5) Create new index in alive cluster
        6) Start node again

        :return:
        """

        PiClient.read_timeout = 1200

        self.set_current_context('indexed_types')

        self.util_copy_piclient_model_to_libs()
        self.ignite.set_activation_timeout(240)
        self.ignite.set_snapshot_timeout(240)
        self.ignite.set_node_option('*', 'jvm_options', ['-ea'])
        self.su.clear_snapshots_list()
        self.start_grid(skip_activation=True)

        self.ignite.cu.activate(activate_on_particular_node=1)

        PiClientIgniteUtils.load_data_with_streamer(
            self.ignite,
            self.get_client_config(),
            value_type=ModelTypes.VALUE_ALL_TYPES_30_INDEX.value,
            end_key=5000)

        PiClientIgniteUtils.load_data_with_streamer(
            self.ignite,
            self.get_client_config(),
            value_type=ModelTypes.VALUE_ACCOUNT.value,
            start_key=5000,
            end_key=10000)

        PiClientIgniteUtils.load_data_with_streamer(
            self.ignite,
            self.get_client_config(),
            value_type=ModelTypes.VALUE_EXT_ALL_TYPES_30_INDEX.value,
            start_key=10000,
            end_key=15000)

        # PiClientIgniteUtils.load_data_with_streamer(self.ignite,
        #                                             self.get_client_config(),
        #                                             cache_names_patterns=
        #                                             ['cache_group_3'],
        #                                             value_type=ModelTypes.VALUE_EXT_ALL_TYPES_30_INDEX.value,
        #                                             end_key=10000)

        iterations = 50

        sqlline = Sqlline(self.ignite)

        columns = [
            'longCol',
            'doubleCol',
            'stringCol',
            'booleanCol',
            'longCol1',
            # 'doubleCol1', 'stringCol1', 'intCol', 'intCol1',  # 'booleanCol1',
            # 'index', 'longCol2', 'doubleCol2', 'stringCol2', 'booleanCol2',
            # 'longCol12', 'doubleCol12', 'stringCol12', 'intCol12', 'intCol2',
            # 'shortCol2', 'longCol3', 'doubleCol3', 'stringCol3', 'booleanCol3',
            # 'longCol13', 'doubleCol13', 'stringCol13', 'intCol13', 'intCol3', 'shortCol3'
        ]

        with PiClient(self.ignite, self.get_client_config()) as piclient:
            cache_names = piclient.get_ignite().cacheNames().toArray()

            for i in range(0, iterations):
                log_print('Current iteration %s from %s' % (i, iterations),
                          color='debug')

                update_table = []

                self.ignite.kill_node(2)
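                # Create the indexes while node 2 is down, then restart it so it has to rebuild them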

                indexed_columns = ','.join(columns)

                for cache_name in cache_names:
                    # self.ssh.exec_on_host('REMOVE')
                    vtype = 'ALLTYPES30INDEX'  # if 'cache_group_3' not in cache_name else 'EXTALLTYPES30INDEX'

                    update_table.append(
                        f'\'CREATE INDEX IF NOT EXISTS {cache_name}_{vtype} on '
                        f'\"{cache_name}\".{vtype}({indexed_columns}) INLINE_SIZE 32 PARALLEL 28;\''
                    )

                update_table.append('!index')

                sqlline.run_sqlline(update_table)

                self.ignite.start_node(2)

                util_sleep_for_a_while(30)

                self.verify_no_assertion_errors()

                self.cu.control_utility('--cache validate_indexes',
                                        all_required='no issues found.')
Example #30
    def test_pme_bench_activate(self):
        """
        Perform activate and deactivate benchmarks, save results to .csv in var_dir
        """
        self.start_grid_no_activate()
        self.last_top = self.ignite.ignite_srvs.last_topology_snapshot()
        self.last_topVer = max([_['ver'] for _ in self.last_top])
        last_minorTopVer = 0
        print_blue("Topology version before activate: %d" % self.last_topVer)

        util_sleep_for_a_while(3)
        self.ignite.ignite_srvs.jmx.activate(1)

        n_tries = 0
        max_tries = 5
        while n_tries < max_tries:
            n_tries += 1
            max_time = self._get_last_exchange_time()
            if self.exchange_finished:
                break
            util_sleep_for_a_while(5)

        self.new_top = self.ignite.ignite_srvs.last_topology_snapshot()
        self.new_topVer = max([_['ver'] for _ in self.new_top])
        assert self.new_topVer == self.last_topVer, "ERROR: major topology changed, possibly crash during activation"

        for exch_topVer, exch_data in self.agg_exch_x1.items():
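            # exch_topVer packs major and minor topology versions as major * 10000 + minor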
            exch_major_topVer = int(exch_topVer / 10000)
            exch_minor_topVer = exch_topVer - exch_major_topVer * 10000
            if exch_major_topVer == self.last_topVer:
                x1_time = self.agg_exch_x1[exch_topVer]['max_duration']
                x2_time = self.agg_exch_x2[exch_topVer]['max_duration']
                self._dump_exchange_time(
                    x1_time,
                    x2_time,
                    "activate [%d, %d]" %
                    (exch_major_topVer, exch_minor_topVer),
                    num_partitions=self.num_partitions)
                last_minorTopVer = exch_minor_topVer

        self.ignite.ignite_srvs.jmx.deactivate(1)

        n_tries = 0
        max_tries = 5
        while n_tries < max_tries:
            n_tries += 1
            max_time = self._get_last_exchange_time()
            if self.exchange_finished:
                break
            util_sleep_for_a_while(5)

        for exch_topVer, exch_data in self.agg_exch_x1.items():
            exch_major_topVer = int(exch_topVer / 10000)
            exch_minor_topVer = exch_topVer - exch_major_topVer * 10000
            if exch_major_topVer == self.last_topVer and exch_minor_topVer > last_minorTopVer:
                x1_time = self.agg_exch_x1[exch_topVer]['max_duration']
                x2_time = self.agg_exch_x2[exch_topVer]['max_duration']
                self._dump_exchange_time(
                    x1_time,
                    x2_time,
                    "deactivate [%d, %d]" %
                    (exch_major_topVer, exch_minor_topVer),
                    num_partitions=self.num_partitions)