Example #1
    def test_client_hashes_sequence_values(self):
        test_seq = 'test_seq'
        with PiClient(self.ignite, self.get_client_config()) as piclient:

            seq = self.get_or_create_atomic_sequence(piclient, test_seq)

            seq.incrementAndGet()

            tiden_assert_equal(1, seq.get(), "sequence '%s' value" % test_seq)

            # four more increments bring the value to 5
            for _ in range(4):
                seq.incrementAndGet()

            tiden_assert_equal(5, seq.get(), "sequence '%s' value" % test_seq)
            # create full snapshot
            full_snapshot_id = self.su.snapshot_utility(
                'snapshot', '-type=full')

            # increment sequence after snapshot
            for _ in range(4):
                seq.incrementAndGet()

            tiden_assert_equal(9, seq.get(), "sequence '%s' value" % test_seq)

            # restore full snapshot
            self.su.snapshot_utility('restore', '-id=%s' % full_snapshot_id)

        with PiClient(self.ignite, self.get_client_config()) as piclient:

            seq = self.get_or_create_atomic_sequence(piclient,
                                                     test_seq,
                                                     create=False)
            tiden_assert_is_not_none(
                seq, "after restore from snapshot, sequence '%s'" % test_seq)

            tiden_assert_equal(6, seq.get(), "sequence '%s' value" % test_seq)

            for _ in range(4):
                seq.incrementAndGet()

            tiden_assert_equal(10, seq.get(), "sequence '%s' value" % test_seq)

            self.restart_grid()

            seq = self.get_or_create_atomic_sequence(piclient,
                                                     test_seq,
                                                     create=False)
            tiden_assert_is_not_none(
                seq, "after grid restart, sequence '%s'" % test_seq)

            # the value may jump ahead after restart: atomic sequences reserve
            # value ranges, so a restarted node can skip part of a reserved batch
            tiden_assert_equal(12, seq.get(), "sequence '%s' value" % test_seq)
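
    # get_or_create_atomic_sequence is used above but not defined in this
    # listing. A minimal sketch, assuming piclient exposes the standard Ignite
    # API Ignite.atomicSequence(name, initialValue, create), which returns
    # null when create=False and the sequence does not exist:
    def get_or_create_atomic_sequence(self, piclient, name, create=True):
        # initial value 0; with create=False this is a pure lookup
        return piclient.get_ignite().atomicSequence(name, 0, create)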
Example #2
    def test_baseline_two_nodes_removed_one_added_with_loading(self):
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self):
                new_server_num = len(self.ignite.get_alive_default_nodes()) - 2
                self.ignite.kill_node(2)
                self.ignite.kill_node(3)

                self.ignite.wait_for_topology_snapshot(
                    server_num=new_server_num)

                self.ignite.add_additional_nodes(self.get_server_config(), 1)
                self.ignite.start_additional_nodes(
                    self.ignite.get_all_additional_nodes())

                self._set_baseline_few_times()

        self.ignite.wait_for_topology_snapshot(client_num=0)

        util_sleep_for_a_while(self.rebalance_timeout)

        self.util_verify(save_lfs_on_exception=True)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "No AssertionError in logs")
Example #3
    def test_baseline_restart_node_add_one_additional_node(self):
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self):
                self.ignite.kill_node(2)

                self.delete_lfs(node_ids=[
                    2,
                ])

                self.ignite.start_node(2)

                util_sleep_for_a_while(5)

                new_nodes = self.ignite.add_additional_nodes(
                    self.get_server_config(), 1)
                self.ignite.start_additional_nodes(new_nodes)

                # self.cu.kill_transactions()
                self._set_baseline_few_times(5)

        self.util_verify(save_lfs_on_exception=True)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "No AssertionError in logs")
Example #4
    def test_blinking_node_with_extra_node_in_blt(self):
        # IGNITE-8893
        with PiClient(self.ignite, self.get_client_config()):
            self.ignite.start_additional_nodes(
                self.ignite.add_additional_nodes(self.get_server_config()))

            for iteration in range(self.iterations):
                log_print("Iteration {}/{}".format(iteration + 1,
                                                   self.iterations),
                          color='blue')

                self.assert_nodes_alive()

                self.ignite.kill_node(2)

                util_sleep(10)

                self.ignite.start_node(2)

                self.ignite.jmx.wait_for_finish_rebalance(
                    self.rebalance_timeout, self.group_names)
                util_sleep(60)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "# of AssertionError")
Example #5
    def test_ignite_8897(self):
        """
        1. create LRT
        2. start PME
        3. try to add new node to baseline
        :return:
        """

        # start grid, remove one random node from baseline
        self.start_grid()
        self.ignite.cu.set_current_topology_as_baseline()

        grid_size = len(self.ignite.get_all_alive_nodes())
        stopping_node_id = self.ignite.get_random_server_nodes()[0]
        stopping_node_consistent_id = self.ignite.get_node_consistent_id(
            stopping_node_id)

        self.kill_node(stopping_node_id)
        self.ignite.wait_for_topology_snapshot(server_num=grid_size - 1,
                                               client_num=None)
        self.ignite.cu.remove_node_from_baseline(stopping_node_consistent_id)
        util_sleep_for_a_while(5)

        # create a long running transaction
        with PiClient(self.ignite,
                      self.get_client_config(),
                      nodes_num=1,
                      jvm_options=self.jvm_options):

            self.ignite.wait_for_topology_snapshot(server_num=None, client_num=1)

            TestLrt.create_transactional_cache()
            lrt_operations = TestLrt.launch_transaction_operations()

            self.release_transactions(lrt_operations)
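
    # release_transactions is a thin helper over the launched LRT operations.
    # A rough sketch; the releaseTransaction() hook on the operation gateway
    # object is an assumption:
    def release_transactions(self, lrt_operations):
        for operation in lrt_operations:
            operation.releaseTransaction()
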
    def zookeeper_fail_test(self, scenario, expecting_broken_cluster=False):
        node_connection = self.get_server_connections()
        try:
            with PiClient(self.ignite, self.get_client_config()):
                with TransactionalLoading(self, skip_consistency_check=True):
                    util_sleep_for_a_while(10, msg='Wait until load started')
                    self.zookeeper_fail_scenario(scenario)
                    self.su.snapshot_utility('SNAPSHOT', '-type=full')
                    self.ignite.kill_node(2)
                    util_sleep_for_a_while(60,
                                           msg='Wait after zookeeper issue')

            self.ignite.start_node(2)

            for node_id in node_connection.keys():
                tiden_assert(self.ignite.check_node_is_alive(node_id),
                             "Node {} is expected to be alive".format(node_id))
            if expecting_broken_cluster:
                tiden_assert(
                    False,
                    'splitting all zookeeper hosts was expected to break the cluster')

        except Exception as e:
            if expecting_broken_cluster:
                util_sleep_for_a_while(60, msg='Wait for all nodes to segment')
                for node_id in node_connection.keys():
                    tiden_assert(
                        not self.ignite.check_node_is_alive(node_id),
                        "Node {} is expected to be dead".format(node_id))
            else:
                raise
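
    # Hypothetical drivers for the scenario above; the scenario names passed
    # to zookeeper_fail_scenario are assumptions:
    def test_zookeeper_one_host_fail(self):
        self.zookeeper_fail_test('one_host_fail')

    def test_zookeeper_all_hosts_fail(self):
        self.zookeeper_fail_test('all_hosts_fail', expecting_broken_cluster=True)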
Example #7
    def check_replica_value(self, role='master'):
        app = self.ignite_master_app if role == 'master' else self.ignite_replica_app
        with PiClient(app, self.get_client_config(role), jvm_options=self.get_dr_jvm_options(role=role),
                      cu=app.cu) as piclient:
            cache = piclient.get_ignite().getOrCreateCache('com.sbt.bm.ucp.published.api.retail.PublishedIndividual')
            assert cache.get(10000) == 1
            log_print('CHECK CLUSTER HAS VALUE')
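
    # check_replica_value pairs with the 'pitr' branch of make_data_loading in
    # Example #25, which puts key 10000 -> 1 on the master. A hypothetical
    # driver combining the two:
    def test_pitr_replicates_value(self):
        self.make_data_loading(60, role='master', func_on_load='pitr')
        self.check_replica_value(role='replica')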
Example #8
    def test_blinking_node_baseline(self):
        # IGNITE-8879
        with PiClient(self.ignite, self.get_client_config()):
            for iteration in range(self.iterations):
                log_print("Iteration {}/{}".format(iteration + 1,
                                                   self.iterations),
                          color='blue')

                self.assert_nodes_alive()

                self.ignite.kill_node(2)
                self.ignite.wait_for_topology_snapshot(
                    server_num=len(self.ignite.get_alive_default_nodes()))

                self.cu.set_current_topology_as_baseline()

                util_sleep(5)

                self.ignite.start_node(2)
                self.ignite.wait_for_topology_snapshot(
                    server_num=len(self.ignite.get_alive_default_nodes()))
                self.cu.set_current_topology_as_baseline()

                self.ignite.jmx.wait_for_finish_rebalance(
                    self.rebalance_timeout, self.group_names)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "# of AssertionError")
    def test_baseline_adding_one_node_with_loading(self):
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self,
                                      loading_profile=LoadingProfile(delay=1,
                                                                     transaction_timeout=50),
                                      skip_consistency_check=True,
                                      tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
                self.ignite.add_additional_nodes(self.get_server_config(), 1)
                self._sleep_and_custom_event(tx_loading, 'start nodes')
                self.ignite.start_additional_nodes(self.ignite.get_all_additional_nodes())

                self._sleep_and_custom_event(tx_loading, 'set blt')
                self._set_baseline_few_times()

                self._sleep_and_custom_event(tx_loading, 'sleep')
                self._sleep_and_custom_event(tx_loading, 'end loading')

                metrics = tx_loading.metrics

        self.ignite.wait_for_topology_snapshot(client_num=0)

        self.create_loading_metrics_graph('test_baseline_adding_one_node_with_loading',
                                          metrics)

        util_sleep_for_a_while(self.rebalance_timeout)

        self.cu.control_utility('--cache', 'idle_verify')

        tiden_assert_equal(0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                           "No AssertionError in logs")
Example #10
    def test_blinking_clients_clean_lfs(self):
        """
        IGN-9159 (IGNITE-7165)

        Rebalancing is cancelled when a client node joins.
        Rebalancing can take hours, and it restarts each time a client node joins.
        :return:
        """
        self.wait_for_running_clients_num(client_num=0, timeout=120)

        for iteration in range(self.iterations):
            log_print("Iteration {}/{}".format(iteration + 1,
                                               self.iterations),
                      color='blue')

            self.assert_nodes_alive()

            self.ignite.kill_node(2)
            self.cleanup_lfs(2)
            self.start_node(2)

            # restart clients 4 times
            for _ in range(4):
                with PiClient(self.ignite, self.get_client_config()):
                    pass

            self.ignite.jmx.wait_for_finish_rebalance(self.rebalance_timeout,
                                                      self.group_names)
            tiden_assert_equal(
                0,
                self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                "# of AssertionError")
    def test_baseline_removing_two_nodes_with_loading(self):
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self,
                                      loading_profile=LoadingProfile(delay=1,
                                                                     transaction_timeout=50),
                                      tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
                new_server_num = len(self.ignite.get_alive_default_nodes()) - 2
                self._sleep_and_custom_event(tx_loading, 'kill nodes')
                self.ignite.kill_node(2)
                self.ignite.kill_node(3)

                self.ignite.wait_for_topology_snapshot(server_num=new_server_num)

                self._sleep_and_custom_event(tx_loading, 'set blt')
                self._set_baseline_few_times()

                self._sleep_and_custom_event(tx_loading, 'sleep')
                self._sleep_and_custom_event(tx_loading, 'end loading')

                metrics = tx_loading.metrics

        self.ignite.wait_for_topology_snapshot(client_num=0)

        self.create_loading_metrics_graph('test_baseline_removing_two_nodes_with_loading',
                                          metrics)

        util_sleep_for_a_while(self.rebalance_timeout)

        self.cu.control_utility('--cache', 'idle_verify')

        tiden_assert_equal(0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                           "No AssertionError in logs")
Example #12
    def test_two_blinking_nodes_clean_lfs(self):
        with PiClient(self.ignite, self.get_client_config()):

            with ExitStack() as stack:
                if is_enabled(self.config.get('zookeeper_enabled')) and \
                        is_enabled(self.config.get('zookeeper_nodes_restart')):
                    stack.enter_context(ZkNodesRestart(self.zoo, 2))

                for iteration in range(self.iterations):
                    log_print("Iteration {}/{}".format(iteration + 1,
                                                       self.iterations),
                              color='blue')

                    self.assert_nodes_alive()

                    self.ignite.kill_node(2)
                    self.cleanup_lfs(2)
                    # self.ignite.start_node(2)
                    self.start_node(2)

                    self.ignite.kill_node(3)

                    util_sleep(10)

                    # self.ignite.start_node(3)
                    self.start_node(3)

                    self.ignite.jmx.wait_for_finish_rebalance(
                        self.rebalance_timeout, self.group_names)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "# of AssertionError")

    def test_util_1_7_list_seq(self):
        """
        Test control.sh --cache seq shows correct information about sequences.
        :return:
        """
        from random import randint
        with PiClient(self.ignite, self.get_client_config(), nodes_num=1) as piclient:

            seq_name = 'seq_name_%s'
            for i in range(1, 6):
                seq_ = self.util_get_or_create_atomic_sequence(piclient, seq_name % i)

                counter_num = randint(1, 100)
                log_print('Seq seq_%s will be incremented up to %s' % (i, counter_num), color='green')
                for _ in range(counter_num):
                    seq_.incrementAndGet()

            for i in range(1, 6):
                seq_ = self.util_get_or_create_atomic_sequence(piclient, seq_name % i)
                seq_val = seq_.get()
                log_print('Seq seq_%s has value %s' % (i, seq_val), color='debug')

            for i in range(0, 3):
                self.cu.control_utility('--cache', 'list seq_name.* --seq')
                output = self.cu.latest_utility_output
                log_print(output)

            expected = ['idle_verify check has finished, no conflicts have been found.']
            self.cu.control_utility('--cache idle_verify', all_required=expected)
            #self.su.snapshot_utility('idle_verify',
            #                         '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
            #                         all_required=expected)

            log_print(self.cu.latest_utility_output, color='debug')

    def test_util_1_6_utility_control_cache_contention(self):
        """
        Test control.sh --cache contention shows correct information about keys with contention.
        :return:
        """
        try:
            LRT_TIMEOUT = 10
            with PiClient(self.ignite, self.get_server_config(), nodes_num=1) as piclient:

                cache_name = 'test_cache_001'

                self.cu.control_utility('--cache contention 1')

                lrt_operations = self.launch_transaction_operations(cache_name)

                self.cu.control_utility('--cache contention 1')
                self.cu.control_utility('--cache contention 11')
                self.cu.control_utility('--cache contention 10')

                for operation in lrt_operations:
                    transaction_xid = operation.getTransaction().xid().toString()
                    if transaction_xid not in self.cu.latest_utility_output:
                        log_print("Transaction with XID %s is not in list. Key=%s, Value=%s"
                                  % (transaction_xid, operation.getKey(), operation.getValue()), color='debug')

                self.release_transactions(lrt_operations)

                log_print("Sleep for %s seconds" % LRT_TIMEOUT)
                util_sleep_for_a_while(LRT_TIMEOUT)

                self.cu.control_utility('--cache contention 1')

        except TidenException as e:
            log_print(e, color='red')

    def test_util_1_8_counters_detection_during_PME_node_from_baseline(self):
        """
        Tests that PME synchronises partition counters if discrepancies are detected.
        :return:
        """
        self.load_data_with_streamer(end_key=1000,
                                     value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)

        with PiClient(self.ignite, self.get_client_config(), nodes_num=1) as piclient:

            caches_before_lrt = []
            for cache_name in piclient.get_ignite().cacheNames().toArray():
                caches_before_lrt.append(cache_name)

            cache_under_test = caches_before_lrt[0]
            log_print('Cache under test: %s' % cache_under_test, color='blue')

            operation = create_broke_data_entry_operation(cache_under_test, 1, True, 'counter')
            operation.evaluate()

            expected = ['Conflict partition']
            self.cu.control_utility('--cache idle_verify', all_required=expected)

            output = self.cu.latest_utility_output

            grp_id, part_id = None, None
            for line in output.split('\n'):
                m = search(r'Conflict partition: (PartitionKey|PartitionKeyV2) \[grpId=(\d+),.*partId=(\d+)\]', line)
                if m:
                    grp_id = m.group(2)
                    part_id = m.group(3)

            tiden_assert(grp_id and part_id,
                         'Expecting to find conflicts in output\n{}'.format(self.cu.latest_utility_output))

            # Start one more server node and change baseline to run PME
            log_print("Going to start additional node", color='green')
            self.ignite.add_additional_nodes(self.get_server_config(), 1)
            self.ignite.start_additional_nodes(self.ignite.get_all_additional_nodes())
            self.cu.control_utility('--baseline')
            self.cu.set_current_topology_as_baseline()
            self.cu.control_utility('--baseline')
            msg_in_log = self.find_in_node_log('Partition states validation has failed for group: %s'
                                               % cache_under_test, node_id=1)
            assert msg_in_log != []

            # Check there are no conflicts after PME
            util_sleep_for_a_while(30)
            self.cu.control_utility('--cache', 'idle_verify')

            # Stop one more server node and change baseline to run PME
            self.ignite.kill_node(self.ignite.get_alive_additional_nodes()[0])
            util_sleep_for_a_while(30)
            self.cu.control_utility('--baseline')
            self.cu.set_current_topology_as_baseline()
            self.cu.control_utility('--baseline')

            # Check there are no conflicts after PME
            self.cu.control_utility('--cache', 'idle_verify')

    def test_client_start_restart_grid_stop(self):
        with PiClient(self.ignite, self.get_client_config()) as piclient:
            log_print(piclient)
            self.stop_grid()
            self.start_grid()
            cache_names = piclient.get_ignite().cacheNames().toArray()
            for cache_name in cache_names:
                log_print(cache_name)
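
    # Note: iterating cacheNames() after stop_grid()/start_grid() is what
    # actually exercises the reconnect here: the calls go through the
    # still-open piclient gateway and fail if the client never rejoined
    # the restarted grid.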
Example #17
    def test_rebalancing_with_ttl_caches(self):
        """
        IGN-13549 (IGNITE-11400)

        Rebalancing caches with TTL enabled can cause data corruption.
        :return:
        """

        with PiClient(self.ignite, self.get_client_config()):
            checksums = create_distributed_checksum_operation().evaluate()

        self.wait_for_running_clients_num(client_num=0, timeout=120)
        self.ignite.cu.control_utility(
            '--cache idle_verify --dump --skip-zeros')
        log_print('Calculating checksums done: %s' % checksums)

        self.assert_nodes_alive()

        self.ignite.kill_node(2)

        PiClientIgniteUtils.load_data_with_putall(
            self.ignite,
            self.get_client_config(),
            start_key=self.preloading_size,
            end_key=self.preloading_size + 100000)

        util_sleep(5 * 60)

        self.ignite.start_node(2)

        self.ignite.jmx.wait_for_finish_rebalance(self.rebalance_timeout * 2,
                                                  self.group_names)

        with PiClient(self.ignite,
                      self.get_client_config()):  # , jvm_options=jvm_options):
            checksums = create_distributed_checksum_operation().evaluate()

        self.wait_for_running_clients_num(client_num=0, timeout=120)
        log_print('Calculating checksums done: %s' % checksums)
        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "# of AssertionError")

    def test_cluster_stress_tolerance(self, node_under_test, other_node, fault_combination):
        timeout = 15
        thread_timeout = 10
        take_a_rest_timeout = 10
        host_under_test = node_under_test.get('host')
        other_host = other_node.get('host')

        stress = StressT(self.ssh)

        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self,
                                      loading_profile=LoadingProfile(delay=1,
                                                                     commit_possibility=1.0,
                                                                     transaction_timeout=5000),
                                      tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
                util_sleep_for_a_while(take_a_rest_timeout, msg='Loading warm up for')
                self._custom_event(tx_loading, 'start')
                util_sleep_for_a_while(take_a_rest_timeout)
                for key, value in fault_combination.items():
                    if value:
                        self._sleep_and_custom_event(tx_loading, '%s start' % key)
                        print_red('%s start' % key)
                        if key == 'disc load':
                            stress.load_disk(node_under_test['ignite_home'], host_under_test, timeout=timeout)
                        if key == 'network load':
                            stress.load_network(host_under_test, other_host, timeout=timeout)
                        if key == 'cpu load':
                            stress.load_cpu(host_under_test, timeout=timeout)
                        if key == 'ram load':
                            stress.load_ram(host_under_test, timeout=timeout)
                        if key in ['sigstop_server', 'sigstop_client']:
                            if key == 'sigstop_server':
                                pid = stress.get_random_server_pid(host_under_test)
                            else:
                                pid = stress.get_random_client_pid(host_under_test)

                            stress.sigstop_process(host_under_test, pid, timeout=thread_timeout)
                        if key in ['packets loss', 'packets duplicate', 'packets corrupt']:
                            stress.network_emulate_packet(host_under_test, other_host, lost_rate='5.0%',
                                                          timeout=timeout, type=key.split()[-1])
                        self._custom_event(tx_loading, ' ')
                        print_red('%s stop' % key)
                        # util_sleep_for_a_while(take_a_rest_timeout, msg='Rest between tests for')
                util_sleep_for_a_while(take_a_rest_timeout)
                self._custom_event(tx_loading, 'end')
                metrics = tx_loading.metrics

        self.ignite.wait_for_topology_snapshot(client_num=0)
        self.create_loading_metrics_graph('test_cluster_stress_tolerance', metrics)
        self.cu.list_transactions()
        self.cu.control_utility('--cache', 'idle_verify')
        tiden_assert_equal(0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                           "No AssertionError in logs")
Example #19
    def test_mixed_cluster_load_caches_old_server(self):
        """
        1. start mixed cluster (new version servers + old version servers)
        2. activate from new version control.sh
        3. start old version server
        4. add it to baseline
        5. smoke check:
        5.1. create dynamic caches from old server node
        5.2. do some load from old server node
        """

        self.ignite_new_version.cu.activate()
        created_caches = []
        self.server_config = Ignite.config_builder.get_config(
            'server', config_set_name='base')
        ignite = self.ignite_old_version
        with PiClient(ignite, self.server_config, nodes_num=1) as piclient:
            ignite.cu.add_node_to_baseline(
                ignite.get_node_consistent_id(piclient.node_ids[0]))

            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')

                piclient.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

            with TransactionalLoading(MixedTestLoadingAdapter(self),
                                      config_file=self.server_config,
                                      loading_profile=LoadingProfile(
                                          delay=1,
                                          transaction_timeout=100000)):
                sleep(60)
Example #20
    def test_baseline_remove_and_back_one_node_with_additional_in_baseline(
            self):
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self,
                                      loading_profile=LoadingProfile(
                                          run_for_seconds=30, delay=10)):
                current_server_nodes = self.ignite.get_nodes_num('server')

                self.start_additional_nodes(
                    self.ignite.add_additional_nodes(self.get_server_config(),
                                                     2))

                self.ignite.wait_for_topology_snapshot(
                    server_num=current_server_nodes + 2)

                log_print('Kill two nodes: one from baseline and one additional')
                self.ignite.kill_node(2)
                self.ignite.kill_node(
                    self.ignite.get_alive_additional_nodes()[0])

                log_print('Remove node from baseline')
                self.ignite.wait_for_topology_snapshot(
                    server_num=current_server_nodes)

                # self._set_baseline_few_times()
                self.cu.control_utility('--baseline')
                self.cu.remove_node_from_baseline(
                    self.ignite.get_node_consistent_id(2))

                self.load_data_with_streamer(1001, 1501)

                log_print('Start node again')
                self.ignite.start_node(2)

                self.ignite.wait_for_topology_snapshot(
                    server_num=current_server_nodes + 1)

                # self._set_baseline_few_times()
                self.cu.add_node_to_baseline(
                    self.ignite.get_node_consistent_id(2))

                self.ignite.wait_for_topology_snapshot(
                    server_num=current_server_nodes + 1)

        print_red("AssertExceptions: %s" % str(
            self.ignite.find_exception_in_logs("java.lang.AssertionError")))

        self.util_verify(save_lfs_on_exception=True)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "No AssertionError in logs")
Example #21
    def test_baseline_auto_activation(self):
        with PiClient(self.ignite, self.get_client_config()) as piclient:
            cache_names = piclient.get_ignite().cacheNames()

            async_operations = []
            for cache_name in cache_names.toArray():
                async_operation = create_async_operation(
                    create_cross_cache_account_runner_operation,
                    cache_name,
                    1,
                    1000,
                    0.5,
                    delay=1,
                    run_for_seconds=20)
                async_operations.append(async_operation)
                async_operation.evaluate()

            # wait for operations to complete
            for async_op in async_operations:
                print(async_op.getResult())

        alive_default_nodes = self.ignite.get_alive_default_nodes()
        for node_id in alive_default_nodes:
            self.ignite.kill_node(node_id)

        util_sleep_for_a_while(5)

        self.ignite.start_nodes()

        self.ignite.add_additional_nodes(self.get_server_config(), 1)
        self.ignite.start_additional_nodes(
            self.ignite.get_all_additional_nodes())

        print_red("AssertExceptions: %s" % str(
            self.ignite.find_exception_in_logs("java.lang.AssertionError")))

        self.cu.control_utility('--baseline')

        assert 'Cluster state: active' in self.cu.latest_utility_output, \
            'Cluster is inactive'

        util_sleep_for_a_while(self.rebalance_timeout)

        self.util_verify(save_lfs_on_exception=True)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "No AssertionError in logs")
Example #22
    def test_master_master_master_blinking_blt(self):
        self.prepare_clusters()

        client_config = self.preconfigure_cluster_0()

        iterations = 10
        last_loaded_key = START_DATA_SIZE
        nodes_before = 6

        with PiClient(self.clusters[0].grid,
                      client_config,
                      jvm_options=['-ea']) as piclient:
            PiClientIgniteUtils.load_data_with_streamer(
                self.clusters[0].grid,
                client_config,
                end_key=last_loaded_key,
                jvm_options=['-ea'],
                check_clients=False)

            sleep(60)

            with TransactionalLoading(self,
                                      ignite=self.clusters[0].grid,
                                      config_file=client_config,
                                      skip_consistency_check=True):
                for i in range(iterations):
                    log_print(f'Current iteration {i + 1} from {iterations}',
                              color='debug')

                    self.clusters[0].grid.kill_node(2)

                    utility_baseline_log = 'control-utility-baseline.log'

                    self.clusters[0].grid.cu.set_current_topology_as_baseline(
                        background=True, log=utility_baseline_log)

                    self.clusters[0].grid.start_node(2,
                                                     skip_topology_check=True)

                    self.clusters[0].grid.wait_for_topology_snapshot(
                        server_num=6)

                    self.clusters[0].grid.update_started_node_status(2)

                    self.clusters[0].grid.cu.set_current_topology_as_baseline(
                        background=True, log=utility_baseline_log)

                    self.verify_cluster(0, nodes_before, last_loaded_key)

    def test_baseline_two_nodes_removed_one_added_with_cpu_loading(self):
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self,
                                      loading_profile=LoadingProfile(delay=1,
                                                                     commit_possibility=0.2,
                                                                     transaction_timeout=1000),
                                      tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
                new_server_num = len(self.ignite.get_alive_default_nodes()) - 2
                self._sleep_and_custom_event(tx_loading, 'kill nodes')
                self.ignite.kill_node(2)
                self.ignite.kill_node(3)

                self.ignite.wait_for_topology_snapshot(server_num=new_server_num)

                self.ignite.add_additional_nodes(self.get_server_config(), 1)
                self._sleep_and_custom_event(tx_loading, 'add new nodes')
                self.ignite.start_additional_nodes(self.ignite.get_all_additional_nodes())

                self._sleep_and_custom_event(tx_loading, 'set blt')
                self._set_baseline_few_times()

                self._sleep_and_custom_event(tx_loading, 'sleep')

                self._sleep_and_custom_event(tx_loading, 'cpu_load')

                cpu_load_operation = create_cpu_load_operation(1.0, 1.0, 2)
                cpu_load_operation.evaluate()
                self._sleep_and_custom_event(tx_loading, 'cpu_load_sleep_end')
                cpu_load_operation.interrupt()

                self._sleep_and_custom_event(tx_loading, 'end loading')

                metrics = tx_loading.metrics

        self.ignite.wait_for_topology_snapshot(client_num=0)

        log_print(inspect.stack()[0].function)
        self.create_loading_metrics_graph('test_baseline_two_nodes_removed_one_added_with_cpu_loading',
                                          metrics)

        util_sleep_for_a_while(self.rebalance_timeout)

        self.cu.control_utility('--cache', 'idle_verify')

        tiden_assert_equal(0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                           "No AssertionError in logs")

    def test_loading_rest(self):
        with PiClient(self.ignite, self.get_client_config()):
            with TransactionalLoading(self,
                                      loading_profile=LoadingProfile(delay=1,
                                                                     transaction_timeout=5000),
                                      tx_metrics=True) as tx_loading:
                self._sleep_and_custom_event(tx_loading, 'start')
                util_sleep_for_a_while(100)
                self._sleep_and_custom_event(tx_loading, 'end')
                # capture the metrics before the loading context exits
                metrics = tx_loading.metrics
        self.ignite.wait_for_topology_snapshot(client_num=0)
        self.create_loading_metrics_graph('test_loading_rest', metrics)
        self.cu.list_transactions()
        self.cu.control_utility('--cache', 'idle_verify')

        tiden_assert_equal(0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
                           "No AssertionError in logs")
Example #25
    def make_data_loading(self, duration, role='master', func_on_load=None):
        app = self.ignite_master_app if role == 'master' else self.ignite_replica_app
        with PiClient(app, self.get_client_config(role), jvm_options=self.get_dr_jvm_options(role),
                      cu=app.cu) as piclient:
            with TransactionalLoading(piclient, ignite=app, skip_consistency_check=True,
                                      cross_cache_batch=100, skip_atomic=True,
                                      config_file=self.get_client_config(role),
                                      wait_before_kill=False,
                                      loading_profile=LoadingProfile(delay=1,
                                                                     start_key=0,
                                                                     end_key=100,
                                                                     transaction_timeout=500,
                                                                     run_for_seconds=duration + 10),
                                      tx_metrics=['txCreated', 'txCommit', 'txRollback', 'txFailed']) as tx_loading:
                sleep(20)
                if func_on_load == 'switch':
                    self.ignite_master_app.ru.replication_utility('switch')
                elif func_on_load == 'bootstrap':
                    self.ignite_master_app.ru.replication_utility(
                        'bootstrap',
                        '-role=master -archive=ZIP -single_copy -parallelism=4 -snapshot_folder=%s/snapshot' % self.dr_storage,
                        timeout=1200)
                    self.ignite_replica_app.ru.replication_utility(
                        'bootstrap',
                        '-role=replica -snapshot_folder=%s/snapshot -snapshot_id=%s' % (
                            self.dr_storage,
                            self.ignite_master_app.ru.get_session_id_from_bootstrap_command()),
                        timeout=1200)
                elif func_on_load == 'restart_on_load':
                    self.ignite_replica_app.ru.replication_utility('pause')
                    sleep(10)
                    self.restart_ignite_grid('replica')
                    sleep(10)
                    self.ignite_replica_app.ru.replication_utility('resume')
                elif func_on_load == 'pitr':
                    cache = piclient.get_ignite().getOrCreateCache(
                        'com.sbt.bm.ucp.published.api.retail.PublishedIndividual')
                    cache.put(10000, 1)
                    sleep(45)
                    self.ignite_replica_app.ru.replication_utility('stop', '-recovery')
                sleep(duration)
            log_print(tx_loading.metrics['txCommit'])
        app.wait_for_topology_snapshot(None, 0, '')
        log_print('Loading done')
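
    # A hypothetical driver for the 'switch' branch above: load the master,
    # switch replication, then expect the old master to reject writes (see
    # check_read_only_cluster in Example #27). Whether switch leaves the
    # master read-only depends on the DR setup, so this is only a sketch:
    def test_switch_under_load(self):
        self.make_data_loading(120, role='master', func_on_load='switch')
        self.check_read_only_cluster(role='master')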
Example #26
    def calculate_sum_by_field(self,
                               start_key=0,
                               end_key=1001,
                               field='balance',
                               config_file=None,
                               jvm_options=None):
        """
        Calculate checksum based on piclient

        :param start_key: start key
        :param end_key: end key
        :param field: field used in the sum operation
        :param config_file: client config
        :param jvm_options: jvm options
        :return:
        """
        log_print("Calculating sum over field '%s' in each cache value" %
                  field)

        result = {}

        if not config_file:
            config_file = self.client_config

        with PiClient(self.current_client_ignite,
                      config_file,
                      jvm_options=jvm_options,
                      nodes_num=1) as piclient:
            sorted_cache_names = []
            for cache_name in piclient.get_ignite().cacheNames().toArray():
                sorted_cache_names.append(cache_name)

            sorted_cache_names.sort()

            log_print(sorted_cache_names, color='debug')
            for cache_name in sorted_cache_names:
                balance = 0
                log_print('Caches: {}'.format(cache_name), color='debug')
                balance += create_sum_operation(cache_name, start_key, end_key,
                                                field).evaluate()
                result[cache_name] = balance
                log_print('Bal {}'.format(balance), color='debug')

            log_print('Calculating over field done')

        return result
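
    # Typical usage of calculate_sum_by_field: snapshot the per-cache sums,
    # run a failover scenario, then assert nothing was lost (a sketch; the
    # scenario step is a placeholder):
    #
    #     sums_before = self.calculate_sum_by_field()
    #     ...restart / rebalance / failover scenario...
    #     sums_after = self.calculate_sum_by_field()
    #     tiden_assert_equal(sums_before, sums_after, 'per-cache balance sums')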
Example #27
    def check_read_only_cluster(self, role='master'):
        app = self.ignite_master_app if role == 'master' else self.ignite_replica_app
        read_only = True

        with PiClient(app, self.get_client_config(role), jvm_options=self.get_dr_jvm_options(role)) as piclient:
            cache = piclient.get_ignite().getOrCreateCache((piclient.get_ignite().cacheNames().toArray())[0])
            try:
                with IgniteTransaction() as tx:
                    cache.put(1, 1)
                    tx.commit()
                    read_only = False

            except Py4JJavaError:
                pass
            finally:
                if not read_only:
                    raise TidenException('Expected {} cluster to be read-only, but a put succeeded'.format(role))
                log_print('Cluster {} read-only check passed'.format(role))

    def was_snapshots(self):
        """
        Check the previous run for any snapshot tasks and
        wait until a snapshot creation or restore finishes.
        """
        end_time = time() + 60 * 4
        if len(self.context['history']) < 2:
            return
        if [
                o for o in self.context['history'][-2]['operations']
                if 'snapshot' in o
        ]:
            while True:
                try:
                    with PiClient(self.cluster,
                                  self.client_config,
                                  new_instance=True,
                                  name='snapshot_wait',
                                  exception_print=False,
                                  read_timeout=60 * 4) as piclient:
                        op = create_async_operation(
                            wait_snapshot_operation,
                            100,
                            gateway=piclient.get_gateway())
                        op.evaluate()

                        while True:
                            if op.getStatus().toString() == "FINISHED":
                                if bool(op.getResult()):
                                    return

                            if time() > end_time:
                                log_print(
                                    'timed out waiting for the snapshot wait operation to finish',
                                    color='red')
                                return
                except Exception:
                    log_print('snapshot wait failed', color='red')
                    sleep(5)
                    if time() > end_time:
                        log_print(
                            'timed out waiting for piclient to start for the snapshot wait',
                            color='red')
                        return

    def test_util_1_7_broke_index(self):
        """
        Tests that control.sh --cache validate_indexes detects a broken index for a cache.
        :return:
        """
        self.load_data_with_streamer(end_key=5000,
                                     value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)

        update_table = ['!tables', '!index',
                        '\'create index example_idx on \"cache_group_1_028\".ALLTYPESINDEXED(STRINGCOL);\'',
                        '!index']

        sqlline = Sqlline(self.ignite)
        sqlline.run_sqlline(update_table)

        with PiClient(self.ignite, self.get_client_config(), nodes_num=1):
            cache_under_test = 'cache_group_1_028'
            log_print('Cache under test: %s' % cache_under_test, color='blue')

            operation = create_broke_data_entry_operation(cache_under_test, 0, True, 'index')
            operation.evaluate()

            util_sleep_for_a_while(10)
            self.cu.control_utility('--cache', 'validate_indexes')
            output = self.cu.latest_utility_output

            expected = ['idle_verify failed', 'Idle verify failed']
            self.cu.control_utility('--cache idle_verify',
                                    '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
                                    all_required=expected)
            #self.su.snapshot_utility('idle_verify',
            #                         '-output=%s/idle_verify_output.txt' % self.config['rt']['remote']['test_dir'],
            #                         all_required=expected)

            log_print(output, color='debug')
            found_broken_index = False
            for line in output.split('\n'):
                m = search('IndexValidationIssue.*cacheName=%s, idxName=EXAMPLE_IDX' % cache_under_test, line)
                if m:
                    found_broken_index = True
                    log_print('Index EXAMPLE_IDX is broken', color='green')

            if not found_broken_index:
                raise TidenException('Expected index EXAMPLE_IDX to be broken, but it is not!')
Example #30
    def test_old_cluster_load_caches_new_client(self):
        """
        1. start old version grid
        2. activate from old version control.sh
        3. start new version client
        4. smoke check:
        4.1. create dynamic caches
        4.2. do some load
        """
        created_caches = []

        self.ignite_old_version.cu.activate()
        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:
            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')
                piclient.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

            with TransactionalLoading(MixedTestLoadingAdapter(self),
                                      config_file=self.client_config,
                                      loading_profile=LoadingProfile(
                                          delay=1,
                                          transaction_timeout=100000)):
                sleep(60)