Example #1
    def test_mixed_cluster_load_caches_old_server(self):
        """
        1. start mixed cluster (new version servers + old version servers)
        2. activate from new version control.sh
        3. start old version server
        4. add it to baseline
        5. smoke check:
        5.1. create dynamic caches from old server node
        5.2. do some load from old server node
        """

        self.ignite_new_version.cu.activate()
        created_caches = []
        self.server_config = Ignite.config_builder.get_config(
            'server', config_set_name='base')
        ignite = self.ignite_old_version
        with PiClient(ignite, self.server_config, nodes_num=1) as piclient:
            ignite.cu.add_node_to_baseline(
                ignite.get_node_consistent_id(piclient.node_ids[0]))

            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')

                ignite = piclient.get_ignite()

                ignite.getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

            with TransactionalLoading(MixedTestLoadingAdapter(self),
                                      config_file=self.server_config,
                                      loading_profile=LoadingProfile(
                                          delay=1,
                                          transaction_timeout=100000)):
                sleep(60)
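The create / evaluate() / getResult() pattern above reappears in almost every example below: build one operation per cache, start them all asynchronously, then join on the results. A minimal sketch of that pattern, with create_async_operation and the operation factory passed in as callables (their import paths are framework-specific and assumed here):

def fan_out_and_join(create_async, op_factory, cache_names, *op_args, **op_kwargs):
    """Start one async operation per cache and wait for all of them to finish."""
    async_ops = []
    for cache_name in cache_names:
        # e.g. create_async(create_put_all_operation, cache_name, 1, 1001, 10, ...)
        async_op = create_async(op_factory, cache_name, *op_args, **op_kwargs)
        async_ops.append(async_op)
        async_op.evaluate()  # launch without blocking
    # join: block until every operation has completed
    return {name: op.getResult() for name, op in zip(cache_names, async_ops)}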
Example #2
    def test_baseline_auto_activation(self):
        with PiClient(self.ignite, self.get_client_config()) as piclient:
            cache_names = piclient.get_ignite().cacheNames()

            async_operations = []
            for cache_name in cache_names.toArray():
                async_operation = create_async_operation(
                    create_cross_cache_account_runner_operation,
                    cache_name,
                    1,
                    1000,
                    0.5,
                    delay=1,
                    run_for_seconds=20)
                async_operations.append(async_operation)
                async_operation.evaluate()

            # wait for operations to complete
            for async_op in async_operations:
                print(async_op.getResult())

        alive_default_nodes = self.ignite.get_alive_default_nodes()
        for node_id in alive_default_nodes:
            self.ignite.kill_node(node_id)

        util_sleep_for_a_while(5)

        self.ignite.start_nodes()

        self.ignite.add_additional_nodes(self.get_server_config(), 1)
        self.ignite.start_additional_nodes(
            self.ignite.get_all_additional_nodes())

        print_red("AssertExceptions: %s" % str(
            self.ignite.find_exception_in_logs("java.lang.AssertionError")))

        self.cu.control_utility('--baseline')

        assert 'Cluster state: active' in self.cu.latest_utility_output, \
            'Cluster is inactive'

        util_sleep_for_a_while(self.rebalance_timeout)

        self.util_verify(save_lfs_on_exception=True)

        tiden_assert_equal(
            0, self.ignite.find_exception_in_logs('java.lang.AssertionError'),
            "No AssertionError in logs")
Example #3
def calc_checksums_on_client(piclient,
                             start_key=0,
                             end_key=1000,
                             dict_mode=False):
    """
    Calculate per-cache checksums using the given piclient

    :param piclient: piclient instance used to run the checksum operations
    :param start_key: start key
    :param end_key: end key
    :param dict_mode: if True, return a dict {cache_name: checksum};
        otherwise return all checksums concatenated into a single string
    :return: checksums as a dict or a string, depending on dict_mode
    """
    log_print("Calculating checksums using cache.get() from client")
    cache_operation = {}
    cache_checksum = {}

    sorted_cache_names = []
    for cache_name in piclient.get_ignite().cacheNames().toArray():
        sorted_cache_names.append(cache_name)

    sorted_cache_names.sort()

    async_operations = []
    for cache_name in sorted_cache_names:
        async_operation = create_async_operation(
            create_checksum_operation,
            cache_name,
            start_key,
            end_key,
            gateway=piclient.get_gateway())
        async_operations.append(async_operation)
        cache_operation[async_operation] = cache_name
        async_operation.evaluate()

    checksums = ''

    for async_operation in async_operations:
        result = str(async_operation.getResult())
        cache_checksum[cache_operation.get(async_operation)] = result
        checksums += result

    log_print('Calculating checksums done')

    if dict_mode:
        return cache_checksum
    else:
        return checksums
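Typical usage (a sketch, assuming an open PiClient named piclient as in the other examples): call the helper twice with dict_mode=True and diff the per-cache results, for instance before and after a restart or snapshot restore.

before = calc_checksums_on_client(piclient, start_key=0, end_key=1000, dict_mode=True)
# ... restart nodes, restore a snapshot, etc. ...
after = calc_checksums_on_client(piclient, start_key=0, end_key=1000, dict_mode=True)

changed = sorted(name for name in before if before[name] != after.get(name))
assert not changed, 'Checksums differ for caches: %s' % changed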
Example #4
    def test_old_cluster_load_caches_new_client(self):
        """
        1. start old version grid
        2. activate from old version control.sh
        3. start new version client
        4. smoke check:
        4.1. create dynamic caches
        4.2. do some load
        """
        created_caches = []

        self.ignite_old_version.cu.activate()
        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:
            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')
                piclient.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

            with TransactionalLoading(MixedTestLoadingAdapter(self),
                                      config_file=self.client_config,
                                      loading_profile=LoadingProfile(
                                          delay=1,
                                          transaction_timeout=100000)):
                sleep(60)
Example #5
    def was_snapshots(self):
        """
        Check the previous run for any snapshot tasks
        and wait for snapshot creation or restore to end
        """
        end_time = time() + 60 * 4
        if len(self.context['history']) < 2:
            return
        if [
                o for o in self.context['history'][-2]['operations']
                if 'snapshot' in o
        ]:
            while True:
                try:
                    with PiClient(self.cluster,
                                  self.client_config,
                                  new_instance=True,
                                  name='snapshot_wait',
                                  exception_print=False,
                                  read_timeout=60 * 4) as piclient:
                        op = create_async_operation(
                            wait_snapshot_operation,
                            100,
                            gateway=piclient.get_gateway())
                        op.evaluate()

                        while True:
                            if op.getStatus().toString() == "FINISHED":
                                if bool(op.getResult()):
                                    return

                            if time() > end_time:
                                log_print(
                                    'failed to wait for the snapshot wait operation to finish',
                                    color='red')
                                return

                            # avoid busy-polling the operation status over the gateway
                            sleep(1)
                except:
                    log_print('snapshot wait failed', color='red')
                    sleep(5)
                    if time() > end_time:
                        log_print(
                            'failed to start piclient to wait for snapshot execution',
                            color='red')
                        return
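was_snapshots hand-rolls a deadline-bounded polling loop. The same pattern, framework-free and reusable (a sketch, not part of the test framework):

from time import sleep, time

def wait_for(condition, timeout=4 * 60, interval=2):
    """Poll condition() until it returns a truthy value or the timeout expires."""
    deadline = time() + timeout
    while time() < deadline:
        if condition():
            return True
        sleep(interval)
    return False

# e.g. wait_for(lambda: op.getStatus().toString() == 'FINISHED', timeout=4 * 60)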
Example #6
    def _create_dynamic_caches_with_data(self, with_index=False):
        log_print("Create dynamic caches and load data")

        data_model = ModelTypes.VALUE_ALL_TYPES.value
        created_caches = []
        with PiClient(self.ignite, self.get_client_config(),
                      nodes_num=1) as piclient:
            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                print_green('Loading %s...' % cache_name)

                gateway = piclient.get_gateway()
                ignite = piclient.get_ignite()

                ignite.getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name,
                                                            gateway=gateway))

                if with_index:
                    data_model = ModelTypes.VALUE_ALL_TYPES.value
                async_operation = create_async_operation(
                    create_streamer_operation,
                    cache_name,
                    1,
                    self.max_key + 2,
                    value_type=data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='blue')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

        log_print("Dynamic caches with data created")
        return created_caches
Example #7
    def util_run_money_transfer_task(self, time_to_run=10):
        log_print("Starting money transfer task", color='green')

        with PiClient(self.ignite, self.get_client_config()) as piclient:
            cache_names = piclient.get_ignite().cacheNames()

            async_operations = []
            for cache_name in cache_names.toArray():
                async_operation = create_async_operation(
                    create_account_runner_operation,
                    cache_name,
                    1,
                    self.max_key,
                    0.5,
                    delay=1,
                    run_for_seconds=time_to_run)
                async_operations.append(async_operation)
                async_operation.evaluate()

            # wait for operations to complete
            for async_op in async_operations:
                async_op.getResult()

        log_print("Money transfer is done", color='green')
Example #8
    def _calc_checksums_over_dynamic_caches(self):
        log_print("Calculating checksums")

        with PiClient(self.ignite, self.get_client_config()):
            dynamic_caches_factory = DynamicCachesFactory()

            async_operations = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method

                checksum_operation = create_checksum_operation(
                    cache_name, 1, 1000)
                async_operation = create_async_operation(checksum_operation)
                async_operations.append(async_operation)
                async_operation.evaluate()

            checksums = ''

            for async_operation in async_operations:
                checksums += str(async_operation.getResult())

        log_print("Calculating checksums done")

        return checksums
Example #9
    def test_24_fitness_two_clients_with_snapshot(self):
        """
        """
        created_caches = []

        self.ignite_new_version.cu.activate()

        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient_new:

            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')
                piclient_new.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

            with PiClient(self.ignite_old_version,
                          self.client_config,
                          nodes_num=2,
                          new_instance=True) as piclient_old:

                log_print('Taking full snapshot', color='green')
                self.ignite_new_version.su.snapshot_utility(
                    'snapshot', '-type=full')
                log_print('Snapshot completed', color='debug')

                sorted_cache_names = []
                for cache_name in piclient_old.get_ignite().cacheNames(
                ).toArray():
                    sorted_cache_names.append(cache_name)

                sorted_cache_names.sort()

                async_operations = []
                cache_operation = {}
                cache_checksum = {}
                for cache_name in sorted_cache_names:
                    async_operation = create_async_operation(
                        create_checksum_operation, cache_name, 1, 10000)
                    async_operations.append(async_operation)
                    cache_operation[async_operation] = cache_name
                    async_operation.evaluate()

                checksums = ''

                for async_operation in async_operations:
                    result = str(async_operation.getResult())
                    cache_checksum[cache_operation.get(
                        async_operation)] = result
                    checksums += result

                log_print(checksums, color='debug')
                self.ignite_new_version.su.snapshot_utility(
                    'restore', '-id={}'.format(
                        self.ignite_new_version.su.get_created_snapshot_id(1)))
                log_print('Test completed', color='debug')
Example #10
    def test_24_fitness_set_baseline_with_properties(self):
        """
        This test checks the cluster behaviour with the option GG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR,
        which can be set in different ways:
            1. Set at one of the server nodes.
            2. Set on some client node/nodes.
        """
        created_caches = []

        self.ignite_old_version.cu.activate()

        # Preloading
        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:

            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')
                piclient.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

        util_sleep_for_a_while(20)

        new_client_config = Ignite.config_builder.get_config(
            'client', config_set_name='24_fit_with_consist_id')
        jvm_options = self.ignite_new_version.get_jvm_options(1)
        jvm_options.append(
            '-DGG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR=true')
        # with PiClient(self.ignite_new_version, self.client_config, jvm_options=jvm_options, nodes_num=1) as piclient:
        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:
            for i in range(1, 5):
                self.ignite_old_version.cu.control_utility('--baseline')
                log_print('Stopping node {}'.format(i), color='green')

                jvm_options = self.ignite_new_version.get_jvm_options(i)
                jvm_options.append(
                    '-DGG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR=false'
                )
                self.ignite_new_version.set_node_option(
                    '*', 'config',
                    Ignite.config_builder.get_config(
                        'server', config_set_name='24_fit_with_consist_id'))
                log_print("Starting node {} with new consistent id".format(i),
                          color='debug')
                self.ignite_new_version.start_nodes(i,
                                                    already_nodes=4,
                                                    other_nodes=4,
                                                    timeout=240)
                log_print("Changing baseline", color='debug')
                self.ignite_old_version.cu.set_current_topology_as_baseline()
                util_sleep_for_a_while(60,
                                       msg='Wait for rebalance to complete')

        log_print('Test is done')
Example #11
    def test_24_fitness_rolling_upgrade(self):
        """
        This test checks the main rolling upgrade scenario under load:
            1. Old cluster up and running (consistent_id's are not set).
            2. First cycle: upgrade to the new version and set the property
                GG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR.
            3. Second cycle: set the correct consistent_id and add nodes to the baseline topology.

        """
        created_caches = []

        self.ignite_old_version.cu.activate()

        with PiClient(self.ignite_new_version, self.client_config,
                      nodes_num=1) as piclient:

            dynamic_caches_factory = DynamicCachesFactory()
            async_ops = []
            for method in dynamic_caches_factory.dynamic_cache_configs:
                cache_name = "cache_group_%s" % method
                log_print('Loading {}...'.format(cache_name), color='green')
                piclient.get_ignite().getOrCreateCache(
                    getattr(dynamic_caches_factory, method)(cache_name))

                async_operation = create_async_operation(
                    create_put_all_operation,
                    cache_name,
                    1,
                    1001,
                    10,
                    value_type=self.data_model)
                async_ops.append(async_operation)
                async_operation.evaluate()
                created_caches.append(cache_name)

            log_print('Waiting async results...', color='debug')
            # wait for streamer to complete
            for async_op in async_ops:
                async_op.getResult()

        util_sleep_for_a_while(60)

        with PiClient(self.ignite_old_version, self.client_config,
                      nodes_num=4) as piclient:
            cache_names = piclient.get_ignite().cacheNames()

            # Start transaction loading for TTL caches
            with TransactionalLoading(MixedTestLoadingAdapter(self),
                                      config_file=self.client_config,
                                      loading_profile=LoadingProfile(
                                          delay=0,
                                          transaction_timeout=100000,
                                          run_for_seconds=600)):
                util_sleep_for_a_while(20)
                log_print('Rolling upgrade', color='green')
                async_ops = []
                for cache_name in [
                        cache_name for cache_name in cache_names.toArray()
                        if cache_name.startswith("M2_PRODUCT")
                ]:
                    async_operation = create_async_operation(
                        create_put_all_operation,
                        cache_name,
                        1001,
                        400001,
                        10,
                        value_type=ModelTypes.VALUE_ALL_TYPES.value)
                    async_ops.append(async_operation)
                    async_operation.evaluate()

                # First cycle: upgrade version and set property.
                for i in range(1, 5):
                    self.ignite_old_version.cu.control_utility('--baseline')
                    log_print('Stopping node {}'.format(i), color='green')
                    self.ignite_old_version.kill_nodes(i)

                    self.ignite_new_version.cleanup_work_dir(i)
                    folder = self.ignite_old_version.get_work_dir(i)
                    log_print(folder, color='debug')
                    self.ignite_new_version.copy_work_dir_from(i, folder)

                    jvm_options = self.ignite_new_version.get_jvm_options(i)
                    jvm_options.append(
                        '-DGG_DISABLE_SNAPSHOT_ON_BASELINE_CHANGE_WITH_ENABLED_PITR=true'
                    )

                    util_sleep_for_a_while(10)
                    self.ignite_new_version.start_nodes(i,
                                                        already_nodes=(4 - i),
                                                        other_nodes=(4 - i),
                                                        timeout=240)
                    self.ignite_new_version.cu.control_utility('--baseline')

                for async_op in async_ops:
                    async_op.getResult()

                util_sleep_for_a_while(30)
                log_print('Change consistent ID', color='green')

                self.ignite_new_version.set_node_option(
                    '*', 'config',
                    Ignite.config_builder.get_config(
                        'server', config_set_name='24_fit_with_consist_id'))

                # Second cycle - change consistent_id and add to baseline topology.
                for i in range(1, 5):
                    self.ignite_new_version.cu.control_utility('--baseline')
                    log_print('Stopping node {}'.format(i), color='green')
                    self.ignite_new_version.kill_nodes(i)
                    log_print(
                        "Starting node {} with new consistent id".format(i),
                        color='debug')
                    self.ignite_new_version.start_nodes(i, timeout=240)
                    log_print("Changing baseline", color='debug')
                    self.ignite_new_version.cu.set_current_topology_as_baseline(
                    )
                    util_sleep_for_a_while(
                        60, msg='Wait for rebalance to complete')

                log_print('Transactional loading done', color='green')

            # Just to check client node still can interact with cluster - calculate checksum from client node.
            sorted_cache_names = []
            for cache_name in piclient.get_ignite().cacheNames().toArray():
                sorted_cache_names.append(cache_name)

            sorted_cache_names.sort()

            async_operations = []
            cache_operation = {}
            for cache_name in sorted_cache_names:
                async_operation = create_async_operation(
                    create_checksum_operation, cache_name, 1, 10000)
                async_operations.append(async_operation)
                cache_operation[async_operation] = cache_name
                async_operation.evaluate()

            checksums = ''
            cache_checksum = {}
            for async_operation in async_operations:
                result = str(async_operation.getResult())
                cache_checksum[cache_operation.get(async_operation)] = result
                checksums += result

            log_print('Calculating checksums done')
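The per-cache checksum block at the end of this test duplicates Example #3's calc_checksums_on_client. Assuming that helper is importable in this test module, the tail of the test could shrink to (sketch):

            cache_checksum = calc_checksums_on_client(piclient,
                                                      start_key=1,
                                                      end_key=10000,
                                                      dict_mode=True)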
Example #12
    def run(self, artifact_name):
        """
        Run FST Data Replication scenario for defined artifact

        1. start 2 clusters (master and replica) with replication on clients up and running.
        2. stop senders to prevent replication.
        3. load data to first N caches (check caches_amount variable in the test) with streamer on master.
        4. start senders, start replication and make FST operation.
        5. check the time used for caches to sync.

        :param artifact_name: name from artifact configuration file
        """
        super().run(artifact_name)

        log_print("Running Data Replication benchmark with config: %s" % self.config, color='green')

        caches_amount = 4
        try:
            # collect properties from config
            self.initialize_clusters()
            self.start_clusters(self.clusters)
            version = self.test_class.tiden.config['artifacts'][artifact_name]['ignite_version']
            self.new_behaviour = self.if_new_behaviour(version, self.min_version)

            # run ignite app
            keys_to_load = int(self.config.get('keys_to_load'))

            with PiClient(self.master, self.master_client_config, nodes_num=1) as piclient_master:
                cache_names = piclient_master.get_ignite().cacheNames().toArray()
                cache_names = [cache_name for cache_name in cache_names]
                with PiClient(self.replica, self.replica_client_config, nodes_num=1,
                              new_instance=True) as piclient_replica:
                    time_results = list()
                    async_operations = []
                    start_key, end_key, remove_probability = 0, keys_to_load, 0.0
                    for cache_name in cache_names[:caches_amount]:

                        # kill senders to prevent replication
                        senders = self.clusters[0].get_sender_nodes()
                        for node in senders:
                            self.master.kill_node(node.id)

                        log_print(f'Uploading data into cache {cache_name}')

                        async_operation = create_async_operation(
                            create_streamer_operation,
                            cache_name, start_key, end_key,
                            value_type=ModelTypes.VALUE_ALL_TYPES.value,
                            gateway=piclient_master.get_gateway(),
                        )

                        async_operations.append(async_operation)
                        async_operation.evaluate()

                        for async_op in async_operations:
                            async_op.getResult()

                        log_print('Uploading is done', color='green')

                        master_sizes = self.get_caches_size(cache_mask=lambda x: cache_name in x,
                                                            piclient=piclient_master, debug=False)
                        replica_sizes = self.get_caches_size(cache_mask=lambda x: cache_name in x,
                                                             piclient=piclient_replica, debug=False)
                        log_print('Master size={}, replica size={}'.format(master_sizes, replica_sizes))

                        # start senders
                        senders_ids = [node.id for node in senders]
                        self.master.start_additional_nodes(senders_ids, client_nodes=True, already_started=1,
                                                           other_nodes=1)

                        log_print(f'Running State transfer for cache {cache_name}', color='blue')

                        status = piclient_master.get_ignite().plugin("GridGain").dr().senderCacheStatus(cache_name)
                        log_print(f'DR status {status} for cache {cache_name}')

                        # start probes
                        self.start_probes(artifact_name, self.master.name)
                        start_time = datetime.now()

                        # make FST (new behaviour - use resume operation instead of start replication)
                        if self.new_behaviour:
                            piclient_master.get_ignite().plugin("GridGain").dr().resume(cache_name)
                        else:
                            piclient_master.get_ignite().plugin("GridGain").dr().startReplication(cache_name)
                        piclient_master.get_ignite().plugin("GridGain").dr().stateTransfer(cache_name, bytes([2]))

                        self._wait_for_same_caches_size(piclient_master, piclient_replica,
                                                        predicate=lambda x: cache_name in x)
                        replication_time = (datetime.now() - start_time).seconds
                        log_print(f'Replication time {replication_time}')
                        time_results.append(replication_time)

                    self.stop_probes(time_results=time_results, seconds=True)
                    log_print()
            self.results['evaluated'] = True

        finally:
            if self.clusters:
                for cluster in self.clusters:
                    log_print('Teardown for cluster {}'.format(cluster))
                    if cluster.grid:
                        cluster.grid.jmx.kill_utility()
                        cluster.grid.remove_additional_nodes()
                        cluster.grid.kill_nodes()
                        cluster.grid.delete_lfs()
                        cluster.grid = None
Example #13
    def run(self, artifact_name):
        """
        Run scenario for defined artifact

        :param artifact_name: name from artifact configuration file
        """
        super().run(artifact_name)

        log_print("Running putAll() benchmark with config: %s" % self.config,
                  color='green')

        caches_list_file = 'caches_%s.xml' % PUT_ALL_CONFIG_SET \
            if not self.config.get('many_parts') else 'caches_many_parts.xml'
        print_detailed_cache_info = self.config.get(
            'print_detailed_cache_info')

        version = self.test_class.tiden.config['artifacts'][artifact_name][
            'ignite_version']
        try:
            self.test_class.create_app_config_set(
                Ignite,
                PUT_ALL_CONFIG_SET,
                caches_list_file=caches_list_file,
                deploy=True,
                logger=False,
                wal_segment_size=self.test_class.consumption_config.get(
                    'wal_segment_size', 64 * 1024 * 1024),
                logger_path='%s/ignite-log4j2.xml' %
                self.test_class.tiden.config['rt']['remote']
                ['test_module_dir'],
                disabled_cache_configs=False,
                zookeeper_enabled=False,
                checkpoint_read_lock_timeout=self.read_lock_property_value(
                    version),
                # caches related variables
                additional_configs=[
                    'caches.tmpl.xml',
                ],
                part_32=self.test_class.consumption_config.get('part_32', 32),
                part_64=self.test_class.consumption_config.get('part_64', 64),
                part_128=self.test_class.consumption_config.get(
                    'part_128', 128),
            )

            version, ignite = self.test_class.start_ignite_grid(
                artifact_name, activate=True, config_set=PUT_ALL_CONFIG_SET)

            self.start_probes(artifact_name)

            warmup_runs, prod_runs = self._get_number_of_runs()

            time_results = list()
            per_cache_results = {}

            client_config = Ignite.config_builder.get_config(
                'client', config_set_name=PUT_ALL_CONFIG_SET)
            with PiClient(ignite, client_config) as piclient:
                cache_names = piclient.get_ignite().cacheNames()
                data_size = int(self.config.get('data_size'))

                log_print("Running {} iterations".format(warmup_runs +
                                                         prod_runs))
                for i in range(0, warmup_runs + prod_runs):
                    # print message to all nodes log
                    if i == warmup_runs:
                        create_message_operation(
                            'Checkpoint started PRODUCTION RUN STARTED'
                        ).evaluate()

                    self.write_time_event('iteration_%s start' % i)
                    warmup_iteration = False if warmup_runs == 0 else i < warmup_runs

                    log_print("Running iteration %s (%s)" %
                              (i, 'warmup' if warmup_iteration else 'prod'))

                    log_print("Loading %s values per cache into %s caches" %
                              (data_size *
                               (i + 1) - data_size * i, cache_names.size()))

                    async_operations = {}
                    self.write_time_event('iteration_%s create putall' % i)
                    for cache_name in cache_names.toArray():
                        async_operation = create_async_operation(
                            create_put_all_operation,
                            cache_name,
                            data_size * i,
                            data_size * (i + 1),
                            int(self.config.get('put_all_batch_size')),
                            value_type=ModelTypes.VALUE_ACCOUNT.value)
                        async_operations[cache_name] = async_operation
                        async_operation.evaluate()

                    for cache_name, async_op in async_operations.items():
                        async_op.getResult()

                        # skip first operations as warmup
                        if not warmup_iteration:
                            loading_time = async_op.getOperation().getEndTime(
                            ) - async_op.getOperation().getStartTime()

                            if cache_name in per_cache_results:
                                per_cache_results[cache_name] += loading_time
                            else:
                                per_cache_results[cache_name] = loading_time

                            time_results.append(loading_time)

                    self.write_time_event('iteration_%s putall done' % i)

                log_print("Loading done")

                if print_detailed_cache_info:
                    log_print("Per cache results:")

                    for key in sorted(per_cache_results.keys()):
                        print("%s: %s" % (key, per_cache_results[key]))

            ignite.cu.deactivate()

            self.stop_probes(time_results=time_results)

            self.results['evaluated'] = True

            ignite.kill_nodes()
            ignite.delete_lfs()

            log_put("Cleanup Ignite LFS ... ")
            commands = {}
            for node_idx in ignite.nodes.keys():
                host = ignite.nodes[node_idx]['host']
                if commands.get(host) is None:
                    commands[host] = [
                        'rm -rf %s/work/*' %
                        ignite.nodes[node_idx]['ignite_home']
                    ]
                else:
                    commands[host].append(
                        'rm -rf %s/work/*' %
                        ignite.nodes[node_idx]['ignite_home'])
            results = self.test_class.tiden.ssh.exec(commands)
            print(results)
            log_put("Ignite LFS deleted.")
            log_print()
        finally:
            # remove config set
            self.test_class.remove_app_config_set(Ignite, PUT_ALL_CONFIG_SET)
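The per-cache accumulation above (the `if cache_name in per_cache_results` branch) can be expressed with collections.defaultdict; a small self-contained sketch with illustrative numbers:

from collections import defaultdict

per_cache_results = defaultdict(int)
for cache_name, loading_time in [('cache_a', 120), ('cache_a', 80), ('cache_b', 95)]:
    per_cache_results[cache_name] += loading_time  # missing keys default to 0

print(dict(per_cache_results))  # {'cache_a': 200, 'cache_b': 95}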
Example #14
    def run(self, artifact_name):
        """
        Run Data Replication scenario for defined artifact

        The scenario is simple:
        1. start 2 clusters (master and replica) with replication on clients up and running.
        2. load data to first N caches (check caches_amount variable in the test) with streamer on master.
        3. check the time used for caches to sync.

        :param artifact_name: name from artifact configuration file
        """
        super().run(artifact_name)

        log_print("Running Data Replication benchmark with config: %s" % self.config, color='green')

        caches_amount = 4
        try:
            # collect properties from config
            self.initialize_clusters()
            self.start_clusters(self.clusters)

            # run ignite app
            keys_to_load = int(self.config.get('keys_to_load'))

            with PiClient(self.master, self.master_client_config, nodes_num=1) as piclient_master:
                cache_names = piclient_master.get_ignite().cacheNames().toArray()
                cache_names = [cache_name for cache_name in cache_names]
                with PiClient(self.replica, self.replica_client_config, nodes_num=1,
                              new_instance=True) as piclient_replica:
                    time_results = list()

                    self.start_probes(artifact_name, self.master.name)
                    async_operations = []
                    start_key, end_key, remove_probability = 0, keys_to_load, 0.0
                    for cache_name in cache_names[:caches_amount]:
                        log_print(f'Uploading data into cache {cache_name}')
                        start_time = datetime.now()
                        async_operation = create_async_operation(
                            create_streamer_operation,
                            cache_name, start_key, end_key,
                            value_type=ModelTypes.VALUE_ALL_TYPES.value,
                            gateway=piclient_master.get_gateway(),
                        )

                        async_operations.append(async_operation)
                        async_operation.evaluate()

                        for async_op in async_operations:
                            async_op.getResult()
                        log_print('Uploading is done', color='green')

                        self._wait_for_same_caches_size(piclient_master, piclient_replica,
                                                        predicate=lambda x: cache_name in x)
                        replication_time = (datetime.now() - start_time).seconds
                        log_print(f'Replication time {replication_time}')
                        time_results.append(replication_time)
                    self.stop_probes(time_results=time_results, seconds=True)
                    log_print()
            self.results['evaluated'] = True

        finally:
            if self.clusters:
                for cluster in self.clusters:
                    log_print('Teardown for cluster {}'.format(cluster))
                    if cluster.grid:
                        cluster.grid.jmx.kill_utility()
                        cluster.grid.remove_additional_nodes()
                        cluster.grid.kill_nodes()
                        cluster.grid.delete_lfs()
                        cluster.grid = None
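The replication-time measurement above (take datetime.now() before, subtract after, keep .seconds) can be factored into a tiny helper; a sketch:

from datetime import datetime

def measure_seconds(blocking_call, *args, **kwargs):
    """Run a blocking call and return how many whole seconds it took."""
    started = datetime.now()
    blocking_call(*args, **kwargs)
    return (datetime.now() - started).seconds

For instance, measure_seconds(self._wait_for_same_caches_size, piclient_master, piclient_replica, predicate=lambda x: cache_name in x) would time just the sync wait, whereas the example above also includes the streamer upload in its measurement.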
Example #15
    def do_blink_backups_under_load(self, initial_remove_probability):

        iteration_size = self.config.get('iteration_size', 80000)
        start = 0
        keep_coordinator_busy = True
        start_value = 0

        # temporary save LFS even on test pass
        self.need_delete_lfs_on_teardown = False

        first_node = self.ignite.get_node_consistent_id(1)
        second_node = self.ignite.get_node_consistent_id(2)

        if keep_coordinator_busy:
            other_nodes = list(
                set(self.ignite.get_all_default_nodes()) - set([1]))
        else:
            other_nodes = list(
                set(self.ignite.get_all_default_nodes()) - set([1, 2]))

        current_server_num = self.ignite.get_nodes_num('server')

        tx_caches = []
        atomic_caches = []

        self.ignite.set_snapshot_timeout(600)

        with PiClient(self.ignite, self.get_client_config(),
                      nodes_num=1) as piclient:
            gateway = piclient.get_gateway()
            ignite = piclient.get_ignite()

            cache_names = ignite.cacheNames().toArray()
            for cache_name in cache_names:
                # run cross cache transfer task only for transactional caches
                if ignite.getOrCreateCache(cache_name).getConfiguration(
                        gateway.jvm.org.apache.ignite.configuration.
                        CacheConfiguration().getClass()).getAtomicityMode(
                        ).toString() == 'TRANSACTIONAL':
                    tx_caches.append(cache_name)
                else:
                    atomic_caches.append(cache_name)

        PiClientIgniteUtils.wait_for_running_clients_num(self.ignite, 0, 120)

        for iteration in range(0, self.iterations):
            log_print("Iteration {}/{}".format(str(iteration + 1),
                                               str(self.iterations)),
                      color='blue')

            start_key = start + iteration * iteration_size
            end_key = start_key + iteration_size
            if initial_remove_probability > 0.0:
                remove_probability = initial_remove_probability + iteration / self.iterations / 2.0
            else:
                remove_probability = 0.0
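            # e.g. with initial_remove_probability = 0.2 and self.iterations = 10,
            # remove_probability ramps 0.20, 0.25, 0.30, ... up to 0.65 on the last iteration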

            current_client_num = self.ignite.get_nodes_num('client')

            for i in range(0, 3):
                with PiClient(self.ignite,
                              self.get_client_config()) as piclient:
                    log_print(
                        "Loading (remove {probability}%) {load} values per cache into {n_caches} caches"
                        .format(
                            probability=remove_probability,
                            load=iteration_size,
                            n_caches=len(tx_caches),
                        ))

                    async_operations = []
                    for cache_name in tx_caches:
                        node_id = piclient.get_node_id()
                        gateway = piclient.get_gateway(node_id)
                        tx_size = randint(1, 10)
                        log_print(
                            "Client {node_id} -> {cache_name}, tx size {tx_size}, "
                            "remove probability {remove_probability}".format(
                                node_id=node_id,
                                cache_name=cache_name,
                                tx_size=tx_size,
                                remove_probability=remove_probability,
                            ))
                        async_operation = create_async_operation(
                            create_put_with_optional_remove_operation,
                            cache_name,
                            start_key,
                            end_key,
                            remove_probability,
                            gateway=gateway,
                            node_consistent_id=first_node
                            if keep_coordinator_busy else second_node,
                            tx_description=TxDescriptor(
                                concurrency='PESSIMISTIC',
                                isolation='REPEATABLE_READ',
                                size=tx_size),
                            use_monotonic_value=True,
                            monotonic_value_seed=start_value,
                        )
                        start_value = start_value + iteration_size

                        async_operations.append(async_operation)
                        async_operation.evaluate()

                    # little warm up
                    util_sleep(5)

                    node_id = self.ignite.get_random_server_nodes(
                        1, node_ids=other_nodes)[0]
                    self.ignite.kill_node(node_id)
                    self.ignite.wait_for_topology_snapshot(
                        server_num=current_server_num - 1)

                    # continue load data during node offline
                    util_sleep(15)
                    self.ignite.start_node(node_id)
                    self.ignite.wait_for_topology_snapshot(
                        server_num=current_server_num)

                PiClientIgniteUtils.wait_for_running_clients_num(
                    self.ignite, current_client_num, 120)

                self.wait_transactions_finish()

                self.ignite.jmx.wait_for_finish_rebalance(
                    self.rebalance_timeout, self.group_names)

                self.idle_verify_check_conflicts_action()

            self.idle_verify_dump_action()
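The kill / wait-for-topology / start / wait-for-topology sequence in the middle of the loop is the "blink a node" step of this test. A sketch of it factored into a helper, using only the facade calls shown above (kill_node, start_node, wait_for_topology_snapshot) plus the util_sleep helper:

def blink_node(ignite, node_id, server_num, offline_seconds=15):
    """Take one server node down for a while, then bring it back and wait for the topology."""
    ignite.kill_node(node_id)
    ignite.wait_for_topology_snapshot(server_num=server_num - 1)
    util_sleep(offline_seconds)  # keep the load running while the node is down
    ignite.start_node(node_id)
    ignite.wait_for_topology_snapshot(server_num=server_num)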
Example #16
    def run(self, artifact_name):
        """
        Run scenario for defined artifact

        :param artifact_name: name from artifact configuration file
        """
        super().run(artifact_name)

        log_print("Running putAll() benchmark with config: %s" % self.config,
                  color='green')

        version, ignite = self.test_class.start_ignite_grid(artifact_name,
                                                            activate=True)

        self.start_probes(artifact_name)

        warmup_runs, prod_runs = self._get_number_of_runs()

        time_result = 0

        with PiClient(ignite, self.test_class.client_config) as piclient:
            cache_names = piclient.get_ignite().cacheNames()
            data_size = int(self.config.get('data_size'))

            log_print("Running {} iterations".format(warmup_runs + prod_runs))
            for i in range(0, warmup_runs + prod_runs):
                self.write_time_event('iteration_%s start' % i)
                warmup_iteration = False if warmup_runs == 0 else i < warmup_runs

                log_print("Running iteration %s (%s)" %
                          (i, 'warmup' if warmup_iteration else 'prod'))

                log_print("Loading %s values per cache into %s caches" %
                          (data_size *
                           (i + 1) - data_size * i, cache_names.size()))

                async_operations = []
                self.write_time_event('iteration_%s create putall' % i)
                for cache_name in cache_names.toArray():
                    async_operation = create_async_operation(
                        create_put_all_operation,
                        cache_name,
                        data_size * i,
                        data_size * (i + 1),
                        int(self.config.get('put_all_batch_size')),
                        value_type=ModelTypes.VALUE_ACCOUNT.value)
                    async_operations.append(async_operation)
                    async_operation.evaluate()

                for async_op in async_operations:
                    async_op.getResult()

                    # skip first operations as warmup
                    if not warmup_iteration:
                        time_result += async_op.getOperation().getEndTime(
                        ) - async_op.getOperation().getStartTime()

                self.write_time_event('iteration_%s putall done' % i)

            log_print("Loading done")

        ignite.cu.deactivate()

        self.stop_probes(time_results=float(time_result) / prod_runs)

        self.results['evaluated'] = True

        ignite.kill_nodes()
        ignite.delete_lfs()

        log_put("Cleanup Ignite LFS ... ")
        commands = {}
        for node_idx in ignite.nodes.keys():
            host = ignite.nodes[node_idx]['host']
            if commands.get(host) is None:
                commands[host] = [
                    'rm -rf %s/work/*' % ignite.nodes[node_idx]['ignite_home']
                ]
            else:
                commands[host].append('rm -rf %s/work/*' %
                                      ignite.nodes[node_idx]['ignite_home'])
        results = self.test_class.tiden.ssh.exec(commands)
        print(results)
        log_put("Ignite LFS deleted.")
        log_print()
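The "rm -rf <ignite_home>/work/*" cleanup block is repeated almost verbatim in several benchmarks (here and in Examples #13 and #17). A sketch of it factored out, assuming the ignite.nodes layout and the ssh.exec call used above:

def cleanup_ignite_work_dirs(ignite, ssh):
    """Build one 'rm -rf .../work/*' command per node, grouped by host, and run them over ssh."""
    commands = {}
    for node in ignite.nodes.values():
        commands.setdefault(node['host'], []).append(
            'rm -rf %s/work/*' % node['ignite_home'])
    return ssh.exec(commands)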
Example #17
    def run(self, artifact_name):
        """
        Run scenario for defined artifact

        :param artifact_name: name from artifact configuration file
        """
        super().run(artifact_name)

        log_print("Running streamer() benchmark with config: %s" % self.config,
                  color='green')

        version = self.test_class.tiden.config['artifacts'][artifact_name][
            'ignite_version']
        try:
            xml_config_set_name = 'caches_%s.xml' % STREAMER_CONFIG_SET
            self.test_class.create_app_config_set(
                Ignite,
                STREAMER_CONFIG_SET,
                caches_list_file=xml_config_set_name,
                deploy=True,
                logger=False,
                wal_segment_size=self.test_class.consumption_config.get(
                    'wal_segment_size', 64 * 1024 * 1024),
                logger_path='%s/ignite-log4j2.xml' %
                self.test_class.tiden.config['rt']['remote']
                ['test_module_dir'],
                disabled_cache_configs=False,
                zookeeper_enabled=False,
                checkpoint_read_lock_timeout=self.read_lock_property_value(
                    version),
                # caches related variables
                additional_configs=[
                    'caches.tmpl.xml',
                ],
                part_32=self.test_class.consumption_config.get('part_32', 32),
                part_64=self.test_class.consumption_config.get('part_64', 64),
                part_128=self.test_class.consumption_config.get(
                    'part_128', 128),
            )

            version, ignite = self.test_class.start_ignite_grid(
                artifact_name, activate=True, config_set=STREAMER_CONFIG_SET)
            self.start_probes(artifact_name)

            # default times to run
            # plus warmup times
            # plus rerun times
            warmup_runs, prod_runs = self._get_number_of_runs()

            time_results = list()

            client_config = Ignite.config_builder.get_config(
                'client', config_set_name=STREAMER_CONFIG_SET)
            with PiClient(ignite, client_config) as piclient:
                cache_names = piclient.get_ignite().cacheNames()
                data_size = int(self.config.get('data_size'))

                log_print("Running {} iterations".format(warmup_runs +
                                                         prod_runs))
                for i in range(0, warmup_runs + prod_runs):
                    self.write_time_event('iteration_%s start' % i)

                    warmup_iteration = False if warmup_runs == 0 else i < warmup_runs

                    log_print("Running iteration %s (%s)" %
                              (i, 'warmup' if warmup_iteration else 'prod'))

                    log_print("Loading %s values per cache into %s caches" %
                              (data_size *
                               (i + 1) - data_size * i, cache_names.size()))

                    async_operations = []
                    self.write_time_event('iteration_%s create streamer' % i)
                    for cache_name in cache_names.toArray():
                        async_operation = create_async_operation(
                            create_streamer_operation,
                            cache_name,
                            data_size * i,
                            data_size * (i + 1),
                            value_type=ModelTypes.VALUE_ACCOUNT.value)
                        async_operations.append(async_operation)
                        async_operation.evaluate()

                    for async_op in async_operations:
                        async_op.getResult()

                        # skip first operations as warmup, otherwise record results
                        if not warmup_iteration:
                            time_results.append(
                                async_op.getOperation().getEndTime() -
                                async_op.getOperation().getStartTime())

                    self.write_time_event('iteration_%s streamer done' % i)

                log_print("Loading done")

            ignite.cu.deactivate()

            self.stop_probes(time_results=time_results)

            self.results['evaluated'] = True

            ignite.kill_nodes()
            ignite.delete_lfs()

            log_put("Cleanup Ignite LFS ... ")
            commands = {}
            for node_idx in ignite.nodes.keys():
                host = ignite.nodes[node_idx]['host']
                if commands.get(host) is None:
                    commands[host] = [
                        'rm -rf %s/work/*' %
                        ignite.nodes[node_idx]['ignite_home']
                    ]
                else:
                    commands[host].append(
                        'rm -rf %s/work/*' %
                        ignite.nodes[node_idx]['ignite_home'])
            results = self.test_class.tiden.ssh.exec(commands)
            print(results)
            log_put("Ignite LFS deleted.")
            log_print()

        finally:
            # remove config set
            self.test_class.remove_app_config_set(Ignite, STREAMER_CONFIG_SET)
Example #18
    def cluster_state_data(self, logs=True):
        """
        Save previous cluster state
        Restore if some nodes are killed
        Add some data
        Create tables

        :param logs:    log in console
        :return:        setup state
                            True  - cluster started as new
                            False - cluster restored
        """
        tables_count = 10
        tables_rows_count = 100
        put_count = 100
        batch_size = 100

        if logs:
            log_print('[cluster state] starting DATA', color='yellow')

        self.clients_go_away()
        self.was_snapshots()

        # for all alive nodes trying to restore topology
        if [
                node for node in self.cluster.nodes.values()
                if node['status'] != NodeStatus.NEW
        ] and not self.context['clean_cluster_was_here']:
            data_changed = self._data_changed()
            try:
                # kill and delete all additional nodes
                nodes_to_kill = [
                    node_id for node_id in self.cluster.nodes.keys()
                    if node_id > 100
                ]
                for node_id in nodes_to_kill:
                    self.cluster.kill_node(node_id, ignore_exceptions=True)
                    self.cluster.cleanup_work_dir(node_id)
                    del self.cluster.nodes[node_id]
                    log_print(f'delete {node_id}')
                if nodes_to_kill:
                    active_nodes = [
                        node for node_id, node in self.cluster.nodes.items() if
                        node_id < 100 and node['status'] == NodeStatus.STARTED
                    ]
                    self.cluster.wait_for_topology_snapshot(
                        server_num=len(active_nodes),
                        timeout=80,
                        check_only_servers=True)
                nodes_started = 0
                for node_id, node in self.cluster.nodes.items():
                    # start all killed nodes
                    if node['status'] != NodeStatus.STARTED:
                        if data_changed:
                            # clean the node's data because cluster data changed while this node was down
                            self.cluster.cleanup_work_dir(node_id)
                        self.cluster.start_node(node_id, force=True)
                        nodes_started += 1
                if nodes_started == len(self.cluster.nodes):
                    # every node was down, so the cluster needs to be activated
                    self.cluster.cu.activate()
                    self.context['activate'] = True
                baseline_nodes = self.cluster.cu.get_current_baseline()
                # update the baseline if its size differs from the node count or some nodes are not ONLINE
                if len(baseline_nodes) != len(self.cluster.nodes) or \
                        [stat for stat in baseline_nodes.values() if stat != 'ONLINE']:
                    self.cluster.cu.set_current_topology_as_baseline(
                        strict=True, **self.control_util_ssh_options)

                if logs:
                    log_print('[cluster state] started DATA (restored)',
                              color='yellow')

                # add tables
                with PiClient(self.cluster,
                              self.client_config,
                              new_instance=True,
                              name='setup',
                              read_timeout=60 * 10) as piclient:
                    assert create_tables_operation(
                        tables_count,
                        tables_rows_count,
                        gateway=piclient.get_gateway()).evaluate(
                        ), 'Restore tables data operation has failed'
                return False
            except:
                # clean the cluster if something went wrong and start from scratch
                stacktrace = format_exc()
                log_print(stacktrace, color='red')
                self.context['step_failed'] = stacktrace
                try:
                    self.save_fail('restore')
                except:
                    log_print('failed to save restore data')
                    self.context['step_failed'] += \
                        f'\n\nFailed to save restore data\n{format_exc()}'
                self.clean_cluster()
        self.start_grid()
        with PiClient(self.cluster,
                      self.client_config,
                      new_instance=True,
                      name='setup',
                      read_timeout=60 * 10) as piclient:
            # put data
            start_counter = self._set_key_iter(put_count)
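            # assumption: _set_key_iter returns the first free key and reserves put_count keys for this run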
            operations = []
            caches_names = list(piclient.get_ignite().cacheNames().toArray())
            log_print(
                f'[setup data] put {put_count} keys in each of {len(caches_names)} caches'
            )
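            # kick off an asynchronous putAll into every cache; results are collected below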
            for cache_name in caches_names:
                op = create_async_operation(create_put_all_operation,
                                            cache_name=cache_name,
                                            start=start_counter,
                                            end=start_counter + put_count,
                                            batch_size=batch_size)
                operations.append(op)
                op.evaluate()
            for op in operations:
                op.getResult()
            # create tables
            log_print(
                f'[setup data] create {tables_count} tables with {tables_rows_count} rows in each'
            )
            assert create_tables_operation(
                tables_count,
                tables_rows_count,
                gateway=piclient.get_gateway()).evaluate(
                ), 'Create tables operation has failed'

        if logs:
            log_print('[cluster state] started DATA (as new)', color='yellow')
        return True

    def _run_operation(self, property_name, class_path):
        """
        Run basic operations

        1) Start the operation
        2) Wait until the start message appears in the piclient logs
        3) Wait until the kill command arrives
        4) Kill the piclient

        :param property_name:   operation name
        :param class_path:      operation class name
        """
        # handle lock
        try:
            with PiClient(self.cluster,
                          self.client_config,
                          new_instance=True,
                          name=property_name,
                          read_timeout=60 * 10) as piclient:
                # reset coordination flags for this operation (frees the lock)
                self.operations[property_name]['started'] = False
                self.operations[property_name]['kill'] = False
                try:
                    # start async
                    log_print(
                        f'[{property_name}] piclient started, begin operation')
                    async_operation = create_async_operation(
                        create_combine_operation,
                        class_path,
                        gateway=piclient.get_gateway())
                    async_operation.evaluate()
                    log_print(f'[{property_name}] operation started')

                    if not self.cluster.wait_for_messages_in_log(
                            piclient.node_ids[0],
                            f'{property_name} operation started',
                            timeout=50,
                            fail_pattern=
                            f'(\\[{property_name}\\] Failed|\\(err\\) Failed)'):
                        # kill operation if not found
                        # or some exception thrown faster
                        log_print(
                            f'Failed to wait {property_name} operation start in logs',
                            color='red')
                        log_print(f'[operation] {property_name.upper()} kill',
                                  color='yellow')
                        self.operations[property_name]['kill'] = True
                        self.operations[property_name]['killed'] = True
                        return
                    if not self.operations[property_name]['started']:
                        log_print(f'[{property_name}] failure logs not found')
                        self.operations[property_name]['started'] = True
                except:
                    log_print(format_exc(), color='red')
                    self.operations[property_name]['kill'] = True
                    self.operations[property_name]['killed'] = True
                    log_print(f'[operation] {property_name.upper()} kill',
                              color='yellow')
                    return

                end_time = time() + self.max_operation_timeout
                while True:
                    # wait until the coordinating test asks to kill the operation or the timeout expires
                    if self.operations[property_name]['kill']:
                        self.operations[property_name]['killed'] = True
                        log_print(f'[operation] {property_name.upper()} kill',
                                  color='yellow')
                        return
                    if time() > end_time:
                        if self.operations.get(property_name):
                            self.operations[property_name]['killed'] = True
                        log_print(
                            f'[operation] {property_name.upper()} timeout kill',
                            color='yellow')
                        return
                    sleep(0.5)
        except:
            log_print(f'[{property_name}] fail happened, killing operation')
            if self.operations.get(property_name):
                self.operations[property_name]['kill'] = True
                self.operations[property_name]['killed'] = True
            raise
Example #20
    def load_amount_of_data(self, ignite, node, start_key, keys_to_load):
        """
        Tricky method that loads data into a cluster.

        The main idea is to load only the data that will be rebalanced; other data is not written into the cluster.
        For example, when we collect the rebalance time by killing node_2, we load only data whose primary and
        backup partitions reside on node_2. Data that should not appear on node_2 is not loaded.

        There is also a case with a cache that should not be rebalanced but is used in TransactionalLoading.
        This cache is needed to measure the influence of rebalance on a cache that is not being rebalanced.

        :param ignite: ignite instance
        :param node: node to rebalance
        :param start_key: start key to load
        :param keys_to_load: number of keys that should be loaded (NOT END_KEY!)
        :return:
        """
        client_config = Ignite.config_builder.get_config('client', config_set_name=REBALANCE_CONFIG_SET)
        log_print("Just load data from %s with %s affinity keys on node %s" %
                  (start_key, keys_to_load, NODE_TO_REBALANCE))

        with PiClient(ignite, client_config, nodes_num=2 if self.with_no_rebalance_cache else 1) as piclient:
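            # a second piclient node is requested only when the cache outside the rebalance is loaded as well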
            async_operations = []

            # load the cache that is not rebalanced but still takes part in TransactionalLoading
            if self.with_loading and self.with_no_rebalance_cache:
                gateway = piclient.get_gateway()

                log_print("Loading cache that will not be included into rebalance", color='red')

                operation_no_rebalance = create_async_operation(
                    create_streamer_operation,
                    CACHE_NAME_NOT_IN_REBALANCE, start_key, keys_to_load,
                    value_type=self.load_type,
                    gateway=gateway,
                )

                # assign an AffinityCountKeyGenerator to the loading operation;
                # this lets us measure loading into a cache that is not being rebalanced
                operation_no_rebalance.getOperation().setKeyGenerator(
                    AffinityCountKeyGeneratorBuilder(
                        CACHE_NAME_NOT_IN_REBALANCE,
                        ignite.get_node_consistent_id(node),
                        start_key,
                        keys_to_load,
                        False
                    ).build()
                )

                operation_no_rebalance.evaluate()

                async_operations.append(operation_no_rebalance)

            gateway = piclient.get_gateway()

            # load cache that will be rebalanced
            log_print("Loading cache that will be rebalanced", color='red')
            operation = create_async_operation(
                create_streamer_operation,
                CACHE_NAME, start_key, keys_to_load,
                value_type=self.load_type,
                gateway=gateway,
            )

            if self.parts_distribution:
                # this is specific "all data in on partition case"
                operation.getOperation().setKeyGenerator(
                    AffinityPartitionKeyGeneratorBuilder(
                        CACHE_NAME,
                        self.parts_distribution,
                        start_key,
                        keys_to_load,
                    ).build()
                )
            else:
                # default partition distribution
                operation.getOperation().setKeyGenerator(
                    AffinityCountKeyGeneratorBuilder(
                        CACHE_NAME,
                        ignite.get_node_consistent_id(node),
                        start_key,
                        keys_to_load,
                        True
                    ).build()
                )

            operation.evaluate()

            async_operations.append(operation)

            for async_operation in async_operations:
                async_operation.getResult()

            last_key = operation.getResult()

        return last_key

    def test_cycling_restart_grid_dynamic_caches_with_atomic_on_restart(self):
        """
        Scenario The Glue
        (Assertions should be enabled)

        1. Start grid, load some data
        2. In the loop:
            2.1 define node restart timeout (0.5 - 2.5 seconds)
            2.2 Load more data
            2.3 Restart each node with the defined timeout (DOES NOT WAIT FOR TOPOLOGY SNAPSHOT)
            2.4 Try to activate, check AssertionErrors
            2.5 Try to set baseline (if both operations failed -> PME, kill all nodes, start new test iteration)
            2.6 Try to load data
            2.7 Try to calculate checksum

        :return:
        """
        import random

        PiClient.read_timeout = 240

        # sleep_for_time = float(random.randrange(1, 15, 1)) / 5

        self.set_current_context('in_memory')

        self.util_copy_piclient_model_to_libs()
        self.ignite.set_activation_timeout(240)
        self.ignite.set_snapshot_timeout(240)
        self.ignite.set_node_option('*', 'jvm_options', ['-ea'])
        self.su.clear_snapshots_list()
        self.start_grid(skip_activation=True)

        with PiClient(self.ignite,
                      self.get_client_config(),
                      jvm_options=['-ea']) as piclient:
            # ignite = piclient.get_ignite()

            self.start_dynamic_caches_with_node_filter()

            last_loaded_key = 1000
            PiClientIgniteUtils.load_data_with_streamer(
                self.ignite,
                self.get_client_config(),
                end_key=last_loaded_key,
                jvm_options=['-ea'])

            nodes_before = self.ignite.get_alive_default_nodes()

            iterations = 50
            last_loaded_key += 1
            for i in range(0, iterations):
                log_print('Current iteration %s of %s' % (i, iterations),
                          color='debug')
                # sleep_for_time = float(self.the_glue_timeout) if self.the_glue_timeout else random.choice([0.7, 0.9, 2.0])
                sleep_for_time = float(
                    self.the_glue_timeout) if self.the_glue_timeout else round(
                        random.uniform(0.5, 2.5), 1)
                log_print(
                    "In this run we are going to sleep for {} seconds after each node restart"
                    .format(sleep_for_time),
                    color='green')

                log_print('Trying to load data into created/existing caches',
                          color='yellow')
                self.start_dynamic_caches_with_node_filter()
                PiClientIgniteUtils.load_data_with_streamer(
                    self.ignite,
                    self.get_client_config(),
                    start_key=last_loaded_key,
                    end_key=last_loaded_key + 500,
                    jvm_options=['-ea'])
                last_loaded_key += 500

                log_print("Round restart")
                for node_id in self.ignite.get_alive_default_nodes():
                    self.ignite.kill_node(node_id)
                    self.ignite.start_node(node_id, skip_topology_check=True)
                    sleep(sleep_for_time)

                    try:
                        log_print(
                            "Incrementing atomics using distributed compute")
                        create_async_operation(
                            create_distributed_atomic_long).evaluate()
                    except Exception as e:
                        log_print("Failed to increment atomics")

                        # just print exception (https://issues.apache.org/jira/browse/IGNITE-11535)
                        traceback.print_exc()

                log_print("Wait for topology messages")
                for node_id in self.ignite.get_all_default_nodes():
                    self.ignite.update_started_node_status(node_id)

                sleep(15)

                log_print("Validating cluster")
                last_loaded_key = self.verify_cluster(nodes_before,
                                                      last_loaded_key)
Example #22
    def test_ignite_8657(self):
        """
        This test is based on IGNITE-8657:
        1. start grid with EXCHANGE_HISTORY_SIZE smaller than N
        2. activate
        3. start simultaneously M > N clients
        4. all client nodes should be able to perform cache put/get operations and transactions

        NB: this test hangs with 2.5.1-p6 because piclient waits in Ignition.start() forever
        """
        self.start_grid()
        self.load_random_data_with_streamer(0, 1000, nodes_num=2)
        self.cu.set_current_topology_as_baseline()

        nodes_before = set(self.ignite.get_all_common_nodes())

        with PiClient(self.ignite,
                      self.get_client_config(),
                      nodes_num=10,
                      jvm_options=self.jvm_options,
                      read_timeout=300) as piclient:
            nodes_after = set(self.ignite.get_all_common_nodes())
            nodes_started = list(nodes_after - nodes_before)
            node_ids = deque(nodes_started)
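            # rotate through the newly started client nodes in round-robin fashion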
            node_id = node_ids[0]
            node_ids.rotate()

            for i in range(1, 5):
                gateway = piclient.get_gateway(node_id)
                ignite = piclient.get_ignite(node_id)
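                # open and commit an empty transaction first as a basic sanity check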
                tx = ignite.transactions().txStart()
                util_sleep_for_a_while(3)
                tx.commit()
                for concurrency in ['OPTIMISTIC', 'PESSIMISTIC']:
                    for isolation in [
                            'READ_COMMITTED', 'REPEATABLE_READ', 'SERIALIZABLE'
                    ]:
                        print_blue('Run transaction %s %s' %
                                   (concurrency, isolation))

                        node_id = node_ids[0]
                        node_ids.rotate()

                        gateway = piclient.get_gateway(node_id)
                        ignite = piclient.get_ignite(node_id)

                        concurrency_isolation_map = self._get_tx_type_map(
                            gateway)

                        cache_names = ignite.cacheNames().toArray()

                        tx = ignite.transactions().txStart(
                            concurrency_isolation_map.get(concurrency),
                            concurrency_isolation_map.get(isolation))

                        for cache_name in cache_names:
                            cache = ignite.getCache(cache_name)
                            val = cache.get(int(random() * 1000))
                            # log_print('got %s' % repr(val))
                            if val:
                                cache.put(int(random() * 1000), val)

                        tx.commit()
                node_id = node_ids[0]
                node_ids.rotate()

                ignite = piclient.get_ignite(node_id)
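                # stream an extra batch of keys (1002-2000) into every cache from the next client node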
                async_ops = []
                for cache_name in ignite.cacheNames().toArray():
                    _async = create_async_operation(create_streamer_operation,
                                                    cache_name, 1002, 2000)
                    _async.evaluate()
                    async_ops.append(_async)

                for async_op in async_ops:
                    async_op.getResult()