Example #1
 def state_nodes_leave(self, options='nodes_leave'):
     """
     Kill nodes
     """
     default_options = self.context['options']['actions'][options]
     nodes_to_leave = int(default_options['nodes_count'])
     killed_nodes = []
     for i in range(nodes_to_leave):
         nodes_to_delete = self.cluster.get_alive_default_nodes()
         if self.context.get('temp_action_options'):
             node_to_kill = int(self.context.get('temp_action_options')[0])
         else:
             node_to_kill = choice(nodes_to_delete)
         self.context['attachments'].append({
             'name': 'Kill info',
             'source':
             f'Kill node: {node_to_kill}\nKill time: {self.current_time_pretty}',
             'type': 'text/plain'
         })
         killed_nodes.append(node_to_kill)
         self.cluster.kill_node(node_to_kill)
         server_nodes = len(self.cluster.get_alive_default_nodes()) + len(
             self.cluster.get_alive_additional_nodes())
         self.cluster.wait_for_topology_snapshot(server_num=server_nodes,
                                                 check_only_servers=True,
                                                 timeout=60)
         util_sleep_for_a_while(5)
     return killed_nodes
Example #2
 def _prepare_before_test(self, tx_loading, custom_event_name='test'):
     util_sleep_for_a_while(self.warmup_time)
     self.last_top = self.ignite.ignite_srvs.last_topology_snapshot()
     self.last_topVer = max([_['ver'] for _ in self.last_top])
     print_blue("Last topology version before %s: %d" %
                (custom_event_name, self.last_topVer))
     tx_loading.metrics_thread.add_custom_event(custom_event_name)
Example #3
 def state_nodes_join_existed(self,
                              options='nodes_join_existed',
                              action=None):
     """
     Kill node and start again
     """
     if action is None:
         action = lambda node_id: None
     self.context['attachments'].append({
         'name': 'Join info',
         'source': f'Join time: {self.current_time_pretty}',
         'type': 'text/plain'
     })
     killed_nodes = self.state_nodes_leave(options)
     util_sleep_for_a_while(5)
     hosts = len(self.tiden.config['environment']["server_hosts"])
     nodes_per_host = self.tiden.config['environment']["servers_per_host"]
     for idx, killed_node in enumerate(killed_nodes):
         action(killed_node)
         self.cluster.start_node(killed_node, force=True)
         self.cluster.wait_for_topology_snapshot(
             server_num=hosts * nodes_per_host - len(killed_nodes) + idx + 1,
             timeout=80,
             check_only_servers=True)
Example #4
    def _wait_for_same_caches_size(self, piclient_master, piclient_replica, how_long=300, predicate=None):
        from datetime import datetime
        start = datetime.now()
        iteration = 0
        delay = 5
        # Use the supplied predicate to filter caches; by default match every cache name.
        cache_mask = predicate if predicate else (lambda x: '' in x)
        while True:
            master_sizes = self.get_caches_size(cache_mask, piclient=piclient_master, debug=True)
            replica_sizes = self.get_caches_size(cache_mask, piclient=piclient_replica, debug=True)

            if master_sizes == replica_sizes:
                break

            self._compare_dicts(master_sizes, replica_sizes, debug=False)
            util_sleep_for_a_while(delay)
            iteration += 1
            log_print('Waited {} seconds so far. Master sizes={}, replica sizes={}'.format(
                iteration * delay, master_sizes, replica_sizes))
            if (datetime.now() - start).seconds > how_long:
                self._compare_dicts(master_sizes, replica_sizes)
                raise TidenException('Cache sizes did not sync within {} seconds.'.format(how_long))

        execution_time = (datetime.now() - start).seconds
        log_print('Cache sizes synced after {} seconds.'.format(execution_time))
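
The loop above is a poll-until-equal pattern with a hard timeout. For reference, a minimal standalone sketch of the same idea (a hypothetical wait_until helper, standard library only, not part of the test framework):

from time import sleep, monotonic

def wait_until(condition, timeout=300, poll_interval=5):
    """Poll condition() until it returns True or `timeout` seconds elapse."""
    start = monotonic()
    while not condition():
        if monotonic() - start > timeout:
            raise TimeoutError('Condition was not met within {} seconds'.format(timeout))
        sleep(poll_interval)
    return monotonic() - start  # elapsed seconds until the condition held

# Usage (illustrative names): wait_until(lambda: master_sizes() == replica_sizes(), timeout=300)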
Example #5
    def util_get_restore_point(cls, seconds_ago=1):
        from datetime import datetime, timedelta
        from time import timezone

        util_sleep_for_a_while(2)
        time_format = "%Y-%m-%d-%H:%M:%S.%f"

        restore_point = (datetime.now() - timedelta(seconds=seconds_ago))
        # Hack to handle the UTC timezone: if the local UTC offset is 0 hours,
        # shift the restore point by +3 hours (presumably to match the cluster's UTC+3 wall clock).
        if int(timezone / -(60 * 60)) == 0:
            restore_point = restore_point + timedelta(hours=3)

        return restore_point.strftime(time_format)[:-3]
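
In the helper above, strftime('%f') yields microseconds (six digits), so slicing off the last three characters leaves millisecond precision. A minimal standalone sketch of that formatting, outside the test class (illustrative only):

from datetime import datetime, timedelta

def format_restore_point(seconds_ago=1):
    # Point in time `seconds_ago` seconds in the past, formatted with millisecond precision.
    point = datetime.now() - timedelta(seconds=seconds_ago)
    return point.strftime("%Y-%m-%d-%H:%M:%S.%f")[:-3]

print(format_restore_point())  # e.g. 2021-06-01-12:00:00.123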
Example #6
    def _measurements_after_test(self,
                                 custom_event_name='test',
                                 skip_exch=0,
                                 skip_minor_exch=-1,
                                 max_tries=100,
                                 sleep_between_tries=10,
                                 num_partitions=1024):
        util_sleep_for_a_while(self.stabilization_time)
        n_tries = 0
        max_time = -1
        while n_tries < max_tries:
            n_tries += 1
            max_time = self._get_last_exchange_time()
            if self.exchange_finished:
                break
            util_sleep_for_a_while(sleep_between_tries)

        self.new_top = self.ignite.ignite_srvs.last_topology_snapshot()
        self.new_topVer = max([_['ver'] for _ in self.new_top])
        print_blue("New topology version after %s: %d" %
                   (custom_event_name, self.new_topVer))

        if max_time > 0:
            for topVer in range(self.last_topVer + skip_exch,
                                self.new_topVer + 1):
                for exch_topVer, exch_data in self.agg_exch_x1.items():
                    # Aggregated exchange keys pack versions as major * 10000 + minor.
                    exch_maj_topVer = exch_topVer // 10000
                    exch_min_topVer = exch_topVer - exch_maj_topVer * 10000
                    if exch_maj_topVer == topVer and exch_min_topVer > skip_minor_exch:
                        x1_time = exch_data['max_duration']
                        x2_time = self.agg_exch_x2[exch_topVer]['max_duration']
                        self._dump_exchange_time(
                            x1_time,
                            x2_time,
                            "%s [%d, %d]" % (custom_event_name,
                                             exch_maj_topVer, exch_min_topVer),
                            num_partitions=num_partitions)
                        print_red(
                            "Exchange [%d, %d] during %s: %d msec, %d msec" %
                            (exch_maj_topVer, exch_min_topVer,
                             custom_event_name, x1_time, x2_time))
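
The bookkeeping above appears to pack the major and minor topology versions of an exchange into a single integer key as major * 10000 + minor; a tiny standalone sketch of that decoding (illustrative only, assuming that encoding):

def split_exchange_version(packed):
    # Decode a version key packed as major * 10000 + minor.
    major = packed // 10000
    minor = packed - major * 10000
    return major, minor

assert split_exchange_version(123 * 10000 + 7) == (123, 7)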
Example #7
    def test_during_rebalance(self):
        ignite = self.start_ignite_grid(self.ignite_name)

        ignite.cu.activate(activate_on_particular_node=1)

        PiClientIgniteUtils.load_data_with_putall(ignite, self.client_config)

        util_sleep_for_a_while(30)
        with PiClient(ignite, self.client_config) as piclient:
            cache_to_test = 'test_cache_with_index'
            # self.create_cache_with_indexed_data(cache_to_test)
            client_ignite = piclient.get_ignite()
            gateway = piclient.get_gateway()

            cache_config = IgniteCacheConfig(gateway)
            cache_config.set_name('cache_1')
            cache_config.set_cache_mode('replicated')
            cache_config.set_atomicity_mode('transactional')
            cache_config.set_write_synchronization_mode('full_sync')
            cache_config.set_affinity(False, 32)
            cache_config.set_group_name('some_new_group')

            cache_config1 = IgniteCacheConfig(gateway)
            cache_config1.set_name(cache_to_test)
            cache_config1.set_cache_mode('replicated')
            cache_config1.set_atomicity_mode('transactional')
            cache_config1.set_write_synchronization_mode('full_sync')
            cache_config1.set_affinity(False, 32)
            cache_config1.set_group_name('some_new_group')

            # set query entities

            caches = gateway.jvm.java.util.ArrayList()
            caches.add(cache_config.get_config_object())
            caches.add(cache_config1.get_config_object())

            log_print("Creating caches", color='green')
            client_ignite.getOrCreateCaches(caches)

            cache_names = piclient.get_ignite().cacheNames().toArray()
            if cache_to_test not in cache_names:
                log_print("Could not find cache in %s" % cache_names, color='red')

            util_sleep_for_a_while(10)

            ignite.kill_node(2)

            log_print("Overwrite values in cache %s" % cache_to_test, color='green')

            operation = create_put_all_operation(cache_to_test, 1, 1001, 100,
                                                 value_type=ModelTypes.VALUE_ALL_TYPES_INDEXED.value)
            operation.evaluate()

            util_sleep_for_a_while(15)
            ignite.start_node(2, skip_topology_check=True)

            util_sleep_for_a_while(5)
            client_ignite.cache(cache_to_test).destroy()

            ignite.update_starting_node_attrs()
            ignite.nodes[3]['status'] = NodeStatus.STARTED
            client_ignite.cache('cache_1').destroy()
Example #8
    def calculate_checksum_and_validate(self, last_loaded_key, iteration=0):
        diff_keys = {}
        client_config = Ignite.config_builder.get_config(
            'client', config_set_name='cluster_1_node_without_dr')
        with PiClient(self.clusters[0].grid, client_config,
                      new_instance=True) as piclient:
            checksum_master = create_distributed_checksum_operation().evaluate()

            if COLLECT_KEYS:
                cache_names = piclient.get_ignite().cacheNames().toArray()

                diff_keys['master'] = {}
                for cache in cache_names:
                    diff_keys['master'][cache] = []
                    iterator = piclient.get_ignite().cache(cache).iterator()
                    while iterator.hasNext():
                        diff_keys['master'][cache].append(
                            iterator.next().getKey())

                    diff_keys['master'][cache].sort()

        client_config = Ignite.config_builder.get_config(
            'client', config_set_name='cluster_2_node_without_dr')
        with PiClient(self.clusters[1].grid, client_config,
                      new_instance=True) as piclient:
            checksum_slave = create_distributed_checksum_operation().evaluate()

            if COLLECT_KEYS:
                cache_names = piclient.get_ignite().cacheNames().toArray()

                diff_keys['replica'] = {}
                for cache in cache_names:
                    diff_keys['replica'][cache] = []
                    iterator = piclient.get_ignite().cache(cache).iterator()
                    while iterator.hasNext():
                        diff_keys['replica'][cache].append(
                            iterator.next().getKey())

                    diff_keys['replica'][cache].sort()

        if checksum_master != checksum_slave:
            print(f"Checksum master\n{checksum_master}")
            print(f"Checksum slave\n{checksum_slave}")

            # if COLLECT_KEYS:
            #     pprint(diff_keys, width=240)

            comparison = ''.join(difflib.Differ().compare(
                checksum_master.splitlines(True),
                checksum_slave.splitlines(True)))

            if iteration > 3:
                print(comparison)

                # log_print('Unable to get checksum equality on both clusters after FST', color='blue')
                #
                # return

                tiden_assert(
                    False,
                    'Unable to get checksum equality on both clusters after FST'
                )

            if iteration > 2:
                if DO_FST_ON_DIFFERENCE:
                    caches_with_diff = []
                    for line in comparison.split('\n'):
                        val = None
                        if line.startswith('-'):
                            m = search('Cache.*\'(.*)\'.rows', line)
                            if m:
                                val = m.group(1)

                        if val:
                            caches_with_diff.append(val)

                    log_print(f'Difference detected in some caches.',
                              color='red')

                    log_print(f'Starting clear() caches: {caches_with_diff}.',
                              color='red')

                    client_config = Ignite.config_builder.get_config(
                        'client', config_set_name='cluster_2_node_without_dr')
                    with PiClient(self.clusters[1].grid,
                                  client_config,
                                  new_instance=True) as piclient:
                        ignite = piclient.get_ignite()
                        for cache in caches_with_diff:
                            ignite.cache(cache).clear()

                        # cache_names = piclient.get_ignite().cacheNames().toArray()
                        #
                        # print(list(
                        #     create_checksum_operation(cache_name, 1, last_loaded_key).evaluate() for cache_name in
                        #     cache_names))

                    util_sleep_for_a_while(RECHECK_CHECKSUM_TIMEOUT)

                    log_print(
                        f'Starting full state transfer on caches: {caches_with_diff}.',
                        color='red')

                    client_config = Ignite.config_builder.get_config(
                        'client', config_set_name='cluster_1_node_without_dr')
                    with PiClient(self.clusters[0].grid,
                                  client_config,
                                  new_instance=True) as piclient:
                        ignite = piclient.get_ignite()

                        try:
                            futs = []
                            for cache in caches_with_diff:
                                # TODO https://ggsystems.atlassian.net/browse/GG-22669
                                ignite.cache(cache)

                                futs.append(
                                    ignite.plugin(
                                        'GridGain').dr().stateTransfer(
                                            cache, bytes([2])))

                            for fut in futs:
                                fut.get()
                        except Exception as e:
                            log_print(
                                'Exception caught on FST\n{}'.format(e),
                                color='red')
                            log_print('Going to restart replication with FST',
                                      color='yellow')
                            futs = []
                            for cache in caches_with_diff:
                                ignite.cache(cache)
                                ignite.plugin(
                                    "GridGain").dr().startReplication(cache)
                                futs.append(
                                    ignite.plugin(
                                        'GridGain').dr().stateTransfer(
                                            cache, bytes([2])))

                            for fut in futs:
                                fut.get()

                    util_sleep_for_a_while(FST_TIMEOUT)

            log_print(
                f'Going to collect checksum again after timeout - {RECHECK_CHECKSUM_TIMEOUT} seconds',
                color='red')

            util_sleep_for_a_while(RECHECK_CHECKSUM_TIMEOUT)

            return self.calculate_checksum_and_validate(
                last_loaded_key, iteration + 1)

        return checksum_master, checksum_slave
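
The validation above diffs the two checksum reports with difflib.Differ and pulls cache names out of lines that exist only on the master side. A minimal standalone sketch of that comparison, with made-up report lines in the format implied by the regex (illustrative only):

import difflib
from re import search

master_report = "Cache 'cache_1'.rows: 10\nCache 'cache_2'.rows: 20\n"
replica_report = "Cache 'cache_1'.rows: 10\nCache 'cache_2'.rows: 19\n"

comparison = ''.join(difflib.Differ().compare(
    master_report.splitlines(True), replica_report.splitlines(True)))

caches_with_diff = []
for line in comparison.split('\n'):
    if line.startswith('-'):  # present in the master report but not in the replica one
        m = search(r"Cache.*'(.*)'.rows", line)
        if m:
            caches_with_diff.append(m.group(1))

print(caches_with_diff)  # ['cache_2']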
Example #9
    def verify_cluster(self,
                       cluster_to_verify_id,
                       nodes_before,
                       last_loaded_key=None):
        client_config = Ignite.config_builder.get_config(
            'client', config_set_name='cluster_1_node_without_dr')

        servers = 0
        ignite = self.clusters[cluster_to_verify_id].grid

        for i in range(3):
            for res in ignite.last_topology_snapshot():
                if res['servers'] > servers:
                    servers = res['servers']
                else:
                    break
            util_sleep_for_a_while(5)

        if nodes_before != servers:
            log_print(
                f"Some nodes are missing: {servers} nodes in cluster, expected {nodes_before}",
                color='yellow')

            self.verify_no_meaning_errors()

            log_print("Wait for topology messages again.", color='yellow')
            for node_id in ignite.get_all_default_nodes():
                ignite.update_started_node_status(node_id)

            log_print("Missing nodes case confirmed. Trying to restart node.",
                      color='red')
            current_cluster_nodes = ignite.get_nodes_num('server')
            if nodes_before != current_cluster_nodes:
                log_print(f"Current nodes in cluster {current_cluster_nodes}")
                nodes_to_start = []

                for node_id in ignite.get_alive_default_nodes():
                    # if the node is not actually alive, schedule it for restart
                    if not ignite.check_node_status(node_id):
                        log_print("Restarting node %s" % node_id,
                                  color='yellow')
                        nodes_to_start.append(node_id)

                log_print(f"Going to restart nodes: {nodes_to_start}",
                          color='debug')
                for node_id in nodes_to_start:
                    ignite.start_node(node_id,
                                      skip_nodes_check=True,
                                      check_only_servers=True)

                current_cluster_nodes = ignite.get_nodes_num('server')
                if nodes_before != current_cluster_nodes:
                    log_print(
                        f"Current amount of nodes in cluster: {current_cluster_nodes}, expecting {nodes_before}",
                        color='debug')

                    for node_id in ignite.get_alive_default_nodes():
                        self.util_get_threads_from_jstack(
                            ignite, node_id, "FAILED")

                    assert False, "Failed to restart node"

        ignite.cu.control_utility('--activate')

        activate_failed = False
        log_print('Check that there is no Error in activate logs',
                  color='yellow')
        if 'Error' in ignite.cu.latest_utility_output:
            activate_failed = True
            log_print('Failed!', color='red')
        sleep(5)

        ignite.cu.control_utility('--baseline')
        self.verify_no_meaning_errors()
        log_print('Check that there is no Error in control.sh --baseline logs',
                  color='yellow')

        if 'Error' in ignite.cu.latest_utility_output:
            log_print('Failed! Second try after sleep 60 seconds', color='red')
            sleep(60)

            ignite.cu.control_utility('--baseline')

            if 'Error' in ignite.cu.latest_utility_output or activate_failed:
                log_print('Cluster looks hung.')

        log_print('Check that there is no AssertionError in logs',
                  color='yellow')
        self.verify_no_meaning_errors()

        if last_loaded_key:
            try:
                new_last_key = last_loaded_key - int(
                    random.uniform(0, 1) * LOAD_DATA_SIZE)
                log_print(
                    f'Trying to remove data from survivor caches ({new_last_key}, {last_loaded_key})',
                    color='yellow')
                PiClientIgniteUtils.remove_data(
                    ignite,
                    client_config,
                    start_key=new_last_key,
                    end_key=last_loaded_key,
                    check_clients=False,
                )

                last_loaded_key = new_last_key
            except Exception:
                for node_id in ignite.get_alive_default_nodes():
                    self.util_get_threads_from_jstack(ignite, node_id,
                                                      "FAILED")

                assert False, "Unable to connect client"
            finally:
                self.verify_no_meaning_errors()

        util_sleep_for_a_while(MINI_DR_STABILIZATION_TIMEOUT)

        checksum_master, checksum_slave = self.calculate_checksum_and_validate(
            last_loaded_key)

        tiden_assert(checksum_master == checksum_slave,
                     'Hash sum master and slave should be equal')

        return last_loaded_key
Example #10
    def run_stress_restarts(self, cluster_id_to_restart, iterations,
                            nodes_to_restart, time_to_sleep_range):
        client_config = self.preconfigure_cluster_0()

        with PiClient(self.clusters[0].grid,
                      client_config,
                      jvm_options=['-ea']) as piclient:
            ignite = piclient.get_ignite()

            self.start_dynamic_caches_with_node_filter(client_config)

            last_loaded_key = START_DATA_SIZE
            PiClientIgniteUtils.load_data_with_putall(self.clusters[0].grid,
                                                      client_config,
                                                      end_key=last_loaded_key,
                                                      jvm_options=['-ea'],
                                                      check_clients=False)

            util_sleep_for_a_while(MINI_DR_STABILIZATION_TIMEOUT)

            nodes_before = 6

            last_loaded_key += 1
            for i in range(0, iterations):
                log_print(f'Current iteration {i + 1} of {iterations}',
                          color='debug')

                sleep_for_time = random.uniform(time_to_sleep_range[0],
                                                time_to_sleep_range[1])
                log_print(
                    f'In this run we are going to sleep for {sleep_for_time} seconds after each node restart',
                    color='green')

                log_print('Trying to load data into created/existing caches',
                          color='yellow')

                self.start_dynamic_caches_with_node_filter(client_config)

                PiClientIgniteUtils.load_data_with_putall(
                    self.clusters[0].grid,
                    client_config,
                    start_key=last_loaded_key,
                    end_key=last_loaded_key + LOAD_DATA_SIZE,
                    jvm_options=['-ea'],
                    check_clients=False)
                last_loaded_key += LOAD_DATA_SIZE

                self.increment_atomic(ignite)

                log_print("Round restart")
                for node_id in nodes_to_restart:
                    self.clusters[cluster_id_to_restart].grid.kill_node(
                        node_id)
                    self.clusters[cluster_id_to_restart].grid.start_node(
                        node_id, skip_topology_check=True)
                    sleep(sleep_for_time)

                log_print("Wait for topology messages")
                for node_id in nodes_to_restart:
                    self.clusters[
                        cluster_id_to_restart].grid.update_started_node_status(
                            node_id)

                util_sleep_for_a_while(MINI_DR_STABILIZATION_TIMEOUT)

                last_loaded_key = self.verify_cluster(0, nodes_before,
                                                      last_loaded_key)

        util_sleep_for_a_while(DR_STABILIZATION_TIMEOUT)

        checksum_master1, checksum_slave1 = self.calculate_checksum_and_validate(
            last_loaded_key)
        tiden_assert(checksum_master1 == checksum_slave1,
                     'Hash sums of master and slave do not match')

        self.put_data(self.clusters[1], 1, 'cluster_2_node_without_dr')

        util_sleep_for_a_while(MINI_DR_STABILIZATION_TIMEOUT)

        checksum_master2, checksum_slave2 = self.calculate_checksum_and_validate(
            last_loaded_key)
        tiden_assert(checksum_master2 == checksum_slave2,
                     'Hash sums of master and slave do not match')
Example #11
    def calculate_rebalance_speed(self, ignite, prod_runs, warmup_runs, last_end_key, keys_to_load,
                                  tx_loading=None, version=None):
        """
        :param ignite: Ignite app instance
        :param prod_runs: production runs
        :param warmup_runs: warmup runs
        :param last_end_key: last inserted key (for historical rebalance)
        :param tx_loading: TransactionalLoading instance (for rebalance under loading)
        :param version: Ignite version (for non historical rebalance - full with/ and w/o loading)
        :param keys_to_load: number of key to load 5kk ~ 4.5 Gb traffic
        :return:
        """
        assert ignite or self.historical_rebalance, \
            'Error in scenario code! Ignite or historical_rebalance should be defined'

        netstat = Netstat('dft', {}, self.test_class.ssh)  # run netstat to collect network activity

        rebalance_speed = list()  # list of results for various runs

        jmx = None

        # Here we use the artifact name to store the LFS.
        # This is needed to measure the same version with different JVM options/configs.
        stored_lfs_name = 'rebalance_%s' % self.artifact_name

        try:
            warmup_time = 0

            # Be careful here - JMX utility must be properly killed after test
            jmx = JmxUtility(ignite)
            jmx.start_utility()

            for i in range(0, warmup_runs + prod_runs):
                host = ignite.nodes[NODE_TO_REBALANCE]['host']
                group_path = 'work/db/{}/cacheGroup-{}'.format(
                    ignite.get_node_consistent_id(NODE_TO_REBALANCE),
                    CACHE_GROUP
                )

                warmup_iteration = False if warmup_runs == 0 else i < warmup_runs
                log_print("Running iteration %s (%s)" % (i, 'warmup' if warmup_iteration else 'prod'))

                dir_size_before = get_nodes_directory_size(
                    ignite,
                    self.test_class.ssh,
                    group_path,
                    nodes=[NODE_TO_REBALANCE]
                )

                self.rebalance_event_write(i, tx_loading, 'iteration_{}: restart node'.format(i))

                ignite.kill_node(NODE_TO_REBALANCE)

                if self.historical_rebalance:
                    # For historical rebalance we store the LFS for the whole cluster and then reuse it to continue the test.
                    # Some issues may appear while restoring Ignite from the stored LFS;
                    # for version 2.5.8, for example, there may be failures.

                    list_nodes_to_stop = list(node_id for node_id in ignite.nodes.keys()
                                              if node_id < 100000 and node_id != NODE_TO_REBALANCE)
                    if not ignite.exists_stored_lfs(stored_lfs_name):
                        self.load_amount_of_data(ignite, GAGE_NODE, last_end_key, keys_to_load)
                        # Avoid topology changes by client exiting
                        # https://ggsystems.atlassian.net/browse/GG-21629
                        ignite.wait_for_topology_snapshot(client_num=0)
                        util_sleep_for_a_while(30)
                        ignite.cu.deactivate()
                        util_sleep_for_a_while(30)
                        ignite.kill_nodes(*list_nodes_to_stop)
                        ignite.save_lfs(stored_lfs_name)
                        ignite.start_nodes()
                        ignite.cu.activate()
                    else:
                        util_sleep_for_a_while(30)
                        ignite.cu.deactivate()
                        util_sleep_for_a_while(30)
                        ignite.kill_nodes(*list_nodes_to_stop)
                        ignite.delete_lfs()
                        ignite.restore_lfs(stored_lfs_name)
                        ignite.start_nodes()
                else:
                    self._cleanup_lfs(ignite, NODE_TO_REBALANCE)

                    # sleep after cleanup LFS
                    sleep(10)

                start_rcvd_bytes = netstat.network()[host]['total']['RX']['bytes']
                start_sent_bytes = netstat.network()[host]['total']['TX']['bytes']

                ignite.start_node(NODE_TO_REBALANCE)

                log_print("Waiting for finish rebalance")

                # To collect JFRs we need to define how long to record them.
                # Here we use the warmup iteration's rebalance time for that.
                if warmup_time and not warmup_iteration and self.jfr_settings:
                    ignite.make_cluster_jfr(int(warmup_time) + LOADING_STABLE_TIME, self.jfr_settings)

                rebalance_time = jmx.wait_for_finish_rebalance(
                    MAX_REBALANCE_TIMEOUT,
                    [CACHE_GROUP],
                    calculate_exact_time=True,
                    time_for_node=NODE_TO_REBALANCE,
                    log=False
                )

                self.rebalance_event_write(i, tx_loading, 'rebalance finished')

                finish_rcvd_bytes = netstat.network()[host]['total']['RX']['bytes']
                finish_sent_bytes = netstat.network()[host]['total']['TX']['bytes']

                self.write_time_event('iteration_%s rebalance finished' % i)

                # wait for checkpoint
                sleep(CHECKPOINT_SLEEP)

                dir_size_after = get_nodes_directory_size(
                    ignite,
                    self.test_class.ssh,
                    group_path,
                    nodes=[NODE_TO_REBALANCE]
                )

                if version:
                    log_print('Values for version: {}'.format(version))
                rcvd_bytes = finish_rcvd_bytes - start_rcvd_bytes
                avg_speed = float(rcvd_bytes) / rebalance_time
                log_print("Current rebalance time in {} seconds (Directory size: before/after - {}/{}, "
                          "Network activity: send/received - {}/{})\nRebalance speed (received network/time): {}/sec"
                          .format(rebalance_time,
                                  convert_size(dir_size_before),
                                  convert_size(dir_size_after),
                                  convert_size(finish_sent_bytes - start_sent_bytes, start_byte=True),
                                  convert_size(rcvd_bytes, start_byte=True),
                                  convert_size(avg_speed, start_byte=True)
                                  ),
                          color='yellow'
                          )

                if not warmup_iteration:
                    if tx_loading:
                        sleep(LOADING_STABLE_TIME)

                        self.write_time_event('~JFR collection ended')

                        sleep(LOADING_STABLE_TIME)

                    rebalance_speed.append(avg_speed)
                else:
                    warmup_time = rebalance_time
        finally:
            if jmx:
                jmx.kill_utility()

        return rebalance_speed
    def assert_results(self, allowed_asserts: list, cycle_start: datetime,
                       start_time: datetime, base_name: str):
        """
        Get all exceptions from all files on remote host

        :param allowed_asserts:     filters list [{'method': ..., 'args': [...]}, ...]
        :param cycle_start:         latest full cluster clean
        :param start_time:          case start time
        :param base_name:           local directory name
        """

        log_print('assert')
        util_sleep_for_a_while(10)
        to_write_all = []

        # find all log files on remote host
        logs_to_check = []
        for host, results in self.cluster.ssh.exec(
            [f"ls {self.tiden.config['rt']['remote']['test_dir']}"]).items():
            for file_name in results[0].split('\n'):
                if file_name.endswith('.log'):
                    logs_to_check.append({
                        'log_path':
                        f"{self.tiden.config['rt']['remote']['test_dir']}/{file_name}",
                        'name': file_name,
                        'host': host
                    })

        # go through all patterns with errors
        found_fails = self.cluster.find_fails(
            files_to_check=logs_to_check,
            store_files=self.tiden.config['rt']['test_module_dir'],
            ignore_node_ids=True)
        for file_name, fail_info in found_fails.items():
            exceptions_on_this_cycle = False
            for exceptions_info in fail_info['exceptions']:
                found_time: datetime = exceptions_info.get('time')
                if found_time and found_time > start_time:
                    exceptions_on_this_cycle = True
                    valid_exception = False
                    for assert_item in allowed_asserts:
                        if assert_item['method'](*assert_item['args'],
                                                 exceptions_info['exception']):
                            valid_exception = True
                            break
                    if not valid_exception:
                        to_write_all.append({
                            'time': found_time,
                            'file': file_name,
                            'text': '\n'.join(exceptions_info['exception'])
                        })
                elif exceptions_on_this_cycle:
                    to_write_all.append({
                        'file': file_name,
                        'text': '\n'.join(exceptions_info['exception'])
                    })

        if to_write_all:
            # aggregate fails in one file
            self.context['step_failed'] = 'Found exceptions in logs!'
            self.context['report']['status'] = 'failed'
            log_print("Found exceptions!", color='red')

            fails_text = ''
            with_time = [i for i in to_write_all if i.get('time')]
            with_time.sort(key=lambda i: i['time'])
            for exception_with_time in with_time:
                fails_text += f'\n\n{exception_with_time["file"]} {exception_with_time["time"].isoformat()}\n' \
                              f'{exception_with_time["text"]}\n'
            without_time = [i for i in to_write_all if not i.get('time')]
            without_time.sort(key=lambda i: i['file'])
            for exception_without_time in without_time:
                fails_text += f'\n\n{exception_without_time["file"]}\n' \
                              f'{exception_without_time["text"]}\n'

            if self.tiden.config['combinations'].get('save_attachments'):
                run_dir = self.ensure_dir(
                    join(self.tiden.config['rt']['test_module_dir'],
                         cycle_start.isoformat()))
                combine_run_dir = self.ensure_dir(join(run_dir, base_name))
                with open(join(combine_run_dir, 'fails.log'), 'w') as f:
                    f.write(fails_text)

            files_unique_name = f'{uuid4()}_fails.log'
            files_receiver_url = self.tiden.config['environment'].get(
                'report_files_url')
            if files_receiver_url:
                post(f'{files_receiver_url}/files/add',
                     headers={'filename': files_unique_name},
                     files={'file': fails_text.encode('utf-8')})
            self.context['attachments'].append({
                'name': 'fails.log',
                'source': files_unique_name,
                'type': 'file'
            })
            self.download_run_logs()
        log_print('done with assert')