Exemple #1
0
    def _failure_injector_loop(self):
        """Inject failures repeatedly for as long as ``enable_failures`` is set.

        Every iteration builds a new ``FailureInjector`` for ``self.redpanda``,
        injects whatever ``self._next_failure()`` returns, then sleeps for the
        number of seconds returned by ``self.failure_delay_provier()``.
        """
        while True:
            # The flag is re-read at the top of each pass, so clearing it
            # stops the loop once the current sleep has finished.
            if not self.enable_failures:
                return

            injector = FailureInjector(self.redpanda)
            injector.inject_failure(self._next_failure())

            pause_sec = self.failure_delay_provier()
            self.redpanda.logger.info(
                f"waiting {pause_sec} seconds before next failure")
            time.sleep(pause_sec)
Exemple #2
0
class ProcessKill(DisruptiveAction):
    """Disruptive action that kills the redpanda process on a target node.

    The action is marked reversible: ``do_reverse_action`` starts the process
    again on the node that was most recently killed.
    """
    # Timeout and polling interval (seconds) handed to ``wait_until`` while
    # waiting for the restarted process to report a pid.
    PROCESS_START_WAIT_SEC = 20
    PROCESS_START_WAIT_BACKOFF = 2

    def __init__(self, redpanda: RedpandaService, config: ActionConfig,
                 admin: Admin):
        super().__init__(redpanda, config, admin)
        self.failure_injector = FailureInjector(self.redpanda)
        self.is_reversible = True

    def max_affected_nodes_reached(self):
        """Return True once the number of affected nodes has reached
        ``config.max_affected_nodes``."""
        return len(self.affected_nodes) >= self.config.max_affected_nodes

    def do_action(self):
        """Kill redpanda on the node returned by ``self.target_node()``.

        Returns:
            The node that was killed, or ``None`` when ``target_node()``
            returned nothing usable.
        """
        node = self.target_node()
        if not node:
            self.redpanda.logger.warning('no usable node')
            return None

        self.redpanda.logger.info(
            f'executing action on {node.account.hostname}')
        self.failure_injector.inject_failure(
            FailureSpec(FailureSpec.FAILURE_KILL, node))
        self.affected_nodes.add(node)
        self.last_affected_node = node

        # Update started_nodes so storage validations are run
        # on the correct set of nodes later.
        self.redpanda.remove_from_started_nodes(node)
        return node

    def do_reverse_action(self):
        """Restart redpanda on the most recently killed node.

        The node is removed from ``affected_nodes``, added back to the
        service's started nodes, and ``last_affected_node`` is reset to
        ``None``.

        Returns:
            The node that was restarted.
        """
        node = self.last_affected_node
        self._start_rp(node=node)
        self.affected_nodes.remove(node)
        self.redpanda.add_to_started_nodes(node)

        self.last_affected_node = None
        return node

    def _start_rp(self, node):
        """Start redpanda on ``node`` and wait until a pid is reported.

        ``wait_until`` polls ``redpanda.redpanda_pid(node)`` every
        ``PROCESS_START_WAIT_BACKOFF`` seconds for up to
        ``PROCESS_START_WAIT_SEC`` seconds; ``err_msg`` is the message it is
        given for the case where no pid shows up in that window.
        """
        self.failure_injector._start(node)
        wait_until(
            lambda: self.redpanda.redpanda_pid(node),
            timeout_sec=self.PROCESS_START_WAIT_SEC,
            backoff_sec=self.PROCESS_START_WAIT_BACKOFF,
            err_msg=
            f'Failed to start redpanda process on {node.account.hostname}')
        def failure_injector_loop():
            """Inject a random failure, sleep, and repeat while
            ``enable_failures`` (from the enclosing scope) stays truthy.

            Suspend failures last 1-10 seconds and may hit any node; every
            other failure type is aimed at a node drawn from
            ``self.active_nodes``. The pause between failures is 20-45 seconds.
            """
            injector = FailureInjector(self.redpanda)
            while enable_failures:
                kind = random.choice(FailureSpec.FAILURE_TYPES)
                duration = 0
                is_suspend = kind == FailureSpec.FAILURE_SUSPEND
                if is_suspend:
                    # Suspension is allowed on any node.
                    duration = random.randint(1, 10)
                    target = random.choice(self.redpanda.nodes)
                else:
                    # Kill/terminate only active nodes (not to influence the
                    # test outcome). The chosen active_nodes entry minus one
                    # is used as the position in redpanda.nodes.
                    node_id = random.choice(list(self.active_nodes))
                    target = self.redpanda.nodes[node_id - 1]

                spec = FailureSpec(node=target, type=kind, length=duration)
                injector.inject_failure(spec)

                pause_sec = random.randint(20, 45)
                self.redpanda.logger.info(
                    f"waiting {pause_sec} seconds before next failure")
                time.sleep(pause_sec)
Exemple #4
0
        def failure_injector_loop():
            """Inject a random failure, sleep, and repeat while
            ``enable_failures`` (from the enclosing scope) stays truthy.

            Suspend durations and the pause between failures are bounded by
            the ``NodeOperationFuzzyTest`` class attributes read below.
            """
            injector = FailureInjector(self.redpanda)
            while enable_failures:
                kind = random.choice(FailureSpec.FAILURE_TYPES)
                duration = 0
                is_suspend = kind == FailureSpec.FAILURE_SUSPEND
                if is_suspend:
                    # Suspension is allowed on any node.
                    longest = NodeOperationFuzzyTest.max_suspend_duration_seconds
                    duration = random.randint(1, longest)
                    target = random.choice(self.redpanda.nodes)
                else:
                    # Kill/terminate only active nodes (not to influence the
                    # test outcome).
                    node_id = random.choice(list(self.active_nodes))
                    target = self.redpanda.get_node(node_id)

                spec = FailureSpec(node=target, type=kind, length=duration)
                injector.inject_failure(spec)

                shortest_pause = NodeOperationFuzzyTest.min_inter_failure_time
                longest_pause = NodeOperationFuzzyTest.max_inter_failure_time
                pause_sec = random.randint(shortest_pause, longest_pause)
                self.redpanda.logger.info(
                    f"waiting {pause_sec} seconds before next failure")
                time.sleep(pause_sec)
Exemple #5
0
        def failure_injector_loop():
            """Inject a random failure, sleep, and repeat while ``failures``
            (from the enclosing scope) stays truthy.

            Nodes whose ``redpanda.idx`` is currently in ``suppressed`` are
            never targeted. Suspend durations and the pause between failures
            are bounded by ``ConsumerOffsetsMigrationTest`` class attributes.
            """
            injector = FailureInjector(self.redpanda)
            while failures:
                kind = random.choice(FailureSpec.FAILURE_TYPES)

                # Keep drawing until the pick is not in the suppressed set.
                while True:
                    target = random.choice(self.redpanda.nodes)
                    if self.redpanda.idx(target) not in suppressed:
                        break

                # Only suspend failures carry a non-zero length.
                duration = 0
                if kind == FailureSpec.FAILURE_SUSPEND:
                    longest = ConsumerOffsetsMigrationTest.max_suspend_duration_sec
                    duration = random.randint(1, longest)

                spec = FailureSpec(node=target, type=kind, length=duration)
                injector.inject_failure(spec)

                shortest_pause = ConsumerOffsetsMigrationTest.min_inter_failure_time_sec
                longest_pause = ConsumerOffsetsMigrationTest.max_inter_failure_time_sec
                pause_sec = random.randint(shortest_pause, longest_pause)
                self.redpanda.logger.info(
                    f"waiting {pause_sec} seconds before next failure")
                time.sleep(pause_sec)
Exemple #6
0
 def inject_failure(self, spec):
     """Inject ``spec`` through a new ``FailureInjector`` for ``self.redpanda``."""
     FailureInjector(self.redpanda).inject_failure(spec)