def restart_machine(self):
    """
    Restart the nodes to fail in the tests.

    Builds one NodeDownTimerTask per node in ``self.server_to_fail``,
    passes them as ``failure_timers`` to an AutoFailoverNodesFailureTask
    running the "restart_machine" action, schedules the timers and the
    failure task, and then blocks on the failure task's result. Any
    exception raised while waiting is reported through ``self.fail``.

    :return: Nothing
    """
    node_down_timer_tasks = [NodeDownTimerTask(node.ip)
                             for node in self.server_to_fail]
    task = AutoFailoverNodesFailureTask(self.orchestrator,
                                        self.server_to_fail,
                                        "restart_machine", self.timeout,
                                        self.pause_between_failover_action,
                                        self.failover_expected,
                                        self.timeout_buffer,
                                        failure_timers=node_down_timer_tasks)
    # Timers go onto their own task manager before the failure task itself
    # is scheduled.
    # NOTE(review): the meaning of the second schedule() argument (2) is not
    # visible from here - confirm against the task manager.
    for node_down_timer_task in node_down_timer_tasks:
        self.node_failure_task_manager.schedule(node_down_timer_task, 2)
    self.task_manager.schedule(task)
    try:
        task.result()
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        self.fail("Exception: {}".format(e))
def start_couchbase_server(self):
    """
    Start the couchbase server on the nodes to fail in the tests.

    Schedules an AutoFailoverNodesFailureTask with the "start_couchbase"
    action for ``self.server_to_fail`` and blocks until it finishes. Any
    exception raised while waiting is reported through ``self.fail``.

    :return: Nothing
    """
    # NOTE(review): the positional 0 / False / False values follow the same
    # slots other callers fill with pause_between_failover_action,
    # failover_expected and (presumably) check_for_failover - confirm
    # against the AutoFailoverNodesFailureTask signature.
    task = AutoFailoverNodesFailureTask(self.orchestrator,
                                        self.server_to_fail,
                                        "start_couchbase", self.timeout,
                                        0, False, self.timeout_buffer,
                                        False)
    self.task_manager.schedule(task)
    try:
        task.result()
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        self.fail("Exception: {}".format(e))
def disable_firewall(self):
    """
    Disable the firewall on the nodes to fail in the tests.

    Stores the current time in ``self.time_start``, then schedules an
    AutoFailoverNodesFailureTask with the "disable_firewall" action and
    waits for it. A failure while waiting is turned into ``self.fail``.

    :return: Nothing
    """
    self.time_start = time.time()
    firewall_task = AutoFailoverNodesFailureTask(
        self.orchestrator,
        self.server_to_fail,
        "disable_firewall",
        self.timeout,
        self.pause_between_failover_action,
        False,
        self.timeout_buffer,
        False)
    self.task_manager.schedule(firewall_task)
    try:
        firewall_task.result()
    except Exception as err:
        self.fail("Exception: {}".format(err))
def disable_firewall(self):
    """
    Disable firewall on the nodes to fail in the tests.

    Records the start time in ``self.time_start``, schedules an
    AutoFailoverNodesFailureTask with the "disable_firewall" action for
    ``self.server_to_fail`` and blocks until it finishes. Any exception
    raised while waiting is reported through ``self.fail``.

    :return: Nothing
    """
    self.time_start = time.time()
    task = AutoFailoverNodesFailureTask(self.orchestrator,
                                        self.server_to_fail,
                                        "disable_firewall", self.timeout,
                                        self.pause_between_failover_action,
                                        False, self.timeout_buffer, False)
    self.task_manager.schedule(task)
    try:
        task.result()
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        self.fail("Exception: {}".format(e))
def bring_back_failed_nodes_up(self):
    """
    Recover the failed nodes after a disk-failure test.

    Only acts when ``self.failover_action`` is "disk_failure": in that
    case an AutoFailoverNodesFailureTask with the "recover_disk_failure"
    action is scheduled and awaited, and any exception raised while
    waiting is reported through ``self.fail``. For every other failover
    action this method does nothing.

    :return: Nothing
    """
    if self.failover_action != "disk_failure":
        return
    task = AutoFailoverNodesFailureTask(
        self.orchestrator, self.server_to_fail,
        "recover_disk_failure", self.timeout,
        self.pause_between_failover_action,
        expect_auto_failover=False,
        timeout_buffer=self.timeout_buffer,
        check_for_failover=False,
        disk_timeout=self.disk_timeout,
        disk_location=self.disk_location,
        disk_size=self.disk_location_size)
    self.task_manager.schedule(task)
    try:
        task.result()
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        self.fail("Exception: {}".format(e))
def split_network(self):
    """
    Split the network in the cluster.

    Stop network traffic from a few nodes while allowing the traffic from
    the rest of the cluster. Records the start time in
    ``self.time_start``, requires at least two entries in
    ``self.server_to_fail`` (otherwise ``self.fail`` is called), then
    schedules an AutoFailoverNodesFailureTask with the "network_split"
    action and blocks until it finishes. Any exception raised while
    waiting is reported through ``self.fail``.

    :return: Nothing
    """
    self.time_start = time.time()
    if len(self.server_to_fail) < 2:
        # NOTE(review): relies on self.fail raising to stop the test here -
        # confirm against the test base class.
        self.fail("Need atleast 2 servers to fail")
    task = AutoFailoverNodesFailureTask(self.orchestrator,
                                        self.server_to_fail,
                                        "network_split", self.timeout,
                                        self.pause_between_failover_action,
                                        False, self.timeout_buffer)
    self.task_manager.schedule(task)
    try:
        task.result()
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        self.fail("Exception: {}".format(e))
def fail_disk_via_disk_full(self):
    """
    Run the "disk_full" failure action on the nodes to fail.

    One NodeDownTimerTask is created per node in ``self.server_to_fail``
    and handed to the AutoFailoverNodesFailureTask together with the disk
    settings (``self.disk_timeout``, ``self.disk_location``,
    ``self.disk_location_size``). The timers and the failure task are
    scheduled, and the failure task is awaited; an exception while
    waiting is turned into ``self.fail``.

    :return: Nothing
    """
    timers = [NodeDownTimerTask(node.ip) for node in self.server_to_fail]
    disk_full_task = AutoFailoverNodesFailureTask(
        self.orchestrator,
        self.server_to_fail,
        "disk_full",
        self.timeout,
        self.pause_between_failover_action,
        self.failover_expected,
        self.timeout_buffer,
        failure_timers=timers,
        disk_timeout=self.disk_timeout,
        disk_location=self.disk_location,
        disk_size=self.disk_location_size)
    # Timers are queued first, then the failure task itself.
    for timer in timers:
        self.node_failure_task_manager.schedule(timer, 2)
    self.task_manager.schedule(disk_full_task)
    try:
        disk_full_task.result()
    except Exception as err:
        self.fail("Exception: {}".format(err))
def restart_machine(self):
    """
    Restart the nodes to fail in the tests.

    Builds one NodeDownTimerTask per node in ``self.server_to_fail``,
    passes them as ``failure_timers`` to an AutoFailoverNodesFailureTask
    running the "restart_machine" action, schedules the timers and the
    failure task, and blocks on the failure task's result. Any exception
    raised while waiting is reported through ``self.fail``.

    Whether or not the task succeeded, the method then sleeps for 120
    seconds and tries up to two times per node to open a
    RemoteMachineShellConnection, sleeping 60 seconds after each failed
    attempt.

    :return: Nothing
    """
    node_down_timer_tasks = [NodeDownTimerTask(node.ip)
                             for node in self.server_to_fail]
    task = AutoFailoverNodesFailureTask(
        self.orchestrator, self.server_to_fail, "restart_machine",
        self.timeout, self.pause_between_failover_action,
        self.failover_expected, self.timeout_buffer,
        failure_timers=node_down_timer_tasks)
    for node_down_timer_task in node_down_timer_tasks:
        self.node_failure_task_manager.schedule(node_down_timer_task, 2)
    self.task_manager.schedule(task)
    try:
        task.result()
    except Exception as e:
        self.fail("Exception: {}".format(e))
    finally:
        self.sleep(120, "Sleeping for 2 min for the machines to restart")
        for node in self.server_to_fail:
            for _ in range(2):
                try:
                    # Successful construction is used as the "machine is
                    # back" probe.
                    # NOTE(review): the connection is never explicitly
                    # closed here - confirm whether it needs a disconnect.
                    shell = RemoteMachineShellConnection(node)
                    break
                except Exception:
                    # Narrowed from a bare except so KeyboardInterrupt /
                    # SystemExit can still abort the retry loop.
                    self.log.info("Unable to connect to the host. "
                                  "Machine has not restarted")
                    self.sleep(60, "Sleep for another minute and try "
                                   "again")
def stop_indexer(self):
    """
    Stop the indexer on the nodes to fail, then start it again.

    The task returned by ``self.async_stop_indexer()`` is scheduled and
    awaited; an exception while waiting is turned into ``self.fail``.
    Regardless of the outcome, a "start_indexer"
    AutoFailoverNodesFailureTask is scheduled and awaited afterwards.

    :return: Nothing
    """
    stop_task = self.async_stop_indexer()
    self.task_manager.schedule(stop_task)
    try:
        stop_task.result()
    except Exception as err:
        self.fail("Exception: {}".format(err))
    finally:
        # Always issue the start action, even when the stop task failed.
        restart_task = AutoFailoverNodesFailureTask(
            self.orchestrator,
            self.server_to_fail,
            "start_indexer",
            self.timeout,
            0,
            False,
            0,
            check_for_failover=False)
        self.task_manager.schedule(restart_task)
        restart_task.result()
def enable_firewall(self):
    """
    Enable firewall on the nodes to fail in the tests.

    Builds one NodeDownTimerTask per node in ``self.server_to_fail``,
    passes them as ``failure_timers`` to an AutoFailoverNodesFailureTask
    running the "enable_firewall" action, schedules the timers and the
    failure task, and blocks on the failure task's result. Any exception
    raised while waiting is reported through ``self.fail``.

    :return: Nothing
    """
    node_down_timer_tasks = [NodeDownTimerTask(node.ip)
                             for node in self.server_to_fail]
    task = AutoFailoverNodesFailureTask(self.orchestrator,
                                        self.server_to_fail,
                                        "enable_firewall", self.timeout,
                                        self.pause_between_failover_action,
                                        self.failover_expected,
                                        self.timeout_buffer,
                                        failure_timers=node_down_timer_tasks)
    # Timers go onto their own task manager before the failure task itself
    # is scheduled.
    for node_down_timer_task in node_down_timer_tasks:
        self.node_failure_task_manager.schedule(node_down_timer_task, 2)
    self.task_manager.schedule(task)
    try:
        task.result()
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        self.fail("Exception: {}".format(e))
def stop_memcached(self):
    """
    Stop memcached on the nodes to fail, then start it again.

    One NodeDownTimerTask(ip, 11211) is created per node in
    ``self.server_to_fail``. ``self.timeout_buffer`` is increased by 3
    (on the instance, so the change persists after this call) before the
    "stop_memcached" AutoFailoverNodesFailureTask is built. The timers
    and the failure task are scheduled and the failure task is awaited;
    an exception while waiting is turned into ``self.fail``. Regardless
    of the outcome, a "start_memcached" task is scheduled and awaited
    afterwards.

    :return: Nothing
    """
    timers = [NodeDownTimerTask(node.ip, 11211)
              for node in self.server_to_fail]
    self.timeout_buffer += 3
    stop_task = AutoFailoverNodesFailureTask(
        self.orchestrator,
        self.server_to_fail,
        "stop_memcached",
        self.timeout,
        self.pause_between_failover_action,
        self.failover_expected,
        self.timeout_buffer,
        failure_timers=timers)
    for timer in timers:
        self.node_failure_task_manager.schedule(timer, 2)
    self.task_manager.schedule(stop_task)
    try:
        stop_task.result()
    except Exception as err:
        self.fail("Exception: {}".format(err))
    finally:
        # Always issue the start action, even when the stop task failed.
        start_task = AutoFailoverNodesFailureTask(
            self.orchestrator,
            self.server_to_fail,
            "start_memcached",
            self.timeout,
            0,
            False,
            0,
            check_for_failover=False)
        self.task_manager.schedule(start_task)
        start_task.result()
def stop_couchbase_server(self):
    """
    Stop the couchbase server on the nodes to fail in the tests.

    One NodeDownTimerTask(ip, port) is created per node in
    ``self.server_to_fail`` and handed to the "stop_couchbase"
    AutoFailoverNodesFailureTask as ``failure_timers``. The timers and
    the failure task are scheduled and the failure task is awaited; an
    exception while waiting is turned into ``self.fail``.

    :return: Nothing
    """
    timers = [NodeDownTimerTask(node.ip, node.port)
              for node in self.server_to_fail]
    stop_task = AutoFailoverNodesFailureTask(
        self.orchestrator,
        self.server_to_fail,
        "stop_couchbase",
        self.timeout,
        self.pause_between_failover_action,
        self.failover_expected,
        self.timeout_buffer,
        failure_timers=timers)
    # Timers are queued first, then the failure task itself.
    for timer in timers:
        self.node_failure_task_manager.schedule(timer, 2)
    self.task_manager.schedule(stop_task)
    try:
        stop_task.result()
    except Exception as err:
        self.fail("Exception: {}".format(err))
failure_timers=node_down_timer_tasks) for node_down_timer_task in node_down_timer_tasks: self.node_failure_task_manager.schedule(node_down_timer_task, 2) self.task_manager.schedule(task) try: task.result() except Exception, e: self.fail("Exception: {}".format(e)) finally: task = AutoFailoverNodesFailureTask(self.orchestrator, self.server_to_fail, "start_memcached", self.timeout, 0, False, 0, check_for_failover=False) self.task_manager.schedule(task) task.result() def split_network(self): """ Split the network in the cluster. Stop network traffic from few nodes while allowing the traffic from rest of the cluster. :return: Nothing """ self.time_start = time.time() if self.server_to_fail.__len__() < 2: self.fail("Need atleast 2 servers to fail") task = AutoFailoverNodesFailureTask(self.orchestrator, self.server_to_fail, "network_split", self.timeout, self.pause_between_failover_action, False,