def failover_recovery(self, node_to_upgrade, recovery_type, graceful=True):
    """
    Upgrade one node using the failover / add-back / recovery flow.

    The node is failed over, upgraded in place over a shell connection,
    added back with the requested recovery type and the cluster is then
    rebalanced. Every failure is reported through self.log_failure and
    ends the procedure early; nothing is returned.

    :param node_to_upgrade: server object of the node to upgrade
    :param recovery_type: value handed to rest.set_recovery_type; when it
                          equals "delta", self.bucket.name is passed to
                          the rebalance as a delta-recovery bucket
    :param graceful: forwarded unchanged to rest.fail_over
    """
    rest = self.__get_rest_node(node_to_upgrade)
    otp_node = self.__get_otp_node(rest, node_to_upgrade)

    self.log.info("Failing over the node %s" % otp_node.id)
    if not rest.fail_over(otp_node.id, graceful=graceful):
        self.log_failure("Failover unsuccessful")
        return

    # Monitor failover rebalance
    if not rest.monitorRebalance():
        self.log_failure("Graceful failover rebalance failed")
        return

    shell = RemoteMachineShellConnection(node_to_upgrade)
    try:
        appropriate_build = self.__get_build(self.upgrade_version, shell)
        self.assertTrue(appropriate_build.url,
                        msg="Unable to find build %s"
                            % self.upgrade_version)
        self.assertTrue(shell.download_build(appropriate_build),
                        "Failed while downloading the build!")

        self.log.info("Starting node upgrade")
        upgrade_success = shell.couchbase_upgrade(
            appropriate_build,
            save_upgrade_config=False,
            forcefully=self.is_downgrade)
    finally:
        # Release the shell even when one of the assertions above raises
        shell.disconnect()

    if not upgrade_success:
        self.log_failure("Upgrade failed")
        return

    # NOTE(review): add-back addresses the node as "ns_1@<ip>" while
    # fail_over / set_recovery_type use otp_node.id - confirm these
    # always refer to the same node name.
    rest.add_back_node("ns_1@" + otp_node.ip)
    self.sleep(5, "Wait after add_back_node")
    rest.set_recovery_type(otp_node.id, recoveryType=recovery_type)

    delta_recovery_buckets = list()
    if recovery_type == "delta":
        delta_recovery_buckets.append(self.bucket.name)

    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   deltaRecoveryBuckets=delta_recovery_buckets)
    if not rest.monitorRebalance():
        # This is the rebalance that brings the upgraded node back in,
        # not the failover one, so report it as such.
        self.log_failure("Rebalance after %s recovery failed"
                         % recovery_type)
def offline(self, node_to_upgrade, version, rebalance_required=False):
    """
    Upgrade a node in place and validate the cluster state afterwards.

    The build for `version` is downloaded and installed over a shell
    connection, ns_server is given up to 120 seconds to come back, and
    the "balanced" flag of rest.cluster_status() is then checked.
    Every failure is reported through self.log_failure and ends the
    procedure early; nothing is returned.

    :param node_to_upgrade: server object of the node to upgrade
    :param version: build version looked up through __get_build
    :param rebalance_required: when True and the cluster reports itself
                               as not balanced, a rebalance over all
                               nodes is started and monitored; when
                               False an unbalanced cluster is reported
                               as a failure
    """
    rest = RestConnection(node_to_upgrade)
    shell = RemoteMachineShellConnection(node_to_upgrade)
    try:
        appropriate_build = self.__get_build(version, shell)
        self.assertTrue(appropriate_build.url,
                        msg="Unable to find build %s" % version)
        self.assertTrue(shell.download_build(appropriate_build),
                        "Failed while downloading the build!")

        self.log.info("Starting node upgrade")
        upgrade_success = shell.couchbase_upgrade(
            appropriate_build,
            save_upgrade_config=False,
            forcefully=self.is_downgrade)
    finally:
        # Release the shell even when one of the assertions above raises
        shell.disconnect()

    if not upgrade_success:
        self.log_failure("Upgrade failed")
        return

    self.log.info("Wait for ns_server to accept connections")
    if not rest.is_ns_server_running(timeout_in_seconds=120):
        self.log_failure("Server not started post upgrade")
        return

    self.log.info("Validate the cluster rebalance status")
    if rest.cluster_status()["balanced"]:
        return

    if not rebalance_required:
        self.log_failure("Cluster reported (/pools/default) balanced=false")
        return

    # Cluster is unbalanced and the caller allows fixing it via rebalance
    otp_nodes = [node.id for node in rest.node_statuses()]
    rest.rebalance(otpNodes=otp_nodes, ejectedNodes=[])
    if not rest.monitorRebalance():
        self.log_failure(
            "Rebalance failed post node upgrade of {0}"
            .format(node_to_upgrade))
def _upgrade(self, upgrade_version, server, queue=None, skip_init=False,
             info=None, save_upgrade_config=False, fts_query_limit=None,
             debug_logs=False):
    """
    Download and install `upgrade_version` on `server`, then wait for the
    node to restart.

    :param upgrade_version: version to install; also used in log and
                            assertion messages
    :param server: server object to connect to (server.ip is logged)
    :param queue: optional queue; False is put on it when the upgrade
                  raises
    :param skip_init: when False, rest.init_cluster is called with the
                      credentials from self.rest_settings after restart
    :param info: forwarded to __get_build
    :param save_upgrade_config: forwarded to couchbase_upgrade
    :param fts_query_limit: forwarded to couchbase_upgrade
    :param debug_logs: forwarded to couchbase_upgrade
    :return: the two values returned by remote.couchbase_upgrade
    :raises Exception: any error is logged and re-raised; on non-Linux
                       hosts the state of the installer scheduled tasks
                       is logged first
    """
    remote = None
    try:
        remote = RemoteMachineShellConnection(server)
        # NOTE(review): other callers in this file invoke __get_build as
        # (version, shell) - confirm which argument order the helper
        # actually expects.
        appropriate_build = self.__get_build(server, upgrade_version,
                                             remote, info=info)
        self.assertTrue(
            appropriate_build.url,
            msg="unable to find build {0}".format(upgrade_version))
        self.assertTrue(remote.download_build(appropriate_build),
                        "Build wasn't downloaded!")
        # NOTE(review): the result is unpacked into two values here while
        # other callers in this file treat it as a single success flag.
        output, error = remote.couchbase_upgrade(
            appropriate_build,
            save_upgrade_config=save_upgrade_config,
            forcefully=self.is_downgrade,
            fts_query_limit=fts_query_limit,
            debug_logs=debug_logs)
        self.log.info("upgrade {0} to version {1} is completed".format(
            server.ip, upgrade_version))

        if 5.0 > float(self.initial_version[:3]) and self.is_centos7:
            remote.execute_command("systemctl daemon-reload")
            remote.start_server()

        self.rest = RestConnection(server)
        if self.is_linux:
            self.wait_node_restarted(
                server,
                wait_time=testconstants.NS_SERVER_TIMEOUT * 4,
                wait_if_warmup=True)
        else:
            self.wait_node_restarted(
                server,
                wait_time=testconstants.NS_SERVER_TIMEOUT * 10,
                wait_if_warmup=True,
                check_service=True)

        if not skip_init:
            self.rest.init_cluster(self.rest_settings.rest_username,
                                   self.rest_settings.rest_password)
        self.sleep(self.sleep_time)

        remote.disconnect()
        # Already closed; keep the finally clause from closing it again
        remote = None
        self.sleep(10)
        return output, error
    except Exception as ex:
        self.log.error(ex)
        if queue is not None:
            queue.put(False)
        if not self.is_linux:
            # Use a fresh connection to dump the installer scheduled
            # tasks; the primary one may be what failed.
            diag_shell = RemoteMachineShellConnection(server)
            try:
                for task in ("removeme", "installme", "upgrademe"):
                    task_out, task_err = diag_shell.execute_command(
                        "cmd /c schtasks /Query /FO LIST /TN %s /V"
                        % task)
                    diag_shell.log_command_output(task_out, task_err)
            finally:
                diag_shell.disconnect()
        # Bare raise keeps the original traceback
        raise
    finally:
        # Close the primary connection on the failure path as well
        if remote is not None:
            remote.disconnect()