def remove_node(self, otpnode=None, wait_for_rebalance=True, rest=None):
    """
    Method to remove nodes from a cluster.

    Nothing is done when the cluster does not have more nodes than the
    number being removed (this is the case when the master node is running
    the cbas service as well). If the first removal attempt raises, it is
    retried once after a 5 second sleep.

    :param otpnode: list of nodes to be removed; every entry must expose
                    an ``id`` attribute. ``None`` is treated as an empty
                    list.
    :param wait_for_rebalance: boolean, wait for rebalance to finish after
                               removing the nodes. When true, the result
                               of the removal is asserted.
    :param rest: RestConnection object; falls back to ``self.rest``.
    """
    # The declared default is None; normalise it so len() and the
    # comprehension below do not raise TypeError.
    if otpnode is None:
        otpnode = []
    if not rest:
        rest = self.rest

    nodes = rest.node_statuses()
    # This is the case when master node is running cbas service as well
    if len(nodes) <= len(otpnode):
        return

    helper = RestHelper(rest)
    # Computed once so the first attempt and the retry use identical input.
    known_ids = [node.id for node in nodes]
    ejected_ids = [node.id for node in otpnode]
    try:
        removed = helper.remove_nodes(
            knownNodes=known_ids,
            ejectedNodes=ejected_ids,
            wait_for_rebalance=wait_for_rebalance)
    except Exception:
        self.sleep(5, "Rebalance failed on Removal. Retry.. THIS IS A BUG")
        removed = helper.remove_nodes(
            knownNodes=known_ids,
            ejectedNodes=ejected_ids,
            wait_for_rebalance=wait_for_rebalance)

    if wait_for_rebalance:
        # Wrap in a 1-tuple so a tuple-typed otpnode cannot break the
        # %-formatting.
        self.assertTrue(
            removed,
            "Rebalance operation failed while removing %s" % (otpnode,))
def terminate(self):
    """
    Stop XDCR replications, delete all buckets and shrink every cluster.

    For each cluster key in ``self._clusters_keys_olst`` every bucket on
    every node is deleted, then all nodes except the one reported by
    ``get_nodes_self()`` on the first node are rebalanced out.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm the per-cluster rebalance belongs inside the cluster
    loop.
    """
    if self._xdcr:
        self._terminate_replications(self._s_master, "cluster1")
        if self._rdirection == "bidirection":
            self._terminate_replications(self._d_master, "cluster0")

    for cluster_key in self._clusters_keys_olst:
        nodes = self._clusters_dic[cluster_key]

        # Drop every bucket, node by node.
        for node in nodes:
            node_rest = RestConnection(node)
            for bucket in node_rest.get_buckets():
                deleted = node_rest.delete_bucket(bucket.name)
                if deleted:
                    self._log.info('Deleted bucket : {0} from {1}'.format(
                        bucket.name, node.ip))

        # The first node of the cluster acts as the master for the rebalance.
        master_rest = RestConnection(nodes[0])
        helper = RestHelper(master_rest)
        servers = master_rest.node_statuses()
        master_id = master_rest.get_nodes_self().id
        if len(nodes) <= 1:
            continue

        known_ids = [server.id for server in servers]
        ejected_ids = [otp_id for otp_id in known_ids if otp_id != master_id]
        helper.remove_nodes(knownNodes=known_ids,
                            ejectedNodes=ejected_ids,
                            wait_for_rebalance=True)
def cleanup_cluster(servers, wait_for_rebalance=True, master=None):
    """
    Rebalance every non-master node out of the cluster and verify the reset.

    Nodes whose port lies in [9091, 9991) are ejected directly first. The
    remaining non-master nodes are rebalanced out, then each one is polled
    for up to 30 seconds until it reports no pools. Nodes that never report
    an empty pool list get a forced eject, and the call still fails.

    :param servers: list of server objects; ``servers[0]`` provides the
                    REST credentials and is the default master.
    :param wait_for_rebalance: passed through to ``RestHelper.remove_nodes``.
    :param master: server to drive the cleanup from; defaults to
                   ``servers[0]``.
    :raises Exception: when at least one ejected node was not cleaned.
    """
    log = logger.Logger.get_logger()
    if master is None:
        master = servers[0]
    rest = RestConnection(master)
    helper = RestHelper(rest)
    helper.is_ns_server_running(timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id

    # Iterate over a snapshot: removing from `nodes` while iterating it
    # directly skips the element that follows every removed node.
    for node in list(nodes):
        if 9091 <= int(node.port) < 9991:
            rest.eject_node(node)
            nodes.remove(node)

    if len(nodes) <= 1:
        return

    log.info("rebalancing all nodes in order to remove nodes")
    to_eject = [node for node in nodes if node.id != master_id]
    ejected_ids = [node.id for node in to_eject]
    rest.log_client_error("Starting rebalance from test, ejected nodes %s" % ejected_ids)
    # The return value is not used; success is verified by polling below.
    helper.remove_nodes(knownNodes=[node.id for node in nodes],
                        ejectedNodes=ejected_ids,
                        wait_for_rebalance=wait_for_rebalance)

    success_cleaned = []
    for ejected in to_eject:
        ejected.rest_password = servers[0].rest_password
        ejected.rest_username = servers[0].rest_username
        try:
            node_rest = RestConnection(ejected)
        except Exception as ex:
            log.error("can't create rest connection after rebalance out for ejected nodes, "
                      "will retry after 10 seconds according to MB-8430: {0} ".format(ex))
            time.sleep(10)
            node_rest = RestConnection(ejected)
        start = time.time()
        # An ejected node that was reset reports an empty 'pools' list.
        while time.time() - start < 30:
            if len(node_rest.get_pools_info()["pools"]) == 0:
                success_cleaned.append(ejected)
                break
            time.sleep(0.1)
        if time.time() - start > 10:
            log.error("'pools' on node {0}:{1} - {2}".format(
                ejected.ip, ejected.port, node_rest.get_pools_info()["pools"]))

    not_cleaned = set(to_eject) - set(success_cleaned)
    for node in not_cleaned:
        # Report the node being handled, not the last node of the loop above.
        log.error("node {0}:{1} was not cleaned after removing from cluster".format(
            node.ip, node.port))
        try:
            RestConnection(node).force_eject_node()
        except Exception as ex:
            log.error("force_eject_node {0}:{1} failed: {2}".format(node.ip, node.port, ex))
    if not_cleaned:
        raise Exception("not all ejected nodes were cleaned successfully")

    log.info("removed all the nodes from cluster associated with {0} ? {1}".format(
        servers[0], [(node.id, node.port) for node in to_eject]))
def cleanup_cluster(servers, wait_for_rebalance=True, master=None):
    """
    Rebalance every non-master node out of the cluster and verify the reset.

    Nodes whose port lies in [9091, 9991) are ejected directly first. The
    remaining non-master nodes are rebalanced out, then each one is polled
    for up to 30 seconds until it reports no pools. Nodes that never report
    an empty pool list get a forced eject, and the call still fails.

    :param servers: list of server objects; ``servers[0]`` provides the
                    REST credentials and is the default master.
    :param wait_for_rebalance: passed through to ``RestHelper.remove_nodes``.
    :param master: server to drive the cleanup from; defaults to
                   ``servers[0]``.
    :raises Exception: when at least one ejected node was not cleaned.
    """
    log = logger.Logger.get_logger()
    if master is None:
        master = servers[0]
    rest = RestConnection(master)
    helper = RestHelper(rest)
    helper.is_ns_server_running(timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id

    # Walk a copy of the list; removing entries from the list being
    # iterated makes the loop skip the node after each removal.
    for candidate in list(nodes):
        if 9091 <= int(candidate.port) < 9991:
            rest.eject_node(candidate)
            nodes.remove(candidate)

    if len(nodes) <= 1:
        return

    log.info("rebalancing all nodes in order to remove nodes")
    non_master = [node for node in nodes if node.id != master_id]
    non_master_ids = [node.id for node in non_master]
    rest.log_client_error("Starting rebalance from test, ejected nodes %s" % non_master_ids)
    # Result intentionally unused; the polling below is the verification.
    helper.remove_nodes(knownNodes=[node.id for node in nodes],
                        ejectedNodes=non_master_ids,
                        wait_for_rebalance=wait_for_rebalance)

    success_cleaned = []
    for ejected in non_master:
        ejected.rest_password = servers[0].rest_password
        ejected.rest_username = servers[0].rest_username
        try:
            ejected_rest = RestConnection(ejected)
        except Exception as ex:
            log.error("can't create rest connection after rebalance out for ejected nodes, "
                      "will retry after 10 seconds according to MB-8430: {0} ".format(ex))
            time.sleep(10)
            ejected_rest = RestConnection(ejected)
        start = time.time()
        # Poll until the node reports an empty 'pools' list (max 30 seconds).
        while time.time() - start < 30:
            if len(ejected_rest.get_pools_info()["pools"]) == 0:
                success_cleaned.append(ejected)
                break
            time.sleep(0.1)
        if time.time() - start > 10:
            log.error("'pools' on node {0}:{1} - {2}".format(
                ejected.ip, ejected.port, ejected_rest.get_pools_info()["pools"]))

    leftovers = set(non_master) - set(success_cleaned)
    for node in leftovers:
        # Log the node actually being force-ejected.
        log.error("node {0}:{1} was not cleaned after removing from cluster".format(
            node.ip, node.port))
        try:
            RestConnection(node).force_eject_node()
        except Exception as ex:
            log.error("force_eject_node {0}:{1} failed: {2}".format(node.ip, node.port, ex))
    if leftovers:
        raise Exception("not all ejected nodes were cleaned successfully")

    log.info("removed all the nodes from cluster associated with {0} ? {1}".format(
        servers[0], [(node.id, node.port) for node in non_master]))
def remove_node(self, otpnode=None, wait_for_rebalance=True):
    """
    Remove the given nodes from the cluster behind ``self.rest``.

    Nothing is done when the cluster does not have more nodes than the
    number being removed (this is the case when the master node is running
    the cbas service as well). If the first removal attempt raises, it is
    retried once after 5 seconds.

    :param otpnode: list of nodes to remove; every entry must expose an
                    ``id`` attribute. ``None`` is treated as an empty list.
    :param wait_for_rebalance: passed through to
                               ``RestHelper.remove_nodes``; when true, a
                               falsy result is logged as an error.
    """
    # The declared default is None; normalise it so len() and the
    # comprehension below do not raise TypeError.
    if otpnode is None:
        otpnode = []

    nodes = self.rest.node_statuses()
    # This is the case when master node is running cbas service as well
    if len(nodes) <= len(otpnode):
        return

    helper = RestHelper(self.rest)
    known_ids = [node.id for node in nodes]
    ejected_ids = [node.id for node in otpnode]
    try:
        removed = helper.remove_nodes(knownNodes=known_ids,
                                      ejectedNodes=ejected_ids,
                                      wait_for_rebalance=wait_for_rebalance)
    except Exception:
        self.log.info("First time rebalance failed on Removal. Wait and try again. THIS IS A BUG.")
        time.sleep(5)
        removed = helper.remove_nodes(knownNodes=known_ids,
                                      ejectedNodes=ejected_ids,
                                      wait_for_rebalance=wait_for_rebalance)

    # The original ended with a bare `removed` expression, which silently
    # discarded the outcome; surface a failed rebalance in the log instead.
    if wait_for_rebalance and not removed:
        self.log.error("Rebalance operation failed while removing %s" % (otpnode,))
def cleanup_cluster(servers, wait_for_rebalance=True):
    """
    Rebalance out every node except the master of ``servers[0]``'s cluster.

    Waits for ns_server on ``servers[0]``, and if the cluster has more than
    one node, removes all nodes whose id differs from the master's id and
    logs the result of the removal.

    :param servers: list of server objects; only ``servers[0]`` is used.
    :param wait_for_rebalance: passed through to ``RestHelper.remove_nodes``.
    """
    log = logger.Logger.get_logger()
    rest = RestConnection(servers[0])
    helper = RestHelper(rest)
    helper.is_ns_server_running(timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id

    # A single-node cluster has nothing to rebalance out.
    if len(nodes) <= 1:
        return

    log.info("rebalancing all nodes in order to remove nodes")
    known_ids = [node.id for node in nodes]
    ejected_ids = [otp_id for otp_id in known_ids if otp_id != master_id]
    removed = helper.remove_nodes(knownNodes=known_ids,
                                  ejectedNodes=ejected_ids,
                                  wait_for_rebalance=wait_for_rebalance)
    message = "removed all the nodes from cluster associated with {0} ? {1}"
    log.info(message.format(servers[0], removed))
def remove_node(self, otpnode=None, wait_for_rebalance=True):
    """
    Remove the given nodes from the cluster behind ``self.rest``.

    Nothing is done when the cluster does not have more nodes than the
    number being removed (this is the case when the master node is running
    the cbas service as well). If the first removal attempt raises, it is
    retried once after a 5 second sleep.

    :param otpnode: list of nodes to remove; every entry must expose an
                    ``id`` attribute. ``None`` is treated as an empty list.
    :param wait_for_rebalance: passed through to
                               ``RestHelper.remove_nodes``; when true, the
                               result of the removal is asserted.
    """
    # The declared default is None; normalise it so len() and the
    # comprehension below do not raise TypeError.
    if otpnode is None:
        otpnode = []

    nodes = self.rest.node_statuses()
    # This is the case when master node is running cbas service as well
    if len(nodes) <= len(otpnode):
        return

    helper = RestHelper(self.rest)
    # Built once and shared by the first attempt and the retry.
    removal_args = dict(
        knownNodes=[node.id for node in nodes],
        ejectedNodes=[node.id for node in otpnode],
        wait_for_rebalance=wait_for_rebalance)
    try:
        removed = helper.remove_nodes(**removal_args)
    except Exception:
        self.sleep(5, "Rebalance failed on Removal. Retry.. THIS IS A BUG")
        removed = helper.remove_nodes(**removal_args)

    if wait_for_rebalance:
        # 1-tuple keeps the %-formatting safe if otpnode is a tuple.
        self.assertTrue(
            removed,
            "Rebalance operation failed while removing %s" % (otpnode,))
def cleanup_cluster(servers, wait_for_rebalance=True):
    """
    Shrink the cluster of ``servers[0]`` down to its master node.

    After ns_server on ``servers[0]`` is confirmed running, every node
    whose id is not the master's id is rebalanced out (only when the
    cluster has more than one node) and the outcome is logged.

    :param servers: list of server objects; only ``servers[0]`` is used.
    :param wait_for_rebalance: passed through to ``RestHelper.remove_nodes``.
    """
    log = logger.Logger.get_logger()
    rest = RestConnection(servers[0])
    helper = RestHelper(rest)
    helper.is_ns_server_running(
        timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id

    if len(nodes) > 1:
        log.info("rebalancing all nodes in order to remove nodes")
        known = []
        ejected = []
        for node in nodes:
            known.append(node.id)
            if node.id != master_id:
                ejected.append(node.id)
        removed = helper.remove_nodes(
            knownNodes=known,
            ejectedNodes=ejected,
            wait_for_rebalance=wait_for_rebalance)
        summary = "removed all the nodes from cluster associated with {0} ? {1}"
        log.info(summary.format(servers[0], removed))
def terminate(self):
    """
    Tear down replications, buckets and cluster membership.

    Terminates the XDCR replication(s) when ``self._xdcr`` is set, deletes
    every bucket on every node of every known cluster, and rebalances out
    all nodes but the master of each multi-node cluster.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm the per-cluster rebalance belongs inside the cluster
    loop.
    """
    if self._xdcr:
        self._terminate_replications(self._s_master, "cluster1")
        if self._rdirection == "bidirection":
            self._terminate_replications(self._d_master, "cluster0")

    for key in self._clusters_keys_olst:
        cluster_nodes = self._clusters_dic[key]

        for member in cluster_nodes:
            member_rest = RestConnection(member)
            buckets = member_rest.get_buckets()
            for bucket in buckets:
                if not member_rest.delete_bucket(bucket.name):
                    continue
                self._log.info('Deleted bucket : {0} from {1}'.format(bucket.name, member.ip))

        # Membership is managed through the first node of the cluster.
        rest = RestConnection(cluster_nodes[0])
        helper = RestHelper(rest)
        servers = rest.node_statuses()
        master_id = rest.get_nodes_self().id
        if len(cluster_nodes) > 1:
            known = []
            ejected = []
            for server in servers:
                known.append(server.id)
                if server.id != master_id:
                    ejected.append(server.id)
            helper.remove_nodes(knownNodes=known,
                                ejectedNodes=ejected,
                                wait_for_rebalance=True)
def cleanup_cluster(servers, wait_for_rebalance=True):
    """
    Rebalance every non-master node out of the cluster and verify the reset.

    After the rebalance each ejected node is polled for up to 30 seconds
    until it reports no pools. Any node that never does is logged and the
    call fails.

    :param servers: list of server objects; ``servers[0]`` is the master
                    and provides the REST credentials for ejected nodes.
    :param wait_for_rebalance: passed through to ``RestHelper.remove_nodes``.
    :raises Exception: when at least one ejected node was not cleaned.
    """
    log = logger.Logger.get_logger()
    rest = RestConnection(servers[0])
    helper = RestHelper(rest)
    helper.is_ns_server_running(timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id
    if len(nodes) <= 1:
        return

    log.info("rebalancing all nodes in order to remove nodes")
    to_eject = [node for node in nodes if node.id != master_id]
    ejected_ids = [node.id for node in to_eject]
    rest.log_client_error("Starting rebalance from test, ejected nodes %s" % ejected_ids)
    # The return value is not used; success is verified by polling below.
    helper.remove_nodes(knownNodes=[node.id for node in nodes],
                        ejectedNodes=ejected_ids,
                        wait_for_rebalance=wait_for_rebalance)

    success_cleaned = []
    for ejected in to_eject:
        ejected.rest_password = servers[0].rest_password
        ejected.rest_username = servers[0].rest_username
        node_rest = RestConnection(ejected)
        start = time.time()
        # An ejected node that was reset reports an empty 'pools' list.
        while time.time() - start < 30:
            if len(node_rest.get_pools_info()["pools"]) == 0:
                success_cleaned.append(ejected)
                break
            time.sleep(0.1)
        if time.time() - start > 10:
            log.error("'pools' on node {0}:{1} - {2}".format(
                ejected.ip, ejected.port, node_rest.get_pools_info()["pools"]))

    not_cleaned = set(to_eject) - set(success_cleaned)
    for node in not_cleaned:
        # Report the node that failed, not the last node polled above.
        log.error("node {0}:{1} was not cleaned after removing from cluster".format(
            node.ip, node.port))
    if not_cleaned:
        raise Exception("not all ejected nodes were cleaned successfully")

    log.info("removed all the nodes from cluster associated with {0} ? {1}".format(
        servers[0], [(node.id, node.port) for node in to_eject]))
def test_backup_upgrade_restore_default(self):
    """
    Back up the default bucket, upgrade the cluster, restore and verify keys.

    The last entry of ``self.servers`` is set aside as the "worker" that
    runs ``cbbackup`` / ``cbrestore``; the remaining servers form the
    cluster under test. The test returns early (logging an error) when
    fewer than two servers are available, when the initial and final
    versions are equal, or when the final version is not a 2.0 build.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm block boundaries against the real file.
    """
    if len(self.servers) < 2:
        self.log.error("At least 2 servers required for this test ..")
        return
    original_set = copy.copy(self.servers)
    worker = self.servers[len(self.servers) - 1]
    self.servers = self.servers[:len(self.servers) - 1]

    # The version currently installed on the master is the default for both
    # the initial and the final version parameters.
    shell = RemoteMachineShellConnection(self.master)
    o, r = shell.execute_command("cat /opt/couchbase/VERSION.txt")
    fin = o[0]
    shell.disconnect()
    initial_version = self.input.param("initial_version", fin)
    final_version = self.input.param("final_version", fin)
    if initial_version == final_version:
        self.log.error("Same initial and final versions ..")
        return
    if not final_version.startswith('2.0'):
        self.log.error("Upgrade test not set to run from 1.8.1 -> 2.0 ..")
        return
    builds, changes = BuildQuery().get_all_builds()
    product = 'couchbase-server-enterprise'

    #CASE where the worker isn't a 2.0+
    worker_flag = 0
    shell = RemoteMachineShellConnection(worker)
    o, r = shell.execute_command("cat /opt/couchbase/VERSION.txt")
    temp = o[0]
    # Close the version-check connection; the original left it open.
    shell.disconnect()
    if not temp.startswith('2.0'):
        worker_flag = 1
    if worker_flag == 1:
        self.log.info(
            "Loading version {0} on worker.. ".format(final_version))
        remote = RemoteMachineShellConnection(worker)
        info = remote.extract_remote_info()
        older_build = BuildQuery().find_build(builds, product,
                                              info.deliverable_type,
                                              info.architecture_type,
                                              final_version)
        remote.stop_couchbase()
        remote.couchbase_uninstall()
        remote.download_build(older_build)
        remote.install_server(older_build)
        remote.disconnect()

    remote_tmp = "{1}/{0}".format("backup", "/root")
    perm_comm = "mkdir -p {0}".format(remote_tmp)

    # Install the initial version unless it is already what is running.
    if not initial_version == fin:
        for server in self.servers:
            remote = RemoteMachineShellConnection(server)
            info = remote.extract_remote_info()
            self.log.info(
                "Loading version .. {0}".format(initial_version))
            older_build = BuildQuery().find_build(builds, product,
                                                  info.deliverable_type,
                                                  info.architecture_type,
                                                  initial_version)
            remote.stop_couchbase()
            remote.couchbase_uninstall()
            remote.download_build(older_build)
            remote.install_server(older_build)
            rest = RestConnection(server)
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            rest.init_cluster(server.rest_username, server.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()

    self.common_setUp()
    bucket = "default"
    if len(self.servers) > 1:
        self.add_nodes_and_rebalance()
    rest = RestConnection(self.master)
    info = rest.get_nodes_self()
    size = int(info.memoryQuota * 2.0 / 3.0)
    rest.create_bucket(bucket, ramQuotaMB=size)
    ready = BucketOperationHelper.wait_for_memcached(self.master, bucket)
    self.assertTrue(ready, "wait_for_memcached_failed")
    distribution = {10: 0.2, 20: 0.5, 30: 0.25, 40: 0.05}
    inserted_keys, rejected_keys = MemcachedClientHelper.load_bucket_and_return_the_keys(
        servers=[self.master],
        name=bucket,
        ram_load_ratio=0.5,
        value_size_distribution=distribution,
        moxi=True,
        write_only=True,
        delete_ratio=0.1,
        number_of_threads=2)
    if len(self.servers) > 1:
        rest = RestConnection(self.master)
        self.assertTrue(RebalanceHelper.wait_for_replication(
            rest.get_nodes(), timeout=180),
            msg="replication did not complete")

    # Wait for the disk queues to drain before taking the backup.
    ready = RebalanceHelper.wait_for_stats_on_all(self.master, bucket,
                                                  'ep_queue_size', 0)
    self.assertTrue(ready, "wait_for ep_queue_size == 0 failed")
    ready = RebalanceHelper.wait_for_stats_on_all(self.master, bucket,
                                                  'ep_flusher_todo', 0)
    # Message corrected: this assertion is about ep_flusher_todo.
    self.assertTrue(ready, "wait_for ep_flusher_todo == 0 failed")
    # Only referenced by the commented-out BackupHelper call below; the
    # REST call is kept so the request sequence is unchanged.
    node = RestConnection(self.master).get_nodes_self()
    shell = RemoteMachineShellConnection(worker)
    o, r = shell.execute_command(perm_comm)
    shell.log_command_output(o, r)
    shell.disconnect()

    #Backup
    #BackupHelper(self.master, self).backup(bucket, node, remote_tmp)
    shell = RemoteMachineShellConnection(worker)
    shell.execute_command(
        "/opt/couchbase/bin/cbbackup http://{0}:{1} {2}".format(
            self.master.ip, self.master.port, remote_tmp))
    shell.disconnect()
    BucketOperationHelper.delete_bucket_or_assert(self.master, bucket, self)
    time.sleep(30)

    #Upgrade
    for server in self.servers:
        self.log.info(
            "Upgrading to current version {0}".format(final_version))
        remote = RemoteMachineShellConnection(server)
        info = remote.extract_remote_info()
        new_build = BuildQuery().find_build(builds, product,
                                            info.deliverable_type,
                                            info.architecture_type,
                                            final_version)
        remote.stop_couchbase()
        remote.couchbase_uninstall()
        remote.download_build(new_build)
        remote.install_server(new_build)
        rest = RestConnection(server)
        RestHelper(rest).is_ns_server_running(
            testconstants.NS_SERVER_TIMEOUT)
        rest.init_cluster(server.rest_username, server.rest_password)
        rest.init_cluster_memoryQuota(
            memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
        remote.disconnect()
    time.sleep(30)

    #Restore
    rest = RestConnection(self.master)
    info = rest.get_nodes_self()
    size = int(info.memoryQuota * 2.0 / 3.0)
    rest.create_bucket(bucket, ramQuotaMB=size)
    # NOTE(review): `server` is the last node of the upgrade loop above;
    # confirm whether self.master was intended here.
    ready = BucketOperationHelper.wait_for_memcached(server, bucket)
    self.assertTrue(ready, "wait_for_memcached_failed")
    #BackupHelper(self.master, self).restore(backup_location=remote_tmp, moxi_port=info.moxi)
    shell = RemoteMachineShellConnection(worker)
    shell.execute_command(
        "/opt/couchbase/bin/cbrestore {2} http://{0}:{1} -b {3}".format(
            self.master.ip, self.master.port, remote_tmp, bucket))
    shell.disconnect()
    time.sleep(60)
    keys_exist = BucketOperationHelper.keys_exist_or_assert_in_parallel(
        inserted_keys, self.master, bucket, self, concurrency=4)
    self.assertTrue(keys_exist, msg="unable to verify keys after restore")
    time.sleep(30)
    BucketOperationHelper.delete_bucket_or_assert(self.master, bucket, self)

    # Rebalance out every node except the master.
    rest = RestConnection(self.master)
    helper = RestHelper(rest)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id
    if len(self.servers) > 1:
        removed = helper.remove_nodes(
            knownNodes=[node.id for node in nodes],
            ejectedNodes=[
                node.id for node in nodes if node.id != master_id
            ],
            wait_for_rebalance=True)
    shell = RemoteMachineShellConnection(worker)
    shell.remove_directory(remote_tmp)
    shell.disconnect()

    # Put the worker back and reinstall the initial version when it was
    # the one originally found on the master.
    self.servers = copy.copy(original_set)
    if initial_version == fin:
        builds, changes = BuildQuery().get_all_builds()
        for server in self.servers:
            remote = RemoteMachineShellConnection(server)
            info = remote.extract_remote_info()
            self.log.info(
                "Loading version .. {0}".format(initial_version))
            older_build = BuildQuery().find_build(builds, product,
                                                  info.deliverable_type,
                                                  info.architecture_type,
                                                  initial_version)
            remote.stop_couchbase()
            remote.couchbase_uninstall()
            remote.download_build(older_build)
            remote.install_server(older_build)
            rest = RestConnection(server)
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            rest.init_cluster(server.rest_username, server.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()
def _install_and_upgrade(self,
                         initial_version='1.6.5.3',
                         create_buckets=False,
                         insert_data=False,
                         start_upgraded_first=True,
                         load_ratio=-1,
                         roll_upgrade=False,
                         upgrade_path=[],
                         do_new_rest=False):
    """
    Install ``initial_version`` on all servers, cluster them, then upgrade.

    The target version is read from the ``version`` test parameter and is
    appended to ``upgrade_path``. With ``roll_upgrade`` false, all servers
    are stopped and upgraded together for each version on the path.
    Otherwise each server in turn is rebalanced out, reinstalled or
    upgraded, and rebalanced back in.

    :param initial_version: version installed before the upgrade starts.
    :param create_buckets: create and load buckets before clustering, and
                           check the 'default' bucket after upgrading.
    :param insert_data: verify the inserted keys after upgrading.
    :param start_upgraded_first: offline path only; start each server right
                                 after its upgrade instead of starting all
                                 servers together afterwards.
    :param load_ratio: passed to ``self._load_data``.
    :param roll_upgrade: choose the rolling (online) upgrade path.
    :param upgrade_path: intermediate versions to upgrade through; only
                         read, never mutated, so the shared default list
                         is harmless here.
    :param do_new_rest: rolling path only; issue the rebalance through the
                        newly re-added node (MB-5108).

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm block boundaries against the real file.
    """
    node_upgrade_path = []
    node_upgrade_path.extend(upgrade_path)
    #then start them in whatever order you want
    inserted_keys = []
    log = logger.Logger.get_logger()
    if roll_upgrade:
        log.info("performing an online upgrade")
    # NOTE: `input` shadows the builtin of the same name inside this method.
    input = TestInputSingleton.input
    rest_settings = input.membase_settings
    servers = input.servers
    save_upgrade_config = False
    is_amazon = False
    if input.test_params.get('amazon', False):
        is_amazon = True
    # 1.6.x / 1.7.x builds use the membase product name.
    if initial_version.startswith("1.6") or initial_version.startswith(
            "1.7"):
        product = 'membase-server-enterprise'
    else:
        product = 'couchbase-server-enterprise'
    # install older build on all nodes
    for server in servers:
        remote = RemoteMachineShellConnection(server)
        rest = RestConnection(server)
        info = remote.extract_remote_info()
        # check to see if we are installing from latestbuilds or releases
        # note: for newer releases (1.8.0) even release versions can have the
        # form 1.8.0r-55
        if re.search('r', initial_version):
            builds, changes = BuildQuery().get_all_builds()
            older_build = BuildQuery().find_membase_build(
                builds,
                deliverable_type=info.deliverable_type,
                os_architecture=info.architecture_type,
                build_version=initial_version,
                product=product,
                is_amazon=is_amazon)
        else:
            older_build = BuildQuery().find_membase_release_build(
                deliverable_type=info.deliverable_type,
                os_architecture=info.architecture_type,
                build_version=initial_version,
                product=product,
                is_amazon=is_amazon)
        remote.membase_uninstall()
        remote.couchbase_uninstall()
        remote.stop_membase()
        remote.stop_couchbase()
        remote.download_build(older_build)
        #now let's install ?
        remote.membase_install(older_build)
        RestHelper(rest).is_ns_server_running(
            testconstants.NS_SERVER_TIMEOUT)
        rest.init_cluster_port(rest_settings.rest_username,
                               rest_settings.rest_password)
        rest.init_cluster_memoryQuota(
            memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
        remote.disconnect()
    bucket_data = {}
    master = servers[0]
    if create_buckets:
        #let's create buckets
        #wait for the bucket
        #bucket port should also be configurable , pass it as the
        #parameter to this test ? later
        self._create_default_bucket(master)
        inserted_keys = self._load_data(master, load_ratio)
        _create_load_multiple_bucket(self, master, bucket_data, howmany=2)
    # cluster all the nodes together
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                   rest_settings, self)
    rest = RestConnection(master)
    nodes = rest.node_statuses()
    otpNodeIds = []
    for node in nodes:
        otpNodeIds.append(node.id)
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(
        rebalanceStarted,
        "unable to start rebalance on master node {0}".format(master.ip))
    log.info('started rebalance operation on master node {0}'.format(
        master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(
        rebalanceSucceeded,
        "rebalance operation for nodes: {0} was not successful".format(
            otpNodeIds))
    if initial_version == "1.7.0" or initial_version == "1.7.1":
        self._save_config(rest_settings, master)
    input_version = input.test_params['version']
    node_upgrade_path.append(input_version)
    current_version = initial_version
    previous_version = current_version
    #if we dont want to do roll_upgrade ?
    log.info("Upgrade path: {0} -> {1}".format(initial_version,
                                               node_upgrade_path))
    log.info("List of servers {0}".format(servers))
    if not roll_upgrade:
        # Offline upgrade: every version on the path is applied to all
        # servers while they are stopped.
        for version in node_upgrade_path:
            previous_version = current_version
            current_version = version
            if version != initial_version:
                log.info("Upgrading to version {0}".format(version))
                self._stop_membase_servers(servers)
                if previous_version.startswith(
                        "1.7") and current_version.startswith("1.8"):
                    save_upgrade_config = True
                # No need to save the upgrade config from 180 to 181
                if previous_version.startswith(
                        "1.8.0") and current_version.startswith("1.8.1"):
                    save_upgrade_config = False
                appropriate_build = _get_build(servers[0],
                                               version,
                                               is_amazon=is_amazon)
                self.assertTrue(
                    appropriate_build.url,
                    msg="unable to find build {0}".format(version))
                for server in servers:
                    remote = RemoteMachineShellConnection(server)
                    remote.download_build(appropriate_build)
                    remote.membase_upgrade(
                        appropriate_build,
                        save_upgrade_config=save_upgrade_config)
                    RestHelper(
                        RestConnection(server)).is_ns_server_running(
                            testconstants.NS_SERVER_TIMEOUT)
                    #verify admin_creds still set
                    pools_info = RestConnection(server).get_pools_info()
                    # NOTE(review): assertTrue treats its second argument
                    # as the failure message, so this only checks that
                    # implementationVersion is truthy - confirm whether an
                    # equality check was intended.
                    self.assertTrue(pools_info['implementationVersion'],
                                    appropriate_build.product_version)
                    if start_upgraded_first:
                        log.info("Starting server {0} post upgrade".format(
                            server))
                        remote.start_membase()
                    else:
                        remote.stop_membase()
                    remote.disconnect()
                if not start_upgraded_first:
                    log.info("Starting all servers together")
                    self._start_membase_servers(servers)
                time.sleep(TIMEOUT_SECS)
                if version == "1.7.0" or version == "1.7.1":
                    self._save_config(rest_settings, master)
                if create_buckets:
                    self.assertTrue(
                        BucketOperationHelper.wait_for_bucket_creation(
                            'default', RestConnection(master)),
                        msg="bucket 'default' does not exist..")
                if insert_data:
                    self._verify_data(master, rest, inserted_keys)
    # rolling upgrade
    else:
        version = input.test_params['version']
        appropriate_build = _get_build(servers[0],
                                       version,
                                       is_amazon=is_amazon)
        self.assertTrue(appropriate_build.url,
                        msg="unable to find build {0}".format(version))
        # rebalance node out
        # remove membase from node
        # install destination version onto node
        # rebalance it back into the cluster
        for server_index in range(len(servers)):
            server = servers[server_index]
            # Index -1 wraps around, so the first rolling node is managed
            # from the last server in the list.
            master = servers[server_index - 1]
            log.info("current master is {0}, rolling node is {1}".format(
                master, server))
            rest = RestConnection(master)
            nodes = rest.node_statuses()
            allNodes = []
            toBeEjectedNodes = []
            for node in nodes:
                allNodes.append(node.id)
                if "{0}:{1}".format(node.ip,
                                    node.port) == "{0}:{1}".format(
                                        server.ip, server.port):
                    toBeEjectedNodes.append(node.id)
            helper = RestHelper(rest)
            removed = helper.remove_nodes(knownNodes=allNodes,
                                          ejectedNodes=toBeEjectedNodes)
            self.assertTrue(
                removed,
                msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
            remote = RemoteMachineShellConnection(server)
            remote.download_build(appropriate_build)
            # if initial version is 180
            # Don't uninstall the server
            if not initial_version.startswith('1.8.0'):
                remote.membase_uninstall()
                remote.couchbase_uninstall()
                remote.membase_install(appropriate_build)
            else:
                remote.membase_upgrade(appropriate_build)
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            log.info(
                "sleep for 10 seconds to wait for membase-server to start..."
            )
            time.sleep(TIMEOUT_SECS)
            rest.init_cluster_port(rest_settings.rest_username,
                                   rest_settings.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()
            #readd this to the cluster
            ClusterOperationHelper.add_all_nodes_or_assert(
                master, [server], rest_settings, self)
            nodes = rest.node_statuses()
            otpNodeIds = []
            for node in nodes:
                otpNodeIds.append(node.id)
            # Issue rest call to the newly added node
            # MB-5108
            if do_new_rest:
                master = server
                rest = RestConnection(master)
            rebalanceStarted = rest.rebalance(otpNodeIds, [])
            self.assertTrue(
                rebalanceStarted,
                "unable to start rebalance on master node {0}".format(
                    master.ip))
            log.info(
                'started rebalance operation on master node {0}'.format(
                    master.ip))
            rebalanceSucceeded = rest.monitorRebalance()
            self.assertTrue(
                rebalanceSucceeded,
                "rebalance operation for nodes: {0} was not successful".
                format(otpNodeIds))
            #TODO: how can i verify that the cluster init config is preserved
            # verify data on upgraded nodes
            if create_buckets:
                self.assertTrue(BucketOperationHelper.wait_for_bucket_creation(
                    'default', RestConnection(master)),
                    msg="bucket 'default' does not exist..")
            if insert_data:
                self._verify_data(master, rest, inserted_keys)
                # NOTE(review): nesting of this per-bucket check under
                # `if insert_data` is an assumption - confirm.
                rest = RestConnection(master)
                buckets = rest.get_buckets()
                for bucket in buckets:
                    BucketOperationHelper.keys_exist_or_assert(
                        bucket_data[bucket.name]["inserted_keys"], master,
                        bucket.name, self)
def _install_and_upgrade(self,
                         initial_version='1.6.5.3',
                         create_buckets=False,
                         insert_data=False,
                         start_upgraded_first=True,
                         load_ratio=-1,
                         roll_upgrade=False,
                         upgrade_path=[]):
    """
    Install ``initial_version`` on all servers, cluster them, then upgrade.

    The target version is read from the ``version`` test parameter and is
    appended to ``upgrade_path``. With ``roll_upgrade`` false, all servers
    are stopped and upgraded together for each version on the path.
    Otherwise each server in turn is rebalanced out, reinstalled, and
    rebalanced back in.

    :param initial_version: version installed before the upgrade starts.
    :param create_buckets: create and load buckets before clustering, and
                           check the 'default' bucket after upgrading.
    :param insert_data: verify the inserted keys after upgrading.
    :param start_upgraded_first: offline path only; start each server right
                                 after its upgrade instead of starting all
                                 servers together afterwards.
    :param load_ratio: passed to ``self._load_data``.
    :param roll_upgrade: choose the rolling upgrade path.
    :param upgrade_path: intermediate versions to upgrade through; only
                         read, never mutated, so the shared default list
                         is harmless here.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm block boundaries against the real file.
    """
    node_upgrade_path = []
    node_upgrade_path.extend(upgrade_path)
    #then start them in whatever order you want
    inserted_keys = []
    log = logger.Logger.get_logger()
    if roll_upgrade:
        log.info("performing a rolling upgrade")
    # NOTE: `input` shadows the builtin of the same name inside this method.
    input = TestInputSingleton.input
    rest_settings = input.membase_settings
    servers = input.servers
    save_upgrade_config = False
    is_amazon = False
    if input.test_params.get('amazon', False):
        is_amazon = True
    # install older build on all nodes
    for server in servers:
        remote = RemoteMachineShellConnection(server)
        rest = RestConnection(server)
        info = remote.extract_remote_info()
        older_build = BuildQuery().find_membase_release_build(
            deliverable_type=info.deliverable_type,
            os_architecture=info.architecture_type,
            build_version=initial_version,
            product='membase-server-enterprise',
            is_amazon=is_amazon)
        remote.membase_uninstall()
        remote.couchbase_uninstall()
        remote.execute_command('/etc/init.d/membase-server stop')
        remote.download_build(older_build)
        #now let's install ?
        remote.membase_install(older_build)
        RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
        rest.init_cluster_port(rest_settings.rest_username, rest_settings.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
        remote.disconnect()
    bucket_data = {}
    master = servers[0]
    if create_buckets:
        #let's create buckets
        #wait for the bucket
        #bucket port should also be configurable , pass it as the
        #parameter to this test ? later
        self._create_default_bucket(master)
        inserted_keys = self._load_data(master, load_ratio)
        _create_load_multiple_bucket(self, master, bucket_data, howmany=2)
    # cluster all the nodes together
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers, rest_settings, self)
    rest = RestConnection(master)
    nodes = rest.node_statuses()
    otpNodeIds = []
    for node in nodes:
        otpNodeIds.append(node.id)
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(rebalanceStarted,
                    "unable to start rebalance on master node {0}".format(master.ip))
    log.info('started rebalance operation on master node {0}'.format(master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(rebalanceSucceeded,
                    "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
    if initial_version == "1.7.0" or initial_version == "1.7.1":
        self._save_config(rest_settings, master)
    input_version = input.test_params['version']
    node_upgrade_path.append(input_version)
    #if we dont want to do roll_upgrade ?
    log.info("Upgrade path: {0} -> {1}".format(initial_version, node_upgrade_path))
    log.info("List of servers {0}".format(servers))
    if not roll_upgrade:
        # Offline upgrade: every version on the path is applied to all
        # servers while they are stopped.
        for version in node_upgrade_path:
            # Compare by value: `is not` tests object identity, so two
            # equal version strings held in different objects were treated
            # as different and the already-installed version got
            # "upgraded" again.
            if version != initial_version:
                log.info("Upgrading to version {0}".format(version))
                self._stop_membase_servers(servers)
                # NOTE(review): the '.' in this pattern is unescaped, so it
                # matches any character between '1' and '8' - confirm
                # whether a literal "1.8" prefix was intended.
                if re.search('1.8', version):
                    save_upgrade_config = True
                appropriate_build = _get_build(servers[0], version, is_amazon=is_amazon)
                self.assertTrue(appropriate_build.url,
                                msg="unable to find build {0}".format(version))
                for server in servers:
                    remote = RemoteMachineShellConnection(server)
                    remote.download_build(appropriate_build)
                    remote.membase_upgrade(appropriate_build,
                                           save_upgrade_config=save_upgrade_config)
                    RestHelper(RestConnection(server)).is_ns_server_running(
                        testconstants.NS_SERVER_TIMEOUT)
                    #verify admin_creds still set
                    pools_info = RestConnection(server).get_pools_info()
                    # NOTE(review): assertTrue treats its second argument
                    # as the failure message, so this only checks that
                    # implementationVersion is truthy - confirm whether an
                    # equality check was intended.
                    self.assertTrue(pools_info['implementationVersion'],
                                    appropriate_build.product_version)
                    if start_upgraded_first:
                        log.info("Starting server {0} post upgrade".format(server))
                        remote.start_membase()
                    else:
                        remote.stop_membase()
                    remote.disconnect()
                if not start_upgraded_first:
                    log.info("Starting all servers together")
                    self._start_membase_servers(servers)
                time.sleep(TIMEOUT_SECS)
                if version == "1.7.0" or version == "1.7.1":
                    self._save_config(rest_settings, master)
                if create_buckets:
                    self.assertTrue(
                        BucketOperationHelper.wait_for_bucket_creation(
                            'default', RestConnection(master)),
                        msg="bucket 'default' does not exist..")
                if insert_data:
                    self._verify_data(master, rest, inserted_keys)
    # rolling upgrade
    else:
        version = input.test_params['version']
        appropriate_build = _get_build(servers[0], version, is_amazon=is_amazon)
        self.assertTrue(appropriate_build.url,
                        msg="unable to find build {0}".format(version))
        # rebalance node out
        # remove membase from node
        # install destination version onto node
        # rebalance it back into the cluster
        for server_index in range(len(servers)):
            server = servers[server_index]
            # Index -1 wraps around, so the first rolling node is managed
            # from the last server in the list.
            master = servers[server_index - 1]
            log.info("current master is {0}, rolling node is {1}".format(master, server))
            rest = RestConnection(master)
            nodes = rest.node_statuses()
            allNodes = []
            toBeEjectedNodes = []
            for node in nodes:
                allNodes.append(node.id)
                if "{0}:{1}".format(node.ip, node.port) == "{0}:{1}".format(server.ip, server.port):
                    toBeEjectedNodes.append(node.id)
            helper = RestHelper(rest)
            removed = helper.remove_nodes(knownNodes=allNodes,
                                          ejectedNodes=toBeEjectedNodes)
            self.assertTrue(removed,
                            msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
            remote = RemoteMachineShellConnection(server)
            remote.membase_uninstall()
            remote.couchbase_uninstall()
            remote.download_build(appropriate_build)
            remote.membase_install(appropriate_build)
            RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
            log.info("sleep for 10 seconds to wait for membase-server to start...")
            time.sleep(TIMEOUT_SECS)
            rest.init_cluster_port(rest_settings.rest_username, rest_settings.rest_password)
            rest.init_cluster_memoryQuota(memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()
            #readd this to the cluster
            ClusterOperationHelper.add_all_nodes_or_assert(master, [server], rest_settings, self)
            nodes = rest.node_statuses()
            otpNodeIds = []
            for node in nodes:
                otpNodeIds.append(node.id)
            rebalanceStarted = rest.rebalance(otpNodeIds, [])
            self.assertTrue(rebalanceStarted,
                            "unable to start rebalance on master node {0}".format(master.ip))
            log.info('started rebalance operation on master node {0}'.format(master.ip))
            rebalanceSucceeded = rest.monitorRebalance()
            self.assertTrue(rebalanceSucceeded,
                            "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
            #ClusterOperationHelper.verify_persistence(servers, self)
            #TODO: how can i verify that the cluster init config is preserved
            # verify data on upgraded nodes
            if create_buckets:
                self.assertTrue(
                    BucketOperationHelper.wait_for_bucket_creation(
                        'default', RestConnection(master)),
                    msg="bucket 'default' does not exist..")
            if insert_data:
                self._verify_data(master, rest, inserted_keys)
                # NOTE(review): nesting of this per-bucket check under
                # `if insert_data` is an assumption - confirm.
                rest = RestConnection(master)
                buckets = rest.get_buckets()
                for bucket in buckets:
                    BucketOperationHelper.keys_exist_or_assert(
                        bucket_data[bucket.name]["inserted_keys"],
                        master,
                        bucket.name, self)
def test_backup_upgrade_restore_default(self):
    """Back up the 'default' bucket, upgrade the nodes, restore and verify.

    The last entry of ``self.servers`` is set aside as a "worker" machine on
    which ``cbbackup`` / ``cbrestore`` are executed; the remaining servers
    are the ones that get loaded, upgraded and verified.

    The test logs an error and returns without doing anything when fewer
    than two servers are available, when the initial and final versions are
    equal, or when the final version does not start with ``2.0``.
    """
    if len(self.servers) < 2:
        self.log.error("At least 2 servers required for this test ..")
        return

    # The version currently installed on the master is the default for both
    # the "initial_version" and "final_version" test parameters.
    shell = RemoteMachineShellConnection(self.master)
    try:
        o, r = shell.execute_command("cat /opt/couchbase/VERSION.txt")
    finally:
        shell.disconnect()
    fin = o[0]

    initial_version = self.input.param("initial_version", fin)
    final_version = self.input.param("final_version", fin)
    if initial_version == final_version:
        self.log.error("Same initial and final versions ..")
        return
    if not final_version.startswith('2.0'):
        self.log.error("Upgrade test not set to run from 1.8.1 -> 2.0 ..")
        return

    # Only shrink self.servers once the parameters are known to be valid, so
    # the early returns above do not leave the server list truncated.
    original_set = copy.copy(self.servers)
    worker = self.servers[-1]
    self.servers = self.servers[:-1]

    builds, _ = BuildQuery().get_all_builds(version=final_version)
    product = 'couchbase-server-enterprise'

    # CASE where the worker isn't a 2.0+: install the final version on it.
    shell = RemoteMachineShellConnection(worker)
    try:
        o, r = shell.execute_command("cat /opt/couchbase/VERSION.txt")
    finally:
        # The original never closed this connection.
        shell.disconnect()
    if not o[0].startswith('2.0'):
        self.log.info("Loading version {0} on worker.. ".format(final_version))
        self._install_build_on_server(worker, builds, product, final_version,
                                      init_cluster=False)

    remote_tmp = "{1}/{0}".format("backup", "/root")
    perm_comm = "mkdir -p {0}".format(remote_tmp)

    # Downgrade/replace the nodes under test when they are not already
    # running the requested initial version.
    if initial_version != fin:
        for server in self.servers:
            self.log.info("Loading version .. {0}".format(initial_version))
            self._install_build_on_server(server, builds, product,
                                          initial_version)

    self.common_setUp()
    bucket = "default"
    if len(self.servers) > 1:
        self.add_nodes_and_rebalance()

    rest = RestConnection(self.master)
    info = rest.get_nodes_self()
    size = int(info.memoryQuota * 2.0 / 3.0)
    rest.create_bucket(bucket, ramQuotaMB=size)
    ready = BucketOperationHelper.wait_for_memcached(self.master, bucket)
    self.assertTrue(ready, "wait_for_memcached_failed")

    distribution = {10: 0.2, 20: 0.5, 30: 0.25, 40: 0.05}
    inserted_keys, _ = MemcachedClientHelper.load_bucket_and_return_the_keys(
        servers=[self.master],
        name=bucket,
        ram_load_ratio=0.5,
        value_size_distribution=distribution,
        moxi=True,
        write_only=True,
        delete_ratio=0.1,
        number_of_threads=2)

    if len(self.servers) > 1:
        rest = RestConnection(self.master)
        self.assertTrue(
            RebalanceHelper.wait_for_replication(rest.get_nodes(), timeout=180),
            msg="replication did not complete")

    # Wait until the write queues are drained before taking the backup.
    ready = RebalanceHelper.wait_for_stats_on_all(self.master, bucket,
                                                  'ep_queue_size', 0)
    self.assertTrue(ready, "wait_for ep_queue_size == 0 failed")
    ready = RebalanceHelper.wait_for_stats_on_all(self.master, bucket,
                                                  'ep_flusher_todo', 0)
    self.assertTrue(ready, "wait_for ep_flusher_todo == 0 failed")

    # Create the backup directory on the worker.
    shell = RemoteMachineShellConnection(worker)
    try:
        o, r = shell.execute_command(perm_comm)
        shell.log_command_output(o, r)
    finally:
        shell.disconnect()

    # Backup
    shell = RemoteMachineShellConnection(worker)
    try:
        shell.execute_command(
            "/opt/couchbase/bin/cbbackup http://{0}:{1} {2}".format(
                self.master.ip, self.master.port, remote_tmp))
    finally:
        shell.disconnect()

    BucketOperationHelper.delete_bucket_or_assert(self.master, bucket, self)
    time.sleep(30)

    # Upgrade
    for server in self.servers:
        self.log.info("Upgrading to current version {0}".format(final_version))
        self._install_build_on_server(server, builds, product, final_version)
    time.sleep(30)

    # Restore
    rest = RestConnection(self.master)
    info = rest.get_nodes_self()
    size = int(info.memoryQuota * 2.0 / 3.0)
    rest.create_bucket(bucket, ramQuotaMB=size)
    # The bucket was created through the master, so wait on the master
    # (the original waited on the leftover upgrade-loop variable `server`).
    ready = BucketOperationHelper.wait_for_memcached(self.master, bucket)
    self.assertTrue(ready, "wait_for_memcached_failed")

    shell = RemoteMachineShellConnection(worker)
    try:
        shell.execute_command(
            "/opt/couchbase/bin/cbrestore {2} http://{0}:{1} -b {3}".format(
                self.master.ip, self.master.port, remote_tmp, bucket))
    finally:
        shell.disconnect()
    time.sleep(60)

    keys_exist = BucketOperationHelper.keys_exist_or_assert_in_parallel(
        inserted_keys, self.master, bucket, self, concurrency=4)
    self.assertTrue(keys_exist, msg="unable to verify keys after restore")
    time.sleep(30)
    BucketOperationHelper.delete_bucket_or_assert(self.master, bucket, self)

    # Rebalance every node except the master out of the cluster.
    rest = RestConnection(self.master)
    helper = RestHelper(rest)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id
    if len(self.servers) > 1:
        helper.remove_nodes(
            knownNodes=[node.id for node in nodes],
            ejectedNodes=[node.id for node in nodes if node.id != master_id],
            wait_for_rebalance=True)

    # Remove the backup directory from the worker.
    shell = RemoteMachineShellConnection(worker)
    try:
        shell.remove_directory(remote_tmp)
    finally:
        shell.disconnect()

    self.servers = copy.copy(original_set)

    # When the machines originally ran the initial version, put that version
    # back on every server (worker included, since self.servers is restored).
    if initial_version == fin:
        builds, _ = BuildQuery().get_all_builds(version=initial_version)
        for server in self.servers:
            self.log.info("Loading version .. {0}".format(initial_version))
            self._install_build_on_server(server, builds, product,
                                          initial_version)


def _install_build_on_server(self, server, builds, product, version,
                             init_cluster=True):
    """Replace the Couchbase installation on ``server`` with ``version``.

    Looks up the matching build in ``builds`` for the server's deliverable
    and architecture type, stops and uninstalls the current server, then
    downloads and installs the build.

    :param server: machine to (re)install.
    :param builds: build list to search, as returned by
        ``BuildQuery().get_all_builds``.
    :param product: product name passed to ``BuildQuery().find_build``.
    :param version: version string of the build to install.
    :param init_cluster: when True, wait for ns_server on ``server`` and
        initialise its credentials and memory quota after the install.
    """
    remote = RemoteMachineShellConnection(server)
    try:
        info = remote.extract_remote_info()
        build = BuildQuery().find_build(builds, product,
                                        info.deliverable_type,
                                        info.architecture_type, version)
        remote.stop_couchbase()
        remote.couchbase_uninstall()
        remote.download_build(build)
        remote.install_server(build)
        if init_cluster:
            rest = RestConnection(server)
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            rest.init_cluster(server.rest_username, server.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
    finally:
        # Always release the SSH connection, even when a step above fails.
        remote.disconnect()
def _install_and_upgrade(self, initial_version='1.6.5.3', create_buckets=False,
                         insert_data=False, start_upgraded_first=True,
                         load_ratio=-1, roll_upgrade=False, upgrade_path=None):
    """Install ``initial_version`` on all servers, cluster them, then upgrade.

    The target version is read from the ``version`` test parameter and is
    appended to ``upgrade_path``; the servers come from
    ``TestInputSingleton.input``.

    :param initial_version: version installed on every server first.
    :param create_buckets: create and load the default bucket plus extra
        buckets before upgrading, and check 'default' exists afterwards.
    :param insert_data: verify the previously inserted keys after upgrading.
    :param start_upgraded_first: when False (non-rolling mode only), each
        server is stopped right after its upgrade and all servers are
        started together once every node has been upgraded.
    :param load_ratio: forwarded to ``self._load_data``.
    :param roll_upgrade: when True, nodes are rebalanced out, reinstalled
        and rebalanced back in one at a time instead of upgrading in place.
    :param upgrade_path: optional list of intermediate versions to upgrade
        through before the target version (non-rolling mode only).
    """
    # Copy so the caller's list is never modified.
    node_upgrade_path = list(upgrade_path or [])
    # then start them in whatever order you want
    inserted_keys = []
    log = logger.Logger.get_logger()
    if roll_upgrade:
        log.info("performing a rolling upgrade")

    test_input = TestInputSingleton.input
    input_version = test_input.test_params['version']
    rest_settings = test_input.membase_settings
    servers = test_input.servers
    is_amazon = bool(test_input.test_params.get('amazon', False))

    # install older build on all nodes
    for server in servers:
        remote = RemoteMachineShellConnection(server)
        rest = RestConnection(server)
        info = remote.extract_remote_info()
        older_build = BuildQuery().find_membase_release_build(
            deliverable_type=info.deliverable_type,
            os_architecture=info.architecture_type,
            build_version=initial_version,
            product='membase-server-enterprise',
            is_amazon=is_amazon)

        remote.membase_uninstall()
        remote.couchbase_uninstall()
        if older_build.product_version.startswith("1.8"):
            abbr_product = "cb"
        else:
            abbr_product = "mb"
        remote.download_binary_in_win(older_build.url, abbr_product,
                                      initial_version)
        # now let's install ?
        remote.install_server_win(older_build, initial_version)
        RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
        rest.init_cluster(rest_settings.rest_username,
                          rest_settings.rest_password)
        rest.init_cluster_memoryQuota(
            memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
        remote.disconnect()

    bucket_data = {}
    master = servers[0]
    # cluster all the nodes together
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                   rest_settings, self)
    rest = RestConnection(master)
    otpNodeIds = [node.id for node in rest.node_statuses()]
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(
        rebalanceStarted,
        "unable to start rebalance on master node {0}".format(master.ip))
    log.info('started rebalance operation on master node {0}'.format(
        master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(
        rebalanceSucceeded,
        "rebalance operation for nodes: {0} was not successful".format(
            otpNodeIds))

    if create_buckets:
        # let's create buckets
        # wait for the bucket
        # bucket port should also be configurable , pass it as the
        # parameter to this test ? later
        self._create_default_bucket(master)
        inserted_keys = self._load_data(master, load_ratio)
        _create_load_multiple_bucket(self, master, bucket_data, howmany=2)

    node_upgrade_path.append(input_version)
    log.info("Upgrade path: {0} -> {1}".format(initial_version,
                                               node_upgrade_path))
    log.info("List of servers {0}".format(servers))

    if not roll_upgrade:
        for version in node_upgrade_path:
            # Compare by value: the original used `is not`, which tests
            # object identity and so never skipped an equal version string
            # that happened to be a different object.
            if version == initial_version:
                continue

            log.info(
                "SHUTDOWN ALL CB OR MB SERVERS IN CLUSTER BEFORE DOING UPGRADE"
            )
            for server in servers:
                shell = RemoteMachineShellConnection(server)
                shell.stop_membase()
                shell.disconnect()

            log.info("Upgrading to version {0}".format(version))
            appropriate_build = _get_build(servers[0], version,
                                           is_amazon=is_amazon)
            self.assertTrue(appropriate_build.url,
                            msg="unable to find build {0}".format(version))

            for server in servers:
                remote = RemoteMachineShellConnection(server)
                # Query the server being upgraded; the original reused the
                # `info` left over from the last server of the install loop.
                info = remote.extract_remote_info()
                # Only a 1.8.x target forces "cb" here; for any other
                # version abbr_product keeps the value it last had (from the
                # install loop above or from a previous iteration).
                if version.startswith("1.8"):
                    abbr_product = "cb"
                remote.download_binary_in_win(appropriate_build.url,
                                              abbr_product, version)
                log.info("###### START UPGRADE. #########")
                remote.membase_upgrade_win(info.architecture_type,
                                           info.windows_name, version,
                                           initial_version)
                RestHelper(RestConnection(server)).is_ns_server_running(
                    testconstants.NS_SERVER_TIMEOUT)

                # verify admin_creds still set
                pools_info = RestConnection(server).get_pools_info()
                # NOTE(review): assertTrue only checks that
                # 'implementationVersion' is truthy; the second argument is
                # the failure message, not a value compared against.
                self.assertTrue(pools_info['implementationVersion'],
                                appropriate_build.product_version)

                if not start_upgraded_first:
                    remote.stop_membase()
                remote.disconnect()

            if not start_upgraded_first:
                log.info("Starting all servers together")
                self._start_membase_servers(servers)
            time.sleep(TIMEOUT_SECS)

            if create_buckets:
                self.assertTrue(
                    BucketOperationHelper.wait_for_bucket_creation(
                        'default', RestConnection(master)),
                    msg="bucket 'default' does not exist..")
            if insert_data:
                self._verify_data(master, rest, inserted_keys)

    # rolling upgrade
    else:
        version = test_input.test_params['version']
        appropriate_build = _get_build(servers[0], version,
                                       is_amazon=is_amazon)
        self.assertTrue(appropriate_build.url,
                        msg="unable to find build {0}".format(version))
        # rebalance node out
        # remove membase from node
        # install destination version onto node
        # rebalance it back into the cluster
        for server_index, server in enumerate(servers):
            # The previous server acts as master; index 0 wraps around to
            # the last server in the list.
            master = servers[server_index - 1]
            log.info("current master is {0}, rolling node is {1}".format(
                master, server))

            rest = RestConnection(master)
            nodes = rest.node_statuses()
            allNodes = [node.id for node in nodes]
            rolling_address = "{0}:{1}".format(server.ip, server.port)
            toBeEjectedNodes = [
                node.id for node in nodes
                if "{0}:{1}".format(node.ip, node.port) == rolling_address
            ]
            helper = RestHelper(rest)
            removed = helper.remove_nodes(knownNodes=allNodes,
                                          ejectedNodes=toBeEjectedNodes)
            self.assertTrue(
                removed,
                msg="Unable to remove nodes {0}".format(toBeEjectedNodes))

            remote = RemoteMachineShellConnection(server)
            remote.membase_uninstall()
            remote.couchbase_uninstall()
            if appropriate_build.product == 'membase-server-enterprise':
                abbr_product = "mb"
            else:
                abbr_product = "cb"
            remote.download_binary_in_win(appropriate_build.url,
                                          abbr_product, version)
            remote.install_server_win(appropriate_build, version)
            # NOTE(review): `rest` is connected to `master`, so the
            # readiness wait and init calls below go to the master rather
            # than to the freshly reinstalled `server` - confirm intended.
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            time.sleep(TIMEOUT_SECS)
            rest.init_cluster(rest_settings.rest_username,
                              rest_settings.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()

            # readd this to the cluster
            ClusterOperationHelper.add_all_nodes_or_assert(
                master, [server], rest_settings, self)
            nodes = rest.node_statuses()
            log.info(
                "wait 30 seconds before asking older node for start rebalance"
            )
            time.sleep(30)
            otpNodeIds = [node.id for node in nodes]
            rebalanceStarted = rest.rebalance(otpNodeIds, [])
            self.assertTrue(
                rebalanceStarted,
                "unable to start rebalance on master node {0}".format(
                    master.ip))
            log.info('started rebalance operation on master node {0}'.format(
                master.ip))
            rebalanceSucceeded = rest.monitorRebalance()
            self.assertTrue(
                rebalanceSucceeded,
                "rebalance operation for nodes: {0} was not successful".
                format(otpNodeIds))

        # TODO: how can i verify that the cluster init config is preserved
        # verify data on upgraded nodes
        if create_buckets:
            self.assertTrue(
                BucketOperationHelper.wait_for_bucket_creation(
                    'default', RestConnection(master)),
                msg="bucket 'default' does not exist..")
        if insert_data:
            self._verify_data(master, rest, inserted_keys)
            rest = RestConnection(master)
            for bucket in rest.get_buckets():
                BucketOperationHelper.keys_exist_or_assert(
                    bucket_data[bucket.name]["inserted_keys"], master,
                    bucket.name, self)