def setUp(self):
    """Prepare a clean cluster for the test.

    Initialises the master node, sizes the memory quota, wipes any
    pre-existing buckets/nodes, adds every server to the cluster and
    rebalances them in. Fails the test if rebalance cannot start or
    does not complete.
    """
    self.log = logger.Logger.get_logger()
    self.input = TestInputSingleton.input
    self.assertTrue(self.input, msg="input parameters missing...")
    self.servers = self.input.servers
    self.master = self.servers[0]
    rest = RestConnection(self.master)
    rest.init_cluster(username=self.master.rest_username,
                      password=self.master.rest_password)
    info = rest.get_nodes_self()
    # size the cluster quota relative to the reserved memcached memory
    node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
    rest.init_cluster_memoryQuota(
        memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
    # start from a clean slate: no buckets, no extra nodes
    BucketOperationHelper.delete_all_buckets_or_assert(servers=self.servers,
                                                      test_case=self)
    ClusterOperationHelper.cleanup_cluster(servers=self.servers)
    credentials = self.input.membase_settings
    ClusterOperationHelper.add_all_nodes_or_assert(master=self.master,
                                                   all_servers=self.servers,
                                                   rest_settings=credentials,
                                                   test_case=self)
    rest = RestConnection(self.master)
    nodes = rest.node_statuses()
    otpNodeIds = [node.id for node in nodes]
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(rebalanceStarted,
                    "unable to start rebalance on master node {0}".format(self.master.ip))
    self.log.info('started rebalance operation on master node {0}'.format(self.master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    # BUG FIX: the monitorRebalance() result was previously computed but
    # never checked, so a failed rebalance went unnoticed in setUp.
    self.assertTrue(rebalanceSucceeded,
                    "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
def setUp(self):
    """FailoverBaseTest setup: read test params, add all nodes, rebalance.

    On any failure during cluster assembly the cluster helper is shut
    down and the test is failed with the originating exception.
    """
    self._cleanup_nodes = []
    self._failed_nodes = []
    super(FailoverBaseTest, self).setUp()
    self.bidirectional = self.input.param("bidirectional", False)
    self._value_size = self.input.param("value_size", 256)
    self.dgm_run = self.input.param("dgm_run", True)
    credentials = self.input.membase_settings
    self.add_back_flag = False
    self.during_ops = self.input.param("during_ops", None)
    self.log.info(
        "============== FailoverBaseTest setup was started for test #{0} {1}==============".format(
            self.case_number, self._testMethodName
        )
    )
    try:
        rest = RestConnection(self.master)
        ClusterOperationHelper.add_all_nodes_or_assert(self.master, self.servers, credentials, self)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)
    # BUG FIX: "except Exception, e" is Python-2-only syntax; the "as"
    # form is valid on Python 2.6+ and Python 3.
    except Exception as e:
        self.cluster.shutdown()
        self.fail(e)
def common_setup(self, replica):
    """Build a fresh cluster with a 'default' bucket at the given replica count.

    Cleans all nodes, adds them to the first server's cluster, creates the
    bucket, waits for memcached, rebalances, and attaches a
    VBucketAwareMemcached client as self.awareness.
    """
    self._input = TestInputSingleton.input
    self._servers = self._input.servers
    first = self._servers[0]
    self.log = logger.Logger().get_logger()
    self.log.info(self._input)
    rest = RestConnection(first)
    for server in self._servers:
        RestHelper(RestConnection(server)).is_ns_server_running()
    ClusterOperationHelper.cleanup_cluster(self._servers)
    BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
    ClusterOperationHelper.add_all_nodes_or_assert(self._servers[0], self._servers,
                                                   self._input.membase_settings, self)
    nodes = rest.node_statuses()
    otpNodeIds = [node.id for node in nodes]
    info = rest.get_nodes_self()
    bucket_ram = info.mcdMemoryReserved * 3 / 4
    rest.create_bucket(bucket="default",
                       ramQuotaMB=int(bucket_ram),
                       replicaNumber=replica,
                       proxyPort=rest.get_nodes_self().moxi)
    msg = "wait_for_memcached fails"
    # BUG FIX: a stray trailing comma previously made `ready` a 1-tuple,
    # so assertTrue always passed even when wait_for_memcached failed.
    ready = BucketOperationHelper.wait_for_memcached(first, "default")
    self.assertTrue(ready, msg)
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(rebalanceStarted,
                    "unable to start rebalance on master node {0}".format(first.ip))
    self.log.info('started rebalance operation on master node {0}'.format(first.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    # without a bucket this seems to fail
    self.assertTrue(rebalanceSucceeded,
                    "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
    self.awareness = VBucketAwareMemcached(rest, "default")
def add_node_and_rebalance(self, master, servers):
    """Add all servers to master's cluster, rebalance, and verify health.

    Asserts rebalance starts and completes, then checks cluster health
    and balance through RestHelper.
    """
    ClusterOperationHelper.add_all_nodes_or_assert(
        master, servers, self.input.membase_settings, self)
    rest = RestConnection(master)
    nodes = rest.node_statuses()
    otpNodeIds = [node.id for node in nodes]
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(
        rebalanceStarted,
        "unable to start rebalance on master node {0}".format(master.ip))
    self.log.info('started rebalance operation on master node {0}'.format(
        master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(
        rebalanceSucceeded,
        "rebalance operation for nodes: {0} was not successful".format(
            otpNodeIds))
    self.log.info(
        'rebalance operaton succeeded for nodes: {0}'.format(otpNodeIds))
    #now remove the nodes
    #make sure its rebalanced and node statuses are healthy
    helper = RestHelper(rest)
    # BUG FIX: the bound methods themselves were previously passed to
    # assertTrue without being called, which is always truthy — the
    # health/balance checks never actually ran. NOTE(review): assumes
    # is_cluster_healthy/is_cluster_rebalanced are methods, not
    # properties — confirm against RestHelper's definition.
    self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
    self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
def _setup_cluster(self):
    """Join every configured server to the master's cluster and rebalance them in."""
    rest = RestConnection(self.master)
    credentials = self._input.membase_settings
    ClusterOperationHelper.add_all_nodes_or_assert(
        self.master, self._servers, credentials, self)
    statuses = rest.node_statuses()
    rest.rebalance(otpNodes=[status.id for status in statuses], ejectedNodes=[])
    failure_msg = "rebalance failed after adding these nodes {0}".format(statuses)
    self.assertTrue(rest.monitorRebalance(), msg=failure_msg)
def _setup_cluster(self):
    """Add all servers to the cluster rooted at self.master and wait for rebalance."""
    rest = RestConnection(self.master)
    ClusterOperationHelper.add_all_nodes_or_assert(
        self.master, self._servers, self._input.membase_settings, self)
    nodes = rest.node_statuses()
    msg = "rebalance failed after adding these nodes {0}".format(nodes)
    rest.rebalance(otpNodes=[n.id for n in nodes], ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(), msg=msg)
def _cluster_setup(self):
    """Initialise the cluster, create bucket(s), load keys, and rebalance.

    Reads replicas/keys-count/num-buckets from the test input. With one
    bucket, creates 'default'; otherwise creates num-buckets buckets and
    waits for memcached on each. Loads keys_count keys per bucket and
    asserts the rebalance completed.
    """
    log = logger.Logger.get_logger()
    replicas = self._input.param("replicas", 1)
    keys_count = self._input.param("keys-count", 0)
    num_buckets = self._input.param("num-buckets", 1)
    bucket_name = "default"
    # FIX: removed unused local `master = self._servers[0]`; the method
    # consistently uses self.master.
    credentials = self._input.membase_settings
    rest = RestConnection(self.master)
    info = rest.get_nodes_self()
    rest.init_cluster(username=self.master.rest_username,
                      password=self.master.rest_password)
    rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
    rest.reset_autofailover()
    ClusterOperationHelper.add_all_nodes_or_assert(self.master, self._servers,
                                                   credentials, self)
    # FIX: use floor division so the quota stays an int under Python 3 as
    # well (identical result under Python 2's integer division).
    bucket_ram = info.memoryQuota * 2 // 3
    if num_buckets == 1:
        rest.create_bucket(bucket=bucket_name,
                           ramQuotaMB=bucket_ram,
                           replicaNumber=replicas,
                           proxyPort=info.moxi)
        ready = BucketOperationHelper.wait_for_memcached(
            self.master, bucket_name)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
        buckets = rest.get_buckets()
    else:
        created = BucketOperationHelper.create_multiple_buckets(
            self.master, replicas, howmany=num_buckets)
        self.assertTrue(created, "unable to create multiple buckets")
        buckets = rest.get_buckets()
        for bucket in buckets:
            ready = BucketOperationHelper.wait_for_memcached(
                self.master, bucket.name)
            self.assertTrue(ready, msg="wait_for_memcached failed")
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
    for bucket in buckets:
        inserted_keys_cnt = self.load_data(self.master, bucket.name, keys_count)
        log.info('inserted {0} keys'.format(inserted_keys_cnt))
    msg = "rebalance failed after adding these nodes {0}".format(nodes)
    self.assertTrue(rest.monitorRebalance(), msg=msg)
    self.assertTrue(ready, "wait_for_memcached failed")
def rebalance_servers_in(servers, rest_settings, testcase):
    """Add every server to the first server's cluster and rebalance them in."""
    log = logger.Logger().get_logger()
    master = servers[0]
    rest = RestConnection(master)
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                   rest_settings, testcase)
    otp_ids = [status.id for status in rest.node_statuses()]
    started = rest.rebalance(otp_ids, [])
    testcase.assertTrue(
        started, "unable to start rebalance on master node {0}".format(master.ip))
    log.info('started rebalance operation on master node {0}'.format(master.ip))
    succeeded = rest.monitorRebalance()
    testcase.assertTrue(
        succeeded,
        "rebalance operation for nodes: {0} was not successful".format(otp_ids))
def _cluster_setup(self):
    """Initialise the cluster, create the requested bucket(s), load keys, rebalance."""
    log = logger.Logger.get_logger()
    replicas = self._input.param("replicas", 1)
    keys_count = self._input.param("keys-count", 0)
    num_buckets = self._input.param("num-buckets", 1)
    bucket_name = "default"
    master = self._servers[0]
    credentials = self._input.membase_settings
    rest = RestConnection(master)
    info = rest.get_nodes_self()
    rest.init_cluster(username=master.rest_username,
                      password=master.rest_password)
    rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
    rest.reset_autofailover()
    ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers,
                                                   credentials, self)
    bucket_ram = info.memoryQuota * 2 / 3
    if num_buckets == 1:
        # single default bucket
        rest.create_bucket(bucket=bucket_name, ramQuotaMB=bucket_ram,
                           replicaNumber=replicas, proxyPort=info.moxi)
        ready = BucketOperationHelper.wait_for_memcached(master, bucket_name)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[n.id for n in nodes], ejectedNodes=[])
        buckets = rest.get_buckets()
    else:
        # several buckets; wait for memcached on each before rebalancing
        created = BucketOperationHelper.create_multiple_buckets(
            master, replicas, howmany=num_buckets)
        self.assertTrue(created, "unable to create multiple buckets")
        buckets = rest.get_buckets()
        for bucket in buckets:
            ready = BucketOperationHelper.wait_for_memcached(master, bucket.name)
            self.assertTrue(ready, msg="wait_for_memcached failed")
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[n.id for n in nodes], ejectedNodes=[])
    for bucket in buckets:
        inserted = self.load_data(master, bucket.name, keys_count)
        log.info('inserted {0} keys'.format(inserted))
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance failed after adding these nodes {0}".format(nodes))
    self.assertTrue(ready, "wait_for_memcached failed")
def add_node_and_rebalance(self, master, servers):
    """Add all servers to master's cluster, rebalance, and verify cluster health."""
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                   self.input.membase_settings, self)
    rest = RestConnection(master)
    nodes = rest.node_statuses()
    otpNodeIds = [node.id for node in nodes]
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(rebalanceStarted,
                    "unable to start rebalance on master node {0}".format(master.ip))
    self.log.info('started rebalance operation on master node {0}'.format(master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(rebalanceSucceeded,
                    "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
    self.log.info('rebalance operaton succeeded for nodes: {0}'.format(otpNodeIds))
    #now remove the nodes
    #make sure its rebalanced and node statuses are healthy
    helper = RestHelper(rest)
    # BUG FIX: previously the bound methods were passed to assertTrue
    # without being invoked (always truthy), so these checks were no-ops.
    # NOTE(review): assumes both names are methods on RestHelper, not
    # properties — confirm against its definition.
    self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
    self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
def setUp(self):
    """FailoverBaseTest setUp: prepare bookkeeping, add all nodes, rebalance them in."""
    self._cleanup_nodes = []
    super(FailoverBaseTest, self).setUp()
    self.bidirectional = self.input.param("bidirectional", False)
    self._value_size = self.input.param("value_size", 256)
    self._failed_nodes = []
    self.dgm_run = self.input.param("dgm_run", True)
    self.gen_create = BlobGenerator('loadOne', 'loadOne_', self._value_size,
                                    end=self.num_items)
    self.add_back_flag = False
    self.log.info("============== FailoverBaseTest setup was started for test #{0} {1}=============="\
                  .format(self.case_number, self._testMethodName))
    rest = RestConnection(self.master)
    credentials = self.input.membase_settings
    ClusterOperationHelper.add_all_nodes_or_assert(self.master, self.servers,
                                                   credentials, self)
    statuses = rest.node_statuses()
    rest.rebalance(otpNodes=[status.id for status in statuses], ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance failed after adding these nodes {0}".format(statuses))
    self.log.info("============== FailoverBaseTest setup was finished for test #{0} {1} =============="\
                  .format(self.case_number, self._testMethodName))
def setUp(self):
    """Set up FailoverBaseTest: read params, build the data generator, rebalance all nodes in."""
    self._cleanup_nodes = []
    super(FailoverBaseTest, self).setUp()
    self.bidirectional = self.input.param("bidirectional", False)
    self._value_size = self.input.param("value_size", 256)
    self._failed_nodes = []
    self.dgm_run = self.input.param("dgm_run", True)
    self.gen_create = BlobGenerator('loadOne', 'loadOne_',
                                    self._value_size, end=self.num_items)
    self.add_back_flag = False
    self.log.info("============== FailoverBaseTest setup was started for test #{0} {1}=============="\
        .format(self.case_number, self._testMethodName))
    rest = RestConnection(self.master)
    credentials = self.input.membase_settings
    ClusterOperationHelper.add_all_nodes_or_assert(
        self.master, self.servers, credentials, self)
    nodes = rest.node_statuses()
    failure_msg = "rebalance failed after adding these nodes {0}".format(nodes)
    rest.rebalance(otpNodes=[n.id for n in nodes], ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(), msg=failure_msg)
    self.log.info("============== FailoverBaseTest setup was finished for test #{0} {1} =============="\
        .format(self.case_number, self._testMethodName))
def _common_test_body(self):
    """Shrink the cluster one node at a time while exercising rebalance stop/restart.

    For each non-master node: load data, fail the node over, start a
    rebalance, stop it at ~30% progress, verify replication, then run the
    rebalance to completion and verify replication again.
    """
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    bucket_data = RebalanceBaseTest.bucket_data_init(rest)
    ClusterHelper.add_all_nodes_or_assert(master, self.servers, creds, self)
    rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                   ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding nodes")
    nodes = rest.node_statuses()
    # keep at least the current (master) node in the cluster
    while len(nodes) > 1:
        # choose a victim that is not the master node
        victim = RebalanceHelper.pick_node(master)
        distribution = RebalanceBaseTest.get_distribution(self.load_ratio)
        RebalanceBaseTest.load_data_for_buckets(rest, self.load_ratio,
                                                distribution, [master],
                                                bucket_data, self)
        self.log.info("current nodes : {0}".format(
            [node.id for node in rest.node_statuses()]))
        # start/stop the rebalance before letting it run to completion
        self.log.info("removing node {0} and rebalance afterwards".format(victim.id))
        rest.fail_over(victim.id)
        self.log.info("failed over {0}".format(victim.id))
        time.sleep(10)
        rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                       ejectedNodes=[victim.id])
        expected_progress = 30
        reached = RestHelper(rest).rebalance_reached(expected_progress)
        self.assertTrue(reached,
                        "rebalance failed or did not reach {0}%".format(expected_progress))
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
        time.sleep(20)
        RebalanceBaseTest.replication_verification(master, bucket_data,
                                                   self.replica, self)
        rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                       ejectedNodes=[victim.id])
        self.assertTrue(rest.monitorRebalance(),
                        msg="rebalance operation failed after adding node {0}".format(victim.id))
        time.sleep(20)
        RebalanceBaseTest.replication_verification(master, bucket_data,
                                                   self.replica, self)
        nodes = rest.node_statuses()
def common_setup(self, replica):
    """Build a clean cluster with a 'default' bucket using `replica` replicas.

    Verifies ns_server is up on every node, cleans existing state, adds
    all nodes, creates the bucket, waits for memcached, rebalances, and
    attaches a VBucketAwareMemcached client as self.awareness.
    """
    self._input = TestInputSingleton.input
    self._servers = self._input.servers
    first = self._servers[0]
    self.log = logger.Logger().get_logger()
    self.log.info(self._input)
    rest = RestConnection(first)
    for server in self._servers:
        RestHelper(RestConnection(server)).is_ns_server_running()
    ClusterOperationHelper.cleanup_cluster(self._servers)
    BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
    ClusterOperationHelper.add_all_nodes_or_assert(
        self._servers[0], self._servers, self._input.membase_settings, self)
    nodes = rest.node_statuses()
    otpNodeIds = [node.id for node in nodes]
    info = rest.get_nodes_self()
    bucket_ram = info.mcdMemoryReserved * 3 / 4
    rest.create_bucket(bucket="default",
                       ramQuotaMB=int(bucket_ram),
                       replicaNumber=replica,
                       proxyPort=rest.get_nodes_self().moxi)
    msg = "wait_for_memcached fails"
    # BUG FIX: a stray trailing comma previously turned `ready` into a
    # 1-tuple, making the assertion below unconditionally pass.
    ready = BucketOperationHelper.wait_for_memcached(first, "default")
    self.assertTrue(ready, msg)
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(
        rebalanceStarted,
        "unable to start rebalance on master node {0}".format(first.ip))
    self.log.info('started rebalance operation on master node {0}'.format(
        first.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    # without a bucket this seems to fail
    self.assertTrue(
        rebalanceSucceeded,
        "rebalance operation for nodes: {0} was not successful".format(
            otpNodeIds))
    self.awareness = VBucketAwareMemcached(rest, "default")
def _cluster_setup(self):
    """Set up a single-bucket cluster: init the master, add every node, rebalance."""
    bucket_name = "default"
    master = self._servers[0]
    credentials = self._input.membase_settings
    rest = RestConnection(master)
    info = rest.get_nodes_self()
    rest.init_cluster(username=master.rest_username,
                      password=master.rest_password)
    rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
    rest.reset_autofailover()
    ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers,
                                                   credentials, self)
    bucket_ram = info.memoryQuota * 2 / 3
    rest.create_bucket(bucket=bucket_name, ramQuotaMB=bucket_ram,
                       proxyPort=info.moxi)
    ready = BucketOperationHelper.wait_for_memcached(master, bucket_name)
    statuses = rest.node_statuses()
    rest.rebalance(otpNodes=[status.id for status in statuses], ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance failed after adding these nodes {0}".format(statuses))
    self.assertTrue(ready, "wait_for_memcached failed")
def _install_and_upgrade(self, initial_version='1.6.5.3',
                         create_buckets=False,
                         insert_data=False,
                         start_upgraded_first=True,
                         load_ratio=-1,
                         roll_upgrade=False,
                         upgrade_path=[],
                         do_new_rest=False):
    """Install `initial_version` on every server, cluster them, then upgrade.

    Two modes: offline (stop all servers, upgrade each, restart) when
    roll_upgrade is False, or rolling (eject one node at a time, upgrade
    it, rebalance it back) when True. Optionally creates buckets, loads
    data, and verifies keys survive the upgrade.

    NOTE(review): `upgrade_path=[]` is a mutable default argument — safe
    only because it is copied into node_upgrade_path below, never mutated.
    """
    node_upgrade_path = []
    node_upgrade_path.extend(upgrade_path)
    #then start them in whatever order you want
    inserted_keys = []
    log = logger.Logger.get_logger()
    if roll_upgrade:
        log.info("performing an online upgrade")
    input = TestInputSingleton.input
    rest_settings = input.membase_settings
    servers = input.servers
    save_upgrade_config = False
    is_amazon = False
    if input.test_params.get('amazon', False):
        is_amazon = True
    # product name changed between the membase and couchbase eras
    if initial_version.startswith("1.6") or initial_version.startswith("1.7"):
        product = 'membase-server-enterprise'
    else:
        product = 'couchbase-server-enterprise'
    # install older build on all nodes
    for server in servers:
        remote = RemoteMachineShellConnection(server)
        rest = RestConnection(server)
        info = remote.extract_remote_info()
        # check to see if we are installing from latestbuilds or releases
        # note: for newer releases (1.8.0) even release versions can have the
        # form 1.8.0r-55
        if re.search('r', initial_version):
            builds, changes = BuildQuery().get_all_builds()
            older_build = BuildQuery().find_membase_build(
                builds,
                deliverable_type=info.deliverable_type,
                os_architecture=info.architecture_type,
                build_version=initial_version,
                product=product,
                is_amazon=is_amazon)
        else:
            older_build = BuildQuery().find_membase_release_build(
                deliverable_type=info.deliverable_type,
                os_architecture=info.architecture_type,
                build_version=initial_version,
                product=product,
                is_amazon=is_amazon)
        # wipe any existing install (either product) before downgrading
        remote.membase_uninstall()
        remote.couchbase_uninstall()
        remote.stop_membase()
        remote.stop_couchbase()
        remote.download_build(older_build)
        #now let's install ?
        remote.membase_install(older_build)
        RestHelper(rest).is_ns_server_running(
            testconstants.NS_SERVER_TIMEOUT)
        rest.init_cluster_port(rest_settings.rest_username,
                               rest_settings.rest_password)
        rest.init_cluster_memoryQuota(
            memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
        remote.disconnect()
    bucket_data = {}
    master = servers[0]
    if create_buckets:
        #let's create buckets
        #wait for the bucket
        #bucket port should also be configurable , pass it as the
        #parameter to this test ? later
        self._create_default_bucket(master)
        inserted_keys = self._load_data(master, load_ratio)
        _create_load_multiple_bucket(self, master, bucket_data, howmany=2)
    # cluster all the nodes together
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                   rest_settings, self)
    rest = RestConnection(master)
    nodes = rest.node_statuses()
    otpNodeIds = []
    for node in nodes:
        otpNodeIds.append(node.id)
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(
        rebalanceStarted,
        "unable to start rebalance on master node {0}".format(master.ip))
    log.info('started rebalance operation on master node {0}'.format(
        master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(
        rebalanceSucceeded,
        "rebalance operation for nodes: {0} was not successful".format(
            otpNodeIds))
    # 1.7.0/1.7.1 require the config to be saved explicitly before upgrade
    if initial_version == "1.7.0" or initial_version == "1.7.1":
        self._save_config(rest_settings, master)
    input_version = input.test_params['version']
    node_upgrade_path.append(input_version)
    current_version = initial_version
    previous_version = current_version
    #if we dont want to do roll_upgrade ?
    log.info("Upgrade path: {0} -> {1}".format(initial_version, node_upgrade_path))
    log.info("List of servers {0}".format(servers))
    if not roll_upgrade:
        # offline upgrade: stop everything, upgrade each node, restart
        for version in node_upgrade_path:
            previous_version = current_version
            current_version = version
            if version != initial_version:
                log.info("Upgrading to version {0}".format(version))
                self._stop_membase_servers(servers)
                if previous_version.startswith("1.7") and current_version.startswith("1.8"):
                    save_upgrade_config = True
                # No need to save the upgrade config from 180 to 181
                if previous_version.startswith("1.8.0") and current_version.startswith("1.8.1"):
                    save_upgrade_config = False
                appropriate_build = _get_build(servers[0], version,
                                               is_amazon=is_amazon)
                self.assertTrue(
                    appropriate_build.url,
                    msg="unable to find build {0}".format(version))
                for server in servers:
                    remote = RemoteMachineShellConnection(server)
                    remote.download_build(appropriate_build)
                    remote.membase_upgrade(
                        appropriate_build,
                        save_upgrade_config=save_upgrade_config)
                    RestHelper(
                        RestConnection(server)).is_ns_server_running(
                            testconstants.NS_SERVER_TIMEOUT)
                    #verify admin_creds still set
                    pools_info = RestConnection(server).get_pools_info()
                    self.assertTrue(pools_info['implementationVersion'],
                                    appropriate_build.product_version)
                    if start_upgraded_first:
                        log.info("Starting server {0} post upgrade".format(
                            server))
                        remote.start_membase()
                    else:
                        remote.stop_membase()
                    remote.disconnect()
                if not start_upgraded_first:
                    log.info("Starting all servers together")
                    self._start_membase_servers(servers)
                time.sleep(TIMEOUT_SECS)
                if version == "1.7.0" or version == "1.7.1":
                    self._save_config(rest_settings, master)
                if create_buckets:
                    self.assertTrue(
                        BucketOperationHelper.wait_for_bucket_creation(
                            'default', RestConnection(master)),
                        msg="bucket 'default' does not exist..")
                if insert_data:
                    self._verify_data(master, rest, inserted_keys)
    # rolling upgrade
    else:
        version = input.test_params['version']
        appropriate_build = _get_build(servers[0], version,
                                       is_amazon=is_amazon)
        self.assertTrue(appropriate_build.url,
                        msg="unable to find build {0}".format(version))
        # rebalance node out
        # remove membase from node
        # install destination version onto node
        # rebalance it back into the cluster
        for server_index in range(len(servers)):
            server = servers[server_index]
            # index -1 wraps to the last server on the first iteration
            master = servers[server_index - 1]
            log.info("current master is {0}, rolling node is {1}".format(
                master, server))
            rest = RestConnection(master)
            nodes = rest.node_statuses()
            allNodes = []
            toBeEjectedNodes = []
            for node in nodes:
                allNodes.append(node.id)
                if "{0}:{1}".format(node.ip, node.port) == "{0}:{1}".format(
                        server.ip, server.port):
                    toBeEjectedNodes.append(node.id)
            helper = RestHelper(rest)
            removed = helper.remove_nodes(knownNodes=allNodes,
                                          ejectedNodes=toBeEjectedNodes)
            self.assertTrue(
                removed,
                msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
            remote = RemoteMachineShellConnection(server)
            remote.download_build(appropriate_build)
            # if initial version is 180
            # Don't uninstall the server
            if not initial_version.startswith('1.8.0'):
                remote.membase_uninstall()
                remote.couchbase_uninstall()
                remote.membase_install(appropriate_build)
            else:
                remote.membase_upgrade(appropriate_build)
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            log.info(
                "sleep for 10 seconds to wait for membase-server to start..."
            )
            time.sleep(TIMEOUT_SECS)
            rest.init_cluster_port(rest_settings.rest_username,
                                   rest_settings.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()
            #readd this to the cluster
            ClusterOperationHelper.add_all_nodes_or_assert(
                master, [server], rest_settings, self)
            nodes = rest.node_statuses()
            otpNodeIds = []
            for node in nodes:
                otpNodeIds.append(node.id)
            # Issue rest call to the newly added node
            # MB-5108
            if do_new_rest:
                master = server
                rest = RestConnection(master)
            rebalanceStarted = rest.rebalance(otpNodeIds, [])
            self.assertTrue(
                rebalanceStarted,
                "unable to start rebalance on master node {0}".format(
                    master.ip))
            log.info(
                'started rebalance operation on master node {0}'.format(
                    master.ip))
            rebalanceSucceeded = rest.monitorRebalance()
            self.assertTrue(
                rebalanceSucceeded,
                "rebalance operation for nodes: {0} was not successful".
                format(otpNodeIds))
        #TODO: how can i verify that the cluster init config is preserved
        # verify data on upgraded nodes
        if create_buckets:
            self.assertTrue(BucketOperationHelper.wait_for_bucket_creation(
                'default', RestConnection(master)),
                msg="bucket 'default' does not exist..")
        if insert_data:
            self._verify_data(master, rest, inserted_keys)
            rest = RestConnection(master)
            buckets = rest.get_buckets()
            for bucket in buckets:
                BucketOperationHelper.keys_exist_or_assert(
                    bucket_data[bucket.name]["inserted_keys"], master,
                    bucket.name, self)
def _install_and_upgrade(self, initial_version='1.6.5.3',
                         create_buckets=False,
                         insert_data=False,
                         start_upgraded_first=True,
                         load_ratio=-1,
                         roll_upgrade=False,
                         upgrade_path=[]):
    """Install `initial_version` everywhere, cluster the nodes, then upgrade.

    Offline mode (roll_upgrade False) stops all servers and upgrades every
    node through each version in upgrade_path; rolling mode ejects one node
    at a time, reinstalls it at the target version, and rebalances it back.
    Optionally creates buckets, loads keys, and verifies they survive.

    NOTE(review): `upgrade_path=[]` is a mutable default — safe here only
    because it is copied into node_upgrade_path and never mutated.
    """
    node_upgrade_path = []
    node_upgrade_path.extend(upgrade_path)
    #then start them in whatever order you want
    inserted_keys = []
    log = logger.Logger.get_logger()
    if roll_upgrade:
        log.info("performing a rolling upgrade")
    input = TestInputSingleton.input
    rest_settings = input.membase_settings
    servers = input.servers
    save_upgrade_config = False
    is_amazon = False
    if input.test_params.get('amazon',False):
        is_amazon = True
    # install older build on all nodes
    for server in servers:
        remote = RemoteMachineShellConnection(server)
        rest = RestConnection(server)
        info = remote.extract_remote_info()
        older_build = BuildQuery().find_membase_release_build(
            deliverable_type=info.deliverable_type,
            os_architecture=info.architecture_type,
            build_version=initial_version,
            product='membase-server-enterprise',
            is_amazon=is_amazon)
        # wipe any existing install (either product) before downgrading
        remote.membase_uninstall()
        remote.couchbase_uninstall()
        remote.execute_command('/etc/init.d/membase-server stop')
        remote.download_build(older_build)
        #now let's install ?
        remote.membase_install(older_build)
        RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
        rest.init_cluster_port(rest_settings.rest_username,
                               rest_settings.rest_password)
        rest.init_cluster_memoryQuota(
            memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
        remote.disconnect()
    bucket_data = {}
    master = servers[0]
    if create_buckets:
        #let's create buckets
        #wait for the bucket
        #bucket port should also be configurable , pass it as the
        #parameter to this test ? later
        self._create_default_bucket(master)
        inserted_keys = self._load_data(master, load_ratio)
        _create_load_multiple_bucket(self, master, bucket_data, howmany=2)
    # cluster all the nodes together
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                   rest_settings, self)
    rest = RestConnection(master)
    nodes = rest.node_statuses()
    otpNodeIds = []
    for node in nodes:
        otpNodeIds.append(node.id)
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(rebalanceStarted,
                    "unable to start rebalance on master node {0}".format(master.ip))
    log.info('started rebalance operation on master node {0}'.format(master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(rebalanceSucceeded,
                    "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
    # 1.7.0/1.7.1 require the config to be saved explicitly before upgrade
    if initial_version == "1.7.0" or initial_version == "1.7.1":
        self._save_config(rest_settings, master)
    input_version = input.test_params['version']
    node_upgrade_path.append(input_version)
    #if we dont want to do roll_upgrade ?
    log.info("Upgrade path: {0} -> {1}".format(initial_version, node_upgrade_path))
    log.info("List of servers {0}".format(servers))
    if not roll_upgrade:
        # offline upgrade: stop everything, upgrade each node, restart
        for version in node_upgrade_path:
            # NOTE(review): identity comparison on strings ("is not") is
            # fragile — presumably intended as "!=" ; confirm before changing.
            if version is not initial_version:
                log.info("Upgrading to version {0}".format(version))
                self._stop_membase_servers(servers)
                if re.search('1.8', version):
                    save_upgrade_config = True
                appropriate_build = _get_build(servers[0], version,
                                               is_amazon=is_amazon)
                self.assertTrue(appropriate_build.url,
                                msg="unable to find build {0}".format(version))
                for server in servers:
                    remote = RemoteMachineShellConnection(server)
                    remote.download_build(appropriate_build)
                    remote.membase_upgrade(appropriate_build,
                                           save_upgrade_config=save_upgrade_config)
                    RestHelper(RestConnection(server)).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
                    #verify admin_creds still set
                    pools_info = RestConnection(server).get_pools_info()
                    self.assertTrue(pools_info['implementationVersion'],
                                    appropriate_build.product_version)
                    if start_upgraded_first:
                        log.info("Starting server {0} post upgrade".format(server))
                        remote.start_membase()
                    else:
                        remote.stop_membase()
                    remote.disconnect()
                if not start_upgraded_first:
                    log.info("Starting all servers together")
                    self._start_membase_servers(servers)
                time.sleep(TIMEOUT_SECS)
                if version == "1.7.0" or version == "1.7.1":
                    self._save_config(rest_settings, master)
                if create_buckets:
                    self.assertTrue(BucketOperationHelper.wait_for_bucket_creation('default', RestConnection(master)),
                                    msg="bucket 'default' does not exist..")
                if insert_data:
                    self._verify_data(master, rest, inserted_keys)
    # rolling upgrade
    else:
        version = input.test_params['version']
        appropriate_build = _get_build(servers[0], version, is_amazon=is_amazon)
        self.assertTrue(appropriate_build.url,
                        msg="unable to find build {0}".format(version))
        # rebalance node out
        # remove membase from node
        # install destination version onto node
        # rebalance it back into the cluster
        for server_index in range(len(servers)):
            server = servers[server_index]
            # index -1 wraps to the last server on the first iteration
            master = servers[server_index - 1]
            log.info("current master is {0}, rolling node is {1}".format(master, server))
            rest = RestConnection(master)
            nodes = rest.node_statuses()
            allNodes = []
            toBeEjectedNodes = []
            for node in nodes:
                allNodes.append(node.id)
                if "{0}:{1}".format(node.ip, node.port) == "{0}:{1}".format(server.ip, server.port):
                    toBeEjectedNodes.append(node.id)
            helper = RestHelper(rest)
            removed = helper.remove_nodes(knownNodes=allNodes,
                                          ejectedNodes=toBeEjectedNodes)
            self.assertTrue(removed,
                            msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
            remote = RemoteMachineShellConnection(server)
            remote.membase_uninstall()
            remote.couchbase_uninstall()
            remote.download_build(appropriate_build)
            remote.membase_install(appropriate_build)
            RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
            log.info("sleep for 10 seconds to wait for membase-server to start...")
            time.sleep(TIMEOUT_SECS)
            rest.init_cluster_port(rest_settings.rest_username,
                                   rest_settings.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()
            #readd this to the cluster
            ClusterOperationHelper.add_all_nodes_or_assert(master, [server],
                                                           rest_settings, self)
            nodes = rest.node_statuses()
            otpNodeIds = []
            for node in nodes:
                otpNodeIds.append(node.id)
            rebalanceStarted = rest.rebalance(otpNodeIds, [])
            self.assertTrue(rebalanceStarted,
                            "unable to start rebalance on master node {0}".format(master.ip))
            log.info('started rebalance operation on master node {0}'.format(master.ip))
            rebalanceSucceeded = rest.monitorRebalance()
            self.assertTrue(rebalanceSucceeded,
                            "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
            #ClusterOperationHelper.verify_persistence(servers, self)
        #TODO: how can i verify that the cluster init config is preserved
        # verify data on upgraded nodes
        if create_buckets:
            self.assertTrue(BucketOperationHelper.wait_for_bucket_creation('default', RestConnection(master)),
                            msg="bucket 'default' does not exist..")
        if insert_data:
            self._verify_data(master, rest, inserted_keys)
            rest = RestConnection(master)
            buckets = rest.get_buckets()
            for bucket in buckets:
                BucketOperationHelper.keys_exist_or_assert(
                    bucket_data[bucket.name]["inserted_keys"], master,
                    bucket.name, self)
def _install_and_upgrade(self, initial_version='1.6.5.3', create_buckets=False, insert_data=False, start_upgraded_first=True, load_ratio=-1, roll_upgrade=False, upgrade_path=[]):
    # NOTE(review): mutable default argument `upgrade_path=[]` is shared across
    # calls; safe only if callers never rely on it being fresh — confirm.
    """Install an initial Membase/Couchbase build on every node, cluster and
    rebalance them, optionally create buckets and load data, then upgrade the
    cluster to each version in `upgrade_path` (plus the version from the test
    input) and verify buckets/data survive.

    Two upgrade strategies:
      * offline (default): stop every server, upgrade all nodes in place,
        restart (order controlled by `start_upgraded_first`);
      * rolling (`roll_upgrade=True`): rebalance each node out one at a time,
        reinstall the target build on it, and rebalance it back in.

    Parameters:
      initial_version       -- build version installed on all nodes first.
      create_buckets        -- create the default bucket (plus extra buckets)
                               after the initial cluster rebalance.
      insert_data           -- load keys and verify them after the upgrade.
      start_upgraded_first  -- offline path only: if False, keep each upgraded
                               node stopped until all are upgraded, then start
                               them together.
      load_ratio            -- passed through to self._load_data.
      roll_upgrade          -- choose the rolling strategy.
      upgrade_path          -- intermediate versions to pass through before the
                               final version from the test input.

    Fails the test (via assertTrue) if any build lookup, rebalance, bucket
    creation, or data verification step does not succeed.
    """
    node_upgrade_path = []
    node_upgrade_path.extend(upgrade_path)
    #then start them in whatever order you want
    inserted_keys = []
    log = logger.Logger.get_logger()
    if roll_upgrade:
        log.info("performing a rolling upgrade")
    input = TestInputSingleton.input
    input_version = input.test_params['version']
    rest_settings = input.membase_settings
    servers = input.servers
    is_amazon = False
    if input.test_params.get('amazon', False):
        is_amazon = True
    # install older build on all nodes
    for server in servers:
        remote = RemoteMachineShellConnection(server)
        rest = RestConnection(server)
        info = remote.extract_remote_info()
        older_build = BuildQuery().find_membase_release_build(deliverable_type=info.deliverable_type, os_architecture=info.architecture_type, build_version=initial_version, product='membase-server-enterprise', is_amazon=is_amazon)
        remote.membase_uninstall()
        remote.couchbase_uninstall()
        # product was rebranded membase ("mb") -> couchbase ("cb") at 1.8
        if older_build.product_version.startswith("1.8"):
            abbr_product = "cb"
        else:
            abbr_product = "mb"
        remote.download_binary_in_win(older_build.url, abbr_product, initial_version)
        #now let's install ?
        remote.install_server_win(older_build, initial_version)
        RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
        rest.init_cluster(rest_settings.rest_username, rest_settings.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
        remote.disconnect()
    bucket_data = {}
    master = servers[0]
    # cluster all the nodes together
    ClusterOperationHelper.add_all_nodes_or_assert(master, servers, rest_settings, self)
    rest = RestConnection(master)
    nodes = rest.node_statuses()
    otpNodeIds = []
    for node in nodes:
        otpNodeIds.append(node.id)
    rebalanceStarted = rest.rebalance(otpNodeIds, [])
    self.assertTrue(rebalanceStarted, "unable to start rebalance on master node {0}".format(master.ip))
    log.info('started rebalance operation on master node {0}'.format(master.ip))
    rebalanceSucceeded = rest.monitorRebalance()
    self.assertTrue(rebalanceSucceeded, "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
    if create_buckets:
        #let's create buckets
        #wait for the bucket
        #bucket port should also be configurable , pass it as the
        #parameter to this test ? later
        self._create_default_bucket(master)
        inserted_keys = self._load_data(master, load_ratio)
        _create_load_multiple_bucket(self, master, bucket_data, howmany=2)
    #if initial_version == "1.7.0" or initial_version == "1.7.1":
    #   self._save_config(rest_settings, master)
    # the version from the test input is always the final upgrade target
    node_upgrade_path.append(input_version)
    #if we dont want to do roll_upgrade ?
    log.info("Upgrade path: {0} -> {1}".format(initial_version, node_upgrade_path))
    log.info("List of servers {0}".format(servers))
    if not roll_upgrade:
        # offline upgrade: stop every node, upgrade all in place
        for version in node_upgrade_path:
            # NOTE(review): `is not` performs identity comparison on strings;
            # `!=` was almost certainly intended — works only by accident of
            # string interning. Confirm before relying on intermediate hops.
            if version is not initial_version:
                log.info("SHUTDOWN ALL CB OR MB SERVERS IN CLUSTER BEFORE DOING UPGRADE")
                for server in servers:
                    shell = RemoteMachineShellConnection(server)
                    shell.stop_membase()
                    shell.disconnect()
                log.info("Upgrading to version {0}".format(version))
                appropriate_build = _get_build(servers[0], version, is_amazon=is_amazon)
                self.assertTrue(appropriate_build.url, msg="unable to find build {0}".format(version))
                for server in servers:
                    remote = RemoteMachineShellConnection(server)
                    # NOTE(review): abbr_product is only reassigned for 1.8+;
                    # otherwise the value left over from the install loop above
                    # is reused — confirm that is the intended fallback.
                    if version.startswith("1.8"):
                        abbr_product = "cb"
                    remote.download_binary_in_win(appropriate_build.url, abbr_product, version)
                    log.info("###### START UPGRADE. #########")
                    # NOTE(review): `info` here is whatever
                    # extract_remote_info() returned for the LAST server of the
                    # install loop, not this server — fine only for homogeneous
                    # clusters; verify.
                    remote.membase_upgrade_win(info.architecture_type, info.windows_name, version, initial_version)
                    RestHelper(RestConnection(server)).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
                    #verify admin_creds still set
                    pools_info = RestConnection(server).get_pools_info()
                    # NOTE(review): assertTrue's second argument is only the
                    # failure message; this does NOT compare the two values —
                    # assertEqual was probably intended.
                    self.assertTrue(pools_info['implementationVersion'], appropriate_build.product_version)
                    if not start_upgraded_first:
                        remote.stop_membase()
                    remote.disconnect()
                if not start_upgraded_first:
                    log.info("Starting all servers together")
                    self._start_membase_servers(servers)
                time.sleep(TIMEOUT_SECS)
                if create_buckets:
                    self.assertTrue(BucketOperationHelper.wait_for_bucket_creation('default', RestConnection(master)), msg="bucket 'default' does not exist..")
                if insert_data:
                    self._verify_data(master, rest, inserted_keys)
    # rolling upgrade
    else:
        version = input.test_params['version']
        if version.startswith("1.8"):
            abbr_product = "cb"
        appropriate_build = _get_build(servers[0], version, is_amazon=is_amazon)
        self.assertTrue(appropriate_build.url, msg="unable to find build {0}".format(version))
        # rebalance node out
        # remove membase from node
        # install destination version onto node
        # rebalance it back into the cluster
        for server_index in range(len(servers)):
            server = servers[server_index]
            # previous node in the list acts as master (index -1 wraps to the
            # last node for the first iteration)
            master = servers[server_index - 1]
            log.info("current master is {0}, rolling node is {1}".format(master, server))
            rest = RestConnection(master)
            nodes = rest.node_statuses()
            allNodes = []
            toBeEjectedNodes = []
            for node in nodes:
                allNodes.append(node.id)
                if "{0}:{1}".format(node.ip, node.port) == "{0}:{1}".format(server.ip, server.port):
                    toBeEjectedNodes.append(node.id)
            helper = RestHelper(rest)
            removed = helper.remove_nodes(knownNodes=allNodes, ejectedNodes=toBeEjectedNodes)
            self.assertTrue(removed, msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
            remote = RemoteMachineShellConnection(server)
            remote.membase_uninstall()
            remote.couchbase_uninstall()
            if appropriate_build.product == 'membase-server-enterprise':
                abbr_product = "mb"
            else:
                abbr_product = "cb"
            remote.download_binary_in_win(appropriate_build.url, abbr_product, version)
            remote.install_server_win(appropriate_build, version)
            RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
            time.sleep(TIMEOUT_SECS)
            rest.init_cluster(rest_settings.rest_username, rest_settings.rest_password)
            rest.init_cluster_memoryQuota(memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()
            #readd this to the cluster
            ClusterOperationHelper.add_all_nodes_or_assert(master, [server], rest_settings, self)
            nodes = rest.node_statuses()
            log.info("wait 30 seconds before asking older node for start rebalance")
            time.sleep(30)
            otpNodeIds = []
            for node in nodes:
                otpNodeIds.append(node.id)
            rebalanceStarted = rest.rebalance(otpNodeIds, [])
            self.assertTrue(rebalanceStarted, "unable to start rebalance on master node {0}".format(master.ip))
            log.info('started rebalance operation on master node {0}'.format(master.ip))
            rebalanceSucceeded = rest.monitorRebalance()
            self.assertTrue(rebalanceSucceeded, "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
            #ClusterOperationHelper.verify_persistence(servers, self)
        #TODO: how can i verify that the cluster init config is preserved
        # verify data on upgraded nodes
        if create_buckets:
            self.assertTrue(BucketOperationHelper.wait_for_bucket_creation('default', RestConnection(master)), msg="bucket 'default' does not exist..")
        if insert_data:
            self._verify_data(master, rest, inserted_keys)
            rest = RestConnection(master)
            buckets = rest.get_buckets()
            for bucket in buckets:
                BucketOperationHelper.keys_exist_or_assert(bucket_data[bucket.name]["inserted_keys"], master, bucket.name, self)
def common_test_body(self, keys_count, replica, load_ratio, failover_reason):
    """Build a cluster, load data, then repeatedly fail over `replica` nodes
    (by stopping the server or firewalling it, per `failover_reason`),
    rebalance them out, and verify replication and data after each round.

    Parameters:
      keys_count      -- number of keys for FailoverBaseTest.load_data.
      replica         -- bucket replica count; also how many nodes are failed
                         over per round.
      load_ratio      -- RAM load ratio passed to load_data.
      failover_reason -- 'stop_server' or 'firewall'; anything else skips the
                         induced failure and fails the node over directly.

    Fails the test on any unsuccessful rebalance, failover, or verification.
    """
    log = logger.Logger.get_logger()
    log.info("keys_count : {0}".format(keys_count))
    log.info("replica : {0}".format(replica))
    log.info("load_ratio : {0}".format(load_ratio))
    log.info("failover_reason : {0}".format(failover_reason))
    master = self._servers[0]
    log.info('picking server : {0} as the master'.format(master))
    rest = RestConnection(master)
    info = rest.get_nodes_self()
    rest.init_cluster(username=master.rest_username, password=master.rest_password)
    rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
    # give the bucket two thirds of the node quota (Python 2 integer division)
    bucket_ram = info.memoryQuota * 2 / 3
    bucket = 'default'
    rest.create_bucket(bucket=bucket, ramQuotaMB=bucket_ram, replicaNumber=replica, proxyPort=info.moxi)
    ready = BucketOperationHelper.wait_for_memcached(master, bucket)
    self.assertTrue(ready, "wait_for_memcached_failed")
    credentials = self._input.membase_settings
    ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
    nodes = rest.node_statuses()
    rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
    msg = "rebalance failed after adding these nodes {0}".format(nodes)
    self.assertTrue(rest.monitorRebalance(), msg=msg)
    inserted_keys = FailoverBaseTest.load_data(master, bucket, keys_count, load_ratio)
    inserted_count = len(inserted_keys)
    log.info('inserted {0} keys'.format(inserted_count))
    nodes = rest.node_statuses()
    # keep failing over while enough nodes remain to still host `replica`
    # copies plus at least one active node
    while (len(nodes) - replica) > 1:
        # let replication catch up before picking victims
        final_replication_state = RestHelper(rest).wait_for_replication(900)
        msg = "replication state after waiting for up to 15 minutes : {0}"
        self.log.info(msg.format(final_replication_state))
        chosen = RebalanceHelper.pick_nodes(master, howmany=replica)
        for node in chosen:
            #let's do op
            if failover_reason == 'stop_server':
                self.stop_server(node)
                # NOTE(review): message mentions a 10 second delay but no
                # sleep follows; the wait below covers it — confirm intent.
                log.info("10 seconds delay to wait for membase-server to shutdown")
                #wait for 5 minutes until node is down
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300), msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                RemoteUtilHelper.enable_firewall(self._servers, node, bidirectional=self.bidirectional)
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300), msg="node status is not unhealthy even after waiting for 5 minutes")
            failed_over = rest.fail_over(node.id)
            if not failed_over:
                self.log.info("unable to failover the node the first time. try again in 60 seconds..")
                # retry once after a pause (NOTE(review): message says 60s but
                # the sleep is 75s)
                time.sleep(75)
                failed_over = rest.fail_over(node.id)
            self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
            log.info("failed over node : {0}".format(node.id))
        #REMOVEME -
        log.info("10 seconds sleep after failover before invoking rebalance...")
        time.sleep(10)
        # rebalance the failed-over nodes out of the cluster
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[node.id for node in chosen])
        msg = "rebalance failed while removing failover nodes {0}".format(chosen)
        self.assertTrue(rest.monitorRebalance(), msg=msg)
        FailoverBaseTest.replication_verification(master, bucket, replica, inserted_count, self)
        # refresh node list so the loop condition reflects the shrunk cluster
        nodes = rest.node_statuses()
    FailoverBaseTest.verify_data(master, inserted_keys, bucket, self)
def common_test_body(self, keys_count, replica, load_ratio, failover_reason):
    """Build a cluster, load data, then repeatedly fail over `replica` nodes
    (stop-server or firewall, per `failover_reason`), rebalance them out, and
    verify replication and data after each round.

    Variant that records each failed-over node's IP in self._failed_nodes and
    monitors the rebalance with stop_if_loop=True.

    Parameters:
      keys_count      -- number of keys for FailoverBaseTest.load_data.
      replica         -- bucket replica count; also how many nodes are failed
                         over per round.
      load_ratio      -- RAM load ratio passed to load_data.
      failover_reason -- 'stop_server' or 'firewall'; anything else skips the
                         induced failure and fails the node over directly.

    Fails the test on any unsuccessful rebalance, failover, or verification.
    """
    log = logger.Logger.get_logger()
    log.info("keys_count : {0}".format(keys_count))
    log.info("replica : {0}".format(replica))
    log.info("load_ratio : {0}".format(load_ratio))
    log.info("failover_reason : {0}".format(failover_reason))
    master = self._servers[0]
    log.info('picking server : {0} as the master'.format(master))
    rest = RestConnection(master)
    info = rest.get_nodes_self()
    rest.init_cluster(username=master.rest_username, password=master.rest_password)
    rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
    # give the bucket two thirds of the node quota (Python 2 integer division)
    bucket_ram = info.memoryQuota * 2 / 3
    bucket = 'default'
    rest.create_bucket(bucket=bucket, ramQuotaMB=bucket_ram, replicaNumber=replica, proxyPort=info.moxi)
    ready = BucketOperationHelper.wait_for_memcached(master, bucket)
    self.assertTrue(ready, "wait_for_memcached_failed")
    credentials = self._input.membase_settings
    ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
    nodes = rest.node_statuses()
    rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
    msg = "rebalance failed after adding these nodes {0}".format(nodes)
    self.assertTrue(rest.monitorRebalance(), msg=msg)
    inserted_keys = FailoverBaseTest.load_data(master, bucket, keys_count, load_ratio)
    inserted_count = len(inserted_keys)
    log.info('inserted {0} keys'.format(inserted_count))
    nodes = rest.node_statuses()
    # keep failing over while enough nodes remain to still host `replica`
    # copies plus at least one active node
    while (len(nodes) - replica) > 1:
        # let replication catch up before picking victims
        final_replication_state = RestHelper(rest).wait_for_replication(900)
        msg = "replication state after waiting for up to 15 minutes : {0}"
        self.log.info(msg.format(final_replication_state))
        chosen = RebalanceHelper.pick_nodes(master, howmany=replica)
        for node in chosen:
            #let's do op
            if failover_reason == 'stop_server':
                self.stop_server(node)
                # NOTE(review): message mentions a 10 second delay but no
                # sleep follows; the wait below covers it — confirm intent.
                log.info("10 seconds delay to wait for membase-server to shutdown")
                #wait for 5 minutes until node is down
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300), msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                RemoteUtilHelper.enable_firewall(self._servers, node, bidirectional=self.bidirectional)
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300), msg="node status is not unhealthy even after waiting for 5 minutes")
            failed_over = rest.fail_over(node.id)
            if not failed_over:
                self.log.info("unable to failover the node the first time. try again in 60 seconds..")
                # retry once after a pause (NOTE(review): message says 60s but
                # the sleep is 75s)
                time.sleep(75)
                failed_over = rest.fail_over(node.id)
            self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
            log.info("failed over node : {0}".format(node.id))
            # remember which nodes were failed over (used by teardown/cleanup)
            self._failed_nodes.append(node.ip)
        log.info("10 seconds sleep after failover before invoking rebalance...")
        time.sleep(10)
        # rebalance the failed-over nodes out of the cluster
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[node.id for node in chosen])
        msg = "rebalance failed while removing failover nodes {0}".format(chosen)
        self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
        FailoverBaseTest.replication_verification(master, bucket, replica, inserted_count, self)
        # refresh node list so the loop condition reflects the shrunk cluster
        nodes = rest.node_statuses()
    FailoverBaseTest.verify_data(master, inserted_keys, bucket, self)
def common_test_body(self, replica, failover_reason, load_ratio, age, max_nodes): log = logger.Logger.get_logger() bucket_name = "default" log.info("replica : {0}".format(replica)) log.info("failover_reason : {0}".format(failover_reason)) log.info("load_ratio : {0}".format(load_ratio)) log.info("age : {0}".format(age)) log.info("max_nodes : {0}".format(max_nodes)) master = self._servers[0] log.info('picking server : {0} as the master'.format(master)) rest = RestConnection(master) info = rest.get_nodes_self() rest.init_cluster(username=master.rest_username, password=master.rest_password) rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved) rest.update_autofailover_settings(True, age, max_nodes) rest.reset_autofailover() bucket_ram = info.memoryQuota * 2 / 3 rest.create_bucket(bucket=bucket_name, ramQuotaMB=bucket_ram, replicaNumber=replica, proxyPort=info.moxi) ready = BucketOperationHelper.wait_for_memcached(master, bucket_name) self.assertTrue(ready, "wait_for_memcached failed") credentials = self._input.membase_settings log.info("inserting some items in the master before adding any nodes") distribution = {512: 0.4, 1 * 1024: 0.59, 5 * 1024: 0.01} if load_ratio > 10: distribution = {5 * 1024: 0.4, 10 * 1024: 0.5, 20 * 1024: 0.1} ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self) nodes = rest.node_statuses() rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[]) msg = "rebalance failed after adding these nodes {0}".format(nodes) self.assertTrue(rest.monitorRebalance(), msg=msg) inserted_count, rejected_count =\ MemcachedClientHelper.load_bucket(servers=self._servers, ram_load_ratio=load_ratio, value_size_distribution=distribution, number_of_threads=1) log.info('inserted {0} keys'.format(inserted_count)) nodes = rest.node_statuses() # why are we in this while loop? 
while (len(nodes) - replica) >= 1: final_replication_state = RestHelper(rest).wait_for_replication(900) msg = "replication state after waiting for up to 15 minutes : {0}" self.log.info(msg.format(final_replication_state)) chosen = AutoFailoverBaseTest.choose_nodes(master, nodes, replica) for node in chosen: #let's do op if failover_reason == 'stop_membase': self.stop_membase(node) log.info("10 seconds delay to wait for membase-server to shutdown") #wait for 5 minutes until node is down self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300), msg="node status is not unhealthy even after waiting for 5 minutes") elif failover_reason == "firewall": self.enable_firewall(node) self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300), msg="node status is not unhealthy even after waiting for 5 minutes") # list pre-autofailover stats stats = rest.get_bucket_stats() self.log.info("pre-autofail - curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count)) AutoFailoverBaseTest.wait_for_failover_or_assert(master, replica, age, self) # manually fail over any unhealthy:active nodes left, max that we should need to manually failover is replica-max_nodes manual_failover_count = replica - max_nodes for node in chosen: self.log.info("checking {0}".format(node.ip)) if node.status.lower() == "unhealthy" and node.clusterMembership == "active": msg = "node {0} not failed over and we are over out manual failover limit of {1}" self.assertTrue(manual_failover_count > 0, msg.format(node.ip, (replica - max_nodes))) self.log.info("manual failover {0}".format(node.ip)) rest.fail_over(node.id) manual_failover_count -= 1 stats = rest.get_bucket_stats() self.log.info("post-autofail - curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count)) self.assertTrue(stats["curr_items"] == inserted_count, "failover completed but curr_items ({0}) does not match inserted items ({1})".format(stats["curr_items"], inserted_count)) 
log.info("10 seconds sleep after autofailover before invoking rebalance...") time.sleep(10) rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[node.id for node in chosen]) msg="rebalance failed while removing failover nodes {0}".format(chosen) self.assertTrue(rest.monitorRebalance(), msg=msg) nodes = rest.node_statuses() if len(nodes) / (1 + replica) >= 1: final_replication_state = RestHelper(rest).wait_for_replication(900) msg = "replication state after waiting for up to 15 minutes : {0}" self.log.info(msg.format(final_replication_state)) self.assertTrue(RebalanceHelper.wait_till_total_numbers_match(master,bucket_name,600), msg="replication was completed but sum(curr_items) dont match the curr_items_total") start_time = time.time() stats = rest.get_bucket_stats() while time.time() < (start_time + 120) and stats["curr_items"] != inserted_count: self.log.info("curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count)) time.sleep(5) stats = rest.get_bucket_stats() RebalanceHelper.print_taps_from_all_nodes(rest, bucket_name) self.log.info("curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count)) stats = rest.get_bucket_stats() msg = "curr_items : {0} is not equal to actual # of keys inserted : {1}" self.assertEquals(stats["curr_items"], inserted_count, msg=msg.format(stats["curr_items"], inserted_count)) nodes = rest.node_statuses()