def common_setup(self):
    self.cluster_helper = Cluster()
    self.log = logger.Logger.get_logger()
    self.cluster_run = False
    self.input = TestInputSingleton.input
    self.servers = self.input.servers
    serverInfo = self.servers[0]
    rest = RestConnection(serverInfo)
    if len(set([server.ip for server in self.servers])) == 1:
        ip = rest.get_nodes_self().ip
        for server in self.servers:
            server.ip = ip
        self.cluster_run = True
    self.case_number = self.input.param("case_number", 0)
    self.replica = self.input.param("replica", 1)
    self.keys_count = self.input.param("keys-count", 1000)
    self.load_ratio = self.input.param("load-ratio", 1)
    self.ratio_expiry = self.input.param("ratio-expiry", 0.03)
    self.ratio_deletes = self.input.param("ratio-deletes", 0.13)
    self.num_buckets = self.input.param("num-buckets", 1)
    self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
    self.num_initial_servers = self.input.param("num-initial-servers", 3)
    self.fail_orchestrator = self.swap_orchestrator = self.input.param("swap-orchestrator", False)
    self.do_access = self.input.param("do-access", True)
    self.load_started = False
    self.loaders = []
    try:
        # Clear the state left over from a previous invalid run
        if rest._rebalance_progress_status() == "running":
            self.log.warning("rebalancing is still running, previous test should be verified")
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        self.log.info(
            "============== SwapRebalanceBase setup was started for test #{0} {1} ==============".format(
                self.case_number, self._testMethodName))
        SwapRebalanceBase.reset(self)
        # Make sure the test is set up correctly
        min_servers = int(self.num_initial_servers) + int(self.num_swap)
        msg = "minimum {0} nodes required for running swap rebalance"
        self.assertTrue(len(self.servers) >= min_servers, msg=msg.format(min_servers))
        self.log.info("picking server : {0} as the master".format(serverInfo))
        node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
        info = rest.get_nodes_self()
        rest.init_cluster(username=serverInfo.rest_username,
                          password=serverInfo.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
        if self.num_buckets > 10:
            BaseTestCase.change_max_buckets(self, self.num_buckets)
        self.log.info(
            "============== SwapRebalanceBase setup was finished for test #{0} {1} ==============".format(
                self.case_number, self._testMethodName))
        SwapRebalanceBase._log_start(self)
    except Exception, e:
        self.cluster_helper.shutdown()
        self.fail(e)
def tearDown(self):
    try:
        if (hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0
                and TestInputSingleton.input.param("stop-on-failure", False)) \
                or self.input.param("skip_cleanup", False):
            self.log.warn("CLEANUP WAS SKIPPED")
        else:
            self.log.info("============== basetestcase cleanup was started for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
            rest = RestConnection(self.master)
            alerts = rest.get_alerts()
            if alerts is not None and len(alerts) != 0:
                self.log.warn("Alerts were found: {0}".format(alerts))
            if rest._rebalance_progress_status() == 'running':
                self.log.warning("rebalancing is still running, test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
            ClusterOperationHelper.cleanup_cluster(self.servers)
            time.sleep(10)
            ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
            self.log.info("============== basetestcase cleanup was finished for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
    finally:
        # stop all existing task manager threads
        self.cluster.shutdown()
        self._log_finish(self)
def tearDown(self):
    try:
        if self.driver:
            path_screen = self.input.ui_conf['screenshots'] or 'logs/screens'
            full_path = '{1}/screen_{0}.png'.format(time.time(), path_screen)
            self.log.info('screenshot is available: %s' % full_path)
            if not os.path.exists(path_screen):
                os.mkdir(path_screen)
            self.driver.get_screenshot_as_file(os.path.abspath(full_path))
        rest = RestConnection(self.servers[0])
        if rest._rebalance_progress_status() == 'running':
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
        for server in self.servers:
            ClusterOperationHelper.cleanup_cluster([server])
        ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
        if self.driver:
            self.driver.close()
    finally:
        if self.driver:
            self.shell.disconnect()
        self.cluster.shutdown()
def tearDown(self):
    try:
        if self.driver:
            path_screen = self.input.ui_conf['screenshots'] or 'logs/screens'
            full_path = '{1}/screen_{0}.png'.format(time.time(), path_screen)
            self.log.info('screenshot is available: %s' % full_path)
            if not os.path.exists(path_screen):
                os.mkdir(path_screen)
            self.driver.get_screenshot_as_file(os.path.abspath(full_path))
        rest = RestConnection(self.servers[0])
        if rest._rebalance_progress_status() == 'running':
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
        for server in self.servers:
            ClusterOperationHelper.cleanup_cluster([server])
        ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
        if self.driver:
            self.driver.close()
    except Exception as e:
        raise e
    finally:
        if self.driver:
            self.shell.disconnect()
        self.cluster.shutdown()
def reset(self):
    self.log.info(
        "============== SwapRebalanceBase cleanup was started for test #{0} {1} ==============".format(
            self.case_number, self._testMethodName))
    self.log.info("Stopping load in Teardown")
    SwapRebalanceBase.stop_load(self.loaders)
    for server in self.servers:
        rest = RestConnection(server)
        if rest._rebalance_progress_status() == "running":
            self.log.warning("rebalancing is still running, test should be verified")
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
    BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
    for server in self.servers:
        ClusterOperationHelper.cleanup_cluster([server])
        if server.data_path:
            rest = RestConnection(server)
            rest.set_data_path(data_path=server.data_path)
    ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
    self.log.info(
        "============== SwapRebalanceBase cleanup was finished for test #{0} {1} ==============".format(
            self.case_number, self._testMethodName))
def tearDown(self):
    try:
        if (hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0
                and TestInputSingleton.input.param("stop-on-failure", False)) \
                or self.input.param("skip_cleanup", False):
            self.log.warn("CLEANUP WAS SKIPPED")
        else:
            self.log.info("============== basetestcase cleanup was started for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
            rest = RestConnection(self.master)
            alerts = rest.get_alerts()
            if alerts is not None and len(alerts) != 0:
                self.log.warn("Alerts were found: {0}".format(alerts))
            if rest._rebalance_progress_status() == 'running':
                self.log.warning("rebalancing is still running, test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
            ClusterOperationHelper.cleanup_cluster(self.servers)
            self.sleep(10)
            ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
            self.log.info("============== basetestcase cleanup was finished for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
    except BaseException:
        # increase case_number to retry tearDown in setup for the next test
        self.case_number += 1000
    finally:
        # stop all existing task manager threads
        self.cluster.shutdown()
        self._log_finish(self)
def tearDown(self):
    try:
        test_failed = len(self._resultForDoCleanups.errors)
        if self.driver and test_failed:
            BaseHelper(self).create_screenshot()
        if self.driver:
            self.driver.close()
        if test_failed and TestInputSingleton.input.param("stop-on-failure", False):
            print "test fails, teardown will be skipped!!!"
            return
        rest = RestConnection(self.servers[0])
        try:
            reb_status = rest._rebalance_progress_status()
        except ValueError as e:
            if e.message == 'No JSON object could be decoded':
                print "cluster not initialized!!!"
                return
            # any other ValueError is unexpected; re-raise instead of
            # falling through to an unbound reb_status
            raise
        if reb_status == 'running':
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
        for server in self.servers:
            ClusterOperationHelper.cleanup_cluster([server])
        ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
    except Exception as e:
        raise e
    finally:
        if self.driver:
            self.shell.disconnect()
def tearDown(self):
    try:
        test_failed = len(self._resultForDoCleanups.errors)
        if self.driver and test_failed:
            BaseHelper(self).create_screenshot()
        if self.driver:
            self.driver.close()
        if test_failed and TestInputSingleton.input.param("stop-on-failure", False):
            print "test fails, teardown will be skipped!!!"
            return
        rest = RestConnection(self.servers[0])
        try:
            reb_status = rest._rebalance_progress_status()
        except ValueError as e:
            if e.message == 'No JSON object could be decoded':
                print "cluster not initialized!!!"
                return
            # any other ValueError is unexpected; re-raise instead of
            # falling through to an unbound reb_status
            raise
        if reb_status == 'running':
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
        for server in self.servers:
            ClusterOperationHelper.cleanup_cluster([server])
        ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
    except Exception as e:
        raise e
    finally:
        if self.driver:
            self.shell.disconnect()
def _common_clenup(self):
    rest = RestConnection(self.servers[0])
    if rest._rebalance_progress_status() == 'running':
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
    ClusterOperationHelper.cleanup_cluster(self.servers)
    ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
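A minimal usage sketch for the shared cleanup helper above. The class names below (MyRebalanceTest, BaseUITestCase) are illustrative assumptions, not part of the original code; only the `_common_clenup` call comes from the snippet itself.

# Hedged usage sketch: MyRebalanceTest and BaseUITestCase are hypothetical names.
class MyRebalanceTest(BaseUITestCase):
    def setUp(self):
        super(MyRebalanceTest, self).setUp()
        # clear anything a previous run may have left behind
        # (a running rebalance, leftover buckets)
        self._common_clenup()

    def tearDown(self):
        try:
            self._common_clenup()
        finally:
            super(MyRebalanceTest, self).tearDown()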
def setUp(self):
    super(SwapRebalanceBase, self).setUp()
    self.log = logger.Logger.get_logger()
    self.cluster_run = False
    rest = RestConnection(self.cluster.master)
    if len(set([server.ip for server in self.servers])) == 1:
        ip = rest.get_nodes_self().ip
        for server in self.servers:
            server.ip = ip
        self.cluster_run = True
    self.replica_to_update = self.input.param("new_replica", None)
    self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
    self.num_initial_servers = self.input.param("num-initial-servers", 3)
    self.fail_orchestrator = self.swap_orchestrator = self.input.param("swap-orchestrator", False)
    self.do_access = self.input.param("do-access", True)
    self.percentage_progress = self.input.param("percentage_progress", 50)
    self.transaction_timeout = self.input.param("transaction_timeout", 300)
    self.transaction_commit = self.input.param("transaction_commit", True)
    self.load_started = False
    self.loaders = []
    try:
        # Clear the state left over from a previous invalid run
        if rest._rebalance_progress_status() == 'running':
            self.log.warning("Rebalance is still running, previous test should be verified")
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        self.log.info("=== SwapRebalanceBase setup started for test #{0} {1} ==="
                      .format(self.case_number, self._testMethodName))
        # Make sure the test is set up correctly
        min_servers = int(self.num_initial_servers) + int(self.num_swap)
        msg = "minimum {0} nodes required for running swap rebalance"
        self.assertTrue(len(self.servers) >= min_servers, msg=msg.format(min_servers))
        self.log.info('picking server : {0} as the master'.format(self.cluster.master))
        node_ram_ratio = self.bucket_util.base_bucket_ratio(self.cluster.servers)
        info = rest.get_nodes_self()
        rest.init_cluster(username=self.cluster.master.rest_username,
                          password=self.cluster.master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
        self.enable_diag_eval_on_non_local_hosts(self.cluster.master)
        self.bucket_util.add_rbac_user()
        time.sleep(10)
        if self.standard_buckets > 10:
            self.bucket_util.change_max_buckets(self.standard_buckets)
        self.log.info("=== SwapRebalanceBase setup finished for test #{0} {1} ==="
                      .format(self.case_number, self._testMethodName))
        self._log_start()
    except Exception, e:
        self.fail(e)
def tearDown(self):
    rest = RestConnection(self.master)
    if rest._rebalance_progress_status() == 'running':
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    try:
        super(ConcurrentTests, self).tearDown()
    except:
        pass
def tearDown(self):
    try:
        if hasattr(self, 'skip_buckets_handle') and self.skip_buckets_handle:
            return
        test_failed = (hasattr(self, '_resultForDoCleanups')
                       and len(self._resultForDoCleanups.failures or self._resultForDoCleanups.errors)) \
                      or (hasattr(self, '_exc_info') and self._exc_info()[1] is not None)
        if test_failed and TestInputSingleton.input.param("stop-on-failure", False) \
                or self.input.param("skip_cleanup", False):
            self.log.warn("CLEANUP WAS SKIPPED")
        else:
            if test_failed and TestInputSingleton.input.param('get_trace', None):
                for server in self.servers:
                    try:
                        shell = RemoteMachineShellConnection(server)
                        output, _ = shell.execute_command("ps -aef|grep %s"
                                                          % TestInputSingleton.input.param('get_trace', None))
                        output = shell.execute_command("pstack %s" % output[0].split()[1].strip())
                        print output[0]
                    except:
                        pass
            if test_failed and self.input.param('BUGS', False):
                self.log.warn("Test failed. Possible reason is: {0}".format(self.input.param('BUGS', False)))
            self.log.info("============== basetestcase cleanup was started for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
            rest = RestConnection(self.master)
            alerts = rest.get_alerts()
            if alerts is not None and len(alerts) != 0:
                self.log.warn("Alerts were found: {0}".format(alerts))
            if rest._rebalance_progress_status() == 'running':
                self.log.warning("rebalancing is still running, test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
            if self.input.param("forceEject", False):
                for server in self.servers:
                    if server != self.servers[0]:
                        try:
                            rest = RestConnection(server)
                            rest.force_eject_node()
                        except BaseException, e:
                            self.log.error(e)
            ClusterOperationHelper.cleanup_cluster(self.servers)
            self.sleep(10)
            ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
            self.log.info("============== basetestcase cleanup was finished for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
    except BaseException:
        # increase case_number to retry tearDown in setup for the next test
        self.case_number += 1000
    finally:
        # stop all existing task manager threads
        self.cluster.shutdown(force=True)
        self._log_finish(self)
def _cluster_cleanup(self, bucket_util):
    rest = RestConnection(self.cluster.master)
    alerts = rest.get_alerts()
    if rest._rebalance_progress_status() == 'running':
        self.kill_memcached()
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    bucket_util.delete_all_buckets(self.cluster.servers)
    ClusterOperationHelper.cleanup_cluster(self.cluster.servers, master=self.cluster.master)
    ClusterOperationHelper.wait_for_ns_servers_or_assert(self.cluster.servers, self)
def reset(self):
    rest = RestConnection(self.servers[0])
    if rest._rebalance_progress_status() == 'running':
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
    for server in self.servers:
        ClusterOperationHelper.cleanup_cluster([server])
    self.log.info("Stopping load in Teardown")
    ClusterHelper.wait_for_ns_servers_or_assert(self.servers, self)
def tearDown(self):
    rest = RestConnection(self.master)
    if rest._rebalance_progress_status() == 'running':
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    try:
        super(QueriesOpsJoinsTests, self).tearDown()
    except:
        pass
    ClusterOperationHelper.cleanup_cluster(self.servers)
    self.sleep(10)
def tearDown(self):
    try:
        test_failed = (hasattr(self, "_resultForDoCleanups")
                       and len(self._resultForDoCleanups.failures or self._resultForDoCleanups.errors)) \
                      or (hasattr(self, "_exc_info") and self._exc_info()[1] is not None)
        if (test_failed and TestInputSingleton.input.param("stop-on-failure", False)
                or self.input.param("skip_cleanup", False)):
            self.log.warn("CLEANUP WAS SKIPPED")
        else:
            if test_failed and self.input.param("BUGS", False):
                self.log.warn("Test failed. Possible reason is: {0}".format(self.input.param("BUGS", False)))
            self.log.info("============== basetestcase cleanup was started for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
            rest = RestConnection(self.master)
            alerts = rest.get_alerts()
            if alerts is not None and len(alerts) != 0:
                self.log.warn("Alerts were found: {0}".format(alerts))
            if rest._rebalance_progress_status() == "running":
                self.log.warning("rebalancing is still running, test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
            if self.input.param("forceEject", False):
                for server in self.servers:
                    if server != self.servers[0]:
                        try:
                            rest = RestConnection(server)
                            rest.force_eject_node()
                        except BaseException, e:
                            self.log.error(e)
            ClusterOperationHelper.cleanup_cluster(self.servers)
            self.sleep(10)
            ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
            self.log.info("============== basetestcase cleanup was finished for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
    except BaseException:
        # increase case_number to retry tearDown in setup for the next test
        self.case_number += 1000
    finally:
        # stop all existing task manager threads
        self.cluster.shutdown()
        self._log_finish(self)
def cluster_cleanup(self, bucket_util):
    rest = RestConnection(self.cluster.master)
    if rest._rebalance_progress_status() == 'running':
        self.kill_memcached()
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        if not stopped:
            raise Exception("Unable to stop rebalance")
    bucket_util.delete_all_buckets(self.cluster.servers)
    self.cleanup_cluster(self.cluster.servers, master=self.cluster.master)
    self.wait_for_ns_servers_or_assert(self.cluster.servers)
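The cleanup variants above issue a single stop_rebalance() and trust its return value. A possible refinement, sketched here as an assumption rather than part of the original helpers, is to poll _rebalance_progress_status() until it actually leaves 'running'. The helper name and timeout values are hypothetical; only the RestConnection methods it calls appear in the snippets above.

# Hedged sketch (hypothetical helper): wait until a leftover rebalance has
# really stopped instead of trusting a single stop_rebalance() call.
import time

def ensure_rebalance_stopped(rest, timeout=120, poll_interval=2):
    """Ask ns_server to stop a running rebalance, then poll the progress
    status until it is no longer 'running' or the timeout expires."""
    if rest._rebalance_progress_status() != 'running':
        return True
    rest.stop_rebalance()
    end_time = time.time() + timeout
    while time.time() < end_time:
        if rest._rebalance_progress_status() != 'running':
            return True
        time.sleep(poll_interval)
    return False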
def tearDown(self):
    rest = RestConnection(self.master)
    if rest._rebalance_progress_status() == 'running':
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    try:
        super(QueriesOpsJoinsTests, self).tearDown()
    except:
        pass
    ClusterOperationHelper.cleanup_cluster(self.servers)
    self.sleep(10)
def common_setup(self):
    self.log = logger.Logger.get_logger()
    self.input = TestInputSingleton.input
    self.servers = self.input.servers
    serverInfo = self.servers[0]
    rest = RestConnection(serverInfo)
    self.case_number = self.input.param("case_number", 0)

    # Clear the state left over from a previous invalid run
    if rest._rebalance_progress_status() == 'running':
        self.log.warning("rebalancing is still running, previous test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    self.load_started = False
    self.loaders = []
    self.log.info("============== SwapRebalanceBase setup was started for test #{0} {1} =============="
                  .format(self.case_number, self._testMethodName))
    SwapRebalanceBase.reset(self)
    self.cluster_helper = Cluster()

    # Initialize test params
    self.replica = self.input.param("replica", 1)
    self.keys_count = self.input.param("keys-count", 100000)
    self.load_ratio = self.input.param("load-ratio", 1)
    self.ratio_expiry = self.input.param("ratio-expiry", 0.03)
    self.ratio_deletes = self.input.param("ratio-deletes", 0.13)
    self.num_buckets = self.input.param("num-buckets", 1)
    self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
    self.num_initial_servers = self.input.param("num-initial-servers", 3)
    self.fail_orchestrator = self.swap_orchestrator = self.input.param("swap-orchestrator", False)
    self.do_access = self.input.param("do-access", True)

    # Make sure the test is set up correctly
    min_servers = int(self.num_initial_servers) + int(self.num_swap)
    msg = "minimum {0} nodes required for running swap rebalance"
    self.assertTrue(len(self.servers) >= min_servers, msg=msg.format(min_servers))
    self.log.info('picking server : {0} as the master'.format(serverInfo))
    node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
    info = rest.get_nodes_self()
    rest.init_cluster(username=serverInfo.rest_username,
                      password=serverInfo.rest_password)
    rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
    self.log.info("============== SwapRebalanceBase setup was finished for test #{0} {1} =============="
                  .format(self.case_number, self._testMethodName))
    SwapRebalanceBase._log_start(self)
def tearDown(self):
    try:
        test_failed = (hasattr(self, '_resultForDoCleanups')
                       and len(self._resultForDoCleanups.failures or self._resultForDoCleanups.errors)) \
                      or (hasattr(self, '_exc_info') and self._exc_info()[1] is not None)
        if test_failed and TestInputSingleton.input.param("stop-on-failure", False) \
                or self.input.param("skip_cleanup", False):
            self.log.warn("CLEANUP WAS SKIPPED")
        else:
            if test_failed and self.input.param('BUGS', False):
                self.log.warn("Test failed. Possible reason is: {0}".format(self.input.param('BUGS', False)))
            self.log.info("============== basetestcase cleanup was started for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
            rest = RestConnection(self.master)
            alerts = rest.get_alerts()
            if alerts is not None and len(alerts) != 0:
                self.log.warn("Alerts were found: {0}".format(alerts))
            if rest._rebalance_progress_status() == 'running':
                self.log.warning("rebalancing is still running, test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
            if self.input.param("forceEject", False):
                for server in self.servers:
                    if server != self.servers[0]:
                        try:
                            rest = RestConnection(server)
                            rest.force_eject_node()
                        except BaseException, e:
                            self.log.error(e)
            ClusterOperationHelper.cleanup_cluster(self.servers)
            self.sleep(10)
            ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
            self.log.info("============== basetestcase cleanup was finished for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
    except BaseException:
        # increase case_number to retry tearDown in setup for the next test
        self.case_number += 1000
    finally:
        # stop all existing task manager threads
        self.cluster.shutdown()
        self._log_finish(self)
def tearDown(self):
    if not self.input.param("skip_cleanup", False):
        try:
            self.log.info("============== basetestcase cleanup was started for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
            rest = RestConnection(self.master)
            if rest._rebalance_progress_status() == 'running':
                self.log.warning("rebalancing is still running, test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
            ClusterOperationHelper.cleanup_cluster(self.servers)
            time.sleep(10)
            ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
            self.log.info("============== basetestcase cleanup was finished for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
        finally:
            # stop all existing task manager threads
            self.cluster.shutdown()
            self._log_finish(self)
def reset(self):
    self.log.info("============== SwapRebalanceBase cleanup was started for test #{0} {1} =============="
                  .format(self.case_number, self._testMethodName))
    self.log.info("Stopping load in Teardown")
    SwapRebalanceBase.stop_load(self.loaders)
    for server in self.servers:
        rest = RestConnection(server)
        if rest._rebalance_progress_status() == 'running':
            self.log.warning("rebalancing is still running, test should be verified")
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
    BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
    for server in self.servers:
        ClusterOperationHelper.cleanup_cluster([server])
        if server.data_path:
            rest = RestConnection(server)
            rest.set_data_path(data_path=server.data_path)
    ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
    self.log.info("============== SwapRebalanceBase cleanup was finished for test #{0} {1} =============="
                  .format(self.case_number, self._testMethodName))
def tearDown(self):
    rest = RestConnection(self.master)
    if rest._rebalance_progress_status() == 'running':
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    try:
        super(QueriesOpsTests, self).tearDown()
    except:
        pass
    try:
        ClusterOperationHelper.cleanup_cluster(self.servers, master=self.master)
        self.sleep(1)
    except:
        for server in set(self.servers) - set([self.master]):
            try:
                rest = RestConnection(server)
                rest.force_eject_node()
                time.sleep(1)
            except BaseException, e:
                self.fail(e)
def tearDown(self):
    rest = RestConnection(self.master)
    if rest._rebalance_progress_status() == 'running':
        self.log.warning("rebalancing is still running, test should be verified")
        stopped = rest.stop_rebalance()
        self.assertTrue(stopped, msg="unable to stop rebalance")
    try:
        super(QueriesOpsTests, self).tearDown()
    except:
        pass
    try:
        ClusterOperationHelper.cleanup_cluster(self.servers, master=self.master)
        self.sleep(1)
    except:
        for server in set(self.servers) - set([self.master]):
            try:
                rest = RestConnection(server)
                rest.force_eject_node()
                time.sleep(1)
            except BaseException, e:
                self.fail(e)
def tearDown(self):
    if not self.input.param("skip_cleanup", False):
        try:
            self.log.info("============== basetestcase cleanup was started for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
            rest = RestConnection(self.master)
            if rest._rebalance_progress_status() == 'running':
                self.log.warning("rebalancing is still running, test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
            ClusterOperationHelper.cleanup_cluster(self.servers)
            time.sleep(10)
            ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
            self.log.info("============== basetestcase cleanup was finished for test #{0} {1} =============="
                          .format(self.case_number, self._testMethodName))
        finally:
            # stop all existing task manager threads
            self.cluster.shutdown()
            self._log_finish(self)
def _common_test_body_failed_swap_rebalance(self):
    master = self.servers[0]
    rest = RestConnection(master)
    num_initial_servers = self.num_initial_servers
    creds = self.input.membase_settings
    initial_servers = self.servers[:num_initial_servers]

    self.log.info("CREATE BUCKET PHASE")
    SwapRebalanceBase.create_buckets(self)

    # Cluster all starting set of servers
    self.log.info("INITIAL REBALANCE PHASE")
    RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)

    self.log.info("DATA LOAD PHASE")
    self.loaders = SwapRebalanceBase.start_load_phase(self, master)
    # Wait till load phase is over
    SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
    self.log.info("DONE LOAD PHASE")

    # Start the swap rebalance
    current_nodes = RebalanceHelper.getOtpNodeIds(master)
    self.log.info("current nodes : {0}".format(current_nodes))
    toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
    optNodesIds = [node.id for node in toBeEjectedNodes]
    if self.swap_orchestrator:
        status, content = ClusterOperationHelper.find_orchestrator(master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".format(status, content))
        # When swapping all the nodes
        if self.num_swap is len(current_nodes):
            optNodesIds.append(content)
        else:
            optNodesIds[0] = content

    for node in optNodesIds:
        self.log.info("removing node {0} and rebalance afterwards".format(node))

    new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
    for server in new_swap_servers:
        otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
        msg = "unable to add node {0} to the cluster"
        self.assertTrue(otpNode, msg.format(server.ip))

    if self.swap_orchestrator:
        rest = RestConnection(new_swap_servers[0])
        master = new_swap_servers[0]

    self.log.info("DATA ACCESS PHASE")
    self.loaders = SwapRebalanceBase.start_access_phase(self, master)

    self.log.info("SWAP REBALANCE PHASE")
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   ejectedNodes=optNodesIds)

    # Rebalance is failed at 20%, 40% and 60% completion
    for i in [1, 2, 3]:
        expected_progress = 20 * i
        self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
        RestHelper(rest).rebalance_reached(expected_progress)
        bucket = rest.get_buckets()[0].name
        pid = None
        if self.swap_orchestrator:
            # get PID via remote connection if master is a new node
            shell = RemoteMachineShellConnection(master)
            o, _ = shell.execute_command("ps -eo comm,pid | awk '$1 == \"memcached\" { print $2 }'")
            pid = o[0]
            shell.disconnect()
        else:
            for i in xrange(2):
                try:
                    _mc = MemcachedClientHelper.direct_client(master, bucket)
                    pid = _mc.stats()["pid"]
                    break
                except EOFError as e:
                    self.log.error("{0}.Retry in 2 sec".format(e))
                    time.sleep(1)
        if pid is None:
            self.fail("impossible to get a PID")
        command = "os:cmd(\"kill -9 {0} \")".format(pid)
        self.log.info(command)
        killed = rest.diag_eval(command)
        self.log.info("killed {0}:{1}? {2}".format(master.ip, master.port, killed))
        self.log.info("sleep for 10 sec after kill memcached")
        time.sleep(10)
        # we can't get stats for the new node when the rebalance fails
        if not self.swap_orchestrator:
            ClusterOperationHelper._wait_warmup_completed(self, [master], bucket, wait_time=600)
        i = 0
        # we expect that the rebalance will fail
        while rest._rebalance_progress_status() == "running" and i < 60:
            self.log.info("rebalance progress: {0}".format(rest._rebalance_progress()))
            time.sleep(1)
            i += 1
        self.log.info("rebalance progress status: {0}".format(rest._rebalance_progress_status()))
        knownNodes = rest.node_statuses()
        self.log.info("nodes are still in cluster: {0}".format([(node.ip, node.port) for node in knownNodes]))
        ejectedNodes = list(set(optNodesIds) & set([node.id for node in knownNodes]))
        rest.rebalance(otpNodes=[node.id for node in knownNodes],
                       ejectedNodes=ejectedNodes)

    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))
    SwapRebalanceBase.verification_phase(self, master)
def common_setup(self):
    self.cluster_helper = Cluster()
    self.log = logger.Logger.get_logger()
    self.cluster_run = False
    self.input = TestInputSingleton.input
    self.servers = self.input.servers
    serverInfo = self.servers[0]
    rest = RestConnection(serverInfo)
    if len(set([server.ip for server in self.servers])) == 1:
        ip = rest.get_nodes_self().ip
        for server in self.servers:
            server.ip = ip
        self.cluster_run = True
    self.case_number = self.input.param("case_number", 0)
    self.replica = self.input.param("replica", 1)
    self.keys_count = self.input.param("keys-count", 1000)
    self.load_ratio = self.input.param("load-ratio", 1)
    self.ratio_expiry = self.input.param("ratio-expiry", 0.03)
    self.ratio_deletes = self.input.param("ratio-deletes", 0.13)
    self.num_buckets = self.input.param("num-buckets", 1)
    self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
    self.num_initial_servers = self.input.param("num-initial-servers", 3)
    self.fail_orchestrator = self.swap_orchestrator = self.input.param("swap-orchestrator", False)
    self.do_access = self.input.param("do-access", True)
    self.load_started = False
    self.loaders = []
    try:
        # Clear the state left over from a previous invalid run
        if rest._rebalance_progress_status() == 'running':
            self.log.warning("rebalancing is still running, previous test should be verified")
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        self.log.info("============== SwapRebalanceBase setup was started for test #{0} {1} =============="
                      .format(self.case_number, self._testMethodName))
        SwapRebalanceBase.reset(self)
        # Make sure the test is set up correctly
        min_servers = int(self.num_initial_servers) + int(self.num_swap)
        msg = "minimum {0} nodes required for running swap rebalance"
        self.assertTrue(len(self.servers) >= min_servers, msg=msg.format(min_servers))
        self.log.info('picking server : {0} as the master'.format(serverInfo))
        node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
        info = rest.get_nodes_self()
        rest.init_cluster(username=serverInfo.rest_username,
                          password=serverInfo.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
        SwapRebalanceBase.enable_diag_eval_on_non_local_hosts(self, serverInfo)
        # Add built-in user
        testuser = [{'id': 'cbadminbucket',
                     'name': 'cbadminbucket',
                     'password': '******'}]
        RbacBase().create_user_source(testuser, 'builtin', self.servers[0])
        # Assign user to role
        role_list = [{'id': 'cbadminbucket',
                      'name': 'cbadminbucket',
                      'roles': 'admin'}]
        RbacBase().add_user_role(role_list, RestConnection(self.servers[0]), 'builtin')
        if self.num_buckets > 10:
            BaseTestCase.change_max_buckets(self, self.num_buckets)
        self.log.info("============== SwapRebalanceBase setup was finished for test #{0} {1} =============="
                      .format(self.case_number, self._testMethodName))
        SwapRebalanceBase._log_start(self)
    except Exception, e:
        self.cluster_helper.shutdown()
        self.fail(e)
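Several of the setUp variants above repeat the same "all servers share one IP, so this is a local cluster_run" detection inline. A factored version is sketched below; the function name detect_cluster_run is an assumption, and the body uses only the RestConnection/get_nodes_self calls that already appear in the snippets (RestConnection is assumed to be importable the same way the snippets import it, e.g. from membase.api.rest_client).

# Hedged sketch: factoring out the repeated cluster_run detection used by the
# setUp/common_setup variants above. detect_cluster_run is a hypothetical name.
def detect_cluster_run(servers):
    """Return True when every configured server shares one IP (a local
    cluster_run); in that case rewrite each server.ip to the address that
    ns_server itself reports, exactly as the setups above do inline."""
    if len(set([server.ip for server in servers])) != 1:
        return False
    ip = RestConnection(servers[0]).get_nodes_self().ip
    for server in servers:
        server.ip = ip
    return True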