Example #1
    def common_setup(self):
        self.cluster_helper = Cluster()
        self.log = logger.Logger.get_logger()
        self.cluster_run = False
        self.input = TestInputSingleton.input
        self.servers = self.input.servers
        serverInfo = self.servers[0]
        rest = RestConnection(serverInfo)
        if len(set([server.ip for server in self.servers])) == 1:
            ip = rest.get_nodes_self().ip
            for server in self.servers:
                server.ip = ip
            self.cluster_run = True
        self.case_number = self.input.param("case_number", 0)
        self.replica = self.input.param("replica", 1)
        self.keys_count = self.input.param("keys-count", 1000)
        self.load_ratio = self.input.param("load-ratio", 1)
        self.ratio_expiry = self.input.param("ratio-expiry", 0.03)
        self.ratio_deletes = self.input.param("ratio-deletes", 0.13)
        self.num_buckets = self.input.param("num-buckets", 1)
        self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
        self.num_initial_servers = self.input.param("num-initial-servers", 3)
        self.fail_orchestrator = self.swap_orchestrator = self.input.param("swap-orchestrator", False)
        self.do_access = self.input.param("do-access", True)
        self.load_started = False
        self.loaders = []
        try:
            # Clear the state from Previous invalid run
            if rest._rebalance_progress_status() == "running":
                self.log.warning("rebalancing is still running, previous test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            self.log.info(
                "==============  SwapRebalanceBase setup was started for test #{0} {1}==============".format(
                    self.case_number, self._testMethodName
                )
            )
            SwapRebalanceBase.reset(self)

            # Make sure the test is setup correctly
            min_servers = int(self.num_initial_servers) + int(self.num_swap)
            msg = "minimum {0} nodes required for running swap rebalance"
            self.assertTrue(len(self.servers) >= min_servers, msg=msg.format(min_servers))

            self.log.info("picking server : {0} as the master".format(serverInfo))
            node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
            info = rest.get_nodes_self()
            rest.init_cluster(username=serverInfo.rest_username, password=serverInfo.rest_password)
            rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
            if self.num_buckets > 10:
                BaseTestCase.change_max_buckets(self, self.num_buckets)
            self.log.info(
                "==============  SwapRebalanceBase setup was finished for test #{0} {1} ==============".format(
                    self.case_number, self._testMethodName
                )
            )
            SwapRebalanceBase._log_start(self)
        except Exception, e:
            self.cluster_helper.shutdown()
            self.fail(e)
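Example #1 above, like Examples 10, 20 and 28 further down, opens with the same guard: if a rebalance left over from a previous, unverified run is still reported as running, stop it before touching the cluster. A minimal standalone sketch of that guard follows; it assumes the testrunner's lib modules are importable and raises instead of using the assertTrue calls shown in the examples.

from membase.api.rest_client import RestConnection


def abort_stale_rebalance(server, log):
    # Stop any rebalance left behind by a previous, unverified test run.
    rest = RestConnection(server)
    if rest._rebalance_progress_status() == 'running':
        log.warning("rebalancing is still running, previous test should be verified")
        if not rest.stop_rebalance():
            raise Exception("unable to stop rebalance")
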
Example #2
 def tearDown(self):
     try:
         if (hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0 \
             and TestInputSingleton.input.param("stop-on-failure", False))\
                 or self.input.param("skip_cleanup", False):
             self.log.warn("CLEANUP WAS SKIPPED")
         else:
             self.log.info("==============  basetestcase cleanup was started for test #{0} {1} =============="\
                   .format(self.case_number, self._testMethodName))
             rest = RestConnection(self.master)
             alerts = rest.get_alerts()
             if alerts is not None and len(alerts) != 0:
                 self.log.warn("Alerts were found: {0}".format(alerts))
             if rest._rebalance_progress_status() == 'running':
                 self.log.warning(
                     "rebalancing is still running, test should be verified"
                 )
                 stopped = rest.stop_rebalance()
                 self.assertTrue(stopped, msg="unable to stop rebalance")
             BucketOperationHelper.delete_all_buckets_or_assert(
                 self.servers, self)
             ClusterOperationHelper.cleanup_cluster(self.servers)
             time.sleep(10)
             ClusterOperationHelper.wait_for_ns_servers_or_assert(
                 self.servers, self)
             self.log.info("==============  basetestcase cleanup was finished for test #{0} {1} =============="\
                   .format(self.case_number, self._testMethodName))
     finally:
         #stop all existing task manager threads
         self.cluster.shutdown()
         self._log_finish(self)
Example #3
 def tearDown(self):
     try:
         if self.driver:
             path_screen = self.input.ui_conf[
                 'screenshots'] or 'logs/screens'
             full_path = '{1}/screen_{0}.png'.format(
                 time.time(), path_screen)
             self.log.info('screenshot is available: %s' % full_path)
             if not os.path.exists(path_screen):
                 os.mkdir(path_screen)
             self.driver.get_screenshot_as_file(os.path.abspath(full_path))
         rest = RestConnection(self.servers[0])
         if rest._rebalance_progress_status() == 'running':
             stopped = rest.stop_rebalance()
             self.assertTrue(stopped, msg="unable to stop rebalance")
         BucketOperationHelper.delete_all_buckets_or_assert(
             self.servers, self)
         for server in self.servers:
             ClusterOperationHelper.cleanup_cluster([server])
         ClusterOperationHelper.wait_for_ns_servers_or_assert(
             self.servers, self)
         if self.driver:
             self.driver.close()
     finally:
         if self.driver:
             self.shell.disconnect()
         self.cluster.shutdown()
Example #4
 def tearDown(self):
     try:
         if self.driver:
             path_screen = self.input.ui_conf['screenshots'] or 'logs/screens'
             full_path = '{1}/screen_{0}.png'.format(time.time(), path_screen)
             self.log.info('screenshot is available: %s' % full_path)
             if not os.path.exists(path_screen):
                 os.mkdir(path_screen)
             self.driver.get_screenshot_as_file(os.path.abspath(full_path))
         rest = RestConnection(self.servers[0])
         if rest._rebalance_progress_status() == 'running':
             stopped = rest.stop_rebalance()
             self.assertTrue(stopped, msg="unable to stop rebalance")
         BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
         for server in self.servers:
             ClusterOperationHelper.cleanup_cluster([server])
         ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
         if self.driver:
             self.driver.close()
     except Exception as e:
         raise e
     finally:
         if self.driver:
             self.shell.disconnect()
         self.cluster.shutdown()
Example #5
 def reset(self):
     self.log.info(
         "==============  SwapRebalanceBase cleanup was started for test #{0} {1} ==============".format(
             self.case_number, self._testMethodName
         )
     )
     self.log.info("Stopping load in Teardown")
     SwapRebalanceBase.stop_load(self.loaders)
     for server in self.servers:
         rest = RestConnection(server)
         if rest._rebalance_progress_status() == "running":
             self.log.warning("rebalancing is still running, test should be verified")
             stopped = rest.stop_rebalance()
             self.assertTrue(stopped, msg="unable to stop rebalance")
     BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
     for server in self.servers:
         ClusterOperationHelper.cleanup_cluster([server])
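          # cleanup_cluster resets the node, so re-apply any custom data path afterwards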
         if server.data_path:
             rest = RestConnection(server)
             rest.set_data_path(data_path=server.data_path)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
     self.log.info(
         "==============  SwapRebalanceBase cleanup was finished for test #{0} {1} ==============".format(
             self.case_number, self._testMethodName
         )
     )
Example #6
 def tearDown(self):
         try:
             if (hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0 \
                 and TestInputSingleton.input.param("stop-on-failure", False))\
                     or self.input.param("skip_cleanup", False):
                 self.log.warn("CLEANUP WAS SKIPPED")
             else:
                 self.log.info("==============  basetestcase cleanup was started for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
                 rest = RestConnection(self.master)
                 alerts = rest.get_alerts()
                 if alerts is not None and len(alerts) != 0:
                     self.log.warn("Alerts were found: {0}".format(alerts))
                 if rest._rebalance_progress_status() == 'running':
                     self.log.warning("rebalancing is still running, test should be verified")
                     stopped = rest.stop_rebalance()
                     self.assertTrue(stopped, msg="unable to stop rebalance")
                 BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
                 ClusterOperationHelper.cleanup_cluster(self.servers)
                 self.sleep(10)
                 ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
                 self.log.info("==============  basetestcase cleanup was finished for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
         except BaseException:
             # increase case_number to retry tearDown in setup for the next test
             self.case_number += 1000
         finally:
             # stop all existing task manager threads
             self.cluster.shutdown()
             self._log_finish(self)
Example #7
 def tearDown(self):
     try:
         test_failed = len(self._resultForDoCleanups.errors)
         if self.driver and test_failed:
             BaseHelper(self).create_screenshot()
         if self.driver:
             self.driver.close()
         if test_failed and TestInputSingleton.input.param("stop-on-failure", False):
             print "test fails, teardown will be skipped!!!"
             return
         rest = RestConnection(self.servers[0])
         try:
             reb_status = rest._rebalance_progress_status()
         except ValueError as e:
             if e.message == 'No JSON object could be decoded':
                 print "cluster not initialized!!!"
                 return
             raise  # any other error would leave reb_status unbound below
         if reb_status == 'running':
             stopped = rest.stop_rebalance()
             self.assertTrue(stopped, msg="unable to stop rebalance")
         BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
         for server in self.servers:
             ClusterOperationHelper.cleanup_cluster([server])
         ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
     except Exception as e:
         raise e
     finally:
         if self.driver:
             self.shell.disconnect()
Example #8
 def tearDown(self):
     try:
         test_failed = len(self._resultForDoCleanups.errors)
         if self.driver and test_failed:
             BaseHelper(self).create_screenshot()
         if self.driver:
             self.driver.close()
         if test_failed and TestInputSingleton.input.param(
                 "stop-on-failure", False):
             print "test fails, teardown will be skipped!!!"
             return
         rest = RestConnection(self.servers[0])
         try:
             reb_status = rest._rebalance_progress_status()
         except ValueError as e:
             if e.message == 'No JSON object could be decoded':
                 print "cluster not initialized!!!"
                 return
             raise  # any other error would leave reb_status unbound below
         if reb_status == 'running':
             stopped = rest.stop_rebalance()
             self.assertTrue(stopped, msg="unable to stop rebalance")
         BucketOperationHelper.delete_all_buckets_or_assert(
             self.servers, self)
         for server in self.servers:
             ClusterOperationHelper.cleanup_cluster([server])
         ClusterOperationHelper.wait_for_ns_servers_or_assert(
             self.servers, self)
     except Exception as e:
         raise e
     finally:
         if self.driver:
             self.shell.disconnect()
Example #9
 def _common_cleanup(self):
     rest = RestConnection(self.servers[0])
     if rest._rebalance_progress_status() == 'running':
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
     ClusterOperationHelper.cleanup_cluster(self.servers)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
Example #10
    def setUp(self):
        super(SwapRebalanceBase, self).setUp()
        self.log = logger.Logger.get_logger()
        self.cluster_run = False
        rest = RestConnection(self.cluster.master)
        if len(set([server.ip for server in self.servers])) == 1:
            ip = rest.get_nodes_self().ip
            for server in self.servers:
                server.ip = ip
            self.cluster_run = True
        self.replica_to_update = self.input.param("new_replica", None)
        self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
        self.num_initial_servers = self.input.param("num-initial-servers", 3)
        self.fail_orchestrator = self.swap_orchestrator = self.input.param(
            "swap-orchestrator", False)
        self.do_access = self.input.param("do-access", True)
        self.percentage_progress = self.input.param("percentage_progress", 50)
        self.transaction_timeout = self.input.param("transaction_timeout", 300)
        self.transaction_commit = self.input.param("transaction_commit", True)
        self.load_started = False
        self.loaders = []
        try:
            # Clear the state from Previous invalid run
            if rest._rebalance_progress_status() == 'running':
                self.log.warning(
                    "Rebalance is still running, previous test should be verified"
                )
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            self.log.info(
                "=== SwapRebalanceBase setup started for test #{0} {1} ===".
                format(self.case_number, self._testMethodName))
            # Make sure the test is setup correctly
            min_servers = int(self.num_initial_servers) + int(self.num_swap)
            msg = "minimum {0} nodes required for running swap rebalance"
            self.assertTrue(len(self.servers) >= min_servers,
                            msg=msg.format(min_servers))
            self.log.info('picking server : {0} as the master'.format(
                self.cluster.master))
            node_ram_ratio = self.bucket_util.base_bucket_ratio(
                self.cluster.servers)
            info = rest.get_nodes_self()
            rest.init_cluster(username=self.cluster.master.rest_username,
                              password=self.cluster.master.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
            self.enable_diag_eval_on_non_local_hosts(self.cluster.master)
            self.bucket_util.add_rbac_user()
            time.sleep(10)

            if self.standard_buckets > 10:
                self.bucket_util.change_max_buckets(self.standard_buckets)
            self.log.info(
                "=== SwapRebalanceBase setup finished for test #{0} {1} ===".
                format(self.case_number, self._testMethodName))
            self._log_start()
        except Exception, e:
            self.fail(e)
Example #11
 def tearDown(self):
     rest = RestConnection(self.master)
     if rest._rebalance_progress_status() == 'running':
         self.log.warning("rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     try:
         super(ConcurrentTests, self).tearDown()
     except:
         pass
Example #12
 def tearDown(self):
     rest = RestConnection(self.master)
     if rest._rebalance_progress_status() == 'running':
         self.log.warning("rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     try:
         super(ConcurrentTests, self).tearDown()
     except:
         pass
Example #13
    def tearDown(self):
            try:
                if hasattr(self, 'skip_buckets_handle') and self.skip_buckets_handle:
                    return
                test_failed = (hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures or self._resultForDoCleanups.errors)) \
                    or (hasattr(self, '_exc_info') and self._exc_info()[1] is not None)
                if test_failed and TestInputSingleton.input.param("stop-on-failure", False)\
                        or self.input.param("skip_cleanup", False):
                    self.log.warn("CLEANUP WAS SKIPPED")
                else:
                    if test_failed and TestInputSingleton.input.param('get_trace', None):
                        for server in self.servers:
                            try:
                                shell = RemoteMachineShellConnection(server)
                                output, _ = shell.execute_command("ps -aef|grep %s" %
                                        TestInputSingleton.input.param('get_trace', None))
                                output = shell.execute_command("pstack %s" % output[0].split()[1].strip())
                                print output[0]
                            except:
                                pass
                    if test_failed and self.input.param('BUGS', False):
                        self.log.warn("Test failed. Possible reason is: {0}".format(self.input.param('BUGS', False)))

                    self.log.info("==============  basetestcase cleanup was started for test #{0} {1} =============="\
                          .format(self.case_number, self._testMethodName))
                    rest = RestConnection(self.master)
                    alerts = rest.get_alerts()
                    if alerts is not None and len(alerts) != 0:
                        self.log.warn("Alerts were found: {0}".format(alerts))
                    if rest._rebalance_progress_status() == 'running':
                        self.log.warning("rebalancing is still running, test should be verified")
                        stopped = rest.stop_rebalance()
                        self.assertTrue(stopped, msg="unable to stop rebalance")
                    BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
                    if self.input.param("forceEject", False):
                        for server in self.servers:
                            if server != self.servers[0]:
                                try:
                                    rest = RestConnection(server)
                                    rest.force_eject_node()
                                except BaseException, e:
                                    self.log.error(e)
                    ClusterOperationHelper.cleanup_cluster(self.servers)
                    self.sleep(10)
                    ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
                    self.log.info("==============  basetestcase cleanup was finished for test #{0} {1} =============="\
                          .format(self.case_number, self._testMethodName))
            except BaseException:
                # increase case_number to retry tearDown in setup for the next test
                self.case_number += 1000
            finally:
                # stop all existing task manager threads
                self.cluster.shutdown(force=True)
                self._log_finish(self)
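Examples 13, 17 and 21 decide whether to skip cleanup by inspecting unittest internals. A hedged sketch of that check, factored into a helper, is shown below; _resultForDoCleanups and _exc_info are private attributes of the Python 2-era unittest used in these snippets, so treat it as illustrative rather than portable.

def test_has_failed(test_case):
    # Mirrors the check in Examples 13, 17 and 21: any recorded failure,
    # error, or in-flight exception counts as a failed test.
    result = getattr(test_case, '_resultForDoCleanups', None)
    if result is not None and (result.failures or result.errors):
        return True
    exc_info = getattr(test_case, '_exc_info', None)
    return exc_info is not None and exc_info()[1] is not None
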
Example #14
 def _cluster_cleanup(self, bucket_util):
     rest = RestConnection(self.cluster.master)
     alerts = rest.get_alerts()
     if rest._rebalance_progress_status() == 'running':
         self.kill_memcached()
         self.log.warning("rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
         bucket_util.delete_all_buckets(self.cluster.servers)
     ClusterOperationHelper.cleanup_cluster(self.cluster.servers, master=self.cluster.master)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(self.cluster.servers, self)
Example #15
 def reset(self):
     rest = RestConnection(self.servers[0])
     if rest._rebalance_progress_status() == 'running':
         self.log.warning("rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
     for server in self.servers:
         ClusterOperationHelper.cleanup_cluster([server])
     self.log.info("Stopping load in Teardown")
     ClusterHelper.wait_for_ns_servers_or_assert(self.servers, self)
Example #16
 def tearDown(self):
     rest = RestConnection(self.master)
     if rest._rebalance_progress_status() == 'running':
         self.log.warning("rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     try:
         super(QueriesOpsJoinsTests, self).tearDown()
     except:
         pass
     ClusterOperationHelper.cleanup_cluster(self.servers)
     self.sleep(10)
Example #17
    def tearDown(self):
        try:
            test_failed = (
                hasattr(self, "_resultForDoCleanups")
                and len(self._resultForDoCleanups.failures or self._resultForDoCleanups.errors)
            ) or (hasattr(self, "_exc_info") and self._exc_info()[1] is not None)
            if (
                test_failed
                and TestInputSingleton.input.param("stop-on-failure", False)
                or self.input.param("skip_cleanup", False)
            ):
                self.log.warn("CLEANUP WAS SKIPPED")
            else:
                if test_failed and self.input.param("BUGS", False):
                    self.log.warn("Test failed. Possible reason is: {0}".format(self.input.param("BUGS", False)))

                self.log.info(
                    "==============  basetestcase cleanup was started for test #{0} {1} ==============".format(
                        self.case_number, self._testMethodName
                    )
                )
                rest = RestConnection(self.master)
                alerts = rest.get_alerts()
                if alerts is not None and len(alerts) != 0:
                    self.log.warn("Alerts were found: {0}".format(alerts))
                if rest._rebalance_progress_status() == "running":
                    self.log.warning("rebalancing is still running, test should be verified")
                    stopped = rest.stop_rebalance()
                    self.assertTrue(stopped, msg="unable to stop rebalance")
                BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
                if self.input.param("forceEject", False):
                    for server in self.servers:
                        if server != self.servers[0]:
                            try:
                                rest = RestConnection(server)
                                rest.force_eject_node()
                            except BaseException, e:
                                self.log.error(e)
                ClusterOperationHelper.cleanup_cluster(self.servers)
                self.sleep(10)
                ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
                self.log.info(
                    "==============  basetestcase cleanup was finished for test #{0} {1} ==============".format(
                        self.case_number, self._testMethodName
                    )
                )
        except BaseException:
            # increase case_number to retry tearDown in setup for the next test
            self.case_number += 1000
        finally:
            # stop all existing task manager threads
            self.cluster.shutdown()
            self._log_finish(self)
Example #18
 def cluster_cleanup(self, bucket_util):
     rest = RestConnection(self.cluster.master)
     if rest._rebalance_progress_status() == 'running':
         self.kill_memcached()
         self.log.warning(
             "rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         if not stopped:
             raise Exception("Unable to stop rebalance")
     bucket_util.delete_all_buckets(self.cluster.servers)
     self.cleanup_cluster(self.cluster.servers, master=self.cluster.master)
     self.wait_for_ns_servers_or_assert(self.cluster.servers)
Example #19
 def tearDown(self):
     rest = RestConnection(self.master)
     if rest._rebalance_progress_status() == 'running':
         self.log.warning(
             "rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     try:
         super(QueriesOpsJoinsTests, self).tearDown()
     except:
         pass
     ClusterOperationHelper.cleanup_cluster(self.servers)
     self.sleep(10)
Example #20
    def common_setup(self):
        self.log = logger.Logger.get_logger()
        self.input = TestInputSingleton.input
        self.servers = self.input.servers
        serverInfo = self.servers[0]
        rest = RestConnection(serverInfo)
        self.case_number = self.input.param("case_number", 0)

        # Clear the state from Previous invalid run
        if rest._rebalance_progress_status() == 'running':
            self.log.warning(
                "rebalancing is still running, previous test should be verified"
            )
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
        self.load_started = False
        self.loaders = []
        self.log.info("==============  SwapRebalanceBase setup was started for test #{0} {1}=============="\
                      .format(self.case_number, self._testMethodName))
        SwapRebalanceBase.reset(self)
        self.cluster_helper = Cluster()
        # Initialize test params
        self.replica = self.input.param("replica", 1)
        self.keys_count = self.input.param("keys-count", 100000)
        self.load_ratio = self.input.param("load-ratio", 1)
        self.ratio_expiry = self.input.param("ratio-expiry", 0.03)
        self.ratio_deletes = self.input.param("ratio-deletes", 0.13)
        self.num_buckets = self.input.param("num-buckets", 1)
        self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
        self.num_initial_servers = self.input.param("num-initial-servers", 3)
        self.fail_orchestrator = self.swap_orchestrator = self.input.param(
            "swap-orchestrator", False)
        self.do_access = self.input.param("do-access", True)

        # Make sure the test is setup correctly
        min_servers = int(self.num_initial_servers) + int(self.num_swap)
        msg = "minimum {0} nodes required for running swap rebalance"
        self.assertTrue(len(self.servers) >= min_servers,
                        msg=msg.format(min_servers))

        self.log.info('picking server : {0} as the master'.format(serverInfo))
        node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
        info = rest.get_nodes_self()
        rest.init_cluster(username=serverInfo.rest_username,
                          password=serverInfo.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved *
                                                      node_ram_ratio))
        self.log.info(
            "==============  SwapRebalanceBase setup was finished for test #{0} {1} =============="
            .format(self.case_number, self._testMethodName))
        SwapRebalanceBase._log_start(self)
Example #21
    def tearDown(self):
        try:
            test_failed = (hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures or self._resultForDoCleanups.errors)) \
                or (hasattr(self, '_exc_info') and self._exc_info()[1] is not None)
            if test_failed and TestInputSingleton.input.param("stop-on-failure", False)\
                    or self.input.param("skip_cleanup", False):
                self.log.warn("CLEANUP WAS SKIPPED")
            else:
                if test_failed and self.input.param('BUGS', False):
                    self.log.warn(
                        "Test failed. Possible reason is: {0}".format(
                            self.input.param('BUGS', False)))

                self.log.info("==============  basetestcase cleanup was started for test #{0} {1} =============="\
                      .format(self.case_number, self._testMethodName))
                rest = RestConnection(self.master)
                alerts = rest.get_alerts()
                if alerts is not None and len(alerts) != 0:
                    self.log.warn("Alerts were found: {0}".format(alerts))
                if rest._rebalance_progress_status() == 'running':
                    self.log.warning(
                        "rebalancing is still running, test should be verified"
                    )
                    stopped = rest.stop_rebalance()
                    self.assertTrue(stopped, msg="unable to stop rebalance")
                BucketOperationHelper.delete_all_buckets_or_assert(
                    self.servers, self)
                if self.input.param("forceEject", False):
                    for server in self.servers:
                        if server != self.servers[0]:
                            try:
                                rest = RestConnection(server)
                                rest.force_eject_node()
                            except BaseException, e:
                                self.log.error(e)
                ClusterOperationHelper.cleanup_cluster(self.servers)
                self.sleep(10)
                ClusterOperationHelper.wait_for_ns_servers_or_assert(
                    self.servers, self)
                self.log.info("==============  basetestcase cleanup was finished for test #{0} {1} =============="\
                      .format(self.case_number, self._testMethodName))
        except BaseException:
            # increase case_number to retry tearDown in setup for the next test
            self.case_number += 1000
        finally:
            # stop all existing task manager threads
            self.cluster.shutdown()
            self._log_finish(self)
Example #22
 def tearDown(self):
     if not self.input.param("skip_cleanup", False):
         try:
             self.log.info("==============  basetestcase cleanup was started for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
             rest = RestConnection(self.master)
             if rest._rebalance_progress_status() == 'running':
                 self.log.warning("rebalancing is still running, test should be verified")
                 stopped = rest.stop_rebalance()
                 self.assertTrue(stopped, msg="unable to stop rebalance")
             BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
             ClusterOperationHelper.cleanup_cluster(self.servers)
             time.sleep(10)
             ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
             self.log.info("==============  basetestcase cleanup was finished for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
         finally:
             #stop all existing task manager threads
             self.cluster.shutdown()
             self._log_finish(self)
Example #23
 def reset(self):
     self.log.info("==============  SwapRebalanceBase cleanup was started for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
     self.log.info("Stopping load in Teardown")
     SwapRebalanceBase.stop_load(self.loaders)
     for server in self.servers:
         rest = RestConnection(server)
         if rest._rebalance_progress_status() == 'running':
             self.log.warning("rebalancing is still running, test should be verified")
             stopped = rest.stop_rebalance()
             self.assertTrue(stopped, msg="unable to stop rebalance")
     BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
     for server in self.servers:
         ClusterOperationHelper.cleanup_cluster([server])
         if server.data_path:
             rest = RestConnection(server)
             rest.set_data_path(data_path=server.data_path)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(self.servers, self)
     self.log.info("==============  SwapRebalanceBase cleanup was finished for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
Example #24
 def tearDown(self):
     rest = RestConnection(self.master)
     if rest._rebalance_progress_status() == 'running':
         self.log.warning("rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     try:
         super(QueriesOpsTests, self).tearDown()
     except:
         pass
     try:
         ClusterOperationHelper.cleanup_cluster(self.servers, master=self.master)
         self.sleep(1)
     except:
         for server in set(self.servers) - set([self.master]):
             try:
                 rest = RestConnection(server)
                 rest.force_eject_node()
                 time.sleep(1)
             except BaseException, e:
                 self.fail(e)
Example #25
 def tearDown(self):
     rest = RestConnection(self.master)
     if rest._rebalance_progress_status() == 'running':
         self.log.warning(
             "rebalancing is still running, test should be verified")
         stopped = rest.stop_rebalance()
         self.assertTrue(stopped, msg="unable to stop rebalance")
     try:
         super(QueriesOpsTests, self).tearDown()
     except:
         pass
     try:
         ClusterOperationHelper.cleanup_cluster(self.servers,
                                                master=self.master)
         self.sleep(1)
     except:
         for server in set(self.servers) - set([self.master]):
             try:
                 rest = RestConnection(server)
                 rest.force_eject_node()
                 time.sleep(1)
             except BaseException, e:
                 self.fail(e)
Example #26
 def tearDown(self):
     if not self.input.param("skip_cleanup", False):
         try:
             self.log.info("==============  basetestcase cleanup was started for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
             rest = RestConnection(self.master)
             if rest._rebalance_progress_status() == 'running':
                 self.log.warning(
                     "rebalancing is still running, test should be verified"
                 )
                 stopped = rest.stop_rebalance()
                 self.assertTrue(stopped, msg="unable to stop rebalance")
             BucketOperationHelper.delete_all_buckets_or_assert(
                 self.servers, self)
             ClusterOperationHelper.cleanup_cluster(self.servers)
             time.sleep(10)
             ClusterOperationHelper.wait_for_ns_servers_or_assert(
                 self.servers, self)
             self.log.info("==============  basetestcase cleanup was finished for test #{0} {1} =============="\
                       .format(self.case_number, self._testMethodName))
         finally:
             #stop all existing task manager threads
             self.cluster.shutdown()
             self._log_finish(self)
Example #27
    def _common_test_body_failed_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Rebalance in the initial set of servers to form the starting cluster
        self.log.info("INITIAL REBALANCE PHASE")
        RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status,
                            msg="Unable to find orchestrator: {0}:{1}".format(status, content))
            # When swapping all the nodes
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
            ejectedNodes=optNodesIds)

        # Fail the rebalance at 20%, 40% and 60% completion
        for i in [1, 2, 3]:
            expected_progress = 20 * i
            self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
            RestHelper(rest).rebalance_reached(expected_progress)
            bucket = rest.get_buckets()[0].name
            pid = None
            if self.swap_orchestrator:
                # get PID via remote connection if master is a new node
                shell = RemoteMachineShellConnection(master)
                o, _ = shell.execute_command("ps -eo comm,pid | awk '$1 == \"memcached\" { print $2 }'")
                pid = o[0]
                shell.disconnect()
            else:
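                # the direct memcached connection can drop with EOFError; retry before giving up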
                for i in xrange(2):
                    try:
                        _mc = MemcachedClientHelper.direct_client(master, bucket)
                        pid = _mc.stats()["pid"]
                        break
                    except EOFError as e:
                        self.log.error("{0}.Retry in 2 sec".format(e))
                        time.sleep(1)
            if pid is None:
                self.fail("impossible to get a PID")
            command = "os:cmd(\"kill -9 {0} \")".format(pid)
            self.log.info(command)
            killed = rest.diag_eval(command)
            self.log.info("killed {0}:{1}??  {2} ".format(master.ip, master.port, killed))
            self.log.info("sleep for 10 sec after kill memcached")
            time.sleep(10)
            # we can't get stats from the new node when the rebalance fails
            if not self.swap_orchestrator:
                ClusterOperationHelper._wait_warmup_completed(self, [master], bucket, wait_time=600)
            i = 0
            # we expect the rebalance to fail
            while rest._rebalance_progress_status() == "running" and i < 60:
                self.log.info("rebalance progress: {0}".format(rest._rebalance_progress()))
                time.sleep(1)
                i += 1
            self.log.info("rebalance progress status:{0}".format(rest._rebalance_progress_status()))
            knownNodes = rest.node_statuses()
            self.log.info("nodes are still in cluster: {0}".format([(node.ip, node.port) for node in knownNodes]))
            ejectedNodes = list(set(optNodesIds) & set([node.id for node in knownNodes]))
            rest.rebalance(otpNodes=[node.id for node in knownNodes],
                ejectedNodes=ejectedNodes)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))

        SwapRebalanceBase.verification_phase(self, master)
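The failure injection in Example 27 reduces to one step: find memcached's PID on the target node and kill it through ns_server's diag/eval endpoint so the running rebalance aborts. A sketch of just that step follows, reusing the testrunner classes from the example; the helper name is ours and error handling is omitted.

from membase.api.rest_client import RestConnection
from remote.remote_util import RemoteMachineShellConnection


def kill_memcached_via_diag_eval(node):
    # Look up the memcached PID over SSH, then ask ns_server to kill it.
    shell = RemoteMachineShellConnection(node)
    output, _ = shell.execute_command(
        "ps -eo comm,pid | awk '$1 == \"memcached\" { print $2 }'")
    shell.disconnect()
    pid = output[0].strip()
    rest = RestConnection(node)
    return rest.diag_eval('os:cmd("kill -9 {0}")'.format(pid))
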
Example #28
    def common_setup(self):
        self.cluster_helper = Cluster()
        self.log = logger.Logger.get_logger()
        self.cluster_run = False
        self.input = TestInputSingleton.input
        self.servers = self.input.servers
        serverInfo = self.servers[0]
        rest = RestConnection(serverInfo)
        if len(set([server.ip for server in self.servers])) == 1:
            ip = rest.get_nodes_self().ip
            for server in self.servers:
                server.ip = ip
            self.cluster_run = True
        self.case_number = self.input.param("case_number", 0)
        self.replica = self.input.param("replica", 1)
        self.keys_count = self.input.param("keys-count", 1000)
        self.load_ratio = self.input.param("load-ratio", 1)
        self.ratio_expiry = self.input.param("ratio-expiry", 0.03)
        self.ratio_deletes = self.input.param("ratio-deletes", 0.13)
        self.num_buckets = self.input.param("num-buckets", 1)
        self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
        self.num_initial_servers = self.input.param("num-initial-servers", 3)
        self.fail_orchestrator = self.swap_orchestrator = self.input.param(
            "swap-orchestrator", False)
        self.do_access = self.input.param("do-access", True)
        self.load_started = False
        self.loaders = []
        try:
            # Clear the state from Previous invalid run
            if rest._rebalance_progress_status() == 'running':
                self.log.warning(
                    "rebalancing is still running, previous test should be verified"
                )
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            self.log.info("==============  SwapRebalanceBase setup was started for test #{0} {1}=============="\
                      .format(self.case_number, self._testMethodName))
            SwapRebalanceBase.reset(self)

            # Make sure the test is setup correctly
            min_servers = int(self.num_initial_servers) + int(self.num_swap)
            msg = "minimum {0} nodes required for running swap rebalance"
            self.assertTrue(len(self.servers) >= min_servers,
                            msg=msg.format(min_servers))

            self.log.info(
                'picking server : {0} as the master'.format(serverInfo))
            node_ram_ratio = BucketOperationHelper.base_bucket_ratio(
                self.servers)
            info = rest.get_nodes_self()
            rest.init_cluster(username=serverInfo.rest_username,
                              password=serverInfo.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
            SwapRebalanceBase.enable_diag_eval_on_non_local_hosts(
                self, serverInfo)
            # Add built-in user
            testuser = [{
                'id': 'cbadminbucket',
                'name': 'cbadminbucket',
                'password': '******'
            }]
            RbacBase().create_user_source(testuser, 'builtin', self.servers[0])

            # Assign user to role
            role_list = [{
                'id': 'cbadminbucket',
                'name': 'cbadminbucket',
                'roles': 'admin'
            }]
            RbacBase().add_user_role(role_list,
                                     RestConnection(self.servers[0]),
                                     'builtin')

            if self.num_buckets > 10:
                BaseTestCase.change_max_buckets(self, self.num_buckets)
            self.log.info(
                "==============  SwapRebalanceBase setup was finished for test #{0} {1} =============="
                .format(self.case_number, self._testMethodName))
            SwapRebalanceBase._log_start(self)
        except Exception, e:
            self.cluster_helper.shutdown()
            self.fail(e)