Example 1
    def common_tearDown(servers, testcase):
        log = logger.Logger.get_logger()
        log.info(
            "==============  common_tearDown was started for test #{0} {1} ==============".format(
                testcase.case_number, testcase._testMethodName
            )
        )
        RemoteUtilHelper.common_basic_setup(servers)

        log.info("10 seconds delay to wait for couchbase-server to start")
        time.sleep(10)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(
            servers, testcase, wait_time=AutoFailoverBaseTest.MAX_FAIL_DETECT_TIME * 15, wait_if_warmup=True
        )
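        # best-effort flush of every bucket so leftover data does not leak into the next run; errors are ignored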
        try:
            rest = RestConnection(servers[0])
            buckets = rest.get_buckets()
            for bucket in buckets:
                MemcachedClientHelper.flush_bucket(servers[0], bucket.name)
        except Exception:
            pass
        BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
        ClusterOperationHelper.cleanup_cluster(servers)
        log.info(
            "==============  common_tearDown was finished for test #{0} {1} ==============".format(
                testcase.case_number, testcase._testMethodName
            )
        )
Example 2
 def tearDown(self):
     try:
         self._cluster_helper.shutdown()
         log = logger.Logger.get_logger()
         log.info("==============  tearDown was started for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
         RemoteUtilHelper.common_basic_setup(self._servers)
         log.info("10 seconds delay to wait for membase-server to start")
         time.sleep(10)
         for server in self._cleanup_nodes:
             shell = RemoteMachineShellConnection(server)
             o, r = shell.execute_command("iptables -F")
             shell.log_command_output(o, r)
             o, r = shell.execute_command("/sbin/iptables -A INPUT -p tcp -i eth0 --dport 1000:60000 -j ACCEPT")
             shell.log_command_output(o, r)
             o, r = shell.execute_command("/sbin/iptables -A OUTPUT -p tcp -o eth0 --dport 1000:60000 -j ACCEPT")
             shell.log_command_output(o, r)
             o, r = shell.execute_command("/etc/init.d/couchbase-server start")
             shell.log_command_output(o, r)
             shell.disconnect()
         BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
         ClusterOperationHelper.cleanup_cluster(self._servers)
         ClusterHelper.wait_for_ns_servers_or_assert(self._servers, self)
         log.info("==============  tearDown was finished for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
     finally:
         pass
Example 3
 def common_setup(input, testcase):
     servers = input.servers
     RemoteUtilHelper.common_basic_setup(servers)
     BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
     for server in servers:
         ClusterOperationHelper.cleanup_cluster([server])
     ClusterHelper.wait_for_ns_servers_or_assert(servers, testcase)
 def tearDown(self):
     if hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0 \
             and 'stop-on-failure' in TestInputSingleton.input.test_params and \
             str(TestInputSingleton.input.test_params['stop-on-failure']).lower() == 'true':
         # _resultForDoCleanups is only available starting with Python 2.7
         self.log.warn("CLEANUP WAS SKIPPED")
         self.cluster.shutdown(force=True)
         self._log_finish(self)
     else:
         try:
             self.log.info("==============  tearDown was started for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
             RemoteUtilHelper.common_basic_setup(self.servers)
             BucketOperationHelper.delete_all_buckets_or_assert(self.servers, self)
             for node in self.servers:
                 master = node
                 try:
                     ClusterOperationHelper.cleanup_cluster(self.servers,
                                                            master=master)
                 except:
                     continue
             self.log.info("==============  tearDown was finished for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
         finally:
             super(FailoverBaseTest, self).tearDown()
Example 5
    def run_failover_operations(self, chosen, failover_reason):
        """ Method to run fail over operations used in the test scenario based on failover reason """
        # Perform operations related to failover
        for node in chosen:
            if failover_reason == 'stop_server':
                self.stop_server(node)
                self.log.info("10 seconds delay to wait for membase-server to shutdown")
                # wait for 5 minutes until node is down
                self.assertTrue(RestHelper(self.rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                server = [srv for srv in self.servers if node.ip == srv.ip][0]
                RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
                status = RestHelper(self.rest).wait_for_node_status(node, "unhealthy", 300)
                if status:
                    self.log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
                else:
                    # verify iptables on the node if something is wrong
                    for server in self.servers:
                        if server.ip == node.ip:
                            shell = RemoteMachineShellConnection(server)
                            info = shell.extract_remote_info()
                            if info.type.lower() == "windows":
                                o, r = shell.execute_command("netsh advfirewall show allprofiles")
                                shell.log_command_output(o, r)
                            else:
                                o, r = shell.execute_command("/sbin/iptables --list")
                                shell.log_command_output(o, r)
                            shell.disconnect()
                    self.rest.print_UI_logs()
                    api = self.rest.baseUrl + 'nodeStatuses'
                    status, content, header = self.rest._http_request(api)
                    json_parsed = json.loads(content)
                    self.log.info("nodeStatuses: {0}".format(json_parsed))
                    self.fail("node status is not unhealthy even after waiting for 5 minutes")

        # trigger the failover for the chosen node (graceful or hard, depending on the test)
        failed_over = self.rest.fail_over(node.id, graceful=self.graceful)

        # Check for negative cases
        if self.graceful and (failover_reason in ['stop_server', 'firewall']):
            if failed_over:
                # MB-10479
                self.rest.print_UI_logs()
            self.assertFalse(failed_over, "Graceful failover was started for an unhealthy node!")
            return
        elif self.gracefulFailoverFail and failed_over:
            # the failover was expected to fail, so a successful failover is an error here
            self.assertTrue(not failed_over, "Graceful failover should fail due to not enough replicas")
            return

        # Check if failover happened as expected or re-try one more time
        if not failed_over:
            self.log.info("unable to failover the node the first time. Trying again in 75 seconds...")
            self.sleep(75)
            failed_over = self.rest.fail_over(node.id, graceful=self.graceful)
        if self.graceful and (failover_reason not in ['stop_server', 'firewall']):
            reached = RestHelper(self.rest).rebalance_reached()
            self.assertTrue(reached, "rebalance for graceful failover got stuck or did not complete")
Example 6
    def test_node_firewall_enabled(self):
        timeout = self.timeout / 2

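        # enable auto-reprovision; the second argument is presumably the maximum number of nodes to reprovision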
        status = self.rest.update_autoreprovision_settings(True, 1)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        RemoteUtilHelper.enable_firewall(self.server_fail)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self.sleep(5)
        shell = RemoteMachineShellConnection(self.server_fail)
        shell.disable_firewall()
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 0,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self.rest.rebalance(
            otpNodes=[node.id for node in self.rest.node_statuses()],
            ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
        buckets = self.rest.get_buckets()
        for bucket in buckets:
            self.verify_loaded_data(self.master, bucket.name,
                                    self.loaded_items[bucket.name])
Example 7
 def test_upgrade_negative(self):
     op = self.input.param("op", None)
     error = self.input.param("error", '')
     remote = RemoteMachineShellConnection(self.master)
     if op is None:
         self.fail("operation should be specified")
     if op == "higher_version":
         tmp = self.initial_version
         self.initial_version = self.upgrade_versions[0]
         self.upgrade_versions = [tmp, ]
     info = None
     if op == "wrong_arch":
         info = remote.extract_remote_info()
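          # flip the detected architecture so the upgrade deliberately targets the wrong platform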
          info.architecture_type = ('x86_64', 'x86')[info.architecture_type == 'x86_64']
     self._install([self.master])
     self.operations([self.master])
     try:
         if op == "close_port":
             RemoteUtilHelper.enable_firewall(self.master)
         for upgrade_version in self.upgrade_versions:
             self.sleep(self.sleep_time, "Pre-setup of old version is done. Wait for upgrade to {0} version".\
                    format(upgrade_version))
              # capture stderr separately so the expected 'error' string is not overwritten
              output, err = self._upgrade(upgrade_version, self.master, info=info)
              if str(output).find(error) != -1 or str(err).find(error) != -1:
                  raise Exception(error)
      except Exception as ex:
         self.log.info("Exception %s appeared as expected" % ex)
         self.log.info("Check that old version is working fine")
         self.verification([self.master])
Example 8
    def common_setup(input, testcase):
        log.info("==============  common_setup was started for test #{0} {1}==============" \
                 .format(testcase.case_number, testcase._testMethodName))
        servers = input.servers
        RemoteUtilHelper.common_basic_setup(servers)
        BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
        ClusterOperationHelper.cleanup_cluster(servers)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)

        # Add built-in user
        testuser = [{
            'id': 'cbadminbucket',
            'name': 'cbadminbucket',
            'password': '******'
        }]
        RbacBase().create_user_source(testuser, 'builtin', servers[0])

        # Assign user to role
        role_list = [{
            'id': 'cbadminbucket',
            'name': 'cbadminbucket',
            'roles': 'admin'
        }]
        RbacBase().add_user_role(role_list, RestConnection(servers[0]),
                                 'builtin')

        log.info("==============  common_setup was finished for test #{0} {1} ==============" \
                 .format(testcase.case_number, testcase._testMethodName))
Example 9
 def tearDown(self):
     if hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0 \
             and 'stop-on-failure' in TestInputSingleton.input.test_params and \
             str(TestInputSingleton.input.test_params['stop-on-failure']).lower() == 'true':
         # _resultForDoCleanups is only available starting with Python 2.7
         self.log.warn("CLEANUP WAS SKIPPED")
         self.cluster.shutdown()
         self._log_finish(self)
     else:
         try:
             self.log.info("==============  tearDown was started for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
             RemoteUtilHelper.common_basic_setup(self.servers)
             self.log.info("10 seconds delay to wait for membase-server to start")
             time.sleep(10)
             for server in self._cleanup_nodes:
                 shell = RemoteMachineShellConnection(server)
                 o, r = shell.execute_command("iptables -F")
                 shell.log_command_output(o, r)
                 o, r = shell.execute_command("/sbin/iptables -A INPUT -p tcp -i eth0 --dport 1000:60000 -j ACCEPT")
                 shell.log_command_output(o, r)
                 o, r = shell.execute_command("/sbin/iptables -A OUTPUT -p tcp -o eth0 --dport 1000:60000 -j ACCEPT")
                 shell.log_command_output(o, r)
                 o, r = shell.execute_command("/etc/init.d/couchbase-server start")
                 shell.log_command_output(o, r)
                 shell.disconnect()
             self.log.info("==============  tearDown was finished for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
         finally:
             super(FailoverBaseTest, self).tearDown()
 def tearDown(self):
     if hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0 \
                 and 'stop-on-failure' in TestInputSingleton.input.test_params and \
                 str(TestInputSingleton.input.test_params['stop-on-failure']).lower() == 'true':
          # _resultForDoCleanups is only available starting with Python 2.7
          self.log.warn("CLEANUP WAS SKIPPED")
         self.cluster.shutdown(force=True)
         self._log_finish(self)
     else:
         try:
             self.log.info("==============  tearDown was started for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
             RemoteUtilHelper.common_basic_setup(self.servers)
             BucketOperationHelper.delete_all_buckets_or_assert(
                 self.servers, self)
             for node in self.servers:
                 master = node
                 try:
                     ClusterOperationHelper.cleanup_cluster(self.servers,
                                                            master=master)
                 except:
                     continue
             self.log.info("==============  tearDown was finished for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
         finally:
             super(FailoverBaseTest, self).tearDown()
Example 11
 def common_setup(input, testcase):
     servers = input.servers
     RemoteUtilHelper.common_basic_setup(servers)
     BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
     for server in servers:
         ClusterOperationHelper.cleanup_cluster([server])
     ClusterHelper.wait_for_ns_servers_or_assert(servers, testcase)
Example 12
 def tearDown(self):
     try:
         self._cluster_helper.shutdown()
         log = logger.Logger.get_logger()
         log.info("==============  tearDown was started for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
         RemoteUtilHelper.common_basic_setup(self._servers)
         log.info("10 seconds delay to wait for membase-server to start")
         time.sleep(10)
         for server in self._cleanup_nodes:
             shell = RemoteMachineShellConnection(server)
             o, r = shell.execute_command("iptables -F")
             shell.log_command_output(o, r)
             o, r = shell.execute_command(
                 "/sbin/iptables -A INPUT -p tcp -i eth0 --dport 1000:60000 -j ACCEPT"
             )
             shell.log_command_output(o, r)
             o, r = shell.execute_command(
                 "/sbin/iptables -A OUTPUT -p tcp -o eth0 --dport 1000:60000 -j ACCEPT"
             )
             shell.log_command_output(o, r)
             o, r = shell.execute_command(
                 "/etc/init.d/couchbase-server start")
             shell.log_command_output(o, r)
             shell.disconnect()
         BucketOperationHelper.delete_all_buckets_or_assert(
             self._servers, self)
         ClusterOperationHelper.cleanup_cluster(self._servers)
         ClusterHelper.wait_for_ns_servers_or_assert(self._servers, self)
         log.info("==============  tearDown was finished for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
     finally:
         pass
Example 13
 def tearDown(self):
     if hasattr(self, '_resultForDoCleanups') and len(self._resultForDoCleanups.failures) > 0 \
             and 'stop-on-failure' in TestInputSingleton.input.test_params and \
             str(TestInputSingleton.input.test_params['stop-on-failure']).lower() == 'true':
         # _resultForDoCleanups is only available starting with Python 2.7
         self.log.warn("CLEANUP WAS SKIPPED")
         self.cluster.shutdown()
         self._log_finish(self)
     else:
         try:
             self.log.info("==============  tearDown was started for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
             RemoteUtilHelper.common_basic_setup(self.servers)
             self.log.info("10 seconds delay to wait for membase-server to start")
             time.sleep(10)
             for server in self._cleanup_nodes:
                 shell = RemoteMachineShellConnection(server)
                 o, r = shell.execute_command("iptables -F")
                 shell.log_command_output(o, r)
                 o, r = shell.execute_command("/sbin/iptables -A INPUT -p tcp -i eth0 --dport 1000:60000 -j ACCEPT")
                 shell.log_command_output(o, r)
                 o, r = shell.execute_command("/sbin/iptables -A OUTPUT -p tcp -o eth0 --dport 1000:60000 -j ACCEPT")
                 shell.log_command_output(o, r)
                 o, r = shell.execute_command("/etc/init.d/couchbase-server start")
                 shell.log_command_output(o, r)
                 shell.disconnect()
             self.log.info("==============  tearDown was finished for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
         finally:
             super(FailoverBaseTest, self).tearDown()
Example 14
    def test_topology_change_events(self):
        available_server_before_rebalance = copy.deepcopy(self.available_servers)
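        # snapshot the free-server list, since self.available_servers is reassigned by the rebalance helper below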
        try:
            self.log.info("Enabling firewall between Incoming node and CBAS CC "
                          "node to trigger topology_change_failed event")
            for node in available_server_before_rebalance:
                RemoteUtilHelper.enable_firewall(
                    node, bidirectional=False, xdcr=False,
                    action_on_packet="REJECT", block_ips=[self.cluster.cbas_cc_node.ip],
                    all_interface=True)

            self.log.info("Rebalancing IN CBAS node to trigger "
                          "topology_change_started event")
            rebalance_task, self.available_servers = self.rebalance_util.rebalance(
                self.cluster, kv_nodes_in=0, kv_nodes_out=0,
                cbas_nodes_in=1, cbas_nodes_out=0,
                available_servers=self.available_servers, exclude_nodes=[])

            if self.rebalance_util.wait_for_rebalance_task_to_complete(
                    rebalance_task, self.cluster, check_cbas_running=False):
                raise Exception("Rebalance passed when it should have failed.")

            self.log.info("Disabling firewall between Incoming node and CBAS CC "
                          "node and retriggering rebalance to trigger "
                          "topology_change_completed event")
            for node in available_server_before_rebalance:
                remote_client = RemoteMachineShellConnection(node)
                remote_client.disable_firewall()
                remote_client.disconnect()

            rebalance_task, self.available_servers = self.rebalance_util.rebalance(
                self.cluster, kv_nodes_in=0, kv_nodes_out=0,
                cbas_nodes_in=0, cbas_nodes_out=0,
                available_servers=self.available_servers, exclude_nodes=[])

            if not self.rebalance_util.wait_for_rebalance_task_to_complete(
                    rebalance_task, self.cluster, check_cbas_running=False):
                raise Exception("Rebalance failed even after disabling "
                                "firewall")

            self.log.info("Adding event for topology_change_started event")
            self.system_events.add_event(AnalyticsEvents.topology_change_started(
                self.cluster.cbas_cc_node.ip, 2, 0))

            self.log.info("Adding event for topology_change_failed event")
            self.system_events.add_event(AnalyticsEvents.topology_change_failed(
                self.cluster.cbas_cc_node.ip, 2, 0))

            self.log.info("Adding event for topology_change_completed event")
            self.system_events.add_event(AnalyticsEvents.topology_change_completed(
                self.cluster.cbas_cc_node.ip, 2, 0))
        except Exception as err:
            self.log.info("Disabling Firewall")
            for node in available_server_before_rebalance:
                remote_client = RemoteMachineShellConnection(node)
                remote_client.disable_firewall()
                remote_client.disconnect()
            self.fail(str(err))
 def run_failover_operations_with_ops(self, chosen, failover_reason):
     """ Method to run fail over operations used in the test scenario based on failover reason """
      # Perform operations related to failover
     failed_over = True
     for node in chosen:
         unreachable = False
         if failover_reason == 'stop_server':
             unreachable = True
             self.stop_server(node)
             self.log.info("10 seconds delay to wait for membase-server to shutdown")
             # wait for 5 minutes until node is down
             self.assertTrue(RestHelper(self.rest).wait_for_node_status(node, "unhealthy", 300),
                                 msg="node status is not unhealthy even after waiting for 5 minutes")
         elif failover_reason == "firewall":
             unreachable = True
              self.filter_list.append(node.ip)
             server = [srv for srv in self.servers if node.ip == srv.ip][0]
             RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
             status = RestHelper(self.rest).wait_for_node_status(node, "unhealthy", 300)
             if status:
                 self.log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
             else:
                  # verify iptables on the node if something is wrong
                 for server in self.servers:
                     if server.ip == node.ip:
                         shell = RemoteMachineShellConnection(server)
                         info = shell.extract_remote_info()
                         if info.type.lower() == "windows":
                             o, r = shell.execute_command("netsh advfirewall show allprofiles")
                             shell.log_command_output(o, r)
                         else:
                             o, r = shell.execute_command("/sbin/iptables --list")
                             shell.log_command_output(o, r)
                         shell.disconnect()
                 self.rest.print_UI_logs()
                 api = self.rest.baseUrl + 'nodeStatuses'
                 status, content, header = self.rest._http_request(api)
                 json_parsed = json.loads(content)
                 self.log.info("nodeStatuses: {0}".format(json_parsed))
                 self.fail("node status is not unhealthy even after waiting for 5 minutes")
     nodes = self.filter_servers(self.servers, chosen)
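      # failover is started asynchronously so compaction, view and mutation workloads can run alongside it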
     failed_over = self.cluster.async_failover([self.master], failover_nodes=chosen, graceful=self.graceful)
     # Perform Compaction
     compact_tasks = []
     if self.compact:
         for bucket in self.buckets:
             compact_tasks.append(self.cluster.async_compact_bucket(self.master, bucket))
     # Run View Operations
     if self.withViewsOps:
         self.query_and_monitor_view_tasks(nodes)
     # Run mutation operations
     if self.withMutationOps:
         self.run_mutation_operations()
     failed_over.result()
     for task in compact_tasks:
         task.result()
     msg = "rebalance failed while removing failover nodes {0}".format(node.id)
     self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
Example 16
 def run_failover_operations_with_ops(self, chosen, failover_reason):
     """ Method to run fail over operations used in the test scenario based on failover reason """
      # Perform operations related to failover
     failed_over = True
     for node in chosen:
         unreachable = False
         if failover_reason == 'stop_server':
              unreachable = True
             self.stop_server(node)
             self.log.info("10 seconds delay to wait for membase-server to shutdown")
             # wait for 5 minutes until node is down
             self.assertTrue(RestHelper(self.rest).wait_for_node_status(node, "unhealthy", 300),
                                 msg="node status is not unhealthy even after waiting for 5 minutes")
         elif failover_reason == "firewall":
              unreachable = True
              self.filter_list.append(node.ip)
             server = [srv for srv in self.servers if node.ip == srv.ip][0]
             RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
             status = RestHelper(self.rest).wait_for_node_status(node, "unhealthy", 300)
             if status:
                 self.log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
             else:
                  # verify iptables on the node if something is wrong
                 for server in self.servers:
                     if server.ip == node.ip:
                         shell = RemoteMachineShellConnection(server)
                         info = shell.extract_remote_info()
                         if info.type.lower() == "windows":
                             o, r = shell.execute_command("netsh advfirewall show allprofiles")
                             shell.log_command_output(o, r)
                         else:
                             o, r = shell.execute_command("/sbin/iptables --list")
                             shell.log_command_output(o, r)
                         shell.disconnect()
                 self.rest.print_UI_logs()
                 api = self.rest.baseUrl + 'nodeStatuses'
                 status, content, header = self.rest._http_request(api)
                 json_parsed = json.loads(content)
                 self.log.info("nodeStatuses: {0}".format(json_parsed))
                 self.fail("node status is not unhealthy even after waiting for 5 minutes")
      nodes = self.filter_servers(self.servers, chosen)
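      # kick off the failover asynchronously; the task is awaited after the parallel workloads below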
      failed_over = self.cluster.async_failover([self.master], failover_nodes=chosen, graceful=self.graceful)
     # Perform Compaction
     compact_tasks = []
     if self.compact:
         for bucket in self.buckets:
              compact_tasks.append(self.cluster.async_compact_bucket(self.master, bucket))
     # Run View Operations
     if self.withViewsOps:
         self.query_and_monitor_view_tasks(nodes)
     # Run mutation operations
     if self.withMutationOps:
         self.run_mutation_operations()
     failed_over.result()
     for task in compact_tasks:
         task.result()
     msg = "rebalance failed while removing failover nodes {0}".format(node.id)
     self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
Example 17
 def test_60s_timeout_firewall(self):
     timeout = self.timeout
     server_fail = self._servers[1]
     status = self.rest.update_autofailover_settings(True, timeout)
     if not status:
         self.fail('failed to change autofailover_settings! See MB-7282')
     self.sleep(5)
     RemoteUtilHelper.enable_firewall(server_fail)
     AutoFailoverBaseTest.wait_for_failover_or_assert(self.master, 1, timeout + AutoFailoverBaseTest.MAX_FAIL_DETECT_TIME, self)
Example 18
    def common_tearDown(servers, testcase):
        RemoteUtilHelper.common_basic_setup(servers)
        log = logger.Logger.get_logger()
        log.info("10 seconds delay to wait for membase-server to start")
        time.sleep(10)

        BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
        ClusterOperationHelper.cleanup_cluster(servers)
        ClusterHelper.wait_for_ns_servers_or_assert(servers, testcase)
Example 19
    def common_tearDown(servers, testcase):
        RemoteUtilHelper.common_basic_setup(servers)
        log = logger.Logger.get_logger()
        log.info("10 seconds delay to wait for membase-server to start")
        time.sleep(10)

        BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
        ClusterOperationHelper.cleanup_cluster(servers)
        ClusterHelper.wait_for_ns_servers_or_assert(servers, testcase)
Example 20
 def common_setup(input, testcase):
     log.info("==============  common_setup was started for test #{0} {1}=============="\
                   .format(testcase.case_number, testcase._testMethodName))
     servers = input.servers
     RemoteUtilHelper.common_basic_setup(servers)
     ClusterOperationHelper.cleanup_cluster(servers)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)
     BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
     log.info("==============  common_setup was finished for test #{0} {1} =============="\
                   .format(testcase.case_number, testcase._testMethodName))
Example 21
 def common_setup(input, testcase):
     log.info("==============  common_setup was started for test #{0} {1}=============="\
                   .format(testcase.case_number, testcase._testMethodName))
     servers = input.servers
     RemoteUtilHelper.common_basic_setup(servers)
     BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
     ClusterOperationHelper.cleanup_cluster(servers)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)
     log.info("==============  common_setup was finished for test #{0} {1} =============="\
                   .format(testcase.case_number, testcase._testMethodName))
Example 22
    def post_init_cb(self):
        # Optionally change node name and restart server
        if params.get('use_domain_names', False):
            RemoteUtilHelper.use_hostname_for_server_settings(self.node)

        # Optionally disable consistency check
        if params.get('disable_consistency', False):
            self.rest.set_couchdb_option(section='couchdb',
                                         option='consistency_check_ratio',
                                         value='0.0')
Example 23
 def test_60s_timeout_firewall(self):
     timeout = self.timeout
     server_fail = self.servers[1]
     status = self.rest.update_autofailover_settings(True, timeout)
     if not status:
         self.fail('failed to change autofailover_settings! See MB-7282')
     self.sleep(5)
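      # block the node with the firewall and expect exactly one auto-failover within the timeout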
     RemoteUtilHelper.enable_firewall(server_fail)
     AutoFailoverBaseTest.wait_for_failover_or_assert(
         self.master, 1,
         timeout + AutoFailoverBaseTest.MAX_FAIL_DETECT_TIME, self)
Example 24
 def test_firewall_node_when_autoreprovisioning(self):
     wait_timeout = 120
     before = self.input.param("before", True)
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
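      # reboot the node to knock it out; Windows machines are given five times the wait below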
     shell = RemoteMachineShellConnection(self.server_fail)
     if shell.extract_remote_info().type.lower() == 'windows':
         o, r = shell.execute_command("shutdown -r -f -t 0")
     elif shell.extract_remote_info().type.lower() == 'linux':
         o, r = shell.execute_command("reboot")
     shell.log_command_output(o, r)
     if shell.extract_remote_info().type.lower() == 'windows':
         time.sleep(wait_timeout * 5)
     else:
         time.sleep(wait_timeout)
     # disable firewall on the node
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.disable_firewall()
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 0,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(),
                     "cluster status is not healthy")
     self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
     # self.sleep(5)
     if before:
         RemoteUtilHelper.enable_firewall(self.servers[2])
     self.rest.rebalance(
         otpNodes=[node.id for node in self.rest.node_statuses()],
         ejectedNodes=[])
     if not before:
         RemoteUtilHelper.enable_firewall(self.servers[2])
     # self.sleep(5)
     try:
         self.rest.monitorRebalance()
          self.fail("Rebalance was expected to fail")
     except RebalanceFailedException:
          self.log.info("Rebalance failed, as expected")
     shell = RemoteMachineShellConnection(self.servers[2])
     shell.disable_firewall()
     self.sleep(5)
     self.rest.rebalance(
         otpNodes=[node.id for node in self.rest.node_statuses()],
         ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name,
                                 self.loaded_items[bucket.name])
Example 25
 def common_tearDown(servers, testcase):
     RemoteUtilHelper.common_basic_setup(servers)
     log = logger.Logger.get_logger()
     log.info("10 seconds delay to wait for couchbase-server to start")
     time.sleep(10)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)
     try:
         MemcachedClientHelper.flush_bucket(servers[0], 'default')
     except Exception:
         pass
     BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
     ClusterOperationHelper.cleanup_cluster(servers)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)
Example 26
 def setUp(self):
     log = logger.Logger.get_logger()
     self._input = TestInputSingleton.input
     self._keys_count = self._input.param("keys_count", DEFAULT_KEY_COUNT)
     self._num_replicas = self._input.param("replica", DEFAULT_REPLICA)
     self.bidirectional = self._input.param("bidirectional", False)
     self.case_number = self._input.param("case_number", 0)
     self._value_size = self._input.param("value_size", 256)
     self.wait_timeout = self._input.param("wait_timeout", 60)
     self._servers = self._input.servers
     self.master = self._servers[0]
     self._failed_nodes = []
     num_buckets = 0
     self.buckets = []
     self.default_bucket = self._input.param("default_bucket", True)
     if self.default_bucket:
         self.default_bucket_name = "default"
         num_buckets += 1
     self._standard_buckets = self._input.param("standard_buckets", 0)
     self._sasl_buckets = self._input.param("sasl_buckets", 0)
     num_buckets += self._standard_buckets + self._sasl_buckets
     self.dgm_run = self._input.param("dgm_run", True)
     self.log = logger.Logger().get_logger()
     self._cluster_helper = Cluster()
     self.disabled_consistent_view = self._input.param(
         "disabled_consistent_view", None)
     self._quota = self._initialize_nodes(self._cluster_helper,
                                          self._servers,
                                          self.disabled_consistent_view)
      if self.dgm_run:
          # cap the quota so the test actually runs in DGM (disk greater than memory) mode
          self._quota = 256
     self.bucket_size = int(
         (2.0 / 3.0) / float(num_buckets) * float(self._quota))
     self.gen_create = BlobGenerator('loadOne',
                                     'loadOne_',
                                     self._value_size,
                                     end=self._keys_count)
     self.add_back_flag = False
     self._cleanup_nodes = []
     log.info("==============  setup was started for test #{0} {1}=============="\
                   .format(self.case_number, self._testMethodName))
     RemoteUtilHelper.common_basic_setup(self._servers)
     BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
     for server in self._servers:
         ClusterOperationHelper.cleanup_cluster([server])
     ClusterHelper.wait_for_ns_servers_or_assert(self._servers, self)
     self._setup_cluster()
     self._create_buckets_()
     log.info("==============  setup was finished for test #{0} {1} =============="\
                   .format(self.case_number, self._testMethodName))
 def test_60s_timeout_firewall(self):
     # AUTOFAIL_TEST_5
     timeout = self.timeout
     server_fail = self._servers[1]
     status = self.rest.update_autofailover_settings(True, timeout)
     if not status:
         self.fail('failed to change autofailover_settings!')
     time.sleep(5)
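      # record wall-clock time so the failover delay can be compared with the configured timeout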
     time_start = time.time()
     RemoteUtilHelper.enable_firewall(server_fail)
     AutoFailoverBaseTest.wait_for_failover_or_assert(self.master, 1, timeout, self)
     time_end = time.time()
     msg = "{0} != {1}".format(time_end - time_start, timeout)
     self.assertTrue(abs((time_end - time_start) - timeout) <= AutoFailoverBaseTest.MAX_FAIL_DETECT_TIME, msg)
     self.log.info("expected failover in {0} seconds, actual time {1} seconds".format(timeout, time_end - time_start))
Example 28
 def test_60s_timeout_firewall(self):
     # AUTOFAIL_TEST_5
     timeout = self.timeout
     server_fail = self._servers[1]
     status = self.rest.update_autofailover_settings(True, timeout)
     if not status:
         self.fail('failed to change autofailover_settings!')
     self.sleep(5)
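      # measure how long the auto-failover actually takes; it is checked against the timeout below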
     time_start = time.time()
     RemoteUtilHelper.enable_firewall(server_fail)
     AutoFailoverBaseTest.wait_for_failover_or_assert(self.master, 1, timeout + self.extra_timeout, self)
     time_end = time.time()
     msg = "{0} != {1}".format(time_end - time_start, timeout + self.extra_timeout)
     self.assertTrue(abs((time_end - time_start) - timeout - self.extra_timeout) <= AutoFailoverBaseTest.MAX_FAIL_DETECT_TIME, msg)
     self.log.info("expected failover in {0} seconds, actual time {1} seconds".format(timeout, time_end - time_start))
Example 29
    def pass_encrypted_in_logs_test(self):
        self.bucket_size = self._get_bucket_size(self.quota, 1)
        self._create_sasl_buckets(self.master, 1, password='******')
        bucket = self.buckets[-1]

        if self.input.param("load", 0):
            self.num_items = self.input.param("load", 0)
            self._load_doc_data_all_buckets()
        if self.input.param("views", 0):
            views = []
            for i in xrange(self.input.param("views", 0)):
                views.append(View("view_sasl" + str(i),
                                  'function (doc, meta) {'
                                  'emit(meta.id, "emitted_value%s");}' % str(i),
                                  None, False))
            self.create_views(self.master, "ddoc", views, bucket)
        if self.input.param("rebalance", 0):
            self.cluster.rebalance(self.servers[:self.nodes_init],
                                   self.servers[self.nodes_init:self.nodes_init + self.input.param("rebalance", 0)],
                                   [])

        for server in self.servers[:self.nodes_init]:
            for log_file in ['debug', 'info', 'views', 'xdcr']:
                self.assertFalse(RemoteUtilHelper.is_text_present_in_logs(server, bucket.saslPassword, logs_to_check=log_file),
                                 "%s logs contain the password in plain text" % log_file)
Example 30
 def common_tearDown(servers, testcase):
     RemoteUtilHelper.common_basic_setup(servers)
     log = logger.Logger.get_logger()
     log.info("10 seconds delay to wait for couchbase-server to start")
     time.sleep(10)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)
     try:
          rest = RestConnection(servers[0])
         buckets = rest.get_buckets()
         for bucket in buckets:
             MemcachedClientHelper.flush_bucket(servers[0], bucket.name)
     except Exception:
         pass
     BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
     ClusterOperationHelper.cleanup_cluster(servers)
     ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)
    def pass_encrypted_in_logs_test(self):
        self.bucket_size = self._get_bucket_size(self.quota, 1)
        self._create_sasl_buckets(self.master, 1, password='******')
        bucket = self.buckets[-1]

        if self.input.param("load", 0):
            self.num_items = self.input.param("load", 0)
            self._load_doc_data_all_buckets()
        if self.input.param("views", 0):
            views = []
            for i in range(self.input.param("views", 0)):
                views.append(View("view_sasl" + str(i),
                                  'function (doc, meta) {'
                                  'emit(meta.id, "emitted_value%s");}' % str(i),
                                  None, False))
            self.create_views(self.master, "ddoc", views, bucket)
        if self.input.param("rebalance", 0):
            self.cluster.rebalance(self.servers[:self.nodes_init],
                                   self.servers[self.nodes_init:self.nodes_init + self.input.param("rebalance", 0)],
                                   [])

        for server in self.servers[:self.nodes_init]:
            for log_file in ['debug', 'info', 'views', 'xdcr']:
                self.assertFalse(RemoteUtilHelper.is_text_present_in_logs(server, bucket.saslPassword, logs_to_check=log_file),
                                 "%s logs contain the password in plain text" % log_file)
Example 32
 def tearDown(self):
     if hasattr(self, '_resultForDoCleanups') \
             and len(self._resultForDoCleanups.failures) > 0 \
             and 'stop-on-failure' in TestInputSingleton.input.test_params \
             and str(TestInputSingleton.input.test_params['stop-on-failure']).lower() == 'true':
         # supported starting with python2.7
         self.log.warn("CLEANUP WAS SKIPPED")
         self.cluster.shutdown(force=True)
     else:
         try:
             self.log.info("==============  tearDown was started for test #{0} {1} =============="\
                           .format(self.case_number, self._testMethodName))
             RemoteUtilHelper.common_basic_setup(self.cluster.servers)
             self.cluster_util.check_for_panic_and_mini_dumps(self.servers)
         finally:
             super(FailoverBaseTest, self).tearDown()
Example 33
 def test_firewall_node_when_autoreprovisioning(self):
     wait_timeout = 120
     before = self.input.param("before", True)
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
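      # reboot the node to knock it out; Windows machines are given five times the wait below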
     shell = RemoteMachineShellConnection(self.server_fail)
     if shell.extract_remote_info().type.lower() == 'windows':
         o, r = shell.execute_command("shutdown -r -f -t 0")
     elif shell.extract_remote_info().type.lower() == 'linux':
         o, r = shell.execute_command("reboot")
     shell.log_command_output(o, r)
     if shell.extract_remote_info().type.lower() == 'windows':
         time.sleep(wait_timeout * 5)
     else:
         time.sleep(wait_timeout)
     # disable firewall on the node
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.disable_firewall()
     AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 0,
                                                         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                         self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
     self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
     # self.sleep(5)
     if before:
         RemoteUtilHelper.enable_firewall(self.servers[2])
     self.rest.rebalance(otpNodes=[node.id for node in self.rest.node_statuses()], ejectedNodes=[])
     if not before:
         RemoteUtilHelper.enable_firewall(self.servers[2])
     # self.sleep(5)
     try:
         self.rest.monitorRebalance()
          self.fail("Rebalance was expected to fail")
     except RebalanceFailedException:
          self.log.info("Rebalance failed, as expected")
     shell = RemoteMachineShellConnection(self.servers[2])
     shell.disable_firewall()
     self.sleep(5)
     self.rest.rebalance(otpNodes=[node.id for node in self.rest.node_statuses()], ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name, self.loaded_items[bucket.name])
Example 34
 def setUp(self):
     log = logger.Logger.get_logger()
     self._input = TestInputSingleton.input
     self._keys_count = self._input.param("keys_count", DEFAULT_KEY_COUNT)
     self._num_replicas = self._input.param("replica", DEFAULT_REPLICA)
     self.bidirectional = self._input.param("bidirectional", False)
     self.case_number = self._input.param("case_number", 0)
     self._value_size = self._input.param("value_size", 256)
     self.wait_timeout = self._input.param("wait_timeout", 60)
     self._servers = self._input.servers
     self.master = self._servers[0]
     self._failed_nodes = []
     num_buckets = 0
     self.buckets = []
     self.default_bucket = self._input.param("default_bucket", True)
     if self.default_bucket:
         self.default_bucket_name = "default"
         num_buckets += 1
     self._standard_buckets = self._input.param("standard_buckets", 0)
     self._sasl_buckets = self._input.param("sasl_buckets", 0)
     num_buckets += self._standard_buckets + self._sasl_buckets
     self.dgm_run = self._input.param("dgm_run", True)
     self.log = logger.Logger().get_logger()
     self._cluster_helper = Cluster()
     self.disabled_consistent_view = self._input.param("disabled_consistent_view", None)
     self._quota = self._initialize_nodes(self._cluster_helper, self._servers, self.disabled_consistent_view)
      if self.dgm_run:
          # cap the quota so the test actually runs in DGM (disk greater than memory) mode
          self._quota = 256
     self.bucket_size = int((2.0 / 3.0) / float(num_buckets) * float(self._quota))
     self.gen_create = BlobGenerator('loadOne', 'loadOne_', self._value_size, end=self._keys_count)
     self.add_back_flag = False
     self._cleanup_nodes = []
     log.info("==============  setup was started for test #{0} {1}=============="\
                   .format(self.case_number, self._testMethodName))
     RemoteUtilHelper.common_basic_setup(self._servers)
     BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
     for server in self._servers:
         ClusterOperationHelper.cleanup_cluster([server])
     ClusterHelper.wait_for_ns_servers_or_assert(self._servers, self)
     self._setup_cluster()
     self._create_buckets_()
     log.info("==============  setup was finished for test #{0} {1} =============="\
                   .format(self.case_number, self._testMethodName))
Example 35
    def common_tearDown(servers, testcase):
        log.info("==============  common_tearDown was started for test #{0} {1} =============="\
                          .format(testcase.case_number, testcase._testMethodName))
        RemoteUtilHelper.common_basic_setup(servers)

        log.info("10 seconds delay to wait for couchbase-server to start")
        time.sleep(10)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase, \
                wait_time=AutoFailoverBaseTest.MAX_FAIL_DETECT_TIME * 10, wait_if_warmup=True)
        try:
            rest = RestConnection(servers[0])
            buckets = rest.get_buckets()
            for bucket in buckets:
                MemcachedClientHelper.flush_bucket(servers[0], bucket.name)
        except Exception:
            pass
        BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
        ClusterOperationHelper.cleanup_cluster(servers)
        log.info("==============  common_tearDown was finished for test #{0} {1} =============="\
                          .format(testcase.case_number, testcase._testMethodName))
Example 36
    def common_setup(input, testcase):
        log.info("==============  common_setup was started for test #{0} {1}==============" \
                 .format(testcase.case_number, testcase._testMethodName))
        servers = input.servers
        RemoteUtilHelper.common_basic_setup(servers)
        BucketOperationHelper.delete_all_buckets_or_assert(servers, testcase)
        ClusterOperationHelper.cleanup_cluster(servers)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(servers, testcase)

        # Add built-in user
        testuser = [{'id': 'cbadminbucket', 'name': 'cbadminbucket', 'password': '******'}]
        RbacBase().create_user_source(testuser, 'builtin', servers[0])
        time.sleep(10)

        # Assign user to role
        role_list = [{'id': 'cbadminbucket', 'name': 'cbadminbucket', 'roles': 'admin'}]
        RbacBase().add_user_role(role_list, RestConnection(servers[0]), 'builtin')
        time.sleep(10)

        log.info("==============  common_setup was finished for test #{0} {1} ==============" \
                 .format(testcase.case_number, testcase._testMethodName))
Example 37
 def test_node_memcached_failure(self):
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
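      # _pause_couchbase presumably suspends the couchbase process so the node looks like a memcached failure (helper from the base test)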
     self._pause_couchbase(self.server_fail)
     self.sleep(5)
     AutoReprovisionBaseTest.wait_for_warmup_or_assert(self.master, 1,
                                                       timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                       self)
     RemoteUtilHelper.common_basic_setup([self.server_fail])
     AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 0,
                                                         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                         self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
     self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name, self.loaded_items[bucket.name])
Example 38
    def test_node_firewall_enabled(self):
        timeout = self.timeout / 2

        status = self.rest.update_autoreprovision_settings(True, 1)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        RemoteUtilHelper.enable_firewall(self.server_fail)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 1,
                                                            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                            self)
        self.sleep(5)
        shell = RemoteMachineShellConnection(self.server_fail)
        shell.disable_firewall()
        AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 0,
                                                            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                            self)
        self.rest.rebalance(otpNodes=[node.id for node in self.rest.node_statuses()], ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
        buckets = self.rest.get_buckets()
        for bucket in buckets:
            self.verify_loaded_data(self.master, bucket.name, self.loaded_items[bucket.name])
Example 39
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        rest_src = RestConnection(self.master)
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()

        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)

        try:
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip,
                                        self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")
            # at the destination cluster
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail(
                    "Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            gen = BlobGenerator("ent-backup",
                                "ent-backup-",
                                self.value_size,
                                end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)
            # enable firewall to interrupt the replication
            if self.interrupt_replication:
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)
            # start log collection
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            # verify logs
            try:
                logs_path = result["perNode"]["ns_1@" +
                                              str(self.master.ip)]["path"]
            except KeyError:
                logs_path = result["perNode"]["ns_1@127.0.0.1"]["path"]
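            # cbcollect with redaction produces a '-redacted' archive next to the plain one; derive both names from the returned path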
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace(
                '-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/') + 1]
            self.verify_log_files_exist(remotepath=remotepath,
                                        redactFileName=redactFileName,
                                        nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                      redactFileName=redactFileName,
                                      nonredactFileName=nonredactFileName,
                                      logFileName="ns_server.goxdcr.log")
        finally:
            # clean up XDCR
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
Example 40
    def common_test_body(self, keys_count, failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replicas : {0}".format(self.num_replicas))
        log.info("failover_reason : {0}".format(failover_reason))
        log.info('picking server : {0} as the master'.format(self.master))

        self._load_all_buckets(self.master, self.gen_create, "create", 0,
                               batch_size=10000, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers)

        _servers_ = self.servers
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()

        RebalanceHelper.wait_for_replication(self.servers, self.cluster)
        chosen = RebalanceHelper.pick_nodes(self.master, howmany=self.num_replicas)
        for node in chosen:
            #let's do op
            if failover_reason == 'stop_server':
                self.stop_server(node)
                log.info("10 seconds delay to wait for membase-server to shutdown")
                #wait for 5 minutes until node is down
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                RemoteUtilHelper.enable_firewall(self.servers, node, bidirectional=self.bidirectional)
                status = RestHelper(rest).wait_for_node_status(node, "unhealthy", 300)
                if status:
                    log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
                else:
                    #verify iptables on the node if something wrong
                    for server in self.servers:
                        if server.ip == node.ip:
                            shell = RemoteMachineShellConnection(server)
                            o, r = shell.execute_command("/sbin/iptables --list")
                            shell.log_command_output(o, r)
                            shell.disconnect()
                    for i in rest.get_logs():
                        self.log.error(i)
                    self.fail("node status is not unhealthy even after waiting for 5 minutes")

            failed_over = rest.fail_over(node.id)
            if not failed_over:
                self.log.info("unable to failover the node the first time. try again in  60 seconds..")
                #try again in 75 seconds
                time.sleep(75)
                failed_over = rest.fail_over(node.id)
            self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
            log.info("failed over node : {0}".format(node.id))
            self._failed_nodes.append(node)

        if self.add_back_flag:
            for node in self._failed_nodes:
                rest.add_back_node(node.id)
                time.sleep(5)
            log.info("10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                               ejectedNodes=[])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
        else:
            # Need a delay greater than the minimum because of MB-7168
            log.info("30 seconds sleep after failover before invoking rebalance...")
            time.sleep(30)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                               ejectedNodes=[node.id for node in chosen])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
            for failed in chosen:
                for server in _servers_:
                    if server.ip == failed.ip:
                        _servers_.remove(server)
                        self._cleanup_nodes.append(server)

        log.info("Begin VERIFICATION ...")
        RebalanceHelper.wait_for_replication(_servers_, self.cluster)
        self.verify_cluster_stats(_servers_, self.master)
Example no. 41
    def run_failover_operations(self, chosen, failover_reason):
        """ Method to run fail over operations used in the test scenario based on failover reason """
        # Perform Operations related to failover
        graceful_count = 0
        graceful_failover = True
        failed_over = True
        for node in chosen:
            unreachable = False
            if failover_reason == 'stop_server':
                unreachable = True
                self.stop_server(node)
                self.log.info("10 seconds delay to wait for membase-server to shutdown")
                # wait for 5 minutes until node is down
                self.assertTrue(RestHelper(self.rest).wait_for_node_status(node, "unhealthy", self.wait_timeout * 10),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                unreachable = True
                self.filter_list.append(node.ip)
                server = [srv for srv in self.servers if node.ip == srv.ip][0]
                RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
                status = RestHelper(self.rest).wait_for_node_status(node, "unhealthy", self.wait_timeout * 10)
                if status:
                    self.log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
                else:
                    # verify iptables on the node if something wrong
                    for server in self.servers:
                        if server.ip == node.ip:
                            shell = RemoteMachineShellConnection(server)
                            info = shell.extract_remote_info()
                            if info.type.lower() == "windows":
                                o, r = shell.execute_command("netsh advfirewall show allprofiles")
                                shell.log_command_output(o, r)
                            else:
                                o, r = shell.execute_command("/sbin/iptables --list")
                                shell.log_command_output(o, r)
                            shell.disconnect()
                    self.rest.print_UI_logs()
                    api = self.rest.baseUrl + 'nodeStatuses'
                    status, content, header = self.rest._http_request(api)
                    json_parsed = json.loads(content)
                    self.log.info("nodeStatuses: {0}".format(json_parsed))
                    self.fail("node status is not unhealthy even after waiting for 5 minutes")
            # verify the failover type
            if self.check_verify_failover_type:
                graceful_count, graceful_failover = self.verify_failover_type(node, graceful_count, self.num_replicas, unreachable)
            # define precondition check for failover
            success_failed_over = self.rest.fail_over(node.id, graceful=(self.graceful and graceful_failover))
            if self.graceful and graceful_failover:
                if self.stopGracefulFailover or self.killNodes or self.stopNodes or self.firewallOnNodes:
                    self.victim_node_operations(node)
                    # Start Graceful Again
                    self.log.info(" Start Graceful Failover Again !")
                    success_failed_over = self.rest.fail_over(node.id, graceful=(self.graceful and graceful_failover))
                    msg = "graceful failover failed for nodes {0}".format(node.id)
                    self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
                else:
                    msg = "rebalance failed while removing failover nodes {0}".format(node.id)
                    self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
            failed_over = failed_over and success_failed_over

        # Check for negative cases
        if self.graceful and (failover_reason in ['stop_server', 'firewall']):
            if failed_over:
                # MB-10479
                self.rest.print_UI_logs()
            self.assertFalse(failed_over, "Graceful Falover was started for unhealthy node!!! ")
            return
        elif self.gracefulFailoverFail and not failed_over:
            """ Check if the fail_over fails as expected """
            self.assertFalse(failed_over,""" Graceful failover should fail due to not enough replicas """)
            return

        # Check if failover happened as expected or re-try one more time
        if not failed_over:
            self.log.info("unable to failover the node the first time. try again in  60 seconds..")
            # try again in 75 seconds
            self.sleep(75)
            failed_over = self.rest.fail_over(node.id, graceful=(self.graceful and graceful_failover))
        if self.graceful and (failover_reason not in ['stop_server', 'firewall']):
            reached = RestHelper(self.rest).rebalance_reached()
            self.assertTrue(reached, "rebalance failed for Graceful Failover, stuck or did not complete")

        # Verify Active and Replica Bucket Count
        if self.num_replicas > 0:
            nodes = self.filter_servers(self.servers, chosen)
            self.vb_distribution_analysis(servers=nodes, buckets=self.buckets, std=20.0, total_vbuckets=self.total_vbuckets, type="failover", graceful=(self.graceful and graceful_failover))
Example no. 42
    def enable_firewall(server):
        """Enable firewall
        @param server: server object to enable firewall
        """
        RemoteUtilHelper.enable_firewall(server)
Example no. 43
    def run_failover_operations(self, chosen, failover_reason):
        """ Method to run fail over operations used in the test scenario based on failover reason """
        # Perform Operations related to failover
        graceful_count = 0
        graceful_failover = True
        failed_over = True
        for node in chosen:
            unreachable = False
            if failover_reason == 'stop_server':
                unreachable = True
                self.stop_server(node)
                self.log.info("10 seconds delay to wait for membase-server to shutdown")
                # wait for 5 minutes until node is down
                self.assertTrue(RestHelper(self.rest).wait_for_node_status(node, "unhealthy", self.wait_timeout * 10),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                unreachable = True
                self.filter_list.append(node.ip)
                server = [srv for srv in self.servers if node.ip == srv.ip][0]
                RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
                status = RestHelper(self.rest).wait_for_node_status(node, "unhealthy", self.wait_timeout * 10)
                if status:
                    self.log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
                else:
                    # verify iptables on the node if something wrong
                    for server in self.servers:
                        if server.ip == node.ip:
                            shell = RemoteMachineShellConnection(server)
                            info = shell.extract_remote_info()
                            if info.type.lower() == "windows":
                                o, r = shell.execute_command("netsh advfirewall show allprofiles")
                                shell.log_command_output(o, r)
                            else:
                                o, r = shell.execute_command("/sbin/iptables --list")
                                shell.log_command_output(o, r)
                            shell.disconnect()
                    self.rest.print_UI_logs()
                    api = self.rest.baseUrl + 'nodeStatuses'
                    status, content, header = self.rest._http_request(api)
                    json_parsed = json.loads(content)
                    self.log.info("nodeStatuses: {0}".format(json_parsed))
                    self.fail("node status is not unhealthy even after waiting for 5 minutes")
            # verify the failover type
            if self.check_verify_failover_type:
                graceful_count, graceful_failover = self.verify_failover_type(node, graceful_count, self.num_replicas, unreachable)
            # define precondition check for failover
            success_failed_over = self.rest.fail_over(node.id, graceful=(self.graceful and graceful_failover))
            if self.graceful and graceful_failover:
                if self.stopGracefulFailover or self.killNodes or self.stopNodes or self.firewallOnNodes:
                    self.victim_node_operations(node)
                    # Start Graceful Again
                    self.log.info(" Start Graceful Failover Again !")
                    self.sleep(120)
                    success_failed_over = self.rest.fail_over(node.id, graceful=(self.graceful and graceful_failover))
                    self.sleep(180)
                    msg = "graceful failover failed for nodes {0}".format(node.id)
                    self.log.info("chosen: {0} get_failover_count: {1}".format(len(chosen),
                                                                               self.get_failover_count()))
                    self.assertEqual(len(chosen), self.get_failover_count(), msg=msg)
                else:
                    msg = "rebalance failed while removing failover nodes {0}".format(node.id)
                    self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
            failed_over = failed_over and success_failed_over

        # Check for negative cases
        if self.graceful and (failover_reason in ['stop_server', 'firewall']):
            if failed_over:
                # MB-10479
                self.rest.print_UI_logs()
            self.assertFalse(failed_over, "Graceful Falover was started for unhealthy node!!! ")
            return
        elif self.gracefulFailoverFail and not failed_over:
            """ Check if the fail_over fails as expected """
            self.assertFalse(failed_over, """ Graceful failover should fail due to not enough replicas """)
            return

        # Check if failover happened as expected or re-try one more time
        if not failed_over:
            self.log.info("unable to failover the node the first time. try again in  60 seconds..")
            # try again in 75 seconds
            self.sleep(75)
            failed_over = self.rest.fail_over(node.id, graceful=(self.graceful and graceful_failover))
        if self.graceful and (failover_reason not in ['stop_server', 'firewall']):
            reached = RestHelper(self.rest).rebalance_reached()
            self.assertTrue(reached, "rebalance failed for Graceful Failover, stuck or did not complete")

        # Verify Active and Replica Bucket Count
        if self.num_replicas > 0:
            nodes = self.filter_servers(self.servers, chosen)
            self.vb_distribution_analysis(servers=nodes, buckets=self.buckets, std=20.0, total_vbuckets=self.total_vbuckets, type="failover", graceful=(self.graceful and graceful_failover))
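The Windows/Linux dispatch used above to inspect a node's firewall recurs throughout these examples; a minimal helper sketch, assuming only the RemoteMachineShellConnection API already shown in these snippets (the helper name is hypothetical):

def dump_firewall_rules(server):
    """Log the active firewall rules on a node, Windows or Linux."""
    shell = RemoteMachineShellConnection(server)
    try:
        info = shell.extract_remote_info()
        if info.type.lower() == "windows":
            o, r = shell.execute_command("netsh advfirewall show allprofiles")
        else:
            o, r = shell.execute_command("/sbin/iptables --list")
        shell.log_command_output(o, r)
    finally:
        shell.disconnect()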
Example no. 44
    def initialize(self, params):
        log.info('***** CouchbaseServerInstaller initialize the application *****')

        start_time = time.time()
        cluster_initialized = False
        server = params["server"]
        remote_client = RemoteMachineShellConnection(params["server"])
        while time.time() < start_time + 5 * 60:
            try:
                rest = RestConnection(server)

                # Optionally change node name and restart server
                if params.get('use_domain_names', 0):
                    RemoteUtilHelper.use_hostname_for_server_settings(server)

                # Make sure that data_path and index_path are writable by couchbase user
                for path in set(filter(None, [server.data_path, server.index_path])):
                    time.sleep(3)

                    for cmd in ("rm -rf {0}/*".format(path),
                                "chown -R couchbase:couchbase {0}".format(path)):
                        remote_client.execute_command(cmd)
                rest.set_data_path(data_path=server.data_path,
                                       index_path=server.index_path)
                time.sleep(3)

                # Initialize cluster
                if "init_nodes" in params:
                    init_nodes = params["init_nodes"]
                else:
                    init_nodes = "True"
                if (isinstance(init_nodes, bool) and init_nodes) or \
                        (isinstance(init_nodes, str) and init_nodes.lower() == "true"):
                    if not server.services:
                        set_services = ["kv"]
                    elif server.services:
                        set_services = server.services.split(',')

                    kv_quota = 0
                    while kv_quota == 0:
                        time.sleep(1)
                        kv_quota = int(rest.get_nodes_self().mcdMemoryReserved)
                    info = rest.get_nodes_self()
                    cb_version = info.version[:5]

                    if cb_version in COUCHBASE_FROM_VERSION_4:
                        if "index" in set_services and "fts" not in set_services:
                            log.info("quota for index service will be %s MB" \
                                                              % (INDEX_QUOTA))
                            kv_quota = int(info.mcdMemoryReserved * 2/3) - INDEX_QUOTA
                            log.info("set index quota to node %s " % server.ip)
                            rest.set_indexer_memoryQuota(indexMemoryQuota=INDEX_QUOTA)
                            if kv_quota < MIN_KV_QUOTA:
                                raise Exception("KV RAM needs to be more than %s MB"
                                        " at node  %s"  % (MIN_KV_QUOTA, server.ip))
                        elif "index" in set_services and "fts" in set_services:
                            log.info("quota for index service will be %s MB" \
                                                              % (INDEX_QUOTA))
                            log.info("quota for fts service will be %s MB" \
                                                                % (FTS_QUOTA))
                            kv_quota = int(info.mcdMemoryReserved * 2/3)\
                                                                 - INDEX_QUOTA \
                                                                 - FTS_QUOTA
                            log.info("set both index and fts quota at node %s "\
                                                                    % server.ip)
                            rest.set_indexer_memoryQuota(indexMemoryQuota=INDEX_QUOTA)
                            rest.set_fts_memoryQuota(ftsMemoryQuota=FTS_QUOTA)
                            if kv_quota < MIN_KV_QUOTA:
                                raise Exception("KV RAM need to be more than %s MB"
                                       " at node  %s"  % (MIN_KV_QUOTA, server.ip))
                        elif "fts" in set_services and "index" not in set_services:
                            log.info("quota for fts service will be %s MB" \
                                                                % (FTS_QUOTA))
                            kv_quota = int(info.mcdMemoryReserved * 2/3) - FTS_QUOTA
                            if kv_quota < MIN_KV_QUOTA:
                                raise Exception("KV RAM need to be more than %s MB"
                                       " at node  %s"  % (MIN_KV_QUOTA, server.ip))
                            """ for fts, we need to grep quota from ns_server
                                but need to make it works even RAM of vm is
                                smaller than 2 GB """
                            rest.set_fts_memoryQuota(ftsMemoryQuota=FTS_QUOTA)
                    """ set kv quota smaller than 1 MB so that it will satify
                        the condition smaller than allow quota """
                    kv_quota -= 1
                    log.info("quota for kv: %s MB" % kv_quota)
                    rest.init_cluster_memoryQuota(server.rest_username, \
                                                       server.rest_password, \
                                                                     kv_quota)
                    if params["version"][:5] in COUCHBASE_FROM_VERSION_4:
                        rest.init_node_services(username=server.rest_username,
                                                password=server.rest_password,
                                                        services=set_services)
                    rest.init_cluster(username=server.rest_username,
                                         password=server.rest_password)

                # Optionally disable consistency check
                if params.get('disable_consistency', 0):
                    rest.set_couchdb_option(section='couchdb',
                                            option='consistency_check_ratio',
                                            value='0.0')

                # memcached env variable
                mem_req_tap_env = params.get('MEMCACHED_REQS_TAP_EVENT', 0)
                if mem_req_tap_env:
                    remote_client.set_environment_variable('MEMCACHED_REQS_TAP_EVENT',
                                                           mem_req_tap_env)
                """ set cbauth environment variables from Watson version
                    it is checked version inside method """
                remote_client.set_cbauth_env(server)
                remote_client.disconnect()
                # TODO: Make it work with windows
                if "erlang_threads" in params:
                    num_threads = params.get('erlang_threads', testconstants.NUM_ERLANG_THREADS)
                    # Stop couchbase-server
                    ClusterOperationHelper.stop_cluster([server])
                    if "sync_threads" in params or ':' in num_threads:
                        sync_threads = params.get('sync_threads', True)
                    else:
                        sync_threads = False
                    # Change type of threads(sync/async) and num erlang threads
                    ClusterOperationHelper.change_erlang_threads_values([server], sync_threads, num_threads)
                    # Start couchbase-server
                    ClusterOperationHelper.start_cluster([server])
                if "erlang_gc_level" in params:
                    erlang_gc_level = params.get('erlang_gc_level', None)
                    if erlang_gc_level is None:
                        # Don't change the value
                        break
                    # Stop couchbase-server
                    ClusterOperationHelper.stop_cluster([server])
                    # Change num erlang threads
                    ClusterOperationHelper.change_erlang_gc([server], erlang_gc_level)
                    # Start couchbase-server
                    ClusterOperationHelper.start_cluster([server])
                cluster_initialized = True
                break
            except ServerUnavailableException:
                log.error("error happened while initializing the cluster @ {0}".format(server.ip))
            log.info('sleep for 5 seconds before trying again ...')
            time.sleep(5)
        if not cluster_initialized:
            sys.exit("unable to initialize couchbase node")
Example no. 45
    def common_test_body(self, keys_count, failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replicas : {0}".format(self.num_replicas))
        log.info("failover_reason : {0}".format(failover_reason))
        log.info('picking server : {0} as the master'.format(self.master))

        self._load_all_buckets(self.master, self.gen_create, "create", 0,
                               batch_size=10000, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers)

        _servers_ = self.servers
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()

        RebalanceHelper.wait_for_replication(self.servers, self.cluster)
        chosen = RebalanceHelper.pick_nodes(self.master, howmany=self.num_replicas)
        for node in chosen:
            #let's do op
            if failover_reason == 'stop_server':
                self.stop_server(node)
                log.info("10 seconds delay to wait for membase-server to shutdown")
                #wait for 5 minutes until node is down
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                server = [srv for srv in self.servers if node.ip == srv.ip][0]
                RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
                status = RestHelper(rest).wait_for_node_status(node, "unhealthy", 300)
                if status:
                    log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
                else:
                    #verify iptables on the node if something wrong
                    for server in self.servers:
                        if server.ip == node.ip:
                            shell = RemoteMachineShellConnection(server)
                            info = shell.extract_remote_info()
                            if info.type.lower() == "windows":
                                o, r = shell.execute_command("netsh advfirewall show allprofiles")
                            else:
                                o, r = shell.execute_command("/sbin/iptables --list")
                            shell.log_command_output(o, r)
                            shell.disconnect()
                    for i in rest.get_logs():
                        self.log.error(i)
                    api = rest.baseUrl + 'nodeStatuses'
                    status, content, header = rest._http_request(api)
                    json_parsed = json.loads(content)
                    self.log.info("nodeStatuses: {0}".format(json_parsed))
                    self.fail("node status is not unhealthy even after waiting for 5 minutes")

            failed_over = rest.fail_over(node.id)
            if not failed_over:
                self.log.info("unable to failover the node the first time. try again in  60 seconds..")
                #try again in 75 seconds
                time.sleep(75)
                failed_over = rest.fail_over(node.id)
            self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
            log.info("failed over node : {0}".format(node.id))
            self._failed_nodes.append(node)

        if self.add_back_flag:
            for node in self._failed_nodes:
                rest.add_back_node(node.id)
                time.sleep(5)
            log.info("10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                               ejectedNodes=[])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
        else:
            # Need a delay greater than the minimum because of MB-7168
            log.info("60 seconds sleep after failover before invoking rebalance...")
            time.sleep(60)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                               ejectedNodes=[node.id for node in chosen])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
            for failed in chosen:
                for server in _servers_:
                    if server.ip == failed.ip:
                        _servers_.remove(server)
                        self._cleanup_nodes.append(server)

        log.info("Begin VERIFICATION ...")
        RebalanceHelper.wait_for_replication(_servers_, self.cluster)
        self.verify_cluster_stats(_servers_, self.master)
Example no. 46
    def initialize(self, params):
        start_time = time.time()
        cluster_initialized = False
        server = params["server"]
        remote_client = RemoteMachineShellConnection(params["server"])
        while time.time() < start_time + 5 * 60:
            try:
                rest = RestConnection(server)

                # Optionally change node name and restart server
                if params.get('use_domain_names', 0):
                    RemoteUtilHelper.use_hostname_for_server_settings(server)

                # Make sure that data_path and index_path are writable by couchbase user
                for path in set(filter(None, [server.data_path, server.index_path])):
                    time.sleep(3)

                    for cmd in ("rm -rf {0}/*".format(path),
                                "chown -R couchbase:couchbase {0}".format(path)):
                        remote_client.execute_command(cmd)
                rest.set_data_path(data_path=server.data_path,
                                       index_path=server.index_path)
                time.sleep(3)

                # Initialize cluster
                if "init_nodes" in params:
                    init_nodes = params["init_nodes"]
                else:
                    init_nodes = "True"
                if init_nodes.lower() == "true":
                    rest.init_cluster(username=server.rest_username,
                                  password=server.rest_password)
                    memory_quota = rest.get_nodes_self().mcdMemoryReserved
                    rest.init_cluster_memoryQuota(memoryQuota=memory_quota)

                # TODO: Symlink data-dir to custom path
                # remote_client.stop_couchbase()
                # remote_client.execute_command('mv /opt/couchbase/var {0}'.format(server.data_path))
                # remote_client.execute_command('ln -s {0}/var /opt/couchbase/var'.format(server.data_path))
                # remote_client.execute_command("chown -h couchbase:couchbase /opt/couchbase/var")
                # remote_client.start_couchbase()

                # Optionally disable consistency check
                if params.get('disable_consistency', 0):
                    rest.set_couchdb_option(section='couchdb',
                                            option='consistency_check_ratio',
                                            value='0.0')

                # memcached env variable
                mem_req_tap_env = params.get('MEMCACHED_REQS_TAP_EVENT', 0)
                if mem_req_tap_env:
                    remote_client.set_environment_variable('MEMCACHED_REQS_TAP_EVENT',
                                                           mem_req_tap_env)
                remote_client.disconnect()
                # TODO: Make it work with windows
                if "erlang_threads" in params:
                    num_threads = params.get('erlang_threads', testconstants.NUM_ERLANG_THREADS)
                    # Stop couchbase-server
                    ClusterOperationHelper.stop_cluster([server])
                    if "sync_threads" in params or ':' in num_threads:
                        sync_threads = params.get('sync_threads', True)
                    else:
                        sync_threads = False
                    # Change type of threads(sync/async) and num erlang threads
                    ClusterOperationHelper.change_erlang_threads_values([server], sync_threads, num_threads)
                    # Start couchbase-server
                    ClusterOperationHelper.start_cluster([server])
                if "erlang_gc_level" in params:
                    erlang_gc_level = params.get('erlang_gc_level', None)
                    if erlang_gc_level is None:
                        # Don't change the value
                        break
                    # Stop couchbase-server
                    ClusterOperationHelper.stop_cluster([server])
                    # Change num erlang threads
                    ClusterOperationHelper.change_erlang_gc([server], erlang_gc_level)
                    # Start couchbase-server
                    ClusterOperationHelper.start_cluster([server])
                cluster_initialized = True
                break
            except ServerUnavailableException:
                log.error("error happened while initializing the cluster @ {0}".format(server.ip))
            log.info('sleep for 5 seconds before trying again ...')
            time.sleep(5)
        if not cluster_initialized:
            sys.exit("unable to initialize couchbase node")
Example no. 47
    def common_test_body(self, keys_count, failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replicas : {0}".format(self.num_replicas))
        log.info("failover_reason : {0}".format(failover_reason))
        log.info('picking server : {0} as the master'.format(self.master))

        self._load_all_buckets(self.master, self.gen_create, "create", 0,
                               batch_size=10000, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers)

        _servers_ = self.servers
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()

        RebalanceHelper.wait_for_replication(self.servers, self.cluster)
        chosen = RebalanceHelper.pick_nodes(self.master, howmany=self.num_replicas)
        for node in chosen:
            # let's do op
            if failover_reason == 'stop_server':
                self.stop_server(node)
                log.info("10 seconds delay to wait for membase-server to shutdown")
                # wait for 5 minutes until node is down
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                server = [srv for srv in self.servers if node.ip == srv.ip][0]
                RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
                status = RestHelper(rest).wait_for_node_status(node, "unhealthy", 300)
                if status:
                    log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
                else:
                    # verify iptables on the node if something wrong
                    for server in self.servers:
                        if server.ip == node.ip:
                            shell = RemoteMachineShellConnection(server)
                            info = shell.extract_remote_info()
                            if info.type.lower() == "windows":
                                o, r = shell.execute_command("netsh advfirewall show allprofiles")
                            else:
                                o, r = shell.execute_command("/sbin/iptables --list")
                            shell.log_command_output(o, r)
                            shell.disconnect()
                    for i in rest.get_logs():
                        self.log.error(i)
                    api = rest.baseUrl + 'nodeStatuses'
                    status, content, header = rest._http_request(api)
                    json_parsed = json.loads(content)
                    self.log.info("nodeStatuses: {0}".format(json_parsed))
                    self.fail("node status is not unhealthy even after waiting for 5 minutes")

            failed_over = rest.fail_over(node.id)
            if not failed_over:
                self.log.info("unable to failover the node the first time. try again in  60 seconds..")
                # try again in 75 seconds
                time.sleep(75)
                failed_over = rest.fail_over(node.id)
            self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
            log.info("failed over node : {0}".format(node.id))
            self._failed_nodes.append(node)

        if self.add_back_flag:
            for node in self._failed_nodes:
                rest.add_back_node(node.id)
                time.sleep(5)
            log.info("10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                               ejectedNodes=[])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
        else:
            # Need a delay greater than the minimum because of MB-7168
            log.info("60 seconds sleep after failover before invoking rebalance...")
            time.sleep(60)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                               ejectedNodes=[node.id for node in chosen])
            if self.during_ops:
                self.sleep(5, "Wait for some progress in rebalance")
                if self.during_ops == "change_password":
                    old_pass = self.master.rest_password
                    self.change_password(new_password=self.input.param("new_password", "new_pass"))
                    rest = RestConnection(self.master)
                elif self.during_ops == "change_port":
                    self.change_port(new_port=self.input.param("new_port", "9090"))
                    rest = RestConnection(self.master)
            try:
                msg = "rebalance failed while removing failover nodes {0}".format(chosen)
                self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
                for failed in chosen:
                    for server in _servers_:
                        if server.ip == failed.ip:
                            _servers_.remove(server)
                            self._cleanup_nodes.append(server)

                log.info("Begin VERIFICATION ...")
                RebalanceHelper.wait_for_replication(_servers_, self.cluster)
                self.verify_cluster_stats(_servers_, self.master)
            finally:
                if self.during_ops:
                     if self.during_ops == "change_password":
                         self.change_password(new_password=old_pass)
                     elif self.during_ops == "change_port":
                         self.change_port(new_port='8091',
                                          current_port=self.input.param("new_port", "9090"))
Example no. 48
    def initialize(self, params):
        start_time = time.time()
        cluster_initialized = False
        server = params["server"]
        remote_client = RemoteMachineShellConnection(params["server"])
        while time.time() < start_time + 5 * 60:
            try:
                rest = RestConnection(server)

                # Optionally change node name and restart server
                if params.get('use_domain_names', 0):
                    RemoteUtilHelper.use_hostname_for_server_settings(server)

                # Make sure that data_path and index_path are writable by couchbase user
                for path in set(filter(None, [server.data_path, server.index_path])):
                    time.sleep(3)

                    for cmd in ("rm -rf {0}/*".format(path),
                                "chown -R couchbase:couchbase {0}".format(path)):
                        remote_client.execute_command(cmd)
                rest.set_data_path(data_path=server.data_path,
                                       index_path=server.index_path)
                time.sleep(3)

                # Initialize cluster
                rest.init_cluster(username=server.rest_username,
                                  password=server.rest_password)
                memory_quota = rest.get_nodes_self().mcdMemoryReserved
                rest.init_cluster_memoryQuota(memoryQuota=memory_quota)

                # TODO: Symlink data-dir to custom path
                # remote_client.stop_couchbase()
                # remote_client.execute_command('mv /opt/couchbase/var {0}'.format(server.data_path))
                # remote_client.execute_command('ln -s {0}/var /opt/couchbase/var'.format(server.data_path))
                # remote_client.execute_command("chown -h couchbase:couchbase /opt/couchbase/var")
                # remote_client.start_couchbase()

                # Optionally disable consistency check
                if params.get('disable_consistency', 0):
                    rest.set_couchdb_option(section='couchdb',
                                            option='consistency_check_ratio',
                                            value='0.0')

                # memcached env variable
                mem_req_tap_env = params.get('MEMCACHED_REQS_TAP_EVENT', 0)
                if mem_req_tap_env:
                    remote_client.set_environment_variable('MEMCACHED_REQS_TAP_EVENT',
                                                           mem_req_tap_env)
                remote_client.disconnect()
                # TODO: Make it work with windows
                if "erlang_threads" in params:
                    num_threads = params.get('erlang_threads', testconstants.NUM_ERLANG_THREADS)
                    # Stop couchbase-server
                    ClusterOperationHelper.stop_cluster([server])
                    if "sync_threads" in params or ':' in num_threads:
                        sync_threads = params.get('sync_threads', True)
                    else:
                        sync_threads = False
                    # Change type of threads(sync/async) and num erlang threads
                    ClusterOperationHelper.change_erlang_threads_values([server], sync_threads, num_threads)
                    # Start couchbase-server
                    ClusterOperationHelper.start_cluster([server])
                if "erlang_gc_level" in params:
                    erlang_gc_level = params.get('erlang_gc_level', None)
                    if erlang_gc_level is None:
                        # Don't change the value
                        break
                    # Stop couchbase-server
                    ClusterOperationHelper.stop_cluster([server])
                    # Change num erlang threads
                    ClusterOperationHelper.change_erlang_gc([server], erlang_gc_level)
                    # Start couchbase-server
                    ClusterOperationHelper.start_cluster([server])
                cluster_initialized = True
                break
            except ServerUnavailableException:
                log.error("error happened while initializing the cluster @ {0}".format(server.ip))
            log.info('sleep for 5 seconds before trying again ...')
            time.sleep(5)
        if not cluster_initialized:
            sys.exit("unable to initialize couchbase node")
Example no. 49
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        rest_src = RestConnection(self.master)
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()

        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)

        try:
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip, self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")

            """ at dest cluster """
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail("Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            gen = BlobGenerator("ent-backup", "ent-backup-", self.value_size, end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)

            """ enable firewall """
            if self.interrupt_replication:
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)

            """ start collect logs """
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            """ verify logs """
            try:
                logs_path = result["perNode"]["ns_1@" + str(self.master.ip)]["path"]
            except KeyError:
                logs_path = result["perNode"]["[email protected]"]["path"]
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace('-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/')+1]
            self.verify_log_files_exist(remotepath=remotepath,
                                        redactFileName=redactFileName,
                                        nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                      redactFileName=redactFileName,
                                      nonredactFileName=nonredactFileName,
                                      logFileName="ns_server.goxdcr.log")
        finally:
            """ clean up xdcr """
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
Example no. 50
    def common_test_body(self, keys_count, replica, load_ratio,
                         failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replica : {0}".format(replica))
        log.info("load_ratio : {0}".format(load_ratio))
        log.info("failover_reason : {0}".format(failover_reason))
        master = self._servers[0]
        log.info('picking server : {0} as the master'.format(master))
        rest = RestConnection(master)
        info = rest.get_nodes_self()
        rest.init_cluster(username=master.rest_username,
                          password=master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
        bucket_ram = info.memoryQuota * 2 / 3
        bucket = 'default'
        rest.create_bucket(bucket=bucket,
                           ramQuotaMB=bucket_ram,
                           replicaNumber=replica,
                           proxyPort=info.moxi)
        ready = BucketOperationHelper.wait_for_memcached(master, bucket)
        self.assertTrue(ready, "wait_for_memcached_failed")
        credentials = self._input.membase_settings

        ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers,
                                                       credentials, self)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)

        inserted_keys = FailoverBaseTest.load_data(master, bucket, keys_count,
                                                   load_ratio)
        inserted_count = len(inserted_keys)
        log.info('inserted {0} keys'.format(inserted_count))

        nodes = rest.node_statuses()
        while (len(nodes) - replica) > 1:
            final_replication_state = RestHelper(rest).wait_for_replication(
                900)
            msg = "replication state after waiting for up to 15 minutes : {0}"
            self.log.info(msg.format(final_replication_state))
            chosen = RebalanceHelper.pick_nodes(master, howmany=replica)
            for node in chosen:
                #let's do op
                if failover_reason == 'stop_server':
                    self.stop_server(node)
                    log.info(
                        "10 seconds delay to wait for membase-server to shutdown"
                    )
                    #wait for 5 minutes until node is down
                    self.assertTrue(
                        RestHelper(rest).wait_for_node_status(
                            node, "unhealthy", 300),
                        msg=
                        "node status is not unhealthy even after waiting for 5 minutes"
                    )
                elif failover_reason == "firewall":
                    RemoteUtilHelper.enable_firewall(
                        self._servers, node, bidirectional=self.bidirectional)
                    self.assertTrue(
                        RestHelper(rest).wait_for_node_status(
                            node, "unhealthy", 300),
                        msg=
                        "node status is not unhealthy even after waiting for 5 minutes"
                    )

                failed_over = rest.fail_over(node.id)
                if not failed_over:
                    self.log.info(
                        "unable to failover the node the first time. try again in  60 seconds.."
                    )
                    #try again in 60 seconds
                    time.sleep(75)
                    failed_over = rest.fail_over(node.id)
                self.assertTrue(
                    failed_over, "unable to failover node after {0}".format(
                        failover_reason))
                log.info("failed over node : {0}".format(node.id))
                self._failed_nodes.append(node.ip)

            log.info(
                "10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                           ejectedNodes=[node.id for node in chosen])
            msg = "rebalance failed while removing failover nodes {0}".format(
                chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
            FailoverBaseTest.replication_verification(master, bucket, replica,
                                                      inserted_count, self)

            nodes = rest.node_statuses()
        FailoverBaseTest.verify_data(master, inserted_keys, bucket, self)
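The outer while loop above keeps failing nodes over as long as more than replica + 1 nodes remain; a hypothetical walk-through of the loop condition with four nodes and replica = 1:

nodes = ['n1', 'n2', 'n3', 'n4']  # hypothetical node list
replica = 1
iterations = 0
while (len(nodes) - replica) > 1:
    nodes.pop()       # stands in for failing over and rebalancing out the chosen node(s)
    iterations += 1
print(iterations, nodes)  # 2 ['n1', 'n2']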
Example no. 51
    def test_node_memcached_failure_in_series(self):
        timeout = self.timeout / 2
        status = self.rest.update_autoreprovision_settings(True, 1)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        data_lost = False
        for i in reversed(range(len(self.servers))):
            print(self.servers[i])
            operation = random.choice(['stop', 'memcached_failure', 'restart', 'failover', 'reboot'])
            shell = RemoteMachineShellConnection(self.servers[i])
            print("operation", operation)
            if i == 0:
                self.master = self.servers[1]
            if operation == 'stop':
                self._stop_couchbase(self.servers[i])
            elif operation == 'memcached_failure':
                self._pause_couchbase(self.servers[i])
            elif operation == 'restart':
                shell.restart_couchbase()
            elif operation == 'failover':
                RemoteUtilHelper.enable_firewall(self.servers[i])
            elif operation == 'reboot':
                if shell.extract_remote_info().type.lower() == 'windows':
                    o, r = shell.execute_command("shutdown -r -f -t 0")
                    self.sleep(200)
                elif shell.extract_remote_info().type.lower() == 'linux':
                    o, r = shell.execute_command("reboot")
                shell.log_command_output(o, r)
                self.sleep(60)
            self.sleep(40)
            if operation == 'memcached_failure':
                AutoReprovisionBaseTest.wait_for_warmup_or_assert(self.master, 1,
                                                                  timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                                  self)
            if operation != 'restart' and operation != 'memcached_failure' and operation != 'reboot':
                AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 1,
                                                                    timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                                    self)
            if operation != 'restart':
                RemoteUtilHelper.common_basic_setup([self.servers[i]])
            AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 0,
                                                                timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                                self)
            helper = RestHelper(RestConnection(self.master))
            self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
            self.sleep(40)
            if operation == 'memcached_failure' or operation == 'failover':
                self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
            else:
                if 'kv' in self.servers[i].services and self.replicas > 0:
                    self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
                    self.rest.rebalance(otpNodes=[node.id for node in self.rest.node_statuses()], ejectedNodes=[])
                    self.assertTrue(self.rest.monitorRebalance())
                else:
                    self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
            buckets = self.rest.get_buckets()
            if self.replicas == 0 and (operation == 'restart' or operation == 'reboot'):
                data_lost = True
            for bucket in buckets:
                if not data_lost:
                    self.verify_loaded_data(self.master, bucket.name, self.loaded_items[bucket.name])
Example no. 52
    def common_test_body(self, keys_count, replica, load_ratio, failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replica : {0}".format(replica))
        log.info("load_ratio : {0}".format(load_ratio))
        log.info("failover_reason : {0}".format(failover_reason))
        master = self._servers[0]
        log.info('picking server : {0} as the master'.format(master))
        rest = RestConnection(master)
        info = rest.get_nodes_self()
        rest.init_cluster(username=master.rest_username,
                          password=master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
        bucket_ram = info.memoryQuota * 2 / 3
        bucket = 'default'
        rest.create_bucket(bucket=bucket,
                           ramQuotaMB=bucket_ram,
                           replicaNumber=replica,
                           proxyPort=info.moxi)
        ready = BucketOperationHelper.wait_for_memcached(master, bucket)
        self.assertTrue(ready, "wait_for_memcached_failed")
        credentials = self._input.membase_settings

        ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)

        inserted_keys = FailoverBaseTest.load_data(master, bucket, keys_count, load_ratio)
        inserted_count = len(inserted_keys)
        log.info('inserted {0} keys'.format(inserted_count))

        nodes = rest.node_statuses()
        while (len(nodes) - replica) > 1:
            final_replication_state = RestHelper(rest).wait_for_replication(900)
            msg = "replication state after waiting for up to 15 minutes : {0}"
            self.log.info(msg.format(final_replication_state))
            chosen = RebalanceHelper.pick_nodes(master, howmany=replica)
            for node in chosen:
                # trigger the chosen failure on this node
                if failover_reason == 'stop_server':
                    self.stop_server(node)
                    log.info("waiting for membase-server to shut down")
                    # wait up to 5 minutes for the node to be reported down
                    self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
                elif failover_reason == "firewall":
                    RemoteUtilHelper.enable_firewall(self._servers, node, bidirectional=self.bidirectional)
                    self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")

                failed_over = rest.fail_over(node.id)
                if not failed_over:
                    log.info("unable to failover the node the first time. trying again in 75 seconds..")
                    # retry once more after a 75-second pause
                    time.sleep(75)
                    failed_over = rest.fail_over(node.id)
                self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
                log.info("failed over node : {0}".format(node.id))
            log.info("10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                           ejectedNodes=[node.id for node in chosen])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(), msg=msg)
            FailoverBaseTest.replication_verification(master, bucket, replica, inserted_count, self)

            nodes = rest.node_statuses()
        FailoverBaseTest.verify_data(master, inserted_keys, bucket, self)
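RestHelper.wait_for_node_status, used throughout the examples above, is essentially a polling loop. A rough sketch, assuming rest.node_statuses() returns objects exposing id and status attributes (as the calls above suggest); the real framework helper may differ in detail:

import time

def wait_for_node_status(rest, target_node, expected_status, timeout_secs):
    # Poll the cluster's node statuses until the target node reports the
    # expected status or the deadline passes. Sketch only.
    end_time = time.time() + timeout_secs
    while time.time() < end_time:
        for node in rest.node_statuses():
            if node.id == target_node.id and node.status == expected_status:
                return True
        time.sleep(5)  # poll every 5 seconds until the deadline passes
    return False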
Example no. 53
 def test_node_memcached_failure_in_series(self):
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     data_lost = False
     for i in reversed(xrange(len(self.servers))):
         self.log.info("target node: {0}".format(self.servers[i]))
         operation = random.choice(
             ['stop', 'memcached_failure', 'restart', 'failover', 'reboot'])
         shell = RemoteMachineShellConnection(self.servers[i])
         print "operation", operation
         if i == 0:
             self.master = self.servers[1]
         if operation == 'stop':
             self._stop_couchbase(self.servers[i])
         elif operation == 'memcached_failure':
             self._pause_couchbase(self.servers[i])
         elif operation == 'restart':
             shell.restart_couchbase()
         elif operation == 'failover':
             RemoteUtilHelper.enable_firewall(self.servers[i])
         elif operation == 'reboot':
             remote_os = shell.extract_remote_info().type.lower()
             if remote_os == 'windows':
                 o, r = shell.execute_command("shutdown -r -f -t 0")
                 shell.log_command_output(o, r)
                 self.sleep(200)
             elif remote_os == 'linux':
                 o, r = shell.execute_command("reboot")
                 shell.log_command_output(o, r)
             self.sleep(60)
         self.sleep(40)
         if operation == 'memcached_failure':
             AutoReprovisionBaseTest.wait_for_warmup_or_assert(
                 self.master, 1,
                 timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                 self)
         if operation != 'restart' and operation != 'memcached_failure' and operation != 'reboot':
             AutoReprovisionBaseTest.wait_for_failover_or_assert(
                 self.master, 1,
                 timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                 self)
         if operation != 'restart':
             RemoteUtilHelper.common_basic_setup([self.servers[i]])
         AutoReprovisionBaseTest.wait_for_failover_or_assert(
             self.master, 0,
             timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
         helper = RestHelper(RestConnection(self.master))
         self.assertTrue(helper.is_cluster_healthy(),
                         "cluster status is not healthy")
         self.sleep(40)
         if operation == 'memcached_failure' or operation == 'failover':
             self.assertTrue(helper.is_cluster_rebalanced(),
                             "cluster is not balanced")
         else:
             if 'kv' in self.servers[i].services and self.replicas > 0:
                 self.assertFalse(helper.is_cluster_rebalanced(),
                                  "cluster is balanced")
                 self.rest.rebalance(otpNodes=[
                     node.id for node in self.rest.node_statuses()
                 ],
                                     ejectedNodes=[])
                 self.assertTrue(self.rest.monitorRebalance())
             else:
                 self.assertTrue(helper.is_cluster_rebalanced(),
                                 "cluster is not balanced")
         buckets = self.rest.get_buckets()
         if self.replicas == 0 and (operation == 'restart'
                                    or operation == 'reboot'):
             data_lost = True
         for bucket in buckets:
             if not data_lost:
                 self.verify_loaded_data(self.master, bucket.name,
                                         self.loaded_items[bucket.name])
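The _stop_couchbase and _pause_couchbase helpers invoked above live in the base test class. One plausible way to simulate the 'memcached_failure' case is to freeze the memcached process with SIGSTOP; this is a sketch under that assumption, not the framework's actual implementation:

def _pause_couchbase(self, server):
    # Sketch only: freeze memcached with SIGSTOP so ns_server sees the
    # service hang while the node itself stays up. The framework's real
    # helper may pause additional processes or use a different signal.
    shell = RemoteMachineShellConnection(server)
    o, r = shell.execute_command("killall -SIGSTOP memcached")
    shell.log_command_output(o, r)
    shell.disconnect()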
Example no. 54
    def common_test_body(self, keys_count, replica, failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replicas : {0}".format(replica))
        log.info("failover_reason : {0}".format(failover_reason))
        log.info('picking server : {0} as the master'.format(self.master))

        self._load_all_buckets(self.master,
                               self.gen_create,
                               "create",
                               0,
                               batch_size=10000,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self._servers)

        _servers_ = self._servers  # NOTE: alias, not a copy; removals below also mutate self._servers
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()

        self._wait_for_replication(self._servers, timeout=600)
        chosen = RebalanceHelper.pick_nodes(self.master, howmany=replica)
        for node in chosen:
            # trigger the chosen failure on this node
            if failover_reason == 'stop_server':
                self.stop_server(node)
                log.info("waiting for membase-server to shut down")
                # wait up to 5 minutes for the node to be reported down
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                RemoteUtilHelper.enable_firewall(self._servers, node, bidirectional=self.bidirectional)
                status = RestHelper(rest).wait_for_node_status(node, "unhealthy", 300)
                if status:
                    log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
                else:
                    # dump the iptables rules on the node to help diagnose why it is still healthy
                    for server in self._servers:
                        if server.ip == node.ip:
                            shell = RemoteMachineShellConnection(server)
                            o, r = shell.execute_command("/sbin/iptables --list")
                            shell.log_command_output(o, r)
                            shell.disconnect()
                    self.assertTrue(status,
                                    msg="node status is not unhealthy even after waiting for 5 minutes")

            failed_over = rest.fail_over(node.id)
            if not failed_over:
                log.info("unable to failover the node the first time. trying again in 75 seconds..")
                # retry once more after a 75-second pause
                time.sleep(75)
                failed_over = rest.fail_over(node.id)
            self.assertTrue(failed_over,
                            "unable to failover node after {0}".format(failover_reason))
            log.info("failed over node : {0}".format(node.id))
            self._failed_nodes.append(node)

        if self.add_back_flag:
            for node in self._failed_nodes:
                rest.add_back_node(node.id)
                time.sleep(5)
            log.info("10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
            msg = "rebalance failed after adding back the failed-over nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
        else:
            log.info("10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                           ejectedNodes=[node.id for node in chosen])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
            # move each ejected server out of the working set and into the cleanup list
            for failed in chosen:
                for server in _servers_:
                    if server.ip == failed.ip:
                        _servers_.remove(server)
                        self._cleanup_nodes.append(server)
                        break

        log.info("Begin VERIFICATION ...")
        self._wait_for_stats_all_buckets(_servers_)
        self._wait_for_replication(self._servers, timeout=600)
        self._verify_stats_all_buckets(_servers_)
        self._verify_all_buckets(self.master)
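stop_server, called in the stop_server branch above, is defined on the base class. A rough sketch using the init-script layout seen elsewhere in these tests; the real helper may differ (for example, on Windows hosts):

def stop_server(self, node):
    # Find the matching server record and stop couchbase-server on it.
    # Sketch only; assumes a Linux host with the standard init script.
    for server in self._servers:
        if server.ip == node.ip:
            shell = RemoteMachineShellConnection(server)
            o, r = shell.execute_command("/etc/init.d/couchbase-server stop")
            shell.log_command_output(o, r)
            shell.disconnect()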
Example no. 55
 def start_firewall_on_node(self, node):
     """ Method to start a server which is subject to failover """
     for server in self.cluster.servers:
         if server.ip == node.ip:
             RemoteUtilHelper.enable_firewall(server)
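A matching teardown counterpart is useful after tests like these. This hypothetical stop_firewall_on_node simply flushes the iptables rules so the node becomes reachable again; in the framework, RemoteUtilHelper.common_basic_setup presumably performs a fuller reset:

def stop_firewall_on_node(self, node):
    # Hypothetical counterpart to start_firewall_on_node: flush the
    # iptables rules on the matching server so traffic flows again.
    for server in self.cluster.servers:
        if server.ip == node.ip:
            shell = RemoteMachineShellConnection(server)
            o, r = shell.execute_command("iptables -F")
            shell.log_command_output(o, r)
            shell.disconnect()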