Example #1
0
 def test_node_reboot(self):
     wait_timeout = 120
     timeout = self.timeout / 2
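     # Enable auto-reprovision (max_nodes=1) so the node is re-added once it comes back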
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     shell = RemoteMachineShellConnection(self.server_fail)
     # Cache the OS type once and pre-initialise o, r so log_command_output()
     # cannot hit an unbound variable if the OS is neither Windows nor Linux.
     os_type = shell.extract_remote_info().type.lower()
     o, r = [], []
     if os_type == 'windows':
         o, r = shell.execute_command("shutdown -r -f -t 0")
     elif os_type == 'linux':
         o, r = shell.execute_command("reboot")
     shell.log_command_output(o, r)
     # Windows nodes take considerably longer to come back up after a reboot
     if os_type == 'windows':
         time.sleep(wait_timeout * 5)
     else:
         time.sleep(wait_timeout)
     # disable firewall on the node
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.disable_firewall()
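     # Expect zero failed-over nodes: the rebooted node should be auto-reprovisioned back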
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 0,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(),
                     "cluster status is not healthy")
     self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
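     # Rebalance the cluster and verify the previously loaded data survived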
     self.rest.rebalance(
         otpNodes=[node.id for node in self.rest.node_statuses()],
         ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name,
                                 self.loaded_items[bucket.name])
 def test_node_reboot(self):
     wait_timeout = 120
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     shell = RemoteMachineShellConnection(self.server_fail)
     # Cache the OS type once and pre-initialise o, r so log_command_output()
     # cannot hit an unbound variable if the OS is neither Windows nor Linux.
     os_type = shell.extract_remote_info().type.lower()
     o, r = [], []
     if os_type == 'windows':
         o, r = shell.execute_command("shutdown -r -f -t 0")
     elif os_type == 'linux':
         o, r = shell.execute_command("reboot")
     shell.log_command_output(o, r)
     # Windows nodes take considerably longer to come back up after a reboot
     if os_type == 'windows':
         time.sleep(wait_timeout * 5)
     else:
         time.sleep(wait_timeout)
     # disable firewall on the node
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.disable_firewall()
     AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 0,
                                                         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                         self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
     self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
     self.rest.rebalance(otpNodes=[node.id for node in self.rest.node_statuses()], ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name, self.loaded_items[bucket.name])
Example #3
0
    def test_node_firewall_enabled(self):
        timeout = self.timeout / 2

        status = self.rest.update_autoreprovision_settings(True, 1)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
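        # Firewall off the node so the cluster marks it as down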
        RemoteUtilHelper.enable_firewall(self.server_fail)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self.sleep(5)
        shell = RemoteMachineShellConnection(self.server_fail)
        shell.disable_firewall()
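        # Unblock the node and wait until no nodes remain failed over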
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 0,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self.rest.rebalance(
            otpNodes=[node.id for node in self.rest.node_statuses()],
            ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
        buckets = self.rest.get_buckets()
        for bucket in buckets:
            self.verify_loaded_data(self.master, bucket.name,
                                    self.loaded_items[bucket.name])
Example #4
0
 def stop_firewall_on_node(self, node):
     """ Method to start a server which is subject to failover """
     for server in self.cluster.servers:
         if server.ip == node.ip:
             remote_client = RemoteMachineShellConnection(server)
             remote_client.disable_firewall()
             remote_client.disconnect()
Example #5
0
    def test_nwusage_with_auto_failover_and_bwthrottle_enabled(self):
        self.setup_xdcr()
        self.sleep(60)
        self._set_doc_size_num()
        self.src_cluster.rebalance_in()

        nw_limit = self._input.param("nw_limit", self._get_nwusage_limit())
        self._set_nwusage_limit(self.src_cluster,
                                nw_limit * self.num_src_nodes)

        src_conn = RestConnection(self.src_cluster.get_master_node())
        src_conn.update_autofailover_settings(enabled=True, timeout=30)

        self.src_cluster.pause_all_replications()

        gen_create = BlobGenerator('nwOne',
                                   'nwOne',
                                   self._value_size,
                                   end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_create)

        self.src_cluster.resume_all_replications()

        self.sleep(15)

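        # Stop Couchbase on a source node; auto-failover (30s timeout, set above) should kick in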
        shell = RemoteMachineShellConnection(self._input.servers[1])
        shell.stop_couchbase()
        self.sleep(30)
        task = self.cluster.async_rebalance(self.src_cluster.get_nodes(), [],
                                            [])
        task.result()
        failover_time = self._get_current_time(
            self.src_cluster.get_master_node())
        self.log.info("Node auto failed over at {0}".format(failover_time))
        FloatingServers._serverlist.append(self._input.servers[1])

        self.sleep(15)

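        # Bring the node back up, clear any firewall rules and rebalance it back in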
        shell.start_couchbase()
        shell.disable_firewall()
        self.sleep(45)
        self.src_cluster.rebalance_in()
        node_back_time = self._get_current_time(
            self.src_cluster.get_master_node())
        self.log.info("Node added back at {0}".format(node_back_time))

        self._wait_for_replication_to_catchup(timeout=600)

        self.verify_results()
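        # Bandwidth usage is verified over three windows: before failover (3 nodes),
        # between failover and re-add (2 nodes), and after the node is added back (3 nodes)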
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                     end_time=failover_time,
                                     no_of_nodes=3)
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                     event_time=failover_time,
                                     end_time=node_back_time,
                                     no_of_nodes=2)
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                     event_time=node_back_time,
                                     no_of_nodes=3)
    def test_topology_change_events(self):
        available_server_before_rebalance = copy.deepcopy(self.available_servers)
        try:
            self.log.info("Enabling firewall between Incoming node and CBAS CC "
                          "node to trigger topology_change_failed event")
            for node in available_server_before_rebalance:
                RemoteUtilHelper.enable_firewall(
                    node, bidirectional=False, xdcr=False,
                    action_on_packet="REJECT", block_ips=[self.cluster.cbas_cc_node.ip],
                    all_interface=True)

            self.log.info("Rebalancing IN CBAS node to trigger "
                          "topology_change_started event")
            rebalance_task, self.available_servers = self.rebalance_util.rebalance(
                self.cluster, kv_nodes_in=0, kv_nodes_out=0,
                cbas_nodes_in=1, cbas_nodes_out=0,
                available_servers=self.available_servers, exclude_nodes=[])

            if self.rebalance_util.wait_for_rebalance_task_to_complete(
                    rebalance_task, self.cluster, check_cbas_running=False):
                raise Exception("Rebalance passed when it should have failed.")

            self.log.info("Disabling firewall between Incoming node and CBAS CC "
                          "node and retriggering rebalance to trigger "
                          "topology_change_completed event")
            for node in available_server_before_rebalance:
                remote_client = RemoteMachineShellConnection(node)
                remote_client.disable_firewall()
                remote_client.disconnect()

            rebalance_task, self.available_servers = self.rebalance_util.rebalance(
                self.cluster, kv_nodes_in=0, kv_nodes_out=0,
                cbas_nodes_in=0, cbas_nodes_out=0,
                available_servers=self.available_servers, exclude_nodes=[])

            if not self.rebalance_util.wait_for_rebalance_task_to_complete(
                    rebalance_task, self.cluster, check_cbas_running=False):
                raise Exception("Rebalance failed even after disabling "
                                "firewall")

            self.log.info("Adding event for topology_change_started event")
            self.system_events.add_event(AnalyticsEvents.topology_change_started(
                self.cluster.cbas_cc_node.ip, 2, 0))

            self.log.info("Adding event for topology_change_failed event")
            self.system_events.add_event(AnalyticsEvents.topology_change_failed(
                self.cluster.cbas_cc_node.ip, 2, 0))

            self.log.info("Adding event for topology_change_completed event")
            self.system_events.add_event(AnalyticsEvents.topology_change_completed(
                self.cluster.cbas_cc_node.ip, 2, 0))
        except Exception as err:
            self.log.info("Disabling Firewall")
            for node in available_server_before_rebalance:
                remote_client = RemoteMachineShellConnection(node)
                remote_client.disable_firewall()
                remote_client.disconnect()
            self.fail(str(err))
Example #7
0
    def test_nwusage_with_auto_failover_and_bwthrottle_enabled_later(self):
        self.setup_xdcr()

        self.src_cluster.rebalance_in()

        self.src_cluster.pause_all_replications()

        gen_create = BlobGenerator('nwOne',
                                   'nwOne',
                                   self._value_size,
                                   end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_create)

        self.src_cluster.resume_all_replications()

        self.sleep(15)

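        # Stop Couchbase on a source node so it can be failed over, then rebalance it out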
        shell = RemoteMachineShellConnection(self._input.servers[1])
        shell.stop_couchbase()
        self.sleep(45)
        task = self.cluster.async_rebalance(self.src_cluster.get_nodes(), [],
                                            [])
        task.result()
        FloatingServers._serverlist.append(self._input.servers[1])

        self.sleep(15)

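        # Enable the bandwidth throttler only after the node has gone down ("enabled_later")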
        nw_limit = self._input.param("nw_limit", 1)
        self.src_cluster.set_xdcr_param("networkUsageLimit", nw_limit)
        bw_enable_time = time.strftime('%Y-%m-%dT%H:%M:%S')
        self.log.info(
            "Bandwidth throttler enabled at {0}".format(bw_enable_time))

        self.sleep(60)

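        # Restart Couchbase on the node and rebalance it back into the source cluster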
        shell.start_couchbase()
        shell.disable_firewall()
        self.sleep(30)
        self.src_cluster.rebalance_in()
        node_back_time = time.strftime('%Y-%m-%dT%H:%M:%S')
        self.log.info("Node added back at {0}".format(node_back_time))

        self._wait_for_replication_to_catchup(timeout=600)

        self.verify_results()
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                     event_time=bw_enable_time,
                                     end_time=node_back_time,
                                     no_of_nodes=2)
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                     event_time=node_back_time,
                                     no_of_nodes=3)
Example #8
0
    def test_nwusage_with_auto_failover_and_bwthrottle_enabled(self):
        self.setup_xdcr()

        self.src_cluster.rebalance_in()

        nw_limit = self._input.param("nw_limit", 1)
        self._set_nwusage_limit(self.src_cluster, nw_limit)

        src_conn = RestConnection(self.src_cluster.get_master_node())
        src_conn.update_autofailover_settings(enabled=True, timeout=30)

        self.src_cluster.pause_all_replications()

        gen_create = BlobGenerator('nwOne', 'nwOne', self._value_size, end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_create)

        self.src_cluster.resume_all_replications()

        self.sleep(15)

        shell = RemoteMachineShellConnection(self._input.servers[1])
        shell.stop_couchbase()
        self.sleep(30)
        task = self.cluster.async_rebalance(self.src_cluster.get_nodes(), [], [])
        task.result()
        failover_time = self._get_current_time(self.src_cluster.get_master_node())
        self.log.info("Node auto failed over at {0}".format(failover_time))
        FloatingServers._serverlist.append(self._input.servers[1])

        self.sleep(15)

        shell.start_couchbase()
        shell.disable_firewall()
        self.sleep(45)
        self.src_cluster.rebalance_in()
        node_back_time = self._get_current_time(self.src_cluster.get_master_node())
        self.log.info("Node added back at {0}".format(node_back_time))

        self._wait_for_replication_to_catchup(timeout=600)

        self.verify_results()
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(), end_time=failover_time, no_of_nodes=3)
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(), event_time=failover_time, end_time=node_back_time, no_of_nodes=2)
        self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(), event_time=node_back_time, no_of_nodes=3)
    def test_node_firewall_enabled(self):
        timeout = self.timeout / 2

        status = self.rest.update_autoreprovision_settings(True, 1)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        RemoteUtilHelper.enable_firewall(self.server_fail)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 1,
                                                            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                            self)
        self.sleep(5)
        shell = RemoteMachineShellConnection(self.server_fail)
        shell.disable_firewall()
        AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 0,
                                                            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                            self)
        self.rest.rebalance(otpNodes=[node.id for node in self.rest.node_statuses()], ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
        buckets = self.rest.get_buckets()
        for bucket in buckets:
            self.verify_loaded_data(self.master, bucket.name, self.loaded_items[bucket.name])
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        rest_src = RestConnection(self.master)
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()

        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)

        try:
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip, self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")

            """ at dest cluster """
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail("Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            gen = BlobGenerator("ent-backup", "ent-backup-", self.value_size, end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)

            """ enable firewall """
            if self.interrupt_replication:
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)

            """ start collect logs """
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            """ verify logs """
            try:
                logs_path = result["perNode"]["ns_1@" + str(self.master.ip)]["path"]
            except KeyError:
                logs_path = result["perNode"]["[email protected]"]["path"]
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace('-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/')+1]
            self.verify_log_files_exist(remotepath=remotepath,
                                        redactFileName=redactFileName,
                                        nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                      redactFileName=redactFileName,
                                      nonredactFileName=nonredactFileName,
                                      logFileName="ns_server.goxdcr.log")
        finally:
            """ clean up xdcr """
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
Example #11
0
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        rest_src = RestConnection(self.master)
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()

        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)

        try:
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip,
                                        self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")
            """ at dest cluster """
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail(
                    "Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            gen = BlobGenerator("ent-backup",
                                "ent-backup-",
                                self.value_size,
                                end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)
            """ enable firewall """
            if self.interrupt_replication:
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)
            """ start collect logs """
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            """ verify logs """
            try:
                logs_path = result["perNode"]["ns_1@" +
                                              str(self.master.ip)]["path"]
            except KeyError:
                logs_path = result["perNode"]["[email protected]"]["path"]
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace(
                '-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/') + 1]
            self.verify_log_files_exist(remotepath=remotepath,
                                        redactFileName=redactFileName,
                                        nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                      redactFileName=redactFileName,
                                      nonredactFileName=nonredactFileName,
                                      logFileName="ns_server.goxdcr.log")
        finally:
            """ clean up xdcr """
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
Example #12
0
 def tearDown(self):
     super(SingleNodeUpgradeTests, self).tearDown()
     if self.input.param("op", None) == "close_port":
         # The test closed ports via the firewall; clear the rules and
         # release the shell session on teardown.
         remote = RemoteMachineShellConnection(self.master)
         remote.disable_firewall()
         remote.disconnect()