def test_node_reboot(self):
    """Reboot the designated node and verify the cluster auto-reprovisions.

    Enables auto-reprovision (max 1 node), reboots ``self.server_fail``,
    waits for it to come back, rebalances the cluster and re-verifies the
    previously loaded data on every bucket.
    """
    wait_timeout = 120
    timeout = self.timeout / 2
    status = self.rest.update_autoreprovision_settings(True, 1)
    if not status:
        self.fail('failed to change autoreprovision_settings!')
    self.sleep(5)
    shell = RemoteMachineShellConnection(self.server_fail)
    # extract_remote_info() performs remote commands; fetch the OS type
    # once instead of three times -- it cannot change mid-test.
    # NOTE(review): on an OS that is neither windows nor linux, o/r stay
    # unbound (pre-existing behavior, unchanged here).
    os_type = shell.extract_remote_info().type.lower()
    if os_type == 'windows':
        o, r = shell.execute_command("shutdown -r -f -t 0")
    elif os_type == 'linux':
        o, r = shell.execute_command("reboot")
    shell.log_command_output(o, r)
    # Windows reboots take considerably longer than Linux ones.
    if os_type == 'windows':
        time.sleep(wait_timeout * 5)
    else:
        time.sleep(wait_timeout)
    # disable firewall on the node -- open a fresh connection, since the
    # previous SSH session did not survive the reboot
    shell = RemoteMachineShellConnection(self.server_fail)
    shell.disable_firewall()
    AutoReprovisionBaseTest.wait_for_failover_or_assert(
        self.master, 0,
        timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
    helper = RestHelper(self.rest)
    self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
    self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
    self.rest.rebalance(
        otpNodes=[node.id for node in self.rest.node_statuses()],
        ejectedNodes=[])
    self.assertTrue(self.rest.monitorRebalance())
    buckets = self.rest.get_buckets()
    for bucket in buckets:
        self.verify_loaded_data(self.master, bucket.name,
                                self.loaded_items[bucket.name])
def test_node_reboot(self):
    """Reboot ``self.server_fail`` and verify auto-reprovision recovers it.

    Turns on auto-reprovision for one node, reboots the target, waits for
    it to return, rebalances, and re-verifies all loaded bucket data.
    """
    wait_timeout = 120
    timeout = self.timeout / 2
    status = self.rest.update_autoreprovision_settings(True, 1)
    if not status:
        self.fail('failed to change autoreprovision_settings!')
    self.sleep(5)
    shell = RemoteMachineShellConnection(self.server_fail)
    # Cache the OS type: extract_remote_info() runs remote commands and
    # the original called it three times for the same invariant value.
    os_type = shell.extract_remote_info().type.lower()
    if os_type == 'windows':
        o, r = shell.execute_command("shutdown -r -f -t 0")
    elif os_type == 'linux':
        o, r = shell.execute_command("reboot")
    shell.log_command_output(o, r)
    # Windows needs a much longer reboot window than Linux.
    if os_type == 'windows':
        time.sleep(wait_timeout * 5)
    else:
        time.sleep(wait_timeout)
    # disable firewall on the node (fresh connection after the reboot)
    shell = RemoteMachineShellConnection(self.server_fail)
    shell.disable_firewall()
    AutoReprovisionBaseTest.wait_for_failover_or_assert(
        self.master, 0,
        timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
    helper = RestHelper(self.rest)
    self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
    self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
    self.rest.rebalance(
        otpNodes=[node.id for node in self.rest.node_statuses()],
        ejectedNodes=[])
    self.assertTrue(self.rest.monitorRebalance())
    buckets = self.rest.get_buckets()
    for bucket in buckets:
        self.verify_loaded_data(self.master, bucket.name,
                                self.loaded_items[bucket.name])
def test_node_firewall_enabled(self):
    """Firewall the target node, wait for failover, then heal and verify.

    The node is cut off until the cluster reports one failed-over node;
    the firewall is then dropped, the cluster rebalanced, and previously
    loaded data re-verified on every bucket.
    """
    detect_window = self.timeout / 2
    if not self.rest.update_autoreprovision_settings(True, 1):
        self.fail('failed to change autoreprovision_settings!')
    self.sleep(5)
    # Cut the node off and wait until exactly one node is failed over.
    RemoteUtilHelper.enable_firewall(self.server_fail)
    AutoReprovisionBaseTest.wait_for_failover_or_assert(
        self.master, 1,
        detect_window + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
    self.sleep(5)
    # Heal the node and wait until no node is reported failed over.
    node_shell = RemoteMachineShellConnection(self.server_fail)
    node_shell.disable_firewall()
    AutoReprovisionBaseTest.wait_for_failover_or_assert(
        self.master, 0,
        detect_window + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
    # Rebalance the recovered node back in and check data integrity.
    otp_nodes = [node.id for node in self.rest.node_statuses()]
    self.rest.rebalance(otpNodes=otp_nodes, ejectedNodes=[])
    self.assertTrue(self.rest.monitorRebalance())
    for bucket in self.rest.get_buckets():
        self.verify_loaded_data(self.master, bucket.name,
                                self.loaded_items[bucket.name])
def stop_firewall_on_node(self, node):
    """Disable the firewall on every cluster server whose IP matches *node*.

    The previous docstring ("start a server which is subject to failover")
    was incorrect: nothing is started here -- firewall rules are dropped so
    the node is reachable again.

    :param node: object with an ``ip`` attribute identifying the target.
    """
    for server in self.cluster.servers:
        if server.ip == node.ip:
            remote_client = RemoteMachineShellConnection(server)
            remote_client.disable_firewall()
            remote_client.disconnect()
def test_nwusage_with_auto_failover_and_bwthrottle_enabled(self):
    """Bandwidth-throttled XDCR across an auto-failover cycle.

    Stops couchbase on one source node, lets auto-failover remove it,
    adds it back, then verifies bandwidth usage recorded before, during
    and after the outage (3 / 2 / 3 nodes respectively).
    """
    self.setup_xdcr()
    self.sleep(60)
    self._set_doc_size_num()
    self.src_cluster.rebalance_in()
    # Apply the per-node bandwidth limit across all source nodes.
    per_node_limit = self._input.param("nw_limit", self._get_nwusage_limit())
    self._set_nwusage_limit(self.src_cluster,
                            per_node_limit * self.num_src_nodes)
    master_rest = RestConnection(self.src_cluster.get_master_node())
    master_rest.update_autofailover_settings(enabled=True, timeout=30)
    # Load documents while replication is paused, then resume it.
    self.src_cluster.pause_all_replications()
    doc_generator = BlobGenerator('nwOne', 'nwOne', self._value_size,
                                  end=self._num_items)
    self.src_cluster.load_all_buckets_from_generator(kv_gen=doc_generator)
    self.src_cluster.resume_all_replications()
    self.sleep(15)
    # Kill couchbase on one source node so auto-failover kicks in.
    failed_node_shell = RemoteMachineShellConnection(self._input.servers[1])
    failed_node_shell.stop_couchbase()
    self.sleep(30)
    rebalance_task = self.cluster.async_rebalance(
        self.src_cluster.get_nodes(), [], [])
    rebalance_task.result()
    failover_time = self._get_current_time(
        self.src_cluster.get_master_node())
    self.log.info("Node auto failed over at {0}".format(failover_time))
    FloatingServers._serverlist.append(self._input.servers[1])
    self.sleep(15)
    # Bring the node back and rebalance it in again.
    failed_node_shell.start_couchbase()
    failed_node_shell.disable_firewall()
    self.sleep(45)
    self.src_cluster.rebalance_in()
    node_back_time = self._get_current_time(
        self.src_cluster.get_master_node())
    self.log.info("Node added back at {0}".format(node_back_time))
    self._wait_for_replication_to_catchup(timeout=600)
    self.verify_results()
    # Usage windows: 3 nodes before failover, 2 while out, 3 after rejoin.
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 end_time=failover_time, no_of_nodes=3)
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 event_time=failover_time,
                                 end_time=node_back_time, no_of_nodes=2)
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 event_time=node_back_time, no_of_nodes=3)
def test_topology_change_events(self):
    """Verify analytics topology-change system events.

    Forces a failed CBAS rebalance-in (incoming nodes firewalled off from
    the CBAS CC node), then a successful rebalance after the firewall is
    dropped, and registers the expected topology_change_started/_failed/
    _completed events.
    """
    # Snapshot the free-server pool: self.available_servers is reassigned
    # by rebalance() below, but cleanup must target exactly the nodes
    # that were firewalled.
    available_server_before_rebalance = copy.deepcopy(self.available_servers)
    try:
        self.log.info("Enabling firewall between Incoming node and CBAS CC "
                      "node to trigger topology_change_failed event")
        for node in available_server_before_rebalance:
            RemoteUtilHelper.enable_firewall(
                node, bidirectional=False, xdcr=False,
                action_on_packet="REJECT",
                block_ips=[self.cluster.cbas_cc_node.ip],
                all_interface=True)
        self.log.info("Rebalancing IN CBAS node to trigger "
                      "topology_change_started event")
        rebalance_task, self.available_servers = self.rebalance_util.rebalance(
            self.cluster, kv_nodes_in=0, kv_nodes_out=0, cbas_nodes_in=1,
            cbas_nodes_out=0, available_servers=self.available_servers,
            exclude_nodes=[])
        # The firewall blocks the incoming node from the CBAS CC node, so
        # this rebalance is expected to FAIL.
        if self.rebalance_util.wait_for_rebalance_task_to_complete(
                rebalance_task, self.cluster, check_cbas_running=False):
            raise Exception("Rebalance passed when it should have failed.")
        self.log.info("Disabling firewall between Incoming node and CBAS CC "
                      "node and retriggering rebalance to trigger "
                      "topology_change_completed event")
        for node in available_server_before_rebalance:
            remote_client = RemoteMachineShellConnection(node)
            remote_client.disable_firewall()
            remote_client.disconnect()
        # Retry with no topology delta; this one must succeed.
        rebalance_task, self.available_servers = self.rebalance_util.rebalance(
            self.cluster, kv_nodes_in=0, kv_nodes_out=0, cbas_nodes_in=0,
            cbas_nodes_out=0, available_servers=self.available_servers,
            exclude_nodes=[])
        if not self.rebalance_util.wait_for_rebalance_task_to_complete(
                rebalance_task, self.cluster, check_cbas_running=False):
            raise Exception("Rebalance failed even after disabling "
                            "firewall")
        self.log.info("Adding event for topology_change_started event")
        self.system_events.add_event(AnalyticsEvents.topology_change_started(
            self.cluster.cbas_cc_node.ip, 2, 0))
        self.log.info("Adding event for topology_change_failed event")
        self.system_events.add_event(AnalyticsEvents.topology_change_failed(
            self.cluster.cbas_cc_node.ip, 2, 0))
        self.log.info("Adding event for topology_change_completed event")
        self.system_events.add_event(AnalyticsEvents.topology_change_completed(
            self.cluster.cbas_cc_node.ip, 2, 0))
    except Exception as err:
        # Best-effort cleanup: always drop the firewall rules before
        # failing, so later tests do not inherit blocked nodes.
        self.log.info("Disabling Firewall")
        for node in available_server_before_rebalance:
            remote_client = RemoteMachineShellConnection(node)
            remote_client.disable_firewall()
            remote_client.disconnect()
        self.fail(str(err))
def test_nwusage_with_auto_failover_and_bwthrottle_enabled_later(self):
    """XDCR with the bandwidth throttler enabled mid-test.

    A source node is stopped and rebalanced out; the network usage limit
    is then applied while the node is down, the node is added back, and
    bandwidth usage is verified for the throttled windows (2 nodes while
    out, 3 after rejoin).
    """
    self.setup_xdcr()
    self.src_cluster.rebalance_in()
    # Load documents while replication is paused, then resume it.
    self.src_cluster.pause_all_replications()
    doc_generator = BlobGenerator('nwOne', 'nwOne', self._value_size,
                                  end=self._num_items)
    self.src_cluster.load_all_buckets_from_generator(kv_gen=doc_generator)
    self.src_cluster.resume_all_replications()
    self.sleep(15)
    # Take one source node down and rebalance it out.
    failed_node_shell = RemoteMachineShellConnection(self._input.servers[1])
    failed_node_shell.stop_couchbase()
    self.sleep(45)
    rebalance_task = self.cluster.async_rebalance(
        self.src_cluster.get_nodes(), [], [])
    rebalance_task.result()
    FloatingServers._serverlist.append(self._input.servers[1])
    self.sleep(15)
    # Only now enable the bandwidth throttler, and record when.
    throttle_limit = self._input.param("nw_limit", 1)
    self.src_cluster.set_xdcr_param("networkUsageLimit", throttle_limit)
    bw_enable_time = time.strftime('%Y-%m-%dT%H:%M:%S')
    self.log.info(
        "Bandwidth throttler enabled at {0}".format(bw_enable_time))
    self.sleep(60)
    # Bring the node back and rebalance it in again.
    failed_node_shell.start_couchbase()
    failed_node_shell.disable_firewall()
    self.sleep(30)
    self.src_cluster.rebalance_in()
    node_back_time = time.strftime('%Y-%m-%dT%H:%M:%S')
    self.log.info("Node added back at {0}".format(node_back_time))
    self._wait_for_replication_to_catchup(timeout=600)
    self.verify_results()
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 event_time=bw_enable_time,
                                 end_time=node_back_time, no_of_nodes=2)
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 event_time=node_back_time, no_of_nodes=3)
def test_nwusage_with_auto_failover_and_bwthrottle_enabled(self):
    """Bandwidth-throttled XDCR across an auto-failover/add-back cycle.

    One source node is stopped so auto-failover removes it; the node is
    then restarted and rebalanced back in, and bandwidth usage is checked
    for the 3-node / 2-node / 3-node windows.
    """
    self.setup_xdcr()
    self.src_cluster.rebalance_in()
    # Enable the bandwidth throttler on the source cluster.
    throttle_limit = self._input.param("nw_limit", 1)
    self._set_nwusage_limit(self.src_cluster, throttle_limit)
    master_rest = RestConnection(self.src_cluster.get_master_node())
    master_rest.update_autofailover_settings(enabled=True, timeout=30)
    # Load documents while replication is paused, then resume it.
    self.src_cluster.pause_all_replications()
    doc_generator = BlobGenerator('nwOne', 'nwOne', self._value_size,
                                  end=self._num_items)
    self.src_cluster.load_all_buckets_from_generator(kv_gen=doc_generator)
    self.src_cluster.resume_all_replications()
    self.sleep(15)
    # Stop couchbase on one source node so auto-failover kicks in.
    failed_node_shell = RemoteMachineShellConnection(self._input.servers[1])
    failed_node_shell.stop_couchbase()
    self.sleep(30)
    rebalance_task = self.cluster.async_rebalance(
        self.src_cluster.get_nodes(), [], [])
    rebalance_task.result()
    failover_time = self._get_current_time(
        self.src_cluster.get_master_node())
    self.log.info("Node auto failed over at {0}".format(failover_time))
    FloatingServers._serverlist.append(self._input.servers[1])
    self.sleep(15)
    # Bring the node back and rebalance it in again.
    failed_node_shell.start_couchbase()
    failed_node_shell.disable_firewall()
    self.sleep(45)
    self.src_cluster.rebalance_in()
    node_back_time = self._get_current_time(
        self.src_cluster.get_master_node())
    self.log.info("Node added back at {0}".format(node_back_time))
    self._wait_for_replication_to_catchup(timeout=600)
    self.verify_results()
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 end_time=failover_time, no_of_nodes=3)
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 event_time=failover_time,
                                 end_time=node_back_time, no_of_nodes=2)
    self._verify_bandwidth_usage(node=self.src_cluster.get_master_node(),
                                 event_time=node_back_time, no_of_nodes=3)
def test_node_firewall_enabled(self):
    """Cut a node off with a firewall, then heal it and verify the data.

    Waits for one node to be failed over while firewalled, drops the
    firewall, waits for recovery, rebalances, and re-verifies every
    bucket's loaded data.
    """
    detect_window = self.timeout / 2
    if not self.rest.update_autoreprovision_settings(True, 1):
        self.fail('failed to change autoreprovision_settings!')
    self.sleep(5)
    # Firewall the node; one failover is expected within the window.
    RemoteUtilHelper.enable_firewall(self.server_fail)
    AutoReprovisionBaseTest.wait_for_failover_or_assert(
        self.master, 1,
        detect_window + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
    self.sleep(5)
    # Drop the firewall; the failed-over count must return to zero.
    node_shell = RemoteMachineShellConnection(self.server_fail)
    node_shell.disable_firewall()
    AutoReprovisionBaseTest.wait_for_failover_or_assert(
        self.master, 0,
        detect_window + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
    # Rebalance everything back in and re-check the data.
    self.rest.rebalance(
        otpNodes=[node.id for node in self.rest.node_statuses()],
        ejectedNodes=[])
    self.assertTrue(self.rest.monitorRebalance())
    for bucket in self.rest.get_buckets():
        self.verify_loaded_data(self.master, bucket.name,
                                self.loaded_items[bucket.name])
def test_cbcollect_with_redaction_enabled_with_xdcr(self):
    """Collect logs with redaction enabled while XDCR is set up.

    Creates an XDCR replication to a second cluster, loads data,
    optionally interrupts replication with a firewall, runs log
    collection, and verifies both redacted and unredacted log archives
    exist and that ns_server.goxdcr.log is properly redacted.
    """
    rest_src = RestConnection(self.master)
    rest_src.remove_all_replications()
    rest_src.remove_all_remote_clusters()
    rest_dest = RestConnection(self.servers[1])
    rest_dest_helper = RestHelper(rest_dest)
    try:
        # Start from a clean slate, then turn on log redaction.
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()
        self.set_redaction_level()
        rest_src.add_remote_cluster(self.servers[1].ip, self.servers[1].port,
                                    self.servers[1].rest_username,
                                    self.servers[1].rest_password, "C2")
        """ at dest cluster """
        self.add_built_in_server_user(node=self.servers[1])
        rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
        bucket_ready = rest_dest_helper.vbucket_map_ready('default')
        if not bucket_ready:
            self.fail("Bucket default at dest not created after 120 seconds.")
        repl_id = rest_src.start_replication('continuous', 'default', "C2")
        if repl_id is not None:
            self.log.info("Replication created successfully")
        # Load documents on the source and wait for all load tasks.
        gen = BlobGenerator("ent-backup", "ent-backup-", self.value_size,
                            end=self.num_items)
        tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
        for task in tasks:
            task.result()
        self.sleep(10)
        """ enable firewall """
        # Optionally break the replication link so logs capture the error.
        if self.interrupt_replication:
            RemoteUtilHelper.enable_firewall(self.master, xdcr=True)
        """ start collect logs """
        self.start_logs_collection()
        result = self.monitor_logs_collection()
        """ verify logs """
        # Node key differs between real and single-node (127.0.0.1) runs.
        try:
            logs_path = result["perNode"]["ns_1@" + str(self.master.ip)]["path"]
        except KeyError:
            logs_path = result["perNode"]["[email protected]"]["path"]
        # Derive the redacted and unredacted archive names from the path.
        redactFileName = logs_path.split('/')[-1]
        nonredactFileName = logs_path.split('/')[-1].replace('-redacted', '')
        remotepath = logs_path[0:logs_path.rfind('/')+1]
        self.verify_log_files_exist(remotepath=remotepath,
                                    redactFileName=redactFileName,
                                    nonredactFileName=nonredactFileName)
        self.log.info("Verify on log ns_server.goxdcr.log")
        self.verify_log_redaction(remotepath=remotepath,
                                  redactFileName=redactFileName,
                                  nonredactFileName=nonredactFileName,
                                  logFileName="ns_server.goxdcr.log")
    finally:
        """ clean up xdcr """
        rest_dest.delete_bucket()
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()
        # Undo the firewall rule if we added one above.
        if self.interrupt_replication:
            shell = RemoteMachineShellConnection(self.master)
            shell.disable_firewall()
            shell.disconnect()
def test_cbcollect_with_redaction_enabled_with_xdcr(self):
    """Collect logs with redaction enabled while XDCR is set up.

    Creates an XDCR replication to a second cluster, loads data,
    optionally interrupts replication with a firewall, runs log
    collection, and verifies both redacted and unredacted log archives
    exist and that ns_server.goxdcr.log is properly redacted.
    """
    rest_src = RestConnection(self.master)
    rest_src.remove_all_replications()
    rest_src.remove_all_remote_clusters()
    rest_dest = RestConnection(self.servers[1])
    rest_dest_helper = RestHelper(rest_dest)
    try:
        # Start from a clean slate, then turn on log redaction.
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()
        self.set_redaction_level()
        rest_src.add_remote_cluster(self.servers[1].ip, self.servers[1].port,
                                    self.servers[1].rest_username,
                                    self.servers[1].rest_password, "C2")
        """ at dest cluster """
        self.add_built_in_server_user(node=self.servers[1])
        rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
        bucket_ready = rest_dest_helper.vbucket_map_ready('default')
        if not bucket_ready:
            self.fail(
                "Bucket default at dest not created after 120 seconds.")
        repl_id = rest_src.start_replication('continuous', 'default', "C2")
        if repl_id is not None:
            self.log.info("Replication created successfully")
        # Load documents on the source and wait for all load tasks.
        gen = BlobGenerator("ent-backup", "ent-backup-", self.value_size,
                            end=self.num_items)
        tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
        for task in tasks:
            task.result()
        self.sleep(10)
        """ enable firewall """
        # Optionally break the replication link so logs capture the error.
        if self.interrupt_replication:
            RemoteUtilHelper.enable_firewall(self.master, xdcr=True)
        """ start collect logs """
        self.start_logs_collection()
        result = self.monitor_logs_collection()
        """ verify logs """
        # Node key differs between real and single-node (127.0.0.1) runs.
        try:
            logs_path = result["perNode"]["ns_1@" + str(self.master.ip)]["path"]
        except KeyError:
            logs_path = result["perNode"]["[email protected]"]["path"]
        # Derive the redacted and unredacted archive names from the path.
        redactFileName = logs_path.split('/')[-1]
        nonredactFileName = logs_path.split('/')[-1].replace(
            '-redacted', '')
        remotepath = logs_path[0:logs_path.rfind('/') + 1]
        self.verify_log_files_exist(remotepath=remotepath,
                                    redactFileName=redactFileName,
                                    nonredactFileName=nonredactFileName)
        self.log.info("Verify on log ns_server.goxdcr.log")
        self.verify_log_redaction(remotepath=remotepath,
                                  redactFileName=redactFileName,
                                  nonredactFileName=nonredactFileName,
                                  logFileName="ns_server.goxdcr.log")
    finally:
        """ clean up xdcr """
        rest_dest.delete_bucket()
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()
        # Undo the firewall rule if we added one above.
        if self.interrupt_replication:
            shell = RemoteMachineShellConnection(self.master)
            shell.disable_firewall()
            shell.disconnect()
def tearDown(self):
    """Tear down the upgrade test.

    When the test ran with ``op=close_port``, drop the firewall rule so
    subsequent tests can reach the master node again.
    """
    super(SingleNodeUpgradeTests, self).tearDown()
    if self.input.param("op", None) == "close_port":
        remote = RemoteMachineShellConnection(self.master)
        remote.disable_firewall()
        # Fix: the original leaked this connection; close it like the
        # rest of the suite does.
        remote.disconnect()