def test_drift_stats(self):
    """
    Check the drift statistics reported after setWithMeta mutations.

    The matrix that should eventually be covered is (ahead, behind) x
    (setWithMeta, deleteWithMeta) x (active, replica); for now only
    setWithMeta is issued, in sequence, against a single key:

    1. A setWithMeta that reuses the CAS of a fresh SDK write must leave both
       per-vbucket ``drift_*_threshold_exceeded`` counters at 0.
    2. A setWithMeta whose CAS is ``5000 * LWWStatsTests.DEFAULT_THRESHOLD``
       ahead of that CAS (or behind it, when the ``check_ahead_threshold``
       test parameter is false) is issued, and the bucket-level drift stats
       are then verified.
    """
    self.log.info('starting test_drift_stats')

    # Create a user named after the bucket, with admin access.
    bucket = self.buckets[0]
    payload = "name={0}&roles=admin&password=password".format(bucket.name)
    self.rest.add_set_builtin_user(bucket.name, payload)

    check_ahead_threshold = self.input.param("check_ahead_threshold", True)
    self.log.info(
        'Checking the ahead threshold? {0}'.format(check_ahead_threshold))

    sdk_client = SDKClient(scheme='couchbase', hosts=[self.servers[0].ip],
                           bucket=bucket.name)
    mc_client = MemcachedClientHelper.direct_client(self.servers[0], bucket)

    # The CAS of a fresh SDK write is used as the "current time" reference.
    current_time_cas = sdk_client.set('key1', 'val1').cas

    test_key = 'test-set-with-metaxxxx'
    vbId = (((zlib.crc32(test_key)) >> 16) & 0x7fff) & (self.vbuckets - 1)
    vb_prefix = 'vb_' + str(vbId)

    # Within the threshold: neither exceeded-counter should be triggered.
    mc_client.setWithMeta(test_key, 'test-value', 0, 0, 1, current_time_cas)

    vbucket_stats = mc_client.stats('vbucket-details')
    ahead_exceeded = int(
        vbucket_stats[vb_prefix + ':drift_ahead_threshold_exceeded'])
    self.assertTrue(
        ahead_exceeded == 0,
        'Ahead exceeded expected is 0 but is {0}'.format(ahead_exceeded))
    behind_exceeded = int(
        vbucket_stats[vb_prefix + ':drift_behind_threshold_exceeded'])
    self.assertTrue(
        behind_exceeded == 0,
        'Behind exceeded expected is 0 but is {0}'.format(behind_exceeded))

    # Out of curiosity, log the total counts.
    self.log.info(
        'Total stats: total abs drift {0} and total abs drift count {1}'.
        format(vbucket_stats[vb_prefix + ':total_abs_drift'],
               vbucket_stats[vb_prefix + ':total_abs_drift_count']))

    # Now push the CAS far outside the threshold, ahead or behind.
    if check_ahead_threshold:
        stat_descriptor = 'ahead'
        cas = current_time_cas + 5000 * LWWStatsTests.DEFAULT_THRESHOLD
    else:
        stat_descriptor = 'behind'
        cas = current_time_cas - (5000 * LWWStatsTests.DEFAULT_THRESHOLD)
    mc_client.setWithMeta(test_key, 'test-value', 0, 0, 0, cas)

    # Verify the vbucket stats.
    vbucket_stats = mc_client.stats('vbucket-details')
    drift_counter_stat = \
        vb_prefix + ':drift_' + stat_descriptor + '_threshold_exceeded'
    threshold_exceeded = int(vbucket_stats[drift_counter_stat])
    # MB-21450: the per-vbucket exceeded-counter is not asserted on; the
    # value is only logged so that it is visible in the test output.
    self.log.info('{0} is {1}'.format(drift_counter_stat,
                                      threshold_exceeded))
    self.log.info(
        'Total stats: total abs drift {0} and total abs drift count {1}'.
        format(vbucket_stats[vb_prefix + ':total_abs_drift'],
               vbucket_stats[vb_prefix + ':total_abs_drift_count']))

    # And verify the bucket stats: ep_active_hlc_drift_count,
    # ep_clock_cas_drift_threshold_exceeded, ep_active_hlc_drift.
    bucket_stats = mc_client.stats()
    ep_active_hlc_drift_count = int(
        bucket_stats['ep_active_hlc_drift_count'])
    ep_clock_cas_drift_threshold_exceeded = int(
        bucket_stats['ep_clock_cas_drift_threshold_exceeded'])
    ep_active_hlc_drift = int(bucket_stats['ep_active_hlc_drift'])

    # Drift count appears to be the number of mutations.
    self.assertTrue(
        ep_active_hlc_drift_count > 0,
        'ep_active_hlc_drift_count is 0, expected a positive value')

    # Drift itself is the sum of the absolute values of all drifts, so
    # check that it is greater than 0.
    self.assertTrue(ep_active_hlc_drift > 0,
                    'ep_active_hlc_drift is 0, expected a positive value')

    # The actual drift count is a little more granular.
    expected_drift_threshold_exceed_count = 1
    self.assertTrue(
        expected_drift_threshold_exceed_count ==
        ep_clock_cas_drift_threshold_exceeded,
        'ep_clock_cas_drift_threshold_exceeded is incorrect. Expected {0}, actual {1}'
        .format(expected_drift_threshold_exceed_count,
                ep_clock_cas_drift_threshold_exceeded))
def kill_producer(self, server):
    """
    Kill the ``eventing-producer`` process on ``server``.

    :param server: node to open the remote shell connection to
    """
    remote_client = RemoteMachineShellConnection(server)
    try:
        remote_client.kill_eventing_process(name="eventing-producer")
    finally:
        # Release the shell even when the kill command raises.
        remote_client.disconnect()
def change_time_zone(self, server, timezone="UTC"):
    """
    Set the system time zone of ``server`` with ``timedatectl``.

    :param server: node to open the remote shell connection to
    :param timezone: time zone name appended verbatim to
                     ``timedatectl set-timezone`` (default ``"UTC"``)
    """
    remote_client = RemoteMachineShellConnection(server)
    try:
        remote_client.execute_command("timedatectl set-timezone " + timezone)
    finally:
        # Release the shell even when the remote command raises.
        remote_client.disconnect()
def run_failover_operations_with_ops(self, chosen, failover_reason):
    """
    Make each chosen node unhealthy as dictated by ``failover_reason``, then
    fail over and run the optional workloads while the rebalance completes.

    :param chosen: nodes selected for failover
    :param failover_reason: ``'stop_server'`` or ``"firewall"``; any other
                            value skips the per-node preparation
    """
    unhealthy_wait_secs = 300
    not_unhealthy_msg = \
        "node status is not unhealthy even after waiting for 5 minutes"

    # Bring every chosen node into the requested failure state.
    for node in chosen:
        if failover_reason == 'stop_server':
            self.cluster_util.stop_server(node)
            self.log.info(
                "10 seconds delay to wait for membase-server to shutdown")
            # Wait for 5 minutes until the node is down.
            went_down = RestHelper(self.rest).wait_for_node_status(
                node, "unhealthy", unhealthy_wait_secs)
            self.assertTrue(went_down, msg=not_unhealthy_msg)
            continue

        if failover_reason != "firewall":
            continue

        self.filter_list.append(node.ip)
        matching = [srv for srv in self.servers if node.ip == srv.ip]
        RemoteUtilHelper.enable_firewall(
            matching[0], bidirectional=self.bidirectional)
        went_down = RestHelper(self.rest).wait_for_node_status(
            node, "unhealthy", unhealthy_wait_secs)
        if went_down:
            self.log.info("node {0}:{1} is 'unhealthy' as expected".format(
                node.ip, node.port))
            continue

        # Something is wrong: dump the firewall rules of the node, the UI
        # logs and the node statuses before failing the test.
        for candidate in self.servers:
            if candidate.ip != node.ip:
                continue
            shell = RemoteMachineShellConnection(candidate)
            remote_info = shell.extract_remote_info()
            if remote_info.type.lower() == "windows":
                firewall_cmd = "netsh advfirewall show allprofiles"
            else:
                firewall_cmd = "/sbin/iptables --list"
            out, err = shell.execute_command(firewall_cmd)
            shell.log_command_output(out, err)
            shell.disconnect()
        self.rest.print_UI_logs()
        _, content, _ = self.rest._http_request(
            self.rest.baseUrl + 'nodeStatuses')
        self.log.info("nodeStatuses: {0}".format(json.loads(content)))
        self.fail(not_unhealthy_msg)

    nodes = self.filter_servers(self.servers, chosen)
    # NOTE(review): `node` is whatever the loop above left behind, i.e. the
    # last entry of `chosen` -- confirm only that node is meant to fail over.
    self.rest.fail_over(node.id, graceful=(self.graceful))

    # Perform compaction
    if self.compact:
        compact_tasks = [
            self.task.async_compact_bucket(self.master, bucket)
            for bucket in self.buckets]
    else:
        compact_tasks = []

    # Run view operations
    if self.withViewsOps:
        self.query_and_monitor_view_tasks(nodes)

    # Run mutation operations
    if self.withMutationOps:
        self.run_mutation_operations()

    rebalance_msg = "rebalance failed while removing failover nodes {0}" \
        .format(node.id)
    self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True),
                    msg=rebalance_msg)
    for compact_task in compact_tasks:
        compact_task.result()
self.pause_between_failover_action, self.failover_expected, self.timeout_buffer, failure_timers=node_down_timer_tasks) self.task_manager.add_new_task(task) try: self.task_manager.get_task_result(task) except Exception, e: self.fail("Exception: {}".format(e)) finally: self.sleep(120, "Sleeping for 2 min for the machines to restart") for node in self.server_to_fail: for _ in range(0, 2): try: _ = RemoteMachineShellConnection(node) break except Exception: self.log.info("Unable to connect to the host. " "Machine has not restarted") self.sleep(60, "Sleep for another minute and try " "again") def stop_memcached(self): """ Stop the memcached on the nodes to fail in the tests :return: Nothing """ node_down_timer_tasks = [] for node in self.server_to_fail: node_failure_timer_task = NodeDownTimerTask(node.ip, 11211)
def test_backup_restore_collection_sanity(self):
    """
    Backup/restore sanity test for a bucket with scopes and collections.

    1. Create default bucket on the cluster and loads it with given number of items
    2. Perform updates and create backups for specified number of times (test param number_of_backups)
    3. Perform restores for the same number of times with random start and end values
    """
    self.log.info("*** create collection in all buckets")
    self.log.info("*** start to load items to all buckets")
    self.active_resident_threshold = 100
    self.load_all_buckets(self.backupset.cluster_host)
    self.log.info("*** done to load items to all buckets")
    self.ops_type = self.input.param("ops-type", "update")
    self.expected_error = self.input.param("expected_error", None)
    self.create_scope_cluster_host()
    self.create_collection_cluster_host(self.backupset.col_per_scope)
    backup_scopes = self.get_bucket_scope_cluster_host()
    # Collect the ids of every scope except "_default".
    scopes_id = []
    for scope in backup_scopes:
        if scope == "_default":
            continue
        scopes_id.append(self.get_scopes_id_cluster_host(scope))
    """ remove null and empty element """
    scopes_id = [i for i in scopes_id if i]
    # NOTE(review): backup_collections, col_stats and bk_scope_id are
    # assigned but not read again in this method.
    backup_collections = self.get_bucket_collection_cluster_host()
    col_stats = self.get_collection_stats_cluster_host()
    for backup_scope in backup_scopes:
        bk_scope_id = self.get_scopes_id_cluster_host(backup_scope)
    if self.auto_failover:
        self.log.info("Enabling auto failover on " + str(self.backupset.cluster_host))
        rest_conn = RestConnection(self.backupset.cluster_host)
        rest_conn.update_autofailover_settings(self.auto_failover, self.auto_failover_timeout)
    self.backup_create_validate()
    # Backup phase: optionally update items, then take and validate a backup,
    # number_of_backups times.
    for i in range(1, self.backupset.number_of_backups + 1):
        if self.ops_type == "update":
            self.log.info("*** start to update items in all buckets")
            col_cmd = ""
            if self.backupset.load_to_collection:
                # Direct the load at one randomly chosen scope id.
                self.backupset.load_scope_id = choice(scopes_id)
                col_cmd = " -c {0} ".format(self.backupset.load_scope_id)
            self.load_all_buckets(self.backupset.cluster_host, ratio=0.1,
                                  command_options=col_cmd)
            self.log.info("*** done update items in all buckets")
        self.sleep(10)
        self.log.info("*** start to validate backup cluster")
        self.backup_cluster_validate()
    self.targetMaster = True
    # Pick a random start/end backup range with start <= end.
    start = randrange(1, self.backupset.number_of_backups + 1)
    if start == self.backupset.number_of_backups:
        end = start
    else:
        end = randrange(start, self.backupset.number_of_backups + 1)
    self.log.info("*** start to restore cluster")
    # Ranges already used, keyed "start/end", so each restore uses a new one.
    restored = {"{0}/{1}".format(start, end): ""}
    # Restore phase: one restore per backup taken.
    for i in range(1, self.backupset.number_of_backups + 1):
        if self.reset_restore_cluster:
            self.log.info("*** start to reset cluster")
            self.backup_reset_clusters(self.cluster_to_restore)
            if self.same_cluster:
                self._initialize_nodes(Cluster(), self.servers[:self.nodes_init])
            else:
                shell = RemoteMachineShellConnection(self.backupset.restore_cluster_host)
                shell.enable_diag_eval_on_non_local_hosts()
                shell.disconnect()
                rest = RestConnection(self.backupset.restore_cluster_host)
                rest.force_eject_node()
                rest.init_node()
            self.log.info("Done reset cluster")
        self.sleep(10)
        """ Add built-in user cbadminbucket to second cluster """
        self.add_built_in_server_user(node=self.input.clusters[0][:self.nodes_init][0])
        self.backupset.start = start
        self.backupset.end = end
        self.log.info("*** start restore validation")
        # Build "bucket.scope=bucket.scope" mappings for every scope whose
        # name does not contain "default".
        data_map_collection = []
        for scope in backup_scopes:
            if "default" in scope:
                continue
            data_map_collection.append(self.buckets[0].name + "." + scope + "=" + \
                                       self.buckets[0].name + "." + scope)
        self.bucket_map_collection = ",".join(data_map_collection)
        self.backup_restore_validate(compare_uuid=False,
                                     seqno_compare_function=">=",
                                     expected_error=self.expected_error)
        if self.backupset.number_of_backups == 1:
            continue
        # Draw a start/end range that has not been restored yet.
        while "{0}/{1}".format(start, end) in restored:
            start = randrange(1, self.backupset.number_of_backups + 1)
            if start == self.backupset.number_of_backups:
                end = start
            else:
                end = randrange(start, self.backupset.number_of_backups + 1)
        restored["{0}/{1}".format(start, end)] = ""
    # NOTE(review): restore_scopes and restore_collections are fetched but
    # not read again in this method.
    restore_scopes = self.get_bucket_scope_restore_cluster_host()
    restore_collections = self.get_bucket_collection_restore_cluster_host()
    self.verify_collections_in_restore_cluster_host()
def run_failover_operations(self, chosen, failover_reason):
    """
    Method to run fail over operations used in the test scenario based on
    failover reason

    For every node in ``chosen`` the node is first made unhealthy (for the
    ``'stop_server'`` and ``"firewall"`` reasons), then failed over through
    the REST API. After the loop the negative cases are checked, a failed
    failover is retried once, and the vbucket distribution is analysed when
    replicas are configured.

    :param chosen: nodes selected for failover
    :param failover_reason: ``'stop_server'``, ``"firewall"`` or any other
                            value (no per-node preparation is done then)
    """
    # Perform Operations related to failover
    graceful_count = 0
    graceful_failover = True
    # Accumulates the result of every fail_over call made in the loop.
    failed_over = True
    for node in chosen:
        unreachable = False
        if failover_reason == 'stop_server':
            unreachable = True
            self.cluster_util.stop_server(node)
            self.log.info("10 secs delay for membase-server to shutdown")
            # wait for 5 minutes until node is down
            # NOTE(review): the timeout is self.wait_timeout * 10, which is
            # 5 minutes only if wait_timeout is 30 seconds -- confirm.
            self.assertTrue(
                RestHelper(self.rest).wait_for_node_status(
                    node, "unhealthy", self.wait_timeout * 10),
                msg="node status is healthy even after waiting for 5 mins")
        elif failover_reason == "firewall":
            unreachable = True
            self.filter_list.append(node.ip)
            server = [srv for srv in self.servers if node.ip == srv.ip][0]
            RemoteUtilHelper.enable_firewall(
                server, bidirectional=self.bidirectional)
            status = RestHelper(self.rest).wait_for_node_status(
                node, "unhealthy", self.wait_timeout * 10)
            if status:
                self.log.info(
                    "node {0}:{1} is 'unhealthy' as expected".format(
                        node.ip, node.port))
            else:
                # verify iptables on the node if something wrong
                for server in self.servers:
                    if server.ip == node.ip:
                        shell = RemoteMachineShellConnection(server)
                        info = shell.extract_remote_info()
                        if info.type.lower() == "windows":
                            o, r = shell.execute_command(
                                "netsh advfirewall show allprofiles")
                            shell.log_command_output(o, r)
                        else:
                            o, r = shell.execute_command(
                                "/sbin/iptables --list")
                            shell.log_command_output(o, r)
                        shell.disconnect()
                # Dump the UI logs and node statuses, then fail the test.
                self.rest.print_UI_logs()
                api = self.rest.baseUrl + 'nodeStatuses'
                status, content, header = self.rest._http_request(api)
                json_parsed = json.loads(content)
                self.log.info("nodeStatuses: {0}".format(json_parsed))
                self.fail(
                    "node status is not unhealthy even after waiting for 5 minutes"
                )
        # verify the failover type
        if self.check_verify_failover_type:
            graceful_count, graceful_failover = self.verify_failover_type(
                node, graceful_count, self.num_replicas, unreachable)
        # define precondition check for failover
        success_failed_over = self.rest.fail_over(
            node.id, graceful=(self.graceful and graceful_failover))
        if self.graceful and graceful_failover:
            if self.stopGracefulFailover or self.killNodes \
                    or self.stopNodes or self.firewallOnNodes:
                # Disturb the victim node, then start the graceful failover
                # a second time and wait for it to finish.
                self.victim_node_operations(node)
                # Start Graceful Again
                self.log.info(" Start Graceful Failover Again !")
                self.sleep(60)
                success_failed_over = self.rest.fail_over(
                    node.id, graceful=(self.graceful and graceful_failover))
                msg = "graceful failover failed for nodes {0}" \
                    .format(node.id)
                self.assertTrue(
                    self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
            else:
                msg = "rebalance failed while removing failover nodes {0}"\
                    .format(node.id)
                self.assertTrue(
                    self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
        failed_over = failed_over and success_failed_over
    # Check for negative cases
    if self.graceful and (failover_reason in ['stop_server', 'firewall']):
        if failed_over:
            # MB-10479
            self.rest.print_UI_logs()
        self.assertFalse(
            failed_over,
            "Graceful Failover was started for unhealthy node!!!")
        return
    elif self.gracefulFailoverFail and not failed_over:
        """ Check if the fail_over fails as expected """
        self.assertFalse(
            failed_over,
            "Graceful failover should fail due to not enough replicas")
        return
    # Check if failover happened as expected or re-try one more time
    if not failed_over:
        self.log.info(
            "unable to failover the node the first time. try again in 60 seconds.."
        )
        # try again in 75 seconds
        # NOTE(review): the log message says 60 seconds but the sleep is 75.
        self.sleep(75)
        # NOTE(review): `node` is the last node of the loop above, so only
        # that node is retried -- confirm this is intended.
        failed_over = self.rest.fail_over(
            node.id, graceful=(self.graceful and graceful_failover))
    if self.graceful and (failover_reason
                          not in ['stop_server', 'firewall']):
        reached = RestHelper(self.rest).rebalance_reached()
        self.assertTrue(
            reached,
            "rebalance failed for Graceful Failover, stuck or did not completed"
        )
    # Verify Active and Replica Bucket Count
    if self.num_replicas > 0:
        nodes = self.filter_servers(self.servers, chosen)
        self.bucket_util.vb_distribution_analysis(
            servers=nodes,
            buckets=self.buckets,
            std=20.0,
            num_replicas=self.num_replicas,
            total_vbuckets=self.total_vbuckets,
            type="failover",
            graceful=(self.graceful and graceful_failover))
def test_with_persistence_issues(self):
    """
    Run sub-document CRUDs while a persistence error is simulated.

    1. Select nodes from the cluster to simulate the specified error
    2. Perform CRUD on the target bucket with given timeout
    3. Using cbstats to verify the operation succeeds
    4. Validate all mutations met the durability condition

    The test returns early, after a critical log, when the durability level
    is MAJORITY_AND_PERSIST_TO_ACTIVE or PERSIST_TO_MAJORITY.
    """
    if self.durability_level.upper() in [
            Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
            Bucket.DurabilityLevel.PERSIST_TO_MAJORITY]:
        self.log.critical("Test not valid for persistence durability")
        return

    # Per-node bookkeeping, all keyed by node.ip.
    error_sim = dict()
    shell_conn = dict()
    cbstat_obj = dict()
    failover_info = dict()
    vb_info_info = dict()
    active_vbs_in_target_nodes = list()
    failover_info["init"] = dict()
    failover_info["afterCrud"] = dict()
    vb_info_info["init"] = dict()
    vb_info_info["afterCrud"] = dict()
    def_bucket = self.cluster.buckets[0]

    # Key ranges: [0, insert_end) is inserted, [insert_end, upsert_end) is
    # read and upserted, [upsert_end, num_items) is removed.
    insert_end_index = self.num_items / 3
    upsert_end_index = (self.num_items / 3) * 2

    self.log.info("Selecting nodes to simulate error condition")
    target_nodes = self.getTargetNodes()

    self.log.info("Will simulate error condition on %s" % target_nodes)
    for node in target_nodes:
        # Create shell_connections
        shell_conn[node.ip] = RemoteMachineShellConnection(node)
        cbstat_obj[node.ip] = Cbstats(node)
        active_vbs = cbstat_obj[node.ip].vbucket_list(def_bucket.name,
                                                      "active")
        active_vbs_in_target_nodes += active_vbs
        # Snapshot seqno and failover stats before any CRUD is run.
        vb_info_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
            def_bucket.name)
        failover_info["init"][node.ip] = \
            cbstat_obj[node.ip].failover_stats(def_bucket.name)

    for node in target_nodes:
        # Perform specified action
        error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
        error_sim[node.ip].create(self.simulate_error,
                                  bucket_name=def_bucket.name)

    # Load sub_docs for upsert/remove mutation to work
    sub_doc_gen = sub_doc_generator(self.key,
                                    start=insert_end_index,
                                    end=self.num_items,
                                    key_size=self.key_size,
                                    doc_size=self.sub_doc_size,
                                    target_vbucket=self.target_vbucket,
                                    vbuckets=self.cluster.vbuckets)
    task = self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, sub_doc_gen,
        DocLoading.Bucket.SubDocOps.INSERT, self.maxttl,
        path_create=True,
        batch_size=20, process_concurrency=8,
        persist_to=self.persist_to, replicate_to=self.replicate_to,
        durability=self.durability_level,
        timeout_secs=self.sdk_timeout)
    self.task_manager.get_task_result(task)

    # Perform CRUDs while the induced error scenario is active
    tasks = list()
    gen_create = sub_doc_generator(self.key,
                                   0,
                                   insert_end_index,
                                   key_size=self.key_size,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster.vbuckets)
    gen_update = sub_doc_generator_for_edit(
        self.key,
        insert_end_index,
        upsert_end_index,
        key_size=self.key_size,
        template_index=0,
        target_vbucket=self.target_vbucket)
    gen_delete = sub_doc_generator_for_edit(
        self.key,
        upsert_end_index,
        self.num_items,
        key_size=self.key_size,
        template_index=2,
        target_vbucket=self.target_vbucket)

    self.log.info("Starting parallel doc_ops - insert/Read/upsert/remove")
    tasks.append(self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen_create,
        DocLoading.Bucket.SubDocOps.INSERT, 0,
        path_create=True,
        batch_size=10, process_concurrency=1,
        replicate_to=self.replicate_to, persist_to=self.persist_to,
        durability=self.durability_level,
        timeout_secs=self.sdk_timeout))
    tasks.append(self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen_update,
        "read", 0,
        batch_size=10, process_concurrency=1,
        replicate_to=self.replicate_to, persist_to=self.persist_to,
        durability=self.durability_level,
        timeout_secs=self.sdk_timeout))
    tasks.append(self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen_update,
        DocLoading.Bucket.SubDocOps.UPSERT, 0,
        path_create=True,
        batch_size=10, process_concurrency=1,
        replicate_to=self.replicate_to, persist_to=self.persist_to,
        durability=self.durability_level,
        timeout_secs=self.sdk_timeout))
    tasks.append(self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen_delete,
        DocLoading.Bucket.SubDocOps.REMOVE, 0,
        batch_size=10, process_concurrency=1,
        replicate_to=self.replicate_to, persist_to=self.persist_to,
        durability=self.durability_level,
        timeout_secs=self.sdk_timeout))

    # Wait for document_loader tasks to complete
    for task in tasks:
        self.task.jython_task_manager.get_task_result(task)
        # Verify there is not failed docs in the task
        if len(task.fail.keys()) != 0:
            self.log_failure("Some CRUD failed for {0}".format(task.fail))

    # Revert the induced error condition
    for node in target_nodes:
        error_sim[node.ip].revert(self.simulate_error,
                                  bucket_name=def_bucket.name)
        # Disconnect the shell connection
        shell_conn[node.ip].disconnect()

    # Fetch latest failover/seqno stats and compare with the snapshot
    self.log.info("Validating failover and seqno cbstats")
    for node in target_nodes:
        vb_info_info["afterCrud"][node.ip] = \
            cbstat_obj[node.ip].vbucket_seqno(def_bucket.name)
        failover_info["afterCrud"][node.ip] = \
            cbstat_obj[node.ip].failover_stats(def_bucket.name)

        # Failover validation: the stats must be unchanged, so the failure
        # message below is reported exactly when they did get updated.
        val = failover_info["init"][node.ip] \
            == failover_info["afterCrud"][node.ip]
        self.assertTrue(val, msg="Failover stats got updated")

        # Seq_no validation (High level): CRUDs must have moved the seqnos.
        val = vb_info_info["init"][node.ip] \
            != vb_info_info["afterCrud"][node.ip]
        self.assertTrue(val, msg="vbucket seq_no not updated after CRUDs")

    # Verify doc count
    self.log.info("Validating doc count")
    self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                 self.cluster.buckets)
    self.bucket_util.verify_stats_all_buckets(self.cluster, self.num_items)
    self.validate_test_failure()
def test_with_process_crash(self):
    """
    Durability must still succeed when one node goes down due to a crash,
    as long as enough nodes remain to satisfy the durability requirement.

    1. Select a node from the cluster to simulate the specified error
    2. Perform CRUD on the target bucket with given timeout
    3. Using cbstats to verify the operation succeeds
    4. Validate all mutations are succeeded

    Note: self.sdk_timeout values is considered as 'seconds'
    """
    if self.num_replicas < 2:
        self.assertTrue(False, msg="Required: num_replicas > 1")

    # Only a single node is affected in this test.
    self.num_nodes_affected = 1

    # Per-node bookkeeping, keyed by node.ip.
    error_sim = {}
    shell_conn = {}
    cbstat_obj = {}
    failover_info = {"init": {}, "afterCrud": {}}
    vb_info_info = {"init": {}, "afterCrud": {}}
    target_vbuckets = range(0, self.cluster.vbuckets)
    active_vbs_in_target_nodes = []
    def_bucket = self.cluster.buckets[0]

    self.log.info("Selecting nodes to simulate error condition")
    target_nodes = self.getTargetNodes()
    self.log.info("Will simulate error condition on %s" % target_nodes)

    # Open a shell per node and snapshot its stats before any CRUD.
    for node in target_nodes:
        ip = node.ip
        shell_conn[ip] = RemoteMachineShellConnection(node)
        stats = Cbstats(node)
        cbstat_obj[ip] = stats
        active_vbs_in_target_nodes.extend(
            stats.vbucket_list(def_bucket.name, "active"))
        vb_info_info["init"][ip] = stats.vbucket_seqno(def_bucket.name)
        failover_info["init"][ip] = stats.failover_stats(def_bucket.name)

    # Sub-docs must exist for the upsert/remove mutations to work.
    initial_gen = sub_doc_generator(self.key,
                                    start=0,
                                    end=self.num_items/2,
                                    key_size=self.key_size,
                                    doc_size=self.sub_doc_size)
    initial_load = self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, initial_gen,
        DocLoading.Bucket.SubDocOps.INSERT, self.maxttl,
        path_create=True,
        batch_size=20, process_concurrency=8,
        persist_to=self.persist_to, replicate_to=self.replicate_to,
        durability=self.durability_level,
        timeout_secs=self.sdk_timeout)
    self.task_manager.get_task_result(initial_load)

    # Induce the error on every target node.
    for node in target_nodes:
        simulator = CouchbaseError(self.log, shell_conn[node.ip])
        error_sim[node.ip] = simulator
        simulator.create(self.simulate_error, bucket_name=def_bucket.name)

    # Keep the affected nodes' active vbuckets out of the doc loading.
    target_vbuckets = list(
        set(target_vbuckets) ^ set(active_vbs_in_target_nodes))

    # Generators for the CRUDs that run while the error is active.
    gen = {}
    gen["insert"] = sub_doc_generator(
        self.key, self.num_items/2, self.crud_batch_size,
        key_size=self.key_size, target_vbucket=target_vbuckets)
    gen["read"] = sub_doc_generator_for_edit(
        self.key, self.num_items/4, 50,
        key_size=self.key_size, template_index=0,
        target_vbucket=target_vbuckets)
    gen["upsert"] = sub_doc_generator_for_edit(
        self.key, self.num_items/4, 50,
        key_size=self.key_size, template_index=0,
        target_vbucket=target_vbuckets)
    gen["remove"] = sub_doc_generator_for_edit(
        self.key, 0, 50,
        key_size=self.key_size, template_index=2,
        target_vbucket=target_vbuckets)

    # Settings shared by every mutation task (the read task omits them).
    durability_args = {"replicate_to": self.replicate_to,
                       "persist_to": self.persist_to,
                       "durability": self.durability_level}

    self.log.info("Starting parallel doc_ops - insert/Read/upsert/remove")
    tasks = {}
    tasks["insert"] = self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen["insert"],
        DocLoading.Bucket.SubDocOps.INSERT, 0,
        path_create=True,
        batch_size=1, process_concurrency=1,
        print_ops_rate=False,
        timeout_secs=self.sdk_timeout,
        **durability_args)
    tasks["read"] = self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen["read"],
        "read", 0,
        batch_size=1, process_concurrency=1,
        print_ops_rate=False,
        timeout_secs=self.sdk_timeout)
    tasks["upsert"] = self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen["upsert"],
        DocLoading.Bucket.SubDocOps.UPSERT, 0,
        path_create=True,
        batch_size=1, process_concurrency=1,
        print_ops_rate=False,
        timeout_secs=self.sdk_timeout,
        **durability_args)
    tasks["remove"] = self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen["remove"],
        DocLoading.Bucket.SubDocOps.REMOVE, 0,
        batch_size=1, process_concurrency=1,
        print_ops_rate=False,
        timeout_secs=self.sdk_timeout,
        **durability_args)

    # Block until every document loader has finished.
    for crud_task in tasks.values():
        self.task_manager.get_task_result(crud_task)

    # Undo the induced error condition.
    for node in target_nodes:
        error_sim[node.ip].revert(self.simulate_error,
                                  bucket_name=def_bucket.name)

    # Read the mutation field back from all docs for validation.
    gen_read = sub_doc_generator_for_edit(self.key, 0, self.num_items, 0,
                                          key_size=self.key_size)
    gen_read.template = '{{ "mutated": "" }}'
    reader_task = self.task.async_load_gen_sub_docs(
        self.cluster, def_bucket, gen_read, "read",
        key_size=self.key_size,
        batch_size=50, process_concurrency=8,
        timeout_secs=self.sdk_timeout)
    self.task_manager.get_task_result(reader_task)

    # Validate each CRUD task.
    mutate_ops = [DocLoading.Bucket.SubDocOps.UPSERT,
                  DocLoading.Bucket.SubDocOps.REMOVE]
    for op_type, crud_task in tasks.items():
        if len(crud_task.success.keys()) != len(gen[op_type].doc_keys):
            self.log_failure("Failure during %s operation" % op_type)
        elif len(crud_task.fail.keys()) != 0:
            self.log_failure("Some CRUD failed during %s: %s"
                             % (op_type, crud_task.fail))

        for doc_key, crud_result in crud_task.success.items():
            if crud_result["cas"] == 0:
                self.log_failure("%s failed for %s: %s"
                                 % (op_type, doc_key, crud_result))
            # Expected "mutated" value: 1 after insert, 2 after
            # upsert/remove; reads are not checked.
            if op_type == DocLoading.Bucket.SubDocOps.INSERT:
                if reader_task.success[doc_key]["value"][0] != 1:
                    self.log_failure("%s value mismatch for %s: %s"
                                     % (op_type, doc_key, crud_result))
            elif op_type in mutate_ops:
                if reader_task.success[doc_key]["value"][0] != 2:
                    self.log_failure("%s value mismatch for %s: %s"
                                     % (op_type, doc_key, crud_result))

    # Compare the latest failover / seqno stats with the initial snapshot.
    self.log.info("Validating failover and seqno cbstats")
    for node in target_nodes:
        ip = node.ip
        vb_info_info["afterCrud"][ip] = \
            cbstat_obj[ip].vbucket_seqno(def_bucket.name)
        failover_info["afterCrud"][ip] = \
            cbstat_obj[ip].failover_stats(def_bucket.name)

        # Failover validation
        # NOTE(review): this passes when the failover stats are UNCHANGED,
        # yet the failure message says "not updated" -- confirm which of
        # the two reflects the intended expectation.
        failover_same = \
            failover_info["init"][ip] == failover_info["afterCrud"][ip]
        error_msg = "Failover stats not updated after error condition"
        self.assertTrue(failover_same, msg=error_msg)

        # Seq_no validation (High level)
        seqno_changed = \
            vb_info_info["init"][ip] != vb_info_info["afterCrud"][ip]
        self.assertTrue(seqno_changed,
                        msg="vbucket seq_no not updated after CRUDs")

    # Close the shell connections.
    for node in target_nodes:
        shell_conn[node.ip].disconnect()

    # Verify doc count
    self.log.info("Validating doc count")
    self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                 self.cluster.buckets)
    self.bucket_util.verify_stats_all_buckets(self.cluster, self.num_items)
    self.validate_test_failure()
def _generate_cert(self, servers, root_cn='Root\\ Authority', type='go',
                   encryption="", key_length=1024, client_ip=None,
                   alt_names='default', dns=None, uri=None,
                   wildcard_dns=None):
    """
    Generate a root / intermediate / per-node certificate chain under
    ``x509main.CACERTFILEPATH`` on ``self.slave_host``.

    The target directory is removed and re-created first. Nothing else is
    generated when ``type`` is neither ``'go'`` nor ``'openssl'``.

    :param servers: nodes to issue certificates for; brackets around a raw
                    IPv6 ``server.ip`` are stripped in place
    :param root_cn: common name of the root certificate (``'go'`` only)
    :param type: ``'go'`` (certificate generator script) or ``'openssl'``
    :param encryption: extra option passed to ``openssl genrsa``
    :param key_length: key size passed to ``openssl genrsa``
    :param client_ip: client to issue a certificate for (``'openssl'``
                      only); the default ``None`` is not usable with
                      ``type='openssl'`` because the value is searched and
                      concatenated as a string
    :param alt_names: ``'default'`` or ``'non_default'``; selects the
                      DNS.1 / URI.1 entries of the client certificate
    :param dns: DNS.1 value used when ``alt_names == 'non_default'``
    :param uri: URI.1 value used when ``alt_names == 'non_default'``
    :param wildcard_dns: when set, written as DNS.0 of the node
                         certificates unless ``server.ip`` contains ".com"
    """
    shell = RemoteMachineShellConnection(self.slave_host)
    try:
        shell.execute_command("rm -rf " + x509main.CACERTFILEPATH)
        shell.execute_command("mkdir " + x509main.CACERTFILEPATH)
        if type == 'go':
            self._generate_go_cert_chain(shell, servers, root_cn)
        elif type == 'openssl':
            self._generate_openssl_cert_chain(
                shell, servers, encryption, key_length, client_ip,
                alt_names, dns, uri, wildcard_dns)
    finally:
        # Do not leak the shell connection, whatever happened above.
        shell.disconnect()


def _log_shell_command(self, shell, command):
    """Run ``command`` on ``shell`` and log its output and error streams."""
    output, error = shell.execute_command(command)
    log.info('Output message is {0} and error message is {1}'.format(
        output, error))


def _generate_go_cert_chain(self, shell, servers, root_cn):
    """Create root, intermediate and per-node certs with the Go generator."""
    cert_dir = x509main.CACERTFILEPATH
    go_run = "go run ./pytests/security/" + x509main.GOCERTGENFILE

    self._log_shell_command(
        shell,
        go_run + " -store-to=" + cert_dir + "root -common-name=" + root_cn)
    self._log_shell_command(
        shell,
        go_run + " -store-to=" + cert_dir + "interm -sign-with=" +
        cert_dir + "root -common-name=Intemediate\\ Authority")

    for server in servers:
        # A raw IPv6 address is given in brackets; strip them.
        if "[" in server.ip:
            server.ip = server.ip.replace("[", "").replace("]", "")
        self._log_shell_command(
            shell,
            go_run + " -store-to=" + cert_dir + server.ip +
            " -sign-with=" + cert_dir + "interm -common-name=" +
            server.ip + " -final=true")
        self._log_shell_command(
            shell,
            "cat " + cert_dir + server.ip + ".crt " + cert_dir +
            "interm.crt > " + " " + cert_dir + "long_chain" + server.ip +
            ".pem")

    # A root certificate that is unrelated to the chain created above.
    shell.execute_command(
        go_run + " -store-to=" + cert_dir +
        "incorrect_root_cert -common-name=Incorrect\\ Authority")


def _issue_openssl_leaf_cert(self, shell, name, conf_file, encryption,
                             key_length):
    """Create key, CSR, intermediate-signed cert and long chain for ``name``."""
    cert_dir = x509main.CACERTFILEPATH
    self._log_shell_command(
        shell,
        "openssl genrsa " + encryption + " -out " + cert_dir + name +
        ".key " + str(key_length))
    self._log_shell_command(
        shell,
        "openssl req -new -key " + cert_dir + name + ".key -out " +
        cert_dir + name + ".csr -config " + conf_file)
    self._log_shell_command(
        shell,
        "openssl x509 -req -in " + cert_dir + name + ".csr -CA " +
        cert_dir + "int.pem -CAkey " + cert_dir +
        "int.key -CAcreateserial -CAserial " + cert_dir +
        "intermediateCA.srl -out " + cert_dir + name +
        ".pem -days 365 -sha256 -extfile " + conf_file +
        " -extensions req_ext")
    self._log_shell_command(
        shell,
        "cat " + cert_dir + name + ".pem " + cert_dir + "int.pem " +
        cert_dir + "ca.pem > " + cert_dir + "long_chain" + name + ".pem")


def _generate_openssl_cert_chain(self, shell, servers, encryption,
                                 key_length, client_ip, alt_names, dns, uri,
                                 wildcard_dns):
    """Create root, intermediate, per-node and client certs with openssl."""
    import fileinput
    import sys
    from shutil import copyfile

    cert_dir = x509main.CACERTFILEPATH
    base_conf = "./pytests/security/clientconf.conf"
    server_conf = "./pytests/security/clientconf3.conf"
    client_conf = "./pytests/security/clientconf2.conf"

    # Root CA key and self-signed certificate.
    self._log_shell_command(
        shell,
        "openssl genrsa " + encryption + " -out " + cert_dir + "ca.key " +
        str(key_length))
    self._log_shell_command(
        shell,
        "openssl req -new -x509 -days 3650 -sha256 -key " + cert_dir +
        "ca.key -out " + cert_dir +
        "ca.pem -subj '/C=UA/O=My Company/CN=My Company Root CA'")

    # Intermediate CA key, CSR and certificate signed by the root.
    self._log_shell_command(
        shell,
        "openssl genrsa " + encryption + " -out " + cert_dir + "int.key " +
        str(key_length))
    self._log_shell_command(
        shell,
        "openssl req -new -key " + cert_dir + "int.key -out " + cert_dir +
        "int.csr -subj '/C=UA/O=My Company/CN=My Company Intermediate CA'")
    self._log_shell_command(
        shell,
        "openssl x509 -req -in " + cert_dir + "int.csr -CA " + cert_dir +
        "ca.pem -CAkey " + cert_dir +
        "ca.key -CAcreateserial -CAserial " + cert_dir +
        "rootCA.srl -extfile ./pytests/security/v3_ca.ext -out " +
        cert_dir + "int.pem -days 365 -sha256")

    for server in servers:
        # check if the ip address is ipv6 raw ip address, remove [] brackets
        if "[" in server.ip:
            server.ip = server.ip.replace("[", "").replace("]", "")

        # Start from the template and append this node's alt name.
        copyfile(base_conf, server_conf)
        with open(server_conf, "a+") as fin:
            if ".com" in server.ip and wildcard_dns is None:
                fin.write("\nDNS.0 = {0}".format(server.ip))
            elif wildcard_dns:
                fin.write("\nDNS.0 = {0}".format(wildcard_dns))
            else:
                fin.write("\nIP.0 = {0}".format(
                    server.ip.replace('[', '').replace(']', '')))

        # Substitute the "ip_address" placeholder of the template in place.
        for line in fileinput.input(server_conf, inplace=1):
            if "ip_address" in line:
                line = line.replace("ip_address", server.ip)
            sys.stdout.write(line)

        # print file contents for easy debugging
        with open(server_conf, "r") as fout:
            print(fout.read())

        self._issue_openssl_leaf_cert(shell, server.ip, server_conf,
                                      encryption, key_length)

    self._log_shell_command(
        shell, "cp " + cert_dir + "ca.pem " + cert_dir + "root.crt")
    os.remove(server_conf)

    # Check if client_ip is ipv6, remove []
    if "[" in client_ip:
        client_ip = client_ip.replace("[", "").replace("]", "")

    copyfile(base_conf, client_conf)
    with open(client_conf, "a+") as fin:
        if alt_names == 'default':
            fin.write("\nDNS.1 = us.cbadminbucket.com")
            fin.write("\nURI.1 = www.cbadminbucket.com")
        elif alt_names == 'non_default':
            if dns is not None:
                fin.write("\nDNS.1 = " + dns)
            if uri is not None:
                # The URI entry carries the uri argument, not the DNS name.
                fin.write("\nURI.1 = " + uri)
        # NOTE(review): `server` is the last node of the loop above, so the
        # client certificate gets that node's address -- confirm intended.
        if ".com" in server.ip:
            fin.write("\nDNS.0 = {0}".format(server.ip))
        else:
            fin.write("\nIP.0 = {0}".format(
                server.ip.replace('[', '').replace(']', '')))

    # print file contents for easy debugging
    with open(client_conf, "r") as fout:
        print(fout.read())

    # Generate Certificate for the client
    self._issue_openssl_leaf_cert(shell, client_ip, client_conf,
                                  encryption, key_length)
    os.remove(client_conf)
def test_read_docs_using_multithreads(self):
    """
    Test Focus : Read same items simultaneously using MultiThreading.

    Flow:
      1. Queue one async update task for every entry returned by
         get_fragmentation_upsert_docs_list().
      2. Restart couchbase on every node of the cluster.
      3. Queue read tasks until read_thread_count of them were started.
      4. Collect the result of every queued task, then run the
         bucket stats check on all buckets.
    """
    self.log.info("test_read_docs_using_multithreads starts")
    pending_tasks = dict()

    # Update phase: each entry becomes the end of the update range.
    for upsert_end in self.get_fragmentation_upsert_docs_list():
        self.doc_ops = "update"
        self.update_start = 0
        self.update_end = upsert_end
        self.mutate = -1
        self.generate_docs(doc_ops="update")
        update_tasks = self.loadgen_docs(self.retry_exceptions,
                                         self.ignore_exceptions,
                                         _sync=False)
        pending_tasks.update(update_tasks.items())

    self.doc_ops = "read"

    # if self.next_half is true then one thread will read in ascending
    # order and other in descending order
    if self.next_half:
        descending_gen = self.genrate_docs_basic(
            -int(self.num_items - 1), 1)

    for node in self.cluster.nodes_in_cluster:
        node_shell = RemoteMachineShellConnection(node)
        node_shell.restart_couchbase()
        node_shell.disconnect()

    started = 0
    while started < self.read_thread_count:
        reader_tasks = self.loadgen_docs(self.retry_exceptions,
                                         self.ignore_exceptions,
                                         _sync=False)
        pending_tasks.update(reader_tasks.items())
        started += 1

        if self.next_half and started < self.read_thread_count:
            reader_tasks = self.bucket_util._async_validate_docs(
                self.cluster, descending_gen, "read", 0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                timeout_secs=self.sdk_timeout,
                retry_exceptions=self.retry_exceptions,
                ignore_exceptions=self.ignore_exceptions)
            pending_tasks.update(reader_tasks.items())
            started += 1

        self.sleep(1,"Ensures all main read tasks will have unique names")

    for queued_task in pending_tasks:
        self.task_manager.get_task_result(queued_task)

    self.log.info("Waiting for ep-queues to get drained")
    self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                 self.cluster.buckets)
    self.log.info("test_read_docs_using_multithreads ends")
def _copy_node_key_chain_cert(self, host, src_path, dest_path):
    """
    Copy a local file to a remote host.

    :param host: server the shell connection is opened against
    :param src_path: path of the file on the local machine
    :param dest_path: destination path on the remote host
    """
    shell = RemoteMachineShellConnection(host)
    try:
        shell.copy_file_local_to_remote(src_path, dest_path)
    finally:
        # Release the connection even when the copy raises.
        shell.disconnect()
def _create_inbox_folder(self, host):
    """
    Create the directory self.install_path + x509main.CHAINFILEPATH
    on a remote node.

    :param host: node on which the directory is created; when None is
                 passed the node stored in self.host is used
    """
    # The original always connected to self.host and ignored the argument.
    target = self.host if host is None else host
    shell = RemoteMachineShellConnection(target)
    try:
        final_path = self.install_path + x509main.CHAINFILEPATH
        shell.create_directory(final_path)
    finally:
        # Release the connection even when directory creation raises.
        shell.disconnect()
def test_poisoned_cas(self):
    """
    @note:  - set the clock ahead
            - do lots of sets and get some CASs
            - do a set and get the CAS (flag, CAS, value) and save it
            - set the clock back
            - verify the CAS is still big on new sets
            - reset the CAS
            - do the vbucket max cas and verify
            - do a new mutation and verify the CAS is smaller
    """
    # creating a user 'default' for the bucket
    self.log.info('starting test_poisoned_cas')
    payload = "name={0}&roles=admin&password=password".format(
        self.buckets[0].name)
    self.rest.add_set_builtin_user(self.buckets[0].name, payload)
    sdk_client = SDKClient(scheme='couchbase',
                           hosts=[self.servers[0].ip],
                           bucket=self.buckets[0].name)
    mc_client = MemcachedClientHelper.direct_client(
        self.servers[0], self.buckets[0])

    shell = RemoteMachineShellConnection(self.servers[0])
    try:
        # move the system clock ahead to poison the CAS
        self.assertTrue(
            shell.change_system_time(LWWStatsTests.ONE_HOUR_IN_SECONDS),
            'Failed to advance the clock')
        output, error = shell.execute_command('date')
        self.log.info('Date after is set forward {0}'.format(output))

        sdk_client.set('key1', 'val1')
        rc = mc_client.get('key1')
        poisoned_cas = rc[1]
        self.log.info('The poisoned CAS is {0}'.format(poisoned_cas))

        # do lots of mutations to set the max CAS for all vbuckets
        gen_load = BlobGenerator('key-for-cas-test', 'value-for-cas-test-',
                                 self.value_size, end=10000)
        self._load_all_buckets(self.master, gen_load, "create", 0)

        # move the clock back again and verify the CAS stays large
        self.assertTrue(
            shell.change_system_time(-LWWStatsTests.ONE_HOUR_IN_SECONDS),
            'Failed to change the clock')
        output, error = shell.execute_command('date')
        self.log.info('Date after is set backwards {0}'.format(output))

        use_mc_bin_client = self.input.param("use_mc_bin_client", True)
        if use_mc_bin_client:
            rc = mc_client.set('key2', 0, 0, 'val2')
            second_poisoned_cas = rc[1]
        else:
            rc = sdk_client.set('key2', 'val2')
            second_poisoned_cas = rc.cas
        self.log.info(
            'The second_poisoned CAS is {0}'.format(second_poisoned_cas))
        # The message now reports both values; the original passed two
        # arguments to a format string with a single placeholder.
        self.assertTrue(
            second_poisoned_cas > poisoned_cas,
            'Second poisoned CAS {0} is not larger than the first poisoned cas {1}'
            .format(second_poisoned_cas, poisoned_cas))

        # reset the CAS for all vbuckets. This needs to be done in conjunction with a clock change. If the clock is not
        # changed then the CAS will immediately continue with the clock. I see two scenarios:
        # 1. Set the clock back 1 hours and the CAS back 30 minutes, the CAS should be used
        # 2. Set the clock back 1 hour, set the CAS back 2 hours, the clock should be use

        # do case 1, set the CAS back 30 minutes. Calculation below assumes the CAS is in nanoseconds
        earlier_max_cas = poisoned_cas - 30 * 60 * 1000000000
        for i in range(self.vbuckets):
            output, error = shell.execute_cbepctl(
                self.buckets[0], "", "set_vbucket_param", "max_cas ",
                str(i) + ' ' + str(earlier_max_cas))
            if len(error) > 0:
                self.fail('Failed to set the max cas')

        # verify the max CAS; nothing mutates between iterations, so one
        # stats snapshot is enough for all vbuckets
        vbucket_details = mc_client.stats('vbucket-details')
        for i in range(self.vbuckets):
            max_cas = int(vbucket_details['vb_' + str(i) + ':max_cas'])
            self.assertTrue(
                max_cas == earlier_max_cas,
                'Max CAS not properly set for vbucket {0} set as {1} and observed {2}'
                .format(i, earlier_max_cas, max_cas))
            self.log.info(
                'Per cbstats the max cas for bucket {0} is {1}'.format(
                    i, max_cas))

        sdk_client.set('key-after-resetting cas', 'val1')
        rc2 = mc_client.get('key-after-resetting cas')
        set_cas_after_reset_max_cas = rc2[1]
        self.log.info(
            'The later CAS is {0}'.format(set_cas_after_reset_max_cas))
        self.assertTrue(
            set_cas_after_reset_max_cas < poisoned_cas,
            'For {0} CAS has not decreased. Current CAS {1} poisoned CAS {2}'.
            format('key-after-resetting cas', set_cas_after_reset_max_cas,
                   poisoned_cas))

        # do a bunch of sets and verify the CAS is small - this is really only one set, need to do more
        gen_load = BlobGenerator('key-for-cas-test-after-cas-is-reset',
                                 'value-for-cas-test-', self.value_size,
                                 end=1000)
        self._load_all_buckets(self.master, gen_load, "create", 0)
        gen_load.reset()
        while gen_load.has_next():
            key, value = gen_load.next()
            # Only the fetch is best-effort. The assertion sits outside the
            # try so a CAS that did not decrease actually fails the test
            # (a bare except used to swallow the AssertionError).
            try:
                cas = mc_client.get(key)[1]
            except Exception:
                self.log.info('get error with {0}'.format(key))
                continue
            self.assertTrue(
                cas < poisoned_cas,
                'For key {0} CAS has not decreased. Current CAS {1} poisoned CAS {2}'
                .format(key, cas, poisoned_cas))

        rc = sdk_client.set('key3', 'val1')
        better_cas = rc.cas
        self.log.info('The better CAS is {0}'.format(better_cas))
        self.assertTrue(better_cas < poisoned_cas,
                        'The CAS was not improved')
    finally:
        # Release the shell connection whether or not an assertion failed.
        shell.disconnect()

    # set the clock way ahead - remote_util_OS.py (new)
    # do a bunch of mutations - not really needed
    # do the fix command - cbepctl, the existing way (remote util)
    # do some mutations, verify they conform to the new CAS - build on the CAS code,
    # where to iterate over the keys and get the CAS?
def testCreateRenameDeleteGroup(self):
    """
    Create, rename and delete a server group with the "group-manage"
    CLI command, asserting the CLI output and calling checkConfig with
    ids 8210 (create), 8212 (rename) and 8211 (delete) after each step.

    The linux and windows paths run the same three commands; they differ
    in output post-processing (del_runCmd_value on linux only), in the
    output index that is asserted and in the expected messages.
    """
    remote_client = RemoteMachineShellConnection(self.master)
    cli_command = "group-manage"
    source = self.source
    user = self.ldapUser
    rest = RestConnection(self.master)

    def run_group_manage(options):
        # Every step uses the same host and credentials; only options vary.
        cli_output, _ = remote_client.execute_couchbase_cli(
            cli_command=cli_command, options=options,
            cluster_host="localhost", user=self.ldapUser,
            password=self.ldapPass)
        return cli_output

    def lookup_group_uuid(group_name):
        # The uuid is the fifth "/"-separated piece of the zone uri.
        return rest.get_zone_uri()[group_name].split("/")[4]

    def expected_event(group_name, group_uuid, with_nodes=False):
        fields = {'group_name':group_name, 'source':source, 'user':user,
                  'ip':'127.0.0.1', 'port':1234}
        if with_nodes:
            fields['nodes'] = []
        fields['uuid'] = group_uuid
        return fields

    if self.os == "linux":
        # NOTE(review): the create step compares output[1] with a string
        # while rename/delete compare it with a one-element list - confirm
        # what del_runCmd_value returns.
        # create group
        output = self.del_runCmd_value(
            run_group_manage(" --create --group-name=group2"))
        self.assertEqual(output[1], "SUCCESS: Server group created")
        group_uuid = lookup_group_uuid('group2')
        self.checkConfig(8210, self.master,
                         expected_event('group2', group_uuid))

        # rename group test
        output = self.del_runCmd_value(
            run_group_manage(" --rename=group3 --group-name=group2"))
        self.assertEqual(output[1], ["SUCCESS: group renamed group"])
        self.checkConfig(8212, self.master,
                         expected_event('group3', group_uuid,
                                        with_nodes=True))

        # delete group test
        output = self.del_runCmd_value(
            run_group_manage(" --delete --group-name=group3"))
        self.assertEqual(output[1], ["SUCCESS: group deleted group"])
        self.checkConfig(8211, self.master,
                         expected_event('group3', group_uuid))

    if self.os == "windows":
        # create group
        output = run_group_manage(" --create --group-name=group2")
        self.assertEqual(output[0], "SUCCESS: group created group2")
        group_uuid = lookup_group_uuid('group2')
        self.checkConfig(8210, self.master,
                         expected_event('group2', group_uuid))

        # rename group test
        output = run_group_manage(" --rename=group3 --group-name=group2")
        self.assertEqual(output[0], "SUCCESS: group renamed group2")
        self.checkConfig(8212, self.master,
                         expected_event('group3', group_uuid,
                                        with_nodes=True))

        # delete group test
        output = run_group_manage(" --delete --group-name=group3")
        self.assertEqual(output[0], "SUCCESS: group deleted group3")
        self.checkConfig(8211, self.master,
                         expected_event('group3', group_uuid))

    remote_client.disconnect()
def test_cbbackupmgr_metadata_with_escape_characters_and_dataset_conflicts(self):
    """
    Back up and restore Analytics metadata whose dataverse, dataset and
    index names need back-tick escaping, then restore again after a
    different dataset was created on the default dataverse.

    Steps: create escaped dataset/index on the default and on a custom
    dataverse, validate counts, back up, clean up and recreate other
    entities, restore and validate counts, drop the escaped dataset,
    create dataset 'ds' with an index, restore once more and validate
    the metadata of both datasets.
    """
    dataset_name_escape_characters = '`ds-beer`'
    index_name_escape_characters = '`idx-name`'
    dataverse_name_escape_characters = '`dataverse-custom`'
    qualified_dataset = (dataverse_name_escape_characters + "." +
                         dataset_name_escape_characters)

    def create_index(index_name, dataset_ref, field):
        # Issue "create index ... if not exists" and require success.
        create_idx_statement = 'create index {0} if not exists on {1}({2})'.format(
            index_name, dataset_ref, field)
        status, _, _, _, _ = self.cbas_util.execute_statement_on_cbas_util(
            create_idx_statement)
        self.assertTrue(status == 'success', 'Create Index query failed')

    def on_master_shell(action):
        # Open a shell on the master, run the action and always disconnect;
        # the connections used to be left open.
        shell = RemoteMachineShellConnection(self.master)
        try:
            return action(shell)
        finally:
            shell.disconnect()

    def restore_and_check():
        o = on_master_shell(lambda shell: shell.restore_backup(self.master))
        self.assertTrue('Restore completed successfully' in ''.join(o),
                        msg='Restore was unsuccessful')

    self.log.info('Create dataset on default dataverse')
    self.cbas_util.create_dataset_on_bucket(
        self.beer_sample_bucket, dataset_name_escape_characters,
        where_field='type', where_value='beer')

    self.log.info('Create index on default dataverse')
    create_index(index_name_escape_characters,
                 dataset_name_escape_characters,
                 self.index_field_composite)

    self.log.info('Connect link Local')
    self.cbas_util.connect_link()

    self.log.info("Create primary index")
    query = "CREATE PRIMARY INDEX ON `{0}` using gsi".format(
        self.beer_sample_bucket)
    self.rest.query_tool(query)

    self.log.info('Validate dataset count on default bucket')
    count_n1ql = self.rest.query_tool(
        'select count(*) from `%s` where type = "beer"'
        % self.beer_sample_bucket)['results'][0]['$1']
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            dataset_name_escape_characters, count_n1ql),
        msg='Count mismatch on CBAS')

    self.log.info('Create custom dataverse')
    self.cbas_util.create_dataverse_on_cbas(dataverse_name_escape_characters)

    self.log.info('Create dataset on custom dataverse')
    self.cbas_util.create_dataset_on_bucket(
        self.beer_sample_bucket, dataset_name_escape_characters,
        dataverse=dataverse_name_escape_characters)

    self.log.info('Create index on default dataverse')
    create_index(index_name_escape_characters, qualified_dataset,
                 self.index_field)

    self.log.info('Connect link Local')
    self.cbas_util.execute_statement_on_cbas_util(
        'connect link %s.Local' % dataverse_name_escape_characters)

    self.log.info('Validate dataset count on custom bucket')
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            qualified_dataset, self.beer_sample_docs_count),
        msg='Count mismatch on CBAS')

    self.log.info('Backup Analytics metadata using cbbackupmgr')
    o = on_master_shell(lambda shell: shell.create_backup(self.master))
    self.assertTrue('Backup successfully completed' in ''.join(o),
                    msg='Backup was unsuccessful')

    self.log.info('Drop all analytics data - Dataverses, Datasets, Indexes')
    self.cleanup_cbas()

    self.log.info('Load documents in KV, create dataverse, datasets, index and validate')
    self.create_ds_index_and_validate_count()

    self.log.info('Verify bucket state')
    self.build_bucket_status_map()
    self.assertEqual(
        self.dataverse_bucket_map[self.dataverse][self.beer_sample_bucket],
        'disconnected')
    self.assertEqual(
        self.dataverse_bucket_map[dataverse_name_escape_characters][self.beer_sample_bucket],
        'disconnected')
    self.assertEqual(
        self.dataverse_bucket_map[self.dataverse_1][self.beer_sample_bucket],
        'connected')
    self.assertEqual(
        self.dataverse_bucket_map[self.dataverse_2][self.travel_sample_bucket],
        'connected')

    self.log.info('Restore Analytics metadata using cbbackupmgr')
    restore_and_check()

    self.log.info('Connect link Local')
    self.cbas_util.execute_statement_on_cbas_util(
        'connect link %s.Local' % dataverse_name_escape_characters)

    self.log.info('Connect link Local')
    self.cbas_util.connect_link()

    self.log.info('Validate dataset count post restore')
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            dataset_name_escape_characters, count_n1ql),
        msg='Count mismatch on CBAS')
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            qualified_dataset, self.beer_sample_docs_count),
        msg='Count mismatch on CBAS')

    self.log.info('drop dataset on default dataverse')
    self.cbas_util.drop_dataset(dataset_name_escape_characters)

    self.log.info('create a dataset on Default dataverse')
    dataset_name = 'ds'
    self.cbas_util.create_dataset_on_bucket(self.beer_sample_bucket,
                                            dataset_name)

    self.log.info('Create index on default dataverse')
    create_index(self.index_name, dataset_name, self.index_field_composite)

    self.log.info('Restore Analytics metadata using cbbackupmgr')
    restore_and_check()

    self.log.info('Validate metadata for %s dataverse' % self.dataverse)
    self.validate_metadata(self.dataverse, dataset_name_escape_characters,
                           index_name_escape_characters, dataverse_count=1,
                           dataset_count=1, index_count=1)
    self.validate_metadata(self.dataverse, dataset_name, self.index_name,
                           dataverse_count=1, dataset_count=1,
                           index_count=1)
def setUp(self):
    """
    Prepare the CLI test fixture.

    Reads the test input parameters, opens a shell to the master node,
    resolves the server bin directory through the /diag/eval endpoint,
    selects OS specific paths (linux defaults, overridden for non-root
    installs, windows and mac), stores the REST credentials, optionally
    rebalances the remaining servers in, and adds a built-in admin user
    per bucket.
    """
    self.times_teardown_called = 1
    super(CliBaseTest, self).setUp()
    self.r = random.Random()
    self.vbucket_count = 1024
    self.cluster = Cluster()
    self.clusters_dic = self.input.clusters
    if self.clusters_dic:
        if len(self.clusters_dic) > 1:
            self.dest_nodes = self.clusters_dic[1]
            self.dest_master = self.dest_nodes[0]
        elif len(self.clusters_dic) == 1:
            self.log.error(
                "=== need 2 cluster to setup xdcr in ini file ===")
    else:
        self.log.error("**** Cluster config is setup in ini file. ****")
    self.shell = RemoteMachineShellConnection(self.master)
    if not self.skip_init_check_cbserver:
        self.rest = RestConnection(self.master)
        self.cb_version = self.rest.get_nodes_version()
        # cli output message
        self.cli_bucket_create_msg = "SUCCESS: Bucket created"
        self.cli_rebalance_msg = "SUCCESS: Rebalance complete"
        if self.cb_version[:3] == "4.6":
            self.cli_bucket_create_msg = "SUCCESS: bucket-create"
            self.cli_rebalance_msg = "SUCCESS: rebalanced cluster"
    self.import_back = self.input.param("import_back", False)
    if self.import_back:
        if len(self.servers) < 3:
            self.fail("This test needs minimum of 3 vms to run ")
    self.test_type = self.input.param("test_type", "import")
    self.import_file = self.input.param("import_file", None)
    self.imex_type = self.input.param("imex_type", "json")
    self.format_type = self.input.param("format_type", "lines")
    self.import_method = self.input.param("import_method", "file://")
    self.force_failover = self.input.param("force_failover", False)
    self.json_invalid_errors = self.input.param("json-invalid-errors", None)
    self.field_separator = self.input.param("field-separator", "comma")
    self.key_gen = self.input.param("key-gen", True)
    self.skip_docs = self.input.param("skip-docs", None)
    self.limit_docs = self.input.param("limit-docs", None)
    self.limit_rows = self.input.param("limit-rows", None)
    self.skip_rows = self.input.param("skip-rows", None)
    self.omit_empty = self.input.param("omit-empty", None)
    self.infer_types = self.input.param("infer-types", None)
    self.fx_generator = self.input.param("fx-generator", None)
    self.fx_gen_start = self.input.param("fx-gen-start", None)
    self.secure_conn = self.input.param("secure-conn", False)
    self.no_cacert = self.input.param("no-cacert", False)
    self.no_ssl_verify = self.input.param("no-ssl-verify", False)
    self.verify_data = self.input.param("verify-data", False)
    self.field_substitutions = self.input.param("field-substitutions", None)
    self.check_preload_keys = self.input.param("check-preload-keys", True)
    self.debug_logs = self.input.param("debug-logs", False)
    self.should_fail = self.input.param("should-fail", False)
    info = self.shell.extract_remote_info()
    self.os_version = info.distribution_version.lower()
    self.deliverable_type = info.deliverable_type.lower()
    # Named os_type so the builtin type() is not shadowed.
    os_type = info.type.lower()
    self.excluded_commands = self.input.param("excluded_commands", None)
    self.os = 'linux'
    self.full_v = None
    self.short_v = None
    self.build_number = None

    cmd = 'curl -g {0}:8091/diag/eval -u {1}:{2} '.format(
        self.master.ip, self.master.rest_username,
        self.master.rest_password)
    cmd += '-d "path_config:component_path(bin)."'

    def query_bin_path():
        # check_output returns bytes on Python 3; the substring checks and
        # the replace() below need text.
        raw = subprocess.check_output(cmd, shell=True)
        return raw if isinstance(raw, str) else raw.decode()

    bin_path = query_bin_path()
    if "bin" not in bin_path:
        if "localhost only" in bin_path:
            # diag/eval is restricted to localhost: enable it and retry once
            self.enable_diag_eval_on_non_local_hosts()
            bin_path = query_bin_path()
            if "bin" not in bin_path:
                self.fail("Check if cb server install on {0}".format(
                    self.master.ip))
        else:
            self.fail("Check if cb server install on {0}".format(
                self.master.ip))
    self.cli_command_path = bin_path.replace('"', '') + "/"

    # linux defaults
    self.root_path = LINUX_ROOT_PATH
    self.tmp_path = "/tmp/"
    self.tmp_path_raw = "/tmp/"
    self.cmd_backup_path = LINUX_BACKUP_PATH
    self.backup_path = LINUX_BACKUP_PATH
    self.cmd_ext = ""
    self.src_file = ""
    self.des_file = ""
    self.sample_files_path = LINUX_COUCHBASE_SAMPLE_PATH
    self.log_path = LINUX_COUCHBASE_LOGS_PATH
    self.base_cb_path = LINUX_CB_PATH
    # non root path
    if self.nonroot:
        self.sample_files_path = "/home/%s%s" % (
            self.master.ssh_username, LINUX_COUCHBASE_SAMPLE_PATH)
        self.log_path = "/home/%s%s" % (self.master.ssh_username,
                                        LINUX_COUCHBASE_LOGS_PATH)
        self.base_cb_path = "/home/%s%s" % (self.master.ssh_username,
                                            LINUX_CB_PATH)
        self.root_path = "/home/%s/" % self.master.ssh_username
    if os_type == 'windows':
        self.os = 'windows'
        self.cmd_ext = ".exe"
        self.root_path = WIN_ROOT_PATH
        self.tmp_path = WIN_TMP_PATH
        self.tmp_path_raw = WIN_TMP_PATH_RAW
        self.cmd_backup_path = WIN_BACKUP_C_PATH
        self.backup_path = WIN_BACKUP_PATH
        self.sample_files_path = WIN_COUCHBASE_SAMPLE_PATH_C
        self.log_path = WIN_COUCHBASE_LOGS_PATH
        win_format = "C:/Program Files"
        # Same value as before, spelled with an explicit backslash instead
        # of the invalid "\ " escape sequence.
        cygwin_format = "/cygdrive/c/Program\\ Files"
        if win_format in self.cli_command_path:
            self.cli_command_path = self.cli_command_path.replace(
                win_format, cygwin_format)
        self.base_cb_path = WIN_CB_PATH
    if info.distribution_type.lower() == 'mac':
        self.os = 'mac'
    self.full_v, self.short_v, self.build_number = \
        self.shell.get_cbversion(os_type)
    self.couchbase_usrname = "%s" % (
        self.input.membase_settings.rest_username)
    # "%s" mirrors the username line; the previous format string had no
    # conversion specifier, so the % operator raised TypeError.
    self.couchbase_password = "%s" % (
        self.input.membase_settings.rest_password)
    self.cb_login_info = "%s:%s" % (self.couchbase_usrname,
                                    self.couchbase_password)
    self.path_type = self.input.param("path_type", None)
    if self.path_type is None:
        self.log.info("Test command with absolute path ")
    elif self.path_type == "local":
        self.log.info("Test command at %s dir " % self.cli_command_path)
        self.cli_command_path = "cd %s; ./" % self.cli_command_path
    self.cli_command = self.input.param("cli_command", None)
    self.command_options = self.input.param("command_options", None)
    if self.command_options is not None:
        self.command_options = self.command_options.split(";")
    self.start_with_cluster = self.input.param("start_with_cluster", True)
    if str(self.__class__).find(
            'couchbase_clitest.CouchbaseCliTest') == -1:
        if len(self.servers) > 1 and int(
                self.nodes_init) == 1 and self.start_with_cluster:
            servers_in = [
                self.servers[i + 1] for i in range(self.num_servers - 1)
            ]
            self.cluster.rebalance(self.servers[:1], servers_in, [])
    for bucket in self.buckets:
        testuser = [{
            'id': bucket.name,
            'name': bucket.name,
            'password': '******'
        }]
        rolelist = [{
            'id': bucket.name,
            'name': bucket.name,
            'roles': 'admin'
        }]
        self.add_built_in_server_user(testuser=testuser, rolelist=rolelist)
def enable_dp(self):
    """
    Run the "enable-developer-preview" CLI command with "--enable" on
    self.server, passing "yes" as additional input.

    :return: tuple (stdout, stderr, success) where success is the result
             of _was_success(stdout, "Developer mode enabled")
    """
    conn = RemoteMachineShellConnection(self.server)
    out, err = conn.execute_couchbase_cli(
        "enable-developer-preview",
        self.hostname,
        "--enable",
        additional_input="yes")
    conn.disconnect()
    succeeded = self._was_success(out, "Developer mode enabled")
    return out, err, succeeded
def getRemoteFile(self, host, remotepath, filename):
    """
    Fetch a file from a remote host into audit.DOWNLOADPATH.

    :param host: server the shell connection is opened against
    :param remotepath: remote path passed to get_file
    :param filename: name of the file to fetch
    """
    shell = RemoteMachineShellConnection(host)
    try:
        shell.get_file(remotepath, filename, audit.DOWNLOADPATH)
    finally:
        # Release the connection even when the download raises.
        shell.disconnect()
def testAddRemoveNodes(self):
    """
    Check role based access for the node management CLI commands.

    Nodes are always added and rebalanced in first; depending on the
    "cli_command" test parameter the test then removes nodes
    ('server-remove'), fails nodes over ('failover') or fails over and
    re-adds nodes ('server-readd'). The CLI output of each validated
    step is passed to _validate_roles together with the result expected
    for self.role ("Forbidden" or 'SUCCESS').
    """
    if self.role in [
            'replication_admin', 'views_admin[*]', 'bucket_admin[*]'
    ]:
        result = "Forbidden"
    elif self.role in ['admin', 'cluster_admin']:
        result = 'SUCCESS'
    else:
        # Fail before touching the cluster; previously an unknown role
        # surfaced later as an unbound 'result' variable.
        self.fail("No expected result defined for role {0}".format(
            self.role))

    nodes_add = self.input.param("nodes_add", 1)
    nodes_rem = self.input.param("nodes_rem", 1)
    nodes_failover = self.input.param("nodes_failover", 0)
    nodes_readd = self.input.param("nodes_readd", 0)
    requested_command = self.input.param("cli_command", None)
    remote_client = RemoteMachineShellConnection(self.master)

    def run_cli(command, **extra):
        # All commands target the local node with the LDAP credentials.
        output, _ = remote_client.execute_couchbase_cli(
            cli_command=command,
            cluster_host="127.0.0.1:8091",
            user=self.ldapUser,
            password=self.ldapPass,
            **extra)
        return output

    def force_failover_node(node_ip):
        # Hard failover of one node, validated against the expected result.
        self.log.info("failover node {0}".format(node_ip))
        options = "--server-failover={0}:8091".format(node_ip)
        options += " --force"
        self._validate_roles(run_cli('failover', options=options), result)

    for num in range(nodes_add):
        options = "--server-add=http://{0}:8091 --server-add-username=Administrator --server-add-password=password".format(
            self.servers[num + 1].ip)
        run_cli('server-add', options=options)
    self._validate_roles(run_cli('rebalance'), result)

    # The original reassigned cli_command inside each branch to a value
    # that no later branch matches, so at most one branch ever runs.
    if requested_command == 'server-remove':
        for num in range(nodes_rem):
            options = "--server-remove={0}:8091".format(
                self.servers[nodes_add - num].ip)
            self._validate_roles(run_cli("rebalance", options=options),
                                 result)
    elif requested_command in ["failover"]:
        for num in range(nodes_failover):
            force_failover_node(
                self.servers[nodes_add - nodes_rem - num].ip)
    elif requested_command == "server-readd":
        for num in range(nodes_readd):
            node_ip = self.servers[nodes_add - nodes_rem - num].ip
            force_failover_node(node_ip)
            self.log.info("add back node {0} to cluster".format(node_ip))
            options = "--server-add={0}:8091".format(node_ip)
            self._validate_roles(
                run_cli("server-readd", options=options), result)

    remote_client.disconnect()
def common_test_body(self, failover_reason, rebalance_type=None):
    """
    Main test body containing the flow of the basic failover steps.

    1. Start operations if programmed into the test case (before/after)
    2. Start view and index building operations
    3. Failover K out of N nodes (failover can be HARD/GRACEFUL)
    4.1 Rebalance the cluster after the failover of K nodes, or
    4.2 Run the add-back operation with recoveryType = (full/delta)
        followed by a rebalance
    5. Verify all expected operations completed by checking stats,
       replication, views and data correctness

    :param failover_reason: forwarded to print_test_params and to the
                            failover operation helpers
    :param rebalance_type: forwarded to the add-back helper
    """
    # Pick the reference node for communication: a node in the cluster
    # which will NOT be failed over.
    self.filter_list = []
    self.master = (self.cluster.servers[1] if self.failoverMaster
                   else self.cluster.master)
    self.log.info(
        " Picking node {0} as reference node for test case".format(
            self.master.ip))
    self.print_test_params(failover_reason)
    self.rest = RestConnection(self.master)
    self.nodes = self.rest.node_statuses()
    # Data path of the cluster
    self.data_path = self.rest.get_data_path()

    # The flag is cleared as soon as one node version compares below "3".
    node_versions = self.rest.get_nodes_versions()
    self.version_greater_than_2_5 = not any(
        "3" > node_version for node_version in node_versions)

    # Graceful failover / recovery is not run on the older versions.
    wants_graceful = self.graceful or self.recoveryType is not None
    if not self.version_greater_than_2_5 and wants_graceful:
        self.log.error(
            "Can't apply graceful failover to nodes with version < 3.*")
        self.log.error("Please check configuration params: SKIPPING TEST")
        return

    # Nodes that will undergo failover
    if self.failoverMaster:
        self.chosen = RebalanceHelper.pick_nodes(
            self.master, howmany=1, target_node=self.servers[0])
    else:
        self.chosen = RebalanceHelper.pick_nodes(
            self.master, howmany=self.num_failed_nodes)

    # Create/Update/Delete operations:
    #   withMutationOps = True  => run operations in parallel to failover
    #   withMutationOps = False => run operations before failover
    self.load_initial_data()
    if not self.withMutationOps:
        self.run_mutation_operations()

    # View creation, optionally waiting for completion before failover
    if self.withViewsOps:
        self.run_view_creation_operations(self.servers)
        if not self.createIndexesDuringFailover:
            self.query_and_monitor_view_tasks(self.servers)

    # Snapshot of the data set used for validation
    static_data_snapshot = {}
    vbucket_stats_before = {}
    failover_stats_before = {}
    if not self.withMutationOps:
        static_data_snapshot = self.bucket_util.get_data_set_all(
            self.cluster.servers, self.buckets, path=None)

    # vbucket and failover stats are captured only for version >= 2.5.*
    if self.version_greater_than_2_5 and self.upr_check:
        vbucket_stats_before = self.bucket_util.get_vbucket_seqnos(
            self.servers, self.buckets)
        failover_stats_before = self.bucket_util.get_failovers_logs(
            self.servers, self.buckets)

    # The failover itself
    if self.withMutationOps or self.withViewsOps or self.compact:
        self.run_failover_operations_with_ops(self.chosen, failover_reason)
    else:
        self.run_failover_operations(self.chosen, failover_reason)

    bucket_under_test = self.bucket_util.buckets[0]

    # Apply new_replica, if provided in the conf
    if self.new_replica:
        self.num_replicas = self.new_replica
        BucketHelper(self.master).change_bucket_props(
            bucket_under_test.name, replicaNumber=self.num_replicas)

    # Decide whether the durability is going to fail or not
    durability_will_fail = False
    if self.num_failed_nodes >= 1 and self.num_replicas > 1:
        durability_will_fail = True

    # Target vbucket list built from the nodes being failed over
    affected_vbuckets = list()
    for failed_node in self.chosen:
        node_shell = RemoteMachineShellConnection(failed_node)
        node_vbuckets = Cbstats(node_shell).vbucket_list(
            bucket_under_test.name, self.target_vbucket_type)
        node_shell.disconnect()
        affected_vbuckets.extend(node_vbuckets)

    # Doc loaders restricted to the affected vbuckets, so CRUD only
    # touches vbuckets impacted by the failover.
    self.gen_create = doc_generator(self.key, self.num_items,
                                    self.num_items * 1.5,
                                    target_vbucket=affected_vbuckets)
    self.gen_update = doc_generator(self.key, self.num_items / 2,
                                    self.num_items,
                                    target_vbucket=affected_vbuckets)
    self.gen_delete = doc_generator(self.key, self.num_items / 4,
                                    self.num_items / 2 - 1,
                                    target_vbucket=affected_vbuckets)
    self.afterfailover_gen_create = doc_generator(
        self.key, self.num_items * 1.6, self.num_items * 2,
        target_vbucket=affected_vbuckets)
    self.afterfailover_gen_update = doc_generator(
        self.key, 1, self.num_items / 4,
        target_vbucket=affected_vbuckets)
    self.afterfailover_gen_delete = doc_generator(
        self.key, self.num_items * 0.5, self.num_items * 0.75,
        target_vbucket=affected_vbuckets)

    # Nothing more to do unless a rebalance is expected after failover.
    if self.gracefulFailoverFail or not self.runRebalanceAfterFailover:
        return

    # Add-back with rebalance, or rebalance only, with verifications
    if self.failover_onebyone:
        # Reset; durability can only fail from the second node onwards
        durability_will_fail = False
        for failed_count, single_node in enumerate(self.chosen, start=1):
            if failed_count > 1:
                durability_will_fail = True
            if self.add_back_flag:
                # In add-back case, durability should never fail, since
                # the num_nodes in the cluster will remain the same
                self.run_add_back_operation_and_verify(
                    [single_node], vbucket_stats_before,
                    static_data_snapshot, failover_stats_before,
                    rebalance_type=rebalance_type)
            else:
                self.run_rebalance_after_failover_and_verify(
                    [single_node], vbucket_stats_before,
                    static_data_snapshot, failover_stats_before,
                    durability_will_fail=durability_will_fail)
    elif self.add_back_flag:
        self.run_add_back_operation_and_verify(
            self.chosen, vbucket_stats_before, static_data_snapshot,
            failover_stats_before,
            durability_will_fail=durability_will_fail,
            rebalance_type=rebalance_type)
    else:
        self.run_rebalance_after_failover_and_verify(
            self.chosen, vbucket_stats_before, static_data_snapshot,
            failover_stats_before,
            durability_will_fail=durability_will_fail)

    # verify_unacked_bytes only if the durability is not going to fail
    if self.during_ops is None and not durability_will_fail:
        self.bucket_util.verify_unacked_bytes_all_buckets(
            filter_list=self.filter_list)
def testBucketModification(self):
    """Run bucket-edit, bucket-flush and bucket-delete through couchbase-cli.

    A bucket is first created via ``self._create_bucket``. Each CLI command
    is then issued as ``self.ldapUser`` against 127.0.0.1:8091 and its output
    is handed to ``self._validate_roles`` together with the result expected
    for ``self.role``.

    Test input parameters read: bucket_type, bucket, bucket_ramsize,
    bucket_replica, enable_flush, enable_index_replica, wait, and the
    ``*_new`` values (enable_flush_new, enable_index_replica_new,
    bucket_ramsize_new) that are passed to ``bucket-edit`` when set.
    """
    # NOTE(review): a role outside both lists leaves ``result`` unbound, so
    # the first _validate_roles call raises UnboundLocalError.
    if self.role in ['replication_admin', 'views_admin[*]']:
        result = "Forbidden"
    elif self.role in ['admin', 'cluster_admin', 'bucket_admin[*]']:
        result = 'SUCCESS'

    bucket_type = self.input.param("bucket_type", "couchbase")
    bucket_ramsize_new = self.input.param("bucket_ramsize_new", None)
    enable_flush_new = self.input.param("enable_flush_new", None)
    enable_index_replica_new = self.input.param("enable_index_replica_new",
                                                None)
    bucket = self.input.param("bucket", "default")
    bucket_ramsize = self.input.param("bucket_ramsize", 200)
    bucket_replica = self.input.param("bucket_replica", 1)
    enable_flush = self.input.param("enable_flush", None)
    enable_index_replica = self.input.param("enable_index_replica", None)
    wait = self.input.param("wait", False)

    remote_client = RemoteMachineShellConnection(self.master)
    # try/finally so the shell connection is released even when a
    # validation step fails.
    try:
        self._create_bucket(remote_client,
                            bucket,
                            bucket_type=bucket_type,
                            bucket_ramsize=bucket_ramsize,
                            bucket_replica=bucket_replica,
                            wait=wait,
                            enable_flush=enable_flush,
                            enable_index_replica=enable_index_replica,
                            user="******",
                            password='******')

        # bucket-edit: only append the flags that were supplied.
        options = "--bucket={0}".format(bucket)
        if enable_flush_new is not None:
            options += " --enable-flush={0}".format(enable_flush_new)
        if enable_index_replica_new is not None:
            options += " --enable-index-replica={0}".format(
                enable_index_replica_new)
        if bucket_ramsize_new is not None:
            options += " --bucket-ramsize={0}".format(bucket_ramsize_new)
        output, _ = remote_client.execute_couchbase_cli(
            cli_command="bucket-edit",
            options=options,
            cluster_host="127.0.0.1:8091",
            user=self.ldapUser,
            password=self.ldapPass)
        self._validate_roles(output, result)

        # bucket-flush: only attempted when the flush setting was edited.
        options = "--bucket={0}".format(bucket)
        if enable_flush_new is not None:
            output, _ = remote_client.execute_couchbase_cli(
                cli_command="bucket-flush --force",
                options=options,
                cluster_host="127.0.0.1:8091",
                user=self.ldapUser,
                password=self.ldapPass)
            self._validate_roles(output, result)

        # bucket-delete reuses the same "--bucket=<name>" options.
        output, _ = remote_client.execute_couchbase_cli(
            cli_command="bucket-delete",
            options=options,
            cluster_host="127.0.0.1:8091",
            user=self.ldapUser,
            password=self.ldapPass)
        self._validate_roles(output, result)
    finally:
        remote_client.disconnect()
def cleanup_cluster(servers, wait_for_rebalance=True, master=None):
    """Remove every non-master node from the cluster and check each was reset.

    Nodes whose port lies in 9091..9990 are ejected directly. If more than
    one node remains, all nodes except the master are rebalanced out via
    ``RestHelper.remove_nodes``. Each removed node is then polled for up to
    30 seconds until it reports an empty "pools" list. Nodes that never do
    are force-ejected (skipped when the ``alt_addr`` test parameter is set).

    Args:
        servers: server objects; ``servers[0]`` supplies the REST credentials
            copied onto removed nodes and is the default master.
        wait_for_rebalance: forwarded to ``RestHelper.remove_nodes``.
        master: node to talk to; defaults to ``servers[0]``.

    Raises:
        Exception: when at least one removed node was not cleaned and
            ``alt_addr`` is not set.
    """
    log = logger.Logger.get_logger()
    if master is None:
        master = servers[0]
    rest = RestConnection(master)
    helper = RestHelper(rest)
    helper.is_ns_server_running(
        timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
    nodes = rest.node_statuses()
    master_id = rest.get_nodes_self().id

    # Iterate over a snapshot: removing from ``nodes`` while looping over it
    # directly skips the entry that follows each removal.
    for node in list(nodes):
        if int(node.port) in range(9091, 9991):
            rest.eject_node(node)
            nodes.remove(node)

    if len(nodes) <= 1:
        return

    ejected = [node for node in nodes if node.id != master_id]
    ejected_ids = [node.id for node in ejected]
    log.info("rebalancing all nodes in order to remove nodes")
    rest.log_client_error(
        "Starting rebalance from test, ejected nodes %s" % ejected_ids)
    helper.remove_nodes(knownNodes=[node.id for node in nodes],
                        ejectedNodes=ejected_ids,
                        wait_for_rebalance=wait_for_rebalance)
    alt_addr = TestInputSingleton.input.param("alt_addr", False)

    def connect(target, current):
        """Return a REST connection for ``target``.

        With ``alt_addr`` set, the server whose non-loopback addresses
        include ``target.ip`` is used; ``current`` is returned unchanged
        when no server matches.
        """
        if not alt_addr:
            return RestConnection(target)
        for server in servers:
            shell = RemoteMachineShellConnection(server)
            try:
                internal_ips = [
                    x for x in shell.get_ip_address() if x != "127.0.0.1"
                ]
            finally:
                shell.disconnect()
            # Membership test: comparing the whole address list to a single
            # ip with == can never be true.
            if target.ip in internal_ips:
                return RestConnection(server)
        return current

    success_cleaned = []
    for removed in ejected:
        removed.rest_password = servers[0].rest_password
        removed.rest_username = servers[0].rest_username
        try:
            rest = connect(removed, rest)
        except Exception as ex:
            log.error(
                "can't create rest connection after rebalance out for ejected nodes, "
                "will retry after 10 seconds according to MB-8430: {0} "
                .format(ex))
            time.sleep(10)
            rest = RestConnection(removed)
        # Poll until the node reports no pools, i.e. it has been reset.
        start = time.time()
        while time.time() - start < 30:
            if len(rest.get_pools_info()["pools"]) == 0:
                success_cleaned.append(removed)
                break
            time.sleep(0.1)
        if time.time() - start > 10:
            log.error("'pools' on node {0}:{1} - {2}".format(
                removed.ip, removed.port, rest.get_pools_info()["pools"]))

    not_cleaned = set(ejected) - set(success_cleaned)
    for node in not_cleaned:
        # Act on the node being iterated, not on whatever ``removed``
        # happened to be at the end of the previous loop.
        log.error(
            "node {0}:{1} was not cleaned after removing from cluster".format(
                node.ip, node.port))
        try:
            rest = connect(node, rest)
            if not alt_addr:
                rest.force_eject_node()
        except Exception as ex:
            log.error("force_eject_node {0}:{1} failed: {2}".format(
                node.ip, node.port, ex))

    if not_cleaned and not alt_addr:
        raise Exception("not all ejected nodes were cleaned successfully")

    log.info(
        "removed all the nodes from cluster associated with {0} ? {1}".format(
            servers[0], [(node.id, node.port) for node in ejected]))
def testSettingCompacttion(self):
    """Run ``couchbase-cli setting-compaction`` as the LDAP user.

    Every compaction flag listed below is read from the test input under
    the same name and appended to the command line only when a value was
    supplied. The CLI output is passed to ``self._validate_roles`` with the
    result expected for ``self.role``.
    """
    # NOTE(review): a role outside both lists leaves ``result`` unbound, so
    # _validate_roles raises UnboundLocalError.
    if self.role in [
            'replication_admin', 'views_admin[*]', 'bucket_admin[*]'
    ]:
        result = "Forbidden"
    elif self.role in ['admin', 'cluster_admin']:
        result = 'SUCCESS'

    # setting-compaction options, in the order they are appended:
    #   --compaction-db-percentage=PERCENTAGE  db compaction trigger
    #   --compaction-db-size=SIZE[MB]          db compaction trigger
    #   --compaction-view-percentage=PERCENTAGE view compaction trigger
    #   --compaction-view-size=SIZE[MB]        view compaction trigger
    #   --compaction-period-from=HH:MM         allowed period start
    #   --compaction-period-to=HH:MM           allowed period end
    #   --enable-compaction-abort=[0|1]        abort when the period expires
    #   --enable-compaction-parallel=[0|1]     compact db and view in parallel
    compaction_flags = (
        "compaction-db-percentage",
        "compaction-db-size",
        "compaction-view-percentage",
        "compaction-view-size",
        "compaction-period-from",
        "compaction-period-to",
        "enable-compaction-abort",
        "enable-compaction-parallel",
    )
    options = ""
    for flag in compaction_flags:
        value = self.input.param(flag, None)
        if value is not None:
            options += " --{0}={1}".format(flag, value)

    remote_client = RemoteMachineShellConnection(self.master)
    # try/finally so the shell connection is released even when the
    # validation fails.
    try:
        output, _ = remote_client.execute_couchbase_cli(
            cli_command="setting-compaction",
            options=options,
            cluster_host="127.0.0.1:8091",
            user=self.ldapUser,
            password=self.ldapPass)
        self._validate_roles(output, result)
    finally:
        remote_client.disconnect()
def _create_data_locations(self, server):
    """Prepare the disk partition and data directory on ``server``.

    Creates a partition at ``self.disk_location`` of size
    ``self.disk_location_size``, creates ``self.data_location`` and gives
    couchbase permissions on that directory.
    """
    shell = RemoteMachineShellConnection(server)
    # Release the connection even if one of the setup steps raises.
    try:
        shell.create_new_partition(self.disk_location,
                                   self.disk_location_size)
        shell.create_directory(self.data_location)
        shell.give_directory_permissions_to_couchbase(self.data_location)
    finally:
        shell.disconnect()
def testClusterEdit(self):
    """Issue ``couchbase-cli cluster-edit`` from the master as the LDAP user.

    The command output is not validated here.
    """
    # The previous code indexed ``self.servers[num + 1]`` with ``num`` never
    # defined in this method. NOTE(review): the first non-master server is
    # assumed to be the intended target - confirm.
    options = "--server-add={0}:8091 --server-add-username=Administrator --server-add-password=password".format(
        self.servers[1].ip)
    remote_client = RemoteMachineShellConnection(self.master)
    try:
        remote_client.execute_couchbase_cli(cli_command='cluster-edit',
                                            options=options,
                                            cluster_host="localhost",
                                            user=self.ldapUser,
                                            password=self.ldapPass)
    finally:
        # The connection was previously never closed.
        remote_client.disconnect()
def kill_memcached_service(self, server):
    """Open a shell connection to ``server`` and call ``kill_memcached`` on it."""
    remote_client = RemoteMachineShellConnection(server)
    # Release the connection even if the kill command raises.
    try:
        remote_client.kill_memcached()
    finally:
        remote_client.disconnect()
def testAddRemoveNodes(self):
    """Add nodes via couchbase-cli, then exercise one follow-up operation.

    ``nodes_add`` servers (starting at ``self.servers[1]``) are added with
    ``server-add`` and the cluster is rebalanced. Depending on the
    ``cli_command`` test parameter the method then checks the add events,
    removes nodes via rebalance, fails nodes over, or fails over and
    recovers nodes. Expected audit fields are verified with
    ``self.checkConfig``. All CLI calls run as ``self.ldapUser`` against
    localhost.

    Test input parameters read: nodes_add, nodes_rem, nodes_failover,
    nodes_readd, cli_command.
    """
    nodes_add = self.input.param("nodes_add", 1)
    nodes_rem = self.input.param("nodes_rem", 1)
    nodes_failover = self.input.param("nodes_failover", 0)
    nodes_readd = self.input.param("nodes_readd", 0)
    cli_command = self.input.param("cli_command", None)
    source = self.source
    remote_client = RemoteMachineShellConnection(self.master)

    def run_cli(command, **extra):
        """Run one couchbase-cli command as the LDAP user on localhost."""
        return remote_client.execute_couchbase_cli(cli_command=command,
                                                   cluster_host="localhost",
                                                   user=self.ldapUser,
                                                   password=self.ldapPass,
                                                   **extra)

    # try/finally so the shell connection is released when a check fails.
    try:
        for num in range(nodes_add):
            options = "--server-add={0} --server-add-username=Administrator --server-add-password=password".format(
                self.servers[num + 1].ip)
            run_cli('server-add', options=options)
        run_cli('rebalance')

        if cli_command == "server-add":
            # The last node added; named explicitly instead of relying on
            # the loop variable surviving the loop above.
            added_ip = self.servers[nodes_add].ip
            expectedResults = {
                "services": ['kv'],
                'port': 18091,
                'hostname': added_ip,
                'groupUUID': "0",
                'node': 'ns_1@' + added_ip,
                'source': source,
                'user': self.master.rest_username,
                "real_userid:user": self.ldapUser,
                "ip": '::1',
                "remote:port": 57457
            }
            self.checkConfig(self.eventID, self.master, expectedResults)
            # 'source' used to appear twice in this literal; only the last
            # value (``source``) ever took effect.
            expectedResults = {
                "delta_recovery_buckets": "all",
                'known_nodes': ["ns_1@" + self.master.ip,
                                "ns_1@" + added_ip],
                'ejected_nodes': [],
                'source': source,
                'user': self.master.rest_username,
                "ip": '::1',
                "port": 57457,
                "real_userid:user": self.ldapUser
            }
            self.checkConfig(8200, self.master, expectedResults)

        elif cli_command == 'server-remove':
            for num in range(nodes_rem):
                options = "--server-remove={0}:8091".format(
                    self.servers[nodes_add - num].ip)
                run_cli("rebalance", options=options)
                # NOTE(review): the node removed is servers[nodes_add - num]
                # while the expectation names servers[num + 1]; these only
                # coincide for some parameter values - confirm intent.
                expectedResults = {
                    "delta_recovery_buckets": "all",
                    'known_nodes': ["ns_1@" + self.master.ip,
                                    "ns_1@" + self.servers[num + 1].ip],
                    'ejected_nodes': ["ns_1@" + self.servers[num + 1].ip],
                    'source': source,
                    'user': self.master.rest_username,
                    "ip": '::1',
                    "port": 57457,
                    "real_userid:user": self.ldapUser
                }
                self.checkConfig(8200, self.master, expectedResults)

        elif cli_command == "failover":
            for num in range(nodes_failover):
                target_ip = self.servers[nodes_add - nodes_rem - num].ip
                self.log.info("failover node {0}".format(target_ip))
                options = "--server-failover={0}:8091".format(target_ip)
                options += " --force"
                run_cli('failover', options=options)
                expectedResults = {
                    'source': source,
                    "real_userid:user": self.ldapUser,
                    'user': self.master.rest_username,
                    "ip": '::1',
                    "port": 57457,
                    'type': 'hard',
                    'nodes': '[ns_1@' + target_ip + ']'
                }
                self.checkConfig(self.eventID, self.master, expectedResults)

        elif cli_command == "recovery":
            for num in range(nodes_readd):
                target_ip = self.servers[nodes_add - nodes_rem - num].ip
                self.log.info("failover node {0}".format(target_ip))
                options = "--server-failover={0}:8091".format(target_ip)
                options += " --hard"
                run_cli('failover', options=options)
                self.log.info(
                    "add back node {0} to cluster".format(target_ip))
                options = "--server-recovery={0}:8091 --recovery-type full".format(
                    target_ip)
                run_cli("recovery", options=options)
                expectedResults = {
                    'node': 'ns_1@' + target_ip,
                    'type': 'full',
                    "real_userid:user": self.ldapUser,
                    'source': source,
                    'user': self.master.rest_username,
                    "ip": '::1',
                    "port": 57457
                }
                self.checkConfig(self.eventID, self.master, expectedResults)
    finally:
        remote_client.disconnect()
def test_clusterOps(self):
    """Perform one cluster operation and verify the expected audit fields.

    The ``ops`` test parameter selects the operation: addNodeKV,
    addNodeN1QL, addNodeIndex, removeNode, rebalanceIn, rebalanceOut,
    failover or nodeRecovery. The nodes acted upon are
    ``self.servers[1:self.nodes_in + 1]``. The expected fields are checked
    with ``self.checkConfig`` for ``self.eventID``.
    """
    # Constructed but not otherwise used here; kept in case the constructor
    # has side effects (not visible from this method).
    audit(eventID=self.eventID, host=self.master)
    ops = self.input.param('ops', None)
    servs_inout = self.servers[1:self.nodes_in + 1]
    source = 'ns_server'

    def add_node_expectation(services):
        """Expected fields for an add-node event with the given services."""
        return {
            "services": services,
            'port': 8091,
            'hostname': servs_inout[0].ip,
            'groupUUID': "0",
            'node': 'ns_1@' + servs_inout[0].ip,
            'source': source,
            'user': self.master.rest_username,
            "ip": self.ipAddress,
            "remote:port": 57457
        }

    def rebalance_expectation(ejected_nodes):
        """Expected fields for a rebalance event."""
        # 'source' used to be listed twice in these literals; only the last
        # value (``source``) ever took effect.
        return {
            "delta_recovery_buckets": "all",
            'known_nodes': ["ns_1@" + servs_inout[0].ip,
                            "ns_1@" + self.master.ip],
            'ejected_nodes': ejected_nodes,
            'source': source,
            'user': self.master.rest_username,
            "ip": self.ipAddress,
            "port": 57457
        }

    def log_os_type():
        """Log the master's distribution type.

        The expected fields were identical for windows and every other OS,
        so the value is only logged.
        """
        shell = RemoteMachineShellConnection(self.master)
        try:
            os_type = shell.extract_remote_info().distribution_type
        finally:
            # The connection was previously never closed.
            shell.disconnect()
        log.info("OS type is {0}".format(os_type))

    def add_node_with_services(services):
        """Add the first in/out node through REST with ``services``."""
        rest = RestConnection(self.master)
        rest.add_node(user=self.master.rest_username,
                      password=self.master.rest_password,
                      remoteIp=servs_inout[0].ip,
                      services=services)

    # NOTE(review): an ``ops`` value matching none of the branches leaves
    # ``expectedResults`` unbound and checkConfig is never reached.
    if ops == 'addNodeKV':
        self.cluster.rebalance(self.servers, servs_inout, [])
        print(servs_inout)
        print(servs_inout[0].ip)
        expectedResults = add_node_expectation(['kv'])
    elif ops == 'addNodeN1QL':
        add_node_with_services(['n1ql'])
        expectedResults = add_node_expectation(['n1ql'])
    elif ops == 'addNodeIndex':
        add_node_with_services(['index'])
        expectedResults = add_node_expectation(['index'])
    elif ops in ('removeNode', 'rebalanceOut'):
        self.cluster.rebalance(self.servers, [], servs_inout)
        log_os_type()
        expectedResults = rebalance_expectation(
            ['ns_1@' + servs_inout[0].ip])
    elif ops == 'rebalanceIn':
        self.cluster.rebalance(self.servers, servs_inout, [])
        log_os_type()
        expectedResults = rebalance_expectation([])
    elif ops == 'failover':
        # Renamed from ``type`` to stop shadowing the builtin.
        failover_type = self.input.param('type', None)
        self.cluster.failover(self.servers, servs_inout)
        self.cluster.rebalance(self.servers, [], [])
        expectedResults = {
            'source': source,
            'user': self.master.rest_username,
            "ip": self.ipAddress,
            "port": 57457,
            'type': failover_type,
            'nodes': '[ns_1@' + servs_inout[0].ip + ']'
        }
    elif ops == 'nodeRecovery':
        expectedResults = {
            'node': 'ns_1@' + servs_inout[0].ip,
            'type': 'delta',
            'source': source,
            'user': self.master.rest_username,
            "ip": self.ipAddress,
            "port": 57457
        }
        self.cluster.failover(self.servers, servs_inout)
        rest = RestConnection(self.master)
        rest.set_recovery_type(expectedResults['node'], 'delta')

    # Pending of failover - soft
    self.checkConfig(self.eventID, self.master, expectedResults)
def test_rebalance_in_out_at_once_persistence_stopped(self):
    """
    PERFORMANCE: Rebalance in/out at once with stopped persistence.

    The test starts from a cluster of self.nodes_init nodes. Persistence is
    stopped (cbepctl "stop") on the first
    min(num_nodes_with_stopped_persistence, nodes_init) nodes. New items are
    then loaded while servs_in nodes are added and servs_out nodes are
    removed in a single rebalance.

    After the rebalance and the load have completed, the verification phase
    runs:
      - snap_start/stop sequence numbers are checked for corruption,
      - the remaining initial nodes are expected to still hold more than
        90% of num_items_without_persistence in their disk queue,
      - stats on the added nodes are waited for,
      - all buckets and their stats are verified,
      - sum(curr_items) must match curr_items_total for every bucket,
      - unacked bytes are verified for all buckets.

    Available parameters by default are:
    nodes_init=1, nodes_in=1, nodes_out=1,
    num_nodes_with_stopped_persistence=1,
    num_items_without_persistence=100000
    """
    num_nodes_with_stopped_persistence = self.input.param(
        "num_nodes_with_stopped_persistence", 1)
    servs_init = self.servers[:self.nodes_init]
    servs_in = [
        self.servers[i + self.nodes_init] for i in range(self.nodes_in)
    ]
    # Nodes are taken out from the end of the initial set.
    servs_out = [
        self.servers[self.nodes_init - i - 1]
        for i in range(self.nodes_out)
    ]
    rest = RestConnection(self.master)
    self._wait_for_stats_all_buckets(servs_init)

    stop_count = min(num_nodes_with_stopped_persistence, self.nodes_init)
    for server in servs_init[:stop_count]:
        shell = RemoteMachineShellConnection(server)
        # The shell connection was previously left open for every server.
        try:
            for bucket in self.buckets:
                shell.execute_cbepctl(bucket, "stop", "", "", "")
        finally:
            shell.disconnect()
    self.sleep(5)

    self.num_items_without_persistence = self.input.param(
        "num_items_without_persistence", 100000)
    gen_extra = BlobGenerator('mike',
                              'mike-',
                              self.value_size,
                              start=self.num_items // 2,
                              end=self.num_items // 2 +
                              self.num_items_without_persistence)
    self.log.info("current nodes : {0}".format(
        [node.id for node in rest.node_statuses()]))
    self.log.info("adding nodes {0} to cluster".format(servs_in))
    self.log.info("removing nodes {0} from cluster".format(servs_out))
    tasks = self._async_load_all_buckets(self.master,
                                         gen_extra,
                                         "create",
                                         0,
                                         batch_size=1000)
    result_nodes = set(servs_init + servs_in) - set(servs_out)
    # wait timeout in 60 min because MB-7386 rebalance stuck
    self.cluster.rebalance(
        servs_init[:self.nodes_init],
        servs_in,
        servs_out,
        timeout=self.wait_timeout * 60,
        sleep_before_rebalance=self.sleep_before_rebalance)
    for task in tasks:
        task.result()

    # Validate seq_no snap_start/stop values after rebalance
    self.check_snap_start_corruption()
    self._wait_for_stats_all_buckets(
        servs_init[:self.nodes_init - self.nodes_out],
        ep_queue_size=self.num_items_without_persistence * 0.9,
        ep_queue_size_cond='>')
    self._wait_for_stats_all_buckets(servs_in)
    self._verify_all_buckets(self.master, timeout=None)
    self._verify_stats_all_buckets(result_nodes)

    # verify that curr_items_tot corresponds to sum of curr_items from all nodes
    verified = True
    for bucket in self.buckets:
        verified &= RebalanceHelper.wait_till_total_numbers_match(
            self.master, bucket)
    self.assertTrue(
        verified,
        "Lost items!!! Replication was completed but sum(curr_items) don't match the curr_items_total"
    )
    self.verify_unacked_bytes_all_buckets()