def test_basic_xdcr_with_cert_regenerate(self):
    cluster1 = self.servers[0:2]
    cluster2 = self.servers[2:4]
    remote_cluster_name = 'sslcluster'
    restCluster1 = RestConnection(cluster1[0])
    restCluster2 = RestConnection(cluster2[0])
    try:
        # Setup cluster1
        x509main(cluster1[0]).setup_master()
        x509main(cluster1[1])._setup_node_certificates(reload_cert=False)
        restCluster1.add_node('Administrator', 'password', cluster1[1].ip)
        known_nodes = ['ns_1@' + cluster1[0].ip, 'ns_1@' + cluster1[1].ip]
        restCluster1.rebalance(known_nodes)
        self.assertTrue(self.check_rebalance_complete(restCluster1), "Issue with rebalance")
        restCluster1.create_bucket(bucket='default', ramQuotaMB=100)
        restCluster1.remove_all_replications()
        restCluster1.remove_all_remote_clusters()
        # Setup cluster2
        x509main(cluster2[0]).setup_master()
        x509main(cluster2[1])._setup_node_certificates(reload_cert=False)
        restCluster2.add_node('Administrator', 'password', cluster2[1].ip)
        known_nodes = ['ns_1@' + cluster2[0].ip, 'ns_1@' + cluster2[1].ip]
        restCluster2.rebalance(known_nodes)
        self.assertTrue(self.check_rebalance_complete(restCluster2), "Issue with rebalance")
        restCluster2.create_bucket(bucket='default', ramQuotaMB=100)
        test = x509main.CACERTFILEPATH + x509main.CACERTFILE
        data = open(test, 'rb').read()
        restCluster1.add_remote_cluster(cluster2[0].ip, cluster2[0].port, 'Administrator', 'password',
                                        remote_cluster_name, certificate=data)
        replication_id = restCluster1.start_replication('continuous', 'default', remote_cluster_name)
        #restCluster1.set_xdcr_param('default','default','pauseRequested',True)
        x509main(self.master)._delete_inbox_folder()
        x509main(self.master)._generate_cert(self.servers, root_cn="CB\ Authority")
        self.log.info("Setting up the first cluster for new certificate")
        x509main(cluster1[0]).setup_master()
        x509main(cluster1[1])._setup_node_certificates(reload_cert=False)
        self.log.info("Setting up the second cluster for new certificate")
        x509main(cluster2[0]).setup_master()
        x509main(cluster2[1])._setup_node_certificates(reload_cert=False)
        status = restCluster1.is_replication_paused('default', 'default')
        if not status:
            restCluster1.set_xdcr_param('default', 'default', 'pauseRequested', False)
        restCluster1.set_xdcr_param('default', 'default', 'pauseRequested', True)
        status = restCluster1.is_replication_paused('default', 'default')
        self.assertTrue(status, "Replication has not started after certificate upgrade")
    finally:
        known_nodes = ['ns_1@' + cluster2[0].ip, 'ns_1@' + cluster2[1].ip]
        restCluster2.rebalance(known_nodes, ['ns_1@' + cluster2[1].ip])
        self.assertTrue(self.check_rebalance_complete(restCluster2), "Issue with rebalance")
        restCluster2.delete_bucket()
def cbrecover_multiple_failover_swapout_reb_routine(self):
    self.common_preSetup()
    if self._failover is not None:
        if "source" in self._failover:
            rest = RestConnection(self.src_master)
            if self._default_bucket:
                self.initial_node_count = len(self.src_nodes)
                self.vbucket_map_before = rest.fetch_vbucket_map()  # JUST FOR DEFAULT BUCKET AS OF NOW
            if self._failover_count >= len(self.src_nodes):
                raise Exception("Won't failover .. count exceeds available servers on source : SKIPPING TEST")
            if len(self._floating_servers_set) < self._add_count:
                raise Exception("Not enough spare nodes available, to match the failover count : SKIPPING TEST")
            self.log.info("Failing over {0} nodes on source ..".format(self._failover_count))
            self.failed_nodes = self.src_nodes[(len(self.src_nodes) - self._failover_count):len(self.src_nodes)]
            self.cluster.failover(self.src_nodes, self.failed_nodes)
            for node in self.failed_nodes:
                self.src_nodes.remove(node)
            self.sleep(10)
            add_nodes = self._floating_servers_set[0:self._add_count]
            for node in add_nodes:
                rest.add_node(user=node.rest_username, password=node.rest_password, remoteIp=node.ip, port=node.port)
            self.src_nodes.extend(add_nodes)
            self.sleep(10)
            # CALL THE CBRECOVERY ROUTINE
            self.cbr_routine(self.dest_master, self.src_master)
            self.trigger_rebalance(rest)
            if self._default_bucket:
                self.vbucket_map_after = rest.fetch_vbucket_map()
                self.final_node_count = len(self.src_nodes)
        elif "destination" in self._failover:
            rest = RestConnection(self.dest_master)
            if self._default_bucket:
                self.initial_node_count = len(self.dest_nodes)
                self.vbucket_map_before = rest.fetch_vbucket_map()  # JUST FOR DEFAULT BUCKET AS OF NOW
            if self._failover_count >= len(self.dest_nodes):
                raise Exception("Won't failover .. count exceeds available servers on sink : SKIPPING TEST")
            if len(self._floating_servers_set) < self._add_count:
                raise Exception("Not enough spare nodes available, to match the failover count : SKIPPING TEST")
            self.log.info("Failing over {0} nodes on destination ..".format(self._failover_count))
            self.failed_nodes = self.dest_nodes[(len(self.dest_nodes) - self._failover_count):len(self.dest_nodes)]
            self.cluster.failover(self.dest_nodes, self.failed_nodes)
            for node in self.failed_nodes:
                self.dest_nodes.remove(node)
            self.sleep(10)
            add_nodes = self._floating_servers_set[0:self._add_count]
            for node in add_nodes:
                rest.add_node(user=node.rest_username, password=node.rest_password, remoteIp=node.ip, port=node.port)
            self.dest_nodes.extend(add_nodes)
            self.sleep(10)
            # CALL THE CBRECOVERY ROUTINE
            self.cbr_routine(self.src_master, self.dest_master)
            self.trigger_rebalance(rest)
            if self._default_bucket:
                self.vbucket_map_after = rest.fetch_vbucket_map()
                self.final_node_count = len(self.dest_nodes)
    self.common_tearDown_verification()
def test_add_remove_add_back_node_with_cert(self, rebalance=None):
    rebalance = self.input.param('rebalance')
    rest = RestConnection(self.master)
    servs_inout = self.servers[1:3]
    serv_out = 'ns_1@' + servs_inout[1].ip
    known_nodes = ['ns_1@' + self.master.ip]
    x509main(self.master).setup_master()
    x509main().setup_cluster_nodes_ssl(servs_inout)
    for server in servs_inout:
        rest.add_node('Administrator', 'password', server.ip)
        known_nodes.append('ns_1@' + server.ip)
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    for server in servs_inout:
        status = x509main(server)._validate_ssl_login()
        self.assertEqual(status, 200, "Not able to login via SSL code")
    rest.fail_over(serv_out, graceful=False)
    if rebalance:
        rest.rebalance(known_nodes, [serv_out])
        self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
        rest.add_node('Administrator', 'password', servs_inout[1].ip)
    else:
        rest.add_back_node(serv_out)
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    for server in servs_inout:
        status = x509main(server)._validate_ssl_login()
        self.assertEqual(status, 200, "Not able to login via SSL code")
def add_nodes_with_ddoc_ops(self):
    # load initial documents
    self._load_doc_data_all_buckets()
    # create ddocs
    for bucket in self.buckets:
        self._execute_ddoc_ops("create", self.test_with_view, self.num_ddocs,
                               self.num_views_per_ddoc, "dev_test", "v1")
    # execute ddoc ops asynchronously
    for bucket in self.buckets:
        if self.ddoc_ops == "create":
            # create some more ddocs
            tasks_ddoc = self._async_execute_ddoc_ops(self.ddoc_ops, self.test_with_view,
                                                      self.num_ddocs / 2, self.num_views_per_ddoc / 2,
                                                      "dev_test_1", "v1")
        elif self.ddoc_ops in ["update", "delete"]:
            # update/delete the same ddocs
            tasks_ddoc = self._async_execute_ddoc_ops(self.ddoc_ops, self.test_with_view,
                                                      self.num_ddocs / 2, self.num_views_per_ddoc / 2,
                                                      "dev_test", "v1")
    rest = RestConnection(self.master)
    for node in self.servers[1:]:
        self.log.info("adding node {0}:{1} to cluster".format(node.ip, node.port))
        rest.add_node(self.master.rest_username, self.master.rest_password, node.ip, node.port)
    for task in tasks_ddoc:
        task.result(self.wait_timeout * 2)
    self._verify_ddoc_ops_all_buckets()
    if self.test_with_view:
        self._verify_ddoc_data_all_buckets()
def test_add_remove_graceful_add_back_node_with_cert(self, recovery_type=None):
    recovery_type = self.input.param('recovery_type')
    rest = RestConnection(self.master)
    known_nodes = ['ns_1@' + self.master.ip]
    progress = None
    count = 0
    servs_inout = self.servers[1:]
    serv_out = 'ns_1@' + servs_inout[1].ip
    rest.create_bucket(bucket='default', ramQuotaMB=100)
    x509main(self.master).setup_master()
    x509main().setup_cluster_nodes_ssl(servs_inout)
    for server in servs_inout:
        rest.add_node('Administrator', 'password', server.ip)
        known_nodes.append('ns_1@' + server.ip)
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    for server in servs_inout:
        status = x509main(server)._validate_ssl_login()
        self.assertEqual(status, 200, "Not able to login via SSL code")
    rest.fail_over(serv_out, graceful=True)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    rest.set_recovery_type(serv_out, recovery_type)
    rest.add_back_node(serv_out)
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    for server in servs_inout:
        status = x509main(server)._validate_ssl_login()
        self.assertEqual(status, 200, "Not able to login via SSL code")
def rebalance_in(self, how_many):
    rest = RestConnection(self._servers[0])
    nodes = rest.node_statuses()
    # choose how_many nodes from self._servers which are not part of nodes
    nodeIps = [node.ip for node in nodes]
    self.log.info("current nodes : {0}".format(nodeIps))
    toBeAdded = []
    selection = self._servers[1:]
    shuffle(selection)
    for server in selection:
        if not server.ip in nodeIps:
            toBeAdded.append(server)
        if len(toBeAdded) == how_many:
            break
    for server in toBeAdded:
        rest.add_node('Administrator', 'password', server.ip)
        # check if its added ?
    nodes = rest.node_statuses()
    otpNodes = [node.id for node in nodes]
    started = rest.rebalance(otpNodes, [])
    msg = "rebalance operation started ? {0}"
    self.log.info(msg.format(started))
    if started:
        result = rest.monitorRebalance()
        msg = "successfully rebalanced out selected nodes from the cluster ? {0}"
        self.log.info(msg.format(result))
        return result
    return False
def test_add_node_no_rebalance(self):
    services_in = ["index", "n1ql", "kv"]
    # rebalance in a node
    rest = RestConnection(self.master)
    rest.add_node(self.master.rest_username, self.master.rest_password,
                  self.servers[self.nodes_init].ip, self.servers[self.nodes_init].port,
                  services=services_in)
    self.sleep(30)
    self.run_cbq_query(query="PREPARE p1 from select * from default limit 5",
                       server=self.servers[0])
    self.sleep(5)
    nodes = rest.node_statuses()
    rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
    self.sleep(30)
    for i in range(self.nodes_init + 1):
        try:
            self.run_cbq_query(query="execute p1", server=self.servers[i])
        except CBQError, ex:
            self.assertTrue("No such prepared statement: p1" in str(ex),
                            "The error should be 'no such prepared statement', it really is %s" % ex)
            self.log.info(ex)
            self.log.info("node: %s:%s does not have the statement"
                          % (self.servers[i].ip, self.servers[i].port))
def test_add_remove_autofailover(self):
    rest = RestConnection(self.master)
    serv_out = self.servers[3]
    shell = RemoteMachineShellConnection(serv_out)
    known_nodes = ['ns_1@' + self.master.ip]
    rest.create_bucket(bucket='default', ramQuotaMB=100)
    rest.update_autofailover_settings(True, 30)
    x509main(self.master).setup_master()
    x509main().setup_cluster_nodes_ssl(self.servers[1:4])
    for server in self.servers[1:4]:
        rest.add_node('Administrator', 'password', server.ip)
        known_nodes.append('ns_1@' + server.ip)
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    shell.stop_server()
    self.sleep(60)
    shell.start_server()
    self.sleep(30)
    for server in self.servers:
        status = x509main(server)._validate_ssl_login()
        self.assertEqual(status, 200, "Not able to login via SSL code")
def _add_node_already_added_body(self):
    self.common_setUp(False)
    master = self.servers[0]
    master_rest = RestConnection(master)
    for i in range(1, len(self.servers)):
        ip = self.servers[i].ip
        self.log.info('adding node : {0} to the cluster'.format(ip))
        otpNode = master_rest.add_node(user=self.membase.rest_username,
                                       password=self.membase.rest_password,
                                       remoteIp=ip, port=self.servers[i].port)
        if otpNode:
            self.log.info('added node : {0} to the cluster'.format(otpNode.id))
            # try to add again
            try:
                readd_otpNode = master_rest.add_node(user=self.membase.rest_username,
                                                     password=self.membase.rest_password,
                                                     remoteIp=ip, port=self.servers[i].port)
                if readd_otpNode:
                    self.fail("server did not raise any exception when calling add_node on an already added node")
            except ServerAlreadyJoinedException:
                self.log.info("server raised ServerAlreadyJoinedException as expected")
            # now lets eject it
            self.log.info("ejecting the node {0}".format(otpNode.id))
            ejected = master_rest.eject_node(user=self.membase.rest_username,
                                             password=self.membase.rest_password,
                                             otpNode=otpNode.id)
            self.assertTrue(ejected, msg="unable to eject the node {0}".format(otpNode.id))
        else:
            self.fail(msg="unable to add node : {0} to the cluster".format(ip))
def add_nodes_with_ddoc_ops(self):
    # load initial documents
    self._load_doc_data_all_buckets()
    # create ddocs
    for bucket in self.buckets:
        self._execute_ddoc_ops("create", self.test_with_view, self.num_ddocs,
                               self.num_views_per_ddoc, "dev_test", "v1")
    # execute ddoc ops asynchronously
    for bucket in self.buckets:
        if self.ddoc_ops == "create":
            # create some more ddocs
            tasks_ddoc = self._async_execute_ddoc_ops(self.ddoc_ops, self.test_with_view,
                                                      self.num_ddocs / 2, self.num_views_per_ddoc / 2,
                                                      "dev_test_1", "v1")
        elif self.ddoc_ops in ["update", "delete"]:
            # update/delete the same ddocs
            tasks_ddoc = self._async_execute_ddoc_ops(self.ddoc_ops, self.test_with_view,
                                                      self.num_ddocs / 2, self.num_views_per_ddoc / 2,
                                                      "dev_test", "v1")
    rest = RestConnection(self.master)
    for node in self.servers[1:]:
        self.log.info("adding node {0}:{1} to cluster".format(node.ip, node.port))
        rest.add_node(self.master.rest_username, self.master.rest_password, node.ip, node.port)
    for task in tasks_ddoc:
        task.result(self.wait_timeout * 2)
    self.verify_cluster_stats(servers=self.servers[:self.nodes_init])
    self._verify_ddoc_ops_all_buckets()
    if self.test_with_view:
        self._verify_ddoc_data_all_buckets()
def add_nodes(self, task_manager):
    master = self.servers[0]
    rest = RestConnection(master)
    for node in self.to_add:
        self.log.info("adding node {0}:{1} to cluster".format(node.ip, node.port))
        rest.add_node(master.rest_username, master.rest_password, node.ip, node.port)
def restart_cbrecover_multiple_failover_swapout_reb_routine(self):
    self.common_preSetup()
    when_step = self._input.param("when_step", "recovery_when_rebalance")
    if self._failover is not None:
        if "source" in self._failover:
            rest = RestConnection(self.src_master)
            if self._default_bucket:
                self.initial_node_count = len(self.src_nodes)
                self.vbucket_map_before = rest.fetch_vbucket_map()  # JUST FOR DEFAULT BUCKET AS OF NOW
            if self._failover_count >= len(self.src_nodes):
                raise Exception("Won't failover .. count exceeds available servers on source : SKIPPING TEST")
            if len(self._floating_servers_set) < self._add_count:
                raise Exception("Not enough spare nodes available, to match the failover count : SKIPPING TEST")
            self.log.info("Failing over {0} nodes on source ..".format(self._failover_count))
            self.failed_nodes = self.src_nodes[(len(self.src_nodes) - self._failover_count):len(self.src_nodes)]
            self.cluster.failover(self.src_nodes, self.failed_nodes)
            for node in self.failed_nodes:
                self.src_nodes.remove(node)
            add_nodes = self._floating_servers_set[0:self._add_count]
            for node in add_nodes:
                rest.add_node(user=node.rest_username, password=node.rest_password,
                              remoteIp=node.ip, port=node.port)
            self.src_nodes.extend(add_nodes)
            self.sleep(self.wait_timeout / 4)
            # CALL THE CBRECOVERY ROUTINE WITHOUT WAIT FOR COMPLETED
            self.cbr_routine(self.dest_master, self.src_master, False)
            if "create_bucket_when_recovery" in when_step:
                name = 'standard_bucket'
                try:
                    self.cluster.create_standard_bucket(self.src_master, name, STANDARD_BUCKET_PORT + 10, 100, 1)
                except BucketCreationException, e:
                    self.log.info("bucket creation failed during cbrecovery as expected")
                # but still able to create a bucket on the destination
                self.cluster.create_standard_bucket(self.dest_master, name, STANDARD_BUCKET_PORT + 10, 100, 1)
                # here we try to re-call cbrecovery (it seems to be supported even while it's still running)
                # if recovery is fast (=completed) we can get "No recovery needed"
                self.cbr_routine(self.dest_master, self.src_master)
            elif "recovery_when_rebalance" in when_step:
                rest.remove_all_recoveries()
                self.trigger_rebalance(rest, 15)
                try:
                    self.cbr_routine(self.dest_master, self.src_master)
                    self.log.exception("cbrecovery should have failed when rebalance is in progress")
                except CBRecoveryFailedException, e:
                    self.log.info("cbrecovery failed as expected when there are no failed-over nodes")
                reached = RestHelper(rest).rebalance_reached()
                self.assertTrue(reached, "rebalance failed or did not complete")
                if self._replication_direction_str == "unidirection":
                    self.log.warn("we expect data loss on source cluster with unidirection replication")
                    self.log.warn("verification of data will be skipped")
                    return
            elif "recovery_when_rebalance_stopped" in when_step:
                rest.remove_all_recoveries()
                self.trigger_rebalance(rest, 15)
                rest.stop_rebalance()
                try:
                    self.cbr_routine(self.dest_master, self.src_master)
                    self.log.exception("cbrecovery should have failed when rebalance has been stopped")
                except CBRecoveryFailedException, e:
                    self.log.info("cbrecovery failed as expected when there are no failed-over nodes")
def test_add_node_with_cert_non_master(self):
    rest = RestConnection(self.master)
    x509main(self.master).setup_master()
    x509main().setup_cluster_nodes_ssl(self.servers[1:3])
    servs_inout = self.servers[1]
    rest.add_node('Administrator', 'password', servs_inout.ip)
    known_nodes = ['ns_1@' + self.master.ip, 'ns_1@' + servs_inout.ip]
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    rest = RestConnection(self.servers[1])
    servs_inout = self.servers[2]
    rest.add_node('Administrator', 'password', servs_inout.ip)
    known_nodes = ['ns_1@' + self.master.ip, 'ns_1@' + servs_inout.ip, 'ns_1@' + self.servers[1].ip]
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    for server in self.servers[:3]:
        status = x509main(server)._validate_ssl_login()
        self.assertEqual(status, 200, "Not able to login via SSL code for ip - {0}".format(server.ip))
def _config_cluster(self, nodes):
    master = nodes[0]
    rest = RestConnection(master)
    for node in nodes[1:]:
        rest.add_node(master.rest_username, master.rest_password, node.ip, node.port)
    servers = rest.node_statuses()
    rest.rebalance(otpNodes=[node.id for node in servers], ejectedNodes=[])
    time.sleep(5)
def cbrecover_multiple_failover_swapout_reb_routine(self):
    self.common_preSetup()
    if self._failover is not None:
        if "source" in self._failover:
            rest = RestConnection(self.src_master)
            if self._default_bucket:
                self.initial_node_count = len(self.src_nodes)
                self.vbucket_map_before = rest.fetch_vbucket_map()  # JUST FOR DEFAULT BUCKET AS OF NOW
            if self._failover_count >= len(self.src_nodes):
                raise Exception("Won't failover .. count exceeds available servers on source : SKIPPING TEST")
            if len(self._floating_servers_set) < self._add_count:
                raise Exception("Not enough spare nodes available, to match the failover count : SKIPPING TEST")
            self.log.info("Failing over {0} nodes on source ..".format(self._failover_count))
            self.failed_nodes = self.src_nodes[(len(self.src_nodes) - self._failover_count):len(self.src_nodes)]
            self.cluster.failover(self.src_nodes, self.failed_nodes)
            for node in self.failed_nodes:
                self.src_nodes.remove(node)
            add_nodes = self._floating_servers_set[0:self._add_count]
            for node in add_nodes:
                rest.add_node(user=node.rest_username, password=node.rest_password,
                              remoteIp=node.ip, port=node.port)
            self.src_nodes.extend(add_nodes)
            self.sleep(self.wait_timeout / 4)
            # CALL THE CBRECOVERY ROUTINE
            self.cbr_routine(self.dest_master, self.src_master)
            self.trigger_rebalance(rest)
            if self._default_bucket:
                self.vbucket_map_after = rest.fetch_vbucket_map()
                self.final_node_count = len(self.src_nodes)
        elif "destination" in self._failover:
            rest = RestConnection(self.dest_master)
            if self._default_bucket:
                self.initial_node_count = len(self.dest_nodes)
                self.vbucket_map_before = rest.fetch_vbucket_map()  # JUST FOR DEFAULT BUCKET AS OF NOW
            if self._failover_count >= len(self.dest_nodes):
                raise Exception("Won't failover .. count exceeds available servers on sink : SKIPPING TEST")
            if len(self._floating_servers_set) < self._add_count:
                raise Exception("Not enough spare nodes available, to match the failover count : SKIPPING TEST")
            self.log.info("Failing over {0} nodes on destination ..".format(self._failover_count))
            self.failed_nodes = self.dest_nodes[(len(self.dest_nodes) - self._failover_count):len(self.dest_nodes)]
            self.cluster.failover(self.dest_nodes, self.failed_nodes)
            for node in self.failed_nodes:
                self.dest_nodes.remove(node)
            add_nodes = self._floating_servers_set[0:self._add_count]
            for node in add_nodes:
                rest.add_node(user=node.rest_username, password=node.rest_password,
                              remoteIp=node.ip, port=node.port)
            self.dest_nodes.extend(add_nodes)
            self.sleep(self.wait_timeout / 4)
            # CALL THE CBRECOVERY ROUTINE
            self.cbr_routine(self.src_master, self.dest_master)
            self.trigger_rebalance(rest)
            if self._default_bucket:
                self.vbucket_map_after = rest.fetch_vbucket_map()
                self.final_node_count = len(self.dest_nodes)
    self.common_tearDown_verification()
def test_failover_swap_rebalance(self):
    """ add and failover node then perform swap rebalance """
    assert len(self.servers) > 2, "not enough servers"
    nodeA = self.servers[0]
    nodeB = self.servers[1]
    nodeC = self.servers[2]
    gen_create = doc_generator('dcp', 0, self.num_items, doc_size=64)
    self._load_all_buckets(nodeA, gen_create, "create", 0)
    vbucket = 0
    # rebalance in nodeB
    assert self.cluster.rebalance([nodeA], [nodeB], [])
    # add nodeC
    rest = RestConnection(nodeB)
    rest.add_node(user=nodeC.rest_username, password=nodeC.rest_password,
                  remoteIp=nodeC.ip, port=nodeC.port)
    # stop and failover nodeA
    assert self.stop_node(0)
    self.stopped_nodes.append(0)
    self.master = nodeB
    assert self.cluster.failover([nodeB], [nodeA])
    try:
        assert self.cluster.rebalance([nodeB], [], [])
    except Exception:
        pass
    self.add_built_in_server_user()
    # verify seqnos and stream mutations
    rest = RestConnection(nodeB)
    total_mutations = 0
    # Create connection for CbStats
    shell_conn = RemoteMachineShellConnection(self.cluster.master)
    cb_stat_obj = Cbstats(shell_conn)
    vb_info = cb_stat_obj.vbucket_seqno(self.bucket_util.buckets[0].name)
    for vb in range(0, self.vbuckets):
        total_mutations += int(vb_info[vb]["high_seqno"])
    # Disconnect the Cbstats shell_conn
    shell_conn.disconnect()
    # not divided by 2 even though the items are split between 2 servers
    self.assertTrue(total_mutations == self.num_items,
                    msg="Number mismatch. {0} != {1}".format(total_mutations, self.num_items))
    task = self.cluster.async_rebalance([nodeB], [], [nodeC])
    task.result()
def test_add_node(self):
    hostnames = self.rename_nodes(self.servers)
    self.verify_referenced_by_names(self.servers, hostnames)
    self._set_hostames_to_servers_objs(hostnames)
    master_rest = RestConnection(self.master)
    self.sleep(5, "Sleep to wait renaming")
    for server in self.servers[1:self.nodes_in + 1]:
        master_rest.add_node(server.rest_username, server.rest_password, hostnames[server], server.port)
    self.verify_referenced_by_names(self.servers, hostnames)
def _common_test_body(self):
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    rebalanced_servers = [master]
    bucket_data = RebalanceBaseTest.bucket_data_init(rest)
    self.log.info("INITIAL LOAD")
    RebalanceBaseTest.load_all_buckets_task(rest, self.task_manager, bucket_data,
                                            self.load_ratio, keys_count=self.keys_count)
    for name in bucket_data:
        for thread in bucket_data[name]["threads"]:
            bucket_data[name]["items_inserted_count"] += thread.inserted_keys_count()
    for server in self.servers[1:]:
        self.log.info("current nodes : {0}".format(RebalanceHelper.getOtpNodeIds(master)))
        # do this num_rebalance times: add the node, rebalance, fail it over, remove it and rebalance
        for i in range(0, self.num_rebalance):
            distribution = RebalanceBaseTest.get_distribution(self.load_ratio)
            RebalanceBaseTest.load_data_for_buckets(rest, self.load_ratio, distribution, [master], bucket_data, self)
            self.log.info("adding node {0} and rebalance afterwards".format(server.ip))
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
            msg = "unable to add node {0} to the cluster {1}"
            self.assertTrue(otpNode, msg.format(server.ip, master.ip))
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
            self.assertTrue(rest.monitorRebalance(),
                            msg="rebalance operation failed after adding node {0}".format(server.ip))
            rebalanced_servers.append(server)
            RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self, True)
            rest.fail_over(otpNode.id)
            self.log.info("failed over {0}".format(otpNode.id))
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                           ejectedNodes=[otpNode.id])
            msg = "rebalance failed while removing failover nodes {0}".format(otpNode.id)
            self.assertTrue(rest.monitorRebalance(), msg=msg)
            # now verify the numbers again ?
            RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self, True)
            # wait 6 minutes
            time.sleep(6 * 60)
        self.log.info("adding node {0} and rebalance afterwards".format(server.ip))
        otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
        msg = "unable to add node {0} to the cluster {1}"
        self.assertTrue(otpNode, msg.format(server.ip, master.ip))
        distribution = RebalanceBaseTest.get_distribution(self.load_ratio)
        RebalanceBaseTest.load_data_for_buckets(rest, self.load_ratio, distribution, rebalanced_servers, bucket_data, self)
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
        self.assertTrue(rest.monitorRebalance(),
                        msg="rebalance operation failed after adding node {0}".format(server.ip))
        rebalanced_servers.append(server)
        RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self, True)
def rest_api_addNode(self):
    hostnames = self.convert_to_hostname(self, self.servers[0:2])
    master_rest = RestConnection(self.master)
    master_rest.add_node(self.servers[1].rest_username, self.servers[1].rest_password,
                         hostnames[1], self.servers[1].port)
    # Now check whether the node which we added is still referred to via hostname or not.
    obj = RestConnection(self.servers[1])
    var = obj.get_nodes_self().hostname
    flag = True if self.servers[1].ip in var else False
    self.assertEqual(flag, False,
                     msg="Fail - Name of node {0} got converted to IP. Failing the test!!!".format(self.servers[1].ip))
    self.log.info("Test Passed!!")
    self.sleep(10)
def test_add_node_without_cert(self):
    rest = RestConnection(self.master)
    servs_inout = self.servers[1]
    x509main(self.master).setup_master()
    try:
        rest.add_node('Administrator', 'password', servs_inout.ip)
    except Exception, ex:
        ex = str(ex)
        expected_result = "Error adding node: " + servs_inout.ip + " to the cluster:" + self.master.ip + \
            " - [\"Prepare join failed. Error applying node certificate. Unable to read certificate chain file\"]"
        #self.assertEqual(ex, expected_result)
        self.assertTrue(expected_result in ex, "Incorrect Error message in exception")
def test_failover_swap_rebalance(self):
    """ add and failover node then perform swap rebalance """
    assert len(self.servers) > 2, "not enough servers"
    nodeA = self.servers[0]
    nodeB = self.servers[1]
    nodeC = self.servers[2]
    gen_create = BlobGenerator('dcp', 'dcp-', 64, start=0, end=self.num_items)
    self._load_all_buckets(nodeA, gen_create, "create", 0)
    vbucket = 0
    vb_uuid, seqno, high_seqno = self.vb_info(nodeA, vbucket)
    # rebalance in nodeB
    assert self.cluster.rebalance([nodeA], [nodeB], [])
    # add nodeC
    rest = RestConnection(nodeB)
    rest.add_node(user=nodeC.rest_username, password=nodeC.rest_password,
                  remoteIp=nodeC.ip, port=nodeC.port)
    # stop and failover nodeA
    assert self.stop_node(0)
    self.stopped_nodes.append(0)
    self.master = nodeB
    assert self.cluster.failover([nodeB], [nodeA])
    try:
        assert self.cluster.rebalance([nodeB], [], [])
    except:
        pass
    self.add_built_in_server_user()
    # verify seqnos and stream mutations
    rest = RestConnection(nodeB)
    vbuckets = rest.get_vbuckets()
    total_mutations = 0
    for vb in vbuckets:
        mcd_client = self.mcd_client(nodeB, auth_user=True)
        stats = mcd_client.stats(VBSEQNO_STAT)
        vbucket = vb.id
        key = 'vb_{0}:high_seqno'.format(vbucket)
        total_mutations += int(stats[key])
    # not divided by 2 even though the items are split between 2 servers
    assert total_mutations == self.num_items
    task = self.cluster.async_rebalance([nodeB], [], [nodeC])
    task.result()
def _add_node_itself_body(self):
    self.common_setUp(False)
    master = self.servers[0]
    master_rest = RestConnection(master)
    self.log.info('adding node : {0} to the cluster'.format(master))
    try:
        master_rest.add_node(user=self.membase.rest_username,
                             password=self.membase.rest_password,
                             remoteIp=master.ip, port=master.port)
        self.fail("server did not raise any exception while adding the node to itself")
    except ServerJoinException as ex:
        self.assertEquals(ex.type, MembaseHttpExceptionTypes.NODE_CANT_ADD_TO_ITSELF)
def add_remove_servers_and_rebalance(self, to_add, to_remove):
    """
    Add and/or remove servers and rebalance.
    :param to_add: List of nodes to be added.
    :param to_remove: List of nodes to be removed.
    """
    serverinfo = self.servers[0]
    rest = RestConnection(serverinfo)
    for node in to_add:
        rest.add_node(user=serverinfo.rest_username, password=serverinfo.rest_password,
                      remoteIp=node.ip)
    self.shuffle_nodes_between_zones_and_rebalance(to_remove)
def _add_node_itself_body(self):
    self.common_setUp(False)
    master = self.servers[0]
    master_rest = RestConnection(master)
    self.log.info('adding node : {0} to the cluster'.format(master))
    try:
        master_rest.add_node(user=self.membase.rest_username,
                             password=self.membase.rest_password,
                             remoteIp=master.ip, port=master.port)
        self.fail("server did not raise any exception while adding the node to itself")
    except ServerSelfJoinException as ex:
        self.assertEquals(ex.type, MembaseHttpExceptionTypes.NODE_CANT_ADD_TO_ITSELF)
def test_basic_xdcr_with_cert(self):
    cluster1 = self.servers[0:2]
    cluster2 = self.servers[2:4]
    remote_cluster_name = 'sslcluster'
    restCluster1 = RestConnection(cluster1[0])
    restCluster2 = RestConnection(cluster2[0])
    try:
        # Setup cluster1
        x509main(cluster1[0]).setup_master()
        x509main(cluster1[1])._setup_node_certificates(reload_cert=False)
        restCluster1.add_node('Administrator', 'password', cluster1[1].ip)
        known_nodes = ['ns_1@' + cluster1[0].ip, 'ns_1@' + cluster1[1].ip]
        restCluster1.rebalance(known_nodes)
        self.assertTrue(self.check_rebalance_complete(restCluster1), "Issue with rebalance")
        restCluster1.create_bucket(bucket='default', ramQuotaMB=100)
        restCluster1.remove_all_replications()
        restCluster1.remove_all_remote_clusters()
        # Setup cluster2
        x509main(cluster2[0]).setup_master()
        x509main(cluster2[1])._setup_node_certificates(reload_cert=False)
        restCluster2.add_node('Administrator', 'password', cluster2[1].ip)
        known_nodes = ['ns_1@' + cluster2[0].ip, 'ns_1@' + cluster2[1].ip]
        restCluster2.rebalance(known_nodes)
        self.assertTrue(self.check_rebalance_complete(restCluster2), "Issue with rebalance")
        restCluster2.create_bucket(bucket='default', ramQuotaMB=100)
        test = x509main.CACERTFILEPATH + x509main.CACERTFILE
        data = open(test, 'rb').read()
        restCluster1.add_remote_cluster(cluster2[0].ip, cluster2[0].port, 'Administrator', 'password',
                                        remote_cluster_name, certificate=data)
        replication_id = restCluster1.start_replication('continuous', 'default', remote_cluster_name)
        self.assertTrue(replication_id is not None, "Replication was not created successfully")
    finally:
        known_nodes = ['ns_1@' + cluster2[0].ip, 'ns_1@' + cluster2[1].ip]
        restCluster2.rebalance(known_nodes, ['ns_1@' + cluster2[1].ip])
        self.assertTrue(self.check_rebalance_complete(restCluster2), "Issue with rebalance")
        restCluster2.delete_bucket()
def test_verify_mb20463(self):
    src_version = NodeHelper.get_cb_version(self.src_cluster.get_master_node())
    if float(src_version[:3]) != 4.5:
        self.log.info("Source cluster has to be at 4.5 for this test")
        return
    servs = self._input.servers[2:4]
    params = {}
    params['num_nodes'] = len(servs)
    params['product'] = 'cb'
    params['version'] = '4.1.2-6088'
    params['vbuckets'] = [1024]
    self.log.info("will install {0} on {1}".format('4.1.2-6088', [s.ip for s in servs]))
    InstallerJob().parallel_install(servs, params)
    if params['product'] in ["couchbase", "couchbase-server", "cb"]:
        success = True
        for server in servs:
            success &= RemoteMachineShellConnection(server).is_couchbase_installed()
        if not success:
            self.fail("some nodes were not installed successfully on target cluster!")
    self.log.info("4.1.2 installed successfully on target cluster")
    conn = RestConnection(self.dest_cluster.get_master_node())
    conn.add_node(user=self._input.servers[3].rest_username,
                  password=self._input.servers[3].rest_password,
                  remoteIp=self._input.servers[3].ip)
    self.sleep(30)
    conn.rebalance(otpNodes=[node.id for node in conn.node_statuses()])
    self.sleep(30)
    conn.create_bucket(bucket='default', ramQuotaMB=512)
    tasks = self.setup_xdcr_async_load()
    self.sleep(30)
    NodeHelper.enable_firewall(self.dest_master)
    self.sleep(30)
    NodeHelper.disable_firewall(self.dest_master)
    for task in tasks:
        task.result()
    self._wait_for_replication_to_catchup(timeout=600)
    self.verify_results()
def add_nodes(self, task_manager):
    master = self.servers[0]
    rest = RestConnection(master)
    try:
        for node in self.to_add:
            self.log.info("adding node {0}:{1} to cluster".format(node.ip, node.port))
            rest.add_node(master.rest_username, master.rest_password, node.ip, node.port)
        self.state = "start_rebalance"
        task_manager.schedule(self)
    except Exception as e:
        self.state = "finished"
        self.set_result({"status": "error", "value": e})
def test_get_cluster_ca_cluster(self):
    servs_inout = self.servers[1]
    rest = RestConnection(self.master)
    x509main(self.master).setup_master()
    x509main(servs_inout)._setup_node_certificates(reload_cert=False)
    servs_inout = self.servers[1]
    rest.add_node('Administrator', 'password', servs_inout.ip)
    for server in self.servers[:2]:
        status, content, header = x509main(server)._get_cluster_ca_cert()
        content = json.loads(content)
        self.assertTrue(status, "Issue while Cluster CA Cert")
        self.assertEqual(content['cert']['type'], "uploaded", "Type of certificate is mismatched")
        self.assertEqual(content['cert']['subject'], "CN=Root Authority", "Common Name is incorrect")
def test_get_cluster_ca_cluster(self):
    servs_inout = self.servers[1]
    rest = RestConnection(self.master)
    x509main(self.master).setup_master()
    x509main(servs_inout)._setup_node_certificates(reload_cert=False)
    self.sleep(30)
    servs_inout = self.servers[1]
    rest.add_node('Administrator', 'password', servs_inout.ip)
    for server in self.servers[:2]:
        status, content, header = x509main(server)._get_cluster_ca_cert()
        content = json.loads(content)
        self.assertTrue(status, "Issue while Cluster CA Cert")
        self.assertEqual(content['cert']['type'], "uploaded", "Type of certificate is mismatched")
        self.assertEqual(content['cert']['subject'], "CN=Root Authority", "Common Name is incorrect")
def test_add_node_with_cert(self):
    servs_inout = self.servers[1:4]
    rest = RestConnection(self.master)
    x509main(self.master).setup_master()
    x509main().setup_cluster_nodes_ssl(servs_inout)
    known_nodes = ['ns_1@' + self.master.ip]
    for server in servs_inout:
        rest.add_node('Administrator', 'password', server.ip)
        known_nodes.append('ns_1@' + server.ip)
    rest.rebalance(known_nodes)
    self.assertTrue(self.check_rebalance_complete(rest), "Issue with rebalance")
    for server in self.servers:
        status = x509main(server)._validate_ssl_login()
        self.assertEqual(status, 200, "Not able to login via SSL code")
def test_failover_swap_rebalance(self):
    """ add and failover node then perform swap rebalance """
    assert len(self.servers) > 2, "not enough servers"
    nodeA = self.servers[0]
    nodeB = self.servers[1]
    nodeC = self.servers[2]
    gen_create = BlobGenerator('dcp', 'dcp-', 64, start=0, end=self.num_items)
    self._load_all_buckets(nodeA, gen_create, "create", 0)
    vbucket = 0
    vb_uuid, seqno, high_seqno = self.vb_info(nodeA, vbucket)
    # rebalance in nodeB
    assert self.cluster.rebalance([nodeA], [nodeB], [])
    # add nodeC
    rest = RestConnection(nodeB)
    rest.add_node(user=nodeC.rest_username, password=nodeC.rest_password,
                  remoteIp=nodeC.ip, port=nodeC.port)
    # stop and failover nodeA
    assert self.stop_node(0)
    self.stopped_nodes.append(0)
    assert self.cluster.failover([nodeB], [nodeA])
    try:
        assert self.cluster.rebalance([nodeB], [], [])
    except:
        pass
    # verify seqnos and stream mutations
    rest = RestConnection(nodeB)
    vbuckets = rest.get_vbuckets()
    total_mutations = 0
    for vb in vbuckets:
        mcd_client = self.mcd_client(nodeB)
        stats = mcd_client.stats(VBSEQNO_STAT)
        vbucket = vb.id
        key = 'vb_{0}:high_seqno'.format(vbucket)
        total_mutations += int(stats[key])
    # not divided by 2 even though the items are split between 2 servers
    assert total_mutations == self.num_items
    task = self.cluster.async_rebalance([nodeB], [], [nodeC])
    task.result()
def test_add_node_without_cert(self):
    rest = RestConnection(self.master)
    servs_inout = self.servers[1]
    x509main(self.master).setup_master()
    try:
        rest.add_node('Administrator', 'password', servs_inout.ip)
    except Exception, ex:
        ex = str(ex)
        #expected_result = "Error adding node: " + servs_inout.ip + " to the cluster:" + self.master.ip + " - [\"Prepare join failed. Error applying node certificate. Unable to read certificate chain file\"]"
        expected_result = "Error adding node: " + servs_inout.ip + " to the cluster:" + self.master.ip
        self.assertTrue(expected_result in ex, "Incorrect Error message in exception")
        expected_result = "Error applying node certificate. Unable to read certificate chain file"
        self.assertTrue(expected_result in ex, "Incorrect Error message in exception")
        expected_result = "The file does not exist."
        self.assertTrue(expected_result in ex, "Incorrect Error message in exception")
def add_all_nodes_or_assert(master, all_servers, rest_settings, test_case):
    log = logger.Logger.get_logger()
    otpNodes = []
    all_nodes_added = True
    rest = RestConnection(master)
    for serverInfo in all_servers:
        if serverInfo.ip != master.ip:
            log.info('adding node : {0}:{1} to the cluster'.format(serverInfo.ip, serverInfo.port))
            otpNode = rest.add_node(rest_settings.rest_username, rest_settings.rest_password, serverInfo.ip)
            if otpNode:
                log.info('added node : {0} to the cluster'.format(otpNode.id))
                otpNodes.append(otpNode)
            else:
                all_nodes_added = False
    if not all_nodes_added:
        if test_case:
            test_case.assertTrue(all_nodes_added, msg="unable to add all the nodes to the cluster")
        else:
            log.error("unable to add all the nodes to the cluster")
    return otpNodes
def begin_rebalance_in(master, servers, timeout=5):
    log = logger.Logger.get_logger()
    rest = RestConnection(master)
    otpNode = None
    for server in servers:
        if server == master:
            continue
        log.info("adding node {0}:{1} to cluster".format(server.ip, server.port))
        try:
            otpNode = rest.add_node(master.rest_username, master.rest_password, server.ip, server.port)
            msg = "unable to add node {0}:{1} to the cluster"
            assert otpNode, msg.format(server.ip, server.port)
        except ServerAlreadyJoinedException:
            log.info("server {0} already joined".format(server))
    log.info("beginning rebalance in")
    try:
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
    except:
        log.error("rebalance failed, trying again after {0} seconds".format(timeout))
def _common_test_body(self, moxi=False):
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    bucket_data = RebalanceBaseTest.bucket_data_init(rest)
    for server in self.servers[1:]:
        self.log.info("current nodes : {0}".format(RebalanceHelper.getOtpNodeIds(master)))
        self.log.info("adding node {0}:{1} and rebalance afterwards".format(server.ip, server.port))
        otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
        msg = "unable to add node {0} to the cluster {1}"
        self.assertTrue(otpNode, msg.format(server.ip, master.ip))
        for name in bucket_data:
            inserted_keys, rejected_keys = \
                MemcachedClientHelper.load_bucket_and_return_the_keys(servers=[self.servers[0]],
                                                                      name=name,
                                                                      ram_load_ratio=-1,
                                                                      number_of_items=self.keys_count,
                                                                      number_of_threads=1,
                                                                      write_only=True)
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
            self.assertTrue(rest.monitorRebalance(),
                            msg="rebalance operation failed after adding node {0}".format(server.ip))
            self.log.info("completed rebalancing in server {0}".format(server))
            IncrementalRebalanceWithParallelReadTests._reader_thread(self, inserted_keys, bucket_data, moxi=moxi)
            self.assertTrue(rest.monitorRebalance(),
                            msg="rebalance operation failed after adding node {0}".format(server.ip))
            break
def _add_and_rebalance(self, servers, wait_for_rebalance=True):
    log = logger.Logger.get_logger()
    master = servers[0]
    all_nodes_added = True
    rebalanced = True
    rest = RestConnection(master)
    if len(servers) > 1:
        for serverInfo in servers[1:]:
            log.info('adding {0} node : {1}:{2} to the cluster'.format(
                serverInfo.services, serverInfo.ip, serverInfo.port))
            services = serverInfo.services.split()
            if self.skip_services:
                services = None
            otpNode = rest.add_node(master.rest_username, master.rest_password,
                                    serverInfo.ip, port=serverInfo.port, services=services)
            if otpNode:
                log.info('added node : {0} to the cluster'.format(otpNode.id))
            else:
                all_nodes_added = False
                break
        if all_nodes_added:
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
            if wait_for_rebalance:
                rebalanced &= rest.monitorRebalance()
            else:
                rebalanced = False
    return all_nodes_added and rebalanced
def rebalance_in(servers, how_many, do_shuffle=True, monitor=True):
    servers_rebalanced = []
    log = logger.Logger.get_logger()
    rest = RestConnection(servers[0])
    nodes = rest.node_statuses()
    nodeIps = ["{0}:{1}".format(node.ip, node.port) for node in nodes]
    log.info("current nodes : {0}".format(nodeIps))
    toBeAdded = []
    master = servers[0]
    selection = servers[1:]
    if do_shuffle:
        shuffle(selection)
    for server in selection:
        if not "{0}:{1}".format(server.ip, server.port) in nodeIps:
            toBeAdded.append(server)
            servers_rebalanced.append(server)
        if len(toBeAdded) == int(how_many):
            break
    for server in toBeAdded:
        otpNode = rest.add_node(master.rest_username, master.rest_password, server.ip, server.port)
    otpNodes = [node.id for node in rest.node_statuses()]
    started = rest.rebalance(otpNodes, [])
    msg = "rebalance operation started ? {0}"
    log.info(msg.format(started))
    if monitor is not True:
        return True, servers_rebalanced
    if started:
        result = rest.monitorRebalance()
        msg = "successfully rebalanced in selected nodes from the cluster ? {0}"
        log.info(msg.format(result))
        return result, servers_rebalanced
    return False, servers_rebalanced
def _add_all_node_body(self):
    self.common_setUp(False)
    master = self.servers[0]
    master_rest = RestConnection(master)
    added_otps = []
    for i in range(1, len(self.servers)):
        ip = self.servers[i].ip
        port = self.servers[i].port
        self.log.info('adding node : {0} to the cluster'.format(ip))
        otpNode = master_rest.add_node(user=self.membase.rest_username,
                                       password=self.membase.rest_password,
                                       remoteIp=ip, port=port)
        if otpNode:
            added_otps.append(otpNode)
            self.log.info('added node : {0} to the cluster'.format(otpNode.id))
        else:
            self.fail(msg="unable to add node : {0} to the cluster".format(ip))
    time.sleep(5)
    for otpNode in added_otps:
        # now lets eject it
        self.log.info("ejecting the node {0}".format(otpNode.id))
        ejected = master_rest.eject_node(user=self.membase.rest_username,
                                         password=self.membase.rest_password,
                                         otpNode=otpNode.id)
        self.assertTrue(ejected, msg="unable to eject the node {0}".format(otpNode.id))
def rebalance_swap(servers, how_many, monitor=True):
    log = logging.getLogger("infra")
    if how_many < 1:
        log.error("failed to swap rebalance %s servers - invalid count" % how_many)
        return False, []
    rest = RestConnection(servers[0])
    cur_nodes = rest.node_statuses()
    cur_ips = map(lambda node: node.ip, cur_nodes)
    cur_ids = map(lambda node: node.id, cur_nodes)
    free_servers = filter(lambda server: server.ip not in cur_ips, servers)
    if len(cur_ids) <= how_many or len(free_servers) < how_many:
        log.error("failed to swap rebalance %s servers - not enough servers" % how_many)
        return False, []
    ejections = cur_ids[-how_many:]
    additions = free_servers[:how_many]
    log.info("swap rebalance: cur: %s, eject: %s, add: %s" % (cur_ids, ejections, additions))
    try:
        map(lambda server: rest.add_node(servers[0].rest_username, servers[0].rest_password,
                                         server.ip, server.port), additions)
    except (ServerAlreadyJoinedException, ServerSelfJoinException, AddNodeException), e:
        log.error("failed to swap rebalance - addition failed %s: %s" % (additions, e))
        return False, []
def _failover_swap_rebalance(self):
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    num_initial_servers = self.num_initial_servers
    intial_severs = self.servers[:num_initial_servers]
    self.log.info("CREATE BUCKET PHASE")
    SwapRebalanceBase.create_buckets(self)
    # Cluster all starting set of servers
    self.log.info("INITIAL REBALANCE PHASE")
    status, servers_rebalanced = RebalanceHelper.rebalance_in(intial_severs, len(intial_severs) - 1)
    self.assertTrue(status, msg="Rebalance failed")
    self.log.info("DATA LOAD PHASE")
    self.loaders = SwapRebalanceBase.start_load_phase(self, master)
    # Wait till load phase is over
    SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
    self.log.info("DONE LOAD PHASE")
    # Start the swap rebalance
    self.log.info("current nodes : {0}".format(RebalanceHelper.getOtpNodeIds(master)))
    toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
    optNodesIds = [node.id for node in toBeEjectedNodes]
    if self.fail_orchestrator:
        status, content = ClusterOperationHelper.find_orchestrator(master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".format(status, content))
        optNodesIds[0] = content
    self.log.info("FAILOVER PHASE")
    # Failover selected nodes
    for node in optNodesIds:
        self.log.info("failover node {0} and rebalance afterwards".format(node))
        rest.fail_over(node)
        self.assertTrue(rest.monitorRebalance(), msg="failed after failover of {0}".format(node))
    new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.failover_factor]
    for server in new_swap_servers:
        otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
        msg = "unable to add node {0} to the cluster"
        self.assertTrue(otpNode, msg.format(server.ip))
    if self.fail_orchestrator:
        rest = RestConnection(new_swap_servers[0])
        master = new_swap_servers[0]
    self.log.info("DATA ACCESS PHASE")
    self.loaders = SwapRebalanceBase.start_access_phase(self, master)
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   ejectedNodes=optNodesIds)
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding node {0}".format(new_swap_servers))
    SwapRebalanceBase.verification_phase(self, master)
def add_and_rebalance(servers, wait_for_rebalance=True):
    log = logger.Logger.get_logger()
    master = servers[0]
    all_nodes_added = True
    rebalanced = True
    rest = RestConnection(master)
    if len(servers) > 1:
        for serverInfo in servers[1:]:
            log.info('adding node : {0}:{1} to the cluster'.format(serverInfo.ip, serverInfo.port))
            otpNode = rest.add_node(master.rest_username, master.rest_password,
                                    serverInfo.ip, port=serverInfo.port)
            if otpNode:
                log.info('added node : {0} to the cluster'.format(otpNode.id))
            else:
                all_nodes_added = False
                break
        if all_nodes_added:
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
            if wait_for_rebalance:
                rebalanced &= rest.monitorRebalance()
            else:
                rebalanced = False
    return all_nodes_added and rebalanced
def rebalance_swap(servers, how_many, monitor=True):
    if how_many < 1:
        log.error("failed to swap rebalance %s servers - invalid count" % how_many)
        return False, []
    rest = RestConnection(servers[0])
    cur_nodes = rest.node_statuses()
    cur_ips = map(lambda node: node.ip, cur_nodes)
    cur_ids = map(lambda node: node.id, cur_nodes)
    free_servers = filter(lambda server: server.ip not in cur_ips, servers)
    if len(cur_ids) <= how_many or len(free_servers) < how_many:
        log.error("failed to swap rebalance %s servers - not enough servers" % how_many)
        return False, []
    ejections = cur_ids[-how_many:]
    additions = free_servers[:how_many]
    log.info("swap rebalance: cur: %s, eject: %s, add: %s" % (cur_ids, ejections, additions))
    try:
        map(lambda server: rest.add_node(servers[0].rest_username, servers[0].rest_password,
                                         server.ip, server.port), additions)
    except (ServerAlreadyJoinedException, ServerSelfJoinException, AddNodeException), e:
        log.error("failed to swap rebalance - addition failed %s: %s" % (additions, e))
        return False, []
def test_add_same_node_to_cluster(self):
    self.assertTrue(len(self.servers) > 1, "test requires more than 1 node")
    self.assertTrue(self.use_names > 1, "test requires more than 1 use_names")
    hostnames = self.rename_nodes(self.servers[:self.nodes_in + self.nodes_init])
    self._set_hostames_to_servers_objs(hostnames)
    self.cluster.rebalance(self.servers[:self.nodes_init],
                           self.servers[self.nodes_init:self.nodes_in + self.nodes_init], [],
                           use_hostnames=True)
    self.verify_referenced_by_names(self.servers[:self.nodes_init], hostnames)
    add_node = self.servers[:self.nodes_in + self.nodes_init][-1]
    new_name = self.name_prefix + str(add_node.ip.split('.')[-1]) + '_1' + '.' + self.domain
    master_rest = RestConnection(self.master)
    try:
        master_rest.add_node(add_node.rest_username, add_node.rest_password, new_name)
    except ServerAlreadyJoinedException:
        self.log.info('Expected exception was raised.')
    else:
        self.fail('Expected exception wasn\'t raised')
def _failover_swap_rebalance(self):
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    num_initial_servers = self.num_initial_servers
    intial_severs = self.servers[:num_initial_servers]
    self.log.info("CREATE BUCKET PHASE")
    SwapRebalanceBase.create_buckets(self)
    # Cluster all starting set of servers
    self.log.info("INITIAL REBALANCE PHASE")
    status, servers_rebalanced = RebalanceHelper.rebalance_in(intial_severs, len(intial_severs) - 1)
    self.assertTrue(status, msg="Rebalance failed")
    self.log.info("DATA LOAD PHASE")
    self.loaders = SwapRebalanceBase.start_load_phase(self, master)
    # Wait till load phase is over
    SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
    self.log.info("DONE LOAD PHASE")
    # Start the swap rebalance
    self.log.info("current nodes : {0}".format(RebalanceHelper.getOtpNodeIds(master)))
    toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
    optNodesIds = [node.id for node in toBeEjectedNodes]
    if self.fail_orchestrator:
        status, content = ClusterOperationHelper.find_orchestrator(master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".format(status, content))
        optNodesIds[0] = content
    self.log.info("FAILOVER PHASE")
    # Failover selected nodes
    for node in optNodesIds:
        self.log.info("failover node {0} and rebalance afterwards".format(node))
        rest.fail_over(node)
    new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.failover_factor]
    for server in new_swap_servers:
        otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
        msg = "unable to add node {0} to the cluster"
        self.assertTrue(otpNode, msg.format(server.ip))
    if self.fail_orchestrator:
        rest = RestConnection(new_swap_servers[0])
        master = new_swap_servers[0]
    self.log.info("DATA ACCESS PHASE")
    self.loaders = SwapRebalanceBase.start_access_phase(self, master)
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   ejectedNodes=optNodesIds)
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding node {0}".format(new_swap_servers))
    SwapRebalanceBase.verification_phase(self, master)
def rebalance_in(servers, how_many, do_shuffle=True, monitor=True, do_check=True):
    servers_rebalanced = []
    log = logger.Logger.get_logger()
    rest = RestConnection(servers[0])
    nodes = rest.node_statuses()
    # are all ips the same
    nodes_on_same_ip = True
    firstIp = nodes[0].ip
    if len(nodes) == 1:
        nodes_on_same_ip = False
    else:
        for node in nodes:
            if node.ip != firstIp:
                nodes_on_same_ip = False
                break
    nodeIps = ["{0}:{1}".format(node.ip, node.port) for node in nodes]
    log.info("current nodes : {0}".format(nodeIps))
    toBeAdded = []
    master = servers[0]
    selection = servers[1:]
    if do_shuffle:
        shuffle(selection)
    for server in selection:
        if nodes_on_same_ip:
            if not "{0}:{1}".format(firstIp, server.port) in nodeIps:
                toBeAdded.append(server)
                servers_rebalanced.append(server)
                log.info("choosing {0}:{1}".format(server.ip, server.port))
        elif not "{0}:{1}".format(server.ip, server.port) in nodeIps:
            toBeAdded.append(server)
            servers_rebalanced.append(server)
            log.info("choosing {0}:{1}".format(server.ip, server.port))
        if len(toBeAdded) == int(how_many):
            break
    if do_check and len(toBeAdded) < how_many:
        raise Exception("unable to find {0} nodes to rebalance_in".format(how_many))
    for server in toBeAdded:
        otpNode = rest.add_node(master.rest_username, master.rest_password, server.ip, server.port)
    otpNodes = [node.id for node in rest.node_statuses()]
    started = rest.rebalance(otpNodes, [])
    msg = "rebalance operation started ? {0}"
    log.info(msg.format(started))
    if monitor is not True:
        return True, servers_rebalanced
    if started:
        try:
            result = rest.monitorRebalance()
        except RebalanceFailedException as e:
            log.error("rebalance failed: {0}".format(e))
            return False, servers_rebalanced
        msg = "successfully rebalanced in selected nodes from the cluster ? {0}"
        log.info(msg.format(result))
        return result, servers_rebalanced
    return False, servers_rebalanced
def pending_add_with_ddoc_ops(self):
    # load initial documents
    self._load_doc_data_all_buckets()
    rest = RestConnection(self.master)
    for node in self.servers[1:]:
        self.log.info("adding node {0}:{1} to cluster".format(node.ip, node.port))
        rest.add_node(self.master.rest_username, self.master.rest_password, node.ip, node.port)
    for bucket in self.buckets:
        self._execute_ddoc_ops("create", self.test_with_view, self.num_ddocs, self.num_views_per_ddoc)
        if self.ddoc_ops in ["update", "delete"]:
            self._execute_ddoc_ops(self.ddoc_ops, self.test_with_view,
                                   self.num_ddocs / 2, self.num_views_per_ddoc / 2)
    self._verify_ddoc_ops_all_buckets()
    if self.test_with_view:
        self._verify_ddoc_data_all_buckets()