Example #1
    def _get_cb_os(self):
        rest = RestConnection(self.master)
        return rest.get_nodes_self().os
Example #2
    def validate_results(self,
                         servers,
                         results,
                         clusterLevelEncryption,
                         ntonStatus='enable'):
        finalResult = True
        for node in servers:
            node_info = "{0}:{1}".format(node.ip, node.port)
            log.info(node_info)
            log.info(ntonStatus)
            rest = RestConnection(node)
            node_services_list = rest.get_nodes_services()[node_info]

            for service in node_services_list:
                if service == 'kv':
                    node_ports = self.PORTS_NSSERVER_SSL
                elif service == 'cbas':
                    node_ports = self.PORTS_ANALYTICS
                elif service == 'index':
                    node_ports = self.PORTS_INDEXER
                elif service == 'n1ql':
                    node_ports = self.PORTS_QUERY
                elif service == 'fts':
                    node_ports = self.PORTS_FTS
                else:
                    # skip services without a port list here instead of
                    # silently reusing the previous service's ports
                    continue

                if ntonStatus == 'enable':
                    for node_port in node_ports:
                        for result in results:
                            if node_port == result['port']:
                                if clusterLevelEncryption == 'all' and result[
                                        'service'] in [
                                            'cbas', 'kv', 'index', 'n1ql',
                                            'fts'
                                        ]:
                                    if result['result'] is False:
                                        finalResult = False
                                elif clusterLevelEncryption == 'control' and result[
                                        'service'] in ['cbas', 'index']:
                                    if result['result'] is True:
                                        finalResult = False
                                elif clusterLevelEncryption == 'control' and result[
                                        'service'] in ['kv', 'n1ql', 'fts']:
                                    if result['result'] is False:
                                        finalResult = False
                                log.info(
                                    "Value of {0} result {1} finalResult {2} port {3}"
                                    .format(result['service'], result['result'],
                                            finalResult, result['port']))
                else:
                    for node_port in node_ports:
                        for result in results:
                            if node_port == result['port']:
                                if result['service'] in [
                                        'cbas', 'kv', 'index'
                                ]:
                                    if result['result'] is not False:
                                        finalResult = False
                                elif result['service'] in ['n1ql', 'fts']:
                                    if result['result'] is False:
                                        finalResult = False
                                log.info(
                                    "Value of {0} result {1} finalResult {2} port {3}"
                                    .format(result['service'],
                                            result['result'], finalResult,
                                            result['port']))
        return finalResult
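
The validator above only reads three keys from each scan entry. A minimal sketch of the `results` shape it expects (the ports and outcomes below are illustrative, not taken from a real scan):

# illustrative input for validate_results; each entry describes one
# port-scan outcome: which service owns the port and whether the
# connection was encrypted ('result' is True/False)
results = [
    {'service': 'kv', 'port': 11207, 'result': True},
    {'service': 'n1ql', 'port': 18093, 'result': True},
    {'service': 'cbas', 'port': 18095, 'result': False},
]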
Example #3
def xdcr_link_cluster(src_master, dest_master, dest_cluster_name):
    rest_conn_src = RestConnection(src_master)
    rest_conn_src.add_remote_cluster(dest_master.ip, dest_master.port,
                                     dest_master.rest_username,
                                     dest_master.rest_password,
                                     dest_cluster_name)
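
A hedged usage sketch for this helper; `servers` is a placeholder for the usual testrunner server list (objects carrying `ip`, `port` and REST credentials):

# sketch only: 'servers' stands for the testrunner server list
src, dest = servers[0], servers[2]
xdcr_link_cluster(src, dest, "remote-cluster-A")
# linking in the opposite direction would make the XDCR
# relationship bidirectional
xdcr_link_cluster(dest, src, "remote-cluster-B")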
Example #4
    def cleanup_cluster(servers, wait_for_rebalance=True, master=None):
        log = logger.Logger.get_logger()
        if master is None:
            master = servers[0]
        rest = RestConnection(master)
        helper = RestHelper(rest)
        helper.is_ns_server_running(
            timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
        nodes = rest.node_statuses()
        master_id = rest.get_nodes_self().id
        for node in list(nodes):
            if int(node.port) in range(9091, 9991):
                rest.eject_node(node)
                nodes.remove(node)

        if len(nodes) > 1:
            log.info("rebalancing all nodes in order to remove nodes")
            rest.log_client_error("Starting rebalance from test, ejected nodes %s" % \
                                                             [node.id for node in nodes if node.id != master_id])
            helper.remove_nodes(
                knownNodes=[node.id for node in nodes],
                ejectedNodes=[
                    node.id for node in nodes if node.id != master_id
                ],
                wait_for_rebalance=wait_for_rebalance)
            success_cleaned = []
            for removed in [node for node in nodes if (node.id != master_id)]:
                removed.rest_password = servers[0].rest_password
                removed.rest_username = servers[0].rest_username
                try:
                    rest = RestConnection(removed)
                except Exception as ex:
                    log.error(
                        "can't create rest connection after rebalance out for "
                        "ejected nodes, will retry after 10 seconds "
                        "according to MB-8430: {0}".format(ex))
                    time.sleep(10)
                    rest = RestConnection(removed)
                start = time.time()
                while time.time() - start < 30:
                    if len(rest.get_pools_info()["pools"]) == 0:
                        success_cleaned.append(removed)
                        break
                    else:
                        time.sleep(0.1)
                if time.time() - start > 10:
                    log.error("'pools' on node {0}:{1} - {2}".format(
                        removed.ip, removed.port,
                        rest.get_pools_info()["pools"]))
            for node in set([node for node in nodes if (node.id != master_id)
                             ]) - set(success_cleaned):
                log.error(
                    "node {0}:{1} was not cleaned after removing from cluster".
                    format(node.ip, node.port))
                try:
                    rest = RestConnection(node)
                    rest.force_eject_node()
                except Exception as ex:
                    log.error("force_eject_node {0}:{1} failed: {2}".format(
                        removed.ip, removed.port, ex))
            if len(set([node for node in nodes if (node.id != master_id)])\
                    - set(success_cleaned)) != 0:
                raise Exception(
                    "not all ejected nodes were cleaned successfully")

            log.info("removed all the nodes from cluster associated with {0} ? {1}".format(servers[0], \
                    [(node.id, node.port) for node in nodes if (node.id != master_id)]))
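
Both this helper (when ejecting) and Example #5 (when initializing) special-case ports in the 9091-9990 range, which appear to mark auxiliary node instances. A standalone restatement of that predicate:

# the same predicate both examples use for the special port range
def is_auxiliary_node_port(port):
    return int(port) in range(9091, 9991)

assert is_auxiliary_node_port(9091)
assert not is_auxiliary_node_port(8091)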
Example #5
    def execute(self):
        try:
            rest = RestConnection(self.server)
        except ServerUnavailableException as error:
            self.state = FINISHED
            self.set_unexpected_exception(error)
            return
        info = Task.wait_until(lambda: rest.get_nodes_self(),
                               lambda x: x.memoryTotal > 0, 10)
        self.test_log.debug("server: %s, nodes/self: %s", self.server,
                            info.__dict__)

        username = self.server.rest_username
        password = self.server.rest_password

        if int(info.port) in range(9091, 9991):
            self.state = FINISHED
            self.set_result(True)
            return

        total_memory = int(info.mcdMemoryReserved - 100)
        if self.quota_percent:
            total_memory = int(total_memory * self.quota_percent / 100)

        set_services = copy.deepcopy(self.services)
        if set_services is None:
            set_services = ["kv"]
        if "index" in set_services:
            if self.index_quota_percent:
                index_memory = int(
                    total_memory * self.index_quota_percent / 100)
            else:
                index_memory = INDEX_QUOTA
            self.test_log.debug("Quota for index service will be %s MB" %
                                index_memory)
            total_memory -= index_memory
            rest.set_service_memoryQuota(service='indexMemoryQuota',
                                         memoryQuota=index_memory)
        if "fts" in set_services:
            if self.fts_quota_percent:
                fts_memory = int(
                    total_memory * self.fts_quota_percent / 100)
            else:
                fts_memory = FTS_QUOTA
            self.test_log.debug("Quota for fts service will be %s MB" %
                                fts_memory)
            total_memory -= fts_memory
            rest.set_service_memoryQuota(service='ftsMemoryQuota',
                                         memoryQuota=fts_memory)
        if "cbas" in set_services:
            if self.cbas_quota_percent:
                cbas_memory = int(
                    total_memory * self.cbas_quota_percent / 100)
            else:
                cbas_memory = CBAS_QUOTA
            self.test_log.debug("Quota for cbas service will be %s MB" %
                                cbas_memory)
            total_memory -= cbas_memory
            rest.set_service_memoryQuota(service="cbasMemoryQuota",
                                         memoryQuota=cbas_memory)
        if total_memory < MIN_KV_QUOTA:
            raise Exception("KV RAM needs to be more than %s MB"
                            " at node  %s" % (MIN_KV_QUOTA, self.server.ip))

        rest.init_cluster_memoryQuota(username, password, total_memory)
        rest.set_indexer_storage_mode(username, password, self.gsi_type)

        if self.services:
            status = rest.init_node_services(username=username,
                                             password=password,
                                             port=self.port,
                                             hostname=self.server.ip,
                                             services=self.services)
            if not status:
                self.state = FINISHED
                self.set_unexpected_exception(
                    Exception('unable to set services for server %s' %
                              self.server.ip))
                return
        if self.disable_consistent_view is not None:
            rest.set_reb_cons_view(self.disable_consistent_view)
        if self.rebalanceIndexWaitingDisabled is not None:
            rest.set_reb_index_waiting(self.rebalanceIndexWaitingDisabled)
        if self.rebalanceIndexPausingDisabled is not None:
            rest.set_rebalance_index_pausing(
                self.rebalanceIndexPausingDisabled)
        if self.maxParallelIndexers is not None:
            rest.set_max_parallel_indexers(self.maxParallelIndexers)
        if self.maxParallelReplicaIndexers is not None:
            rest.set_max_parallel_replica_indexers(
                self.maxParallelReplicaIndexers)

        rest.init_cluster(username, password, self.port)
        self.server.port = self.port
        try:
            rest = RestConnection(self.server)
        except ServerUnavailableException as error:
            self.state = FINISHED
            self.set_unexpected_exception(error)
            return
        info = rest.get_nodes_self()

        if info is None:
            self.state = FINISHED
            self.set_unexpected_exception(
                Exception(
                    'unable to get information on server %s; is it available?'
                    % self.server.ip))
            return
        self.set_result(total_memory)
        self.state = CHECKING
        self.call()
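
The quota handling in this task carves each service's quota out of one shrinking budget: 100 MB of headroom comes off first, each enabled service deducts its slice, and KV keeps the remainder. A self-contained sketch of that arithmetic; the constant values below are sample defaults, the real INDEX_QUOTA/FTS_QUOTA/CBAS_QUOTA/MIN_KV_QUOTA constants live in the framework and may differ:

INDEX_QUOTA, FTS_QUOTA, CBAS_QUOTA, MIN_KV_QUOTA = 256, 256, 1024, 256

def split_quotas(mcd_memory_reserved, services,
                 index_pct=None, fts_pct=None, cbas_pct=None):
    # mirror the order used above: headroom first, then each service
    # deducts its slice and KV keeps whatever is left
    total = int(mcd_memory_reserved - 100)
    quotas = {}
    if "index" in services:
        quotas["index"] = int(total * index_pct / 100) if index_pct else INDEX_QUOTA
        total -= quotas["index"]
    if "fts" in services:
        quotas["fts"] = int(total * fts_pct / 100) if fts_pct else FTS_QUOTA
        total -= quotas["fts"]
    if "cbas" in services:
        quotas["cbas"] = int(total * cbas_pct / 100) if cbas_pct else CBAS_QUOTA
        total -= quotas["cbas"]
    if total < MIN_KV_QUOTA:
        raise ValueError("KV RAM needs to be more than %s MB" % MIN_KV_QUOTA)
    quotas["kv"] = total
    return quotas

print(split_quotas(4196, ["kv", "index", "fts"]))
# -> {'index': 256, 'fts': 256, 'kv': 3584}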
Example #6
    def test_start_stop_rebalance(self):
        """
        Start-stop rebalance in/out with adding/removing aditional after stopping rebalance.

        This test begins by loading a given number of items into the cluster. It then
        add  servs_in nodes and remove  servs_out nodes and start rebalance. Then rebalance
        is stopped when its progress reached 20%. After we add  extra_nodes_in and remove
        extra_nodes_out. Restart rebalance with new cluster configuration. Later rebalance
        will be stop/restart on progress 40/60/80%. After each iteration we wait for
        the disk queues to drain, and then verify that there has been no data loss,
        sum(curr_items) match the curr_items_total. Once cluster was rebalanced the test is finished.
        The oder of add/remove nodes looks like:
        self.nodes_init|servs_in|extra_nodes_in|extra_nodes_out|servs_out
        """
        rest = RestConnection(self.master)
        self._wait_for_stats_all_buckets(self.servs_init)
        self.log.info("Current nodes : {0}".format(
            [node.id for node in rest.node_statuses()]))
        self.log.info("Adding nodes {0} to cluster".format(self.servs_in))
        self.log.info("Removing nodes {0} from cluster".format(self.servs_out))
        add_in_once = self.extra_servs_in
        result_nodes = set(self.servs_init + self.servs_in) - set(
            self.servs_out)
        # the last iteration runs with i=5; by then the rebalance should have
        # completed, which is also verified and tracked
        for i in range(1, 6):
            if i == 1:
                rebalance = self.cluster.async_rebalance(
                    self.servs_init[:self.nodes_init], self.servs_in,
                    self.servs_out)
            else:
                rebalance = self.cluster.async_rebalance(
                    self.servs_init[:self.nodes_init] + self.servs_in,
                    add_in_once, self.servs_out + self.extra_servs_out)
                add_in_once = []
                result_nodes = set(self.servs_init + self.servs_in +
                                   self.extra_servs_in) - set(
                                       self.servs_out + self.extra_servs_out)
            self.sleep(20)
            expected_progress = 20 * i
            reached = RestHelper(rest).rebalance_reached(expected_progress)
            self.assertTrue(
                reached, "Rebalance failed or did not reach {0}%".format(
                    expected_progress))
            if not RestHelper(rest).is_cluster_rebalanced():
                self.log.info("Stop the rebalance")
                stopped = rest.stop_rebalance(wait_timeout=self.wait_timeout /
                                              3)
                self.assertTrue(stopped, msg="Unable to stop rebalance")
            rebalance.result()
            if RestHelper(rest).is_cluster_rebalanced():
                self.verify_cluster_stats(result_nodes)
                self.log.info(
                    "Rebalance was completed when tried to stop rebalance on {0}%"
                    .format(str(expected_progress)))
                break
            else:
                self.log.info(
                    "Rebalance is still required. Verifying the data in the buckets"
                )
                self._verify_all_buckets(self.master,
                                         timeout=None,
                                         max_verify=self.max_verify,
                                         batch_size=1)
                self.verify_cluster_stats(result_nodes,
                                          check_bucket_stats=False,
                                          verify_total_items=False)
        self.verify_unacked_bytes_all_buckets()
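
The checkpoint handling in this test boils down to: wait for the running rebalance to cross a progress threshold, stop it, and re-issue the rebalance with the same topology so it resumes. A condensed sketch of that loop body, assuming the same `RestConnection`/`RestHelper` API used throughout these examples:

def stop_rebalance_at(rest, expected_progress, ejected_ids):
    # wait until the running rebalance crosses the checkpoint
    reached = RestHelper(rest).rebalance_reached(expected_progress)
    assert reached, "rebalance failed or never reached %s%%" % expected_progress
    # stop only if the rebalance is still running, then re-issue it
    # with the same topology so it picks up from the current state
    if not RestHelper(rest).is_cluster_rebalanced():
        assert rest.stop_rebalance(), "unable to stop rebalance"
        rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                       ejectedNodes=ejected_ids)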
Example #7
    def setUp(self):
        super(RackzoneBaseTest, self).setUp()
        self.product = self.input.param("product", "cb")
        self.version = self.input.param("version", "2.5.1-1082")
        self.type = self.input.param('type', 'enterprise')
        self.default_map_func = "function (doc) {\n  emit(doc._id, doc);\n}"

        self.nodes_init = self.input.param("nodes_init", 1)
        self.nodes_in = self.input.param("nodes_in", 1)
        self.nodes_out = self.input.param("nodes_out", 1)
        self.doc_ops = self.input.param("doc_ops", "create")
        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.append(self.cluster.master)
        self.bucket_util.create_default_bucket(self.cluster)
        self.bucket_util.add_rbac_user(self.cluster.master)
        # define the data that will be used to test
        self.blob_generator = self.input.param("blob_generator", False)
        server_info = self.servers[0]
        rest = RestConnection(server_info)
        if not rest.is_enterprise_edition():
            raise Exception("This couchbase server is not Enterprise Edition.\
                  This RZA feature requires Enterprise Edition to work")
        if self.blob_generator:
            # gen_load data is used for upload before each test
            self.gen_load = BlobGenerator('test',
                                          'test-',
                                          self.doc_size,
                                          end=self.num_items)
            # gen_update is used for doing mutation for 1/2th of uploaded data
            self.gen_update = BlobGenerator('test',
                                            'test-',
                                            self.doc_size,
                                            end=(self.num_items // 2 - 1))
            # upload data before each test
            tasks = []
            for bucket in self.cluster.buckets:
                tasks.append(
                    self.task.async_load_gen_docs(
                        self.cluster,
                        bucket,
                        self.gen_load,
                        "create",
                        0,
                        batch_size=20,
                        persist_to=self.persist_to,
                        replicate_to=self.replicate_to,
                        pause_secs=5,
                        timeout_secs=self.sdk_timeout,
                        retries=self.sdk_retries))
            for task in tasks:
                self.task.jython_task_manager.get_task_result(task)
        else:
            tasks = []
            age = range(5)
            first = ['james', 'sharon']
            template = '{{ "mutated" : 0, "age": {0}, "first_name": "{1}" }}'
            self.gen_load = DocumentGenerator('test_docs',
                                              template,
                                              age,
                                              first,
                                              start=0,
                                              end=self.num_items)
            for bucket in self.cluster.buckets:
                tasks.append(
                    self.task.async_load_gen_docs(
                        self.cluster,
                        bucket,
                        self.gen_load,
                        "create",
                        0,
                        batch_size=20,
                        persist_to=self.persist_to,
                        replicate_to=self.replicate_to,
                        pause_secs=5,
                        timeout_secs=self.sdk_timeout,
                        retries=self.sdk_retries))
            for task in tasks:
                self.task.jython_task_manager.get_task_result(task)
        shell = RemoteMachineShellConnection(self.cluster.master)
        s_type = shell.extract_remote_info().distribution_type
        shell.disconnect()
        self.os_name = "linux"
        self.is_linux = True
        self.cbstat_command = "%scbstats" % LINUX_COUCHBASE_BIN_PATH
        if s_type.lower() == 'windows':
            self.is_linux = False
            self.os_name = "windows"
            self.cbstat_command = "%scbstats.exe" % WIN_COUCHBASE_BIN_PATH
        if s_type.lower() == 'mac':
            self.cbstat_command = "%scbstats" % MAC_COUCHBASE_BIN_PATH
        if self.nonroot:
            self.cbstat_command = "/home/%s%scbstats" \
                                  % (self.cluster.master.ssh_username,
                                     LINUX_COUCHBASE_BIN_PATH)
Example #8
    def _common_test_body_swap_rebalance(self, do_stop_start=False):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]

        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                format(status, content))
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        if self.do_access:
            self.log.info("DATA ACCESS PHASE")
            self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=optNodesIds)

        if do_stop_start:
            # Rebalance is stopped at 20%, 40% and 60% completion
            retry = 0
            for expected_progress in (20, 40, 60):
                self.log.info("STOP/START SWAP REBALANCE PHASE WITH PROGRESS {0}%".
                              format(expected_progress))
                while True:
                    progress = rest._rebalance_progress()
                    if progress < 0:
                        self.log.error("rebalance progress code : {0}".format(progress))
                        break
                    elif progress == 100:
                        self.log.warning("Rebalance has already reached 100%")
                        break
                    elif progress >= expected_progress:
                        self.log.info("Rebalance will be stopped with {0}%".format(progress))
                        stopped = rest.stop_rebalance()
                        self.assertTrue(stopped, msg="unable to stop rebalance")
                        SwapRebalanceBase.sleep(self, 20)
                        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                                       ejectedNodes=optNodesIds)
                        break
                    elif retry > 100:
                        break
                    else:
                        retry += 1
                        SwapRebalanceBase.sleep(self, 1)
        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(optNodesIds))
        SwapRebalanceBase.verification_phase(self, master)
Example #9
    def _common_test_body_failed_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
            ejectedNodes=optNodesIds)
        SwapRebalanceBase.sleep(self, 10, "Rebalance should start")
        self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(self.percentage_progress))
        reached = RestHelper(rest).rebalance_reached(self.percentage_progress)
        if reached and RestHelper(rest).is_cluster_rebalanced():
            # handle the situation when rebalance failed at the very beginning
            self.log.error('rebalance seems to have failed!')
            rest.print_UI_logs()
            self.fail("rebalance failed even before killing memcached")
        bucket = rest.get_buckets()[0].name
        pid = None
        if self.swap_orchestrator and not self.cluster_run:
            # get PID via remote connection if master is a new node
            shell = RemoteMachineShellConnection(master)
            pid = shell.get_memcache_pid()
            shell.disconnect()
        else:
            times = 2
            if self.cluster_run:
                times = 20
            for i in range(times):
                try:
                    _mc = MemcachedClientHelper.direct_client(master, bucket)
                    pid = _mc.stats()["pid"]
                    break
                except (EOFError, KeyError) as e:
                    self.log.error("{0}.Retry in 2 sec".format(e))
                    SwapRebalanceBase.sleep(self, 2)
        if pid is None:
            # sometimes pid is not returned by mc.stats()
            shell = RemoteMachineShellConnection(master)
            pid = shell.get_memcache_pid()
            shell.disconnect()
            if pid is None:
                self.fail("impossible to get a PID")
        command = "os:cmd(\"kill -9 {0} \")".format(pid)
        self.log.info(command)
        killed = rest.diag_eval(command)
        self.log.info("killed {0}:{1}??  {2} ".format(master.ip, master.port, killed))
        self.log.info("sleep for 10 sec after kill memcached")
        SwapRebalanceBase.sleep(self, 10)
        # we can't get stats from the new node when the rebalance fails
        if not self.swap_orchestrator:
            ClusterOperationHelper._wait_warmup_completed(self, [master], bucket, wait_time=600)
        # we expect this rebalance to fail
        try:
            rest.monitorRebalance()
        except RebalanceFailedException:
            # retry rebalance if it failed
            self.log.warning("Rebalance failed but it's expected")
            SwapRebalanceBase.sleep(self, 30)
            self.assertFalse(RestHelper(rest).is_cluster_rebalanced(), msg="cluster still needs a rebalance")
            knownNodes = rest.node_statuses()
            self.log.info("nodes are still in cluster: {0}".format([(node.ip, node.port) for node in knownNodes]))
            ejectedNodes = list(set(optNodesIds) & {node.id for node in knownNodes})
            rest.rebalance(otpNodes=[node.id for node in knownNodes], ejectedNodes=ejectedNodes)
            SwapRebalanceBase.sleep(self, 10, "Wait for rebalance to start")
            self.assertTrue(rest.monitorRebalance(),
                            msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))
        else:
            self.log.info("rebalance completed successfully")
        SwapRebalanceBase.verification_phase(self, master)
Example #10
    def _log_finish(self):
        try:
            msg = "{0} : {1} finished ".format(datetime.datetime.now(), self._testMethodName)
            RestConnection(self.servers[0]).log_client_error(msg)
        except Exception:
            pass
Example #11
    def common_setup(self):
        self.cluster_helper = Cluster()
        self.log = logger.Logger.get_logger()
        self.cluster_run = False
        self.input = TestInputSingleton.input
        self.servers = self.input.servers
        serverInfo = self.servers[0]
        rest = RestConnection(serverInfo)
        if len({server.ip for server in self.servers}) == 1:
            ip = rest.get_nodes_self().ip
            for server in self.servers:
                server.ip = ip
            self.cluster_run = True
        self.case_number = self.input.param("case_number", 0)
        self.replica = self.input.param("replica", 1)
        self.keys_count = self.input.param("keys-count", 1000)
        self.load_ratio = self.input.param("load-ratio", 1)
        self.ratio_expiry = self.input.param("ratio-expiry", 0.03)
        self.ratio_deletes = self.input.param("ratio-deletes", 0.13)
        self.num_buckets = self.input.param("num-buckets", 1)
        self.failover_factor = self.num_swap = self.input.param("num-swap", 1)
        self.num_initial_servers = self.input.param("num-initial-servers", 3)
        self.fail_orchestrator = self.swap_orchestrator = self.input.param("swap-orchestrator", False)
        self.do_access = self.input.param("do-access", True)
        self.load_started = False
        self.loaders = []
        try:
            # clear any state left over from a previous invalid run
            if rest._rebalance_progress_status() == 'running':
                self.log.warning("rebalancing is still running, previous test should be verified")
                stopped = rest.stop_rebalance()
                self.assertTrue(stopped, msg="unable to stop rebalance")
            self.log.info("==============  SwapRebalanceBase setup was started for test #{0} {1}=============="\
                      .format(self.case_number, self._testMethodName))
            SwapRebalanceBase.reset(self)

            # Make sure the test is setup correctly
            min_servers = int(self.num_initial_servers) + int(self.num_swap)
            msg = "minimum {0} nodes required for running swap rebalance"
            self.assertTrue(len(self.servers) >= min_servers, msg=msg.format(min_servers))

            self.log.info('picking server : {0} as the master'.format(serverInfo))
            node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
            info = rest.get_nodes_self()
            rest.init_cluster(username=serverInfo.rest_username, password=serverInfo.rest_password)
            rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
            SwapRebalanceBase.enable_diag_eval_on_non_local_hosts(self, serverInfo)
            # Add built-in user
            testuser = [{'id': 'cbadminbucket', 'name': 'cbadminbucket', 'password': '******'}]
            RbacBase().create_user_source(testuser, 'builtin', self.servers[0])

            # Assign user to role
            role_list = [{'id': 'cbadminbucket', 'name': 'cbadminbucket', 'roles': 'admin'}]
            RbacBase().add_user_role(role_list, RestConnection(self.servers[0]), 'builtin')

            if self.num_buckets > 10:
                BaseTestCase.change_max_buckets(self, self.num_buckets)
            self.log.info("==============  SwapRebalanceBase setup was finished for test #{0} {1} =============="
                      .format(self.case_number, self._testMethodName))
            SwapRebalanceBase._log_start(self)
        except Exception as e:
            self.cluster_helper.shutdown()
            self.fail(e)
Example #12
    def test_basic_xdcr_with_cert_regenerate(self):

        cluster1 = self.servers[0:2]
        cluster2 = self.servers[2:4]
        remote_cluster_name = 'sslcluster'
        restCluster1 = RestConnection(cluster1[0])
        restCluster2 = RestConnection(cluster2[0])

        try:
            #Setup cluster1
            x509main(cluster1[0]).setup_master()
            x509main(cluster1[1])._setup_node_certificates(reload_cert=False)

            restCluster1.add_node('Administrator', 'password', cluster1[1].ip)
            known_nodes = ['ns_1@' + cluster1[0].ip, 'ns_1@' + cluster1[1].ip]
            restCluster1.rebalance(known_nodes)
            self.assertTrue(self.check_rebalance_complete(restCluster1),
                            "Issue with rebalance")
            restCluster1.create_bucket(bucket='default', ramQuotaMB=100)
            restCluster1.remove_all_replications()
            restCluster1.remove_all_remote_clusters()

            #Setup cluster2
            x509main(cluster2[0]).setup_master()
            x509main(cluster2[1])._setup_node_certificates(reload_cert=False)

            restCluster2.add_node('Administrator', 'password', cluster2[1].ip)
            known_nodes = ['ns_1@' + cluster2[0].ip, 'ns_1@' + cluster2[1].ip]
            restCluster2.rebalance(known_nodes)
            self.assertTrue(self.check_rebalance_complete(restCluster2),
                            "Issue with rebalance")
            restCluster2.create_bucket(bucket='default', ramQuotaMB=100)

            cert = x509main.CACERTFILEPATH + x509main.CACERTFILE
            with open(cert, 'rb') as cert_file:
                data = cert_file.read()
            restCluster1.add_remote_cluster(cluster2[0].ip,
                                            cluster2[0].port,
                                            'Administrator',
                                            'password',
                                            remote_cluster_name,
                                            certificate=data)
            replication_id = restCluster1.start_replication(
                'continuous', 'default', remote_cluster_name)

            x509main(self.master)._delete_inbox_folder()
            x509main(self.master)._generate_cert(self.servers,
                                                 root_cn="CB\\ Authority")
            self.log.info("Setting up the first cluster for new certificate")

            x509main(cluster1[0]).setup_master()
            x509main(cluster1[1])._setup_node_certificates(reload_cert=False)
            self.log.info("Setting up the second cluster for new certificate")
            x509main(cluster2[0]).setup_master()
            x509main(cluster2[1])._setup_node_certificates(reload_cert=False)

            status = restCluster1.is_replication_paused('default', 'default')
            if not status:
                restCluster1.set_xdcr_param('default', 'default',
                                            'pauseRequested', False)

            restCluster1.set_xdcr_param('default', 'default', 'pauseRequested',
                                        True)
            status = restCluster1.is_replication_paused('default', 'default')
            self.assertTrue(
                status,
                "Replication could not be paused after certificate regeneration")
        finally:
            known_nodes = ['ns_1@' + cluster2[0].ip, 'ns_1@' + cluster2[1].ip]
            restCluster2.rebalance(known_nodes, ['ns_1@' + cluster2[1].ip])
            self.assertTrue(self.check_rebalance_complete(restCluster2),
                            "Issue with rebalance")
            restCluster2.delete_bucket()
Example #13
    def testClusterInit(self):
        cluster_init_username = self.input.param("cluster_init_username",
                                                 "Administrator")
        cluster_init_password = self.input.param("cluster_init_password",
                                                 "password")
        cluster_init_port = self.input.param("cluster_init_port", 8091)
        cluster_init_ramsize = self.input.param("cluster_init_ramsize", 300)
        command_init = self.input.param("command_init", "cluster-init")
        server = self.servers[-1]
        remote_client = RemoteMachineShellConnection(server)
        rest = RestConnection(server)
        rest.force_eject_node()
        self.sleep(5)

        try:
            cli_command = command_init
            options = "--cluster-init-username={0} --cluster-init-password={1} --cluster-init-port={2} --cluster-init-ramsize={3}".\
                format(cluster_init_username, cluster_init_password, cluster_init_port, cluster_init_ramsize)
            output, error = remote_client.execute_couchbase_cli(
                cli_command=cli_command,
                options=options,
                cluster_host="localhost",
                user="******",
                password="******")
            self.assertEqual(output[0], "SUCCESS: init localhost")

            options = "--cluster-init-username={0} --cluster-init-password={1} --cluster-init-port={2}".\
                format(cluster_init_username + "1", cluster_init_password + "1", str(cluster_init_port)[:-1] + "9")
            output, error = remote_client.execute_couchbase_cli(
                cli_command=cli_command,
                options=options,
                cluster_host="localhost",
                user=cluster_init_username,
                password=cluster_init_password)
            # MB-8202 cluster-init/edit doesn't provide status
            self.assertTrue(output == [])
            server.rest_username = cluster_init_username + "1"
            server.rest_password = cluster_init_password + "1"
            server.port = str(cluster_init_port)[:-1] + "9"

            cli_command = "server-list"
            output, error = remote_client.execute_couchbase_cli(
                cli_command=cli_command,
                cluster_host="localhost",
                cluster_port=str(cluster_init_port)[:-1] + "9",
                user=cluster_init_username + "1",
                password=cluster_init_password + "1")
            self.assertTrue(
                "{0} healthy active".format(str(cluster_init_port)[:-1] +
                                            "9") in output[0])
            server_info = self._get_cluster_info(remote_client,
                                                 cluster_port=server.port,
                                                 user=server.rest_username,
                                                 password=server.rest_password)
            result = server_info["otpNode"] + " " + server_info[
                "hostname"] + " " + server_info["status"] + " " + server_info[
                    "clusterMembership"]
            self.assertTrue(
                "{0} healthy active".format(str(cluster_init_port)[:-1] +
                                            "9") in result)

            cli_command = command_init
            options = "--cluster-init-username={0} --cluster-init-password={1} --cluster-init-port={2}".\
                format(cluster_init_username, cluster_init_password, cluster_init_port)
            output, error = remote_client.execute_couchbase_cli(
                cli_command=cli_command,
                options=options,
                cluster_host="localhost",
                cluster_port=str(cluster_init_port)[:-1] + "9",
                user=(cluster_init_username + "1"),
                password=cluster_init_password + "1")
            # MB-8202 cluster-init/edit doesn't provide status
            self.assertTrue(output == [])

            server.rest_username = cluster_init_username
            server.rest_password = cluster_init_password
            server.port = cluster_init_port
            remote_client = RemoteMachineShellConnection(server)
            cli_command = "server-list"
            output, error = remote_client.execute_couchbase_cli(
                cli_command=cli_command,
                cluster_host="localhost",
                user=cluster_init_username,
                password=cluster_init_password)
            self.assertTrue("{0} healthy active".format(str(cluster_init_port))
                            in output[0])
            server_info = self._get_cluster_info(remote_client,
                                                 cluster_port=server.port,
                                                 user=server.rest_username,
                                                 password=server.rest_password)
            result = server_info["otpNode"] + " " + server_info[
                "hostname"] + " " + server_info["status"] + " " + server_info[
                    "clusterMembership"]
            self.assertTrue(
                "{0} healthy active".format(str(cluster_init_port)) in result)
            remote_client.disconnect()
        finally:
            rest = RestConnection(server)
            rest.force_eject_node()
            self.sleep(5)
            rest.init_cluster()
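
The repeated `str(cluster_init_port)[:-1] + "9"` expression in this test simply swaps the last digit of the port for a 9 (8091 becomes 8099), giving the test an alternate admin port to re-initialize on. A two-line demonstration:

cluster_init_port = 8091
alt_port = str(cluster_init_port)[:-1] + "9"   # "809" + "9"
assert alt_port == "8099"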
Example #14
    def offline_cluster_upgrade_non_default_path(self):
        # defined before the try block so the finally clause cannot hit a
        # NameError if setup fails before these are assigned
        servers_with_not_default = []
        old_paths = {}
        try:
            num_nodes_with_not_default = self.input.param(
                'num_nodes_with_not_default', 1)
            prefix_path = ''
            if not self.is_linux:
                prefix_path = "C:"
            data_path = prefix_path + self.input.param(
                'data_path', '/tmp/data').replace('|', "/")
            index_path = self.input.param('index_path',
                                          data_path).replace('|', "/")
            if not self.is_linux and not index_path.startswith("C:"):
                index_path = prefix_path + index_path
            num_nodes_remove_data = self.input.param('num_nodes_remove_data',
                                                     0)
            servers_with_not_default = \
                self.servers[:num_nodes_with_not_default]
            for server in servers_with_not_default:
                # remember the old paths so they can be restored in finally
                old_paths[server.ip] = [server.data_path, server.index_path]
                server.data_path = data_path
                server.index_path = index_path
                shell = RemoteMachineShellConnection(server)
                for path in set([data_path, index_path]):
                    shell.create_directory(path)
                shell.disconnect()
            self._install(self.servers[:self.nodes_init])
            self.operations(self.servers[:self.nodes_init])
            if self.ddocs_num and not self.input.param('extra_verification',
                                                       False):
                self.create_ddocs_and_views()
            self.sleep(self.sleep_time,
                       "Pre-setup of old version is done. Wait for upgrade")
            for upgrade_version in self.upgrade_versions:
                for server in self.servers[:self.nodes_init]:
                    remote = RemoteMachineShellConnection(server)
                    remote.stop_server()
                    remote.disconnect()
                self.sleep(self.sleep_time)
                #remove data for nodes with non default data paths
                tmp = min(num_nodes_with_not_default, num_nodes_remove_data)
                self.delete_data(self.servers[:tmp], [data_path, index_path])
                #remove data for nodes with default data paths
                self.delete_data(
                    self.servers[tmp:max(tmp, num_nodes_remove_data)],
                    ["/opt/couchbase/var/lib/couchbase/data"])
                upgrade_threads = self._async_update(
                    upgrade_version, self.servers[:self.nodes_init])
                for upgrade_thread in upgrade_threads:
                    upgrade_thread.join()
                success_upgrade = True
                while not self.queue.empty():
                    success_upgrade &= self.queue.get()
                if not success_upgrade:
                    self.fail("Upgrade failed!")
                self.sleep(self.expire_time)
                for server in servers_with_not_default:
                    rest = RestConnection(server)
                    node = rest.get_nodes_self()
                    self.assertEqual(node.storage[0].path, data_path)
                    self.assertEqual(node.storage[0].index_path, index_path)
            if num_nodes_remove_data:
                for bucket in self.buckets:
                    if self.rest_helper.bucket_exists(bucket):
                        raise Exception("bucket: %s still exists" %
                                        bucket.name)
                self.buckets = []

            if self.input.param('extra_verification', False):
                self.bucket_size = 100
                self._create_sasl_buckets(self.master, 1)
                self._create_standard_buckets(self.master, 1)
                if self.ddocs_num:
                    self.create_ddocs_and_views()
                    gen_load = BlobGenerator('upgrade',
                                             'upgrade-',
                                             self.value_size,
                                             end=self.num_items)
                    self._load_all_buckets(self.master,
                                           gen_load,
                                           "create",
                                           self.expire_time,
                                           flag=self.item_flag)
            self.verification(self.servers[:self.nodes_init],
                              check_items=not num_nodes_remove_data)
        finally:
            for server in servers_with_not_default:
                server.data_path = old_paths[server.ip][0]
                server.index_path = old_paths[server.ip][1]
Example #15
    def test_setting_alternate_address(self):
        server1 = self.servers[0]
        url_format = ""
        secure_port = ""
        secure_conn = ""
        self.skip_set_alt_addr = False
        shell = RemoteMachineShellConnection(server1)
        if self.secure_conn:
            cacert = self.get_cluster_certificate_info(server1)
            secure_port = "1"
            url_format = "s"
            if not self.no_cacert:
                secure_conn = "--cacert {0}".format(cacert)
            if self.no_ssl_verify:
                secure_conn = "--no-ssl-verify"
        output = self.list_alt_address(server=server1,
                                       url_format=url_format,
                                       secure_port=secure_port,
                                       secure_conn=secure_conn)
        if output:
            output, _ = self.remove_alt_address_setting(
                server=server1,
                url_format=url_format,
                secure_port=secure_port,
                secure_conn=secure_conn)
            mesg = 'SUCCESS: Alternate address configuration deleted'
            if not self._check_output(mesg, output):
                self.fail("Fail to remove alternate address")
        output = self.list_alt_address(server=server1,
                                       url_format=url_format,
                                       secure_port=secure_port,
                                       secure_conn=secure_conn)
        if output and output[0] != "[]":
            self.fail("Fail to remove alternate address with remove command")

        self.log.info("Start to set alternate address")
        internal_IP = self.get_internal_IP(server1)
        setting_cmd = "{0}couchbase-cli{1} {2}"\
                       .format(self.cli_command_path, self.cmd_ext,
                               "setting-alternate-address")
        setting_cmd += " -c http{0}://{1}:{2}{3} --username {4} --password {5} {6}"\
                       .format(url_format, internal_IP, secure_port, server1.port,
                               server1.rest_username, server1.rest_password, secure_conn)
        setting_cmd += " --set --hostname {0} ".format(server1.ip)
        shell.execute_command(setting_cmd)
        output = self.list_alt_address(server=server1,
                                       url_format=url_format,
                                       secure_port=secure_port,
                                       secure_conn=secure_conn)
        if output and output[0]:
            output = output[0]
            output = output[1:-1]
            output = ast.literal_eval(output)
            if output["hostname"] != server1.ip:
                self.fail("Fail to set correct hostname")
        else:
            self.fail("Fail to set alternate address")
        self.log.info("Start to add node to cluster use internal IP")
        services_in = self.alt_addr_services_in
        if "-" in services_in:
            set_services = services_in.split("-")
        else:
            set_services = services_in.split(",")
        i = 0
        num_hostname_add = 1
        for server in self.servers[1:]:
            add_node_IP = self.get_internal_IP(server)
            node_services = "kv"
            if len(set_services) == 1:
                node_services = set_services[0]
            elif len(set_services) > 1:
                if len(set_services) == len(self.servers[1:]):
                    node_services = set_services[i]
                    i += 1
            if self.add_hostname_node and num_hostname_add <= self.num_hostname_add:
                add_node_IP = server.ip
                num_hostname_add += 1

            try:
                shell.alt_addr_add_node(main_server=server1,
                                        internal_IP=add_node_IP,
                                        server_add=server,
                                        services=node_services,
                                        cmd_ext=self.cmd_ext)
            except Exception as e:
                if e:
                    self.fail("Error: {0}".format(e))
        rest = RestConnection(self.master)
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=[])
        rest.monitorRebalance()
        self.log.info("Create default bucket")
        self._create_default_bucket(self.master)
        buckets = rest.get_buckets()
        status = RestHelper(rest).vbucket_map_ready(buckets[0].name)
        if not status:
            self.fail("Failed to create bucket.")

        if self.run_alt_addr_loader:
            if self.alt_addr_kv_loader:
                self.kv_loader(server1, client_os=self.client_os)
            if self.alt_addr_n1ql_query:
                self.n1ql_query(server1.ip,
                                self.client_os,
                                create_travel_sample_bucket=True)
            if self.alt_addr_eventing_function:
                self.create_eventing_function(server1,
                                              self.client_os,
                                              create_travel_sample_bucket=True)
                self.skip_set_alt_addr = True
        alt_addr_status = []
        if not self.skip_set_alt_addr:
            for server in self.servers[1:]:
                internal_IP = self.get_internal_IP(server)
                status = self.set_alternate_address(server,
                                                    url_format=url_format,
                                                    secure_port=secure_port,
                                                    secure_conn=secure_conn,
                                                    internal_IP=internal_IP)
                alt_addr_status.append(status)
            if False in alt_addr_status:
                self.fail("Fail to set alt address")
            else:
                self.all_alt_addr_set = True
                if self.run_alt_addr_loader:
                    if self.alt_addr_kv_loader:
                        self.kv_loader(server1, self.client_os)
                    if self.alt_addr_n1ql_query:
                        self.n1ql_query(server1.ip, self.client_os)
        remove_node = ""
        if self.alt_addr_rebalance_out:
            internal_IP = self.get_internal_IP(self.servers[-1])
            reject_node = "ns_1@{0}".format(internal_IP)
            self.log.info("Rebalance out a node {0}".format(internal_IP))
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],\
                                                     ejectedNodes=[reject_node])
            reb_status = rest.monitorRebalance()
            self.assertTrue(
                reb_status,
                "Rebalance out node {0} failed".format(internal_IP))
            remove_node = internal_IP
        if self.alt_addr_rebalance_in and self.alt_addr_rebalance_out:
            if remove_node:
                free_node = remove_node
                if self.add_hostname_node:
                    free_node = self.get_external_IP(remove_node)
                cmd = 'curl -X POST -d  "hostname={0}&user={1}&password={2}&services={3}" '\
                             .format(free_node, server1.rest_username, server1.rest_password,
                                     self.alt_addr_rebalance_in_services)
                cmd += '-u Administrator:password http://{0}:8091/controller/addNode'\
                             .format(server1.ip)
                shell.execute_command(cmd)
                rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],\
                                                                    ejectedNodes=[])
                reb_status = rest.monitorRebalance()
                self.assertTrue(reb_status, "Rebalance back in failed")
                status = self.set_alternate_address(self.servers[-1],
                                                    url_format=url_format,
                                                    secure_port=secure_port,
                                                    secure_conn=secure_conn,
                                                    internal_IP=free_node)
                if status:
                    self.all_alt_addr_set = True
                else:
                    self.all_alt_addr_set = False
            else:
                self.fail("We need a free node to add to cluster")
            if self.run_alt_addr_loader:
                if self.alt_addr_kv_loader:
                    self.kv_loader(server1, self.client_os)
                if self.alt_addr_n1ql_query:
                    self.n1ql_query(server1.ip, self.client_os)
        status = self.remove_all_alternate_address_settings()
        if not status:
            self.fail("Failed to remove all alternate address setting")
Example No. 16
    def _add_back_failed_node(self, do_node_cleanup=False):
        master = self.servers[0]
        rest = RestConnection(master)
        creds = self.input.membase_settings

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(self.servers, len(self.servers) - 1)
        self.assertTrue(status, msg="Rebalance was failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
        optNodesIds = [node.id for node in toBeEjectedNodes]

        # List of servers that will not be failed over
        not_failed_over = []
        for server in self.servers:
            if self.cluster_run:
                if server.port not in [node.port for node in toBeEjectedNodes]:
                    not_failed_over.append(server)
                    self.log.info("Node {0}:{1} not failed over".format(server.ip, server.port))
            else:
                if server.ip not in [node.ip for node in toBeEjectedNodes]:
                    not_failed_over.append(server)
                    self.log.info("Node {0}:{1} not failed over".format(server.ip, server.port))

        if self.fail_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                format(status, content))
            # When swapping all the nodes
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content
            master = not_failed_over[-1]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        # Failover selected nodes
        for node in optNodesIds:
            self.log.info("failover node {0} and rebalance afterwards".format(node))
            rest.fail_over(node)

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], \
            ejectedNodes=optNodesIds)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after failing over nodes {0}".format(optNodesIds))

        # Add back the same failed-over nodes

        # Clean up the node if requested (not implemented yet)
        # TODO: cluster_run?
        if do_node_cleanup:
            pass

        # Make rest connection with node part of cluster
        rest = RestConnection(master)

        # Given the optNode, find ip
        add_back_servers = []
        nodes = rest.get_nodes()
        for server in nodes:
            if isinstance(server.ip, str):
                add_back_servers.append(server)
        final_add_back_servers = []
        for server in self.servers:
            if self.cluster_run:
                if server.port not in [serv.port for serv in add_back_servers]:
                    final_add_back_servers.append(server)
            else:
                if server.ip not in [serv.ip for serv in add_back_servers]:
                    final_add_back_servers.append(server)
        for server in final_add_back_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding nodes {0}".format(add_back_servers))

        SwapRebalanceBase.verification_phase(self, master)
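
The failover-and-add-back cycle above reduces to a handful of REST calls. A condensed sketch, assuming `rest` is a RestConnection to a surviving node and that the otp node id, credentials, and host values are placeholders:

    # Condensed failover / eject / add-back cycle (ids are placeholders).
    rest.fail_over(otp_node_id)                          # mark node as failed
    rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                   ejectedNodes=[otp_node_id])           # eject it
    assert rest.monitorRebalance(), "eject rebalance failed"
    rest.add_node(rest_username, rest_password,
                  node_ip, node_port)                    # add it back
    rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                   ejectedNodes=[])
    assert rest.monitorRebalance(), "add-back rebalance failed"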
Example No. 17
    def test_start_stop_rebalance_after_failover(self):
        """
            Rebalances nodes out and in with failover
            Use different nodes_in and nodes_out params to have uneven add and deletion. Use 'zone'
            param to have nodes divided into server groups by having zone > 1.

            The test begin with loading the bucket with given number of items. It then fails over a node. We then
            rebalance the cluster, while adding or removing given number of nodes. Once the rebalance reaches 50%,
            we stop the rebalance and validate the cluster stats. We then restart the rebalance and validate rebalance
            was completed successfully.
            """
        fail_over = self.input.param("fail_over", False)
        gen = BlobGenerator('mike',
                            'mike-',
                            self.value_size,
                            end=self.num_items)
        self._load_all_buckets(self.master, gen, "create", 0)
        tasks = self._async_load_all_buckets(self.master, gen, "update", 0)
        for task in tasks:
            task.result(self.wait_timeout * 20)
        self._verify_stats_all_buckets(self.servers[:self.nodes_init],
                                       timeout=120)
        self._wait_for_stats_all_buckets(self.servers[:self.nodes_init])
        self.sleep(20)
        prev_vbucket_stats = self.get_vbucket_seqnos(
            self.servers[:self.nodes_init], self.buckets)
        prev_failover_stats = self.get_failovers_logs(
            self.servers[:self.nodes_init], self.buckets)
        disk_replica_dataset, disk_active_dataset = self.get_and_compare_active_replica_data_set_all(
            self.servers[:self.nodes_init], self.buckets, path=None)
        self.compare_vbucketseq_failoverlogs(prev_vbucket_stats,
                                             prev_failover_stats)
        self.rest = RestConnection(self.master)
        chosen = RebalanceHelper.pick_nodes(self.master, howmany=1)
        result_nodes = list(
            set(self.servers[:self.nodes_init] + self.servs_in) -
            set(self.servs_out))
        for node in self.servs_in:
            self.rest.add_node(self.master.rest_username,
                               self.master.rest_password, node.ip, node.port)
        # Mark Node for failover
        self.rest.fail_over(chosen[0].id, graceful=fail_over)
        rebalance = self.cluster.async_rebalance(
            self.servers[:self.nodes_init], self.servs_in, self.servs_out)
        expected_progress = 50
        rest = RestConnection(self.master)
        reached = RestHelper(rest).rebalance_reached(expected_progress)
        self.assertTrue(
            reached,
            "Rebalance failed or did not reach {0}%".format(expected_progress))
        if not RestHelper(rest).is_cluster_rebalanced():
            self.log.info("Stop the rebalance")
            stopped = rest.stop_rebalance(wait_timeout=self.wait_timeout / 3)
            self.assertTrue(stopped, msg="Unable to stop rebalance")
            self._verify_all_buckets(self.master,
                                     timeout=None,
                                     max_verify=self.max_verify,
                                     batch_size=1)
        self.shuffle_nodes_between_zones_and_rebalance()
        self.verify_cluster_stats(result_nodes,
                                  check_ep_items_remaining=True,
                                  check_bucket_stats=False)
        self.sleep(30)
        self.data_analysis_active_replica_all(disk_active_dataset,
                                              disk_replica_dataset,
                                              result_nodes,
                                              self.buckets,
                                              path=None)
        self.verify_unacked_bytes_all_buckets()
        nodes = self.get_nodes_in_cluster(self.master)
        self.vb_distribution_analysis(servers=nodes,
                                      std=1.0,
                                      total_vbuckets=self.total_vbuckets)
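
The stop-and-resume core of the test above is a small pattern on its own. A minimal sketch, assuming a live RestConnection to a placeholder `master` server; the progress threshold and timeout values are illustrative:

    # Wait for ~50% progress, stop the rebalance, then resume it.
    rest = RestConnection(master)
    if RestHelper(rest).rebalance_reached(50):
        if not RestHelper(rest).is_cluster_rebalanced():
            rest.stop_rebalance(wait_timeout=60)       # halt mid-flight
    # ... validate cluster/bucket stats here ...
    rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                   ejectedNodes=[])                    # kick it off again
    assert rest.monitorRebalance(), "resumed rebalance failed"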
Example No. 18
 def setUp(self):
     super(SecondaryIndexingClusterOpsTests, self).setUp()
     server = self.get_nodes_from_services_map(service_type = "n1ql")
     self.rest = RestConnection(server)
Example No. 19
 def tearDown(self):
     RestConnection(self.master).delete_all_buckets()
Example No. 20
    def setUp(self):
        super(RebalanceBaseTest, self).setUp()
        self.rest = RestConnection(self.cluster.master)
        self.doc_ops = self.input.param("doc_ops", "create")
        self.key_size = self.input.param("key_size", 0)
        self.zone = self.input.param("zone", 1)
        self.replica_to_update = self.input.param("new_replica", None)
        self.default_view_name = "default_view"
        self.defaul_map_func = "function (doc) {\n  emit(doc._id, doc);\n}"
        self.default_view = View(self.default_view_name, self.defaul_map_func,
                                 None)
        self.max_verify = self.input.param("max_verify", None)
        self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
        self.flusher_total_batch_limit = self.input.param(
            "flusher_total_batch_limit", None)
        self.test_abort_snapshot = self.input.param("test_abort_snapshot",
                                                    False)
        self.items = self.num_items
        self.logs_folder = self.input.param("logs_folder")
        self.retry_get_process_num = self.input.param("retry_get_process_num",
                                                      200)

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []

        if not self.cluster.cloud_cluster:
            node_ram_ratio = self.bucket_util.base_bucket_ratio(
                self.cluster.servers)
            info = self.rest.get_nodes_self()
            self.rest.init_cluster(username=self.cluster.master.rest_username,
                                   password=self.cluster.master.rest_password)
            kv_mem_quota = int(info.mcdMemoryReserved * node_ram_ratio)
            self.rest.set_service_mem_quota(
                {CbServer.Settings.KV_MEM_QUOTA: kv_mem_quota})
            self.bucket_util.add_rbac_user(self.cluster.master)

            services = None
            if self.services_init:
                services = list()
                for service in self.services_init.split("-"):
                    services.append(service.replace(":", ","))
                services = services[1:] if len(services) > 1 else None

            if nodes_init:
                result = self.task.rebalance(
                    self.cluster,
                    nodes_init, [],
                    services=services,
                    retry_get_process_num=self.retry_get_process_num)
                self.assertTrue(result, "Initial rebalance failed")

        self.check_temporary_failure_exception = False
        self.cluster.nodes_in_cluster.extend([self.cluster.master])
        self.check_replica = self.input.param("check_replica", False)
        self.spec_name = self.input.param("bucket_spec", None)
        self.disk_optimized_thread_settings = self.input.param(
            "disk_optimized_thread_settings", False)
        if self.disk_optimized_thread_settings:
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
        # Buckets creation and initial data load done by bucket_spec
        if self.spec_name is not None:
            try:
                self.collection_setup()
            except Java_base_exception as exception:
                self.handle_setup_exception(exception)
            except Exception as exception:
                self.handle_setup_exception(exception)
        else:
            if self.standard_buckets > 10:
                self.bucket_util.change_max_buckets(self.cluster.master,
                                                    self.standard_buckets)
            self.create_buckets(self.bucket_size)

            # Create Scope/Collection based on inputs given
            for bucket in self.cluster.buckets:
                if self.scope_name != CbServer.default_scope:
                    self.scope_name = BucketUtils.get_random_name()
                    BucketUtils.create_scope(self.cluster.master, bucket,
                                             {"name": self.scope_name})
                if self.collection_name != CbServer.default_collection:
                    self.collection_name = BucketUtils.get_random_name()
                    BucketUtils.create_collection(
                        self.cluster.master, bucket, self.scope_name, {
                            "name": self.collection_name,
                            "num_items": self.num_items
                        })
                    self.log.info(
                        "Bucket %s using scope::collection - '%s::%s'" %
                        (bucket.name, self.scope_name, self.collection_name))

                # Update required num_items under default collection
                bucket.scopes[self.scope_name] \
                    .collections[self.collection_name] \
                    .num_items = self.num_items

            if self.flusher_total_batch_limit:
                self.bucket_util.set_flusher_total_batch_limit(
                    self.cluster, self.cluster.buckets,
                    self.flusher_total_batch_limit)

            self.gen_create = self.get_doc_generator(0, self.num_items)
            if self.active_resident_threshold < 100:
                self.check_temporary_failure_exception = True
                # Reset num_items=0 since the num_items will be populated
                # by the DGM load task
                for bucket in self.cluster.buckets:
                    bucket.scopes[self.scope_name] \
                        .collections[self.collection_name] \
                        .num_items = 0

            # Create clients in SDK client pool
            if self.sdk_client_pool:
                self.log.info("Creating SDK clients for client_pool")
                for bucket in self.cluster.buckets:
                    self.sdk_client_pool.create_clients(
                        bucket, [self.cluster.master],
                        self.sdk_pool_capacity,
                        compression_settings=self.sdk_compression)

            if not self.atomicity:
                _ = self._load_all_buckets(self.cluster,
                                           self.gen_create,
                                           "create",
                                           0,
                                           batch_size=self.batch_size)
                self.log.info("Verifying num_items counts after doc_ops")
                self.bucket_util._wait_for_stats_all_buckets(
                    self.cluster, self.cluster.buckets, timeout=1200)
                self.bucket_util.validate_docs_per_collections_all_buckets(
                    self.cluster, timeout=self.wait_timeout)
            else:
                self.transaction_commit = True
                self._load_all_buckets_atomicty(self.gen_create, "create")
                self.transaction_commit = self.input.param(
                    "transaction_commit", True)

            # Initialize doc_generators
            self.active_resident_threshold = 100
            self.gen_create = None
            self.gen_delete = None
            self.gen_update = self.get_doc_generator(0, (self.items // 2))
            self.durability_helper = DurabilityHelper(
                self.log,
                len(self.cluster.nodes_in_cluster),
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)
            self.cluster_util.print_cluster_stats(self.cluster)
            self.bucket_util.print_bucket_stats(self.cluster)
        self.log_setup_status("RebalanceBase", "complete")
Example No. 21
    def test_max_buckets(self):
        log = logger.Logger.get_logger()
        serverInfo = self.servers[0]
        log.info('picking server : {0} as the master'.format(serverInfo))
        rest = RestConnection(serverInfo)
        proxyPort = rest.get_nodes_self().moxi
        info = rest.get_nodes_self()
        rest.init_cluster(username=serverInfo.rest_username,
                          password=serverInfo.rest_password)
        bucket_num = rest.get_internalSettings("maxBucketCount")
        log.info("max # buckets allow in cluster: {0}".format(bucket_num))
        bucket_ram = 100
        cluster_ram = info.memoryQuota
        max_buckets = cluster_ram // bucket_ram
        log.info("RAM setting for this cluster: {0}".format(cluster_ram))
        testuser = [{
            'id': 'cbadminbucket',
            'name': 'cbadminbucket',
            'password': '******'
        }]
        rolelist = [{
            'id': 'cbadminbucket',
            'name': 'cbadminbucket',
            'roles': 'admin'
        }]
        RbacBase().create_user_source(testuser, 'builtin', self.master)
        RbacBase().add_user_role(rolelist, RestConnection(self.master),
                                 'builtin')

        for i in range(max_buckets):
            bucket_name = 'max_buckets-{0}'.format(uuid.uuid4())
            rest.create_bucket(bucket=bucket_name,
                               ramQuotaMB=bucket_ram,
                               authType='sasl',
                               proxyPort=proxyPort)
            ready = BucketOperationHelper.wait_for_memcached(
                serverInfo, bucket_name)
            log.info("kv RAM left in cluster: {0}".format(cluster_ram - 100))
            cluster_ram -= bucket_ram
            self.assertTrue(ready, "wait_for_memcached failed")

        buckets = rest.get_buckets()
        if len(buckets) != max_buckets:
            msg = 'tried to create {0} buckets, only created {1}'\
                               .format(max_buckets, len(buckets))
            self.fail(msg)
        try:
            rest.create_bucket(bucket=bucket_name,
                               ramQuotaMB=bucket_ram,
                               authType='sasl',
                               proxyPort=proxyPort)
            msg = 'bucket creation did not fail even though system was overcommitted'
            self.fail(msg)
        except BucketCreationException as ex:
            log.info('\n******\nBucketCreationException was thrown as expected '
                     'when we tried to create bucket number {0}'.format(
                         max_buckets + 1))
        buckets = rest.get_buckets()
        if len(buckets) != max_buckets:
            msg = 'tried to create {0} buckets, only created {1}'\
                                           .format(max_buckets + 1, len(buckets))
            self.fail(msg)
Example No. 22
 def reset_retry_rebalance_settings(self):
     body = dict()
     body["enabled"] = "false"
     rest = RestConnection(self.cluster.master)
     rest.set_retry_rebalance_settings(body)
     self.log.debug("Retry Rebalance settings reset ....")
Example No. 23
 def set_vbuckets(master, vbuckets):
     rest = RestConnection(master)
     command = "rpc:eval_everywhere(ns_config, set, [couchbase_num_vbuckets_default, {0}]).".format(
         vbuckets)
     status, content = rest.diag_eval(command)
     return status, content
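
A minimal usage sketch for the helper above, assuming `master` is any server object RestConnection accepts; it sets the default vBucket count and reads it back through the same diag_eval channel (the ns_config:search call is an assumption about what is readable this way):

    # Set the default vBucket count, then read it back (value is illustrative).
    status, content = set_vbuckets(master, 128)
    assert status, "diag_eval failed: {0}".format(content)
    rest = RestConnection(master)
    ok, value = rest.diag_eval(
        "ns_config:search(couchbase_num_vbuckets_default).")
    print("couchbase_num_vbuckets_default -> {0}".format(value))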
Example No. 24
                raise e
        except exceptions.EOFError:
            awareness.reset(rest)
        except socket.error:
            awareness.reset(rest)
    if not passed:
        raise Exception("failed delete after 60 seconds")


if __name__ == "__main__":

    config = Config(sys.argv[1:])

    kv = KVStore()

    rest = RestConnection(config.master)
    awareness = VBucketAwareMemcached(rest, config.bucket)

    for i in range(config.sets):
        key = config.prefix + str(i)
        value = str(uuid.uuid4())
        kv.set(key, 0, 0, value)
        set_aware(awareness, rest, key, 0, 0, value)

    for i in range(config.mutations):
        key = config.prefix + str(random.randint(0, config.sets - 1))
        value = str(uuid.uuid4())
        kv.set(key, 0, 0, value)
        set_aware(awareness, rest, key, 0, 0, value)

    for i in range(config.deletes):
Example No. 25
 def disable_autofailover(self, servers):
     for server in servers:
         rest = RestConnection(server)
         rest.update_autofailover_settings(False, 120)
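
A mirror-image sketch that re-enables autofailover with the same REST helper; passing True is assumed to toggle the feature on, with the timeout given in seconds:

    def enable_autofailover(self, servers, timeout=120):
        # Counterpart to disable_autofailover() above; True turns the
        # feature back on with the given timeout (assumed semantics).
        for server in servers:
            rest = RestConnection(server)
            rest.update_autofailover_settings(True, timeout)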
Example No. 26
 def __set_stat_setting(nodes, key, value):
     for node in nodes:
         RestConnection(node).diag_eval(
             "ns_config:set_sub(stats_settings, [{%s, %s}])" % (key, value))
Example No. 27
def create_rest(server_ip=cfg.COUCHBASE_IP,
                port=cfg.COUCHBASE_PORT,
                username=cfg.COUCHBASE_USER,
                password=cfg.COUCHBASE_PWD):
    return RestConnection(
        create_server_obj(server_ip, port, username, password))
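
A short usage sketch: override only the host while keeping the cfg defaults for port and credentials (the IP below is a placeholder):

    rest = create_rest(server_ip="10.1.2.3")
    for bucket in rest.get_buckets():
        print(bucket.name)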
Example No. 28
    def test_alt_addr_with_xdcr(self):
        url_format = ""
        secure_port = ""
        secure_conn = ""
        self.setup_xdcr_cluster()
        des_alt_addr_set = False

        self.log.info("Create bucket at source")
        src_master = self.clusters_dic[0][0]
        self._create_buckets(src_master)
        src_rest = RestConnection(src_master)
        src_buckets = src_rest.get_buckets()
        if src_buckets and src_buckets[0]:
            src_bucket_name = src_buckets[0].name
        else:
            self.fail("Failed to create bucket at src cluster")

        des_master = self.clusters_dic[1][0]
        self.log.info("Create bucket at destination")
        self._create_buckets(des_master)
        des_rest = RestConnection(des_master)
        des_buckets = des_rest.get_buckets()
        if des_buckets and des_buckets[0]:
            des_bucket_name = des_buckets[0].name
        else:
            self.fail("Failed to create bucket at des cluster")

        for server in self.clusters_dic[0]:
            internal_IP = self.get_internal_IP(server)
            status = self.set_alternate_address(server,
                                                url_format=url_format,
                                                secure_port=secure_port,
                                                secure_conn=secure_conn,
                                                internal_IP=internal_IP)
            if not status:
                self.fail("Failed to set alt addr at source cluster")
        self.all_alt_addr_set = True

        self.kv_loader(src_master, "mac")
        self.create_xdcr_reference(src_master.ip, des_master.ip)

        src_num_docs = int(src_rest.get_active_key_count(src_bucket_name))
        count = 0
        while count < 10:
            if src_num_docs < 10000:
                self.sleep(10, "wait for items to be written to bucket")
                src_num_docs = int(
                    src_rest.get_active_key_count(src_bucket_name))
                count += 1
            if src_num_docs == 10000:
                self.log.info("all bucket items set")
                break
            if count == 3:
                self.fail("bucket items were not set after 30 seconds")

        self.create_xdcr_replication(src_master.ip, des_master.ip,
                                     src_bucket_name)
        self.sleep(25, "time needed for replication to be created")

        self.log.info("Reduce check point time to 30 seconds")
        self.set_xdcr_checkpoint(src_master.ip, 30)
        #self.set_xdcr_checkpoint(des_master.ip, 30)

        self.log.info("Get xdcr configs from cluster")
        shell = RemoteMachineShellConnection(self.master)
        rep_id_cmd = "curl -u Administrator:password http://{0}:8091/pools/default/remoteClusters"\
                                                                            .format(self.master.ip)
        output, error = shell.execute_command(rep_id_cmd)
        output = output[0][1:-1]
        xdcr_config = json.loads(output)

        cmd = "curl -u Administrator:password http://localhost:8091/sasl_logs/goxdcr "
        cmd += "|  grep  'Execution timed out' | tail -n 1 "
        output, error = shell.execute_command(cmd)
        self.log.info(
            "Verify replication times out while alternate address "
            "is not yet enabled at the destination cluster")
        if output and xdcr_config["uuid"] in output[0] \
                and "Execution timed out" in output[0]:
            self.log.info(
                "replication failed as expected since alternate address "
                "is not enabled at the destination")
        else:
            self.fail("Replication did not time out; alternate address "
                      "at the destination cluster did not block it")

        count = 0
        des_num_docs = int(des_rest.get_active_key_count(des_bucket_name))
        while count < 6:
            if src_num_docs != des_num_docs:
                self.sleep(60, "wait for replication ...")
                des_num_docs = int(
                    des_rest.get_active_key_count(des_bucket_name))
                count += 1
            elif src_num_docs == des_num_docs:
                self.fail(
                    "Replication should have failed; alternate address "
                    "at the destination did not block it")
            if count == 6:
                if not des_alt_addr_set:
                    self.log.info(
                        "This is expected since alt addr is not set yet")

        des_alt_addr_status = []
        for server in self.clusters_dic[1]:
            internal_IP = self.get_internal_IP(server)
            des_alt_addr_status.append(
                self.set_alternate_address(server,
                                           url_format=url_format,
                                           secure_port=secure_port,
                                           secure_conn=secure_conn,
                                           internal_IP=internal_IP))
        if False in des_alt_addr_status:
            self.fail("Failed to set alt addr at des cluster")
        else:
            des_alt_addr_set = True

        count = 0
        self.log.info("Restart replication")
        cmd = "curl -X POST -u Administrator:password "
        cmd += "http://{0}:8091/settings/replications/{1}%2F{2}%2F{2} "\
                 .format(self.master.ip, xdcr_config["uuid"], des_bucket_name)
        cmd += "-d pauseRequested="
        try:
            check_output(cmd + "true", shell=True, stderr=STDOUT)
            self.sleep(20)
            check_output(cmd + "false", shell=True, stderr=STDOUT)
        except CalledProcessError as e:
            print("Error return code: {0}".format(e.returncode))
            if e.output:
                self.fail(e.output)
        des_rest = RestConnection(des_master)

        self.log.info("Verify docs is replicated to des cluster")
        while count < 6:
            if src_num_docs != des_num_docs:
                self.sleep(60, "wait for replication start...")
                des_num_docs = int(
                    des_rest.get_active_key_count(des_bucket_name))
                count += 1
            elif src_num_docs == des_num_docs:
                self.log.info("Replication is complete")
                break
            if count == 6:
                if des_alt_addr_set:
                    self.fail("Replication does not complete after 6 minutes")

        self.delete_xdcr_replication(src_master.ip, xdcr_config["uuid"])
Example No. 29
    def _run_observe(self):
        tasks = []
        query_set = "true"
        persisted = 0
        mutated = False
        count = 0
        for bucket in self.buckets:
            self.cluster.create_view(self.master, self.default_design_doc,
                                     self.default_view, bucket,
                                     self.wait_timeout * 2)
            client = VBucketAwareMemcached(RestConnection(self.master), bucket)
            self.max_time = timedelta(microseconds=0)
            if self.mutate_by == "multi_set":
                key_val = self._create_multi_set_batch()
                client.setMulti(0, 0, key_val)
            keys = ["observe%s" % (i) for i in xrange(self.num_items)]
            for key in keys:
                mutated = False
                while not mutated and count < 60:
                    try:
                        if self.mutate_by == "set":
                            # client.memcached(key).set(key, 0, 0, "set")
                            client.set(key, 0, 0, "setvalue")
                        elif self.mutate_by == "append":
                            client.memcached(key).append(key, "append")
                        elif self.mutate_by == "prepend":
                            client.memcached(key).prepend(key, "prepend")
                        elif self.mutate_by == "incr":
                            client.memcached(key).incr(key, 1)
                        elif self.mutate_by == "decr":
                            client.memcached(key).decr(key)
                        mutated = True
                        t_start = datetime.now()
                    except MemcachedError as error:
                        if error.status == 134:
                            self.log.error(
                                "Memcached error 134, wait for 5 seconds "
                                "and then try again")
                            count += 1
                            time.sleep(5)
                        else:
                            raise
                while persisted == 0:
                    opaque, rep_time, persist_time, persisted, cas = client.observe(
                        key)
                t_end = datetime.now()
                self.log.info("##########key:-%s################" % (key))
                self.log.info("Persisted:- %s" % (persisted))
                self.log.info("Persist_Time:- %s" % (rep_time))
                self.log.info("Time2:- %s" % (t_end - t_start))
                if self.max_time <= (t_end - t_start):
                    self.max_time = (t_end - t_start)
                    self.log.info("Max Time taken for observe is :- %s" %
                                  self.max_time)
                    self.log.info("Cas Value:- %s" % (cas))
            query = {
                "stale": "false",
                "full_set": "true",
                "connection_timeout": 60000
            }
            self.cluster.query_view(self.master,
                                    "dev_Doc1",
                                    self.default_view.name,
                                    query,
                                    self.num_items,
                                    bucket,
                                    timeout=self.wait_timeout)
            self.log.info(
                "Observe Validation:- view: %s in design doc dev_Doc1 and in bucket %s"
                % (self.default_view, bucket))
            # check whether observe has to run with delete and delete parallel with observe or not
            if len(self.observe_with) > 0:
                if self.observe_with == "delete":
                    self.log.info("Deleting 0- %s number of items" %
                                  (self.num_items / 2))
                    self._load_doc_data_all_buckets('delete', 0,
                                                    self.num_items / 2)
                    query_set = "true"
                elif self.observe_with == "delete_parallel":
                    self.log.info("Deleting Parallel 0- %s number of items" %
                                  (self.num_items / 2))
                    tasks = self._async_load_doc_data_all_buckets(
                        'delete', 0, self.num_items / 2)
                    query_set = "false"
                for key in keys:
                    opaque, rep_time, persist_time, persisted, cas = client.memcached(
                        key).observe(key)
                    self.log.info("##########key:-%s################" % (key))
                    self.log.info("Persisted:- %s" % (persisted))
                if self.observe_with == "delete_parallel":
                    for task in tasks:
                        task.result()

                query = {
                    "stale": "false",
                    "full_set": query_set,
                    "connection_timeout": 60000
                }
                self.cluster.query_view(self.master,
                                        "dev_Doc1",
                                        self.default_view.name,
                                        query,
                                        self.num_items / 2,
                                        bucket,
                                        timeout=self.wait_timeout)
                self.log.info(
                    "Observe Validation:- view: %s in design doc dev_Doc1 and in bucket %s"
                    % (self.default_view, self.default_bucket_name))
        """test_observe_basic_data_load_delete will test observer basic scenario
Example No. 30
 def _get_cb_version(self):
     rest = RestConnection(self.master)
     version = rest.get_nodes_self().version
     return version[:version.rfind('-')]
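
The rfind('-') slice drops everything from the last '-' onward. A standalone illustration of the same parsing; the sample version string is a made-up example, not a live cluster response:

    # e.g. a node version string with a trailing build segment
    version = "5.5.0-2958"
    print(version[:version.rfind('-')])   # -> 5.5.0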