Example #1
 def setUp(self):
     self.log = logger.Logger.get_logger()
     self.input = TestInputSingleton.input
     self.assertTrue(self.input, msg="input parameters missing...")
     self.servers = self.input.servers
     self.master = self.servers[0]
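     # initialize the master node over REST and size its memory quota from the reserved memcached memory and per-node bucket ratio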
     rest = RestConnection(self.master)
     rest.init_cluster(username=self.master.rest_username,
                       password=self.master.rest_password)
     info = rest.get_nodes_self()
     node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
     rest.init_cluster_memoryQuota(memoryQuota=int(info.mcdMemoryReserved * node_ram_ratio))
     BucketOperationHelper.delete_all_buckets_or_assert(servers=self.servers, test_case=self)
     ClusterOperationHelper.cleanup_cluster(servers=self.servers)
     credentials = self.input.membase_settings
     ClusterOperationHelper.add_all_nodes_or_assert(master=self.master, all_servers=self.servers, rest_settings=credentials, test_case=self)
     rest = RestConnection(self.master)
     nodes = rest.node_statuses()
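     # collect every node's OTP id and rebalance with an empty eject list to pull the added nodes in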
     otpNodeIds = []
     for node in nodes:
         otpNodeIds.append(node.id)
     rebalanceStarted = rest.rebalance(otpNodeIds, [])
     self.assertTrue(rebalanceStarted,
                     "unable to start rebalance on master node {0}".format(self.master.ip))
     self.log.info('started rebalance operation on master node {0}'.format(self.master.ip))
     rebalanceSucceeded = rest.monitorRebalance()
     self.assertTrue(rebalanceSucceeded,
                     "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
Example #2
    def setUp(self):
        self._cleanup_nodes = []
        self._failed_nodes = []
        super(FailoverBaseTest, self).setUp()
        self.bidirectional = self.input.param("bidirectional", False)
        self._value_size = self.input.param("value_size", 256)
        self.dgm_run = self.input.param("dgm_run", True)
        credentials = self.input.membase_settings
        self.add_back_flag = False
        self.during_ops = self.input.param("during_ops", None)

        self.log.info(
            "==============  FailoverBaseTest setup was started for test #{0} {1}==============".format(
                self.case_number, self._testMethodName
            )
        )
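        # join all servers to the cluster and rebalance; shut down the cluster helper and fail the test on any error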
        try:
            rest = RestConnection(self.master)
            ClusterOperationHelper.add_all_nodes_or_assert(self.master, self.servers, credentials, self)
            nodes = rest.node_statuses()
            rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
            msg = "rebalance failed after adding these nodes {0}".format(nodes)
            self.assertTrue(rest.monitorRebalance(), msg=msg)
        except Exception as e:
            self.cluster.shutdown()
            self.fail(e)
Example #3
    def common_setup(self, replica):
        self._input = TestInputSingleton.input
        self._servers = self._input.servers
        first = self._servers[0]
        self.log = logger.Logger().get_logger()
        self.log.info(self._input)
        rest = RestConnection(first)
        for server in self._servers:
            RestHelper(RestConnection(server)).is_ns_server_running()

        ClusterOperationHelper.cleanup_cluster(self._servers)
        BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
        ClusterOperationHelper.add_all_nodes_or_assert(self._servers[0], self._servers, self._input.membase_settings, self)
        nodes = rest.node_statuses()
        otpNodeIds = []
        for node in nodes:
            otpNodeIds.append(node.id)
        info = rest.get_nodes_self()
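        # give the default bucket three quarters of the reserved memcached memory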
        bucket_ram = info.mcdMemoryReserved * 3 / 4
        rest.create_bucket(bucket="default",
                           ramQuotaMB=int(bucket_ram),
                           replicaNumber=replica,
                           proxyPort=rest.get_nodes_self().moxi)
        msg = "wait_for_memcached fails"
        ready = BucketOperationHelper.wait_for_memcached(first, "default")
        self.assertTrue(ready, msg)
        rebalanceStarted = rest.rebalance(otpNodeIds, [])
        self.assertTrue(rebalanceStarted,
                        "unable to start rebalance on master node {0}".format(first.ip))
        self.log.info('started rebalance operation on master node {0}'.format(first.ip))
        rebalanceSucceeded = rest.monitorRebalance()
        # without a bucket this seems to fail
        self.assertTrue(rebalanceSucceeded,
                        "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
        self.awareness = VBucketAwareMemcached(rest, "default")
Example #4
 def add_node_and_rebalance(self, master, servers):
     ClusterOperationHelper.add_all_nodes_or_assert(
         master, servers, self.input.membase_settings, self)
     rest = RestConnection(master)
     nodes = rest.node_statuses()
     otpNodeIds = []
     for node in nodes:
         otpNodeIds.append(node.id)
     rebalanceStarted = rest.rebalance(otpNodeIds, [])
     self.assertTrue(
         rebalanceStarted,
         "unable to start rebalance on master node {0}".format(master.ip))
     self.log.info('started rebalance operation on master node {0}'.format(
         master.ip))
     rebalanceSucceeded = rest.monitorRebalance()
     self.assertTrue(
         rebalanceSucceeded,
         "rebalance operation for nodes: {0} was not successful".format(
             otpNodeIds))
     self.log.info(
         'rebalance operation succeeded for nodes: {0}'.format(otpNodeIds))
     # now remove the nodes
     # make sure it's rebalanced and node statuses are healthy
     helper = RestHelper(rest)
     self.assertTrue(helper.is_cluster_healthy(),
                     "cluster status is not healthy")
     self.assertTrue(helper.is_cluster_rebalanced(),
                     "cluster is not balanced")
Example #5
 def _setup_cluster(self):
     rest = RestConnection(self.master)
     credentials = self._input.membase_settings
     ClusterOperationHelper.add_all_nodes_or_assert(self.master, self._servers, credentials, self)
     nodes = rest.node_statuses()
     rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
     msg = "rebalance failed after adding these nodes {0}".format(nodes)
     self.assertTrue(rest.monitorRebalance(), msg=msg)
Example #6
 def _setup_cluster(self):
     rest = RestConnection(self.master)
     credentials = self._input.membase_settings
     ClusterOperationHelper.add_all_nodes_or_assert(self.master,
                                                    self._servers,
                                                    credentials, self)
     nodes = rest.node_statuses()
     rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
     msg = "rebalance failed after adding these nodes {0}".format(nodes)
     self.assertTrue(rest.monitorRebalance(), msg=msg)
Example #7
    def _cluster_setup(self):
        log = logger.Logger.get_logger()

        replicas = self._input.param("replicas", 1)
        keys_count = self._input.param("keys-count", 0)
        num_buckets = self._input.param("num-buckets", 1)

        bucket_name = "default"
        master = self._servers[0]
        credentials = self._input.membase_settings
        rest = RestConnection(self.master)
        info = rest.get_nodes_self()
        rest.init_cluster(username=self.master.rest_username,
                          password=self.master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
        rest.reset_autofailover()
        ClusterOperationHelper.add_all_nodes_or_assert(self.master,
                                                       self._servers,
                                                       credentials, self)
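        # give the default bucket two thirds of the configured memory quota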
        bucket_ram = info.memoryQuota * 2 / 3

        if num_buckets == 1:
            rest.create_bucket(bucket=bucket_name,
                               ramQuotaMB=bucket_ram,
                               replicaNumber=replicas,
                               proxyPort=info.moxi)
            ready = BucketOperationHelper.wait_for_memcached(
                self.master, bucket_name)
            nodes = rest.node_statuses()
            rest.rebalance(otpNodes=[node.id for node in nodes],
                           ejectedNodes=[])
            buckets = rest.get_buckets()
        else:
            created = BucketOperationHelper.create_multiple_buckets(
                self.master, replicas, howmany=num_buckets)
            self.assertTrue(created, "unable to create multiple buckets")
            buckets = rest.get_buckets()
            for bucket in buckets:
                ready = BucketOperationHelper.wait_for_memcached(
                    self.master, bucket.name)
                self.assertTrue(ready, msg="wait_for_memcached failed")
                nodes = rest.node_statuses()
                rest.rebalance(otpNodes=[node.id for node in nodes],
                               ejectedNodes=[])

        for bucket in buckets:
            inserted_keys_cnt = self.load_data(self.master, bucket.name,
                                               keys_count)
            log.info('inserted {0} keys'.format(inserted_keys_cnt))

        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)
        self.assertTrue(ready, "wait_for_memcached failed")
Example #8
File: xdcr.py  Project: vmx/testrunner
    def rebalance_servers_in(servers, rest_settings, testcase):
        log = logger.Logger().get_logger()
        master = servers[0]
        rest = RestConnection(master)
        ClusterOperationHelper.add_all_nodes_or_assert(master, servers, rest_settings, testcase)

        otpNodeIds = []
        for node in rest.node_statuses():
            otpNodeIds.append(node.id)
        rebalanceStarted = rest.rebalance(otpNodeIds, [])
        testcase.assertTrue(rebalanceStarted,
                            "unable to start rebalance on master node {0}".format(master.ip))
        log.info('started rebalance operation on master node {0}'.format(master.ip))
        rebalanceSucceeded = rest.monitorRebalance()
        testcase.assertTrue(rebalanceSucceeded,
                            "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
Example #9
    def _cluster_setup(self):
        log = logger.Logger.get_logger()

        replicas = self._input.param("replicas", 1)
        keys_count = self._input.param("keys-count", 0)
        num_buckets = self._input.param("num-buckets", 1)

        bucket_name = "default"
        master = self._servers[0]
        credentials = self._input.membase_settings
        rest = RestConnection(master)
        info = rest.get_nodes_self()
        rest.init_cluster(username=master.rest_username,
                          password=master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
        rest.reset_autofailover()
        ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
        bucket_ram = info.memoryQuota * 2 / 3

        if num_buckets == 1:
            rest.create_bucket(bucket=bucket_name,
                               ramQuotaMB=bucket_ram,
                               replicaNumber=replicas,
                               proxyPort=info.moxi)
            ready = BucketOperationHelper.wait_for_memcached(master, bucket_name)
            nodes = rest.node_statuses()
            rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
            buckets = rest.get_buckets()
        else:
            created = BucketOperationHelper.create_multiple_buckets(master, replicas, howmany=num_buckets)
            self.assertTrue(created, "unable to create multiple buckets")
            buckets = rest.get_buckets()
            for bucket in buckets:
                ready = BucketOperationHelper.wait_for_memcached(master, bucket.name)
                self.assertTrue(ready, msg="wait_for_memcached failed")
                nodes = rest.node_statuses()
                rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])

#        self.load_data(master, bucket_name, keys_count)

        for bucket in buckets:
            inserted_keys_cnt = self.load_data(master, bucket.name, keys_count)
            log.info('inserted {0} keys'.format(inserted_keys_cnt))

        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)
        self.assertTrue(ready, "wait_for_memcached failed")
Example #10
 def add_node_and_rebalance(self, master, servers):
     ClusterOperationHelper.add_all_nodes_or_assert(master, servers, self.input.membase_settings, self)
     rest = RestConnection(master)
     nodes = rest.node_statuses()
     otpNodeIds = []
     for node in nodes:
         otpNodeIds.append(node.id)
     rebalanceStarted = rest.rebalance(otpNodeIds, [])
     self.assertTrue(rebalanceStarted,
                     "unable to start rebalance on master node {0}".format(master.ip))
     self.log.info('started rebalance operation on master node {0}'.format(master.ip))
     rebalanceSucceeded = rest.monitorRebalance()
     self.assertTrue(rebalanceSucceeded,
                     "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
     self.log.info('rebalance operation succeeded for nodes: {0}'.format(otpNodeIds))
     # now remove the nodes
     # make sure it's rebalanced and node statuses are healthy
     helper = RestHelper(rest)
     self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
     self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
Example #11
 def setUp(self):
     self._cleanup_nodes = []
     super(FailoverBaseTest, self).setUp()
     self.bidirectional = self.input.param("bidirectional", False)
     self._value_size = self.input.param("value_size", 256)
     self._failed_nodes = []
     self.dgm_run = self.input.param("dgm_run", True)
     self.gen_create = BlobGenerator('loadOne', 'loadOne_', self._value_size, end=self.num_items)
     self.add_back_flag = False
     self.log.info("==============  FailoverBaseTest setup was started for test #{0} {1}=============="\
                   .format(self.case_number, self._testMethodName))
     rest = RestConnection(self.master)
     credentials = self.input.membase_settings
     ClusterOperationHelper.add_all_nodes_or_assert(self.master, self.servers, credentials, self)
     nodes = rest.node_statuses()
     rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
     msg = "rebalance failed after adding these nodes {0}".format(nodes)
     self.assertTrue(rest.monitorRebalance(), msg=msg)
     self.log.info("==============  FailoverBaseTest setup was finished for test #{0} {1} =============="\
                   .format(self.case_number, self._testMethodName))
Example #12
 def setUp(self):
     self._cleanup_nodes = []
     super(FailoverBaseTest, self).setUp()
     self.bidirectional = self.input.param("bidirectional", False)
     self._value_size = self.input.param("value_size", 256)
     self._failed_nodes = []
     self.dgm_run = self.input.param("dgm_run", True)
     self.gen_create = BlobGenerator('loadOne', 'loadOne_', self._value_size, end=self.num_items)
     self.add_back_flag = False
     self.log.info("==============  FailoverBaseTest setup was started for test #{0} {1}=============="\
                   .format(self.case_number, self._testMethodName))
     rest = RestConnection(self.master)
     credentials = self.input.membase_settings
     ClusterOperationHelper.add_all_nodes_or_assert(self.master, self.servers, credentials, self)
     nodes = rest.node_statuses()
     rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
     msg = "rebalance failed after adding these nodes {0}".format(nodes)
     self.assertTrue(rest.monitorRebalance(), msg=msg)
     self.log.info("==============  FailoverBaseTest setup was finished for test #{0} {1} =============="\
                   .format(self.case_number, self._testMethodName))
Example #13
    def _common_test_body(self):
        master = self.servers[0]
        rest = RestConnection(master)
        creds = self.input.membase_settings
        bucket_data = RebalanceBaseTest.bucket_data_init(rest)

        ClusterHelper.add_all_nodes_or_assert(master, self.servers, creds, self)
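        # rebalance the newly added nodes in and wait for the rebalance to complete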
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding nodes")

        nodes = rest.node_statuses()

        # don't rebalance out the current node
        while len(nodes) > 1:
            #pick a node that is not the master node
            toBeEjectedNode = RebalanceHelper.pick_node(master)
            distribution = RebalanceBaseTest.get_distribution(self.load_ratio)
            RebalanceBaseTest.load_data_for_buckets(rest, self.load_ratio, distribution, [master], bucket_data, self)
            self.log.info("current nodes : {0}".format([node.id for node in rest.node_statuses()]))
            #let's start/step rebalance three times
            self.log.info("removing node {0} and rebalance afterwards".format(toBeEjectedNode.id))
            rest.fail_over(toBeEjectedNode.id)
            self.log.info("failed over {0}".format(toBeEjectedNode.id))
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                ejectedNodes=[toBeEjectedNode.id])
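            # let the rebalance reach ~30% progress, stop it, verify replication, then rebalance again to completion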
            expected_progress = 30
            reached = RestHelper(rest).rebalance_reached(expected_progress)
            self.assertTrue(reached, "rebalance failed or did not reach {0}%".format(expected_progress))
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
            time.sleep(20)
            RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self)
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[toBeEjectedNode.id])
            self.assertTrue(rest.monitorRebalance(),
                msg="rebalance operation failed after adding node {0}".format(toBeEjectedNode.id))
            time.sleep(20)

            RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self)
            nodes = rest.node_statuses()
Example #14
    def common_setup(self, replica):
        self._input = TestInputSingleton.input
        self._servers = self._input.servers
        first = self._servers[0]
        self.log = logger.Logger().get_logger()
        self.log.info(self._input)
        rest = RestConnection(first)
        for server in self._servers:
            RestHelper(RestConnection(server)).is_ns_server_running()

        ClusterOperationHelper.cleanup_cluster(self._servers)
        BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
        ClusterOperationHelper.add_all_nodes_or_assert(
            self._servers[0], self._servers, self._input.membase_settings,
            self)
        nodes = rest.node_statuses()
        otpNodeIds = []
        for node in nodes:
            otpNodeIds.append(node.id)
        info = rest.get_nodes_self()
        bucket_ram = info.mcdMemoryReserved * 3 / 4
        rest.create_bucket(bucket="default",
                           ramQuotaMB=int(bucket_ram),
                           replicaNumber=replica,
                           proxyPort=rest.get_nodes_self().moxi)
        msg = "wait_for_memcached fails"
        ready = BucketOperationHelper.wait_for_memcached(first, "default")
        self.assertTrue(ready, msg)
        rebalanceStarted = rest.rebalance(otpNodeIds, [])
        self.assertTrue(
            rebalanceStarted,
            "unable to start rebalance on master node {0}".format(first.ip))
        self.log.info('started rebalance operation on master node {0}'.format(
            first.ip))
        rebalanceSucceeded = rest.monitorRebalance()
        # without a bucket this seems to fail
        self.assertTrue(
            rebalanceSucceeded,
            "rebalance operation for nodes: {0} was not successful".format(
                otpNodeIds))
        self.awareness = VBucketAwareMemcached(rest, "default")
Example #15
 def _cluster_setup(self):
     bucket_name = "default"
     master = self._servers[0]
     credentials = self._input.membase_settings
     rest = RestConnection(master)
     info = rest.get_nodes_self()
     rest.init_cluster(username=master.rest_username,
                       password=master.rest_password)
     rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
     rest.reset_autofailover()
     ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
     bucket_ram = info.memoryQuota * 2 / 3
     rest.create_bucket(bucket=bucket_name,
                        ramQuotaMB=bucket_ram,
                        proxyPort=info.moxi)
     ready = BucketOperationHelper.wait_for_memcached(master, bucket_name)
     nodes = rest.node_statuses()
     rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
     msg = "rebalance failed after adding these nodes {0}".format(nodes)
     self.assertTrue(rest.monitorRebalance(), msg=msg)
     self.assertTrue(ready, "wait_for_memcached failed")
Example #16
    def _install_and_upgrade(self,
                             initial_version='1.6.5.3',
                             create_buckets=False,
                             insert_data=False,
                             start_upgraded_first=True,
                             load_ratio=-1,
                             roll_upgrade=False,
                             upgrade_path=[],
                             do_new_rest=False):
        node_upgrade_path = []
        node_upgrade_path.extend(upgrade_path)
        #then start them in whatever order you want
        inserted_keys = []
        log = logger.Logger.get_logger()
        if roll_upgrade:
            log.info("performing an online upgrade")
        input = TestInputSingleton.input
        rest_settings = input.membase_settings
        servers = input.servers
        save_upgrade_config = False
        is_amazon = False
        if input.test_params.get('amazon', False):
            is_amazon = True
        if initial_version.startswith("1.6") or initial_version.startswith(
                "1.7"):
            product = 'membase-server-enterprise'
        else:
            product = 'couchbase-server-enterprise'
        # install older build on all nodes
        for server in servers:
            remote = RemoteMachineShellConnection(server)
            rest = RestConnection(server)
            info = remote.extract_remote_info()
            # check to see if we are installing from latestbuilds or releases
            # note: for newer releases (1.8.0) even release versions can have the
            #  form 1.8.0r-55
            if re.search('r', initial_version):
                builds, changes = BuildQuery().get_all_builds()
                older_build = BuildQuery().find_membase_build(
                    builds,
                    deliverable_type=info.deliverable_type,
                    os_architecture=info.architecture_type,
                    build_version=initial_version,
                    product=product,
                    is_amazon=is_amazon)

            else:
                older_build = BuildQuery().find_membase_release_build(
                    deliverable_type=info.deliverable_type,
                    os_architecture=info.architecture_type,
                    build_version=initial_version,
                    product=product,
                    is_amazon=is_amazon)

            remote.membase_uninstall()
            remote.couchbase_uninstall()
            remote.stop_membase()
            remote.stop_couchbase()
            remote.download_build(older_build)
            #now let's install ?
            remote.membase_install(older_build)
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            rest.init_cluster_port(rest_settings.rest_username,
                                   rest_settings.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()

        bucket_data = {}
        master = servers[0]
        if create_buckets:
            #let's create buckets
            #wait for the bucket
            #bucket port should also be configurable , pass it as the
            #parameter to this test ? later

            self._create_default_bucket(master)
            inserted_keys = self._load_data(master, load_ratio)
            _create_load_multiple_bucket(self, master, bucket_data, howmany=2)

        # cluster all the nodes together
        ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                       rest_settings, self)
        rest = RestConnection(master)
        nodes = rest.node_statuses()
        otpNodeIds = []
        for node in nodes:
            otpNodeIds.append(node.id)
        rebalanceStarted = rest.rebalance(otpNodeIds, [])
        self.assertTrue(
            rebalanceStarted,
            "unable to start rebalance on master node {0}".format(master.ip))
        log.info('started rebalance operation on master node {0}'.format(
            master.ip))
        rebalanceSucceeded = rest.monitorRebalance()
        self.assertTrue(
            rebalanceSucceeded,
            "rebalance operation for nodes: {0} was not successful".format(
                otpNodeIds))

        if initial_version == "1.7.0" or initial_version == "1.7.1":
            self._save_config(rest_settings, master)

        input_version = input.test_params['version']
        node_upgrade_path.append(input_version)
        current_version = initial_version
        previous_version = current_version
        # if we don't want to do roll_upgrade?
        log.info("Upgrade path: {0} -> {1}".format(initial_version,
                                                   node_upgrade_path))
        log.info("List of servers {0}".format(servers))
        if not roll_upgrade:
            for version in node_upgrade_path:
                previous_version = current_version
                current_version = version
                if version != initial_version:
                    log.info("Upgrading to version {0}".format(version))
                    self._stop_membase_servers(servers)
                    if previous_version.startswith(
                            "1.7") and current_version.startswith("1.8"):
                        save_upgrade_config = True
                    # No need to save the upgrade config from 180 to 181
                    if previous_version.startswith(
                            "1.8.0") and current_version.startswith("1.8.1"):
                        save_upgrade_config = False
                    appropriate_build = _get_build(servers[0],
                                                   version,
                                                   is_amazon=is_amazon)
                    self.assertTrue(
                        appropriate_build.url,
                        msg="unable to find build {0}".format(version))
                    for server in servers:
                        remote = RemoteMachineShellConnection(server)
                        remote.download_build(appropriate_build)
                        remote.membase_upgrade(
                            appropriate_build,
                            save_upgrade_config=save_upgrade_config)
                        RestHelper(
                            RestConnection(server)).is_ns_server_running(
                                testconstants.NS_SERVER_TIMEOUT)

                        #verify admin_creds still set
                        pools_info = RestConnection(server).get_pools_info()
                        self.assertTrue(pools_info['implementationVersion'],
                                        appropriate_build.product_version)

                        if start_upgraded_first:
                            log.info("Starting server {0} post upgrade".format(
                                server))
                            remote.start_membase()
                        else:
                            remote.stop_membase()

                        remote.disconnect()
                    if not start_upgraded_first:
                        log.info("Starting all servers together")
                        self._start_membase_servers(servers)
                    time.sleep(TIMEOUT_SECS)
                    if version == "1.7.0" or version == "1.7.1":
                        self._save_config(rest_settings, master)

                    if create_buckets:
                        self.assertTrue(
                            BucketOperationHelper.wait_for_bucket_creation(
                                'default', RestConnection(master)),
                            msg="bucket 'default' does not exist..")
                    if insert_data:
                        self._verify_data(master, rest, inserted_keys)

        # rolling upgrade
        else:
            version = input.test_params['version']
            appropriate_build = _get_build(servers[0],
                                           version,
                                           is_amazon=is_amazon)
            self.assertTrue(appropriate_build.url,
                            msg="unable to find build {0}".format(version))
            # rebalance node out
            # remove membase from node
            # install destination version onto node
            # rebalance it back into the cluster
            for server_index in range(len(servers)):
                server = servers[server_index]
                master = servers[server_index - 1]
                log.info("current master is {0}, rolling node is {1}".format(
                    master, server))

                rest = RestConnection(master)
                nodes = rest.node_statuses()
                allNodes = []
                toBeEjectedNodes = []
                for node in nodes:
                    allNodes.append(node.id)
                    if "{0}:{1}".format(node.ip,
                                        node.port) == "{0}:{1}".format(
                                            server.ip, server.port):
                        toBeEjectedNodes.append(node.id)
                helper = RestHelper(rest)
                removed = helper.remove_nodes(knownNodes=allNodes,
                                              ejectedNodes=toBeEjectedNodes)
                self.assertTrue(
                    removed,
                    msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
                remote = RemoteMachineShellConnection(server)
                remote.download_build(appropriate_build)
                # if initial version is 180
                # Don't uninstall the server
                if not initial_version.startswith('1.8.0'):
                    remote.membase_uninstall()
                    remote.couchbase_uninstall()
                    remote.membase_install(appropriate_build)
                else:
                    remote.membase_upgrade(appropriate_build)

                RestHelper(rest).is_ns_server_running(
                    testconstants.NS_SERVER_TIMEOUT)
                log.info(
                    "sleep for 10 seconds to wait for membase-server to start..."
                )
                time.sleep(TIMEOUT_SECS)
                rest.init_cluster_port(rest_settings.rest_username,
                                       rest_settings.rest_password)
                rest.init_cluster_memoryQuota(
                    memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
                remote.disconnect()

                # re-add this node to the cluster
                ClusterOperationHelper.add_all_nodes_or_assert(
                    master, [server], rest_settings, self)
                nodes = rest.node_statuses()
                otpNodeIds = []
                for node in nodes:
                    otpNodeIds.append(node.id)
                # Issue rest call to the newly added node
                # MB-5108
                if do_new_rest:
                    master = server
                    rest = RestConnection(master)
                rebalanceStarted = rest.rebalance(otpNodeIds, [])
                self.assertTrue(
                    rebalanceStarted,
                    "unable to start rebalance on master node {0}".format(
                        master.ip))
                log.info(
                    'started rebalance operation on master node {0}'.format(
                        master.ip))
                rebalanceSucceeded = rest.monitorRebalance()
                self.assertTrue(
                    rebalanceSucceeded,
                    "rebalance operation for nodes: {0} was not successful".
                    format(otpNodeIds))

            #TODO: how can i verify that the cluster init config is preserved
            # verify data on upgraded nodes
            if create_buckets:
                self.assertTrue(BucketOperationHelper.wait_for_bucket_creation(
                    'default', RestConnection(master)),
                                msg="bucket 'default' does not exist..")
            if insert_data:
                self._verify_data(master, rest, inserted_keys)
                rest = RestConnection(master)
                buckets = rest.get_buckets()
                for bucket in buckets:
                    BucketOperationHelper.keys_exist_or_assert(
                        bucket_data[bucket.name]["inserted_keys"], master,
                        bucket.name, self)
Example #17
    def _install_and_upgrade(self, initial_version='1.6.5.3',
                             create_buckets=False,
                             insert_data=False,
                             start_upgraded_first=True,
                             load_ratio=-1,
                             roll_upgrade=False,
                             upgrade_path=[]):
        node_upgrade_path = []
        node_upgrade_path.extend(upgrade_path)
        #then start them in whatever order you want
        inserted_keys = []
        log = logger.Logger.get_logger()
        if roll_upgrade:
            log.info("performing a rolling upgrade")
        input = TestInputSingleton.input
        rest_settings = input.membase_settings
        servers = input.servers
        save_upgrade_config = False
        is_amazon = False
        if input.test_params.get('amazon',False):
            is_amazon = True
        # install older build on all nodes
        for server in servers:
            remote = RemoteMachineShellConnection(server)
            rest = RestConnection(server)
            info = remote.extract_remote_info()
            older_build = BuildQuery().find_membase_release_build(deliverable_type=info.deliverable_type,
                                                              os_architecture=info.architecture_type,
                                                              build_version=initial_version,
                                                              product='membase-server-enterprise', is_amazon=is_amazon)

            remote.membase_uninstall()
            remote.couchbase_uninstall()
            remote.execute_command('/etc/init.d/membase-server stop')
            remote.download_build(older_build)
            #now let's install ?
            remote.membase_install(older_build)
            RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
            rest.init_cluster_port(rest_settings.rest_username, rest_settings.rest_password)
            rest.init_cluster_memoryQuota(memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()

        bucket_data = {}
        master = servers[0]
        if create_buckets:
            #let's create buckets
            #wait for the bucket
            #bucket port should also be configurable , pass it as the
            #parameter to this test ? later

            self._create_default_bucket(master)
            inserted_keys = self._load_data(master, load_ratio)
            _create_load_multiple_bucket(self, master, bucket_data, howmany=2)

        # cluster all the nodes together
        ClusterOperationHelper.add_all_nodes_or_assert(master,
                                                       servers,
                                                       rest_settings, self)
        rest = RestConnection(master)
        nodes = rest.node_statuses()
        otpNodeIds = []
        for node in nodes:
            otpNodeIds.append(node.id)
        rebalanceStarted = rest.rebalance(otpNodeIds, [])
        self.assertTrue(rebalanceStarted,
                        "unable to start rebalance on master node {0}".format(master.ip))
        log.info('started rebalance operation on master node {0}'.format(master.ip))
        rebalanceSucceeded = rest.monitorRebalance()
        self.assertTrue(rebalanceSucceeded,
                        "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))

        if initial_version == "1.7.0" or initial_version == "1.7.1":
            self._save_config(rest_settings, master)

        input_version = input.test_params['version']
        node_upgrade_path.append(input_version)
        # if we don't want to do roll_upgrade?
        log.info("Upgrade path: {0} -> {1}".format(initial_version, node_upgrade_path))
        log.info("List of servers {0}".format(servers))
        if not roll_upgrade:
            for version in node_upgrade_path:
                if version != initial_version:
                    log.info("Upgrading to version {0}".format(version))
                    self._stop_membase_servers(servers)
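                    # crossing into the 1.8 line sets save_upgrade_config, which is passed to membase_upgrade below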
                    if re.search('1.8', version):
                        save_upgrade_config = True

                    appropriate_build = _get_build(servers[0], version, is_amazon=is_amazon)
                    self.assertTrue(appropriate_build.url, msg="unable to find build {0}".format(version))
                    for server in servers:
                        remote = RemoteMachineShellConnection(server)
                        remote.download_build(appropriate_build)
                        remote.membase_upgrade(appropriate_build, save_upgrade_config=save_upgrade_config)
                        RestHelper(RestConnection(server)).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)

                        #verify admin_creds still set
                        pools_info = RestConnection(server).get_pools_info()
                        self.assertTrue(pools_info['implementationVersion'], appropriate_build.product_version)

                        if start_upgraded_first:
                            log.info("Starting server {0} post upgrade".format(server))
                            remote.start_membase()
                        else:
                            remote.stop_membase()

                        remote.disconnect()
                    if not start_upgraded_first:
                        log.info("Starting all servers together")
                        self._start_membase_servers(servers)
                    time.sleep(TIMEOUT_SECS)
                    if version == "1.7.0" or version == "1.7.1":
                        self._save_config(rest_settings, master)

                    if create_buckets:
                        self.assertTrue(BucketOperationHelper.wait_for_bucket_creation('default', RestConnection(master)),
                                        msg="bucket 'default' does not exist..")
                    if insert_data:
                        self._verify_data(master, rest, inserted_keys)

        # rolling upgrade
        else:
            version = input.test_params['version']
            appropriate_build = _get_build(servers[0], version, is_amazon=is_amazon)
            self.assertTrue(appropriate_build.url, msg="unable to find build {0}".format(version))
            # rebalance node out
            # remove membase from node
            # install destination version onto node
            # rebalance it back into the cluster
            for server_index in range(len(servers)):
                server = servers[server_index]
                master = servers[server_index - 1]
                log.info("current master is {0}, rolling node is {1}".format(master, server))

                rest = RestConnection(master)
                nodes = rest.node_statuses()
                allNodes = []
                toBeEjectedNodes = []
                for node in nodes:
                    allNodes.append(node.id)
                    if "{0}:{1}".format(node.ip, node.port) == "{0}:{1}".format(server.ip, server.port):
                        toBeEjectedNodes.append(node.id)
                helper = RestHelper(rest)
                removed = helper.remove_nodes(knownNodes=allNodes, ejectedNodes=toBeEjectedNodes)
                self.assertTrue(removed, msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
                remote = RemoteMachineShellConnection(server)
                remote.membase_uninstall()
                remote.couchbase_uninstall()
                remote.download_build(appropriate_build)
                remote.membase_install(appropriate_build)
                RestHelper(rest).is_ns_server_running(testconstants.NS_SERVER_TIMEOUT)
                log.info("sleep for 10 seconds to wait for membase-server to start...")
                time.sleep(TIMEOUT_SECS)
                rest.init_cluster_port(rest_settings.rest_username, rest_settings.rest_password)
                rest.init_cluster_memoryQuota(memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
                remote.disconnect()

                # re-add this node to the cluster
                ClusterOperationHelper.add_all_nodes_or_assert(master, [server], rest_settings, self)
                nodes = rest.node_statuses()
                otpNodeIds = []
                for node in nodes:
                    otpNodeIds.append(node.id)
                rebalanceStarted = rest.rebalance(otpNodeIds, [])
                self.assertTrue(rebalanceStarted,
                                "unable to start rebalance on master node {0}".format(master.ip))
                log.info('started rebalance operation on master node {0}'.format(master.ip))
                rebalanceSucceeded = rest.monitorRebalance()
                self.assertTrue(rebalanceSucceeded,
                                "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
                #ClusterOperationHelper.verify_persistence(servers, self)

            #TODO: how can i verify that the cluster init config is preserved
            # verify data on upgraded nodes
            if create_buckets:
                self.assertTrue(BucketOperationHelper.wait_for_bucket_creation('default', RestConnection(master)),
                                msg="bucket 'default' does not exist..")
            if insert_data:
                self._verify_data(master, rest, inserted_keys)
                rest = RestConnection(master)
                buckets = rest.get_buckets()
                for bucket in buckets:
                    BucketOperationHelper.keys_exist_or_assert(bucket_data[bucket.name]["inserted_keys"],
                                                               master,
                                                               bucket.name, self)
Example #18
    def _install_and_upgrade(self,
                             initial_version='1.6.5.3',
                             create_buckets=False,
                             insert_data=False,
                             start_upgraded_first=True,
                             load_ratio=-1,
                             roll_upgrade=False,
                             upgrade_path=[]):
        node_upgrade_path = []
        node_upgrade_path.extend(upgrade_path)
        #then start them in whatever order you want
        inserted_keys = []
        log = logger.Logger.get_logger()
        if roll_upgrade:
            log.info("performing a rolling upgrade")
        input = TestInputSingleton.input
        input_version = input.test_params['version']
        rest_settings = input.membase_settings
        servers = input.servers
        is_amazon = False
        if input.test_params.get('amazon', False):
            is_amazon = True

        # install older build on all nodes
        for server in servers:
            remote = RemoteMachineShellConnection(server)
            rest = RestConnection(server)
            info = remote.extract_remote_info()
            older_build = BuildQuery().find_membase_release_build(
                deliverable_type=info.deliverable_type,
                os_architecture=info.architecture_type,
                build_version=initial_version,
                product='membase-server-enterprise',
                is_amazon=is_amazon)

            remote.membase_uninstall()
            remote.couchbase_uninstall()
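            # 1.8 builds are packaged as couchbase ('cb'); older builds as membase ('mb')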
            if older_build.product_version.startswith("1.8"):
                abbr_product = "cb"
            else:
                abbr_product = "mb"
            remote.download_binary_in_win(older_build.url, abbr_product,
                                          initial_version)
            #now let's install ?
            remote.install_server_win(older_build, initial_version)
            RestHelper(rest).is_ns_server_running(
                testconstants.NS_SERVER_TIMEOUT)
            rest.init_cluster(rest_settings.rest_username,
                              rest_settings.rest_password)
            rest.init_cluster_memoryQuota(
                memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
            remote.disconnect()

        bucket_data = {}
        master = servers[0]
        # cluster all the nodes together
        ClusterOperationHelper.add_all_nodes_or_assert(master, servers,
                                                       rest_settings, self)
        rest = RestConnection(master)
        nodes = rest.node_statuses()
        otpNodeIds = []
        for node in nodes:
            otpNodeIds.append(node.id)
        rebalanceStarted = rest.rebalance(otpNodeIds, [])
        self.assertTrue(
            rebalanceStarted,
            "unable to start rebalance on master node {0}".format(master.ip))
        log.info('started rebalance operation on master node {0}'.format(
            master.ip))
        rebalanceSucceeded = rest.monitorRebalance()
        self.assertTrue(
            rebalanceSucceeded,
            "rebalance operation for nodes: {0} was not successful".format(
                otpNodeIds))

        if create_buckets:
            #let's create buckets
            #wait for the bucket
            #bucket port should also be configurable , pass it as the
            #parameter to this test ? later

            self._create_default_bucket(master)
            inserted_keys = self._load_data(master, load_ratio)
            _create_load_multiple_bucket(self, master, bucket_data, howmany=2)

        #if initial_version == "1.7.0" or initial_version == "1.7.1":
        #   self._save_config(rest_settings, master)

        node_upgrade_path.append(input_version)
        # if we don't want to do roll_upgrade?
        log.info("Upgrade path: {0} -> {1}".format(initial_version,
                                                   node_upgrade_path))
        log.info("List of servers {0}".format(servers))
        if not roll_upgrade:
            for version in node_upgrade_path:
                if version != initial_version:
                    log.info(
                        "SHUTDOWN ALL CB OR MB SERVERS IN CLUSTER BEFORE DOING UPGRADE"
                    )
                    for server in servers:
                        shell = RemoteMachineShellConnection(server)
                        shell.stop_membase()
                        shell.disconnect()
                    log.info("Upgrading to version {0}".format(version))
                    appropriate_build = _get_build(servers[0],
                                                   version,
                                                   is_amazon=is_amazon)
                    self.assertTrue(
                        appropriate_build.url,
                        msg="unable to find build {0}".format(version))
                    for server in servers:
                        remote = RemoteMachineShellConnection(server)
                        if version.startswith("1.8"):
                            abbr_product = "cb"
                        remote.download_binary_in_win(appropriate_build.url,
                                                      abbr_product, version)
                        log.info("###### START UPGRADE. #########")
                        remote.membase_upgrade_win(info.architecture_type,
                                                   info.windows_name, version,
                                                   initial_version)
                        RestHelper(
                            RestConnection(server)).is_ns_server_running(
                                testconstants.NS_SERVER_TIMEOUT)

                        #verify admin_creds still set
                        pools_info = RestConnection(server).get_pools_info()
                        self.assertTrue(pools_info['implementationVersion'],
                                        appropriate_build.product_version)

                        if not start_upgraded_first:
                            remote.stop_membase()

                        remote.disconnect()
                    if not start_upgraded_first:
                        log.info("Starting all servers together")
                        self._start_membase_servers(servers)
                    time.sleep(TIMEOUT_SECS)

                    if create_buckets:
                        self.assertTrue(
                            BucketOperationHelper.wait_for_bucket_creation(
                                'default', RestConnection(master)),
                            msg="bucket 'default' does not exist..")
                    if insert_data:
                        self._verify_data(master, rest, inserted_keys)

        # rolling upgrade
        else:
            version = input.test_params['version']
            if version.startswith("1.8"):
                abbr_product = "cb"
            appropriate_build = _get_build(servers[0],
                                           version,
                                           is_amazon=is_amazon)
            self.assertTrue(appropriate_build.url,
                            msg="unable to find build {0}".format(version))
            # rebalance node out
            # remove membase from node
            # install destination version onto node
            # rebalance it back into the cluster
            for server_index in range(len(servers)):
                server = servers[server_index]
                master = servers[server_index - 1]
                log.info("current master is {0}, rolling node is {1}".format(
                    master, server))

                rest = RestConnection(master)
                nodes = rest.node_statuses()
                allNodes = []
                toBeEjectedNodes = []
                for node in nodes:
                    allNodes.append(node.id)
                    if "{0}:{1}".format(node.ip,
                                        node.port) == "{0}:{1}".format(
                                            server.ip, server.port):
                        toBeEjectedNodes.append(node.id)
                helper = RestHelper(rest)
                removed = helper.remove_nodes(knownNodes=allNodes,
                                              ejectedNodes=toBeEjectedNodes)
                self.assertTrue(
                    removed,
                    msg="Unable to remove nodes {0}".format(toBeEjectedNodes))
                remote = RemoteMachineShellConnection(server)
                remote.membase_uninstall()
                remote.couchbase_uninstall()
                if appropriate_build.product == 'membase-server-enterprise':
                    abbr_product = "mb"
                else:
                    abbr_product = "cb"
                remote.download_binary_in_win(appropriate_build.url,
                                              abbr_product, version)
                remote.install_server_win(appropriate_build, version)
                RestHelper(rest).is_ns_server_running(
                    testconstants.NS_SERVER_TIMEOUT)
                time.sleep(TIMEOUT_SECS)
                rest.init_cluster(rest_settings.rest_username,
                                  rest_settings.rest_password)
                rest.init_cluster_memoryQuota(
                    memoryQuota=rest.get_nodes_self().mcdMemoryReserved)
                remote.disconnect()

                # re-add this node to the cluster
                ClusterOperationHelper.add_all_nodes_or_assert(
                    master, [server], rest_settings, self)
                nodes = rest.node_statuses()
                log.info(
                    "wait 30 seconds before asking older node for start rebalance"
                )
                time.sleep(30)
                otpNodeIds = []
                for node in nodes:
                    otpNodeIds.append(node.id)
                rebalanceStarted = rest.rebalance(otpNodeIds, [])
                self.assertTrue(
                    rebalanceStarted,
                    "unable to start rebalance on master node {0}".format(
                        master.ip))
                log.info(
                    'started rebalance operation on master node {0}'.format(
                        master.ip))
                rebalanceSucceeded = rest.monitorRebalance()
                self.assertTrue(
                    rebalanceSucceeded,
                    "rebalance operation for nodes: {0} was not successful".
                    format(otpNodeIds))
                #ClusterOperationHelper.verify_persistence(servers, self)

            #TODO: how can i verify that the cluster init config is preserved
            # verify data on upgraded nodes
            if create_buckets:
                self.assertTrue(BucketOperationHelper.wait_for_bucket_creation(
                    'default', RestConnection(master)),
                                msg="bucket 'default' does not exist..")
            if insert_data:
                self._verify_data(master, rest, inserted_keys)
                rest = RestConnection(master)
                buckets = rest.get_buckets()
                for bucket in buckets:
                    BucketOperationHelper.keys_exist_or_assert(
                        bucket_data[bucket.name]["inserted_keys"], master,
                        bucket.name, self)
Example #19
    def common_test_body(self, keys_count, replica, load_ratio, failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replica : {0}".format(replica))
        log.info("load_ratio : {0}".format(load_ratio))
        log.info("failover_reason : {0}".format(failover_reason))
        master = self._servers[0]
        log.info('picking server : {0} as the master'.format(master))
        rest = RestConnection(master)
        info = rest.get_nodes_self()
        rest.init_cluster(username=master.rest_username,
                          password=master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
        bucket_ram = info.memoryQuota * 2 / 3
        bucket = 'default'
        rest.create_bucket(bucket=bucket,
                           ramQuotaMB=bucket_ram,
                           replicaNumber=replica,
                           proxyPort=info.moxi)
        ready = BucketOperationHelper.wait_for_memcached(master, bucket)
        self.assertTrue(ready, "wait_for_memcached_failed")
        credentials = self._input.membase_settings

        ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)

        inserted_keys = FailoverBaseTest.load_data(master, bucket, keys_count, load_ratio)
        inserted_count = len(inserted_keys)
        log.info('inserted {0} keys'.format(inserted_count))

        nodes = rest.node_statuses()
        while (len(nodes) - replica) > 1:
            final_replication_state = RestHelper(rest).wait_for_replication(900)
            msg = "replication state after waiting for up to 15 minutes : {0}"
            self.log.info(msg.format(final_replication_state))
            chosen = RebalanceHelper.pick_nodes(master, howmany=replica)
            for node in chosen:
                # take the node down according to failover_reason
                if failover_reason == 'stop_server':
                    self.stop_server(node)
                    log.info("10 seconds delay to wait for membase-server to shutdown")
                    #wait for 5 minutes until node is down
                    self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
                elif failover_reason == "firewall":
                    RemoteUtilHelper.enable_firewall(self._servers, node, bidirectional=self.bidirectional)
                    self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")

                failed_over = rest.fail_over(node.id)
                if not failed_over:
                    self.log.info("unable to failover the node the first time. trying again in 75 seconds..")
                    # retry once after 75 seconds (a retry helper is sketched after this example)
                    time.sleep(75)
                    failed_over = rest.fail_over(node.id)
                self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
                log.info("failed over node : {0}".format(node.id))
            # brief pause after failover before rebalancing out the failed nodes
            log.info("10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                           ejectedNodes=[node.id for node in chosen])
            msg = "rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(), msg=msg)
            FailoverBaseTest.replication_verification(master, bucket, replica, inserted_count, self)

            nodes = rest.node_statuses()
        FailoverBaseTest.verify_data(master, inserted_keys, bucket, self)
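
The single retry around rest.fail_over() above can be factored out if more attempts are ever needed. A minimal sketch, assuming rest.fail_over(otp_node_id) keeps returning a truthy value on success; fail_over_with_retry is a hypothetical helper name, not part of the test framework.

import time

def fail_over_with_retry(rest, otp_node_id, attempts=2, delay=75):
    # try to fail the node over, sleeping between attempts
    for attempt in range(attempts):
        if rest.fail_over(otp_node_id):
            return True
        if attempt < attempts - 1:
            time.sleep(delay)
    return False
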
Example #20
0
    def common_test_body(self, keys_count, replica, load_ratio,
                         failover_reason):
        log = logger.Logger.get_logger()
        log.info("keys_count : {0}".format(keys_count))
        log.info("replica : {0}".format(replica))
        log.info("load_ratio : {0}".format(load_ratio))
        log.info("failover_reason : {0}".format(failover_reason))
        master = self._servers[0]
        log.info('picking server : {0} as the master'.format(master))
        rest = RestConnection(master)
        info = rest.get_nodes_self()
        rest.init_cluster(username=master.rest_username,
                          password=master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
        bucket_ram = info.memoryQuota * 2 / 3
        bucket = 'default'
        rest.create_bucket(bucket=bucket,
                           ramQuotaMB=bucket_ram,
                           replicaNumber=replica,
                           proxyPort=info.moxi)
        ready = BucketOperationHelper.wait_for_memcached(master, bucket)
        self.assertTrue(ready, "wait_for_memcached failed")
        credentials = self._input.membase_settings

        ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers,
                                                       credentials, self)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)

        inserted_keys = FailoverBaseTest.load_data(master, bucket, keys_count,
                                                   load_ratio)
        inserted_count = len(inserted_keys)
        log.info('inserted {0} keys'.format(inserted_count))

        nodes = rest.node_statuses()
        while (len(nodes) - replica) > 1:
            final_replication_state = RestHelper(rest).wait_for_replication(900)
            msg = "replication state after waiting for up to 15 minutes : {0}"
            self.log.info(msg.format(final_replication_state))
            chosen = RebalanceHelper.pick_nodes(master, howmany=replica)
            for node in chosen:
                # take the node down according to failover_reason
                if failover_reason == 'stop_server':
                    self.stop_server(node)
                    log.info("waiting for membase-server to shut down")
                    # wait for up to 5 minutes until the node is reported down
                    self.assertTrue(
                        RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                        msg="node status is not unhealthy even after waiting for 5 minutes")
                elif failover_reason == "firewall":
                    RemoteUtilHelper.enable_firewall(
                        self._servers, node, bidirectional=self.bidirectional)
                    self.assertTrue(
                        RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                        msg="node status is not unhealthy even after waiting for 5 minutes")

                failed_over = rest.fail_over(node.id)
                if not failed_over:
                    self.log.info(
                        "unable to failover the node the first time. trying again in 75 seconds..")
                    # retry once after 75 seconds
                    time.sleep(75)
                    failed_over = rest.fail_over(node.id)
                self.assertTrue(
                    failed_over, "unable to failover node after {0}".format(
                        failover_reason))
                log.info("failed over node : {0}".format(node.id))
                self._failed_nodes.append(node.ip)

            log.info(
                "10 seconds sleep after failover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                           ejectedNodes=[node.id for node in chosen])
            msg = "rebalance failed while removing failover nodes {0}".format(
                chosen)
            self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
            FailoverBaseTest.replication_verification(master, bucket, replica,
                                                      inserted_count, self)

            nodes = rest.node_statuses()
        FailoverBaseTest.verify_data(master, inserted_keys, bucket, self)
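
The loop condition while (len(nodes) - replica) > 1 used in the last two examples keeps failing nodes over only while more than replica + 1 nodes remain, so the surviving cluster can still host the bucket and its replicas. The arithmetic below is an illustrative sketch only, assuming every round fails over and ejects exactly replica nodes (as pick_nodes(master, howmany=replica) plus the rebalance do here); expected_failover_rounds is a hypothetical helper.

def expected_failover_rounds(node_count, replica):
    # each round fails over and rebalances out `replica` nodes,
    # and the loop stops once len(nodes) - replica <= 1
    rounds = 0
    while (node_count - replica) > 1:
        node_count -= replica
        rounds += 1
    return rounds

# e.g. a 4-node cluster with replica=1 goes through two failover rounds
assert expected_failover_rounds(4, 1) == 2
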
Example #21
0
    def common_test_body(self, replica, failover_reason, load_ratio, age, max_nodes):
        log = logger.Logger.get_logger()
        bucket_name = "default"
        log.info("replica : {0}".format(replica))
        log.info("failover_reason : {0}".format(failover_reason))
        log.info("load_ratio : {0}".format(load_ratio))
        log.info("age : {0}".format(age))
        log.info("max_nodes : {0}".format(max_nodes))
        master = self._servers[0]
        log.info('picking server : {0} as the master'.format(master))
        rest = RestConnection(master)
        info = rest.get_nodes_self()
        rest.init_cluster(username=master.rest_username,
                          password=master.rest_password)
        rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
        rest.update_autofailover_settings(True, age, max_nodes)
        rest.reset_autofailover()
        bucket_ram = info.memoryQuota * 2 / 3
        rest.create_bucket(bucket=bucket_name,
                           ramQuotaMB=bucket_ram,
                           replicaNumber=replica,
                           proxyPort=info.moxi)
        ready = BucketOperationHelper.wait_for_memcached(master, bucket_name)
        self.assertTrue(ready, "wait_for_memcached failed")

        credentials = self._input.membase_settings

        log.info("inserting some items in the master before adding any nodes")
        distribution = {512: 0.4, 1 * 1024: 0.59, 5 * 1024: 0.01}
        if load_ratio > 10:
            distribution = {5 * 1024: 0.4, 10 * 1024: 0.5, 20 * 1024: 0.1}

        ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
        nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
        msg = "rebalance failed after adding these nodes {0}".format(nodes)
        self.assertTrue(rest.monitorRebalance(), msg=msg)

        inserted_count, rejected_count = MemcachedClientHelper.load_bucket(
            servers=self._servers,
            ram_load_ratio=load_ratio,
            value_size_distribution=distribution,
            number_of_threads=1)
        log.info('inserted {0} keys'.format(inserted_count))
        nodes = rest.node_statuses()
        # keep failing nodes over and rebalancing them out while the cluster still has more nodes than replicas
        while (len(nodes) - replica) >= 1:
            final_replication_state = RestHelper(rest).wait_for_replication(900)
            msg = "replication state after waiting for up to 15 minutes : {0}"
            self.log.info(msg.format(final_replication_state))
            chosen = AutoFailoverBaseTest.choose_nodes(master, nodes, replica)
            for node in chosen:
                # take the node down according to failover_reason
                if failover_reason == 'stop_membase':
                    self.stop_membase(node)
                    log.info("10 seconds delay to wait for membase-server to shutdown")
                    #wait for 5 minutes until node is down
                    self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
                elif failover_reason == "firewall":
                    self.enable_firewall(node)
                    self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                    msg="node status is not unhealthy even after waiting for 5 minutes")
            # list pre-autofailover stats
            stats = rest.get_bucket_stats()
            self.log.info("pre-autofail - curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count))
            AutoFailoverBaseTest.wait_for_failover_or_assert(master, replica, age, self)

            # manually fail over any nodes left in unhealthy/active state; at most replica - max_nodes manual failovers should be needed
            manual_failover_count = replica - max_nodes
            for node in chosen:
                self.log.info("checking {0}".format(node.ip))
                if node.status.lower() == "unhealthy" and node.clusterMembership == "active":
                    msg = "node {0} not failed over and we are over out manual failover limit of {1}"
                    self.assertTrue(manual_failover_count > 0, msg.format(node.ip, (replica - max_nodes)))
                    self.log.info("manual failover {0}".format(node.ip))
                    rest.fail_over(node.id)
                    manual_failover_count -= 1

            stats = rest.get_bucket_stats()
            self.log.info("post-autofail - curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count))
            self.assertTrue(stats["curr_items"] == inserted_count, "failover completed but curr_items ({0}) does not match inserted items ({1})".format(stats["curr_items"], inserted_count))

            log.info("10 seconds sleep after autofailover before invoking rebalance...")
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in nodes],
                           ejectedNodes=[node.id for node in chosen])
            msg="rebalance failed while removing failover nodes {0}".format(chosen)
            self.assertTrue(rest.monitorRebalance(), msg=msg)

            nodes = rest.node_statuses()
            if len(nodes) / (1 + replica) >= 1:
                final_replication_state = RestHelper(rest).wait_for_replication(900)
                msg = "replication state after waiting for up to 15 minutes : {0}"
                self.log.info(msg.format(final_replication_state))
                self.assertTrue(RebalanceHelper.wait_till_total_numbers_match(master,bucket_name,600),
                                msg="replication was completed but sum(curr_items) dont match the curr_items_total")

                start_time = time.time()
                stats = rest.get_bucket_stats()
                while time.time() < (start_time + 120) and stats["curr_items"] != inserted_count:
                    self.log.info("curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count))
                    time.sleep(5)
                    stats = rest.get_bucket_stats()
                RebalanceHelper.print_taps_from_all_nodes(rest, bucket_name)
                self.log.info("curr_items : {0} versus {1}".format(stats["curr_items"], inserted_count))
                stats = rest.get_bucket_stats()
                msg = "curr_items : {0} is not equal to actual # of keys inserted : {1}"
                self.assertEquals(stats["curr_items"], inserted_count,
                                  msg=msg.format(stats["curr_items"], inserted_count))
            nodes = rest.node_statuses()
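
The two-minute polling loop near the end of this last example can be pulled out into a reusable wait. A minimal sketch, assuming rest.get_bucket_stats() keeps returning a dict with a "curr_items" entry as it does in the test above; wait_for_curr_items is a hypothetical helper name.

import time

def wait_for_curr_items(rest, expected_count, timeout=120, poll_interval=5):
    # poll bucket stats until curr_items matches the expected count or the timeout expires
    end_time = time.time() + timeout
    stats = rest.get_bucket_stats()
    while time.time() < end_time and stats["curr_items"] != expected_count:
        time.sleep(poll_interval)
        stats = rest.get_bucket_stats()
    return stats["curr_items"] == expected_count
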