Example #1
 def online_upgrade_swap_rebalance(self):
     self._install(self.servers[:self.nodes_init])
     self.operations(self.servers[:self.nodes_init])
     self.sleep(self.sleep_time, "Pre-setup of old version is done. Wait for upgrade")
     self.initial_version = self.upgrade_versions[0]
     self.product = 'couchbase-server'
     self._install(self.servers[self.nodes_init:self.num_servers])
     self.sleep(self.sleep_time, "Installation of new version is done. Wait for rebalance")
     self.swap_num_servers = self.input.param('swap_num_servers', 1)
     old_servers = self.servers[:self.nodes_init]
     new_servers = []
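     # Swap swap_num_servers old-version nodes for new-version nodes on each pass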
     for i in range(self.nodes_init // self.swap_num_servers):
         servers_in = self.servers[(self.nodes_init + i * self.swap_num_servers):
                                   (self.nodes_init + (i + 1) * self.swap_num_servers)]
         servers_out = self.servers[(i * self.swap_num_servers):((i + 1) * self.swap_num_servers)]
         servers = old_servers + new_servers
         self.log.info("Swap rebalance: rebalance out %s old version nodes, rebalance in %s 2.0 Nodes"
                       % (self.swap_num_servers, self.swap_num_servers))
         self.cluster.rebalance(servers, servers_in, servers_out)
         self.sleep(self.sleep_time)
         old_servers = self.servers[((i + 1) * self.swap_num_servers):self.nodes_init]
         new_servers = new_servers + servers_in
         servers = old_servers + new_servers
         status, content = ClusterOperationHelper.find_orchestrator(servers[0])
         self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                         format(status, content))
         FIND_MASTER = False
         for new_server in new_servers:
             if content.find(new_server.ip) >= 0:
                 self._new_master(new_server)
                 FIND_MASTER = True
                 self.log.info("2.0 Node %s becomes the master" % (new_server.ip))
         if not FIND_MASTER:
             raise Exception("After rebalance in 2.0 nodes, 2.0 doesn't become the master ")
     self.verification(self.servers[self.nodes_init : self.num_servers])
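
A note on the recurring pattern in these examples: each one performs the same post-rebalance check. It calls find_orchestrator, asserts the call succeeded, and then scans the incoming nodes' IPs against the otpNode string returned in content. Below is a minimal sketch of how that boilerplate could be factored into a helper, assuming only what the snippets themselves show about the (status, content) return value; the name assert_orchestrator_moved is hypothetical, not part of the test framework.

 def assert_orchestrator_moved(self, contact_server, new_servers):
     # Sketch: fail unless the orchestrator now runs on one of new_servers
     status, content = ClusterOperationHelper.find_orchestrator(contact_server)
     self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".format(status, content))
     for new_server in new_servers:
         if content.find(new_server.ip) >= 0:
             self.log.info("Node %s becomes the master" % new_server.ip)
             return new_server
     raise Exception("Orchestrator did not move to a newly added node")

With such a helper, the FIND_MASTER loops in these examples would collapse to new_master = self.assert_orchestrator_moved(servers[0], new_servers), with the result passed to self._new_master where a test tracks the current master.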
Example #2
    def _failover_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        creds = self.input.membase_settings
        num_initial_servers = self.num_initial_servers
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        self.log.info("current nodes : {0}".format(RebalanceHelper.getOtpNodeIds(master)))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
        optNodesIds = [node.id for node in toBeEjectedNodes]
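        # When fail_orchestrator is set, make the current orchestrator the first failover target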
        if self.fail_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            optNodesIds[0] = content

        self.log.info("FAILOVER PHASE")
        # Failover selected nodes
        for node in optNodesIds:
            self.log.info("failover node {0} and rebalance afterwards".format(node))
            rest.fail_over(node)
            self.assertTrue(rest.monitorRebalance(),
                msg="failed after failover of {0}".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.failover_factor]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.fail_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], \
            ejectedNodes=optNodesIds)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(new_swap_servers))

        SwapRebalanceBase.verification_phase(self, master)
Example #3
    def online_upgrade(self):
        servers_in = self.servers[self.nodes_init:self.num_servers]
        self.cluster.rebalance(self.servers[:self.nodes_init], servers_in, [])
        self.log.info("Rebalance in all {0} nodes" \
                       .format(self.input.param("upgrade_version", "")))
        self.sleep(self.sleep_time)
        status, content = ClusterOperationHelper.find_orchestrator(self.master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                        format(status, content))
        FIND_MASTER = False
        for new_server in servers_in:
            if content.find(new_server.ip) >= 0:
                self._new_master(new_server)
                FIND_MASTER = True
                self.log.info("%s node %s becomes the master" \
                    % (self.input.param("upgrade_version", ""), new_server.ip))
                break
        if self.input.param("initial_version", "")[:5] in COUCHBASE_VERSION_2 \
            and not FIND_MASTER and not self.is_downgrade:
            raise Exception( \
                "After rebalance in {0} nodes, {0} node doesn't become master" \
                .format(self.input.param("upgrade_version", "")))

        servers_out = self.servers[:self.nodes_init]
        self.log.info("Rebalanced out all old version nodes")
        self.cluster.rebalance(self.servers[:self.num_servers], [], servers_out)
Example #4
    def online_upgrade_rebalance_in_out(self):
        self._install(self.servers[: self.initial_num_servers])
        self.operations(multi_nodes=True)
        self.log.info("Installation of old version is done. Wait for %s sec for upgrade" % (self.sleep_time))
        time.sleep(self.sleep_time)
        upgrade_version = self.input.param("upgrade_version", "2.0.0-1870-rel")
        self.initial_version = upgrade_version
        self.product = "couchbase-server"
        self._install(self.servers[self.initial_num_servers : self.num_servers])
        self.log.info("Installation of new version is done. Wait for %s sec for rebalance" % (self.sleep_time))
        time.sleep(self.sleep_time)

        servers_in = self.servers[self.initial_num_servers : self.num_servers]
        self.cluster.rebalance(self.servers[: self.initial_num_servers], servers_in, [])
        self.log.info("Rebalance in all 2.0 Nodes")
        time.sleep(self.sleep_time)
        status, content = ClusterHelper.find_orchestrator(self.master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".format(status, content))
        FIND_MASTER = False
        for new_server in servers_in:
            if content.find(new_server.ip) >= 0:
                FIND_MASTER = True
                self.log.info("2.0 Node %s becomes the master" % (new_server.ip))
        if not FIND_MASTER:
            raise Exception("After rebalance in 2.0 Nodes, 2.0 doesn't become the master")

        servers_out = self.servers[: self.initial_num_servers]
        self.cluster.rebalance(self.servers[: self.num_servers], [], servers_out)
        self.log.info("Rebalance out all old version nodes")
        time.sleep(self.sleep_time)
        self.verify_upgrade_rebalance_in_out()
Example #5
 def _online_upgrade(self,
                     update_servers,
                     extra_servers,
                     check_newmaster=True):
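     # Fetch current and incoming node versions (added_versions feeds the log messages below)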
     RestConnection(update_servers[0]).get_nodes_versions()
     added_versions = RestConnection(extra_servers[0]).get_nodes_versions()
     self.cluster.rebalance(update_servers + extra_servers, extra_servers,
                            [])
     self.log.info("Rebalance in all {0} nodes completed".format(
         added_versions[0]))
     RestConnection(update_servers[0]).get_nodes_versions()
     self.sleep(self.sleep_time)
     status, content = ClusterOperationHelper.find_orchestrator(
         update_servers[0])
     self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                     format(status, content))
     self.log.info("after rebalance in the master is {0}".format(content))
     if check_newmaster and not self.upgrade_same_version:
         FIND_MASTER = False
         for new_server in extra_servers:
             if content.find(new_server.ip) >= 0:
                 FIND_MASTER = True
                 self.log.info("{0} Node {1} becomes the master".format(
                     added_versions[0], new_server.ip))
                 break
         if not FIND_MASTER:
             raise Exception(
                 "After rebalance in {0} Nodes, one of them doesn't become the master"
                 .format(added_versions[0]))
     self.log.info("Rebalanced out all old version nodes")
     self.cluster.rebalance(update_servers + extra_servers, [],
                            update_servers)
     if self.upgrade_versions[0] >= "3.0.0":
         self._enable_xdcr_trace_logging(extra_servers)
Example #6
 def online_upgrade_with_failover(self, services=None):
     servers_in = self.servers[self.nodes_init:self.num_servers]
     self.cluster.rebalance(self.servers[:self.nodes_init],
                            servers_in, [],
                            services=services)
     log.info("Rebalance in all {0} nodes" \
              .format(self.input.param("upgrade_version", "")))
     self.sleep(self.sleep_time)
     status, content = ClusterOperationHelper.find_orchestrator(self.master)
     self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}". \
                     format(status, content))
     FIND_MASTER = False
     for new_server in servers_in:
         if content.find(new_server.ip) >= 0:
             self._new_master(new_server)
             FIND_MASTER = True
             self.log.info("%s node %s becomes the master" \
                           % (self.input.param("upgrade_version", ""), new_server.ip))
             break
     if self.input.param("initial_version", "")[:5] in COUCHBASE_VERSION_2 \
             and not FIND_MASTER:
         raise Exception( \
             "After rebalance in {0} nodes, {0} node doesn't become master" \
                 .format(self.input.param("upgrade_version", "")))
     servers_out = self.servers[:self.nodes_init]
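     # Point the test at an upgraded node as master before failing over the old nodes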
     self._new_master(self.servers[self.nodes_init])
     log.info("failover and rebalance nodes")
     self.cluster.failover(self.servers[:self.num_servers],
                           failover_nodes=servers_out,
                           graceful=False)
     self.cluster.rebalance(self.servers[:self.num_servers], [],
                            servers_out)
     self.sleep(180)
Example #7
    def _failover_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        creds = self.input.membase_settings
        num_initial_servers = self.num_initial_servers
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        self.log.info("current nodes : {0}".format(RebalanceHelper.getOtpNodeIds(master)))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.fail_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            optNodesIds[0] = content

        self.log.info("FAILOVER PHASE")
        # Failover selected nodes
        for node in optNodesIds:
            self.log.info("failover node {0} and rebalance afterwards".format(node))
            rest.fail_over(node)

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.failover_factor]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.fail_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], \
            ejectedNodes=optNodesIds)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(new_swap_servers))

        SwapRebalanceBase.verification_phase(self, master)
Example #8
 def online_upgrade_swap_rebalance(self, services=None):
     servers_in = self.servers[self.nodes_init:self.num_servers]
     self.sleep(self.sleep_time)
     status, content = ClusterOperationHelper.find_orchestrator(self.master)
     self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}". \
                     format(status, content))
     i = 0
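     # services must pair one-to-one with servers_in (see the note after this example)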
     for server_in, service_in in zip(servers_in, services):
         log.info("Swap rebalance nodes")
         self.cluster.rebalance(self.servers[:self.nodes_init], [server_in], [self.servers[i]], [service_in])
         self._new_master(self.servers[self.nodes_init])
         i += 1
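
One caveat in the snippet above: zip(servers_in, services) raises a TypeError when services is left at its default of None. A minimal guard, hypothetical rather than part of the original test (it assumes the usual comma-separated Couchbase service strings such as "kv" or "kv,index"):

     if services is None:
         # assume plain KV nodes when the caller provides no service layout
         services = ["kv"] * len(servers_in)

Placed before the loop, this keeps the swap rebalance usable when the caller omits services.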
Example #9
    def online_upgrade_swap_rebalance(self):
        self._install(self.servers[:self.initial_num_servers])
        self.operations(multi_nodes=True)
        self.log.info(
            "Installation of old version is done. Wait for %s sec for upgrade"
            % (self.sleep_time))
        time.sleep(self.sleep_time)
        upgrade_version = self.input.param('upgrade_version', '2.0.0-1870-rel')
        self.initial_version = upgrade_version
        self.product = 'couchbase-server'
        self._install(self.servers[self.initial_num_servers:self.num_servers])
        self.log.info(
            "Installation of new version is done. Wait for %s sec for rebalance"
            % (self.sleep_time))
        time.sleep(self.sleep_time)

        self.swap_num_servers = self.input.param('swap_num_servers', 1)
        old_servers = self.servers[:self.initial_num_servers]
        new_servers = []
        for i in range(self.initial_num_servers // self.swap_num_servers):
            servers_in = self.servers[(self.initial_num_servers +
                                       i * self.swap_num_servers):(
                                           self.initial_num_servers +
                                           (i + 1) * self.swap_num_servers)]
            servers_out = self.servers[(
                i * self.swap_num_servers):((i + 1) * self.swap_num_servers)]
            servers = old_servers + new_servers
            self.cluster.rebalance(servers, servers_in, servers_out)
            self.log.info(
                "Swap rebalance: rebalance out %s old version nodes, rebalance in %s 2.0 Nodes"
                % (self.swap_num_servers, self.swap_num_servers))
            time.sleep(self.sleep_time)
            old_servers = self.servers[(
                (i + 1) * self.swap_num_servers):self.initial_num_servers]
            new_servers = new_servers + servers_in
            servers = old_servers + new_servers
            status, content = ClusterHelper.find_orchestrator(servers[0])
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                            format(status, content))
            FIND_MASTER = False
            for new_server in new_servers:
                if content.find(new_server.ip) >= 0:
                    FIND_MASTER = True
                    self.log.info("2.0 Node %s becomes the master" %
                                  (new_server.ip))
            if not FIND_MASTER:
                raise Exception(
                    "After rebalance in 2.0 nodes, 2.0 doesn't become the master "
                )

        self.verify_upgrade_rebalance_in_out()
Example #10
    def online_upgrade_swap_rebalance(self):
        self._install(self.servers[: self.nodes_init])
        self.operations(self.servers[: self.nodes_init])
        self.initial_version = self.upgrade_versions[0]
        self.product = "couchbase-server"
        self.sleep(
            self.sleep_time,
            "Pre-setup of old version is done. Wait for online upgrade to {0} version".format(self.initial_version),
        )
        self._install(self.servers[self.nodes_init : self.num_servers])
        self.sleep(self.sleep_time, "Installation of new version is done. Wait for rebalance")
        self.swap_num_servers = self.input.param("swap_num_servers", 1)
        old_servers = self.servers[: self.nodes_init]
        new_vb_nums = RestHelper(RestConnection(self.master))._get_vbuckets(
            old_servers, bucket_name=self.buckets[0].name
        )
        new_servers = []
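        # Capture the vbucket map before and after each swap; _verify_vbucket_nums_for_swap checks that vbuckets actually moved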
        for i in range(self.nodes_init // self.swap_num_servers):
            old_vb_nums = copy.deepcopy(new_vb_nums)
            servers_in = self.servers[
                (self.nodes_init + i * self.swap_num_servers) : (self.nodes_init + (i + 1) * self.swap_num_servers)
            ]
            servers_out = self.servers[(i * self.swap_num_servers) : ((i + 1) * self.swap_num_servers)]
            servers = old_servers + new_servers
            self.log.info(
                "Swap rebalance: rebalance out %s old version nodes, rebalance in %s 2.0 Nodes"
                % (self.swap_num_servers, self.swap_num_servers)
            )
            self.cluster.rebalance(servers, servers_in, servers_out)
            self.sleep(self.sleep_time)
            old_servers = self.servers[((i + 1) * self.swap_num_servers) : self.nodes_init]
            new_servers = new_servers + servers_in
            servers = old_servers + new_servers
            new_vb_nums = RestHelper(RestConnection(self.master))._get_vbuckets(
                servers, bucket_name=self.buckets[0].name
            )
            self._verify_vbucket_nums_for_swap(old_vb_nums, new_vb_nums)
            status, content = ClusterOperationHelper.find_orchestrator(servers[0])
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".format(status, content))
            FIND_MASTER = False
            for new_server in new_servers:
                if content.find(new_server.ip) >= 0:
                    self._new_master(new_server)
                    FIND_MASTER = True
                    self.log.info("3.0 Node %s becomes the master" % (new_server.ip))
            if not FIND_MASTER:
                raise Exception("After rebalance in 3.0 nodes, 3.0 doesn't become the master ")
        """ verify DCP upgrade in 3.0.0 version """
        self.monitor_dcp_rebalance()

        self.verification(new_servers)
Example #11
 def _online_upgrade(self, update_servers, extra_servers, check_newmaster=True):
     self.cluster.rebalance(update_servers + extra_servers, extra_servers, [])
     self.log.info("Rebalance in all 2.0 Nodes")
     self.sleep(self.sleep_time)
     status, content = ClusterOperationHelper.find_orchestrator(update_servers[0])
     self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                     format(status, content))
     if check_newmaster:
         FIND_MASTER = False
         for new_server in extra_servers:
             if content.find(new_server.ip) >= 0:
                 FIND_MASTER = True
                 self.log.info("2.0 Node %s becomes the master" % (new_server.ip))
                 break
         if not FIND_MASTER:
             raise Exception("After rebalance in 2.0 Nodes, 2.0 doesn't become the master")
     self.log.info("Rebalanced out all old version nodes")
     self.cluster.rebalance(update_servers + extra_servers, [], update_servers)
Example #12
    def online_upgrade(self):
        servers_in = self.servers[self.nodes_init : self.num_servers]
        self.cluster.rebalance(self.servers[: self.nodes_init], servers_in, [])
        self.log.info("Rebalance in all 2.0 Nodes")
        self.sleep(self.sleep_time)
        status, content = ClusterOperationHelper.find_orchestrator(self.master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".format(status, content))
        FIND_MASTER = False
        for new_server in servers_in:
            if content.find(new_server.ip) >= 0:
                self._new_master(new_server)
                FIND_MASTER = True
                self.log.info("2.0 Node %s becomes the master" % (new_server.ip))
                break
        if not FIND_MASTER and not self.is_downgrade:
            raise Exception("After rebalance in 3.0 Nodes, 3.0 doesn't become the master")

        servers_out = self.servers[: self.nodes_init]
        self.log.info("Rebalanced out all old version nodes")
        self.cluster.rebalance(self.servers[: self.num_servers], [], servers_out)
Example #13
 def _online_upgrade(self, update_servers, extra_servers, check_newmaster=True):
     self.cluster.rebalance(update_servers + extra_servers, extra_servers, [])
     current_versions = RestConnection(update_servers[0]).get_nodes_versions()
     added_versions = RestConnection(extra_servers[0]).get_nodes_versions()
     self.log.info("Rebalance in all {0} nodes completed".format(added_versions[0]))
     self.sleep(self.sleep_time)
     status, content = ClusterOperationHelper.find_orchestrator(update_servers[0])
     self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                     format(status, content))
     self.log.info("after rebalance in the master is {0}".format(content))
     if check_newmaster:
         FIND_MASTER = False
         for new_server in extra_servers:
             if content.find(new_server.ip) >= 0:
                 FIND_MASTER = True
                 self.log.info("{0} Node {1} becomes the master".format(added_versions[0], new_server.ip))
                 break
         if not FIND_MASTER:
             raise Exception("After rebalance in {0} Nodes, one of them doesn't become the master".format(added_versions[0]))
     self.log.info("Rebalanced out all old version nodes")
     self.cluster.rebalance(update_servers + extra_servers, [], update_servers)
Example #14
    def online_upgrade(self):
        servers_in = self.servers[self.nodes_init:self.num_servers]
        self.cluster.rebalance(self.servers[:self.nodes_init], servers_in, [])
        self.log.info("Rebalance in all 2.0 Nodes")
        self.sleep(self.sleep_time)
        status, content = ClusterOperationHelper.find_orchestrator(self.master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                        format(status, content))
        FIND_MASTER = False
        for new_server in servers_in:
            if content.find(new_server.ip) >= 0:
                self._new_master(new_server)
                FIND_MASTER = True
                self.log.info("2.0 Node %s becomes the master" %
                              (new_server.ip))
                break
        if not FIND_MASTER:
            raise Exception(
                "After rebalance in 2.0 Nodes, 2.0 doesn't become the master")

        servers_out = self.servers[:self.nodes_init]
        self.log.info("Rebalanced out all old version nodes")
        self.cluster.rebalance(self.servers[:self.num_servers], [],
                               servers_out)
Example #15
    def _common_test_body_failed_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)

        self.log.info("DATA LOAD PHASE")
        loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master,
                                                      howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes
            if self.num_swap is len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info(
                "removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[
            num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password,
                                    server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=optNodesIds)

        # Fail the rebalance at 20%, 40% and 60% completion
        for i in [1, 2, 3]:
            expected_progress = 20 * i
            self.log.info(
                "FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
            RestHelper(rest).rebalance_reached(expected_progress)
            bucket = rest.get_buckets()[0].name
            pid = StatsCommon.get_stats([master], bucket, "", "pid")[master]
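            # Kill memcached on the master via diag_eval so the rebalance fails mid-flight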
            command = "os:cmd(\"kill -9 {0} \")".format(pid)
            self.log.info(command)
            killed = rest.diag_eval(command)
            self.log.info("killed {0}:{1}??  {2} ".format(
                master.ip, master.port, killed))
            BaseTestCase._wait_warmup_completed(self, [master],
                                                bucket,
                                                wait_time=600)
            time.sleep(5)

            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                           ejectedNodes=optNodesIds)

        self.assertTrue(
            rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(
                toBeEjectedNodes))

        # Stop loaders
        SwapRebalanceBase.stop_load(loaders)

        self.log.info("DONE DATA ACCESS PHASE")
        #for bucket in rest.get_buckets():
        #    SwapRebalanceBase.verify_data(new_swap_servers[0], bucket_data[bucket.name].get('inserted_keys'),\
        #        bucket.name, self)
        #    RebalanceHelper.wait_for_persistence(master, bucket.name)

        self.log.info("VERIFICATION PHASE")
        SwapRebalanceBase.items_verification(master, self)
Example #16
File: capiXDCR.py Project: umang-cb/Jython
    def test_capi_with_online_upgrade(self):
        self._install(self._input.servers[:self.src_init + self.dest_init])
        upgrade_version = self._input.param("upgrade_version", "5.0.0-1797")
        upgrade_nodes = self.src_cluster.get_nodes()
        extra_nodes = self._input.servers[self.src_init + self.dest_init:]

        repl_id = self._start_es_replication()

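        # Pause replication while loading and mutating docs, then resume and let ES catch up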
        rest_conn = RestConnection(self.src_master)
        rest_conn.pause_resume_repl_by_id(repl_id, REPL_PARAM.PAUSE_REQUESTED, 'true')

        gen = DocumentGenerator('es', '{{"key":"value","mutated":0}}',  xrange(100), start=0, end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(gen)

        self.perform_update_delete()

        rest_conn.pause_resume_repl_by_id(repl_id, REPL_PARAM.PAUSE_REQUESTED, 'false')

        self._wait_for_es_replication_to_catchup()

        rest_conn.pause_resume_repl_by_id(repl_id, REPL_PARAM.PAUSE_REQUESTED, 'true')

        RestConnection(upgrade_nodes[0]).get_nodes_versions()
        added_versions = RestConnection(extra_nodes[0]).get_nodes_versions()
        self.cluster.rebalance(upgrade_nodes + extra_nodes, extra_nodes, [])
        self.log.info("Rebalance in all {0} nodes completed".format(added_versions[0]))
        RestConnection(upgrade_nodes[0]).get_nodes_versions()
        self.sleep(15)
        status, content = ClusterOperationHelper.find_orchestrator(upgrade_nodes[0])
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                        format(status, content))
        self.log.info("after rebalance in the master is {0}".format(content))
        find_master = False
        for new_server in extra_nodes:
            if content.find(new_server.ip) >= 0:
                find_master = True
                self.log.info("{0} Node {1} becomes the master".format(added_versions[0], new_server.ip))
                break
        if not find_master:
            raise Exception("After rebalance in {0} Nodes, one of them doesn't become the master".
                            format(added_versions[0]))
        self.log.info("Rebalancing out all old version nodes")
        self.cluster.rebalance(upgrade_nodes + extra_nodes, [], upgrade_nodes)
        self.src_master = self._input.servers[self.src_init + self.dest_init]

        self._install(self.src_cluster.get_nodes(), version=upgrade_version)
        upgrade_nodes = self._input.servers[self.src_init + self.dest_init:]
        extra_nodes = self.src_cluster.get_nodes()

        RestConnection(upgrade_nodes[0]).get_nodes_versions()
        added_versions = RestConnection(extra_nodes[0]).get_nodes_versions()
        self.cluster.rebalance(upgrade_nodes + extra_nodes, extra_nodes, [])
        self.log.info("Rebalance in all {0} nodes completed".format(added_versions[0]))
        RestConnection(upgrade_nodes[0]).get_nodes_versions()
        self.sleep(15)
        status, content = ClusterOperationHelper.find_orchestrator(upgrade_nodes[0])
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                        format(status, content))
        self.log.info("after rebalance in the master is {0}".format(content))
        self.log.info("Rebalancing out all old version nodes")
        self.cluster.rebalance(upgrade_nodes + extra_nodes, [], upgrade_nodes)
        self.src_master = self._input.servers[0]

        self.log.info("######### Upgrade of CB cluster completed ##########")

        rest_conn = RestConnection(self.src_master)
        rest_conn.pause_resume_repl_by_id(repl_id, REPL_PARAM.PAUSE_REQUESTED, 'true')

        gen = DocumentGenerator('es', '{{"key":"value"}}',  xrange(100), start=0, end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(gen)

        self.perform_update_delete()

        rest_conn.pause_resume_repl_by_id(repl_id, REPL_PARAM.PAUSE_REQUESTED, 'false')

        self._wait_for_es_replication_to_catchup()

        self._verify_es_results()
Example #17
File: xdcr.py Project: steveyen/testrunner
    def test_failover_continuous_bidirectional_sets_deletes(self):
        cluster_ref_a = "cluster_ref_a"
        master_a = self._input.clusters.get(0)[0]
        rest_conn_a = RestConnection(master_a)

        cluster_ref_b = "cluster_ref_b"
        master_b = self._input.clusters.get(1)[0]
        rest_conn_b = RestConnection(master_b)

        # Rebalance all the nodes together
        servers_a = self._input.clusters.get(0)
        servers_b = self._input.clusters.get(1)
        rebalanced_servers_a = []
        rebalanced_servers_b = []

        RebalanceHelper.rebalance_in(servers_a, len(servers_a)-1)
        RebalanceHelper.rebalance_in(servers_b, len(servers_b)-1)
        rebalanced_servers_a.extend(servers_a)
        rebalanced_servers_b.extend(servers_b)

        # Setup bi-directional continuous replication
        replication_type = "continuous"

        rest_conn_a.add_remote_cluster(master_b.ip, master_b.port,
            master_b.rest_username,
            master_b.rest_password, cluster_ref_b)
        rest_conn_b.add_remote_cluster(master_a.ip, master_a.port,
            master_a.rest_username,
            master_a.rest_password, cluster_ref_a)
        (rep_database_a, rep_id_a) = rest_conn_a.start_replication(
            replication_type, self._buckets[0],
            cluster_ref_b)
        (rep_database_b, rep_id_b) = rest_conn_b.start_replication(
            replication_type, self._buckets[0],
            cluster_ref_a)

        load_thread_list = []
        # Start load
        kvstore = ClientKeyValueStore()
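        # kvstore tracks the expected key set for the replication verification at the end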

        self._params["ops"] = "set"
        task_def = RebalanceDataGenerator.create_loading_tasks(self._params)
        load_thread = RebalanceDataGenerator.start_load(rest_conn_a,
            self._buckets[0],
            task_def, kvstore)
        load_thread.start()
        load_thread.join()
        RebalanceHelper.wait_for_persistence(master_a, self._buckets[0])

        # Do some deletes
        self._params["ops"] = "delete"
        self._params["count"] = self._num_items/5
        task_def = RebalanceDataGenerator.create_loading_tasks(self._params)
        load_thread = RebalanceDataGenerator.start_load(rest_conn_a,
            self._buckets[0],
            task_def, kvstore)
        load_thread_list.append(load_thread)

        # Start all loads concurrently
        for lt in load_thread_list:
            lt.start()

        # Do the failover of nodes on both clusters
        self.log.info("Failing over nodes")
        self.log.info("current nodes on cluster 1: {0}".format(RebalanceHelper.getOtpNodeIds(master_a)))
        self.log.info("current nodes on cluster 2: {0}".format(RebalanceHelper.getOtpNodeIds(master_b)))

        # Find nodes to be failed_over
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master_a, howmany=self._failover_factor)
        optNodesIds_a = [node.id for node in toBeEjectedNodes]
        if self._fail_orchestrator_a:
            status, content = ClusterOperationHelper.find_orchestrator(master_a)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            optNodesIds_a[0] = content
            master_a = self._input.clusters.get(0)[-1]
            rest_conn_a = RestConnection(master_a)

        # Failover selected nodes
        for node in optNodesIds_a:
            self.log.info("failover node {0} and rebalance afterwards".format(node))
            rest_conn_a.fail_over(node)

        toBeEjectedNodes = RebalanceHelper.pick_nodes(master_b, howmany=self._failover_factor)
        optNodesIds_b = [node.id for node in toBeEjectedNodes]
        if self._fail_orchestrator_b:
            status, content = ClusterOperationHelper.find_orchestrator(master_b)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            optNodesIds_b[0] = content
            master_b = self._input.clusters.get(1)[-1]
            rest_conn_b = RestConnection(master_b)

        self._state.append((rest_conn_a, cluster_ref_b, rep_database_a, rep_id_a))
        self._state.append((rest_conn_b, cluster_ref_a, rep_database_b, rep_id_b))

        # Failover selected nodes
        for node in optNodesIds_b:
            self.log.info("failover node {0} and rebalance afterwards".format(node))
            rest_conn_b.fail_over(node)

        rest_conn_a.rebalance(otpNodes=[node.id for node in rest_conn_a.node_statuses()],\
            ejectedNodes=optNodesIds_a)
        rest_conn_b.rebalance(otpNodes=[node.id for node in rest_conn_b.node_statuses()],\
            ejectedNodes=optNodesIds_b)

        self.assertTrue(rest_conn_a.monitorRebalance(),
            msg="rebalance operation failed after adding node on cluster 1")
        self.assertTrue(rest_conn_b.monitorRebalance(),
            msg="rebalance operation failed after adding node on cluster 2")

        # Wait for loading threads to finish
        for lt in load_thread_list:
            lt.join()
        self.log.info("All loading threads finished")

        # Verify replication
        self.assertTrue(XDCRBaseTest.verify_replicated_data(rest_conn_b,
            self._buckets[0],
            kvstore,
            self._poll_sleep,
            self._poll_timeout),
            "Verification of replicated data failed")
        self.assertTrue(XDCRBaseTest.verify_replicated_revs(rest_conn_a,
            rest_conn_b,
            self._buckets[0],
            self._poll_sleep,
            self._poll_timeout),
            "Verification of replicated revisions failed")
Example #18
    def _common_test_body_failed_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes
            if self.num_swap is len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
            ejectedNodes=optNodesIds)
        SwapRebalanceBase.sleep(self, 10, "Rebalance should start")
        self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(self.percentage_progress))
        reached = RestHelper(rest).rebalance_reached(self.percentage_progress)
        if reached == 100 and not RestHelper(rest).is_cluster_rebalanced():
            # handle situation when rebalance failed at the beginning
            self.log.error('seems rebalance failed!')
            self.log.info("Latest logs from UI:")
            for i in rest.get_logs(): self.log.error(i)
            self.fail("rebalance failed even before killing memcached")
        bucket = rest.get_buckets()[0].name
        pid = None
        if self.swap_orchestrator:
            # get PID via remote connection if master is a new node
            shell = RemoteMachineShellConnection(master)
            o, _ = shell.execute_command("ps -eo comm,pid | awk '$1 == \"memcached\" { print $2 }'")
            pid = o[0]
            shell.disconnect()
        else:
            for i in xrange(2):
                try:
                    _mc = MemcachedClientHelper.direct_client(master, bucket)
                    pid = _mc.stats()["pid"]
                    break
                except EOFError as e:
                    self.log.error("{0}.Retry in 2 sec".format(e))
                    SwapRebalanceBase.sleep(self, 1)
        if pid is None:
            self.fail("impossible to get a PID")
        command = "os:cmd(\"kill -9 {0} \")".format(pid)
        self.log.info(command)
        killed = rest.diag_eval(command)
        self.log.info("killed {0}:{1}??  {2} ".format(master.ip, master.port, killed))
        self.log.info("sleep for 10 sec after kill memcached")
        SwapRebalanceBase.sleep(self, 10)
        # we can't get stats for the new node when the rebalance fails
        if not self.swap_orchestrator:
            ClusterOperationHelper._wait_warmup_completed(self, [master], bucket, wait_time=600)
        # we expect the rebalance to fail
        try:
            rest.monitorRebalance()
        except RebalanceFailedException:
            # retry rebalance if it failed
            self.log.warn("Rebalance failed but it's expected")
            SwapRebalanceBase.sleep(self, 30)
            self.assertFalse(RestHelper(rest).is_cluster_rebalanced(), msg="cluster still needs a rebalance")
            knownNodes = rest.node_statuses()
            self.log.info("nodes are still in cluster: {0}".format([(node.ip, node.port) for node in knownNodes]))
            ejectedNodes = list(set(optNodesIds) & set([node.id for node in knownNodes]))
            rest.rebalance(otpNodes=[node.id for node in knownNodes], ejectedNodes=ejectedNodes)
            self.assertTrue(rest.monitorRebalance(),
                            msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))
        else:
            self.log.info("rebalance completed successfully")
        SwapRebalanceBase.verification_phase(self, master)
Example #19
    def _common_test_body_failed_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(
            initial_servers,
            len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master,
                                                      howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes
            if self.num_swap is len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info(
                "removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[
            num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password,
                                    server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=optNodesIds)
        SwapRebalanceBase.sleep(self, 10, "Rebalance should start")
        self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(
            self.percentage_progress))
        reached = RestHelper(rest).rebalance_reached(self.percentage_progress)
        if reached and RestHelper(rest).is_cluster_rebalanced():
            # handle situation when rebalance failed at the beginning
            self.log.error('seems rebalance failed!')
            rest.print_UI_logs()
            self.fail("rebalance failed even before killing memcached")
        bucket = rest.get_buckets()[0].name
        pid = None
        if self.swap_orchestrator and not self.cluster_run:
            # get PID via remote connection if master is a new node
            shell = RemoteMachineShellConnection(master)
            pid = shell.get_memcache_pid()
            shell.disconnect()
        else:
            times = 2
            if self.cluster_run:
                times = 20
            for i in xrange(times):
                try:
                    _mc = MemcachedClientHelper.direct_client(master, bucket)
                    pid = _mc.stats()["pid"]
                    break
                except (EOFError, KeyError) as e:
                    self.log.error("{0}.Retry in 2 sec".format(e))
                    SwapRebalanceBase.sleep(self, 2)
        if pid is None:
            # sometimes pid is not returned by mc.stats()
            shell = RemoteMachineShellConnection(master)
            pid = shell.get_memcache_pid()
            shell.disconnect()
            if pid is None:
                self.fail("impossible to get a PID")
        command = "os:cmd(\"kill -9 {0} \")".format(pid)
        self.log.info(command)
        killed = rest.diag_eval(command)
        self.log.info("killed {0}:{1}??  {2} ".format(master.ip, master.port,
                                                      killed))
        self.log.info("sleep for 10 sec after kill memcached")
        SwapRebalanceBase.sleep(self, 10)
        # we can't get stats for the new node when the rebalance fails
        if not self.swap_orchestrator:
            ClusterOperationHelper._wait_warmup_completed(self, [master],
                                                          bucket,
                                                          wait_time=600)
        # we expect the rebalance to fail
        try:
            rest.monitorRebalance()
        except RebalanceFailedException:
            # rebalance failed as expected; retry it
            self.log.warn("Rebalance failed, as expected")
            SwapRebalanceBase.sleep(self, 30)
            self.assertFalse(RestHelper(rest).is_cluster_rebalanced(),
                             msg="cluster still needs a rebalance")
            knownNodes = rest.node_statuses()
            self.log.info("nodes are still in cluster: {0}".format([
                (node.ip, node.port) for node in knownNodes
            ]))
            ejectedNodes = list(
                set(optNodesIds) & set([node.id for node in knownNodes]))
            rest.rebalance(otpNodes=[node.id for node in knownNodes],
                           ejectedNodes=ejectedNodes)
            SwapRebalanceBase.sleep(self, 10, "Wait for rebalance to start")
            self.assertTrue(
                rest.monitorRebalance(),
                msg="rebalance operation failed after adding node {0}".format(
                    toBeEjectedNodes))
        else:
            self.log.info("rebalance completed successfully")
        SwapRebalanceBase.verification_phase(self, master)
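A minimal sketch of the kill-and-retry pattern the test above relies on, assuming a RestConnection-like object with the same diag_eval, node_statuses, rebalance and monitorRebalance methods used here (the helper name kill_memcached_and_retry is invented for illustration):

    import time

    def kill_memcached_and_retry(rest, pid, eject_ids, retry_delay=30):
        # Ask the cluster's Erlang VM to kill memcached; the in-flight
        # rebalance is expected to fail as a result.
        rest.diag_eval('os:cmd("kill -9 {0}")'.format(pid))
        time.sleep(retry_delay)
        # Re-eject only the nodes still known to the cluster, then restart
        # the rebalance and wait for it to complete.
        known = rest.node_statuses()
        ejected = list(set(eject_ids) & set(n.id for n in known))
        rest.rebalance(otpNodes=[n.id for n in known], ejectedNodes=ejected)
        return rest.monitorRebalance()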
Example #24
    def _common_test_body_swap_rebalance(self, do_stop_start=False):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]

        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                format(status, content))
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        if self.do_access:
            self.log.info("DATA ACCESS PHASE")
            self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
            ejectedNodes=optNodesIds)

        if do_stop_start:
            # Rebalance is stopped at 20%, 40% and 60% completion
            retry = 0
            for expected_progress in (20, 40, 60):
                self.log.info("STOP/START SWAP REBALANCE PHASE WITH PROGRESS {0}%".
                              format(expected_progress))
                while True:
                    progress = rest._rebalance_progress()
                    if progress < 0:
                        self.log.error("rebalance progress code : {0}".format(progress))
                        break
                    elif progress == 100:
                        self.log.warn("Rebalance has already reached 100%")
                        break
                    elif progress >= expected_progress:
                        self.log.info("Rebalance will be stopped with {0}%".format(progress))
                        stopped = rest.stop_rebalance()
                        self.assertTrue(stopped, msg="unable to stop rebalance")
                        SwapRebalanceBase.sleep(self, 20)
                        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                                       ejectedNodes=optNodesIds)
                        break
                    elif retry > 100:
                        break
                    else:
                        retry += 1
                        SwapRebalanceBase.sleep(self, 1)
        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(optNodesIds))
        SwapRebalanceBase.verification_phase(self, master)
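The stop/start loop above polls the internal _rebalance_progress value and interrupts the rebalance once per threshold. A condensed sketch of that polling pattern, under the same RestConnection assumptions (stop_rebalance_at is a made-up name):

    import time

    def stop_rebalance_at(rest, thresholds, eject_ids, max_polls=100):
        # Stop and restart the rebalance once per progress threshold,
        # mirroring the 20/40/60% loop in the test above.
        for target in thresholds:
            for _ in range(max_polls):
                progress = rest._rebalance_progress()
                if progress < 0 or progress == 100:
                    break  # error code, or rebalance already finished
                if progress >= target:
                    assert rest.stop_rebalance(), "unable to stop rebalance"
                    time.sleep(20)
                    rest.rebalance(
                        otpNodes=[n.id for n in rest.node_statuses()],
                        ejectedNodes=eject_ids)
                    break
                time.sleep(1)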
Example #25
    def _add_back_failed_node(self, do_node_cleanup=False):
        master = self.servers[0]
        rest = RestConnection(master)
        creds = self.input.membase_settings

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(
            self.servers,
            len(self.servers) - 1)
        self.assertTrue(status, msg="Rebalance was failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(
            master, howmany=self.failover_factor)
        optNodesIds = [node.id for node in toBeEjectedNodes]

        # List of servers that will not be failed over
        not_failed_over = []
        for server in self.servers:
            if self.cluster_run:
                if server.port not in [node.port for node in toBeEjectedNodes]:
                    not_failed_over.append(server)
                    self.log.info("Node {0}:{1} not failed over".format(
                        server.ip, server.port))
            else:
                if server.ip not in [node.ip for node in toBeEjectedNodes]:
                    not_failed_over.append(server)
                    self.log.info("Node {0}:{1} not failed over".format(
                        server.ip, server.port))

        if self.fail_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                format(status, content))
            # When swapping all the nodes
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content
            master = not_failed_over[-1]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        # Failover selected nodes
        for node in optNodesIds:
            self.log.info(
                "failover node {0} and rebalance afterwards".format(node))
            rest.fail_over(node)

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], \
            ejectedNodes=optNodesIds)

        self.assertTrue(
            rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(
                optNodesIds))

        # Add back the same failed over nodes

        # Cleanup the node, somehow
        # TODO: cluster_run?
        if do_node_cleanup:
            pass

        # Make rest connection with node part of cluster
        rest = RestConnection(master)

        # Given the optNode, find ip
        add_back_servers = []
        nodes = rest.get_nodes()
        for server in nodes:
            if isinstance(server.ip, unicode):
                add_back_servers.append(server)
        final_add_back_servers = []
        for server in self.servers:
            if self.cluster_run:
                if server.port not in [serv.port for serv in add_back_servers]:
                    final_add_back_servers.append(server)
            else:
                if server.ip not in [serv.ip for serv in add_back_servers]:
                    final_add_back_servers.append(server)
        for server in final_add_back_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password,
                                    server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=[])

        self.assertTrue(
            rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(
                add_back_servers))

        SwapRebalanceBase.verification_phase(self, master)
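Reduced to its REST calls, the add-back flow above is: fail over the chosen nodes, rebalance them out, add the same servers back, and rebalance again. A hedged sketch under the same RestConnection assumptions (failover_and_add_back is a made-up name):

    def failover_and_add_back(rest, creds, node_ids, servers):
        # Fail over and rebalance out the chosen nodes.
        for node_id in node_ids:
            rest.fail_over(node_id)
        rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                       ejectedNodes=node_ids)
        assert rest.monitorRebalance(), "rebalance-out failed"
        # Add the same servers back and rebalance with nothing ejected.
        for server in servers:
            otp_node = rest.add_node(creds.rest_username, creds.rest_password,
                                     server.ip, server.port)
            assert otp_node, "unable to add node {0}".format(server.ip)
        rest.rebalance(otpNodes=[n.id for n in rest.node_statuses()],
                       ejectedNodes=[])
        return rest.monitorRebalance()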
Example #26
    def _add_back_failed_node(self, do_node_cleanup=False):
        master = self.servers[0]
        rest = RestConnection(master)
        creds = self.input.membase_settings

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(self.servers, len(self.servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
        optNodesIds = [node.id for node in toBeEjectedNodes]

        # List of servers that will not be failed over
        not_failed_over = []
        for server in self.servers:
            if server.ip not in [node.ip for node in toBeEjectedNodes]:
                not_failed_over.append(server)
                self.log.info("Node %s not failed over" % server.ip)

        if self.fail_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                format(status, content))
            # When swapping all the nodes
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content
            master = not_failed_over[-1]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        # Failover selected nodes
        for node in optNodesIds:
            self.log.info("failover node {0} and rebalance afterwards".format(node))
            rest.fail_over(node)

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], \
            ejectedNodes=optNodesIds)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(optNodesIds))

        # Add back the same failed over nodes

        # Cleanup the node, somehow
        # TODO: cluster_run?
        if do_node_cleanup:
            pass

        # Make rest connection with node part of cluster
        rest = RestConnection(master)

        # Given the optNode, find ip
        add_back_servers = []
        nodes = rest.get_nodes()
        for server in [node.ip for node in nodes]:
            if isinstance(server, unicode):
                add_back_servers.append(server)
        final_add_back_servers = []
        for server in self.servers:
            if server.ip not in add_back_servers:
                final_add_back_servers.append(server)

        for server in final_add_back_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(add_back_servers))

        SwapRebalanceBase.verification_phase(self, master)
Example #27
    def _common_test_body_failed_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        self.log.info("DATA LOAD PHASE")
        loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers+self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],\
            ejectedNodes=optNodesIds)

        # Rebalance is made to fail at 20%, 40% and 60% completion
        for i in [1, 2, 3]:
            expected_progress = 20 * i
            self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
            reached = RestHelper(rest).rebalance_reached(expected_progress)
            command = "[erlang:exit(element(2, X), kill) || X <- supervisor:which_children(ns_port_sup)]."
            memcached_restarted = rest.diag_eval(command)
            self.assertTrue(memcached_restarted, "unable to restart memcached/moxi process through diag/eval")
            time.sleep(20)

            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],\
                ejectedNodes=optNodesIds)

        # Stop loaders
        SwapRebalanceBase.stop_load(loaders)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))

        self.log.info("DONE DATA ACCESS PHASE")
        #for bucket in rest.get_buckets():
        #    SwapRebalanceBase.verify_data(new_swap_servers[0], bucket_data[bucket.name].get('inserted_keys'),\
        #        bucket.name, self)
        #    RebalanceHelper.wait_for_persistence(master, bucket.name)

        self.log.info("VERIFICATION PHASE")
        SwapRebalanceBase.items_verification(master, self)
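The diag/eval payload used above is an Erlang list comprehension: it walks the children of the ns_port_sup supervisor (which owns the memcached and moxi port processes) and kills each one, after which the supervisor restarts them. A small sketch of issuing it, assuming the same diag_eval method (restart_port_children is a made-up name):

    KILL_PORT_CHILDREN = ('[erlang:exit(element(2, X), kill) '
                          '|| X <- supervisor:which_children(ns_port_sup)].')

    def restart_port_children(rest):
        # Returns whatever diag/eval reports; a truthy result means the
        # command was accepted by the cluster.
        return rest.diag_eval(KILL_PORT_CHILDREN)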
Example #28
    def _common_test_body_swap_rebalance(self, do_stop_start=False):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        status, servers_rebalanced = RebalanceHelper.rebalance_in(
            initial_servers,
            len(initial_servers) - 1)
        self.assertTrue(status, msg="Rebalance failed")

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master,
                                                      howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]

        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                format(status, content))
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info(
                "removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[
            num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password,
                                    server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        if self.do_access:
            self.log.info("DATA ACCESS PHASE")
            self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=optNodesIds)

        if do_stop_start:
            # Rebalance is stopped at 20%, 40% and 60% completion
            retry = 0
            for expected_progress in (20, 40, 60):
                self.log.info(
                    "STOP/START SWAP REBALANCE PHASE WITH PROGRESS {0}%".
                    format(expected_progress))
                while True:
                    progress = rest._rebalance_progress()
                    if progress < 0:
                        self.log.error(
                            "rebalance progress code : {0}".format(progress))
                        break
                    elif progress == 100:
                        self.log.warn("Rebalance has already reached 100%")
                        break
                    elif progress >= expected_progress:
                        self.log.info(
                            "Rebalance will be stopped with {0}%".format(
                                progress))
                        stopped = rest.stop_rebalance()
                        self.assertTrue(stopped,
                                        msg="unable to stop rebalance")
                        SwapRebalanceBase.sleep(self, 20)
                        rest.rebalance(
                            otpNodes=[node.id for node in rest.node_statuses()],
                            ejectedNodes=optNodesIds)
                        break
                    elif retry > 100:
                        break
                    else:
                        retry += 1
                        SwapRebalanceBase.sleep(self, 1)
        self.assertTrue(
            rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(
                optNodesIds))
        SwapRebalanceBase.verification_phase(self, master)
Example #29
    def _common_test_body_failed_swap_rebalance(self):
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)

        self.log.info("DATA LOAD PHASE")
        self.loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterOperationHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        self.loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
            ejectedNodes=optNodesIds)

        # Rebalance is made to fail at 20%, 40% and 60% completion
        for i in [1, 2, 3]:
            expected_progress = 20 * i
            self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
            RestHelper(rest).rebalance_reached(expected_progress)
            bucket = rest.get_buckets()[0].name
            pid = None
            if self.swap_orchestrator:
                # get PID via remote connection if master is a new node
                shell = RemoteMachineShellConnection(master)
                o, _ = shell.execute_command("ps -eo comm,pid | awk '$1 == \"memcached\" { print $2 }'")
                pid = o[0]
                shell.disconnect()
            else:
                for _ in xrange(2):
                    try:
                        _mc = MemcachedClientHelper.direct_client(master, bucket)
                        pid = _mc.stats()["pid"]
                        break
                    except EOFError as e:
                        self.log.error("{0}. Retry in 2 sec".format(e))
                        time.sleep(2)
            if pid is None:
                self.fail("impossible to get a PID")
            command = "os:cmd(\"kill -9 {0} \")".format(pid)
            self.log.info(command)
            killed = rest.diag_eval(command)
            self.log.info("killed {0}:{1}??  {2} ".format(master.ip, master.port, killed))
            self.log.info("sleep for 10 sec after kill memcached")
            time.sleep(10)
            # we can't get stats from the new node when rebalance fails
            if not self.swap_orchestrator:
                ClusterOperationHelper._wait_warmup_completed(self, [master], bucket, wait_time=600)
            i = 0
            # we expect the rebalance to fail
            while rest._rebalance_progress_status() == "running" and i < 60:
                self.log.info("rebalance progress: {0}".format(rest._rebalance_progress()))
                time.sleep(1)
                i += 1
            self.log.info("rebalance progress status:{0}".format(rest._rebalance_progress_status()))
            knownNodes = rest.node_statuses()
            self.log.info("nodes are still in cluster: {0}".format([(node.ip, node.port) for node in knownNodes]))
            ejectedNodes = list(set(optNodesIds) & set([node.id for node in knownNodes]))
            rest.rebalance(otpNodes=[node.id for node in knownNodes],
                ejectedNodes=ejectedNodes)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))

        SwapRebalanceBase.verification_phase(self, master)
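The expected-failure wait in the last loop polls _rebalance_progress_status until the rebalance leaves the "running" state. A minimal sketch of that wait, under the same RestConnection assumptions (wait_rebalance_exit is a made-up name):

    import time

    def wait_rebalance_exit(rest, timeout_secs=60):
        # Poll once per second until the rebalance is no longer running
        # or the timeout expires, then report the final status.
        waited = 0
        while rest._rebalance_progress_status() == "running" and waited < timeout_secs:
            time.sleep(1)
            waited += 1
        return rest._rebalance_progress_status()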