Example #1
 def test_Backup(self):
     shell = RemoteMachineShellConnection(self.master)
     gen_update = BlobGenerator('testdata',
                                'testdata-',
                                self.value_size,
                                end=100)
     self._load_all_buckets(self.master,
                            gen_update,
                            "create",
                            0,
                            1,
                            0,
                            True,
                            batch_size=20000,
                            pause_secs=5,
                            timeout_secs=180)
     self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
     info = shell.extract_remote_info()
     path = '/tmp/backup'
     #if info.type.lower() == "windows":
     #path = 'c:' + path
     shell.delete_files(path)
     create_dir = "mkdir " + path
     shell.execute_command(create_dir)
     shell.execute_cluster_backup(backup_location=path)
     expectedResults = {
         "peername": self.master.ip,
         "sockname": self.master.ip + ":11210",
         "source": "memcached",
         "user": "******",
         'bucket': 'default'
     }
     self.checkConfig(self.eventID, self.master, expectedResults)
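The part of Example #1 most worth reusing is the backup-directory preparation (wipe the path, recreate it, run the cluster backup). Below is a minimal sketch of that flow as a standalone helper; it assumes only the RemoteMachineShellConnection methods already used above (extract_remote_info, delete_files, execute_command, execute_cluster_backup, disconnect). The helper name and the re-enabled Windows branch are illustrative, not part of the original test.

 def prepare_and_run_backup(server, path='/tmp/backup'):
     # connect to the node and inspect the remote OS, as the test above does
     shell = RemoteMachineShellConnection(server)
     info = shell.extract_remote_info()
     if info.type.lower() == "windows":
         # the test above keeps this branch commented out; shown here re-enabled
         path = 'c:' + path
     # start from an empty backup directory
     shell.delete_files(path)
     shell.execute_command("mkdir " + path)
     # back up the whole cluster into that directory
     shell.execute_cluster_backup(backup_location=path)
     shell.disconnect()
     return path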
Example #2
 def test_Transfer(self):
     shell = RemoteMachineShellConnection(self.master)
     gen_update = BlobGenerator('testdata',
                                'testdata-',
                                self.value_size,
                                end=100)
     self._load_all_buckets(self.master,
                            gen_update,
                            "create",
                            0,
                            1,
                            0,
                            True,
                            batch_size=20000,
                            pause_secs=5,
                            timeout_secs=180)
     self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
     source = "http://" + self.master.ip + ":8091"
     info = shell.extract_remote_info()
     path = '/tmp/backup'
     #if info.type.lower() == "windows":
     #    path = '/cygdrive/c' + path
     shell.delete_files(path)
     create_dir = "mkdir " + path
     shell.execute_command(create_dir)
     options = "-b default " + " -u " + self.master.rest_username + " -p " + self.master.rest_password
     shell.execute_cbtransfer(source, path, options)
     expectedResults = {
         "peername": self.master.ip,
         "sockname": self.master.ip + ":11210",
         "source": "memcached",
         "user": "******",
         'bucket': 'default'
     }
     self.checkConfig(self.eventID, self.master, expectedResults)
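Example #2 feeds cbtransfer three plain strings: the live cluster as the source, a local directory as the destination, and bucket/credential options. For illustration, the snippet below shows roughly the command line those strings describe. How execute_cbtransfer actually assembles and runs the tool is not shown in the example, so treat the final command string as an assumption; master_ip, rest_username and rest_password stand in for the self.master attributes used above.

 source = "http://" + master_ip + ":8091"    # live cluster as the transfer source
 destination = "/tmp/backup"                 # backup directory as the destination
 options = "-b default -u " + rest_username + " -p " + rest_password
 command = "cbtransfer " + source + " " + destination + " " + options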
Example #3
 def test_resetPass(self):
     shell = RemoteMachineShellConnection(self.master)
     info = shell.extract_remote_info()
     if info.type.lower() == "windows":
         command = "%scbreset_password.exe" % (testconstants.WIN_COUCHBASE_BIN_PATH_RAW)
     else:
         command = "%scbreset_password" % (testconstants.LINUX_COUCHBASE_BIN_PATH)
     # run the platform-specific cbreset_password binary
     shell.execute_command(command)
Example #4
class bidirectional(XDCRNewBaseTest):
    def setUp(self):
        super(bidirectional, self).setUp()
        self.src_cluster = self.get_cb_cluster_by_name('C1')
        self.src_master = self.src_cluster.get_master_node()
        self.dest_cluster = self.get_cb_cluster_by_name('C2')
        self.dest_master = self.dest_cluster.get_master_node()

    def tearDown(self):
        super(bidirectional, self).tearDown()

    def __perform_ops_joint_sets(self):
        # Merge the key sets, since the keys are replicated in both directions.
        temp_expires = self._expires
        self._expires = 0   # Set to 0 so that merge_buckets doesn't wait for expiration here.
        self.merge_all_buckets()

        tasks = []
        kv_gen_src = self.src_cluster.get_kv_gen()[OPS.CREATE]
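        # gen_update covers the first _perc_upd percent of the merged key space and
        # gen_delete covers the last _perc_del percent, so the update and delete
        # ranges stay disjoint as long as the two percentages sum to 100 or less.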
        gen_update = BlobGenerator(kv_gen_src.name,
                                   kv_gen_src.seed,
                                   kv_gen_src.value_size,
                                   start=0,
                                   end=int(kv_gen_src.end * float(self._perc_upd) / 100))
        gen_delete = BlobGenerator(kv_gen_src.name,
                                   kv_gen_src.seed,
                                   kv_gen_src.value_size,
                                   start=int(kv_gen_src.end * float(100 - self._perc_del) / 100),
                                   end=kv_gen_src.end)
        if "C1" in self._upd_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(gen_update, OPS.UPDATE, self._expires)
        if "C2" in self._upd_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(gen_update, OPS.UPDATE, self._expires)
        if "C1" in self._del_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(gen_delete, OPS.DELETE, 0)
        if "C2" in self._del_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(gen_delete, OPS.DELETE, 0)

        for task in tasks:
            task.result()

        self._expires = temp_expires
        if (self._wait_for_expiration and self._expires) and ("C1" in self._upd_clusters or "C2" in self._upd_clusters):
            self.sleep(self._expires)

        self.sleep(self._wait_timeout)

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket."""
    def load_with_ops(self):
        self.setup_xdcr_and_load()
        self.perform_update_delete()
        self.verify_results()

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket.
    Here running incremental load on both cluster1 and cluster2 as specified by the user/conf file"""

    def load_with_async_ops(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    """Testing Bidirectional load( Loading at source/destination). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """
    def load_with_async_ops_and_joint_sets(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    def load_with_async_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        self.verify_results()

    def load_with_async_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_failover(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_then_add_back(self):

        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.src_cluster.add_back_node()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.dest_cluster.add_back_node()

        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_master(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_master()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_master()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    """Replication with compaction ddocs and view queries on both clusters.

    This test begins by loading a given number of items on both clusters.
    It creates _num_views as development/production view with default
    map view funcs(_is_dev_ddoc = True by default) on both clusters.
    Then we disabled compaction for ddoc on src cluster. While we don't reach
    expected fragmentation for ddoc on src cluster we update docs and perform
    view queries for all views. Then we start compaction when fragmentation
    was reached fragmentation_value. When compaction was completed we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""
    def replication_with_ddoc_compaction(self):
        self.setup_xdcr()
        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        num_views = self._input.param("num_views", 5)
        is_dev_ddoc = self._input.param("is_dev_ddoc", True)
        fragmentation_value = self._input.param("fragmentation_value", 80)
        for bucket in self.src_cluster.get_buckets():
            views = Utility.make_default_views(bucket.name, num_views, is_dev_ddoc)

        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = {"full_set": "true", "stale": "false"}

        tasks = self.src_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        tasks += self.dest_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        for task in tasks:
            task.result(self._poll_timeout)

        self.src_cluster.disable_compaction()
        fragmentation_monitor = self.src_cluster.async_monitor_view_fragmentation(prefix + ddoc_name, fragmentation_value, BUCKET_NAME.DEFAULT)
        # generate load until fragmentation reached
        while fragmentation_monitor.state != "FINISHED":
            # update docs to create fragmentation
            self.src_cluster.update_delete_data(OPS.UPDATE)
            for view in views:
                # run queries to create indexes
                self.src_cluster.query_view(prefix + ddoc_name, view.name, query)
                self.dest_cluster.query_view(prefix + ddoc_name, view.name, query)
        fragmentation_monitor.result()

        compaction_task = self.src_cluster.async_compact_view(prefix + ddoc_name, 'default')

        self.assertTrue(compaction_task.result())

        self.verify_results()

    def replication_with_view_queries_and_ops(self):
        tasks = []
        try:
            self.setup_xdcr()
            self.src_cluster.load_all_buckets(self._num_items)
            self.dest_cluster.load_all_buckets(self._num_items)

            num_views = self._input.param("num_views", 5)
            is_dev_ddoc = self._input.param("is_dev_ddoc", True)
            for bucket in self.src_cluster.get_buckets():
                views = Utility.make_default_views(bucket.name, num_views, is_dev_ddoc)

            ddoc_name = "ddoc1"
            prefix = ("", "dev_")[is_dev_ddoc]

            query = {"full_set": "true", "stale": "false", "connection_timeout": 60000}

            tasks = self.src_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
            tasks += self.dest_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)

            for task in tasks:
                task.result(self._poll_timeout)

            tasks = []
            # Setting up doc-ops at source nodes
            if "C1" in self._upd_clusters:
                tasks.extend(self.src_cluster.async_update_delete(OPS.UPDATE, self._perc_upd, self._expires))
            if "C1" in self._del_clusters:
                tasks.extend(self.src_cluster.async_update_delete(OPS.DELETE, self._perc_del))
            if "C2" in self._upd_clusters:
                tasks.extend(self.dest_cluster.async_update_delete(OPS.UPDATE, self._perc_upd, self._expires))
            if "C2" in self._del_clusters:
                tasks.extend(self.dest_cluster.async_update_delete(OPS.DELETE, self._perc_del))

            self.sleep(5)
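            # Keep querying every view (which also keeps the indexes building) until
            # all async update/delete tasks report FINISHED; only then wait out
            # expiration (if requested) and fall through to verification.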
            while True:
                for view in views:
                    self.src_cluster.query_view(prefix + ddoc_name, view.name, query)
                    self.dest_cluster.query_view(prefix + ddoc_name, view.name, query)
                if set([task.state for task in tasks]) != set(["FINISHED"]):
                    continue
                else:
                    if self._wait_for_expiration:
                        if "C1" in self._upd_clusters or "C2" in self._upd_clusters:
                            self.sleep(self._expires)
                    break

            self.merge_all_buckets()
            self.src_cluster.verify_items_count()
            self.dest_cluster.verify_items_count()

            tasks = []
            src_buckets = self.src_cluster.get_buckets()
            dest_buckets = self.dest_cluster.get_buckets()
            for view in views:
                tasks.append(self.src_cluster.async_query_view(prefix + ddoc_name, view.name, query, src_buckets[0].kvs[1].__len__()))
                tasks.append(self.dest_cluster.async_query_view(prefix + ddoc_name, view.name, query, dest_buckets[0].kvs[1].__len__()))

            for task in tasks:
                task.result(self._poll_timeout)

            self.verify_results()
        finally:
            # On timeout, cancel all outstanding tasks
            # before proceeding to the next test
            for task in tasks:
                task.cancel()

    """Replication with disabled/enabled ddoc compaction on both clusters.

    This test begins by loading a given number of items on both clusters.
    Then we disabled or enabled compaction on both clusters( set via params).
    Then we mutate and delete data on clusters 3 times. After deletion we recreate
    deleted items. When data was changed 3 times we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""
    def replication_with_disabled_ddoc_compaction(self):
        self.setup_xdcr()
        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        if "C1" in self._disable_compaction:
            self.src_cluster.disable_compaction()
        if "C2" in self._disable_compaction:
            self.dest_cluster.disable_compaction()

        # perform doc ops 3 times to increase the rev number
        for _ in range(3):
            self.async_perform_update_delete()
            # wait till deletes have been sent to recreate
            self.sleep(60)
            # restore(re-creating) deleted items
            if 'C1' in self._del_clusters:
                c1_kv_gen = self.src_cluster.get_kv_gen()

                c1_gen_delete = copy.deepcopy(c1_kv_gen[OPS.DELETE])
                if self._expires:
                    # if expiration set, recreate those keys before
                    # trying to update
                    c1_gen_update = copy.deepcopy(c1_kv_gen[OPS.UPDATE])
                    self.src_cluster.load_all_buckets_from_generator(kv_gen=c1_gen_update)
                self.src_cluster.load_all_buckets_from_generator(kv_gen=c1_gen_delete)
            if 'C2' in self._del_clusters:
                c2_kv_gen = self.dest_cluster.get_kv_gen()
                c2_gen_delete = copy.deepcopy(c2_kv_gen[OPS.DELETE])
                if self._expires:
                    c2_gen_update = copy.deepcopy(c2_kv_gen[OPS.UPDATE])
                    self.dest_cluster.load_all_buckets_from_generator(kv_gen=c2_gen_update)
                self.dest_cluster.load_all_buckets_from_generator(kv_gen=c2_gen_delete)
            # wait till we recreate deleted keys before we can delete/update
            self.sleep(60)

        self.verify_results()

    def replication_while_rebooting_a_non_master_src_dest_node(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout)

        reboot_node_dest = self.dest_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_dest, self, wait_time=self._wait_timeout * 4, wait_if_warmup=True)

        reboot_node_src = self.src_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_src, self, wait_time=self._wait_timeout * 4, wait_if_warmup=True)

        self.sleep(120)
        ClusterOperationHelper.wait_for_ns_servers_or_assert([reboot_node_dest], self, wait_if_warmup=True)
        ClusterOperationHelper.wait_for_ns_servers_or_assert([reboot_node_src], self, wait_if_warmup=True)
        self.verify_results()

    def test_disk_full(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.sleep(self._wait_timeout)

        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
        try:
            for node in [self.src_master, self.dest_master]:
                self.shell = RemoteMachineShellConnection(node)
                self.shell.execute_cbcollect_info(zip_file)
                if self.shell.extract_remote_info().type.lower() != "windows":
                    command = "unzip %s" % (zip_file)
                    output, error = self.shell.execute_command(command)
                    self.shell.log_command_output(output, error)
                    if len(error) > 0:
                        raise Exception("unable to unzip the files. Check unzip command output for help")
                    cmd = 'grep -R "Approaching full disk warning." cbcollect_info*/'
                    output, _ = self.shell.execute_command(cmd)
                else:
                    cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'Approaching full disk warning.'".format(
                                                        self.src_master.ip,
                                                        self.src_master.rest_username,
                                                        self.src_master.rest_password)
                    output, _ = self.shell.execute_command(cmd)
                self.assertNotEqual(len(output), 0, "Full disk warning not generated as expected in %s" % node.ip)
                self.log.info("Full disk warning generated as expected in %s" % node.ip)

                self.shell.delete_files(zip_file)
                self.shell.delete_files("cbcollect_info*")
        except Exception as e:
            self.log.info(e)
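The bidirectional tests above (and the unidirectional ones in the next example) branch constantly on membership checks such as "C1" in self._upd_clusters or "C1" in self._failover. test_node_crash_master shows the underlying pattern explicitly: a conf parameter holding a '-'-separated list of cluster names, split into a list. Below is a sketch of how the remaining cluster-selection attributes could be derived the same way; the attribute names come from the tests above, but the parameter keys and the assumption that the XDCRNewBaseTest setUp does exactly this are mine, since that base class is not shown here.

 def _parse_cluster_selection_params(self):
     # hypothetical helper: each param is a '-'-separated list of cluster names, e.g. "C1-C2"
     self._upd_clusters = self._input.param("update", "").split('-')
     self._del_clusters = self._input.param("delete", "").split('-')
     self._failover = self._input.param("failover", "").split('-')
     self._warmup = self._input.param("warm", "").split('-')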
Example #5
class unidirectional(XDCRNewBaseTest):
    def setUp(self):
        super(unidirectional, self).setUp()
        self.src_cluster = self.get_cb_cluster_by_name('C1')
        self.src_master = self.src_cluster.get_master_node()
        self.dest_cluster = self.get_cb_cluster_by_name('C2')
        self.dest_master = self.dest_cluster.get_master_node()

    def tearDown(self):
        super(unidirectional, self).tearDown()

    def suite_tearDown(self):
        pass

    def suite_setUp(self):
        pass

    """Testing Unidirectional load( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters.Create/Update/Delete operations are performed based on doc-ops specified by the user. """

    def load_with_ops(self):
        self.setup_xdcr_and_load()
        self.perform_update_delete()
        self.verify_results()

    """Testing Unidirectional load( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters. Create/Update/Delete are performed in parallel- doc-ops specified by the user. """

    def load_with_async_ops(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    def load_with_async_ops_diff_data_size(self):
        # Load 52 items (52 letters, lower and upper case), each with value size 1
        self.src_cluster.load_all_buckets(52, value_size=1)
        # Load 5 items with value size 1MB
        self.src_cluster.load_all_buckets(5, value_size=1000000)
        # Load 1 item with value size 10MB
        # (20MB values hit memory issues on VMs)
        self.src_cluster.load_all_buckets(1, value_size=10000000)

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed after based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.perform_update_delete()
        self.sleep(self._wait_timeout // 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.perform_update_delete()
        self.sleep(self._wait_timeout // 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_with_warmup(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            "Test case does not apply for Ephemeral buckets"
            return
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout // 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_with_warmup_master(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            "Test case does not apply for Ephemeral buckets"
            return
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout // 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.sleep(300)

        self.verify_results()

    def load_with_failover(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout // 6)
        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_then_add_back(self):

        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.src_cluster.add_back_node()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.dest_cluster.add_back_node()

        self.perform_update_delete()

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_failover_master(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_master()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_master()

        self.sleep(self._wait_timeout // 6)
        self.perform_update_delete()

        self.sleep(300)

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_async_failover(self):
        self.setup_xdcr_and_load()

        tasks = []
        if "C1" in self._failover:
            tasks.append(self.src_cluster.async_failover())
        if "C2" in self._failover:
            tasks.append(self.dest_cluster.async_failover())

        self.perform_update_delete()
        self.sleep(self._wait_timeout // 4)

        for task in tasks:
            task.result()

        if "C1" in self._failover:
            self.src_cluster.rebalance_failover_nodes()
        if "C2" in self._failover:
            self.dest_cluster.rebalance_failover_nodes()

        self.verify_results()

    """Replication with compaction ddocs and view queries on both clusters.

        This test begins by loading a given number of items on the source cluster.
        It creates num_views as development/production view with default
        map view funcs(_is_dev_ddoc = True by default) on both clusters.
        Then we disabled compaction for ddoc on src cluster. While we don't reach
        expected fragmentation for ddoc on src cluster we update docs and perform
        view queries for all views. Then we start compaction when fragmentation
        was reached fragmentation_value. When compaction was completed we perform
        a full verification: wait for the disk queues to drain
        and then verify that there has been any data loss on all clusters."""
    def replication_with_ddoc_compaction(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return

        self.setup_xdcr_and_load()

        num_views = self._input.param("num_views", 5)
        is_dev_ddoc = self._input.param("is_dev_ddoc", True)
        fragmentation_value = self._input.param("fragmentation_value", 80)
        for bucket in self.src_cluster.get_buckets():
            views = Utility.make_default_views(bucket.name, num_views, is_dev_ddoc)

        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = {"full_set": "true", "stale": "false"}

        tasks = self.src_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        tasks += self.dest_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        for task in tasks:
            task.result(self._poll_timeout)

        self.src_cluster.disable_compaction()
        fragmentation_monitor = self.src_cluster.async_monitor_view_fragmentation(prefix + ddoc_name, fragmentation_value, BUCKET_NAME.DEFAULT)

        # generate load until fragmentation reached
        while fragmentation_monitor.state != "FINISHED":
            # update docs to create fragmentation
            self.src_cluster.update_delete_data(OPS.UPDATE)
            for view in views:
                # run queries to create indexes
                self.src_cluster.query_view(prefix + ddoc_name, view.name, query)
        fragmentation_monitor.result()

        compaction_task = self.src_cluster.async_compact_view(prefix + ddoc_name, 'default')

        self.assertTrue(compaction_task.result())

        self.verify_results()

    """Replication with disabled/enabled ddoc compaction on source cluster.

        This test begins by loading a given number of items on the source cluster.
        Then we disabled or enabled compaction on both clusters( set via params).
        Then we mutate and delete data on the source cluster 3 times.
        After deletion we recreate deleted items. When data was changed 3 times
        we perform a full verification: wait for the disk queues to drain
        and then verify that there has been no data loss on both all clusters."""
    def replication_with_disabled_ddoc_compaction(self):
        self.setup_xdcr_and_load()

        if "C1" in self._disable_compaction:
            self.src_cluster.disable_compaction()
        if "C2" in self._disable_compaction:
            self.dest_cluster.disable_compaction()

        # perform doc ops 3 times to increase the rev number
        for _ in range(3):
            self.async_perform_update_delete()
            # restore(re-creating) deleted items
            if 'C1' in self._del_clusters:
                c1_kv_gen = self.src_cluster.get_kv_gen()
                gen_delete = copy.deepcopy(c1_kv_gen[OPS.DELETE])
                self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_delete)
                self.sleep(5)

        self.sleep(600)
        self.verify_results()

    def replication_while_rebooting_a_non_master_destination_node(self):
        self.setup_xdcr_and_load()
        self.src_cluster.set_xdcr_param("xdcrFailureRestartInterval", 1)
        self.perform_update_delete()
        self.sleep(self._wait_timeout // 2)
        rebooted_node = self.dest_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(rebooted_node, self, wait_time=self._wait_timeout * 4, wait_if_warmup=True)

        self.verify_results()

    def replication_with_firewall_enabled(self):
        self.src_cluster.set_xdcr_param("xdcrFailureRestartInterval", 1)
        self.setup_xdcr_and_load()
        self.perform_update_delete()

        NodeHelper.enable_firewall(self.dest_master)
        self.sleep(30)
        NodeHelper.disable_firewall(self.dest_master)
        self.verify_results()

    """Testing Unidirectional append ( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters. """

    def test_append(self):
        self.setup_xdcr_and_load()
        self.verify_results()
        loop_count = self._input.param("loop_count", 20)
        for i in range(loop_count):
            self.log.info("Append iteration # %s" % i)
            gen_append = BlobGenerator('loadOne', 'loadOne', self._value_size, end=self._num_items)
            self.src_cluster.load_all_buckets_from_generator(gen_append, ops=OPS.APPEND, batch_size=1)
            self.sleep(self._wait_timeout)
        self.verify_results()

    '''
    This method runs the cbcollect_info tool after setting up uni-XDCR and checks
    whether the archive generated by cbcollect_info contains the xdcr log file or not.
    '''
    def collectinfotest_for_xdcr(self):
        self.load_with_ops()
        self.node_down = self._input.param("node_down", False)
        self.log_filename = self._input.param("file_name", "collectInfo")
        self.shell = RemoteMachineShellConnection(self.src_master)
        self.shell.execute_cbcollect_info("%s.zip" % (self.log_filename))
        from clitest import collectinfotest
        # HACK added self.buckets data member.
        self.buckets = self.src_cluster.get_buckets()
        collectinfotest.CollectinfoTests.verify_results(
            self, self.log_filename
        )

    """ Verify the fix for MB-9548"""
    def verify_replications_deleted_after_bucket_deletion(self):
        self.setup_xdcr_and_load()
        self.verify_results()
        rest_conn = RestConnection(self.src_master)
        replications = rest_conn.get_replications()
        self.assertTrue(replications, "Number of replications should not be 0")
        self.src_cluster.delete_all_buckets()
        self.sleep(60)
        replications = rest_conn.get_replications()
        self.log.info("Replications : %s" % replications)
        self.assertTrue(not replications, "Rest returns replication list even after source bucket is deleted ")

    """ Verify fix for MB-9862"""
    def test_verify_memcache_connections(self):
        allowed_memcached_conn = self._input.param("allowed_connections", 100)
        max_ops_per_second = self._input.param("max_ops_per_second", 2500)
        min_item_size = self._input.param("min_item_size", 128)
        num_docs = self._input.param("num_docs", 30000)
        # start load, max_ops_per_second is the combined limit for all buckets
        mcsodaLoad = LoadWithMcsoda(self.src_master, num_docs, prefix='')
        mcsodaLoad.cfg["max-ops"] = 0
        mcsodaLoad.cfg["max-ops-per-sec"] = max_ops_per_second
        mcsodaLoad.cfg["exit-after-creates"] = 1
        mcsodaLoad.cfg["min-value-size"] = min_item_size
        mcsodaLoad.cfg["json"] = 0
        mcsodaLoad.cfg["batch"] = 100
        loadDataThread = Thread(target=mcsodaLoad.load_data,
                                  name='mcloader_default')
        loadDataThread.daemon = True
        loadDataThread.start()

        src_remote_shell = RemoteMachineShellConnection(self.src_master)
        machine_type = src_remote_shell.extract_remote_info().type.lower()
        while (loadDataThread.is_alive() and machine_type == 'linux'):
            command = "netstat -lpnta | grep 11210 | grep TIME_WAIT | wc -l"
            output, _ = src_remote_shell.execute_command(command)
            if int(output[0]) > allowed_memcached_conn:
                # stop load
                mcsodaLoad.load_stop()
                loadDataThread.join()
                self.fail("Memcached connections {0} are increased above {1} \
                            on Source node".format(
                                                   allowed_memcached_conn,
                                                   int(output[0])))
            self.sleep(5)

        # stop load
        mcsodaLoad.load_stop()
        loadDataThread.join()

    # Test to verify MB-10116
    def verify_ssl_private_key_not_present_in_logs(self):
        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
        try:
            self.shell = RemoteMachineShellConnection(self.src_master)
            self.load_with_ops()
            self.shell.execute_cbcollect_info(zip_file)
            if self.shell.extract_remote_info().type.lower() != "windows":
                command = "unzip %s" % (zip_file)
                output, error = self.shell.execute_command(command)
                self.shell.log_command_output(output, error)
                if len(error) > 0:
                    raise Exception("unable to unzip the files. Check unzip command output for help")
                cmd = 'grep -R "BEGIN RSA PRIVATE KEY" cbcollect_info*/'
                output, _ = self.shell.execute_command(cmd)
            else:
                cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'BEGIN RSA PRIVATE KEY'".format(
                                                    self.src_master.ip,
                                                    self.src_master.rest_username,
                                                    self.src_master.rest_password)
                output, _ = self.shell.execute_command(cmd)
            self.assertTrue(not output, "XDCR SSL Private Key is found in diag logs -> %s" % output)
        finally:
            self.shell.delete_files(zip_file)
            self.shell.delete_files("cbcollect_info*")

    # Buckets States
    def delete_recreate_dest_buckets(self):
        self.setup_xdcr_and_load()

        # Remove destination buckets
        self.dest_cluster.delete_all_buckets()

        # Code for re-create_buckets
        self.create_buckets_on_cluster("C2")

        self._resetup_replication_for_recreate_buckets("C2")

        self.async_perform_update_delete()
        self.verify_results()

    def flush_dest_buckets(self):
        self.setup_xdcr_and_load()

        # flush destination buckets
        self.dest_cluster.flush_buckets()

        self.async_perform_update_delete()
        self.verify_results()

    # Nodes Crashing Scenarios
    def __kill_processes(self, crashed_nodes=[]):
        for node in crashed_nodes:
            try:
                NodeHelper.kill_erlang(node)
            except:
                self.log.info('Could not kill erlang process on node, continuing..')

    def __start_cb_server(self, node):
        shell = RemoteMachineShellConnection(node)
        shell.start_couchbase()
        shell.disconnect()

    def test_node_crash_master(self):
        self.setup_xdcr_and_load()

        crashed_nodes = []
        crash = self._input.param("crash", "").split('-')
        if "C1" in crash:
            crashed_nodes.append(self.src_master)
        if "C2" in crash:
            crashed_nodes.append(self.dest_master)

        self.__kill_processes(crashed_nodes)

        for crashed_node in crashed_nodes:
            self.__start_cb_server(crashed_node)

        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.sleep(self._wait_timeout)
        else:
            NodeHelper.wait_warmup_completed(crashed_nodes)

        self.async_perform_update_delete()
        self.verify_results()

    # Disaster at site.
    # 1. Crash Source Cluster, sleep n seconds.
    # 2. Crash Dest Cluster.
    # 3. Wait for Source Cluster to warm up. Load more data and perform mutations on Src.
    # 4. Wait for Dest to warm up.
    # 5. Verify data.
    def test_node_crash_cluster(self):
        self.setup_xdcr_and_load()

        crashed_nodes = []
        crash = self._input.param("crash", "").split('-')
        if "C1" in crash:
            crashed_nodes += self.src_cluster.get_nodes()
            self.__kill_processes(crashed_nodes)
            self.sleep(30)
        if "C2" in crash:
            crashed_nodes += self.dest_cluster.get_nodes()
            self.__kill_processes(crashed_nodes)

        for crashed_node in crashed_nodes:
            self.__start_cb_server(crashed_node)

        bucket_type = self._input.param("bucket_type", "membase")

        if "C1" in crash:
            if bucket_type == "ephemeral":
                self.sleep(self._wait_timeout)
            else:
                NodeHelper.wait_warmup_completed(self.src_cluster.get_nodes())
            gen_create = BlobGenerator('loadTwo', 'loadTwo', self._value_size, end=self._num_items)
            self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_create)

        self.async_perform_update_delete()

        if "C2" in crash:
            if bucket_type == "ephemeral":
                self.sleep(self._wait_timeout)
            else:
                NodeHelper.wait_warmup_completed(self.dest_cluster.get_nodes())

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest bucket flush """
    def test_idle_xdcr_dest_flush(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        bucket = self.dest_cluster.get_bucket_by_name(BUCKET_NAME.DEFAULT)
        self.dest_cluster.flush_buckets([bucket])

        self.sleep(self._wait_timeout)

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest bucket recreate """
    def test_idle_xdcr_dest_recreate(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        bucket = self.dest_cluster.get_bucket_by_name(BUCKET_NAME.DEFAULT)
        self.dest_cluster.delete_bucket(BUCKET_NAME.DEFAULT)
        bucket_params=self._create_bucket_params(size=bucket.bucket_size)
        self.dest_cluster.create_default_bucket(bucket_params)
        self.sleep(self._wait_timeout)

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest failover """
    def test_idle_xdcr_dest_failover(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout)

        self.verify_results()

    def _disable_compression(self):
        shell = RemoteMachineShellConnection(self.src_master)
        for remote_cluster in self.src_cluster.get_remote_clusters():
            for repl in remote_cluster.get_replications():
                src_bucket_name = repl.get_src_bucket().name
                if src_bucket_name in str(repl):
                    repl_id = repl.get_repl_id()
                    repl_id = str(repl_id).replace('/', '%2F')
                    base_url = "http://" + self.src_master.ip + \
                               ":8091/settings/replications/" + repl_id
                    command = "curl -X POST -u Administrator:password " + base_url + \
                              " -d compressionType=" + "None"
                    output, error = shell.execute_command(command)
                    shell.log_command_output(output, error)
        shell.disconnect()

    def test_optimistic_replication(self):
        """Tests with 2 buckets with customized optimisic replication thresholds
           one greater than value_size, other smaller
        """
        from .xdcrnewbasetests import REPL_PARAM
        self.setup_xdcr()
        # To ensure docs size = value_size on target
        self._disable_compression()
        self.load_data_topology()
        self._wait_for_replication_to_catchup()
        for remote_cluster in self.src_cluster.get_remote_clusters():
            for replication in remote_cluster.get_replications():
                src_bucket_name = replication.get_src_bucket().name
                opt_repl_threshold = replication.get_xdcr_setting(REPL_PARAM.OPTIMISTIC_THRESHOLD)
                docs_opt_replicated_stat = 'replications/%s/docs_opt_repd' %replication.get_repl_id()
                opt_replicated = RestConnection(self.src_master).fetch_bucket_xdcr_stats(
                                        src_bucket_name
                                        )['op']['samples'][docs_opt_replicated_stat][-1]
                self.log.info("Bucket: %s, value size: %s, optimistic threshold: %s"
                              " number of mutations optimistically replicated: %s"
                                %(src_bucket_name,
                                  self._value_size,
                                  opt_repl_threshold,
                                  opt_replicated
                                ))
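                # Docs at or below the optimistic threshold are assumed to skip the
                # remote getMeta pre-check, so they should all show up in docs_opt_repd;
                # larger docs take the pessimistic path and the counter should stay 0.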
                if self._value_size <= opt_repl_threshold:
                    if opt_replicated == self._num_items:
                        self.log.info("SUCCESS: All keys in bucket %s were optimistically"
                                      " replicated"
                                      %(replication.get_src_bucket().name))
                    else:
                        self.fail("Value size: %s, optimistic threshold: %s,"
                                  " number of docs optimistically replicated: %s"
                          %(self._value_size, opt_repl_threshold, opt_replicated))
                else:
                    if opt_replicated == 0:
                        self.log.info("SUCCESS: No key in bucket %s was optimistically"
                                      " replicated"
                                      % (replication.get_src_bucket().name))
                    else:
                        self.fail("Partial optimistic replication detected!")

    def test_disk_full(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.sleep(self._wait_timeout)

        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
        try:
            for node in [self.src_master, self.dest_master]:
                self.shell = RemoteMachineShellConnection(node)
                self.shell.execute_cbcollect_info(zip_file)
                if self.shell.extract_remote_info().type.lower() != "windows":
                    command = "unzip %s" % (zip_file)
                    output, error = self.shell.execute_command(command)
                    self.shell.log_command_output(output, error)
                    if len(error) > 0:
                        raise Exception("unable to unzip the files. Check unzip command output for help")
                    cmd = 'grep -R "Approaching full disk warning." cbcollect_info*/'
                    output, _ = self.shell.execute_command(cmd)
                else:
                    cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'Approaching full disk warning.'".format(
                                                        self.src_master.ip,
                                                        self.src_master.rest_username,
                                                        self.src_master.rest_password)
                    output, _ = self.shell.execute_command(cmd)
                self.assertNotEqual(len(output), 0, "Full disk warning not generated as expected in %s" % node.ip)
                self.log.info("Full disk warning generated as expected in %s" % node.ip)

                self.shell.delete_files(zip_file)
                self.shell.delete_files("cbcollect_info*")
        except Exception as e:
            self.log.info(e)

    def test_retry_connections_on_errors_before_restart(self):
        """
        CBQE-3373: Do not restart pipeline as soon as connection errors are
        detected, backoff and retry 5 times before trying to restart pipeline.
        """
        passed = False
        # start data load after setting up xdcr
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        # block port 11210 on target to simulate a connection error
        shell = RemoteMachineShellConnection(self.dest_master)
        out, err = shell.execute_command("/sbin/iptables -A INPUT -p tcp --dport"
                                         " 11210 -j DROP")
        shell.log_command_output(out, err)
        out, err = shell.execute_command("/sbin/iptables -L")
        shell.log_command_output(out, err)

        # complete loading
        for task in load_tasks:
            task.result()

        # wait for goxdcr to detect i/o timeout and try repairing
        self.sleep(self._wait_timeout*5)

        # unblock port 11210 so replication can continue
        out, err = shell.execute_command("/sbin/iptables -D INPUT -p tcp --dport"
                                         " 11210 -j DROP")
        shell.log_command_output(out, err)
        out, err = shell.execute_command("/sbin/iptables -L")
        shell.log_command_output(out, err)
        shell.disconnect()

        # check logs for traces of retry attempts
        for node in self.src_cluster.get_nodes():
            _, count1 = NodeHelper.check_goxdcr_log(
                            node,
                            "Failed to repair connections to target cluster",
                            goxdcr_log)
            _, count2 = NodeHelper.check_goxdcr_log(
                            node,
                            "Failed to set up connections to target cluster",
                            goxdcr_log)
            count = count1 + count2
            if count > 0:
                self.log.info('SUCCESS: We tried to repair connections before'
                              ' restarting pipeline')
                passed = True

        if not passed:
            self.fail("No attempts were made to repair connections on %s before"
                      " restarting pipeline" % self.src_cluster.get_nodes())
        self.verify_results()

    def test_verify_mb19802_1(self):
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        conn = RemoteMachineShellConnection(self.dest_cluster.get_master_node())
        conn.stop_couchbase()

        for task in load_tasks:
            task.result()

        conn.start_couchbase()
        self.sleep(300)

        for node in self.src_cluster.get_nodes():
            _, count = NodeHelper.check_goxdcr_log(
                            node,
                            "batchGetMeta received fatal error and had to abort",
                            goxdcr_log)
            self.assertEqual(count, 0, "batchGetMeta error message found in " + str(node.ip))
            self.log.info("batchGetMeta error message not found in " + str(node.ip))

        self.verify_results()

    def test_verify_mb19802_2(self):
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        self.dest_cluster.failover_and_rebalance_master()

        for task in load_tasks:
            task.result()

        for node in self.src_cluster.get_nodes():
            _, count = NodeHelper.check_goxdcr_log(
                            node,
                            "batchGetMeta received fatal error and had to abort",
                            goxdcr_log)
            self.assertEqual(count, 0, "batchGetMeta timed out error message found in " + str(node.ip))
            self.log.info("batchGetMeta error message not found in " + str(node.ip))

        self.sleep(300)
        self.verify_results()

    def test_verify_mb19697(self):
        self.setup_xdcr_and_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        self.src_cluster.pause_all_replications()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=100000)
        self.src_cluster.load_all_buckets_from_generator(gen)

        self.src_cluster.resume_all_replications()
        self._wait_for_replication_to_catchup()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=100000)
        load_tasks = self.src_cluster.async_load_all_buckets_from_generator(gen)

        self.src_cluster.rebalance_out()

        for task in load_tasks:
            task.result()

        self._wait_for_replication_to_catchup()

        self.src_cluster.rebalance_in()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=100000)
        load_tasks = self.src_cluster.async_load_all_buckets_from_generator(gen)

        self.src_cluster.failover_and_rebalance_master()

        for task in load_tasks:
            task.result()

        self._wait_for_replication_to_catchup()

        for node in self.src_cluster.get_nodes():
            _, count = NodeHelper.check_goxdcr_log(
                            node,
                            "counter .+ goes backward, maybe due to the pipeline is restarted",
                            goxdcr_log)
            self.assertEqual(count, 0, "counter goes backward, maybe due to the pipeline is restarted "
                                        "error message found in " + str(node.ip))
            self.log.info("counter goes backward, maybe due to the pipeline is restarted "
                                        "error message not found in " + str(node.ip))

        self.sleep(300)
        self.verify_results()

    def test_verify_mb20463(self):
        src_version = NodeHelper.get_cb_version(self.src_cluster.get_master_node())
        if float(src_version[:3]) != 4.5:
            self.log.info("Source cluster has to be at 4.5 for this test")
            return

        servs = self._input.servers[2:4]
        params = {}
        params['num_nodes'] = len(servs)
        params['product'] = 'cb'
        params['version'] = '4.1.2-6088'
        params['vbuckets'] = [1024]
        self.log.info("will install {0} on {1}".format('4.1.2-6088', [s.ip for s in servs]))
        InstallerJob().parallel_install(servs, params)

        if params['product'] in ["couchbase", "couchbase-server", "cb"]:
            success = True
            for server in servs:
                success &= RemoteMachineShellConnection(server).is_couchbase_installed()
                if not success:
                    self.fail("some nodes were not installed successfully on target cluster!")

        self.log.info("4.1.2 installed successfully on target cluster")

        conn = RestConnection(self.dest_cluster.get_master_node())
        conn.add_node(user=self._input.servers[3].rest_username, password=self._input.servers[3].rest_password,
                      remoteIp=self._input.servers[3].ip)
        self.sleep(30)
        conn.rebalance(otpNodes=[node.id for node in conn.node_statuses()])
        self.sleep(30)
        conn.create_bucket(bucket='default', ramQuotaMB=512)

        tasks = self.setup_xdcr_async_load()

        self.sleep(30)

        NodeHelper.enable_firewall(self.dest_master)
        self.sleep(30)
        NodeHelper.disable_firewall(self.dest_master)

        for task in tasks:
            task.result()

        self._wait_for_replication_to_catchup(timeout=600)

        self.verify_results()

    def test_rollback(self):
        bucket = self.src_cluster.get_buckets()[0]
        nodes = self.src_cluster.get_nodes()

        # Stop Persistence on Node A & Node B
        for node in nodes:
            mem_client = MemcachedClientHelper.direct_client(node, bucket)
            mem_client.stop_persistence()
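        # Note (added for clarity, not part of the original test flow): with persistence
        # stopped, the loaded mutations live only in memory, so killing memcached on Node A
        # below discards its unpersisted data; when XDCR's DCP streams reconnect they are
        # told to roll back to the last persisted snapshot, which is what the
        # "Received rollback from DCP stream" log check at the end of this test verifies.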

        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'
        self.setup_xdcr()

        self.src_cluster.pause_all_replications()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(gen)

        self.src_cluster.resume_all_replications()

        # Perform mutations on the bucket
        self.async_perform_update_delete()

        rest1 = RestConnection(self.src_cluster.get_master_node())
        rest2 = RestConnection(self.dest_cluster.get_master_node())

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info("Before rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))

        # Kill memcached on Node A so that Node B becomes master
        shell = RemoteMachineShellConnection(self.src_cluster.get_master_node())
        shell.kill_memcached()

        # Start persistence on Node B
        mem_client = MemcachedClientHelper.direct_client(nodes[1], bucket)
        mem_client.start_persistence()

        # Failover Node B
        failover_task = self.src_cluster.async_failover()
        failover_task.result()

        # Wait for Failover & rollback to complete
        self.sleep(60)

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info("After rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))

        self.assertTrue(self.src_cluster.wait_for_outbound_mutations(),
                        "Mutations in source cluster not replicated to target after rollback")
        self.log.info("Mutations in source cluster replicated to target after rollback")

        _, count = NodeHelper.check_goxdcr_log(
                        nodes[0],
                        "Received rollback from DCP stream",
                        goxdcr_log, timeout=60)
        self.assertGreater(count, 0, "rollback did not happen as expected")
        self.log.info("rollback happened as expected")

    def test_verify_mb19181(self):
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0]) \
                     + '/goxdcr.log*'

        self.dest_cluster.failover_and_rebalance_master()

        for task in load_tasks:
            task.result()

        for node in self.src_cluster.get_nodes():
            _, count = NodeHelper.check_goxdcr_log(
                node,
                "Can't move update state from",
                goxdcr_log)
            self.assertEqual(count, 0, "Can't move update state from - error message found in " + str(node.ip))
            self.log.info("Can't move update state from - error message not found in " + str(node.ip))

        self.verify_results()

    def test_verify_mb21369(self):
        repeat = self._input.param("repeat", 5)
        load_tasks = self.setup_xdcr_async_load()

        conn = RemoteMachineShellConnection(self.src_cluster.get_master_node())
        output, error = conn.execute_command("netstat -an | grep " + self.src_cluster.get_master_node().ip
                                             + ":11210 | wc -l")
        conn.log_command_output(output, error)
        before = output[0]
        self.log.info("No. of memcached connections before: {0}".format(output[0]))

        for i in range(0, repeat):
            self.src_cluster.pause_all_replications()
            self.sleep(30)
            self.src_cluster.resume_all_replications()
            self.sleep(self._wait_timeout)
            output, error = conn.execute_command("netstat -an | grep " + self.src_cluster.get_master_node().ip
                                                 + ":11210 | wc -l")
            conn.log_command_output(output, error)
            self.log.info("No. of memcached connections in iteration {0}:  {1}".format(i+1, output[0]))
            if int(output[0]) - int(before) > 5:
                self.fail("Number of memcached connections changed beyond allowed limit")

        for task in load_tasks:
            task.result()

        self.log.info("No. of memcached connections did not increase with pausing and resuming replication multiple times")

    def test_maxttl_setting(self):
        maxttl = int(self._input.param("maxttl", None))
        self.setup_xdcr_and_load()
        self.merge_all_buckets()
        self._wait_for_replication_to_catchup()
        self.sleep(maxttl, "waiting for docs to expire per maxttl properly")
        for bucket in self.src_cluster.get_buckets():
            items = RestConnection(self.src_master).get_active_key_count(bucket)
            self.log.info("Docs in source bucket is {0} after maxttl has elapsed".format(items))
            if items != 0:
                self.fail("Docs in source bucket is not 0 after maxttl has elapsed")
        self._wait_for_replication_to_catchup()


class RecoveryUseTransferTests(TransferBaseTest):

    def setUp(self):
        self.times_teardown_called = 1
        super(RecoveryUseTransferTests, self).setUp()
        self.server_origin = self.servers[0]
        self.server_recovery = self.servers[1]
        self.shell = RemoteMachineShellConnection(self.server_origin)
        info = self.shell.extract_remote_info()
        self.os = info.type.lower()

    def tearDown(self):
        if not self.input.param("skip_cleanup", True):
            if self.times_teardown_called > 1 :
                if self.os == 'windows':
                    self.shell.delete_files("/cygdrive/c%s" % (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
        if self.input.param("skip_cleanup", True):
            if self.case_number > 1 or self.times_teardown_called > 1:
                if self.os == 'windows':
                    self.shell.delete_files("/cygdrive/c%s" % (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
        self.times_teardown_called += 1
        super(RecoveryUseTransferTests, self).tearDown()

    def recover_to_cbserver(self):
        """Recover data with 2.0 couchstore files to a 2.0 online server

        We load a number of items to one node first and then do some mutations on these items.
        Later we use cbtransfer to transfer the couchstore files we have on this
        node to a new node. We verify the data by comparing the items in KVStore
        with the items in the new node."""

        self.load_data()

        kvs_before = {}
        bucket_names = []
        for bucket in self.buckets:
            kvs_before[bucket.name] = bucket.kvs[1]
            bucket_names.append(bucket.name)

        if self.default_bucket:
            self.cluster.create_default_bucket(self.server_recovery, self.bucket_size, self.num_replicas)
            self.buckets.append(Bucket(name="default", authType="sasl", saslPassword="", num_replicas=self.num_replicas, bucket_size=self.bucket_size))
        self._create_sasl_buckets(self.server_recovery, self.sasl_buckets)
        self._create_standard_buckets(self.server_recovery, self.standard_buckets)

        for bucket in self.buckets:
            bucket.kvs[1] = kvs_before[bucket.name]
            transfer_source = "couchstore-files://%s" % (COUCHBASE_DATA_PATH)
            transfer_destination = "http://%s@%s:%s -b %s -B %s -v -v -v" % (self.couchbase_login_info,
                                                                             self.server_recovery.ip,
                                                                             self.server_recovery.port,
                                                                             bucket.name, bucket.name)
            self.shell.execute_cbtransfer(transfer_source, transfer_destination)
        del kvs_before
        time.sleep(self.expire_time + 1)
        shell_server_recovery = RemoteMachineShellConnection(self.server_recovery)
        for bucket in self.buckets:
            shell_server_recovery.execute_cbepctl(bucket, "", "set flush_param", "exp_pager_stime", 5)
        shell_server_recovery.disconnect()
        time.sleep(30)

        self._wait_for_stats_all_buckets([self.server_recovery])
        self._verify_all_buckets(self.server_recovery, 1, self.wait_timeout * 50, self.max_verify, True, 1)
        self._verify_stats_all_buckets([self.server_recovery])

    def recover_to_backupdir(self):
        """Recover data with 2.0 couchstore files to a 2.0 backup diretory

        We load a number of items to a node first and then do some mutataion on these items.
        Later we use cbtransfer to transfer the couchstore files we have on this node to
        a backup directory. We use cbrestore to restore these backup files to the same node
        for verification."""

        self.load_data()

        kvs_before = {}
        bucket_names = []

        self.shell.delete_files(self.backup_location)
        self.shell.create_directory(self.backup_location)

        for bucket in self.buckets:
            kvs_before[bucket.name] = bucket.kvs[1]
            bucket_names.append(bucket.name)
            transfer_source = "-v -v -v couchstore-files://%s" % (COUCHBASE_DATA_PATH)
            transfer_destination = self.backup_location
            self.shell.execute_cbtransfer(transfer_source, transfer_destination)

        self._all_buckets_delete(self.server_origin)
        if self.default_bucket:
            self.cluster.create_default_bucket(self.server_origin, self.bucket_size, self.num_replicas)
            self.buckets.append(Bucket(name="default", authType="sasl", saslPassword="", num_replicas=self.num_replicas, bucket_size=self.bucket_size))
        self._create_sasl_buckets(self.server_origin, self.sasl_buckets)
        self._create_standard_buckets(self.server_origin, self.standard_buckets)

        for bucket in self.buckets:
            bucket.kvs[1] = kvs_before[bucket.name]
        del kvs_before
        self.shell.restore_backupFile(self.couchbase_login_info, self.backup_location, bucket_names)
        time.sleep(self.expire_time + 1)
        for bucket in self.buckets:
            self.shell.execute_cbepctl(bucket, "", "set flush_param", "exp_pager_stime", 5)
        time.sleep(30)

        self._wait_for_stats_all_buckets([self.server_origin])
        self._verify_all_buckets(self.server_origin, 1, self.wait_timeout * 50, self.max_verify, True, 1)
        self._verify_stats_all_buckets([self.server_origin])

    def load_data(self):
        gen_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        gen_update = BlobGenerator('nosql', 'nosql-', self.value_size, end=(self.num_items / 2 - 1))
        gen_expire = BlobGenerator('nosql', 'nosql-', self.value_size, start=self.num_items / 2, end=(self.num_items * 3 / 4 - 1))
        gen_delete = BlobGenerator('nosql', 'nosql-', self.value_size, start=self.num_items * 3 / 4, end=self.num_items)
        self._load_all_buckets(self.server_origin, gen_load, "create", 0, 1, self.item_flag, True, batch_size=20000, pause_secs=5, timeout_secs=180)

        if(self.doc_ops is not None):
            if("update" in self.doc_ops):
                self._load_all_buckets(self.server_origin, gen_update, "update", 0, 1, self.item_flag, True, batch_size=20000, pause_secs=5, timeout_secs=180)
            if("delete" in self.doc_ops):
                self._load_all_buckets(self.server_origin, gen_delete, "delete", 0, 1, self.item_flag, True, batch_size=20000, pause_secs=5, timeout_secs=180)
            if("expire" in self.doc_ops):
                self._load_all_buckets(self.server_origin, gen_expire, "update", self.expire_time, 1, self.item_flag, True, batch_size=20000, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets([self.server_origin])
        time.sleep(30)


class BackupBaseTest(BaseTestCase):
    def setUp(self):
        self.times_teardown_called = 1
        super(BackupBaseTest, self).setUp()
        self.shell = RemoteMachineShellConnection(self.master)
        info = self.shell.extract_remote_info()
        self.os = info.type.lower()
        self.value_size = self.input.param("value_size", 256)
        self.expire_time = self.input.param("expire_time", 60)
        self.number_of_backups = self.input.param("number_of_backups", 1)
        self.backup_type = self.input.param("backup_type", None)
        self.item_flag = self.input.param("item_flag", 0)
        self.couchbase_login_info = "%s:%s" % (self.input.membase_settings.rest_username,
                                               self.input.membase_settings.rest_password)
        self.backup_location = self.input.param("backup_location", "/tmp/backup")
        self.command_options = self.input.param("command_options", '')
        if self.command_options != '':
            self.command_options = self.command_options.split(";")
        self.doc_ops = self.input.param("doc_ops", None)
        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")
        self.backup_x_options = self.input.param("backup_x_options", None)
        if self.backup_x_options is not None:
            temp = self.backup_x_options.split(";")
            temp_x_options = {}
            for element in temp:
                temp_array = element.split()
                temp_x_options[temp_array[0]] = temp_array[1]
            self.backup_x_options = temp_x_options

        self.restore_x_options = self.input.param("restore_x_options", None)
        if self.restore_x_options is not None:
            temp = self.restore_x_options.split(";")
            temp_x_options = {}
            for element in temp:
                temp_array = element.split()
                temp_x_options[temp_array[0]] = temp_array[1]
            self.restore_x_options = temp_x_options
        servers_in = [self.servers[i + 1] for i in range(self.num_servers - 1)]
        for bucket in self.buckets:
            bucket.kvs[2] = KVStore()
        self.cluster.rebalance(self.servers[:1], servers_in, [])

    def tearDown(self):
        if not self.input.param("skip_cleanup", True):
            if self.times_teardown_called > 1 :
                if self.os == 'windows':
                    output, error = self.shell.execute_command("taskkill /F /T /IM cbbackup.exe")
                    self.shell.log_command_output(output, error)
                    self.shell.delete_files("/cygdrive/c%s" % (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
                gc.collect()
        if self.input.param("skip_cleanup", True):
            if self.case_number > 1 or self.times_teardown_called > 1:
                if self.os == 'windows':
                    output, error = self.shell.execute_command("taskkill /F /T /IM cbbackup.exe")
                    self.shell.log_command_output(output, error)
                    self.shell.delete_files("/cygdrive/c%s" % (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
                gc.collect()
        self.times_teardown_called += 1
        super(BackupBaseTest, self).tearDown()

    def verify_results(self, server, kv_store=1):
        """This is the verification function for test cases of backup/restore.

        Args:
          server: the master server in the cluster as self.master.
          kv_store: default value is 1. This is the key of the kv_store of each bucket.

        If the command line assigns the command options -k and/or -b and/or --single-node, then in the verification
        function key_name indicates which keys we need to verify and bucket_name indicates which bucket we need to verify.
        If the single-node flag is true, we only need to verify all the buckets on the master node."""
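        # Example (illustrative) of command_options this parser understands:
        #   ["-k nosql.*", "-b default", "--single-node"]
        # -k keeps only keys matching the regex, -b restricts verification to one bucket,
        # and --single-node limits verification to the master node's vbuckets.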

        key_name = None
        bucket_name = None
        single_node_flag = False
        if self.command_options is not None:
            for s in self.command_options:
                if s.find("-k") != -1:
                    sub = s.find(" ")
                    key_name = s[sub + 1:]
                if s.find("-b") != -1:
                    sub = s.find(" ")
                    bucket_name = s[sub + 1:]
                if "--single-node" in self.command_options:
                    single_node_flag = True

        # we delete the buckets whose name does not match the name assigned to -b in KVStore
        self.buckets = [bucket for bucket in self.buckets if bucket_name is None or bucket.name == bucket_name]
        for bucket in self.buckets:
            if key_name is not None:
                valid_keys, deleted_keys = bucket.kvs[kv_store].key_set()
                for key in valid_keys:
                    matchObj = re.search(key_name, key, re.M | re.S) #use regex match to find out keys we need to verify
                    if matchObj is None:
                        partition = bucket.kvs[kv_store].acquire_partition(key)
                        partition.delete(key)  #we delete keys whose prefix does not match the value assigned to -k in KVStore
                        bucket.kvs[kv_store].release_partition(key)
        if single_node_flag is False:
            self._verify_all_buckets(server, kv_store, self.wait_timeout * 50, self.max_verify, True, 1)
        else:
            self.verify_single_node(server, kv_store)

    def verify_single_node(self, server, kv_store=1):
        """This is the verification function for single node backup.

        Args:
          server: the master server in the cluster as self.master.
          kv_store: default value is 1. This is the key of the kv_store of each bucket.

        If the --single-node flag appears in the backup command line, we only back up the items
        from a single node (the master node in this case). For each bucket, we request the vBucketMap. For every key
        in the kvstore of that bucket, we use the hash function to get the vBucketId corresponding to that
        key. Using the vBucketMap, we can tell whether that key lives on the master node or not.
        If yes, keep it. Otherwise delete it."""
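        # Rough sketch of the key -> vBucket mapping relied on below (assumption: the
        # standard Couchbase CRC32-based hashing, which VBucketAwareMemcached implements):
        #
        #   import zlib
        #   vb_id = (zlib.crc32(key) >> 16) & (len(vBucketMap) - 1)
        #   owner_ip = vBucketMap[vb_id].split(":")[0]
        #
        # Keys whose owner is not the master node are dropped from KVStore before verification.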

        rest = RestConnection(server)
        for bucket in self.buckets:
            VBucketAware = VBucketAwareMemcached(rest, bucket.name)
            memcacheds, vBucketMap, vBucketMapReplica = VBucketAware.request_map(rest, bucket.name)
            valid_keys, deleted_keys = bucket.kvs[kv_store].key_set()
            for key in valid_keys:
                vBucketId = VBucketAware._get_vBucket_id(key)
                which_server = vBucketMap[vBucketId]
                sub = which_server.find(":")
                which_server_ip = which_server[:sub]
                if which_server_ip != server.ip:
                    partition = bucket.kvs[kv_store].acquire_partition(key)
                    partition.delete(key)
                    bucket.kvs[kv_store].release_partition(key)

        self._verify_all_buckets(server, kv_store, self.wait_timeout * 50, self.max_verify, True, 1)
Exemple #11
0
class unidirectional(XDCRNewBaseTest):
    def setUp(self):
        super(unidirectional, self).setUp()
        self.src_cluster = self.get_cb_cluster_by_name('C1')
        self.src_master = self.src_cluster.get_master_node()
        self.dest_cluster = self.get_cb_cluster_by_name('C2')
        self.dest_master = self.dest_cluster.get_master_node()

    def tearDown(self):
        super(unidirectional, self).tearDown()

    """Testing Unidirectional load( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters.Create/Update/Delete operations are performed based on doc-ops specified by the user. """

    def load_with_ops(self):
        self.setup_xdcr_and_load()
        self.perform_update_delete()
        self.verify_results()

    """Testing Unidirectional load( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters. Create/Update/Delete are performed in parallel- doc-ops specified by the user. """

    def load_with_async_ops(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    def load_with_async_ops_diff_data_size(self):
        # Load 1 item with size 1
        # 52 alphabets (small and capital letter)
        self.src_cluster.load_all_buckets(52, value_size=1)
        # Load items with below sizes of 1M
        self.src_cluster.load_all_buckets(5, value_size=1000000)
        # Load items with size 10MB
        # Getting memory issue with 20MB data on VMs.
        self.src_cluster.load_all_buckets(1, value_size=10000000)

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed after based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_with_warmup(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            "Test case does not apply for Ephemeral buckets"
            return
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_with_warmup_master(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            "Test case does not apply for Ephemeral buckets"
            return
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.sleep(300)

        self.verify_results()

    def load_with_failover(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_then_add_back(self):

        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.src_cluster.add_back_node()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.dest_cluster.add_back_node()

        self.perform_update_delete()

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_failover_master(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_master()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_master()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.sleep(300)

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_async_failover(self):
        self.setup_xdcr_and_load()

        tasks = []
        if "C1" in self._failover:
            tasks.append(self.src_cluster.async_failover())
        if "C2" in self._failover:
            tasks.append(self.dest_cluster.async_failover())

        self.perform_update_delete()
        self.sleep(self._wait_timeout / 4)

        for task in tasks:
            task.result()

        if "C1" in self._failover:
            self.src_cluster.rebalance_failover_nodes()
        if "C2" in self._failover:
            self.dest_cluster.rebalance_failover_nodes()

        self.verify_results()

    """Replication with compaction ddocs and view queries on both clusters.

        This test begins by loading a given number of items on the source cluster.
        It creates num_views as development/production view with default
        map view funcs(_is_dev_ddoc = True by default) on both clusters.
        Then we disabled compaction for ddoc on src cluster. While we don't reach
        expected fragmentation for ddoc on src cluster we update docs and perform
        view queries for all views. Then we start compaction when fragmentation
        was reached fragmentation_value. When compaction was completed we perform
        a full verification: wait for the disk queues to drain
        and then verify that there has been any data loss on all clusters."""
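    # Note (assumption about the helpers used below): fragmentation_value is a percentage
    # threshold watched by async_monitor_view_fragmentation(), and the view queries use
    # stale=false so every query forces an index update, which is what generates the
    # index fragmentation in the first place.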
    def replication_with_ddoc_compaction(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return

        self.setup_xdcr_and_load()

        num_views = self._input.param("num_views", 5)
        is_dev_ddoc = self._input.param("is_dev_ddoc", True)
        fragmentation_value = self._input.param("fragmentation_value", 80)
        for bucket in self.src_cluster.get_buckets():
            views = Utility.make_default_views(bucket.name, num_views, is_dev_ddoc)

        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = {"full_set": "true", "stale": "false"}

        tasks = self.src_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        tasks += self.dest_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        for task in tasks:
            task.result(self._poll_timeout)

        self.src_cluster.disable_compaction()
        fragmentation_monitor = self.src_cluster.async_monitor_view_fragmentation(prefix + ddoc_name, fragmentation_value, BUCKET_NAME.DEFAULT)

        # generate load until fragmentation reached
        while fragmentation_monitor.state != "FINISHED":
            # update docs to create fragmentation
            self.src_cluster.update_delete_data(OPS.UPDATE)
            for view in views:
                # run queries to create indexes
                self.src_cluster.query_view(prefix + ddoc_name, view.name, query)
        fragmentation_monitor.result()

        compaction_task = self.src_cluster.async_compact_view(prefix + ddoc_name, 'default')

        self.assertTrue(compaction_task.result())

        self.verify_results()

    """Replication with disabled/enabled ddoc compaction on source cluster.

        This test begins by loading a given number of items on the source cluster.
        Then we disabled or enabled compaction on both clusters( set via params).
        Then we mutate and delete data on the source cluster 3 times.
        After deletion we recreate deleted items. When data was changed 3 times
        we perform a full verification: wait for the disk queues to drain
        and then verify that there has been no data loss on both all clusters."""
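    # Note (hedged): repeating the mutation/delete/recreate cycle bumps the revision
    # numbers of the same keys on the source, so revision-based conflict resolution on the
    # target should still converge to the source's data regardless of the compaction setting.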
    def replication_with_disabled_ddoc_compaction(self):
        self.setup_xdcr_and_load()

        if "C1" in self._disable_compaction:
            self.src_cluster.disable_compaction()
        if "C2" in self._disable_compaction:
            self.dest_cluster.disable_compaction()

        # perform doc's ops 3 times to increase rev number
        for _ in range(3):
            self.async_perform_update_delete()
            # restore(re-creating) deleted items
            if 'C1' in self._del_clusters:
                c1_kv_gen = self.src_cluster.get_kv_gen()
                gen_delete = copy.deepcopy(c1_kv_gen[OPS.DELETE])
                self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_delete)
                self.sleep(5)

        self.sleep(600)
        self.verify_results()

    def replication_while_rebooting_a_non_master_destination_node(self):
        self.setup_xdcr_and_load()
        self.src_cluster.set_xdcr_param("xdcrFailureRestartInterval", 1)
        self.perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        rebooted_node = self.dest_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(rebooted_node, self, wait_time=self._wait_timeout * 4, wait_if_warmup=True)

        self.verify_results()

    def replication_with_firewall_enabled(self):
        self.src_cluster.set_xdcr_param("xdcrFailureRestartInterval", 1)
        self.setup_xdcr_and_load()
        self.perform_update_delete()

        NodeHelper.enable_firewall(self.dest_master)
        self.sleep(30)
        NodeHelper.disable_firewall(self.dest_master)
        self.verify_results()

    """Testing Unidirectional append ( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters. """

    def test_append(self):
        self.setup_xdcr_and_load()
        self.verify_results()
        loop_count = self._input.param("loop_count", 20)
        for i in xrange(loop_count):
            self.log.info("Append iteration # %s" % i)
            gen_append = BlobGenerator('loadOne', 'loadOne', self._value_size, end=self._num_items)
            self.src_cluster.load_all_buckets_from_generator(gen_append, ops=OPS.APPEND, batch_size=1)
            self.sleep(self._wait_timeout)
        self.verify_results()

    '''
    This method runs the cbcollectinfo tool after setting up uniXDCR and checks
    whether the output generated by cbcollectinfo contains the xdcr log file or not.
    '''
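    # Rough sketch of the flow (the actual assertions live in
    # clitest.collectinfotest.CollectinfoTests.verify_results): cbcollect_info writes
    # <file_name>.zip on the source master, and the verification presumably unpacks it and
    # checks that the xdcr (goxdcr) log files are present in the archive.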
    def collectinfotest_for_xdcr(self):
        self.load_with_ops()
        self.node_down = self._input.param("node_down", False)
        self.log_filename = self._input.param("file_name", "collectInfo")
        self.shell = RemoteMachineShellConnection(self.src_master)
        self.shell.execute_cbcollect_info("%s.zip" % (self.log_filename))
        from clitest import collectinfotest
        # HACK added self.buckets data member.
        self.buckets = self.src_cluster.get_buckets()
        collectinfotest.CollectinfoTests.verify_results(
            self, self.log_filename
        )

    """ Verify the fix for MB-9548"""
    def verify_replications_deleted_after_bucket_deletion(self):
        self.setup_xdcr_and_load()
        self.verify_results()
        rest_conn = RestConnection(self.src_master)
        replications = rest_conn.get_replications()
        self.assertTrue(replications, "Number of replications should not be 0")
        self.src_cluster.delete_all_buckets()
        self.sleep(60)
        replications = rest_conn.get_replications()
        self.log.info("Replications : %s" % replications)
        self.assertTrue(not replications, "Rest returns replication list even after source bucket is deleted ")

    """ Verify fix for MB-9862"""
    def test_verify_memcache_connections(self):
        allowed_memcached_conn = self._input.param("allowed_connections", 100)
        max_ops_per_second = self._input.param("max_ops_per_second", 2500)
        min_item_size = self._input.param("min_item_size", 128)
        num_docs = self._input.param("num_docs", 30000)
        # start load, max_ops_per_second is the combined limit for all buckets
        mcsodaLoad = LoadWithMcsoda(self.src_master, num_docs, prefix='')
        mcsodaLoad.cfg["max-ops"] = 0
        mcsodaLoad.cfg["max-ops-per-sec"] = max_ops_per_second
        mcsodaLoad.cfg["exit-after-creates"] = 1
        mcsodaLoad.cfg["min-value-size"] = min_item_size
        mcsodaLoad.cfg["json"] = 0
        mcsodaLoad.cfg["batch"] = 100
        loadDataThread = Thread(target=mcsodaLoad.load_data,
                                  name='mcloader_default')
        loadDataThread.daemon = True
        loadDataThread.start()

        src_remote_shell = RemoteMachineShellConnection(self.src_master)
        machine_type = src_remote_shell.extract_remote_info().type.lower()
        while (loadDataThread.isAlive() and machine_type == 'linux'):
            command = "netstat -lpnta | grep 11210 | grep TIME_WAIT | wc -l"
            output, _ = src_remote_shell.execute_command(command)
            if int(output[0]) > allowed_memcached_conn:
                # stop load
                mcsodaLoad.load_stop()
                loadDataThread.join()
                self.fail("Memcached connections {0} are increased above {1} \
                            on Source node".format(
                                                   allowed_memcached_conn,
                                                   int(output[0])))
            self.sleep(5)

        # stop load
        mcsodaLoad.load_stop()
        loadDataThread.join()

    # Test to verify MB-10116
    def verify_ssl_private_key_not_present_in_logs(self):
        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
        try:
            self.shell = RemoteMachineShellConnection(self.src_master)
            self.load_with_ops()
            self.shell.execute_cbcollect_info(zip_file)
            if self.shell.extract_remote_info().type.lower() != "windows":
                command = "unzip %s" % (zip_file)
                output, error = self.shell.execute_command(command)
                self.shell.log_command_output(output, error)
                if len(error) > 0:
                    raise Exception("unable to unzip the files. Check unzip command output for help")
                cmd = 'grep -R "BEGIN RSA PRIVATE KEY" cbcollect_info*/'
                output, _ = self.shell.execute_command(cmd)
            else:
                cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'BEGIN RSA PRIVATE KEY'".format(
                                                    self.src_master.ip,
                                                    self.src_master.rest_username,
                                                    self.src_master.rest_password)
                output, _ = self.shell.execute_command(cmd)
            self.assertTrue(not output, "XDCR SSL Private Key is found diag logs -> %s" % output)
        finally:
            self.shell.delete_files(zip_file)
            self.shell.delete_files("cbcollect_info*")

    # Buckets States
    def delete_recreate_dest_buckets(self):
        self.setup_xdcr_and_load()

        # Remove destination buckets
        self.dest_cluster.delete_all_buckets()

        # Code for re-create_buckets
        self.create_buckets_on_cluster("C2")

        self._resetup_replication_for_recreate_buckets("C2")

        self.async_perform_update_delete()
        self.verify_results()

    def flush_dest_buckets(self):
        self.setup_xdcr_and_load()

        # flush destination buckets
        self.dest_cluster.flush_buckets()

        self.async_perform_update_delete()
        self.verify_results()

    # Nodes Crashing Scenarios
    def __kill_processes(self, crashed_nodes=[]):
        for node in crashed_nodes:
            NodeHelper.kill_erlang(node)

    def __start_cb_server(self, node):
        shell = RemoteMachineShellConnection(node)
        shell.start_couchbase()
        shell.disconnect()

    def test_node_crash_master(self):
        self.setup_xdcr_and_load()

        crashed_nodes = []
        crash = self._input.param("crash", "").split('-')
        if "C1" in crash:
            crashed_nodes.append(self.src_master)
        if "C2" in crash:
            crashed_nodes.append(self.dest_master)

        self.__kill_processes(crashed_nodes)

        for crashed_node in crashed_nodes:
            self.__start_cb_server(crashed_node)

        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.sleep(self._wait_timeout)
        else:
            NodeHelper.wait_warmup_completed(crashed_nodes)

        self.async_perform_update_delete()
        self.verify_results()

    # Disaster at site.
    # 1. Crash Source Cluster., Sleep n second
    # 2. Crash Dest Cluster.
    # 3. Wait for Source Cluster to warmup. Load more data and perform mutations on Src.
    # 4. Wait for Dest to warmup.
    # 5. Verify data.
    def test_node_crash_cluster(self):
        self.setup_xdcr_and_load()

        crashed_nodes = []
        crash = self._input.param("crash", "").split('-')
        if "C1" in crash:
            crashed_nodes += self.src_cluster.get_nodes()
            self.__kill_processes(crashed_nodes)
            self.sleep(30)
        if "C2" in crash:
            crashed_nodes += self.dest_cluster.get_nodes()
            self.__kill_processes(crashed_nodes)

        for crashed_node in crashed_nodes:
            self.__start_cb_server(crashed_node)

        bucket_type = self._input.param("bucket_type", "membase")

        if "C1" in crash:
            if bucket_type == "ephemeral":
                self.sleep(self._wait_timeout)
            else:
                NodeHelper.wait_warmup_completed(self.src_cluster.get_nodes())
            gen_create = BlobGenerator('loadTwo', 'loadTwo', self._value_size, end=self._num_items)
            self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_create)

        self.async_perform_update_delete()

        if "C2" in crash:
            if bucket_type == "ephemeral":
                self.sleep(self._wait_timeout)
            else:
                NodeHelper.wait_warmup_completed(self.dest_cluster.get_nodes())

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest bucket flush """
    def test_idle_xdcr_dest_flush(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        bucket = self.dest_cluster.get_bucket_by_name(BUCKET_NAME.DEFAULT)
        self.dest_cluster.flush_buckets([bucket])

        self.sleep(self._wait_timeout)

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest bucket recreate """
    def test_idle_xdcr_dest_recreate(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        bucket = self.dest_cluster.get_bucket_by_name(BUCKET_NAME.DEFAULT)
        self.dest_cluster.delete_bucket(BUCKET_NAME.DEFAULT)
        bucket_params=self._create_bucket_params(size=bucket.bucket_size)
        self.dest_cluster.create_default_bucket(bucket_params)
        self.sleep(self._wait_timeout)

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest failover """
    def test_idle_xdcr_dest_failover(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout)

        self.verify_results()

    def test_optimistic_replication(self):
        """Tests with 2 buckets with customized optimisic replication thresholds
           one greater than value_size, other smaller
        """
        from xdcrnewbasetests import REPL_PARAM
        self.setup_xdcr_and_load()
        self._wait_for_replication_to_catchup()
        for remote_cluster in self.src_cluster.get_remote_clusters():
            for replication in remote_cluster.get_replications():
                src_bucket_name = replication.get_src_bucket().name
                opt_repl_threshold = replication.get_xdcr_setting(REPL_PARAM.OPTIMISTIC_THRESHOLD)
                docs_opt_replicated_stat = 'replications/%s/docs_opt_repd' %replication.get_repl_id()
                opt_replicated = RestConnection(self.src_master).fetch_bucket_xdcr_stats(
                                        src_bucket_name
                                        )['op']['samples'][docs_opt_replicated_stat][-1]
                self.log.info("Bucket: %s, value size: %s, optimistic threshold: %s"
                              " number of mutations optimistically replicated: %s"
                                %(src_bucket_name,
                                  self._value_size,
                                  opt_repl_threshold,
                                  opt_replicated
                                ))
                if self._value_size <= opt_repl_threshold:
                    if opt_replicated == self._num_items:
                        self.log.info("SUCCESS: All keys in bucket %s were optimistically"
                                      " replicated"
                                      %(replication.get_src_bucket().name))
                    else:
                        self.fail("Value size: %s, optimistic threshold: %s,"
                                  " number of docs optimistically replicated: %s"
                          %(self._value_size, opt_repl_threshold, opt_replicated))
                else:
                    if opt_replicated == 0:
                        self.log.info("SUCCESS: No key in bucket %s was optimistically"
                                      " replicated"

                                      %(replication.get_src_bucket().name))
                    else:
                        self.fail("Partial optimistic replication detected!")

    def test_disk_full(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.sleep(self._wait_timeout)

        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
        try:
            for node in [self.src_master, self.dest_master]:
                self.shell = RemoteMachineShellConnection(node)
                self.shell.execute_cbcollect_info(zip_file)
                if self.shell.extract_remote_info().type.lower() != "windows":
                    command = "unzip %s" % (zip_file)
                    output, error = self.shell.execute_command(command)
                    self.shell.log_command_output(output, error)
                    if len(error) > 0:
                        raise Exception("unable to unzip the files. Check unzip command output for help")
                    cmd = 'grep -R "Approaching full disk warning." cbcollect_info*/'
                    output, _ = self.shell.execute_command(cmd)
                else:
                    cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'Approaching full disk warning.'".format(
                                                        self.src_master.ip,
                                                        self.src_master.rest_username,
                                                        self.src_master.rest_password)
                    output, _ = self.shell.execute_command(cmd)
                self.assertNotEquals(len(output), 0, "Full disk warning not generated as expected in %s" % node.ip)
                self.log.info("Full disk warning generated as expected in %s" % node.ip)

                self.shell.delete_files(zip_file)
                self.shell.delete_files("cbcollect_info*")
        except Exception as e:
            self.log.info(e)

    def test_retry_connections_on_errors_before_restart(self):
        """
        CBQE-3373: Do not restart pipeline as soon as connection errors are
        detected, backoff and retry 5 times before trying to restart pipeline.
        """
        passed = False
        # start data load after setting up xdcr
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        # block port 11210 on target to simulate a connection error
        shell = RemoteMachineShellConnection(self.dest_master)
        out, err = shell.execute_command("/sbin/iptables -A INPUT -p tcp --dport"
                                         " 11210 -j DROP")
        shell.log_command_output(out, err)
        out, err = shell.execute_command("/sbin/iptables -L")
        shell.log_command_output(out, err)

        # complete loading
        for task in load_tasks:
            task.result()

        # wait for goxdcr to detect i/o timeout and try repairing
        self.sleep(self._wait_timeout*5)

        # unblock port 11210 so replication can continue
        out, err = shell.execute_command("/sbin/iptables -D INPUT -p tcp --dport"
                                         " 11210 -j DROP")
        shell.log_command_output(out, err)
        out, err = shell.execute_command("/sbin/iptables -L")
        shell.log_command_output(out, err)
        shell.disconnect()

        # check logs for traces of retry attempts
        for node in self.src_cluster.get_nodes():
            count1 = NodeHelper.check_goxdcr_log(
                            node,
                            "Failed to repair connections to target cluster",
                            goxdcr_log)
            count2 = NodeHelper.check_goxdcr_log(
                            node,
                            "Failed to set up connections to target cluster",
                            goxdcr_log)
            count = count1 + count2
            if count > 0:
                self.log.info('SUCCESS: We tried to repair connections before'
                              ' restarting pipeline')
                passed = True

        if not passed:
            self.fail("No attempts were made to repair connections on %s before"
                      " restarting pipeline" % self.src_cluster.get_nodes())
        self.verify_results()

    def test_verify_mb19802_1(self):
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        conn = RemoteMachineShellConnection(self.dest_cluster.get_master_node())
        conn.stop_couchbase()

        for task in load_tasks:
            task.result()

        conn.start_couchbase()
        self.sleep(300)

        for node in self.src_cluster.get_nodes():
            count = NodeHelper.check_goxdcr_log(
                            node,
                            "batchGetMeta received fatal error and had to abort",
                            goxdcr_log)
            self.assertEqual(count, 0, "batchGetMeta error message found in " + str(node.ip))
            self.log.info("batchGetMeta error message not found in " + str(node.ip))

        self.verify_results()

    def test_verify_mb19802_2(self):
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        self.dest_cluster.failover_and_rebalance_master()

        for task in load_tasks:
            task.result()

        for node in self.src_cluster.get_nodes():
            count = NodeHelper.check_goxdcr_log(
                            node,
                            "batchGetMeta received fatal error and had to abort",
                            goxdcr_log)
            self.assertEqual(count, 0, "batchGetMeta timed out error message found in " + str(node.ip))
            self.log.info("batchGetMeta error message not found in " + str(node.ip))

        self.sleep(300)
        self.verify_results()

    def test_verify_mb19697(self):
        self.setup_xdcr_and_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'

        self.src_cluster.pause_all_replications()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=100000)
        self.src_cluster.load_all_buckets_from_generator(gen)

        self.src_cluster.resume_all_replications()
        self._wait_for_replication_to_catchup()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=100000)
        load_tasks = self.src_cluster.async_load_all_buckets_from_generator(gen)

        self.src_cluster.rebalance_out()

        for task in load_tasks:
            task.result()

        self._wait_for_replication_to_catchup()

        self.src_cluster.rebalance_in()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=100000)
        load_tasks = self.src_cluster.async_load_all_buckets_from_generator(gen)

        self.src_cluster.failover_and_rebalance_master()

        for task in load_tasks:
            task.result()

        self._wait_for_replication_to_catchup()

        for node in self.src_cluster.get_nodes():
            count = NodeHelper.check_goxdcr_log(
                            node,
                            "counter .+ goes backward, maybe due to the pipeline is restarted",
                            goxdcr_log)
            self.assertEqual(count, 0, "counter goes backward, maybe due to the pipeline is restarted "
                                        "error message found in " + str(node.ip))
            self.log.info("counter goes backward, maybe due to the pipeline is restarted "
                                        "error message not found in " + str(node.ip))

        self.sleep(300)
        self.verify_results()

    def test_verify_mb20463(self):
        src_version = NodeHelper.get_cb_version(self.src_cluster.get_master_node())
        if float(src_version[:3]) != 4.5:
            self.log.info("Source cluster has to be at 4.5 for this test")
            return

        servs = self._input.servers[2:4]
        params = {}
        params['num_nodes'] = len(servs)
        params['product'] = 'cb'
        params['version'] = '4.1.2-6088'
        params['vbuckets'] = [1024]
        self.log.info("will install {0} on {1}".format('4.1.2-6088', [s.ip for s in servs]))
        InstallerJob().parallel_install(servs, params)

        if params['product'] in ["couchbase", "couchbase-server", "cb"]:
            success = True
            for server in servs:
                success &= RemoteMachineShellConnection(server).is_couchbase_installed()
                if not success:
                    self.fail("some nodes were not installed successfully on target cluster!")

        self.log.info("4.1.2 installed successfully on target cluster")

        conn = RestConnection(self.dest_cluster.get_master_node())
        conn.add_node(user=self._input.servers[3].rest_username, password=self._input.servers[3].rest_password,
                      remoteIp=self._input.servers[3].ip)
        self.sleep(30)
        conn.rebalance(otpNodes=[node.id for node in conn.node_statuses()])
        self.sleep(30)
        conn.create_bucket(bucket='default', ramQuotaMB=512)

        tasks = self.setup_xdcr_async_load()

        self.sleep(30)

        NodeHelper.enable_firewall(self.dest_master)
        self.sleep(30)
        NodeHelper.disable_firewall(self.dest_master)

        for task in tasks:
            task.result()

        self._wait_for_replication_to_catchup(timeout=600)

        self.verify_results()

    def test_rollback(self):
        bucket = self.src_cluster.get_buckets()[0]
        nodes = self.src_cluster.get_nodes()

        # Stop Persistence on Node A & Node B
        for node in nodes:
            mem_client = MemcachedClientHelper.direct_client(node, bucket)
            mem_client.stop_persistence()

        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'
        self.setup_xdcr()

        self.src_cluster.pause_all_replications()

        gen = BlobGenerator("C1-", "C1-", self._value_size, end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(gen)

        self.src_cluster.resume_all_replications()

        # Perform mutations on the bucket
        self.async_perform_update_delete()

        rest1 = RestConnection(self.src_cluster.get_master_node())
        rest2 = RestConnection(self.dest_cluster.get_master_node())

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info("Before rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))

        # Kill memcached on Node A so that Node B becomes master
        shell = RemoteMachineShellConnection(self.src_cluster.get_master_node())
        shell.kill_memcached()

        # Start persistence on Node B
        mem_client = MemcachedClientHelper.direct_client(nodes[1], bucket)
        mem_client.start_persistence()

        # Failover Node B
        failover_task = self.src_cluster.async_failover()
        failover_task.result()

        # Wait for Failover & rollback to complete
        self.sleep(60)

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info("After rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))

        self.assertTrue(self.src_cluster.wait_for_outbound_mutations(),
                        "Mutations in source cluster not replicated to target after rollback")
        self.log.info("Mutations in source cluster replicated to target after rollback")

        count = NodeHelper.check_goxdcr_log(
                        nodes[0],
                        "Received rollback from DCP stream",
                        goxdcr_log)
        self.assertGreater(count, 0, "rollback did not happen as expected")
        self.log.info("rollback happened as expected")

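    # Regression check for MB-19181: fail over the destination master while the
    # async load is in flight, then assert that no source node's goxdcr log
    # contains the "Can't move update state from" error.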
    def test_verify_mb19181(self):
        load_tasks = self.setup_xdcr_async_load()
        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0]) \
                     + '/goxdcr.log*'

        self.dest_cluster.failover_and_rebalance_master()

        for task in load_tasks:
            task.result()

        for node in self.src_cluster.get_nodes():
            count = NodeHelper.check_goxdcr_log(
                node,
                "Can't move update state from",
                goxdcr_log)
            self.assertEqual(count, 0, "Can't move update state from - error message found in " + str(node.ip))
            self.log.info("Can't move update state from - error message not found in " + str(node.ip))

        self.verify_results()

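    # Regression check for MB-21369: repeatedly pausing and resuming replication
    # should not leak memcached connections; the netstat count on port 11210 is
    # compared against the baseline after every iteration (a drift of up to 5
    # connections is tolerated).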
    def test_verify_mb21369(self):
        repeat = self._input.param("repeat", 5)
        load_tasks = self.setup_xdcr_async_load()

        conn = RemoteMachineShellConnection(self.src_cluster.get_master_node())
        output, error = conn.execute_command("netstat -an | grep " + self.src_cluster.get_master_node().ip
                                             + ":11210 | wc -l")
        conn.log_command_output(output, error)
        before = output[0]
        self.log.info("No. of memcached connections before: {0}".format(output[0]))

        for i in range(0, repeat):
            self.src_cluster.pause_all_replications()
            self.sleep(30)
            self.src_cluster.resume_all_replications()

            self.sleep(self._wait_timeout)

            output, error = conn.execute_command("netstat -an | grep " + self.src_cluster.get_master_node().ip
                                                 + ":11210 | wc -l")
            conn.log_command_output(output, error)
            self.log.info("No. of memcached connections in iteration {0}:  {1}".format(i+1, output[0]))
            self.assertLessEqual(abs(int(output[0]) - int(before)), 5, "Number of memcached connections changed beyond allowed limit")

        for task in load_tasks:
            task.result()

        self.log.info("No. of memcached connections did not increase with pausing and resuming replication multiple times")

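    # With a bucket-level maxTTL configured, docs loaded into the source bucket
    # should expire within maxttl seconds, leaving zero active keys once that
    # window has elapsed.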
    def test_maxttl_setting(self):
        maxttl = int(self._input.param("maxttl", None))
        self.setup_xdcr_and_load()
        self.merge_all_buckets()
        self._wait_for_replication_to_catchup()
        self.sleep(maxttl, "waiting for docs to expire as per maxttl")
        for bucket in self.src_cluster.get_buckets():
            items = RestConnection(self.src_master).get_active_key_count(bucket)
            self.log.info("Docs in source bucket after maxttl has elapsed: {0}".format(items))
            if items != 0:
                self.fail("Doc count in source bucket is not 0 after maxttl has elapsed")
        self._wait_for_replication_to_catchup()

class bidirectional(XDCRNewBaseTest):
    def setUp(self):
        super(bidirectional, self).setUp()
        self.src_cluster = self.get_cb_cluster_by_name('C1')
        self.src_master = self.src_cluster.get_master_node()
        self.dest_cluster = self.get_cb_cluster_by_name('C2')
        self.dest_master = self.dest_cluster.get_master_node()

    def tearDown(self):
        super(bidirectional, self).tearDown()

    def __perform_ops_joint_sets(self):
        # Merging the keys as keys are actually replicated.
        temp_expires = self._expires
        self._expires = 0  # Set to 0 so that merge_buckets doesn't wait for expiration here.
        self.merge_all_buckets()

        tasks = []
        kv_gen_src = self.src_cluster.get_kv_gen()[OPS.CREATE]
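        # Updates are generated over the first _perc_upd% of the key space and
        # deletes over the last _perc_del%, so the two operations hit disjoint
        # key ranges (assuming _perc_upd + _perc_del <= 100).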
        gen_update = BlobGenerator(kv_gen_src.name,
                                   kv_gen_src.seed,
                                   kv_gen_src.value_size,
                                   start=0,
                                   end=int(kv_gen_src.end *
                                           float(self._perc_upd) / 100))
        gen_delete = BlobGenerator(
            kv_gen_src.name,
            kv_gen_src.seed,
            kv_gen_src.value_size,
            start=int(kv_gen_src.end * float(100 - self._perc_del) / 100),
            end=kv_gen_src.end)
        if "C1" in self._upd_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(
                gen_update, OPS.UPDATE, self._expires)
        if "C2" in self._upd_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(
                gen_update, OPS.UPDATE, self._expires)
        if "C1" in self._del_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(
                gen_delete, OPS.DELETE, 0)
        if "C2" in self._del_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(
                gen_delete, OPS.DELETE, 0)

        for task in tasks:
            task.result()

        self._expires = temp_expires
        if (self._wait_for_expiration
                and self._expires) and ("C1" in self._upd_clusters
                                        or "C2" in self._upd_clusters):
            self.sleep(self._expires)

        self.sleep(self._wait_timeout)

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket."""

    def load_with_ops(self):
        self.setup_xdcr_and_load()
        self.perform_update_delete()
        self.verify_results()

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket.
    Here running incremental load on both cluster1 and cluster2 as specified by the user/conf file"""

    def load_with_async_ops(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    """Testing Bidirectional load( Loading at source/destination). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_async_ops_and_joint_sets(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    def load_with_async_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        self.verify_results()

    def load_with_async_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_failover(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_then_add_back(self):

        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.src_cluster.add_back_node()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.dest_cluster.add_back_node()

        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_master(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_master()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_master()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    """Replication with compaction ddocs and view queries on both clusters.

    This test begins by loading a given number of items on both clusters.
    It creates _num_views as development/production view with default
    map view funcs(_is_dev_ddoc = True by default) on both clusters.
    Then we disabled compaction for ddoc on src cluster. While we don't reach
    expected fragmentation for ddoc on src cluster we update docs and perform
    view queries for all views. Then we start compaction when fragmentation
    was reached fragmentation_value. When compaction was completed we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""

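    # For reference, a default map function used elsewhere in this suite looks
    # like:
    #     function (doc) { emit(doc.age, doc.first_name); }
    # The exact views produced by Utility.make_default_views may differ; this is
    # only to illustrate what gets compacted and queried below.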
    def replication_with_ddoc_compaction(self):
        self.setup_xdcr()
        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        num_views = self._input.param("num_views", 5)
        is_dev_ddoc = self._input.param("is_dev_ddoc", True)
        fragmentation_value = self._input.param("fragmentation_value", 80)
        for bucket in self.src_cluster.get_buckets():
            views = Utility.make_default_views(bucket.name, num_views,
                                               is_dev_ddoc)

        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = {"full_set": "true", "stale": "false"}

        tasks = self.src_cluster.async_create_views(ddoc_name, views,
                                                    BUCKET_NAME.DEFAULT)
        tasks += self.dest_cluster.async_create_views(ddoc_name, views,
                                                      BUCKET_NAME.DEFAULT)
        for task in tasks:
            task.result(self._poll_timeout)

        self.src_cluster.disable_compaction()
        fragmentation_monitor = self.src_cluster.async_monitor_view_fragmentation(
            prefix + ddoc_name, fragmentation_value, BUCKET_NAME.DEFAULT)
        # generate load until fragmentation reached
        while fragmentation_monitor.state != "FINISHED":
            # update docs to create fragmentation
            self.src_cluster.update_delete_data(OPS.UPDATE)
            for view in views:
                # run queries to create indexes
                self.src_cluster.query_view(prefix + ddoc_name, view.name,
                                            query)
                self.dest_cluster.query_view(prefix + ddoc_name, view.name,
                                             query)
        fragmentation_monitor.result()

        compaction_task = self.src_cluster.async_compact_view(
            prefix + ddoc_name, 'default')

        self.assertTrue(compaction_task.result())

        self.verify_results()

    def replication_with_view_queries_and_ops(self):
        tasks = []
        try:
            self.setup_xdcr()
            self.src_cluster.load_all_buckets(self._num_items)
            self.dest_cluster.load_all_buckets(self._num_items)

            num_views = self._input.param("num_views", 5)
            is_dev_ddoc = self._input.param("is_dev_ddoc", True)
            for bucket in self.src_cluster.get_buckets():
                views = Utility.make_default_views(bucket.name, num_views,
                                                   is_dev_ddoc)

            ddoc_name = "ddoc1"
            prefix = ("", "dev_")[is_dev_ddoc]

            query = {
                "full_set": "true",
                "stale": "false",
                "connection_timeout": 60000
            }

            tasks = self.src_cluster.async_create_views(
                ddoc_name, views, BUCKET_NAME.DEFAULT)
            tasks += self.dest_cluster.async_create_views(
                ddoc_name, views, BUCKET_NAME.DEFAULT)

            for task in tasks:
                task.result(self._poll_timeout)

            tasks = []
            # Setting up doc-ops at source nodes
            if "C1" in self._upd_clusters:
                tasks.extend(
                    self.src_cluster.async_update_delete(
                        OPS.UPDATE, self._perc_upd, self._expires))
            if "C1" in self._del_clusters:
                tasks.extend(
                    self.src_cluster.async_update_delete(
                        OPS.DELETE, self._perc_del))
            if "C2" in self._upd_clusters:
                tasks.extend(
                    self.dest_cluster.async_update_delete(
                        OPS.UPDATE, self._perc_upd, self._expires))
            if "C2" in self._del_clusters:
                tasks.extend(
                    self.dest_cluster.async_update_delete(
                        OPS.DELETE, self._perc_del))

            self.sleep(5)
            while True:
                for view in views:
                    self.src_cluster.query_view(prefix + ddoc_name, view.name,
                                                query)
                    self.dest_cluster.query_view(prefix + ddoc_name, view.name,
                                                 query)
                if {task.state for task in tasks} != {"FINISHED"}:
                    continue
                else:
                    if self._wait_for_expiration:
                        if "C1" in self._upd_clusters or "C2" in self._upd_clusters:
                            self.sleep(self._expires)
                    break

            self.merge_all_buckets()
            self.src_cluster.verify_items_count()
            self.dest_cluster.verify_items_count()

            tasks = []
            src_buckets = self.src_cluster.get_buckets()
            dest_buckets = self.dest_cluster.get_buckets()
            for view in views:
                tasks.append(
                    self.src_cluster.async_query_view(
                        prefix + ddoc_name, view.name, query,
                        src_buckets[0].kvs[1].__len__()))
                tasks.append(
                    self.src_cluster.async_query_view(
                        prefix + ddoc_name, view.name, query,
                        dest_buckets[0].kvs[1].__len__()))

            for task in tasks:
                task.result(self._poll_timeout)

            self.verify_results()
        finally:
            # For timeout error, all tasks to be cancelled
            # Before proceeding to next test
            for task in tasks:
                task.cancel()

    """Replication with disabled/enabled ddoc compaction on both clusters.

    This test begins by loading a given number of items on both clusters.
    Then we disabled or enabled compaction on both clusters( set via params).
    Then we mutate and delete data on clusters 3 times. After deletion we recreate
    deleted items. When data was changed 3 times we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""

    def replication_with_disabled_ddoc_compaction(self):
        self.setup_xdcr()
        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        if "C1" in self._disable_compaction:
            self.src_cluster.disable_compaction()
        if "C2" in self._disable_compaction:
            self.dest_cluster.disable_compaction()

        # perform doc's ops 3 times to increase rev number
        for _ in range(3):
            self.async_perform_update_delete()
            # wait till deletes have been sent to recreate
            self.sleep(60)
            # restore(re-creating) deleted items
            if 'C1' in self._del_clusters:
                c1_kv_gen = self.src_cluster.get_kv_gen()

                c1_gen_delete = copy.deepcopy(c1_kv_gen[OPS.DELETE])
                if self._expires:
                    # if expiration set, recreate those keys before
                    # trying to update
                    c1_gen_update = copy.deepcopy(c1_kv_gen[OPS.UPDATE])
                    self.src_cluster.load_all_buckets_from_generator(
                        kv_gen=c1_gen_update)
                self.src_cluster.load_all_buckets_from_generator(
                    kv_gen=c1_gen_delete)
            if 'C2' in self._del_clusters:
                c2_kv_gen = self.dest_cluster.get_kv_gen()
                c2_gen_delete = copy.deepcopy(c2_kv_gen[OPS.DELETE])
                if self._expires:
                    c2_gen_update = copy.deepcopy(c2_kv_gen[OPS.UPDATE])
                    self.dest_cluster.load_all_buckets_from_generator(
                        kv_gen=c2_gen_update)
                self.dest_cluster.load_all_buckets_from_generator(
                    kv_gen=c2_gen_delete)
            # wait till we recreate deleted keys before we can delete/update
            self.sleep(60)

        self.verify_results()

    def replication_while_rebooting_a_non_master_src_dest_node(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout)

        reboot_node_dest = self.dest_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_dest,
                                       self,
                                       wait_time=self._wait_timeout * 4,
                                       wait_if_warmup=True)

        reboot_node_src = self.src_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_src,
                                       self,
                                       wait_time=self._wait_timeout * 4,
                                       wait_if_warmup=True)

        self.sleep(120)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(
            [reboot_node_dest], self, wait_if_warmup=True)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(
            [reboot_node_src], self, wait_if_warmup=True)
        self.verify_results()

    def test_disk_full(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.sleep(self._wait_timeout)

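        # Run cbcollect_info on both masters and look for the
        # "Approaching full disk warning." message: on Linux the archive is
        # unzipped and grepped, on Windows the /diag REST endpoint is queried
        # instead.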
        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
        try:
            for node in [self.src_master, self.dest_master]:
                self.shell = RemoteMachineShellConnection(node)
                self.shell.execute_cbcollect_info(zip_file)
                if self.shell.extract_remote_info().type.lower() != "windows":
                    command = "unzip %s" % (zip_file)
                    output, error = self.shell.execute_command(command)
                    self.shell.log_command_output(output, error)
                    if len(error) > 0:
                        raise Exception(
                            "unable to unzip the files. Check unzip command output for help"
                        )
                    cmd = 'grep -R "Approaching full disk warning." cbcollect_info*/'
                    output, _ = self.shell.execute_command(cmd)
                else:
                    cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'Approaching full disk warning.'".format(
                        self.src_master.ip, self.src_master.rest_username,
                        self.src_master.rest_password)
                    output, _ = self.shell.execute_command(cmd)
                self.assertNotEqual(
                    len(output), 0,
                    "Full disk warning not generated as expected in %s" %
                    node.ip)
                self.log.info("Full disk warning generated as expected in %s" %
                              node.ip)

                self.shell.delete_files(zip_file)
                self.shell.delete_files("cbcollect_info*")
        except Exception as e:
            self.log.info(e)

class CollectinfoTests(CliBaseTest):

    def setUp(self):
        super(CollectinfoTests, self).setUp()
        self.log_filename = self.input.param("filename", "info")
        self.doc_ops = self.input.param("doc_ops", None)
        self.expire_time = self.input.param("expire_time", 5)
        self.value_size = self.input.param("value_size", 256)
        self.node_down = self.input.param("node_down", False)
        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

    def tearDown(self):
        super(CollectinfoTests, self).tearDown()

    def collectinfo_test(self):
        """We use cbcollect_info to automatically collect the logs for server node

        First we load some items to the node. Optionally you can do some mutation
        against these items. Then we use cbcollect_info the automatically generate
        the zip file containing all the logs about the node. We want to verify we have
        all the log files according to the LOG_FILE_NAME_LIST and in stats.log, we have
        stats for all the buckets we have created"""

        gen_load = BlobGenerator('nosql', 'nosql-', self.value_size,
                                                  end=self.num_items)
        gen_update = BlobGenerator('nosql', 'nosql-', self.value_size,
                                          end=(self.num_items // 2 - 1))
        gen_expire = BlobGenerator('nosql', 'nosql-', self.value_size,
                                             start=self.num_items // 2,
                                             end=(self.num_items * 3 // 4 - 1))
        gen_delete = BlobGenerator('nosql', 'nosql-', self.value_size,
                                                 start=self.num_items * 3 // 4,
                                                          end=self.num_items)
        self._load_all_buckets(self.master, gen_load, "create", 0)

        if(self.doc_ops is not None):
            if("update" in self.doc_ops):
                self._load_all_buckets(self.master, gen_update, "update", 0)
            if("delete" in self.doc_ops):
                self._load_all_buckets(self.master, gen_delete, "delete", 0)
            if("expire" in self.doc_ops):
                self._load_all_buckets(self.master, gen_expire, "update",\
                                                               self.expire_time)
                self.sleep(self.expire_time + 1)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])

        self.shell.delete_files("%s.zip" % (self.log_filename))
        """ This is the folder generated after unzip the log package """
        self.shell.delete_files("cbcollect_info*")

        cb_server_started = False
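        # When node_down is set, autofailover is disabled and memcached/beam.smp
        # are killed so that cbcollect_info is exercised against a node whose
        # Couchbase server is not running.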
        if self.node_down:
            """ set autofailover to off """
            rest = RestConnection(self.master)
            rest.update_autofailover_settings(False, 60)
            if self.os == 'linux':
                output, error = self.shell.execute_command(
                                    "killall -9 memcached & killall -9 beam.smp")
                self.shell.log_command_output(output, error)
        output, error = self.shell.execute_cbcollect_info("%s.zip"
                                                           % (self.log_filename))

        if self.os != "windows":
            if len(error) > 0:
                if self.node_down:
                    shell = RemoteMachineShellConnection(self.master)
                    shell.start_server()
                    self.sleep(15)
                    shell.disconnect()
                raise Exception("Command throw out error: %s " % error)

            for output_line in output:
                if output_line.find("ERROR") >= 0 or output_line.find("Error") >= 0:
                    if "from http endpoint" in output_line.lower():
                        continue

                    """ remove this code when bug in MB-45867 is fixed """
                    if "error occurred getting server guts" in output_line.lower() or \
                       "error: unable to retrieve statistics" in output_line.lower():
                        continue
                    """ *************************** """

                    if self.node_down:
                        shell = RemoteMachineShellConnection(self.master)
                        shell.start_server()
                        self.sleep(15)
                        shell.disconnect()
                    raise Exception("Command throw out error: %s " % output_line)
        try:
            if self.node_down:
                if self.os == 'linux':
                    self.shell = RemoteMachineShellConnection(self.master)
                    self.shell.start_server()
                    self.sleep(30, "wait for server up completely")
                    rest = RestConnection(self.master)
                    if RestHelper(rest).is_ns_server_running(timeout_in_seconds=60):
                        cb_server_started = True
                    else:
                        self.fail("CB server failed to start")
            self.verify_results(self, self.log_filename)
        finally:
            if self.node_down and not cb_server_started:
                if self.os == 'linux':
                    self.shell.start_server()
                    rest = RestConnection(self.master)
                    if not RestHelper(rest).is_ns_server_running(timeout_in_seconds=60):
                        self.fail("CB server failed to start")

    def test_cbcollectinfo_detect_container(self):
        """ this test only runs inside docker host and
            detect if a node is a docker container.
            It should run with param skip_init_check_cbserver=true """
        docker_id = None
        if "." in self.ip:
            self.fail("This test only run in docker host")
        elif self.ip is not None:
            docker_id = self.ip
            os.system("docker exec %s %scbcollect_info testlog.zip"
                                        % (docker_id, self.cli_command_path))
            os.system("docker cp %s:/testlog.zip ." % (docker_id))
            os.system("unzip testlog.zip")
            output = call("cd cbcollect_info_*; grep 'docker' ./* ")
            if output and "docker" in output:
                self.log.info("cbcollect log detected docker container")
            else:
                self.fail("cbcollect info could not detect docker container")
        os.system("docker exec %s rm testlog.zip" % (docker_id))

    def test_not_collect_stats_hash_in_cbcollectinfo(self):
        """ this test verifies we don't collect stats hash
            in when run cbcollectinfo
            params: nodes_init=2
        """
        check_version = ["5.1.2", "5.5.1"]
        mesg = "memcached stats ['hash', 'detail']"
        if self.cb_version[:5] not in check_version \
                                   and float(self.cb_version[:3]) < 6.0:
            self.log.info("\nThis version {0} does not need the {1} check"\
                                          .format(self.cb_version, mesg))
            return
        self.shell.delete_files("{0}.zip".format(self.log_filename))
        """ This is the folder generated after unzip the log package """
        self.shell.delete_files("cbcollect_info*")
        output, error = self.shell.execute_cbcollect_info("%s.zip"
                                                           % (self.log_filename))
        if output:
            for x in output:
                if x.startswith(mesg):
                    self.fail("cbcollectinfo should not collect {0}".format(mesg))
        self.log.info("cbcollectinfo does not collect {0}".format(mesg))

    @staticmethod
    def verify_results(self, output_file_name):
        try:
            os = "linux"
            zip_file = "%s.zip" % (output_file_name)
            info = self.shell.extract_remote_info()
            type = info.type.lower()
            if type == 'windows':
                os = "windows"

            if os == "linux":
                command = "unzip %s" % (zip_file)
                output, error = self.shell.execute_command(command)
                self.sleep(2)
                if self.debug_logs:
                    self.shell.log_command_output(output, error)
                if len(error) > 0:
                    raise Exception("unable to unzip the files. Check unzip command output for help")

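                # List the unpacked archive and check that every file in
                # LOG_FILE_NAMES is present; service-specific logs (fts_diag.json,
                # indexer_mprof.log, indexer_pprof.log) are appended to the
                # expected list when the node runs those services.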
                command = "ls cbcollect_info*/"
                output, error = self.shell.execute_command(command)
                if self.debug_logs:
                    self.shell.log_command_output(output, error)
                if len(error) > 0:
                    raise Exception("unable to list the files. Check ls command output for help")
                missing_logs = False
                nodes_services = RestConnection(self.master).get_nodes_services()
                for node, services in list(nodes_services.items()):
                    for service in services:
                        if service.encode("ascii") == "fts" and \
                                     self.master.ip in node and \
                                    "fts_diag.json" not in LOG_FILE_NAMES:
                            LOG_FILE_NAMES.append("fts_diag.json")
                        if service.encode("ascii") == "index" and \
                                            self.master.ip in node:
                            if "indexer_mprof.log" not in LOG_FILE_NAMES:
                                LOG_FILE_NAMES.append("indexer_mprof.log")
                            if "indexer_pprof.log" not in LOG_FILE_NAMES:
                                LOG_FILE_NAMES.append("indexer_pprof.log")
                if self.debug_logs:
                    self.log.info('\nlog files sample: {0}'.format(LOG_FILE_NAMES))
                    self.log.info('\nlog files in zip: {0}'.format(output))

                for x in LOG_FILE_NAMES:
                    find_log = False
                    for output_line in output:
                        if output_line.find(x) >= 0:
                            find_log = True
                    if not find_log:
                        # missing syslog.tar.gz in mac as in ticket MB-9110
                        # need to remove 3 lines below if it is fixed in 2.2.1
                        # in mac os
                        if x == "syslog.tar.gz" and info.distribution_type.lower() == "mac":
                            missing_logs = False
                        else:
                            missing_logs = True
                            self.log.error("The log zip file miss %s" % (x))

                missing_buckets = False
                if not self.node_down:
                    for bucket in self.buckets:
                        command = "grep %s cbcollect_info*/stats.log" % (bucket.name)
                        output, error = self.shell.execute_command(command)
                        if self.debug_logs:
                            self.shell.log_command_output(output, error)
                        if len(error) > 0:
                            raise Exception("unable to grep key words. Check grep command output for help")
                        if len(output) == 0:
                            missing_buckets = True
                            self.log.error("%s stats are missed in stats.log" % (bucket.name))

                command = "du -s cbcollect_info*/*"
                output, error = self.shell.execute_command(command)
                if self.debug_logs:
                    self.shell.log_command_output(output, error)
                empty_logs = False
                if len(error) > 0:
                    raise Exception("unable to list file size. Check du command output for help")
                for output_line in output:
                    output_line = output_line.split()
                    file_size = int(output_line[0])
                    if "dist_cfg" in output_line[1]:
                        continue
                    if self.debug_logs:
                        print(("File size: ", file_size))
                    if file_size == 0:
                        if "kv_trace" in output_line[1] and self.node_down:
                            continue
                        else:
                            empty_logs = True
                            self.log.error("%s is empty" % (output_line[1]))

                if missing_logs:
                    raise Exception("Bad log file package generated. Missing logs")
                if missing_buckets:
                    raise Exception("Bad stats.log which miss some bucket information")
                if empty_logs:
                    raise Exception("Collect empty log files")
            elif os == "windows":
                # try to figure out what command works for windows for verification
                pass

        finally:
            self.shell.delete_files(zip_file)
            self.shell.delete_files("cbcollect_info*")

    def collectinfo_test_for_views(self):
        self.default_design_doc_name = "Doc1"
        self.view_name = self.input.param("view_name", "View")
        self.generate_map_reduce_error = self.input.param("map_reduce_error", False)
        self.default_map_func = 'function (doc) { emit(doc.age, doc.first_name);}'
        self.gen_load = BlobGenerator('couch', 'cb-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, self.gen_load, "create", 0)
        self.reduce_fn = "_count"
        expected_num_items = self.num_items
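        # With map_reduce_error set, the reduce function becomes _sum while the
        # map emits string values (doc.first_name), so the view query is expected
        # to fail with a map/reduce error (presumably so the error shows up in
        # the collected logs); expected_num_items is cleared since no count can
        # be verified in that case.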
        if self.generate_map_reduce_error:
            self.reduce_fn = "_sum"
            expected_num_items = None

        view = View(self.view_name, self.default_map_func, self.reduce_fn, dev_view=False)
        self.cluster.create_view(self.master, self.default_design_doc_name, view,
                                 'default', self.wait_timeout * 2)
        query = {"stale": "false", "connection_timeout": 60000}
        try:
            self.cluster.query_view(self.master, self.default_design_doc_name, self.view_name, query,
                                expected_num_items, 'default', timeout=self.wait_timeout)
        except Exception as ex:
            if not self.generate_map_reduce_error:
                raise ex
        self.shell.execute_cbcollect_info("%s.zip" % (self.log_filename))
        self.verify_results(self, self.log_filename)

    def test_default_collect_logs_in_cluster(self):
        """
           In a cluster, if we run cbcollectinfo from 1 node, it will collect logs
           on 1 node only.
           Initial nodes: 3
        """
        gen_load = BlobGenerator('cbcollect', 'cbcollect-', self.value_size,
                                                            end=self.num_items)
        self._load_all_buckets(self.master, gen_load, "create", 0)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        self.log.info("Delete old logs files")
        self.shell.delete_files("%s.zip" % (self.log_filename))
        self.log.info("Delete old logs directory")
        self.shell.delete_files("cbcollect_info*")
        output, error = self.shell.execute_cbcollect_info("%s.zip "\
                                                       % (self.log_filename))
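        # Every cbcollect output line containing noLogs=1 must also carry
        # oneNode=1; otherwise cbcollect would presumably be gathering diag from
        # more than just this node.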
        if output:
            if self.debug_logs:
                self.shell.log_command_output(output, error)
            for line in output:
                if "noLogs=1" in line:
                    if "oneNode=1" not in line:
                        self.log.error("Error line: %s" % line)
                        self.fail("cbcollect did not set to collect diag only at 1 node ")
        self.verify_results(self, self.log_filename)

    def test_cbcollectinfo_memory_usuage(self):
        """
           Test to make sure cbcollectinfo does not use a lot of memory.
           We run the test with 200K items of size 128 bytes
        """
        gen_load = BlobGenerator('cbcollect', 'cbcollect-', self.value_size,
                                                            end=200000)
        self._load_all_buckets(self.master, gen_load, "create", 0)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        self.log.info("Delete old logs files")
        self.shell.delete_files("%s.zip" % (self.log_filename))
        self.log.info("Delete old logs directory")
        self.shell.delete_files("cbcollect_info*")
        collect_threads = []
        col_thread = Thread(target=self.shell.execute_cbcollect_info,
                            args=("%s.zip" % self.log_filename,))
        collect_threads.append(col_thread)
        col_thread.start()
        monitor_mem_thread = Thread(target=self._monitor_collect_log_mem_process)
        collect_threads.append(monitor_mem_thread)
        monitor_mem_thread.start()
        self.thread_end = False
        while not self.thread_end:
            if not col_thread.is_alive():
                self.thread_end = True
        for t in collect_threads:
            t.join()

    def _monitor_collect_log_mem_process(self):
        shell = RemoteMachineShellConnection(self.master)
        vsz, rss = RemoteMachineHelper(shell).monitor_process_memory('cbcollect_info')
        # largest jump between consecutive memory samples, in KB
        vsz_delta = max(abs(x - y) for (x, y) in zip(vsz[1:], vsz[:-1]))
        rss_delta = max(abs(x - y) for (x, y) in zip(rss[1:], rss[:-1]))
        self.log.info("The largest delta in VSZ: %s KB " % vsz_delta)
        self.log.info("The largest delta in RSS: %s KB " % rss_delta)

        if vsz_delta > 20000:
            self.fail("cbcollect_info VSZ spiked by more than 20 MB")
Exemple #14
0
class bidirectional(XDCRNewBaseTest):
    def setUp(self):
        super(bidirectional, self).setUp()
        self.src_cluster = self.get_cb_cluster_by_name('C1')
        self.src_master = self.src_cluster.get_master_node()
        self.dest_cluster = self.get_cb_cluster_by_name('C2')
        self.dest_master = self.dest_cluster.get_master_node()

    def tearDown(self):
        super(bidirectional, self).tearDown()

    def __perform_ops_joint_sets(self):
        # Merging the keys as keys are actually replicated.
        temp_expires = self._expires
        self._expires = 0  # Set to 0 so that merge_buckets doesn't wait for expiration here.
        self.merge_all_buckets()

        tasks = []
        kv_gen_src = self.src_cluster.get_kv_gen()[OPS.CREATE]
        gen_update = BlobGenerator(kv_gen_src.name,
                                   kv_gen_src.seed,
                                   kv_gen_src.value_size,
                                   start=0,
                                   end=int(kv_gen_src.end *
                                           float(self._perc_upd) / 100))
        gen_delete = BlobGenerator(
            kv_gen_src.name,
            kv_gen_src.seed,
            kv_gen_src.value_size,
            start=int(kv_gen_src.end * float(100 - self._perc_del) / 100),
            end=kv_gen_src.end)
        if "C1" in self._upd_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(
                gen_update, OPS.UPDATE, self._expires)
        if "C2" in self._upd_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(
                gen_update, OPS.UPDATE, self._expires)
        if "C1" in self._del_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(
                gen_delete, OPS.DELETE, 0)
        if "C2" in self._del_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(
                gen_delete, OPS.DELETE, 0)

        for task in tasks:
            task.result()

        self._expires = temp_expires
        if (self._wait_for_expiration
                and self._expires) and ("C1" in self._upd_clusters
                                        or "C2" in self._upd_clusters):
            self.sleep(self._expires)

        self.sleep(self._wait_timeout)

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket."""

    def load_with_ops(self):
        self.setup_xdcr_and_load()
        self.perform_update_delete()
        self.verify_results()

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket.
    Here running incremental load on both cluster1 and cluster2 as specified by the user/conf file"""

    def load_with_async_ops(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    """Testing Bidirectional load( Loading at source/destination). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_async_ops_and_joint_sets(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    def load_with_async_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout // 2)
        self.verify_results()

    def load_with_async_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout // 2)
        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            "Test case does not apply for Ephemeral buckets"
            return
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout // 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout // 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_failover(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout // 6)
        self.perform_update_delete()
        self.sleep(300)

        self.verify_results()

    def load_with_failover_then_add_back(self):

        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.src_cluster.add_back_node()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.dest_cluster.add_back_node()

        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_master(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_master()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_master()

        self.sleep(self._wait_timeout // 6)
        self.perform_update_delete()

        self.verify_results()

    """Replication with compaction ddocs and view queries on both clusters.

    This test begins by loading a given number of items on both clusters.
    It creates _num_views as development/production view with default
    map view funcs(_is_dev_ddoc = True by default) on both clusters.
    Then we disabled compaction for ddoc on src cluster. While we don't reach
    expected fragmentation for ddoc on src cluster we update docs and perform
    view queries for all views. Then we start compaction when fragmentation
    was reached fragmentation_value. When compaction was completed we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""

    def replication_with_ddoc_compaction(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return

        self.setup_xdcr()

        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        num_views = self._input.param("num_views", 5)
        is_dev_ddoc = self._input.param("is_dev_ddoc", True)
        fragmentation_value = self._input.param("fragmentation_value", 80)
        for bucket in self.src_cluster.get_buckets():
            views = Utility.make_default_views(bucket.name, num_views,
                                               is_dev_ddoc)

        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = {"full_set": "true", "stale": "false"}

        tasks = self.src_cluster.async_create_views(ddoc_name, views,
                                                    BUCKET_NAME.DEFAULT)
        tasks += self.dest_cluster.async_create_views(ddoc_name, views,
                                                      BUCKET_NAME.DEFAULT)
        for task in tasks:
            task.result(self._poll_timeout)

        self.src_cluster.disable_compaction()
        fragmentation_monitor = self.src_cluster.async_monitor_view_fragmentation(
            prefix + ddoc_name, fragmentation_value, BUCKET_NAME.DEFAULT)
        # generate load until fragmentation reached
        while fragmentation_monitor.state != "FINISHED":
            # update docs to create fragmentation
            self.src_cluster.update_delete_data(OPS.UPDATE)
            for view in views:
                # run queries to create indexes
                self.src_cluster.query_view(prefix + ddoc_name, view.name,
                                            query)
                self.dest_cluster.query_view(prefix + ddoc_name, view.name,
                                             query)
        fragmentation_monitor.result()

        compaction_task = self.src_cluster.async_compact_view(
            prefix + ddoc_name, 'default')

        self.assertTrue(compaction_task.result())

        self.verify_results()

    def replication_with_view_queries_and_ops(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return
        tasks = []
        try:
            self.setup_xdcr()

            self.src_cluster.load_all_buckets(self._num_items)
            self.dest_cluster.load_all_buckets(self._num_items)

            num_views = self._input.param("num_views", 5)
            is_dev_ddoc = self._input.param("is_dev_ddoc", True)
            for bucket in self.src_cluster.get_buckets():
                views = Utility.make_default_views(bucket.name, num_views,
                                                   is_dev_ddoc)

            ddoc_name = "ddoc1"
            prefix = ("", "dev_")[is_dev_ddoc]

            query = {
                "full_set": "true",
                "stale": "false",
                "connection_timeout": 60000
            }

            tasks = self.src_cluster.async_create_views(
                ddoc_name, views, BUCKET_NAME.DEFAULT)
            tasks += self.dest_cluster.async_create_views(
                ddoc_name, views, BUCKET_NAME.DEFAULT)

            for task in tasks:
                task.result(self._poll_timeout)

            tasks = []
            # Setting up doc-ops at source nodes
            if "C1" in self._upd_clusters:
                tasks.extend(
                    self.src_cluster.async_update_delete(
                        OPS.UPDATE, self._perc_upd, self._expires))
            if "C1" in self._del_clusters:
                tasks.extend(
                    self.src_cluster.async_update_delete(
                        OPS.DELETE, self._perc_del))
            if "C2" in self._upd_clusters:
                tasks.extend(
                    self.dest_cluster.async_update_delete(
                        OPS.UPDATE, self._perc_upd, self._expires))
            if "C2" in self._del_clusters:
                tasks.extend(
                    self.dest_cluster.async_update_delete(
                        OPS.DELETE, self._perc_del))

            self.sleep(5)
            while True:
                for view in views:
                    self.src_cluster.query_view(prefix + ddoc_name, view.name,
                                                query)
                    self.dest_cluster.query_view(prefix + ddoc_name, view.name,
                                                 query)
                if {task.state for task in tasks} != {"FINISHED"}:
                    continue
                else:
                    if self._wait_for_expiration:
                        if "C1" in self._upd_clusters or "C2" in self._upd_clusters:
                            self.sleep(self._expires)
                    break

            self.merge_all_buckets()
            self.src_cluster.verify_items_count()
            self.dest_cluster.verify_items_count()

            tasks = []
            src_buckets = self.src_cluster.get_buckets()
            dest_buckets = self.dest_cluster.get_buckets()
            for view in views:
                tasks.append(
                    self.src_cluster.async_query_view(
                        prefix + ddoc_name, view.name, query,
                        src_buckets[0].kvs[1].__len__()))
                tasks.append(
                    self.src_cluster.async_query_view(
                        prefix + ddoc_name, view.name, query,
                        dest_buckets[0].kvs[1].__len__()))

            for task in tasks:
                task.result(self._poll_timeout)

            self.verify_results()
        finally:
            # For timeout error, all tasks to be cancelled
            # Before proceeding to next test
            for task in tasks:
                task.cancel()

    """Replication with disabled/enabled ddoc compaction on both clusters.

    This test begins by loading a given number of items on both clusters.
    Then we disabled or enabled compaction on both clusters( set via params).
    Then we mutate and delete data on clusters 3 times. After deletion we recreate
    deleted items. When data was changed 3 times we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""

    def replication_with_disabled_ddoc_compaction(self):
        self.setup_xdcr()
        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        if "C1" in self._disable_compaction:
            self.src_cluster.disable_compaction()
        if "C2" in self._disable_compaction:
            self.dest_cluster.disable_compaction()

        # perform doc's ops 3 times to increase rev number
        for _ in range(3):
            self.async_perform_update_delete()
            # wait till deletes have been sent to recreate
            self.sleep(60)
            # restore(re-creating) deleted items
            if 'C1' in self._del_clusters:
                c1_kv_gen = self.src_cluster.get_kv_gen()

                c1_gen_delete = copy.deepcopy(c1_kv_gen[OPS.DELETE])
                if self._expires:
                    # if expiration set, recreate those keys before
                    # trying to update
                    c1_gen_update = copy.deepcopy(c1_kv_gen[OPS.UPDATE])
                    self.src_cluster.load_all_buckets_from_generator(
                        kv_gen=c1_gen_update)
                self.src_cluster.load_all_buckets_from_generator(
                    kv_gen=c1_gen_delete)
            if 'C2' in self._del_clusters:
                c2_kv_gen = self.dest_cluster.get_kv_gen()
                c2_gen_delete = copy.deepcopy(c2_kv_gen[OPS.DELETE])
                if self._expires:
                    c2_gen_update = copy.deepcopy(c2_kv_gen[OPS.UPDATE])
                    self.dest_cluster.load_all_buckets_from_generator(
                        kv_gen=c2_gen_update)
                self.dest_cluster.load_all_buckets_from_generator(
                    kv_gen=c2_gen_delete)
            # wait till we recreate deleted keys before we can delete/update
            self.sleep(300)

        self.verify_results()

    def replication_while_rebooting_a_non_master_src_dest_node(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout)

        reboot_node_dest = self.dest_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_dest,
                                       self,
                                       wait_time=self._wait_timeout * 4,
                                       wait_if_warmup=True)

        reboot_node_src = self.src_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_src,
                                       self,
                                       wait_time=self._wait_timeout * 4,
                                       wait_if_warmup=True)

        self.sleep(120)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(
            [reboot_node_dest], self, wait_if_warmup=True)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(
            [reboot_node_src], self, wait_if_warmup=True)
        self.verify_results()

    def test_disk_full(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.sleep(self._wait_timeout)

        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
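        # For each master node: run cbcollect_info, then look for the
        # "Approaching full disk warning." line either in the unzipped
        # collection (non-Windows) or via the /diag REST endpoint (Windows)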
        try:
            for node in [self.src_master, self.dest_master]:
                self.shell = RemoteMachineShellConnection(node)
                self.shell.execute_cbcollect_info(zip_file)
                if self.shell.extract_remote_info().type.lower() != "windows":
                    command = "unzip %s" % (zip_file)
                    output, error = self.shell.execute_command(command)
                    self.shell.log_command_output(output, error)
                    if len(error) > 0:
                        raise Exception(
                            "unable to unzip the files. Check unzip command output for help"
                        )
                    cmd = 'grep -R "Approaching full disk warning." cbcollect_info*/'
                    output, _ = self.shell.execute_command(cmd)
                else:
                    cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'Approaching full disk warning.'".format(
                        self.src_master.ip, self.src_master.rest_username,
                        self.src_master.rest_password)
                    output, _ = self.shell.execute_command(cmd)
                self.assertNotEqual(
                    len(output), 0,
                    "Full disk warning not generated as expected in %s" %
                    node.ip)
                self.log.info("Full disk warning generated as expected in %s" %
                              node.ip)

                self.shell.delete_files(zip_file)
                self.shell.delete_files("cbcollect_info*")
        except Exception as e:
            self.log.info(e)

    def test_rollback(self):
        bucket = self.src_cluster.get_buckets()[0]
        src_nodes = self.src_cluster.get_nodes()
        dest_nodes = self.dest_cluster.get_nodes()
        nodes = src_nodes + dest_nodes

        # Stop Persistence on Node A & Node B
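        # With persistence stopped, killing memcached later discards the
        # unpersisted mutations; after failover the surviving node's shorter
        # persisted history forces the XDCR DCP stream to roll back, which is
        # what the goxdcr.log check at the end of this test asserts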
        for node in nodes:
            mem_client = MemcachedClientHelper.direct_client(node, bucket)
            mem_client.stop_persistence()

        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'
        self.setup_xdcr()

        self.src_cluster.pause_all_replications()
        self.dest_cluster.pause_all_replications()

        gen = BlobGenerator("C1-",
                            "C1-",
                            self._value_size,
                            end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(gen)
        gen = BlobGenerator("C2-",
                            "C2-",
                            self._value_size,
                            end=self._num_items)
        self.dest_cluster.load_all_buckets_from_generator(gen)

        self.src_cluster.resume_all_replications()
        self.dest_cluster.resume_all_replications()

        # Perform mutations on the bucket
        self.async_perform_update_delete()

        rest1 = RestConnection(self.src_cluster.get_master_node())
        rest2 = RestConnection(self.dest_cluster.get_master_node())

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(
            bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(
            bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info(
            "Before rollback src cluster count = {0} dest cluster count = {1}".
            format(_count1, _count2))

        # Kill memcached on Node A so that Node B becomes master
        shell = RemoteMachineShellConnection(
            self.src_cluster.get_master_node())
        shell.kill_memcached()
        shell = RemoteMachineShellConnection(
            self.dest_cluster.get_master_node())
        shell.kill_memcached()

        # Start persistence on Node B
        mem_client = MemcachedClientHelper.direct_client(src_nodes[1], bucket)
        mem_client.start_persistence()
        mem_client = MemcachedClientHelper.direct_client(dest_nodes[1], bucket)
        mem_client.start_persistence()

        # Failover Node B
        failover_task = self.src_cluster.async_failover()
        failover_task.result()
        failover_task = self.dest_cluster.async_failover()
        failover_task.result()

        # Wait for Failover & rollback to complete
        self.sleep(60)

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(
            bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(
            bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info(
            "After rollback src cluster count = {0} dest cluster count = {1}".
            format(_count1, _count2))

        self.assertTrue(
            self.src_cluster.wait_for_outbound_mutations(),
            "Mutations in source cluster not replicated to target after rollback"
        )
        self.assertTrue(
            self.dest_cluster.wait_for_outbound_mutations(),
            "Mutations in target cluster not replicated to source after rollback"
        )

        _, count = NodeHelper.check_goxdcr_log(
            src_nodes[0], "Received rollback from DCP stream", goxdcr_log)
        self.assertGreater(count, 0, "rollback did not happen as expected")
        self.log.info("rollback happened as expected")

        _, count = NodeHelper.check_goxdcr_log(
            dest_nodes[0], "Received rollback from DCP stream", goxdcr_log)
        self.assertGreater(count, 0, "rollback did not happen as expected")
        self.log.info("rollback happened as expected")

    def test_scramsha(self):
        """
        Creates a new bi-xdcr replication with scram-sha
        Make sure to pass use-scramsha=True
        from command line
        """
        self.setup_xdcr()
        self.sleep(60, "wait before checking logs")
        for node in [self.src_cluster.get_master_node()
                     ] + [self.dest_cluster.get_master_node()]:
            _, count = NodeHelper.check_goxdcr_log(
                node,
                "HttpAuthMech=ScramSha for remote cluster reference remoteCluster",
                timeout=60)
            if count <= 0:
                self.fail(
                    "Node {0} does not use SCRAM-SHA authentication".format(
                        node.ip))
            else:
                self.log.info("SCRAM-SHA auth successful on node {0}".format(
                    node.ip))
        self.verify_results()

    def test_update_to_scramsha_auth(self):
        """
        Start with ordinary replication, then switch to use scram_sha_auth
        Search for success log statements
        """
        _, old_count = NodeHelper.check_goxdcr_log(
            self.src_cluster.get_master_node(),
            "HttpAuthMech=ScramSha for remote cluster reference remoteCluster",
            timeout=60)
        self.setup_xdcr()
        # modify remote cluster ref to use scramsha
        for remote_cluster in self.src_cluster.get_remote_clusters(
        ) + self.dest_cluster.get_remote_clusters():
            remote_cluster.use_scram_sha_auth()
        self.sleep(60, "wait before checking the logs for using scram-sha")
        for node in [self.src_cluster.get_master_node()
                     ] + [self.dest_cluster.get_master_node()]:
            _, count = NodeHelper.check_goxdcr_log(
                node,
                "HttpAuthMech=ScramSha for remote cluster reference remoteCluster",
                timeout=60)
            if count <= old_count:
                self.fail(
                    "Node {0} does not use SCRAM-SHA authentication".format(
                        node.ip))
            else:
                self.log.info("SCRAM-SHA auth successful on node {0}".format(
                    node.ip))
        self.verify_results()
Exemple #15
0
class BackupBaseTest(BaseTestCase):
    def setUp(self):
        self.times_teardown_called = 1
        super(BackupBaseTest, self).setUp()
        self.shell = RemoteMachineShellConnection(self.master)
        info = self.shell.extract_remote_info()
        self.os = info.type.lower()
        self.value_size = self.input.param("value_size", 256)
        self.expire_time = self.input.param("expire_time", 60)
        self.item_flag = self.input.param("item_flag", 0)
        self.couchbase_login_info = "%s:%s" % (self.input.membase_settings.rest_username,
                                               self.input.membase_settings.rest_password)
        self.backup_location = self.input.param("backup_location", "/tmp/backup")
        self.command_options = self.input.param("command_options", '')
        if self.command_options != '':
            self.command_options = self.command_options.split(";")
        self.doc_ops = self.input.param("doc_ops", None)
        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")
        servers_in = [self.servers[i + 1] for i in range(self.num_servers - 1)]
        for bucket in self.buckets:
            bucket.kvs[2] = KVStore()
        self.cluster.rebalance(self.servers[:1], servers_in, [])

    def tearDown(self):
        if not self.input.param("skip_cleanup", True):
            if self.times_teardown_called > 1 :
                if self.os == 'windows':
                    self.shell.delete_files("/cygdrive/c%s" % (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
                gc.collect()
        if self.input.param("skip_cleanup", True):
            if self.case_number > 1 or self.times_teardown_called > 1:
                if self.os == 'windows':
                    self.shell.delete_files("/cygdrive/c%s" % (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
                gc.collect()
        self.times_teardown_called += 1
        super(BackupBaseTest, self).tearDown()

    def verify_results(self, server, kv_store=1):
        """This is the verification function for test cases of backup/restore.

        Args:
          server: the master server in the cluster as self.master.
          kv_store: default value is 1. This is the key of the kv_store of each bucket.

        If the command line assigns command options -k and/or -b and/or --single-node, then in the verification function
        key_name indicates which keys we need to verify and bucket_name indicates which bucket we need to verify.
        If the single-node flag is true, then we only need to verify all the buckets at the master node."""

        key_name = None
        bucket_name = None
        single_node_flag = False
        if self.command_options is not None:
            for s in self.command_options:
                if s.find("-k") != -1:
                    sub = s.find(" ")
                    key_name = s[sub + 1:]
                if s.find("-b") != -1:
                    sub = s.find(" ")
                    bucket_name = s[sub + 1:]
                if "--single-node" in self.command_options:
                    single_node_flag = True
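        # e.g. a hypothetical command_options value "-k key_prefix.*;-b default;--single-node"
        # (split on ';' in setUp) restricts verification to matching keys, the
        # named bucket, and the master node's vbuckets only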

        #we delete the buckets whose name does not match the name assigned to -b in KVStore
        self.buckets = [bucket for bucket in self.buckets if bucket_name is None or bucket.name == bucket_name]
        for bucket in self.buckets:
            if key_name is not None:
                valid_keys, deleted_keys = bucket.kvs[kv_store].key_set()
                for key in valid_keys:
                    matchObj = re.search(key_name, key, re.M | re.S) #use regex match to find out keys we need to verify
                    if matchObj is None:
                        partition = bucket.kvs[kv_store].acquire_partition(key)
                        partition.delete(key)  #we delete keys whose prefix does not match the value assigned to -k in KVStore
                        bucket.kvs[kv_store].release_partition(key)
        if single_node_flag is False:
            self._verify_all_buckets(server, kv_store, self.wait_timeout * 50, self.max_verify, True, 1)
        else:
            self.verify_single_node(server, kv_store)

    def verify_single_node(self, server, kv_store=1):
        """This is the verification function for single node backup.

        Args:
          server: the master server in the cluster as self.master.
          kv_store: default value is 1. This is the key of the kv_store of each bucket.

        If the --single-node flag appears in the backup command line, we just back up all the items
        from a single node (the master node in this case). For each bucket, we request the vBucketMap. For every key
        in the kvstore of that bucket, we use a hash function to get the vBucketId corresponding to that
        key. By using the vBucketMap, we can tell whether that key lives on the master node or not.
        If yes, keep it. Otherwise delete it."""

        rest = RestConnection(server)
        for bucket in self.buckets:
            VBucketAware = VBucketAwareMemcached(rest, bucket.name)
            memcacheds, vBucketMap, vBucketMapReplica = VBucketAware.request_map(rest, bucket.name)
            valid_keys, deleted_keys = bucket.kvs[kv_store].key_set()
            for key in valid_keys:
                vBucketId = VBucketAware._get_vBucket_id(key)
                which_server = vBucketMap[vBucketId]
                sub = which_server.find(":")
                which_server_ip = which_server[:sub]
                if which_server_ip != server.ip:
                    partition = bucket.kvs[kv_store].acquire_partition(key)
                    partition.delete(key)
                    bucket.kvs[kv_store].release_partition(key)

        self._verify_all_buckets(server, kv_store, self.wait_timeout * 50, self.max_verify, True, 1)
Exemple #16
0
class unidirectional(XDCRNewBaseTest):
    def setUp(self):
        super(unidirectional, self).setUp()
        self.src_cluster = self.get_cb_cluster_by_name('C1')
        self.src_master = self.src_cluster.get_master_node()
        self.dest_cluster = self.get_cb_cluster_by_name('C2')
        self.dest_master = self.dest_cluster.get_master_node()

    def tearDown(self):
        super(unidirectional, self).tearDown()

    """Testing Unidirectional load( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters.Create/Update/Delete operations are performed based on doc-ops specified by the user. """

    def load_with_ops(self):
        self.setup_xdcr_and_load()
        self.perform_update_delete()
        self.verify_results()

    """Testing Unidirectional load( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters. Create/Update/Delete are performed in parallel- doc-ops specified by the user. """

    def load_with_async_ops(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    def load_with_async_ops_diff_data_size(self):
        # Load 52 items with value size 1 byte
        # (one per letter of the alphabet, lower and upper case)
        self.src_cluster.load_all_buckets(52, value_size=1)
        # Load 5 items with value size 1MB
        self.src_cluster.load_all_buckets(5, value_size=1000000)
        # Load 1 item with value size 10MB
        # (20MB values hit memory issues on VMs)
        self.src_cluster.load_all_buckets(1, value_size=10000000)

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed after based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_failover(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_then_add_back(self):

        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.src_cluster.add_back_node()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.dest_cluster.add_back_node()

        self.perform_update_delete()

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_failover_master(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_master()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_master()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    """Testing Unidirectional load( Loading only at source). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """

    def load_with_async_failover(self):
        self.setup_xdcr_and_load()

        tasks = []
        if "C1" in self._failover:
            tasks.append(self.src_cluster.async_failover())
        if "C2" in self._failover:
            tasks.append(self.dest_cluster.async_failover())

        self.perform_update_delete()
        self.sleep(self._wait_timeout / 4)

        for task in tasks:
            task.result()

        if "C1" in self._failover:
            self.src_cluster.rebalance_failover_nodes()
        if "C2" in self._failover:
            self.dest_cluster.rebalance_failover_nodes()

        self.verify_results()

    """Replication with compaction ddocs and view queries on both clusters.

        This test begins by loading a given number of items on the source cluster.
        It creates num_views as development/production view with default
        map view funcs(_is_dev_ddoc = True by default) on both clusters.
        Then we disable compaction for the ddoc on the src cluster. Until we reach the
        expected fragmentation for the ddoc on the src cluster, we update docs and perform
        view queries for all views. Then we start compaction once fragmentation
        has reached fragmentation_value. When compaction has completed we perform
        a full verification: wait for the disk queues to drain
        and then verify that there has been no data loss on all clusters."""
    def replication_with_ddoc_compaction(self):
        self.setup_xdcr_and_load()

        num_views = self._input.param("num_views", 5)
        is_dev_ddoc = self._input.param("is_dev_ddoc", True)
        fragmentation_value = self._input.param("fragmentation_value", 80)
        for bucket in self.src_cluster.get_buckets():
            views = Utility.make_default_views(bucket.name, num_views, is_dev_ddoc)

        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = {"full_set": "true", "stale": "false"}

        tasks = self.src_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        tasks += self.dest_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        for task in tasks:
            task.result(self._poll_timeout)

        self.src_cluster.disable_compaction()
        fragmentation_monitor = self.src_cluster.async_monitor_view_fragmentation(prefix + ddoc_name, fragmentation_value, BUCKET_NAME.DEFAULT)
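        # with compaction disabled, ddoc fragmentation is free to grow; the
        # monitor task finishes once it reaches fragmentation_value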

        # generate load until fragmentation reached
        while fragmentation_monitor.state != "FINISHED":
            # update docs to create fragmentation
            self.src_cluster.update_delete_data(OPS.UPDATE)
            for view in views:
                # run queries to create indexes
                self.src_cluster.query_view(prefix + ddoc_name, view.name, query)
        fragmentation_monitor.result()

        compaction_task = self.src_cluster.async_compact_view(prefix + ddoc_name, 'default')

        self.assertTrue(compaction_task.result())

        self.verify_results()

    """Replication with disabled/enabled ddoc compaction on source cluster.

        This test begins by loading a given number of items on the source cluster.
        Then we disable or enable compaction on both clusters (set via params).
        Then we mutate and delete data on the source cluster 3 times.
        After deletion we recreate the deleted items. Once the data has been changed 3 times
        we perform a full verification: wait for the disk queues to drain
        and then verify that there has been no data loss on all clusters."""
    def replication_with_disabled_ddoc_compaction(self):
        self.setup_xdcr_and_load()

        if "C1" in self._disable_compaction:
            self.src_cluster.disable_compaction()
        if "C2" in self._disable_compaction:
            self.dest_cluster.disable_compaction()

        # perform doc's ops 3 times to increase rev number
        for _ in range(3):
            self.async_perform_update_delete()
            # restore(re-creating) deleted items
            if 'C1' in self._del_clusters:
                c1_kv_gen = self.src_cluster.get_kv_gen()
                gen_delete = copy.deepcopy(c1_kv_gen[OPS.DELETE])
                self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_delete)
                self.sleep(5)

        self.verify_results()

    def replication_while_rebooting_a_non_master_destination_node(self):
        self.setup_xdcr_and_load()
        self.src_cluster.set_xdcr_param("xdcrFailureRestartInterval", 1)
        self.perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        rebooted_node = self.dest_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(rebooted_node, self, wait_time=self._wait_timeout * 4, wait_if_warmup=True)

        self.verify_results()

    def replication_with_firewall_enabled(self):
        self.src_cluster.set_xdcr_param("xdcrFailureRestartInterval", 1)
        self.setup_xdcr_and_load()
        self.perform_update_delete()

        NodeHelper.enable_firewall(self.dest_master)
        self.sleep(30)
        NodeHelper.disable_firewall(self.dest_master)
        self.verify_results()

    """Testing Unidirectional append ( Loading only at source) Verifying whether XDCR replication is successful on
    subsequent destination clusters. """

    def test_append(self):
        self.setup_xdcr_and_load()
        self.verify_results()
        loop_count = self._input.param("loop_count", 20)
        for i in xrange(loop_count):
            self.log.info("Append iteration # %s" % i)
            gen_append = BlobGenerator('loadOne', 'loadOne', self._value_size, end=self._num_items)
            self.src_cluster.load_all_buckets_from_generator(gen_append, ops=OPS.APPEND, batch_size=1)
            self.sleep(self._wait_timeout)
        self.verify_results()

    '''
    This method runs the cbcollectinfo tool after setting up uniXDCR and checks
    whether the collection generated by cbcollectinfo contains the xdcr log file or not.
    '''
    def collectinfotest_for_xdcr(self):
        self.load_with_ops()
        self.node_down = self._input.param("node_down", False)
        self.log_filename = self._input.param("file_name", "collectInfo")
        self.shell = RemoteMachineShellConnection(self.src_master)
        self.shell.execute_cbcollect_info("%s.zip" % (self.log_filename))
        from clitest import collectinfotest
        # HACK added self.buckets data member.
        self.buckets = self.src_cluster.get_buckets()
        collectinfotest.CollectinfoTests.verify_results(
            self, self.log_filename
        )

    """ Verify the fix for MB-9548"""
    def test_verify_replications_stream_delete(self):
        self.setup_xdcr_and_load()
        self.verify_results()
        rest_conn = RestConnection(self.src_master)
        replications = rest_conn.get_replications()
        self.assertTrue(replications, "Number of replication streams should not be 0")
        self.src_cluster.delete_all_buckets()

        replications = rest_conn.get_replications()
        self.assertTrue(not replications, "No replication streams should exists after deleting the buckets")

    """ Verify fix for MB-9862"""
    def test_verify_memcache_connections(self):
        allowed_memcached_conn = self._input.param("allowed_connections", 100)
        max_ops_per_second = self._input.param("max_ops_per_second", 2500)
        min_item_size = self._input.param("min_item_size", 128)
        num_docs = self._input.param("num_docs", 30000)
        # start load, max_ops_per_second is the combined limit for all buckets
        mcsodaLoad = LoadWithMcsoda(self.src_master, num_docs, prefix='')
        mcsodaLoad.cfg["max-ops"] = 0
        mcsodaLoad.cfg["max-ops-per-sec"] = max_ops_per_second
        mcsodaLoad.cfg["exit-after-creates"] = 1
        mcsodaLoad.cfg["min-value-size"] = min_item_size
        mcsodaLoad.cfg["json"] = 0
        mcsodaLoad.cfg["batch"] = 100
        loadDataThread = Thread(target=mcsodaLoad.load_data,
                                  name='mcloader_default')
        loadDataThread.daemon = True
        loadDataThread.start()

        src_remote_shell = RemoteMachineShellConnection(self.src_master)
        machine_type = src_remote_shell.extract_remote_info().type.lower()
        while (loadDataThread.isAlive() and machine_type == 'linux'):
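            # count memcached (port 11210) sockets sitting in TIME_WAIT; a
            # steadily growing count suggests connections are being churned
            # rather than reused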
            command = "netstat -lpnta | grep 11210 | grep TIME_WAIT | wc -l"
            output, _ = src_remote_shell.execute_command(command)
            if int(output[0]) > allowed_memcached_conn:
                # stop load
                mcsodaLoad.load_stop()
                loadDataThread.join()
                self.fail("Memcached connections {0} are increased above {1} \
                            on Source node".format(
                                                   allowed_memcached_conn,
                                                   int(output[0])))
            self.sleep(5)

        # stop load
        mcsodaLoad.load_stop()
        loadDataThread.join()

    # Test to verify MB-10116
    def verify_ssl_private_key_not_present_in_logs(self):
        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
        try:
            self.shell = RemoteMachineShellConnection(self.src_master)
            self.load_with_ops()
            self.shell.execute_cbcollect_info(zip_file)
            if self.shell.extract_remote_info().type.lower() != "windows":
                command = "unzip %s" % (zip_file)
                output, error = self.shell.execute_command(command)
                self.shell.log_command_output(output, error)
                if len(error) > 0:
                    raise Exception("unable to unzip the files. Check unzip command output for help")
                cmd = 'grep -R "BEGIN RSA PRIVATE KEY" cbcollect_info*/'
                output, _ = self.shell.execute_command(cmd)
            else:
                cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'BEGIN RSA PRIVATE KEY'".format(
                                                    self.src_master.ip,
                                                    self.src_master.rest_username,
                                                    self.src_master.rest_password)
                output, _ = self.shell.execute_command(cmd)
            self.assertTrue(not output, "XDCR SSL Private Key is found diag logs -> %s" % output)
        finally:
            self.shell.delete_files(zip_file)
            self.shell.delete_files("cbcollect_info*")

    # Buckets States
    def delete_recreate_dest_buckets(self):
        self.setup_xdcr_and_load()

        # Remove destination buckets
        self.dest_cluster.delete_all_buckets()

        # Code for re-create_buckets
        self.create_buckets_on_cluster("C2")

        self._resetup_replication_for_recreate_buckets("C2")

        self.async_perform_update_delete()
        self.verify_results()

    def flush_dest_buckets(self):
        self.setup_xdcr_and_load()

        # flush destination buckets
        self.dest_cluster.flush_buckets()

        self.async_perform_update_delete()
        self.verify_results()

    # Nodes Crashing Scenarios
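    # kill_erlang stops the node's Erlang VM to simulate a crash; the node is
    # brought back with start_couchbase and the tests then wait for warmup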
    def __kill_processes(self, crashed_nodes=[]):
        for node in crashed_nodes:
            NodeHelper.kill_erlang(node)

    def __start_cb_server(self, node):
        shell = RemoteMachineShellConnection(node)
        shell.start_couchbase()
        shell.disconnect()

    def test_node_crash_master(self):
        self.setup_xdcr_and_load()

        crashed_nodes = []
        crash = self._input.param("crash", "").split('-')
        if "C1" in crash:
            crashed_nodes.append(self.src_master)
        if "C2" in crash:
            crashed_nodes.append(self.dest_master)

        self.__kill_processes(crashed_nodes)

        for crashed_node in crashed_nodes:
            self.__start_cb_server(crashed_node)
        NodeHelper.wait_warmup_completed(crashed_nodes)

        self.async_perform_update_delete()
        self.verify_results()

    # Disaster at site.
    # 1. Crash Source Cluster., Sleep n second
    # 2. Crash Dest Cluster.
    # 3. Wait for Source Cluster to warmup. Load more data and perform mutations on Src.
    # 4. Wait for Dest to warmup.
    # 5. Verify data.
    def test_node_crash_cluster(self):
        self.setup_xdcr_and_load()

        crashed_nodes = []
        crash = self._input.param("crash", "").split('-')
        if "C1" in crash:
            crashed_nodes += self.src_cluster.get_nodes()
            self.__kill_processes(crashed_nodes)
            self.sleep(30)
        if "C2" in crash:
            crashed_nodes += self.dest_cluster.get_nodes()
            self.__kill_processes(crashed_nodes)

        for crashed_node in crashed_nodes:
            self.__start_cb_server(crashed_node)

        if "C1" in crash:
            NodeHelper.wait_warmup_completed(self.src_cluster.get_nodes())
            gen_create = BlobGenerator('loadTwo', 'loadTwo', self._value_size, end=self._num_items)
            self.src_cluster.load_all_buckets_from_generator(kv_gen=gen_create)

        self.async_perform_update_delete()

        if "C2" in crash:
            NodeHelper.wait_warmup_completed(self.dest_cluster.get_nodes())

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest bucket flush """
    def test_idle_xdcr_dest_flush(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        bucket = self.dest_cluster.get_bucket_by_name(BUCKET_NAME.DEFAULT)
        self.dest_cluster.flush_buckets([bucket])

        self.sleep(self._wait_timeout)

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest bucket recreate """
    def test_idle_xdcr_dest_recreate(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        bucket = self.dest_cluster.get_bucket_by_name(BUCKET_NAME.DEFAULT)
        self.dest_cluster.delete_bucket(BUCKET_NAME.DEFAULT)

        self.dest_cluster.create_default_bucket(bucket.bucket_size)

        self.sleep(self._wait_timeout)

        self.verify_results()

    """ Test if replication restarts 60s after idle xdcr following dest failover """
    def test_idle_xdcr_dest_failover(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout)

        self.verify_results()
Exemple #17
0
class RecoveryUseTransferTests(TransferBaseTest):
    def setUp(self):
        self.times_teardown_called = 1
        super(RecoveryUseTransferTests, self).setUp()
        self.server_origin = self.servers[0]
        self.server_recovery = self.servers[1]
        self.shell = RemoteMachineShellConnection(self.server_origin)
        info = self.shell.extract_remote_info()
        self.os = info.type.lower()

    def tearDown(self):
        super(RecoveryUseTransferTests, self).tearDown()
        if not self.input.param("skip_cleanup", True):
            if self.times_teardown_called > 1:
                if self.os == 'windows':
                    self.shell.delete_files("/cygdrive/c%s" %
                                            (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
        if self.input.param("skip_cleanup", True):
            if self.case_number > 1 or self.times_teardown_called > 1:
                if self.os == 'windows':
                    self.shell.delete_files("/cygdrive/c%s" %
                                            (self.backup_location))
                else:
                    self.shell.delete_files(self.backup_location)
                self.shell.disconnect()
                del self.buckets
        self.times_teardown_called += 1

    def recover_to_cbserver(self):
        """Recover data with 2.0 couchstore files to a 2.0 online server

        We load a number of items to one node first and then do some mutations on these items.
        Later we use cbtransfer to transfer the couchstore files we have on this
        node to a new node. We verify the data by comparing the items in KVStore
        with the items in the new node."""

        self.load_data()

        kvs_before = {}
        bucket_names = []
        for bucket in self.buckets:
            kvs_before[bucket.name] = bucket.kvs[1]
            bucket_names.append(bucket.name)

        if self.default_bucket:
            self.cluster.create_default_bucket(self.server_recovery,
                                               self.bucket_size,
                                               self.num_replicas)
            self.buckets.append(
                Bucket(name="default",
                       authType="sasl",
                       saslPassword="",
                       num_replicas=self.num_replicas,
                       bucket_size=self.bucket_size))
        self._create_sasl_buckets(self.server_recovery, self.sasl_buckets)
        self._create_standard_buckets(self.server_recovery,
                                      self.standard_buckets)

        for bucket in self.buckets:
            bucket.kvs[1] = kvs_before[bucket.name]
            transfer_source = "couchstore-files://%s" % (COUCHBASE_DATA_PATH)
            transfer_destination = "http://%s@%s:%s -b %s -B %s -v -v -v" % (
                self.couchbase_login_info, self.server_recovery.ip,
                self.server_recovery.port, bucket.name, bucket.name)
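            # cbtransfer reads the couchstore files from the local data path and
            # streams them to the recovery node's REST endpoint; here -b/-B
            # appear to name the source and destination buckets, -v adds verbosity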
            self.shell.execute_cbtransfer(transfer_source,
                                          transfer_destination)
        del kvs_before
        time.sleep(self.expire_time + 1)
        shell_server_recovery = RemoteMachineShellConnection(
            self.server_recovery)
        for bucket in self.buckets:
            shell_server_recovery.execute_cbepctl(bucket, "",
                                                  "set flush_param",
                                                  "exp_pager_stime", 5)
        shell_server_recovery.disconnect()
        time.sleep(30)

        self._wait_for_stats_all_buckets([self.server_recovery])
        self._verify_all_buckets(self.server_recovery, 1,
                                 self.wait_timeout * 50, None, True)
        self._verify_stats_all_buckets([self.server_recovery])

    def recover_to_backupdir(self):
        """Recover data with 2.0 couchstore files to a 2.0 backup diretory

        We load a number of items to a node first and then do some mutations on these items.
        Later we use cbtransfer to transfer the couchstore files we have on this node to
        a backup directory. We use cbrestore to restore these backup files to the same node
        for verification."""

        self.load_data()

        kvs_before = {}
        bucket_names = []

        self.shell.delete_files(self.backup_location)
        self.shell.create_directory(self.backup_location)

        for bucket in self.buckets:
            kvs_before[bucket.name] = bucket.kvs[1]
            bucket_names.append(bucket.name)
            transfer_source = "-v -v -v couchstore-files://%s" % (
                COUCHBASE_DATA_PATH)
            transfer_destination = self.backup_location
            self.shell.execute_cbtransfer(transfer_source,
                                          transfer_destination)

        self._all_buckets_delete(self.server_origin)
        if self.default_bucket:
            self.cluster.create_default_bucket(self.server_origin,
                                               self.bucket_size,
                                               self.num_replicas)
            self.buckets.append(
                Bucket(name="default",
                       authType="sasl",
                       saslPassword="",
                       num_replicas=self.num_replicas,
                       bucket_size=self.bucket_size))
        self._create_sasl_buckets(self.server_origin, self.sasl_buckets)
        self._create_standard_buckets(self.server_origin,
                                      self.standard_buckets)

        for bucket in self.buckets:
            bucket.kvs[1] = kvs_before[bucket.name]
        del kvs_before
        self.shell.restore_backupFile(self.couchbase_login_info,
                                      self.backup_location, bucket_names)
        time.sleep(self.expire_time + 1)
        for bucket in self.buckets:
            self.shell.execute_cbepctl(bucket, "", "set flush_param",
                                       "exp_pager_stime", 5)
        time.sleep(30)

        self._wait_for_stats_all_buckets([self.server_origin])
        self._verify_all_buckets(self.server_origin, 1, self.wait_timeout * 50,
                                 None, True)
        self._verify_stats_all_buckets([self.server_origin])

    def load_data(self):
        gen_load = BlobGenerator('nosql',
                                 'nosql-',
                                 self.value_size,
                                 end=self.num_items)
        gen_update = BlobGenerator('nosql',
                                   'nosql-',
                                   self.value_size,
                                   end=(self.num_items / 2 - 1))
        gen_expire = BlobGenerator('nosql',
                                   'nosql-',
                                   self.value_size,
                                   start=self.num_items / 2,
                                   end=(self.num_items * 3 / 4 - 1))
        gen_delete = BlobGenerator('nosql',
                                   'nosql-',
                                   self.value_size,
                                   start=self.num_items * 3 / 4,
                                   end=self.num_items)
        self._load_all_buckets(self.server_origin,
                               gen_load,
                               "create",
                               0,
                               1,
                               self.item_flag,
                               True,
                               batch_size=20000,
                               pause_secs=5,
                               timeout_secs=180)

        if (self.doc_ops is not None):
            if ("update" in self.doc_ops):
                self._load_all_buckets(self.server_origin,
                                       gen_update,
                                       "update",
                                       0,
                                       1,
                                       self.item_flag,
                                       True,
                                       batch_size=20000,
                                       pause_secs=5,
                                       timeout_secs=180)
            if ("delete" in self.doc_ops):
                self._load_all_buckets(self.server_origin,
                                       gen_delete,
                                       "delete",
                                       0,
                                       1,
                                       self.item_flag,
                                       True,
                                       batch_size=20000,
                                       pause_secs=5,
                                       timeout_secs=180)
            if ("expire" in self.doc_ops):
                self._load_all_buckets(self.server_origin,
                                       gen_expire,
                                       "update",
                                       self.expire_time,
                                       1,
                                       self.item_flag,
                                       True,
                                       batch_size=20000,
                                       pause_secs=5,
                                       timeout_secs=180)
        self._wait_for_stats_all_buckets([self.server_origin])
        time.sleep(30)
Exemple #18
0
class bidirectional(XDCRNewBaseTest):
    def setUp(self):
        super(bidirectional, self).setUp()
        self.src_cluster = self.get_cb_cluster_by_name('C1')
        self.src_master = self.src_cluster.get_master_node()
        self.dest_cluster = self.get_cb_cluster_by_name('C2')
        self.dest_master = self.dest_cluster.get_master_node()

    def tearDown(self):
        super(bidirectional, self).tearDown()

    def __perform_ops_joint_sets(self):
        # Merging the keys as keys are actually replicated.
        temp_expires = self._expires
        self._expires = 0   # Assigning it to 0, so that merge_buckets doesn't wait for expiration here.
        self.merge_all_buckets()

        tasks = []
        kv_gen_src = self.src_cluster.get_kv_gen()[OPS.CREATE]
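        # build joint key ranges from the originally created keys: updates hit
        # the first _perc_upd% of the range, deletes hit the last _perc_del%,
        # so both clusters operate on the same replicated keys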
        gen_update = BlobGenerator(kv_gen_src.name,
                                   kv_gen_src.seed,
                                   kv_gen_src.value_size,
                                   start=0,
                                   end=int(kv_gen_src.end * (float)(self._perc_upd) / 100))
        gen_delete = BlobGenerator(kv_gen_src.name,
                                   kv_gen_src.seed,
                                   kv_gen_src.value_size,
                                   start=int((kv_gen_src.end) * (float)(100 - self._perc_del) / 100),
                                   end=kv_gen_src.end)
        if "C1" in self._upd_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(gen_update, OPS.UPDATE, self._expires)
        if "C2" in self._upd_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(gen_update, OPS.UPDATE, self._expires)
        if "C1" in self._del_clusters:
            tasks += self.src_cluster.async_load_all_buckets_from_generator(gen_delete, OPS.DELETE, 0)
        if "C2" in self._del_clusters:
            tasks += self.dest_cluster.async_load_all_buckets_from_generator(gen_delete, OPS.DELETE, 0)

        for task in tasks:
            task.result()

        self._expires = temp_expires
        if (self._wait_for_expiration and self._expires) and ("C1" in self._upd_clusters or "C2" in self._upd_clusters):
            self.sleep(self._expires)

        self.sleep(self._wait_timeout)

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket."""
    def load_with_ops(self):
        self.setup_xdcr_and_load()
        self.perform_update_delete()
        self.verify_results()

    """Bidirectional replication between two clusters(currently), create-updates-deletes on DISJOINT sets on same bucket.
    Here running incremental load on both cluster1 and cluster2 as specified by the user/conf file"""

    def load_with_async_ops(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    """Testing Bidirectional load( Loading at source/destination). Failover node at Source/Destination while
    Create/Update/Delete are performed in parallel based on doc-ops specified by the user.
    Verifying whether XDCR replication is successful on subsequent destination clusters. """
    def load_with_async_ops_and_joint_sets(self):
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.verify_results()

    def load_with_async_ops_with_warmup(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        self.verify_results()

    def load_with_async_ops_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        NodeHelper.wait_warmup_completed(warmupnodes)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)
        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            "Test case does not apply for Ephemeral buckets"
            return
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node())
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node())

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_async_ops_and_joint_sets_with_warmup_master(self):
        self.setup_xdcr_and_load()
        warmupnodes = []
        if "C1" in self._warmup:
            warmupnodes.append(self.src_cluster.warmup_node(master=True))
        if "C2" in self._warmup:
            warmupnodes.append(self.dest_cluster.warmup_node(master=True))

        self.sleep(self._wait_timeout)
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout / 2)

        NodeHelper.wait_warmup_completed(warmupnodes)

        self.verify_results()

    def load_with_failover(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()
        self.sleep(300)

        self.verify_results()

    def load_with_failover_then_add_back(self):

        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.src_cluster.add_back_node()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_nodes(rebalance=False)
            self.dest_cluster.add_back_node()

        self.perform_update_delete()

        self.verify_results()

    def load_with_failover_master(self):
        self.setup_xdcr_and_load()

        if "C1" in self._failover:
            self.src_cluster.failover_and_rebalance_master()
        if "C2" in self._failover:
            self.dest_cluster.failover_and_rebalance_master()

        self.sleep(self._wait_timeout / 6)
        self.perform_update_delete()

        self.verify_results()

    """Replication with compaction ddocs and view queries on both clusters.

    This test begins by loading a given number of items on both clusters.
    It creates _num_views as development/production view with default
    map view funcs(_is_dev_ddoc = True by default) on both clusters.
    Then we disable compaction for the ddoc on the src cluster. Until we reach the
    expected fragmentation for the ddoc on the src cluster, we update docs and perform
    view queries for all views. Then we start compaction once fragmentation
    has reached fragmentation_value. When compaction has completed we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""
    def replication_with_ddoc_compaction(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return

        self.setup_xdcr()

        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        num_views = self._input.param("num_views", 5)
        is_dev_ddoc = self._input.param("is_dev_ddoc", True)
        fragmentation_value = self._input.param("fragmentation_value", 80)
        for bucket in self.src_cluster.get_buckets():
            views = Utility.make_default_views(bucket.name, num_views, is_dev_ddoc)

        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = {"full_set": "true", "stale": "false"}

        tasks = self.src_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        tasks += self.dest_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
        for task in tasks:
            task.result(self._poll_timeout)

        self.src_cluster.disable_compaction()
        fragmentation_monitor = self.src_cluster.async_monitor_view_fragmentation(prefix + ddoc_name, fragmentation_value, BUCKET_NAME.DEFAULT)
        # generate load until fragmentation reached
        while fragmentation_monitor.state != "FINISHED":
            # update docs to create fragmentation
            self.src_cluster.update_delete_data(OPS.UPDATE)
            for view in views:
                # run queries to create indexes
                self.src_cluster.query_view(prefix + ddoc_name, view.name, query)
                self.dest_cluster.query_view(prefix + ddoc_name, view.name, query)
        fragmentation_monitor.result()

        compaction_task = self.src_cluster.async_compact_view(prefix + ddoc_name, 'default')

        self.assertTrue(compaction_task.result())

        self.verify_results()

    def replication_with_view_queries_and_ops(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return
        tasks = []
        try:
            self.setup_xdcr()

            self.src_cluster.load_all_buckets(self._num_items)
            self.dest_cluster.load_all_buckets(self._num_items)

            num_views = self._input.param("num_views", 5)
            is_dev_ddoc = self._input.param("is_dev_ddoc", True)
            for bucket in self.src_cluster.get_buckets():
                views = Utility.make_default_views(bucket.name, num_views, is_dev_ddoc)

            ddoc_name = "ddoc1"
            prefix = ("", "dev_")[is_dev_ddoc]

            query = {"full_set": "true", "stale": "false", "connection_timeout": 60000}

            tasks = self.src_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)
            tasks += self.dest_cluster.async_create_views(ddoc_name, views, BUCKET_NAME.DEFAULT)

            for task in tasks:
                task.result(self._poll_timeout)

            tasks = []
            # Setting up doc-ops at source nodes
            if "C1" in self._upd_clusters:
                tasks.extend(self.src_cluster.async_update_delete(OPS.UPDATE, self._perc_upd, self._expires))
            if "C1" in self._del_clusters:
                tasks.extend(self.src_cluster.async_update_delete(OPS.DELETE, self._perc_del))
            if "C2" in self._upd_clusters:
                tasks.extend(self.dest_cluster.async_update_delete(OPS.UPDATE, self._perc_upd, self._expires))
            if "C2" in self._del_clusters:
                tasks.extend(self.dest_cluster.async_update_delete(OPS.DELETE, self._perc_del))

            self.sleep(5)
            while True:
                for view in views:
                    self.src_cluster.query_view(prefix + ddoc_name, view.name, query)
                    self.dest_cluster.query_view(prefix + ddoc_name, view.name, query)
                if set([task.state for task in tasks]) != set(["FINISHED"]):
                    continue
                else:
                    if self._wait_for_expiration:
                        if "C1" in self._upd_clusters or "C2" in self._upd_clusters:
                            self.sleep(self._expires)
                    break

            self.merge_all_buckets()
            self.src_cluster.verify_items_count()
            self.dest_cluster.verify_items_count()

            tasks = []
            src_buckets = self.src_cluster.get_buckets()
            dest_buckets = self.dest_cluster.get_buckets()
            for view in views:
                tasks.append(self.src_cluster.async_query_view(prefix + ddoc_name, view.name, query, src_buckets[0].kvs[1].__len__()))
                tasks.append(self.dest_cluster.async_query_view(prefix + ddoc_name, view.name, query, dest_buckets[0].kvs[1].__len__()))

            for task in tasks:
                task.result(self._poll_timeout)

            self.verify_results()
        finally:
            # On a timeout error, cancel all outstanding tasks
            # before proceeding to the next test
            for task in tasks:
                task.cancel()

    """Replication with disabled/enabled ddoc compaction on both clusters.

    This test begins by loading a given number of items on both clusters.
    Then we disabled or enabled compaction on both clusters( set via params).
    Then we mutate and delete data on clusters 3 times. After deletion we recreate
    deleted items. When data was changed 3 times we perform
    a full verification: wait for the disk queues to drain
    and then verify that there has been no data loss on both clusters."""
    def replication_with_disabled_ddoc_compaction(self):
        self.setup_xdcr()
        self.src_cluster.load_all_buckets(self._num_items)
        self.dest_cluster.load_all_buckets(self._num_items)

        if "C1" in self._disable_compaction:
            self.src_cluster.disable_compaction()
        if "C2" in self._disable_compaction:
            self.dest_cluster.disable_compaction()

        # perform doc ops 3 times to increase the rev number
        for _ in range(3):
            self.async_perform_update_delete()
            # wait till deletes have been processed before recreating the items
            self.sleep(60)
            # restore (re-create) the deleted items
            if 'C1' in self._del_clusters:
                c1_kv_gen = self.src_cluster.get_kv_gen()

                c1_gen_delete = copy.deepcopy(c1_kv_gen[OPS.DELETE])
                if self._expires:
                    # if expiration set, recreate those keys before
                    # trying to update
                    c1_gen_update = copy.deepcopy(c1_kv_gen[OPS.UPDATE])
                    self.src_cluster.load_all_buckets_from_generator(kv_gen=c1_gen_update)
                self.src_cluster.load_all_buckets_from_generator(kv_gen=c1_gen_delete)
            if 'C2' in self._del_clusters:
                c2_kv_gen = self.dest_cluster.get_kv_gen()
                c2_gen_delete = copy.deepcopy(c2_kv_gen[OPS.DELETE])
                if self._expires:
                    c2_gen_update = copy.deepcopy(c2_kv_gen[OPS.UPDATE])
                    self.dest_cluster.load_all_buckets_from_generator(kv_gen=c2_gen_update)
                self.dest_cluster.load_all_buckets_from_generator(kv_gen=c2_gen_delete)
            # wait till the deleted keys are recreated before deleting/updating again
            self.sleep(300)

        self.verify_results()

    def replication_while_rebooting_a_non_master_src_dest_node(self):
        bucket_type = self._input.param("bucket_type", "membase")
        if bucket_type == "ephemeral":
            self.log.info("Test case does not apply to ephemeral")
            return
        self.setup_xdcr_and_load()
        self.async_perform_update_delete()
        self.sleep(self._wait_timeout)

        reboot_node_dest = self.dest_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_dest, self, wait_time=self._wait_timeout * 4, wait_if_warmup=True)

        reboot_node_src = self.src_cluster.reboot_one_node(self)
        NodeHelper.wait_node_restarted(reboot_node_src, self, wait_time=self._wait_timeout * 4, wait_if_warmup=True)

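        # allow the clusters to settle, then make sure both rebooted nodes have
        # rejoined and finished warmup before verifying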
        self.sleep(120)
        ClusterOperationHelper.wait_for_ns_servers_or_assert([reboot_node_dest], self, wait_if_warmup=True)
        ClusterOperationHelper.wait_for_ns_servers_or_assert([reboot_node_src], self, wait_if_warmup=True)
        self.verify_results()

    def test_disk_full(self):
        self.setup_xdcr_and_load()
        self.verify_results()

        self.sleep(self._wait_timeout)

        zip_file = "%s.zip" % (self._input.param("file_name", "collectInfo"))
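        # run cbcollect_info on both masters and grep the collected logs (or the
        # /diag endpoint on Windows) for the "Approaching full disk warning." message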
        try:
            for node in [self.src_master, self.dest_master]:
                self.shell = RemoteMachineShellConnection(node)
                self.shell.execute_cbcollect_info(zip_file)
                if self.shell.extract_remote_info().type.lower() != "windows":
                    command = "unzip %s" % (zip_file)
                    output, error = self.shell.execute_command(command)
                    self.shell.log_command_output(output, error)
                    if len(error) > 0:
                        raise Exception("unable to unzip the files. Check unzip command output for help")
                    cmd = 'grep -R "Approaching full disk warning." cbcollect_info*/'
                    output, _ = self.shell.execute_command(cmd)
                else:
                    cmd = "curl -0 http://{1}:{2}@{0}:8091/diag 2>/dev/null | grep 'Approaching full disk warning.'".format(
                                                        self.src_master.ip,
                                                        self.src_master.rest_username,
                                                        self.src_master.rest_password)
                    output, _ = self.shell.execute_command(cmd)
                self.assertNotEqual(len(output), 0, "Full disk warning not generated as expected in %s" % node.ip)
                self.log.info("Full disk warning generated as expected in %s" % node.ip)

                self.shell.delete_files(zip_file)
                self.shell.delete_files("cbcollect_info*")
        except Exception as e:
            self.log.info(e)

    def test_rollback(self):
        bucket = self.src_cluster.get_buckets()[0]
        src_nodes = self.src_cluster.get_nodes()
        dest_nodes = self.dest_cluster.get_nodes()
        nodes = src_nodes + dest_nodes

        # Stop persistence on all nodes of both clusters
        for node in nodes:
            mem_client = MemcachedClientHelper.direct_client(node, bucket)
            mem_client.stop_persistence()

        goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                     + '/goxdcr.log*'
        self.setup_xdcr()

        self.src_cluster.pause_all_replications()
        self.dest_cluster.pause_all_replications()

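        # load distinct key sets into each cluster while replication is paused,
        # then resume so both directions replicate the backlog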
        gen = BlobGenerator("C1-", "C1-", self._value_size, end=self._num_items)
        self.src_cluster.load_all_buckets_from_generator(gen)
        gen = BlobGenerator("C2-", "C2-", self._value_size, end=self._num_items)
        self.dest_cluster.load_all_buckets_from_generator(gen)

        self.src_cluster.resume_all_replications()
        self.dest_cluster.resume_all_replications()

        # Perform mutations on the bucket
        self.async_perform_update_delete()

        rest1 = RestConnection(self.src_cluster.get_master_node())
        rest2 = RestConnection(self.dest_cluster.get_master_node())

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info("Before rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))

        # Kill memcached on Node A so that Node B becomes master
        shell = RemoteMachineShellConnection(self.src_cluster.get_master_node())
        shell.kill_memcached()
        shell = RemoteMachineShellConnection(self.dest_cluster.get_master_node())
        shell.kill_memcached()
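        # with persistence stopped, the restarted memcached nodes lose every
        # mutation made since persistence was stopped, forcing DCP rollback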

        # Start persistence on Node B
        mem_client = MemcachedClientHelper.direct_client(src_nodes[1], bucket)
        mem_client.start_persistence()
        mem_client = MemcachedClientHelper.direct_client(dest_nodes[1], bucket)
        mem_client.start_persistence()

        # Failover Node B
        failover_task = self.src_cluster.async_failover()
        failover_task.result()
        failover_task = self.dest_cluster.async_failover()
        failover_task.result()

        # Wait for Failover & rollback to complete
        self.sleep(60)

        # Fetch count of docs in src and dest cluster
        _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
        _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]

        self.log.info("After rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))

        self.assertTrue(self.src_cluster.wait_for_outbound_mutations(),
                        "Mutations in source cluster not replicated to target after rollback")
        self.assertTrue(self.dest_cluster.wait_for_outbound_mutations(),
                        "Mutations in target cluster not replicated to source after rollback")

        count = NodeHelper.check_goxdcr_log(
                        src_nodes[0],
                        "Received rollback from DCP stream",
                        goxdcr_log)
        self.assertGreater(count, 0, "rollback did not happen as expected")
        self.log.info("rollback happened as expected")

        count = NodeHelper.check_goxdcr_log(
                        dest_nodes[0],
                        "Received rollback from DCP stream",
                        goxdcr_log)
        self.assertGreater(count, 0, "rollback did not happen as expected")
        self.log.info("rollback happened as expected")

    def test_scramsha(self):
        """
        Creates a new bi-xdcr replication with scram-sha
        Make sure to pass use-scramsha=True
        from command line
        """
        self.setup_xdcr()
        self.sleep(60, "wait before checking logs")
        for node in [self.src_cluster.get_master_node()]+[self.dest_cluster.get_master_node()]:
            count = NodeHelper.check_goxdcr_log(node,
                        "HttpAuthMech=ScramSha for remote cluster reference remote_cluster")
            if count <= 0:
                self.fail("Node {0} does not use SCRAM-SHA authentication".format(node.ip))
            else:
                self.log.info("SCRAM-SHA auth successful on node {0}".format(node.ip))
        self.verify_results()

    def test_update_to_scramsha_auth(self):
        """
        Start with ordinary replication, then switch to use scram_sha_auth
        Search for success log stmtsS
        """
        old_count = NodeHelper.check_goxdcr_log(self.src_cluster.get_master_node(),
                                                "HttpAuthMech=ScramSha for remote cluster reference remote_cluster")
        self.setup_xdcr()
        # modify remote cluster ref to use scramsha
        for remote_cluster in self.src_cluster.get_remote_clusters()+self.dest_cluster.get_remote_clusters():
            remote_cluster.use_scram_sha_auth()
        self.sleep(60, "wait before checking the logs for using scram-sha")
        for node in [self.src_cluster.get_master_node()]+[self.dest_cluster.get_master_node()]:
            count = NodeHelper.check_goxdcr_log(node, "HttpAuthMech=ScramSha for remote cluster reference remote_cluster")
            if count <= old_count:
                self.fail("Node {0} does not use SCRAM-SHA authentication".format(node.ip))
            else:
                self.log.info("SCRAM-SHA auth successful on node {0}".format(node.ip))
        self.verify_results()