def test_partial_rollback(self):
    """Verify eventing results after a KV partial rollback.

    Stops persistence on every KV node, loads docs (so mutations stay
    memory-only), deploys the handler, then kills memcached on one node
    while restarting persistence on another — forcing a DCP rollback
    while the handler bootstraps.
    """
    kv_node = self.get_nodes_from_services_map(service_type="kv", get_all_nodes=True)
    log.info("kv nodes:{0}".format(kv_node))
    # Stop persistence on all KV nodes so the upcoming mutations are
    # eligible for rollback.
    for node in kv_node:
        mem_client = MemcachedClientHelper.direct_client(node, self.src_bucket_name)
        mem_client.stop_persistence()
    body = self.create_save_function_body(self.function_name, self.handler_code, worker_count=3)
    if self.is_curl:
        body['depcfg']['curl'] = []
        body['depcfg']['curl'].append({"hostname": self.hostname, "value": "server",
                                       "auth_type": self.auth_type,
                                       "username": self.curl_username,
                                       "password": self.curl_password,
                                       "cookies": self.cookies})
    try:
        task = self.cluster.async_load_gen_docs(self.master, self.src_bucket_name,
                                                self.gens_load, self.buckets[0].kvs[1],
                                                'create',
                                                compression=self.sdk_compression)
    except Exception as e:
        # Best-effort load; include the actual failure instead of a bare
        # message that hid the root cause.
        log.info("error while loading data: {0}".format(e))
    self.deploy_function(body, wait_for_bootstrap=False)
    # Kill memcached on Node A
    self.log.info("Killing memcached on {0}".format(kv_node[1]))
    shell = RemoteMachineShellConnection(kv_node[1])
    shell.kill_memcached()
    # Start persistence on Node B
    self.log.info("Starting persistence on {0}".format(kv_node[0]))
    mem_client = MemcachedClientHelper.direct_client(kv_node[0], self.src_bucket_name)
    mem_client.start_persistence()
    self.wait_for_handler_state(body['appname'], "deployed")
    stats_src = RestConnection(self.master).get_bucket_stats(bucket=self.src_bucket_name)
    log.info(stats_src)
    self.verify_eventing_results(self.function_name, stats_src["curr_items"],
                                 skip_stats_validation=True)
def system_stats(self, nodes, pnames, frequency, verbosity=False):
    """Periodically sample per-process stats on each reachable node.

    Every ``frequency`` seconds, for each process name in ``pnames``, the
    process info is extracted over SSH and appended as a snapshot dict
    (keys: name, id, unique_id, time, ip). Results are published under
    ``self._task["systemstats"]`` when the task is aborted.
    """
    # Keep (node, shell) pairs so a node that fails setup cannot shift
    # the alignment. The original walked `nodes[i]` with an index that
    # counted *shells*, mismapping every node after a skipped one.
    node_shells = []
    for node in nodes:
        try:
            bucket = RestConnection(node).get_buckets()[0].name
            # Probe memcached connectivity before committing to the node.
            MemcachedClientHelper.direct_client(node, bucket)
            node_shells.append((node, RemoteMachineShellConnection(node)))
        except Exception:
            # Best-effort: unreachable nodes are simply skipped
            # (narrowed from a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit).
            pass
    # "pname":"x","pid":"y","snapshots":[{"time":time,"value":value}]
    d = {"snapshots": []}
    start_time = str(self._task["time"])
    while not self._aborted():
        time.sleep(frequency)
        current_time = time.time()
        for node, shell in node_shells:
            unique_id = node.ip + '-' + start_time
            for pname in pnames:
                obj = RemoteMachineHelper(shell).is_process_running(pname)
                if obj and obj.pid:
                    value = self._extract_proc_info(shell, obj.pid)
                    value["name"] = pname
                    value["id"] = obj.pid
                    value["unique_id"] = unique_id
                    value["time"] = current_time
                    value["ip"] = node.ip
                    d["snapshots"].append(value)
    self._task["systemstats"] = d["snapshots"]
    log.info("Finished system_stats")
def iostats(self, nodes, frequency, verbosity=False):
    """Periodically collect disk I/O stats from each reachable node.

    Samples are appended to ``self._task["iostats"]`` every ``frequency``
    seconds until the task is aborted.
    """
    shells = []
    for node in nodes:
        try:
            bucket = RestConnection(node).get_buckets()[0].name
            # Probe memcached connectivity before keeping the node.
            MemcachedClientHelper.direct_client(node, bucket)
            shells.append(RemoteMachineShellConnection(node))
        except Exception:
            # Best-effort: skip unreachable nodes (narrowed from a bare
            # except, which also swallowed KeyboardInterrupt/SystemExit).
            pass
    self._task["iostats"] = []
    log.info("Started capturing io stats")
    while not self._aborted():
        time.sleep(frequency)
        log.info("Collecting io_stats")
        for shell in shells:
            try:
                kB_read, kB_wrtn, util, iowait, idle = \
                    self._extract_io_info(shell)
            except (ValueError, TypeError, IndexError):
                # Malformed iostat output for this node this round.
                continue
            if kB_read and kB_wrtn:
                self._task["iostats"].append({
                    "time": time.time(),
                    "ip": shell.ip,
                    "read": kB_read,
                    "write": kB_wrtn,
                    "util": util,
                    "iowait": iowait,
                    "idle": idle
                })
    log.info("Finished capturing io stats")
def test_partial_rollback(self):
    """Verify eventing results after a KV partial rollback.

    Persistence is stopped on all KV nodes, docs are loaded, the handler
    is deployed, then memcached is killed on one node while persistence
    resumes on another — forcing a DCP rollback during bootstrap.
    """
    kv_node = self.get_nodes_from_services_map(service_type="kv", get_all_nodes=True)
    log.info("kv nodes:{0}".format(kv_node))
    # Stop persistence everywhere so subsequent mutations stay memory-only
    # and can be rolled back.
    for node in kv_node:
        mem_client = MemcachedClientHelper.direct_client(node, self.src_bucket_name)
        mem_client.stop_persistence()
    body = self.create_save_function_body(self.function_name, self.handler_code, worker_count=3)
    try:
        task = self.cluster.async_load_gen_docs(self.master, self.src_bucket_name,
                                                self.gens_load, self.buckets[0].kvs[1],
                                                'create')
    except Exception as e:
        # Best-effort load; the test proceeds even if the loader errors.
        log.info("error while loading data")
    self.deploy_function(body, wait_for_bootstrap=False)
    # Kill memcached on Node A
    self.log.info("Killing memcached on {0}".format(kv_node[1]))
    shell = RemoteMachineShellConnection(kv_node[1])
    shell.kill_memcached()
    # Start persistence on Node B
    self.log.info("Starting persistence on {0}".format(kv_node[0]))
    mem_client = MemcachedClientHelper.direct_client(kv_node[0], self.src_bucket_name)
    mem_client.start_persistence()
    # Wait for bootstrap to complete
    self.wait_for_bootstrap_to_complete(body['appname'])
    stats_src = RestConnection(self.master).get_bucket_stats(bucket=self.src_bucket_name)
    log.info(stats_src)
    self.verify_eventing_results(self.function_name, stats_src["curr_items"],
                                 skip_stats_validation=True)
def test_partial_rollback(self):
    """Verify eventing results after a KV partial rollback (SDK-compressed load).

    Same scenario as the uncompressed variant: stop persistence on all KV
    nodes, load docs, deploy, kill memcached on one node while restarting
    persistence on another, then validate eventing output.
    """
    kv_node = self.get_nodes_from_services_map(service_type="kv", get_all_nodes=True)
    log.info("kv nodes:{0}".format(kv_node))
    # Stop persistence so the upcoming mutations are rollback-eligible.
    for node in kv_node:
        mem_client = MemcachedClientHelper.direct_client(node, self.src_bucket_name)
        mem_client.stop_persistence()
    body = self.create_save_function_body(self.function_name, self.handler_code, worker_count=3)
    try:
        task = self.cluster.async_load_gen_docs(self.master, self.src_bucket_name,
                                                self.gens_load, self.buckets[0].kvs[1],
                                                'create',
                                                compression=self.sdk_compression)
    except Exception as e:
        # Best-effort load; the test proceeds even if the loader errors.
        log.info("error while loading data")
    self.deploy_function(body, wait_for_bootstrap=False)
    # Kill memcached on Node A
    self.log.info("Killing memcached on {0}".format(kv_node[1]))
    shell = RemoteMachineShellConnection(kv_node[1])
    shell.kill_memcached()
    # Start persistence on Node B
    self.log.info("Starting persistence on {0}".format(kv_node[0]))
    mem_client = MemcachedClientHelper.direct_client(kv_node[0], self.src_bucket_name)
    mem_client.start_persistence()
    # Wait for bootstrap to complete
    self.wait_for_bootstrap_to_complete(body['appname'])
    stats_src = RestConnection(self.master).get_bucket_stats(bucket=self.src_bucket_name)
    log.info(stats_src)
    self.verify_eventing_results(self.function_name, stats_src["curr_items"],
                                 skip_stats_validation=True)
def iostats(self, nodes, frequency, verbosity=False):
    """Sample disk I/O stats from each reachable node every *frequency*
    seconds until the task is aborted, appending each sample to
    ``self._task["iostats"]``."""
    remote_shells = []
    for node in nodes:
        try:
            first_bucket = RestConnection(node).get_buckets()[0].name
            # Probe memcached connectivity before keeping this node.
            MemcachedClientHelper.direct_client(node, first_bucket)
            remote_shells.append(RemoteMachineShellConnection(node))
        except:
            pass
    self._task["iostats"] = []
    log.info("Started capturing io stats")
    while not self._aborted():
        time.sleep(frequency)
        log.info("Collecting io_stats")
        for remote_shell in remote_shells:
            try:
                io_info = self._extract_io_info(remote_shell)
            except (ValueError, TypeError, IndexError):
                continue
            kB_read, kB_wrtn, util, iowait, idle = io_info
            if kB_read and kB_wrtn:
                sample = {"time": time.time(),
                          "ip": remote_shell.ip,
                          "read": kB_read,
                          "write": kB_wrtn,
                          "util": util,
                          "iowait": iowait,
                          "idle": idle}
                self._task["iostats"].append(sample)
    log.info("Finished capturing io stats")
def test_partial_rollback(self):
    """Verify GSI index counts stay consistent across a KV partial rollback.

    Persistence is stopped on the KV nodes, doc ops run memory-only, then
    killing memcached on node A plus failing over node B forces a rollback;
    index counts are re-validated afterwards.
    """
    self.multi_create_index()
    self.sleep(30)
    self.log.info("Stopping persistence on NodeA & NodeB")
    data_nodes = self.get_nodes_from_services_map(service_type="kv",
                                                  get_all_nodes=True)
    for data_node in data_nodes:
        for bucket in self.buckets:
            mem_client = MemcachedClientHelper.direct_client(data_node, bucket.name)
            mem_client.stop_persistence()
    self.run_doc_ops()
    self.sleep(10)
    # Get count before rollback
    bucket_before_item_counts = {}
    for bucket in self.buckets:
        bucket_count_before_rollback = self.get_item_count(self.master, bucket.name)
        bucket_before_item_counts[bucket.name] = bucket_count_before_rollback
        log.info("Items in bucket {0} before rollback = {1}".format(
            bucket.name, bucket_count_before_rollback))
    # Index rollback count before rollback
    self._verify_bucket_count_with_index_count()
    self.multi_query_using_index()
    # Kill memcached on Node A so that Node B becomes master
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(data_nodes[0])
    shell.kill_memcached()
    # Start persistence on Node B
    self.log.info("Starting persistence on NodeB")
    for bucket in self.buckets:
        mem_client = MemcachedClientHelper.direct_client(data_nodes[1], bucket.name)
        mem_client.start_persistence()
    # Failover Node B
    self.log.info("Failing over NodeB")
    self.sleep(10)
    failover_task = self.cluster.async_failover(
        self.servers[:self.nodes_init], [data_nodes[1]], self.graceful,
        wait_for_pending=120)
    failover_task.result()
    # Wait for a couple of mins to allow rollback to complete
    # self.sleep(120)
    bucket_after_item_counts = {}
    for bucket in self.buckets:
        bucket_count_after_rollback = self.get_item_count(self.master, bucket.name)
        bucket_after_item_counts[bucket.name] = bucket_count_after_rollback
        log.info("Items in bucket {0} after rollback = {1}".format(
            bucket.name, bucket_count_after_rollback))
    # Equal counts mean the rollback scenario may not have triggered;
    # logged only, not failed.
    for bucket in self.buckets:
        if bucket_after_item_counts[bucket.name] == bucket_before_item_counts[bucket.name]:
            log.info("Looks like KV rollback did not happen at all.")
    self._verify_bucket_count_with_index_count()
    self.multi_query_using_index()
def get_client(self, bucket):
    """Return a memcached client for *bucket*, creating and caching one
    (with the collections HELO negotiated) on first use."""
    if bucket not in self.clients:
        new_client = MemcachedClientHelper().direct_client(self.server, bucket)
        self.clients[bucket] = new_client
        new_client.hello(memcacheConstants.FEATURE_COLLECTIONS)
    return self.clients[bucket]
def connect(self):
    """Open a direct memcached client for this server/bucket.

    Returns False when no valid ``Server`` is configured; otherwise
    stores the client on ``self.mc`` and returns True.
    """
    server = self.server
    if not (server and isinstance(server, Server)):
        return False
    self.mc = MemcachedClientHelper.direct_client(server, self.bucket)
    return True
def test_scan_consistency_post_memcached_crash(self):
    """Verify CBAS scan-consistency queries converge after a memcached crash.

    Docs are ingested into a dataset, persistence is stopped, more docs are
    mutated in memory only, memcached is killed, then the dataset count is
    polled (up to 120s) and compared against the KV-side N1QL count.
    """
    self.log.info('Load documents in the default bucket')
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create", 0, self.num_items)
    self.log.info('Create dataset')
    self.cbas_util.create_dataset_on_bucket(self.cb_bucket_name, self.cbas_dataset_name)
    self.log.info('Connect link')
    self.cbas_util.connect_link()
    self.log.info('Verify dataset count')
    self.cbas_util.validate_cbas_dataset_items_count(
        self.cbas_dataset_name, self.num_items)
    self.log.info('Stopping persistence on KV')
    mem_client = MemcachedClientHelper.direct_client(
        self.input.servers[0], self.cb_bucket_name)
    mem_client.stop_persistence()
    self.log.info('Performing Mutations')
    # Second wave of creates stays memory-only (persistence is stopped).
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create",
                                           self.num_items, self.num_items * 2)
    self.log.info('Kill Memcached process')
    shell = RemoteMachineShellConnection(self.master)
    shell.kill_memcached()
    shell.disconnect()
    self.log.info('Validate count')
    query = 'select count(*) from %s' % self.cbas_dataset_name
    dataset_count = 0
    start_time = time.time()
    output = []
    # Poll for up to 120s; queries may fail while memcached recovers.
    while time.time() < start_time + 120:
        try:
            response, _, _, results, _ = self.cbas_util.execute_statement_on_cbas_util(
                query, scan_consistency=self.scan_consistency,
                scan_wait=self.scan_wait)
            self.assertEqual(response, "success", "Query failed...")
            dataset_count = results[0]['$1']
            if dataset_count == self.num_items:
                break
        except Exception as e:
            self.log.info('Try again as memcached might be recovering...')
    self.log.info('Verify dataset count is equal to number of items in KV')
    count_n1ql = self.rest.query_tool(
        'select count(*) from %s' % self.cb_bucket_name)['results'][0]['$1']
    self.assertEqual(dataset_count, count_n1ql,
                     msg='KV-CBAS count mismatch. Actual %s, expected %s'
                         % (dataset_count, count_n1ql))
def membase_stats(self, interval=60):
    """Build direct memcached clients for every reachable node.

    Connection failures are logged and the node is skipped. In this view
    the function only establishes the client list — presumably the stats
    collection loop follows in the full source (see the fuller variant of
    ``membase_stats`` elsewhere in this file); TODO confirm.
    """
    mcs = []
    for node in self.nodes:
        try:
            bucket = RestConnection(node).get_buckets()[0].name
            mc = MemcachedClientHelper.direct_client(node, bucket)
            mcs.append(mc)
        # `except Exception as error` replaces the Python-2-only
        # `except Exception, error` for consistency with the rest of the
        # file (works on Python 2.6+ and 3).
        except Exception as error:
            log.error(error)
def connect(self):
    """Establish ``self.mc``, a direct memcached client.

    Refuses (returns False) unless ``self.server`` is a ``Server``
    instance; returns True once connected.
    """
    if not self.server:
        return False
    if not isinstance(self.server, Server):
        return False
    self.mc = MemcachedClientHelper.direct_client(self.server, self.bucket)
    return True
def send_json(server_info, job=None):
    """Fetch last-build Jenkins job stats (optionally scoped to *job*)
    and store each JSON result in the stats bucket; exits the process on
    the first memcached write failure."""
    stats_kwargs = {"onlyLastBuild": True}
    if job:
        stats_kwargs["job_url"] = JENKINS_URL + 'job/' + job
    jobs = get_jobs_stats(**stats_kwargs)
    jsons = build_json_result(jobs)
    client = MemcachedClientHelper.direct_client(server_info, CB_BUCKET_NAME)
    for key, rq in jsons:
        try:
            client.set(key, 0, 0, rq)
        except Exception as ex:
            sys.exit(str(ex))
def send_json(server_info, job=None):
    """Fetch last-build Jenkins job stats (optionally for a single *job*)
    and persist each JSON result into the stats bucket.

    Exits the process with the error text on the first failed write.
    """
    if job:
        jobs = get_jobs_stats(onlyLastBuild=True,
                              job_url=JENKINS_URL + 'job/' + job)
    else:
        jobs = get_jobs_stats(onlyLastBuild=True)
    jsons = build_json_result(jobs)
    client = MemcachedClientHelper.direct_client(server_info, CB_BUCKET_NAME)
    for key, rq in jsons:
        try:
            client.set(key, 0, 0, rq)
        # `as ex` replaces the Python-2-only `except Exception, ex`,
        # matching the identical twin of this function elsewhere in the
        # file (valid on Python 2.6+ and 3).
        except Exception as ex:
            sys.exit(str(ex))
def membase_stats(self, nodes, bucket, frequency, verbose=False):
    """Collect memcached stats snapshots from every reachable node.

    Every ``frequency`` seconds (checking for abort at least once a
    minute), each node's base + memory stats are captured with up to
    ``RETRIES`` reconnect attempts, followed by best-effort 'timings'
    and 'dispatcher' stats.

    Note: the ``bucket`` parameter is immediately shadowed by each
    node's first bucket name — kept as-is to preserve the interface.
    """
    mcs = []
    for node in nodes:
        try:
            bucket = RestConnection(node).get_buckets()[0].name
            mcs.append(MemcachedClientHelper.direct_client(node, bucket))
        except:
            pass
    self._task["membasestats"] = []
    self._task["timings"] = []
    self._task["dispatcher"] = []
    d = {}
    # "pname": "x", "pid": "y","snapshots": [{"time": time,"value": value}]
    for mc in mcs:
        d[mc.host] = {"snapshots": [], "timings": [], "dispatcher": []}
    while not self._aborted():
        time_left = frequency
        log.info("Collecting membase stats")
        timings = None
        # at minimum we want to check for aborted every minute
        while not self._aborted() and time_left > 0:
            time.sleep(min(time_left, 60))
            time_left -= 60
        for mc in mcs:
            retries = 0
            stats = {}
            while not stats and retries < RETRIES:
                try:
                    stats = mc.stats()
                    # Newer servers expose 'raw memory'; fall back to
                    # plain 'memory' for older ones.
                    try:
                        mem_stats = mc.stats('raw memory')
                    except MemcachedError:
                        mem_stats = mc.stats('memory')
                    stats.update(mem_stats)
                except Exception as e:
                    log.error("{0}, retries = {1}".format(str(e), retries))
                    time.sleep(2)
                    mc.reconnect()
                    retries += 1
                    continue
            stats["time"] = time.time()
            stats["ip"] = mc.host
            d[mc.host]["snapshots"].append(stats)
            try:
                timings = mc.stats('timings')
                d[mc.host]["timings"].append(timings)
                dispatcher = mc.stats('dispatcher')
                d[mc.host]["dispatcher"].append(dispatcher)
            # `as e` replaces the Python-2-only `except EOFError, e` —
            # this function already used `except ... as` above, so the
            # old clause was internally inconsistent and a Python 3
            # syntax error.
            except EOFError as e:
                log.error("Unable to get timings/dispatcher stats {0}: {1}"\
                          .format(mc.host, e))
def capture_mb_snapshot(self, node):
    """Capture membase stats snapshot manually.

    Collects base + warmup stats from *node*'s first bucket; returns
    False on any failure. In this view the function ends after the error
    path — presumably the success handling follows in the full source
    (see the fuller variants of this method in the file); TODO confirm.
    """
    log.info("capturing memcache stats snapshot for {0}".format(node.ip))
    stats = {}
    try:
        bucket = RestConnection(node).get_buckets()[0].name
        mc = MemcachedClientHelper.direct_client(node, bucket)
        stats = mc.stats()
        stats.update(mc.stats("warmup"))
    # `as e` replaces the Python-2-only `except Exception, e` syntax for
    # consistency with the Python-3 variants of this method in the file.
    except Exception as e:
        log.error(e)
        return False
def capture_mb_snapshot(self, node):
    """Capture membase stats snapshot manually.

    Collects base + warmup stats from *node*'s first bucket. The
    (possibly partial) snapshot is always timestamped and appended to
    ``self._mb_stats["snapshots"]``. Returns True on success, False on
    any collection failure.
    """
    log.info("capturing memcache stats snapshot for {0}".format(node.ip))
    stats = {}
    try:
        bucket = RestConnection(node).get_buckets()[0].name
        mc = MemcachedClientHelper.direct_client(node, bucket)
        stats = mc.stats()
        stats.update(mc.stats("warmup"))
    except Exception as e:
        log.error(e)
        # After the finally block records the partial snapshot, this
        # False propagates to the caller. (Previously `return True` sat
        # *inside* the finally clause, which overrode this return — the
        # function could never report failure.)
        return False
    finally:
        # Always record whatever was collected, even a partial snapshot.
        stats["time"] = time.time()
        stats["ip"] = node.ip
        self._mb_stats["snapshots"].append(stats)
        print(stats)
    log.info("memcache stats snapshot captured")
    return True
def capture_mb_snapshot(self, node):
    """Capture membase stats snapshot manually.

    Collects base + warmup stats from *node*'s first bucket. The
    (possibly partial) snapshot is always timestamped and appended to
    ``self._mb_stats["snapshots"]``. Returns True on success, False on
    any collection failure.
    """
    log.info("Capturing memcache stats snapshot for {0}".format(node.ip))
    stats = {}
    try:
        bucket = RestConnection(node).get_buckets()[0].name
        mc = MemcachedClientHelper.direct_client(node, bucket)
        stats = mc.stats()
        stats.update(mc.stats("warmup"))
    except Exception as e:
        log.error("Exception: {0}".format(str(e)))
        # After the finally block records the partial snapshot, this
        # False propagates. (Previously `return True` lived *inside* the
        # finally clause and overrode it — the method could never return
        # False.)
        return False
    finally:
        # Always record whatever was collected, even a partial snapshot.
        stats["time"] = time.time()
        stats["ip"] = node.ip
        self._mb_stats["snapshots"].append(stats)
        # print() call form replaces the Python-2-only `print stats`
        # statement; behaves identically for a single argument.
        print(stats)
    log.info("Memcache stats snapshot captured")
    return True
def test_partial_rollback(self):
    """Verify GSI index counts stay consistent across a KV partial rollback.

    Persistence is stopped on the KV nodes, doc ops run memory-only, then
    killing memcached on node A plus failing over node B forces a rollback;
    index counts are re-validated afterwards.
    """
    self.multi_create_index()
    self.sleep(30)
    self.log.info("Stopping persistence on NodeA & NodeB")
    data_nodes = self.get_nodes_from_services_map(service_type="kv",
                                                  get_all_nodes=True)
    for data_node in data_nodes:
        for bucket in self.buckets:
            mem_client = MemcachedClientHelper.direct_client(
                data_node, bucket.name)
            mem_client.stop_persistence()
    self.run_doc_ops()
    self.sleep(10)
    # Get count before rollback
    bucket_before_item_counts = {}
    for bucket in self.buckets:
        bucket_count_before_rollback = self.get_item_count(
            self.master, bucket.name)
        bucket_before_item_counts[
            bucket.name] = bucket_count_before_rollback
        log.info("Items in bucket {0} before rollback = {1}".format(
            bucket.name, bucket_count_before_rollback))
    # Index rollback count before rollback
    self._verify_bucket_count_with_index_count()
    self.multi_query_using_index()
    # Kill memcached on Node A so that Node B becomes master
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(data_nodes[0])
    shell.kill_memcached()
    # Start persistence on Node B
    self.log.info("Starting persistence on NodeB")
    for bucket in self.buckets:
        mem_client = MemcachedClientHelper.direct_client(
            data_nodes[1], bucket.name)
        mem_client.start_persistence()
    # Failover Node B
    self.log.info("Failing over NodeB")
    self.sleep(10)
    failover_task = self.cluster.async_failover(
        self.servers[:self.nodes_init], [data_nodes[1]], self.graceful,
        wait_for_pending=120)
    failover_task.result()
    # Wait for a couple of mins to allow rollback to complete
    # self.sleep(120)
    bucket_after_item_counts = {}
    for bucket in self.buckets:
        bucket_count_after_rollback = self.get_item_count(
            self.master, bucket.name)
        bucket_after_item_counts[bucket.name] = bucket_count_after_rollback
        log.info("Items in bucket {0} after rollback = {1}".format(
            bucket.name, bucket_count_after_rollback))
    # Equal counts mean the rollback may not have triggered; logged only.
    for bucket in self.buckets:
        if bucket_after_item_counts[
                bucket.name] == bucket_before_item_counts[bucket.name]:
            log.info("Looks like KV rollback did not happen at all.")
    self._verify_bucket_count_with_index_count()
    self.multi_query_using_index()
def partial_rollback(self):
    """Verify FTS index behaviour across a KV partial rollback.

    Mutations are made with persistence stopped on two nodes; killing
    memcached on the master and failing over node B rolls the bucket
    back. For delete workloads the index doc count must rise after the
    rollback; for updates the 'mutated' hit count must fall. Optionally
    also fails over an FTS node and checks hits are unchanged.
    """
    bucket = self._cb_cluster.get_bucket_by_name("default")
    self._cb_cluster.flush_buckets([bucket])
    index = self.create_index(bucket, "default_index")
    self.load_data()
    self.wait_for_indexing_complete()
    # Stop Persistence on Node A & Node B
    # NOTE(review): direct_client receives the Bucket object here while
    # other call sites in this file pass bucket.name — presumably the
    # helper accepts both; confirm.
    mem_client = MemcachedClientHelper.direct_client(self._input.servers[0],
                                                     bucket)
    mem_client.stop_persistence()
    mem_client = MemcachedClientHelper.direct_client(self._input.servers[1],
                                                     bucket)
    mem_client.stop_persistence()
    # Perform mutations on the bucket
    self.async_perform_update_delete(self.upd_del_fields)
    if self._update:
        self.sleep(60, "Waiting for updates to get indexed...")
    self.wait_for_indexing_complete()
    # Run FTS Query to fetch the initial count of mutated items
    query = "{\"query\": \"mutated:>0\"}"
    query = json.loads(query)
    for index in self._cb_cluster.get_indexes():
        hits1, _, _, _ = index.execute_query(query)
        self.log.info("Hits before rollback: %s" % hits1)
    # Fetch count of docs in index and bucket
    # (uses the last `index` from the loop above)
    before_index_doc_count = index.get_indexed_doc_count()
    before_bucket_doc_count = index.get_src_bucket_doc_count()
    self.log.info("Docs in Bucket : %s, Docs in Index : %s" % (
        before_bucket_doc_count, before_index_doc_count))
    # Kill memcached on Node A so that Node B becomes master
    shell = RemoteMachineShellConnection(self._master)
    shell.kill_memcached()
    # Start persistence on Node B
    mem_client = MemcachedClientHelper.direct_client(self._input.servers[1],
                                                     bucket)
    mem_client.start_persistence()
    # Failover Node B
    failover_task = self._cb_cluster.async_failover(
        node=self._input.servers[1])
    failover_task.result()
    # Wait for Failover & FTS index rollback to complete
    self.sleep(10)
    # Run FTS query to fetch count of mutated items post rollback.
    for index in self._cb_cluster.get_indexes():
        hits2, _, _, _ = index.execute_query(query)
        self.log.info("Hits after rollback: %s" % hits2)
    # Fetch count of docs in index and bucket
    after_index_doc_count = index.get_indexed_doc_count()
    after_bucket_doc_count = index.get_src_bucket_doc_count()
    self.log.info("Docs in Bucket : %s, Docs in Index : %s" % (
        after_bucket_doc_count, after_index_doc_count))
    # Validation : If there are deletes, validate the #docs in index goes up post rollback
    if self._input.param("delete", False):
        self.assertGreater(after_index_doc_count, before_index_doc_count,
                           "Deletes : Index count after rollback not greater than before rollback")
    else:
        # For Updates, validate that #hits goes down in the query output post rollback
        self.assertGreater(hits1, hits2,
                           "Mutated items before rollback are not more than after rollback")
    # Failover FTS node
    failover_fts_node = self._input.param("failover_fts_node", False)
    if failover_fts_node:
        failover_task = self._cb_cluster.async_failover(
            node=self._input.servers[2])
        failover_task.result()
        self.sleep(10)
        # Run FTS query to fetch count of mutated items post FTS node failover.
        for index in self._cb_cluster.get_indexes():
            hits3, _, _, _ = index.execute_query(query)
            self.log.info(
                "Hits after rollback and failover of primary FTS node: %s" % hits3)
            self.assertEqual(hits2, hits3,
                             "Mutated items after FTS node failover are not equal to that after rollback")
def test_rollback(self):
    """Verify XDCR replicates correctly after a source-side KV rollback.

    Persistence is stopped on source nodes before loading/mutating, then
    memcached is killed on the master and node B is failed over, forcing
    a DCP rollback. Outbound mutations must still reach the target, and
    goxdcr logs must show the rollback was received.
    """
    bucket = self.src_cluster.get_buckets()[0]
    nodes = self.src_cluster.get_nodes()
    # Stop Persistence on Node A & Node B
    # NOTE(review): direct_client receives the Bucket object here while
    # other call sites pass bucket.name — presumably the helper accepts
    # both; confirm.
    for node in nodes:
        mem_client = MemcachedClientHelper.direct_client(node, bucket)
        mem_client.stop_persistence()
    goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0])\
                 + '/goxdcr.log*'
    self.setup_xdcr()
    self.src_cluster.pause_all_replications()
    gen = BlobGenerator("C1-", "C1-", self._value_size, end=self._num_items)
    self.src_cluster.load_all_buckets_from_generator(gen)
    self.src_cluster.resume_all_replications()
    # Perform mutations on the bucket
    self.async_perform_update_delete()
    rest1 = RestConnection(self.src_cluster.get_master_node())
    rest2 = RestConnection(self.dest_cluster.get_master_node())
    # Fetch count of docs in src and dest cluster
    _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
    _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
    self.log.info("Before rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))
    # Kill memcached on Node A so that Node B becomes master
    shell = RemoteMachineShellConnection(self.src_cluster.get_master_node())
    shell.kill_memcached()
    # Start persistence on Node B
    mem_client = MemcachedClientHelper.direct_client(nodes[1], bucket)
    mem_client.start_persistence()
    # Failover Node B
    failover_task = self.src_cluster.async_failover()
    failover_task.result()
    # Wait for Failover & rollback to complete
    self.sleep(60)
    # Fetch count of docs in src and dest cluster
    _count1 = rest1.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
    _count2 = rest2.fetch_bucket_stats(bucket=bucket.name)["op"]["samples"]["curr_items"][-1]
    self.log.info("After rollback src cluster count = {0} dest cluster count = {1}".format(_count1, _count2))
    self.assertTrue(self.src_cluster.wait_for_outbound_mutations(),
                    "Mutations in source cluster not replicated to target after rollback")
    self.log.info("Mutations in source cluster replicated to target after rollback")
    # Confirm the rollback actually occurred by grepping the goxdcr log.
    count = NodeHelper.check_goxdcr_log(
        nodes[0], "Received rollback from DCP stream", goxdcr_log)
    self.assertGreater(count, 0, "rollback did not happen as expected")
    self.log.info("rollback happened as expected")
def test_rebalance_kv_rollback_create_ops(self):
    """Verify CBAS ingestion converges after a KV rollback during rebalance.

    Creates extra docs with persistence stopped, starts removing a CBAS
    node (rebalance in flight), kills memcached so KV rolls back, then
    polls until the CBAS dataset count drops to/below the pre-mutation
    count and finally re-converges with the KV bucket count.
    """
    self.setup_for_test()
    items_before_persistence_stop = self.cbas_util.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)[0]
    self.log.info("Items in CBAS before persistence stop: %s" %
                  items_before_persistence_stop)
    # Stop Persistence on Node A & Node B
    self.log.info("Stopping persistence on NodeA")
    mem_client = MemcachedClientHelper.direct_client(
        self.master, self.cb_bucket_name)
    mem_client.stop_persistence()
    # Perform Create, Update, Delete ops in the CB bucket
    self.log.info("Performing Mutations")
    # NOTE(review): under Python 3 `/` yields floats here — presumably
    # this suite runs on Python 2 or the helper coerces to int; confirm.
    self.perform_doc_ops_in_all_cb_buckets(self.num_items / 2, "create",
                                           self.num_items,
                                           self.num_items * 3 / 2)
    kv_nodes = self.get_kv_nodes(self.servers, self.master)
    items_in_cb_bucket = 0
    if self.where_field and self.where_value:
        items_in_cb_bucket = RestConnection(self.master).query_tool(
            'select count(*) from %s where %s = "%s"' %
            (self.cb_bucket_name, self.where_field,
             self.where_value))['results'][0]['$1']
    else:
        for node in kv_nodes:
            items_in_cb_bucket += self.get_item_count(
                node, self.cb_bucket_name)
    # Validate no. of items in CBAS dataset
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, items_in_cb_bucket, 0),
        "No. of items in CBAS dataset do not match that in the CB bucket")
    # Count no. of items in CB & CBAS Buckets
    items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    self.assertTrue(
        items_in_cb_bucket == items_in_cbas_bucket,
        "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket"
    )
    # Start removing a CBAS node without waiting, so the rollback below
    # happens while the rebalance is still running. Removing the CC node
    # requires reconnecting the CBAS util to a surviving node.
    if self.CC:
        self.cluster_util.remove_node([self.otpNodes[0]],
                                      wait_for_rebalance=False)
        self.cbas_util.closeConn()
        self.cbas_util = cbas_utils(self.master, self.cbas_servers[0])
        self.cbas_util.createConn("default")
    else:
        self.cluster_util.remove_node([self.otpNodes[1]],
                                      wait_for_rebalance=False)
    # Kill memcached on Node A so that Node B becomes master
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(self.master)
    shell.kill_memcached()
    self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.")
    # Poll (up to ~120s) until the dataset count falls back to or below
    # the pre-mutation count, i.e. the rollback reached CBAS.
    curr = time.time()
    while items_in_cbas_bucket == -1 or (
            items_in_cbas_bucket != 0 and
            items_in_cbas_bucket > items_before_persistence_stop):
        try:
            if curr + 120 < time.time():
                break
            items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
            self.log.info("Items in CBAS: %s" % items_in_cbas_bucket)
        except:
            self.log.info(
                "Probably rebalance is in progress and the reason for queries being failing."
            )
            pass
    self.assertTrue(items_in_cbas_bucket <= items_before_persistence_stop,
                    "Roll-back did not happen.")
    self.log.info("#######BINGO########\nROLLBACK HAPPENED")
    # Poll (up to ~120s) until CB and CBAS counts agree again.
    items_in_cb_bucket = 0
    curr = time.time()
    while items_in_cb_bucket != items_in_cbas_bucket or items_in_cb_bucket == 0:
        items_in_cb_bucket = 0
        items_in_cbas_bucket = 0
        if self.where_field and self.where_value:
            try:
                items_in_cb_bucket = RestConnection(
                    self.master).query_tool(
                        'select count(*) from %s where %s = "%s"' %
                        (self.cb_bucket_name, self.where_field,
                         self.where_value))['results'][0]['$1']
            except:
                self.log.info(
                    "Indexer in rollback state. Query failed. Pass and move ahead."
                )
                pass
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count(
                    node, self.cb_bucket_name)
        self.log.info("Items in CB bucket after rollback: %s" %
                      items_in_cb_bucket)
        try:
            items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        except:
            pass
        if curr + 120 < time.time():
            break
    # Let the in-flight rebalance finish (max ~300s).
    str_time = time.time()
    while self.rest._rebalance_progress_status(
    ) == "running" and time.time() < str_time + 300:
        self.sleep(1)
        self.log.info("Waiting for rebalance to complete")
    self.log.info(
        "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    self.assertTrue(
        items_in_cb_bucket == items_in_cbas_bucket,
        "After Rollback : # Items in CBAS bucket does not match that in the CB bucket"
    )
def test_ingestion_after_kv_rollback_cbas_disconnected(self):
    """Verify CBAS re-ingests correctly after a KV rollback that occurs
    while the CBAS bucket is disconnected.

    Deletes docs with persistence stopped, disconnects CBAS, kills
    memcached (rolling the deletes back), optionally kills the CBAS
    process too, then reconnects and polls until CB and CBAS counts
    converge.
    """
    self.setup_for_test()
    # Stop Persistence on Node A & Node B
    self.log.info("Stopping persistence on NodeA")
    mem_client = MemcachedClientHelper.direct_client(
        self.master, self.cb_bucket_name)
    mem_client.stop_persistence()
    # Perform Create, Update, Delete ops in the CB bucket
    self.log.info("Performing Mutations")
    # NOTE(review): `/` is float division on Python 3 — presumably this
    # suite runs on Python 2 or the helper coerces to int; confirm.
    self.perform_doc_ops_in_all_cb_buckets(self.num_items / 2, "delete", 0,
                                           self.num_items / 2)
    # Count no. of items in CB & CBAS Buckets
    kv_nodes = self.get_kv_nodes(self.servers, self.master)
    items_in_cb_bucket = 0
    for node in kv_nodes:
        items_in_cb_bucket += self.get_item_count(node, self.cb_bucket_name)
    items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    items_before_rollback = items_in_cbas_bucket
    self.log.info(
        "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    self.assertTrue(
        items_in_cb_bucket == items_in_cbas_bucket,
        "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket"
    )
    self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name)
    # Kill memcached on Node A so that Node B becomes master
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(self.master)
    shell.kill_memcached()
    # self.sleep(10,"Wait for 10 secs for memcached restarts.")
    # Optionally also kill the analytics service to exercise its restart.
    if self.input.param('kill_cbas', False):
        shell = RemoteMachineShellConnection(self.cbas_node)
        shell.kill_process("/opt/couchbase/lib/cbas/runtime/bin/java",
                           "java")
        shell.kill_process("/opt/couchbase/bin/cbas", "cbas")
    # Retry the bucket reconnect (~60 attempts, 2s apart) while services
    # come back.
    tries = 60
    result = False
    while tries > 0 and not result:
        try:
            result = self.cbas_util.connect_to_bucket(
                self.cbas_bucket_name)
            tries -= 1
        except:
            pass
        self.sleep(2)
    self.assertTrue(
        result,
        "CBAS connect bucket failed after memcached killed on KV node.")
    # Poll (up to ~120s) until the dataset count rises above the
    # pre-rollback count (the rolled-back deletes reappear).
    curr = time.time()
    while items_in_cbas_bucket != 0 and items_in_cbas_bucket <= items_before_rollback:
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        if curr + 120 < time.time():
            break
    self.assertTrue(items_in_cbas_bucket > items_before_rollback,
                    "Roll-back did not happen.")
    self.log.info("#######BINGO########\nROLLBACK HAPPENED")
    # Poll (up to ~120s) until CB and CBAS counts agree again.
    curr = time.time()
    while items_in_cb_bucket != items_in_cbas_bucket:
        items_in_cb_bucket = 0
        items_in_cbas_bucket = 0
        if self.where_field and self.where_value:
            try:
                items_in_cb_bucket = RestConnection(
                    self.master).query_tool(
                        'select count(*) from %s where %s = "%s"' %
                        (self.cb_bucket_name, self.where_field,
                         self.where_value))['results'][0]['$1']
            except:
                self.log.info(
                    "Indexer in rollback state. Query failed. Pass and move ahead."
                )
                pass
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count(
                    node, self.cb_bucket_name)
        self.log.info("Items in CB bucket after rollback: %s" %
                      items_in_cb_bucket)
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        if curr + 120 < time.time():
            break
    # Count no. of items in CB & CBAS Buckets
    items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    self.assertTrue(
        items_in_cb_bucket == items_in_cbas_bucket,
        "After Rollback : # Items in CBAS bucket does not match that in the CB bucket"
    )
def test_ingestion_after_kv_rollback(self):
    """Verify CBAS ingestion stays consistent after a KV rollback.

    Deletes half the docs with persistence stopped on two nodes, kills
    memcached on the master and fails over node B (rolling the deletes
    back), then checks CB and CBAS doc counts match.
    """
    self.setup_for_test()
    # Stop Persistence on Node A & Node B
    self.log.info("Stopping persistence on NodeA & NodeB")
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[0],
                                                     self.cb_bucket_name)
    mem_client.stop_persistence()
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[1],
                                                     self.cb_bucket_name)
    mem_client.stop_persistence()
    # Perform Create, Update, Delete ops in the CB bucket
    self.log.info("Performing Mutations")
    # NOTE(review): `/` is float division on Python 3 — presumably this
    # suite runs on Python 2 or the helpers coerce to int; confirm.
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "delete", 0,
                                           self.num_items / 2)
    # Validate no. of items in CBAS dataset
    if not self.validate_cbas_dataset_items_count(self.cbas_dataset_name,
                                                  self.num_items / 2, 0):
        self.fail(
            "No. of items in CBAS dataset do not match that in the CB bucket")
    # Count no. of items in CB & CBAS Buckets
    items_in_cb_bucket = self.get_item_count(self.master,
                                             self.cb_bucket_name)
    items_in_cbas_bucket, _ = self.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    if items_in_cb_bucket != items_in_cbas_bucket:
        self.fail(
            "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket")
    # Kill memcached on Node A so that Node B becomes master
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(self.master)
    shell.kill_memcached()
    # Start persistence on Node B
    self.log.info("Starting persistence on NodeB")
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[1],
                                                     self.cb_bucket_name)
    mem_client.start_persistence()
    # Failover Node B
    self.log.info("Failing over NodeB")
    self.sleep(10)
    failover_task = self._cb_cluster.async_failover(
        self.input.servers, [self.input.servers[1]])
    failover_task.result()
    # Wait for Failover & CBAS rollback to complete
    self.sleep(120)
    # Count no. of items in CB & CBAS Buckets
    items_in_cb_bucket = self.get_item_count(self.master,
                                             self.cb_bucket_name)
    items_in_cbas_bucket, _ = self.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    if items_in_cb_bucket != items_in_cbas_bucket:
        self.fail(
            "After Rollback : # Items in CBAS bucket does not match that in the CB bucket")
def membase_stats(self, interval=60):
    """Collect memcached stats from every node until the task is aborted.

    Connects a direct memcached client per node (first bucket only),
    then polls plain stats plus "timings" and "dispatcher" stats every
    `interval` seconds.  On abort, the accumulated snapshots are tagged
    with a per-host unique id and flushed into self._task.
    """
    mcs = []
    for node in self.nodes:
        try:
            # One client per node, pointed at the node's first bucket.
            bucket = RestConnection(node).get_buckets()[0].name
            mc = MemcachedClientHelper.direct_client(node, bucket)
            mcs.append(mc)
        except Exception as error:
            # Best-effort: a node without a reachable bucket is skipped.
            log.error(error)
    self._task["membasestats"] = []
    self._task["timings"] = []
    self._task["dispatcher"] = []
    # Per-host accumulators, filled during the polling loop below.
    data = dict()
    for mc in mcs:
        data[mc.host] = {"snapshots": [], "timings": [], "dispatcher": []}
    while not self._aborted():
        time.sleep(interval)
        log.info("collecting membase stats")
        for mc in mcs:
            # Retry the stats call, reconnecting between attempts; the
            # for/else falls through to an empty dict when all retries fail.
            for rerty in range(RETRIES):
                try:
                    stats = mc.stats()
                except Exception as e:
                    log.warn("{0}, retries = {1}".format(str(e), rerty))
                    time.sleep(2)
                    mc.reconnect()
                else:
                    break
            else:
                stats = {}
            data[mc.host]["snapshots"].append(stats)
            for arg in ("timings", "dispatcher"):
                try:
                    stats = mc.stats(arg)
                    data[mc.host][arg].append(stats)
                except EOFError as e:
                    log.error("unable to get {0} stats {1}: {2}".format(
                        arg, mc.host, e))
    # Polling finished (task aborted): tag and publish everything collected.
    for host in (mc.host for mc in mcs):
        unique_id = host + '-' + str(self._task["time"])
        current_time = time.time()
        if self._mb_stats["snapshots"]:
            # use manually captured stats
            self._task["membasestats"] = self._mb_stats["snapshots"]
        else:
            # use periodically captured stats
            for snapshot in data[host]["snapshots"]:
                snapshot["unique_id"] = unique_id
                snapshot["time"] = current_time
                snapshot["ip"] = host
                self._task["membasestats"].append(snapshot)
        for timing in data[host]["timings"]:
            timing["unique_id"] = unique_id
            timing["time"] = current_time
            timing["ip"] = host
            self._task["timings"].append(timing)
        for dispatcher in data[host]["dispatcher"]:
            dispatcher["unique_id"] = unique_id
            dispatcher["time"] = current_time
            dispatcher["ip"] = host
            self._task["dispatcher"].append(dispatcher)
        if data[host]["timings"]:
            # Dump only the most recent disk-related timing counters.
            log.info("dumping disk timing stats: {0}".format(host))
            latests_timings = data[host]["timings"][-1]
            for key, value in sorted(latests_timings.items()):
                if key.startswith("disk"):
                    print("{0:50s}: {1}".format(key, value))
    log.info("finished membase_stats")
def test_ingestion_after_kv_rollback(self):
    """Verify the CBAS dataset rolls back in step with the KV bucket.

    Flow: stop persistence on nodes A & B so subsequent deletes stay
    in memory only, delete half the documents and confirm CBAS ingested
    them, then kill memcached on node A and fail over node B so the
    un-persisted mutations are rolled back. Finally confirm CB and CBAS
    item counts agree again.
    """
    self.setup_for_test()

    # Stop persistence on Node A & Node B -- the deletes below must not
    # reach disk, otherwise there is nothing to roll back.
    self.log.info("Stopping persistence on NodeA & NodeB")
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[0],
                                                     self.cb_bucket_name)
    mem_client.stop_persistence()
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[1],
                                                     self.cb_bucket_name)
    mem_client.stop_persistence()

    # Perform delete ops in the CB bucket.
    # NOTE: // (integer division) -- num_items / 2 is a float on Python 3
    # and doc-count arguments must be ints.
    self.log.info("Performing Mutations")
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "delete", 0,
                                           self.num_items // 2)

    # Validate no. of items in CBAS dataset.
    if not self.validate_cbas_dataset_items_count(self.cbas_dataset_name,
                                                  self.num_items // 2, 0):
        self.fail(
            "No. of items in CBAS dataset do not match that in the CB bucket")

    # Snapshot item counts in CB & CBAS before the rollback.
    items_in_cb_bucket = self.get_item_count(self.master,
                                             self.cb_bucket_name)
    items_in_cbas_bucket, _ = self.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    if items_in_cb_bucket != items_in_cbas_bucket:
        self.fail(
            "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket")

    # Kill memcached on Node A so that Node B becomes master.
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(self.master)
    shell.kill_memcached()

    # Start persistence on Node B so its data survives the failover.
    self.log.info("Starting persistence on NodeB")
    mem_client = MemcachedClientHelper.direct_client(self.input.servers[1],
                                                     self.cb_bucket_name)
    mem_client.start_persistence()

    # Failover Node B.
    self.log.info("Failing over NodeB")
    self.sleep(10)
    failover_task = self._cb_cluster.async_failover(self.input.servers,
                                                    [self.input.servers[1]])
    failover_task.result()

    # Wait for failover & CBAS rollback to complete.
    self.sleep(60)

    # Re-count items in CB & CBAS -- they must match after the rollback.
    items_in_cb_bucket = self.get_item_count(self.master,
                                             self.cb_bucket_name)
    items_in_cbas_bucket, _ = self.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info(
        "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    if items_in_cb_bucket != items_in_cbas_bucket:
        self.fail(
            "After Rollback : # Items in CBAS bucket does not match that in the CB bucket")
def test_rollback(self):
    """Check XDCR still replicates source mutations after a KV rollback.

    Persistence is stopped on all source nodes so mutations stay
    memory-only; killing memcached on the master and failing over the
    second node then forces a DCP rollback.  The test asserts outbound
    mutations still reach the target and that the goxdcr log recorded
    the rollback.
    """
    src_bucket = self.src_cluster.get_buckets()[0]
    src_nodes = self.src_cluster.get_nodes()

    # Keep mutations in memory only: stop persistence on every source node.
    for src_node in src_nodes:
        client = MemcachedClientHelper.direct_client(src_node, src_bucket)
        client.stop_persistence()

    goxdcr_log = NodeHelper.get_goxdcr_log_dir(self._input.servers[0]) \
        + '/goxdcr.log*'

    self.setup_xdcr()
    self.src_cluster.pause_all_replications()
    doc_gen = BlobGenerator("C1-", "C1-", self._value_size,
                            end=self._num_items)
    self.src_cluster.load_all_buckets_from_generator(doc_gen)
    self.src_cluster.resume_all_replications()

    # Mutate documents in the source bucket while replication runs.
    self.async_perform_update_delete()

    src_rest = RestConnection(self.src_cluster.get_master_node())
    dest_rest = RestConnection(self.dest_cluster.get_master_node())

    # Snapshot item counts on both clusters before the rollback.
    src_count = src_rest.fetch_bucket_stats(
        bucket=src_bucket.name)["op"]["samples"]["curr_items"][-1]
    dest_count = dest_rest.fetch_bucket_stats(
        bucket=src_bucket.name)["op"]["samples"]["curr_items"][-1]
    self.log.info("Before rollback src cluster count = {0} dest cluster count = {1}".format(src_count, dest_count))

    # Kill memcached on Node A so that Node B becomes master.
    master_shell = RemoteMachineShellConnection(
        self.src_cluster.get_master_node())
    master_shell.kill_memcached()

    # Persist Node B's data so the failover produces a rollback.
    client = MemcachedClientHelper.direct_client(src_nodes[1], src_bucket)
    client.start_persistence()

    # Fail over Node B, then give failover & rollback time to finish.
    failover_task = self.src_cluster.async_failover()
    failover_task.result()
    self.sleep(60)

    # Re-fetch item counts on both clusters after the rollback.
    src_count = src_rest.fetch_bucket_stats(
        bucket=src_bucket.name)["op"]["samples"]["curr_items"][-1]
    dest_count = dest_rest.fetch_bucket_stats(
        bucket=src_bucket.name)["op"]["samples"]["curr_items"][-1]
    self.log.info("After rollback src cluster count = {0} dest cluster count = {1}".format(src_count, dest_count))

    self.assertTrue(
        self.src_cluster.wait_for_outbound_mutations(),
        "Mutations in source cluster not replicated to target after rollback")
    self.log.info("Mutations in source cluster replicated to target after rollback")

    # The goxdcr log must show the DCP rollback actually occurred.
    _, match_count = NodeHelper.check_goxdcr_log(
        src_nodes[0],
        "Received rollback from DCP stream",
        goxdcr_log, timeout=60)
    self.assertGreater(match_count, 0, "rollback did not happen as expected")
    self.log.info("rollback happened as expected")
def test_ingestion_after_kv_rollback_delete_ops(self):
    """Verify CBAS rolls back deletes after memcached dies pre-persistence.

    Persistence is stopped on the master, half the documents are deleted
    (memory-only), and CBAS is expected to ingest the deletes.  Killing
    memcached then discards the un-persisted deletes, so the dataset must
    roll back (item count grows past its pre-rollback value) and then
    re-converge with the KV bucket.
    """
    self.setup_for_test()

    # Stop persistence so the deletes below never reach disk.
    self.log.info("Stopping persistence on NodeA")
    mem_client = MemcachedClientHelper.direct_client(self.master,
                                                     self.cb_bucket_name)
    mem_client.stop_persistence()

    # Delete half the documents.
    # NOTE: // (integer division) -- num_items / 2 is a float on Python 3
    # and doc-count arguments must be ints.
    self.log.info("Performing Mutations")
    self.perform_doc_ops_in_all_cb_buckets(self.num_items // 2, "delete", 0,
                                           self.num_items // 2)

    kv_nodes = self.get_kv_nodes(self.servers, self.master)
    items_in_cb_bucket = 0
    if self.where_field and self.where_value:
        # Filtered dataset: count only the matching documents via N1QL.
        items_in_cb_bucket = RestConnection(self.master).query_tool(
            'select count(*) from %s where %s = "%s"' % (
                self.cb_bucket_name, self.where_field,
                self.where_value))['results'][0]['$1']
    else:
        for node in kv_nodes:
            items_in_cb_bucket += self.get_item_count(node,
                                                      self.cb_bucket_name)

    # Validate no. of items in CBAS dataset matches KV before the rollback.
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, items_in_cb_bucket, 0),
        "No. of items in CBAS dataset do not match that in the CB bucket")

    items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    items_before_rollback = items_in_cbas_bucket
    self.log.info(
        "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    self.assertTrue(
        items_in_cb_bucket == items_in_cbas_bucket,
        "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket"
    )

    # Kill memcached on Node A so the un-persisted deletes are lost and
    # a DCP rollback is sent to CBAS.
    self.log.info("Kill Memcached process on NodeA")
    shell = RemoteMachineShellConnection(self.master)
    shell.kill_memcached()
    self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.")

    # Poll (up to 120s) until the dataset count climbs back above the
    # pre-rollback count, i.e. the deletes were undone.
    curr = time.time()
    while items_in_cbas_bucket != 0 and items_in_cbas_bucket <= items_before_rollback:
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        if curr + 120 < time.time():
            break
    self.assertTrue(items_in_cbas_bucket > items_before_rollback,
                    "Roll-back did not happen.")
    self.log.info("#######BINGO########\nROLLBACK HAPPENED")

    # Poll (up to 120s) until CB and CBAS item counts converge again.
    items_in_cb_bucket = 0
    curr = time.time()
    while items_in_cb_bucket != items_in_cbas_bucket:
        items_in_cb_bucket = 0
        items_in_cbas_bucket = 0
        if self.where_field and self.where_value:
            try:
                items_in_cb_bucket = RestConnection(self.master).query_tool(
                    'select count(*) from %s where %s = "%s"' % (
                        self.cb_bucket_name, self.where_field,
                        self.where_value))['results'][0]['$1']
            except Exception:
                # Narrowed from a bare except: still best-effort while the
                # indexer is rolling back, but no longer swallows
                # SystemExit/KeyboardInterrupt.
                self.log.info(
                    "Indexer in rollback state. Query failed. Pass and move ahead."
                )
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count(
                    node, self.cb_bucket_name)
        self.log.info("Items in CB bucket after rollback: %s"
                      % items_in_cb_bucket)
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        if curr + 120 < time.time():
            break
    self.log.info(
        "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
        items_in_cb_bucket, items_in_cbas_bucket)
    self.assertTrue(
        items_in_cb_bucket == items_in_cbas_bucket,
        "After Rollback : # Items in CBAS bucket does not match that in the CB bucket"
    )