def wait_for_stats_int_value(master, bucket, stat_key, stat_value, option="==",
                             timeout_in_seconds=120, verbose=True):
    """Poll a bucket stat until the comparison against stat_value holds.

    :param master: server object whose REST endpoint is polled
    :param bucket: bucket whose stats are fetched
    :param stat_key: name of the integer stat to watch
    :param stat_value: value to compare the live stat against
    :param option: one of "==", ">", "<", ">=", "<="
    :param timeout_in_seconds: max seconds to keep polling (2s interval)
    :param verbose: log every unsuccessful poll
    :return: True if the condition was met within the timeout, else False

    NOTE(review): for the ordered operators the check is
    'stat_value OP actual' (e.g. ">" verifies stat_value > actual), which
    reads inverted relative to the log message. Kept as-is for backward
    compatibility with existing callers - confirm intent before changing.
    """
    log = logger.get("infra")
    log.info("waiting for bucket {0} stat : {1} to {2} {3} on {4}".format(
        bucket, stat_key, option, stat_value, master.ip))
    start = time.time()
    verified = False
    while (time.time() - start) <= timeout_in_seconds:
        # Re-created per poll so a bounced node gets a fresh connection
        rest = RestConnection(master)
        stats = rest.get_bucket_stats(bucket)
        # some stats are in memcached
        if stats and stat_key in stats:
            actual = int(stats[stat_key])
            if option == "==":
                verified = stat_value == actual
            elif option == ">":
                verified = stat_value > actual
            elif option == "<":
                verified = stat_value < actual
            elif option == ">=":
                verified = stat_value >= actual
            elif option == "<=":
                verified = stat_value <= actual
            if verified:
                log.info("verified {0} : {1}".format(stat_key, actual))
                break
            if verbose:
                log.info("{0} : {1} isn't {2} {3}".format(
                    stat_key, stat_value, option, actual))
        elif verbose:
            # Fix: the original logged 'actual' here as well, raising
            # UnboundLocalError when the stat was not present yet
            log.info("stat {0} not yet present in bucket stats".format(
                stat_key))
        sleep(2, log_type="infra")
    return verified
class User(Thread):
    """Thread emulating one travel-app user running doc-load scenarios.

    Scenario methods (named 'scenario_*') are collected into
    User.scenarios via get_all_scenarios() and executed 'op_count' times
    against the tenant's scope using the shared 'sdk_clients' handles.
    """
    # scenario_name -> method; populated in __init__ via get_all_scenarios
    scenarios = dict()
    log = logger.get("test")
    # Max attempts for durable CRUD ops hitting DurabilityImpossible
    max_retries = 5

    def __init__(self, bucket, scope, op_type, **kwargs):
        """
        :param bucket: Bucket object the user operates on
        :param scope: tenant scope name
        :param op_type: scenario method name to run, or "random"
        :param kwargs: optional 'num_items' / 'op_count' overrides
        """
        super(User, self).__init__()
        self.bucket = bucket
        self.scope = scope
        self.op_type = op_type
        self.op_count = 1
        self.result = None
        self.exception = None
        if 'num_items' in kwargs:
            self.num_items = kwargs['num_items']
        if 'op_count' in kwargs:
            self.op_count = kwargs['op_count']
        User.scenarios = get_all_scenarios(User)

    @staticmethod
    def get_template():
        """Return a fresh user-profile JsonObject skeleton."""
        template = JsonObject.create()
        template.put("mutated", 0)
        template.put("createdDate", "01/31/1970")
        template.put("preferences", JsonObject.create())
        return template

    @staticmethod
    def populate_values(template, u_id, user_name):
        """Fill 'template' in-place with randomized profile fields."""
        # Random date of birth within the configured date window
        random_seconds = randrange(DAYS_BETWEEN_DATES_INT)
        dob = START_DATE + timedelta(seconds=random_seconds)
        dob = dob.strftime("%Y-%m-%d")
        template.put("id", u_id)
        template.put("user_name", user_name)
        template.put("gender", choice(GENDER))
        template.put("dob", dob)
        template.put("country", choice(COUNTRY))
        template.put("email", user_name + "@travel_app.com")
        template.put("phone", randint(1000000000, 9999999999))

    @staticmethod
    def get_random_user_id(client, scope):
        """Pick a random existing user id from the scope's profile docs."""
        result = client.cluster.query("SELECT raw id "
                                      "FROM `travel-sample`.`%s`.`profile`"
                                      % scope)
        return choice(result.rowsAs(int))

    def scenario_user_registration(self):
        """Create 'num_items' (or 1-20 random) new user-profile docs."""
        if 'num_items' in self.__dict__:
            num_items = self.num_items
        else:
            num_items = randint(1, 20)
        collection = "profile"
        client = sdk_clients["user_manager"]
        collection_obj = self.bucket.scopes[self.scope].collections[collection]
        client.select_collection(self.scope, collection)
        template = User.get_template()
        # doc_index tracks the (start, end) range of keys created so far
        start = collection_obj.doc_index[1]
        end = start + num_items
        if collection_obj.doc_index != (0, 0):
            collection_obj.num_items += (end - start)
        collection_obj.doc_index = (collection_obj.doc_index[0], end)
        while start < end:
            u_id = query_util.CommonUtil.get_next_id(self.scope, collection)
            key = COMMON_KEY + str(u_id)
            User.populate_values(template, u_id, key)
            retry = 1
            while retry <= User.max_retries:
                result = client.crud(
                    DocLoading.Bucket.DocOps.CREATE, key, template,
                    durability=Bucket.DurabilityLevel.MAJORITY,
                    timeout=10)
                if result["status"] is False:
                    # Only transient durability failures are retried
                    if SDKException.DurabilityImpossibleException \
                            in str(result["error"]):
                        User.log.debug("Retrying due to d_impossible")
                    if retry == User.max_retries:
                        raise Exception("User profile creation failed: %s"
                                        % result)
                else:
                    collection_obj.num_items += 1
                    break
                retry += 1
            start += 1
        return "User - registered: %s" % num_items

    @staticmethod
    def book_flight(u_id, tenant_scope, src_airport=None, dest_airport=None):
        """Book a flight for 'u_id'; airports are random unless both given.

        Creates a checkout_cart doc, then with 50% probability confirms
        the booking (ticket + flight_booking docs, cart entry removed)
        or leaves it cancelled.

        :return: summary dict with src/dest airports, seats and status
        """
        summary = dict()
        required_seats = choice(range(1, 7))
        ticket_type = "normal"
        checkout_cart_collection = "checkout_cart"
        d_level = Bucket.DurabilityLevel
        # Both airports supplied -> this is the return leg of a round trip
        if [src_airport, dest_airport].count(None) == 0:
            ticket_type = "return"
        result = query_util.Airline.query_for_routes(
            sdk_clients["airline_booking"],
            src_airport=src_airport, dest_airport=dest_airport)
        flights = list()
        for row in result["q_result"].rowsAsObject():
            src_airport = row.get("sourceairport")
            dest_airport = row.get("destinationairport")
            for flight in row.get("flights"):
                flights.append(flight)
        summary["src_airport"] = src_airport
        summary["dest_airport"] = dest_airport
        summary["required_seats"] = required_seats
        if not flights:
            summary["status"] = "No flights available"
            return summary
        flight_to_book = choice(flights)
        reservation_date = get_random_reservation_date()
        checkout_doc = JsonObject.create()
        passenger_data = list()
        for _ in range(required_seats):
            gender = choice(["M", "F"])
            first_name = choice(FIRST_NAMES[gender])
            last_name = choice(LAST_NAMES)
            age = randint(3, 90)
            passenger_info = JsonObject.create()
            passenger_info.put("first_name", first_name)
            passenger_info.put("last_name", last_name)
            passenger_info.put("gender", gender)
            passenger_info.put("age", age)
            passenger_data.append(passenger_info)
        client = sdk_clients["airline_booking"]
        client.select_collection(tenant_scope, checkout_cart_collection)
        cart_id = query_util.CommonUtil.get_next_id(tenant_scope,
                                                    checkout_cart_collection)
        cart_key = "cart_%s" % cart_id
        checkout_doc.put("id", cart_id)
        checkout_doc.put("user_id", u_id)
        checkout_doc.put("flight_name", flight_to_book.get("flight"))
        checkout_doc.put("flight_time", flight_to_book.get("utc"))
        checkout_doc.put("travel_date", reservation_date)
        checkout_doc.put("from", src_airport)
        checkout_doc.put("to", dest_airport)
        checkout_doc.put("day_of_week", flight_to_book.get("day"))
        checkout_doc.put("seat_count", required_seats)
        checkout_doc.put("passengers", passenger_data)
        retry = 1
        # NOTE(review): this loop has no explicit 'break' on success;
        # confirm the intended exit path against the original source
        while retry <= User.max_retries:
            result = client.crud(DocLoading.Bucket.DocOps.CREATE,
                                 cart_key, checkout_doc,
                                 durability=d_level.MAJORITY)
            if result["status"] is False:
                if SDKException.DurabilityImpossibleException \
                        in str(result["error"]):
                    User.log.debug("Retrying due to d_impossible")
                else:
                    raise Exception("Flight cart add failed: %s" % result)
            retry += 1
        if choice([True, False]):
            # Booking confirmed scenario, add ticket under flight booking
            c_name = "booking_data"
            booking_id = query_util.CommonUtil.get_next_id(
                tenant_scope, c_name)
            ticket_key = "ticket_%s" % booking_id
            checkout_doc.put("id", booking_id)
            retry = 1
            # NOTE(review): same missing-success-'break' pattern as above
            while retry <= User.max_retries:
                client.select_collection(tenant_scope, c_name)
                result = client.crud(
                    DocLoading.Bucket.DocOps.CREATE,
                    ticket_key, checkout_doc,
                    durability=d_level.MAJORITY_AND_PERSIST_TO_ACTIVE)
                if result["status"] is False:
                    if SDKException.DurabilityImpossibleException \
                            in str(result["error"]):
                        User.log.debug("Retrying due to d_impossible")
                    else:
                        raise Exception("Ticket booking failed: %s" % result)
                retry += 1
            # Add confirmed ticket under user profile
            f_booking_id = query_util.CommonUtil.get_next_id(
                tenant_scope, "flight_booking")
            f_booking_key = "booking_%s" % f_booking_id
            f_booking_doc = JsonObject.create()
            f_booking_doc.put("id", f_booking_id)
            f_booking_doc.put("user_id", u_id)
            f_booking_doc.put("ticket_id", booking_id)
            f_booking_doc.put("status", "active")
            f_booking_doc.put("booked_on", global_vars.app_current_date)
            f_booking_doc.put("ticket_type", ticket_type)
            client.select_collection(tenant_scope, "flight_booking")
            result = client.crud(DocLoading.Bucket.DocOps.CREATE,
                                 f_booking_key, f_booking_doc)
            if result["status"] is False:
                raise Exception("User flight_booking add failed: %s" % result)
            # Remove booked ticket from cart
            retry = 1
            while retry <= User.max_retries:
                client.select_collection(tenant_scope,
                                         checkout_cart_collection)
                result = client.crud(DocLoading.Bucket.DocOps.DELETE,
                                     cart_key,
                                     durability=d_level.MAJORITY)
                if result["status"] is False:
                    if SDKException.DurabilityImpossibleException \
                            in str(result["error"]):
                        User.log.debug("Retrying due to d_impossible")
                else:
                    break
                retry += 1
            summary["status"] = "Booking success"
        else:
            summary["status"] = "cancelled"
        return summary

    def scenario_book_one_way_flight(self):
        """Book a single one-way flight for a random existing user."""
        summary = "User - scenario_book_one_way_flight\n"
        u_id = self.get_random_user_id(sdk_clients["user_manager"],
                                       self.scope)
        b_summary = self.book_flight(u_id, self.scope)
        summary += "From %s -> %s for %s people\n" \
                   % (b_summary["src_airport"], b_summary["dest_airport"],
                      b_summary["required_seats"])
        if b_summary["status"] == "cancelled":
            summary += "Booking not confirmed !"
        else:
            summary += b_summary["status"]
        return summary

    def scenario_book_flight_with_return(self):
        """Book a flight and, if it succeeded, the reverse return leg."""
        summary = "User - scenario_book_flight_with_return\n"
        u_id = self.get_random_user_id(sdk_clients["user_manager"],
                                       self.scope)
        b_summary = self.book_flight(u_id, self.scope)
        summary += "From %s -> %s for %s people\n" \
                   % (b_summary["src_airport"], b_summary["dest_airport"],
                      b_summary["required_seats"])
        if b_summary["status"] != "timeout":
            summary += "%s\n" % b_summary["status"]
            # Return leg swaps source/destination of the outbound booking
            b_summary = self.book_flight(
                u_id, self.scope,
                src_airport=b_summary["dest_airport"],
                dest_airport=b_summary["src_airport"])
            summary += "From %s -> %s for %s people\n" \
                       % (b_summary["src_airport"],
                          b_summary["dest_airport"],
                          b_summary["required_seats"])
            if b_summary["status"] == "timeout":
                summary += "Return booking not confirmed !"
            else:
                summary += "Return %s" % b_summary["status"]
        else:
            summary += "Booking not confirmed !"
        return summary

    # def _scenario_read_flight_booking_history(self):
    #     result = "User - scenario_read_flight_booking_history\n"
    #     return result
    #
    # def _scenario_book_hotel(self):
    #     result = "User - scenario_book_hotel\n"
    #     return result
    #
    # def _scenario_read_hotel_booking_history(self):
    #     result = "User - scenario_read_hotel_booking_history\n"
    #     return result
    #
    # def _scenario_write_hotel_review(self):
    #     result = "User - scenario_write_hotel_review\n"
    #     return result

    def run(self):
        """Thread entry point: run 'op_count' scenarios sequentially.

        On failure the exception is stored in self.exception and the
        loop stops; results are accumulated in self.result.
        """
        while self.op_count > 0:
            try:
                if self.op_type == "random":
                    rand_scenario = get_random_scenario(User)
                    self.result = User.scenarios[rand_scenario](self)
                else:
                    self.result = User.scenarios[self.op_type](self)
                User.log.info("%s %s" % (self.scope, self.result))
            except Exception as e:
                self.exception = e
                traceback.print_exc()
                break
            # NOTE(review): presumably for Jython, where Java exceptions
            # are not subclasses of Python's Exception - confirm
            except Java_base_exception as e:
                self.exception = e
                traceback.print_exc()
                break
            self.op_count -= 1
def full_execute_query(self, stmts, commit, query_params=None,
                       rollback_to_savepoint=False, write_conflict=False,
                       issleep=0, N1qlhelper=None, prepare=False,
                       server=None, memory_quota=0):
    """Execute transaction statements and track per-collection changes.

    1. collection_map will store the values changed for a collection
       after savepoint; it will be re-initialized after each savepoint
       and the values will be copied to collection_savepoint
       collection_map[collection] = {INSERT:{}, UPDATE:{}, DELETE:[]}
    2. collection_savepoint will keep track of all changes and map them
       to savepoints:
       collection_savepoint = {savepoint: {collection1: {INSERT:{},
                               UPDATE:{}, DELETE:[]}, ..}, ..}
    3. savepoint list will have the order of savepoints

    :param stmts: iterable of colon-separated clause strings
                  ("collection:op[:...]" or "SAVEPOINT:name")
    :param commit: True to commit, False to rollback at the end
    :param query_params: dict whose first value is the transaction id
    :param rollback_to_savepoint: rollback to a chosen earlier savepoint
    :param write_conflict: simulate a conflicting writer mid-transaction
    :param issleep: seconds to sleep after each statement (0 disables)
    :param N1qlhelper: helper object used to run the N1QL queries
    :param prepare: use prepared statements
    :param server: node to run the queries against
    :param memory_quota: per-request memory quota
    :return: (collection_savepoint, savepoint, queries, rerun_thread);
             on error collection_savepoint is the caught exception
    """
    # Fix: mutable default argument ({}) was shared across calls
    if query_params is None:
        query_params = {}
    self.name_list = []
    self.prepare = prepare
    self.log = logger.get("test")
    collection_savepoint = dict()
    savepoint = list()
    collection_map = dict()
    # Fix: dict.values()[0] is py2-only; materialize the view first
    txid = list(query_params.values())[0]
    self.memory_quota = memory_quota
    rerun_thread = False
    if N1qlhelper:
        self.n1ql_helper = N1qlhelper
    queries = dict()
    queries[txid] = list()
    try:
        for stmt in stmts:
            query = "SELECT * FROM system:transactions"
            results = self.n1ql_helper.run_cbq_query(query)
            self.log.info(json.JSONEncoder().encode(results))
            clause = stmt.split(":")
            if clause[0] == "SAVEPOINT":
                query = self.run_savepoint_query(clause, query_params,
                                                 server=server)
                if clause[1] in str(savepoint):
                    # Duplicate savepoint name: suffix to keep keys unique
                    str1 = clause[1] + ":" + str(
                        len(collection_savepoint.keys()))
                    collection_savepoint[str1] = copy.deepcopy(
                        collection_map)
                    savepoint.append(str1)
                else:
                    collection_savepoint[clause[1]] = copy.deepcopy(
                        collection_map)
                    savepoint.append(clause[1])
                queries[txid].append(query)
                # Changes after this savepoint accumulate in a fresh map
                collection_map = {}
                continue
            elif clause[0] not in collection_map.keys():
                collection_map[clause[0]] = \
                    {"INSERT": {}, "UPDATE": {}, "DELETE": []}
            if clause[1] == "UPDATE":
                result, query = \
                    self.run_update_query(clause, query_params, server)
                queries[txid].extend(query)
                # Merge with earlier updates recorded for the same key
                if clause[3] in collection_map[clause[0]]["UPDATE"].keys():
                    result.extend(
                        collection_map[clause[0]]["UPDATE"][clause[3]])
                collection_map[clause[0]]["UPDATE"][clause[3]] = result
            if clause[1] == "INSERT":
                result, query = self.run_insert_query(
                    clause, query_params, server)
                collection_map[clause[0]]["INSERT"].update(result)
                queries[txid].extend(query)
            if clause[1] == "DELETE":
                result, query = self.run_delete_query(
                    clause, query_params, server)
                collection_map[clause[0]]["DELETE"].extend(result)
                queries[txid].extend(query)
            if clause[1] == "MERGE":
                result, query = \
                    self.run_merge_query(clause, query_params, server)
                # clause[4] names the op bucket the merge maps onto
                if isinstance(result, list):
                    collection_map[clause[0]][clause[4]].extend(result)
                elif result:
                    if clause[4] == "UPDATE":
                        for key, value in result.items():
                            if key in collection_map[clause[0]][
                                    clause[4]].keys():
                                value.extend(collection_map[clause[0]][
                                    clause[4]][key])
                            collection_map[clause[0]][
                                clause[4]][key] = value
                    else:
                        collection_map[clause[0]][clause[4]].update(result)
                queries[txid].append(query)
            if issleep:
                self.sleep(issleep)
        if write_conflict:
            write_conflict_result = \
                self.simulate_write_conflict(stmts,
                                             random.choice([True, False]))
        if rollback_to_savepoint and (len(savepoint) > 0):
            savepoint = self.get_savepoint_to_verify(savepoint)
            query, result = self.n1ql_helper.end_txn(
                query_params, commit, savepoint[-1].split(':')[0],
                server=server)
            queries[txid].append(query)
        if commit is False:
            # Full rollback: no savepoint data survives
            savepoint = []
            collection_savepoint = {}
            query, result = self.n1ql_helper.end_txn(query_params,
                                                     commit=False,
                                                     server=server)
            queries[txid].append(query)
        else:
            if (not rollback_to_savepoint) or len(savepoint) == 0:
                collection_savepoint['last'] = copy.deepcopy(collection_map)
                savepoint.append('last')
            query = "SELECT * FROM system:transactions"
            results = self.n1ql_helper.run_cbq_query(query)
            self.log.debug(results)
            queries[txid].append(query)
            query, result = self.n1ql_helper.end_txn(query_params,
                                                     commit=True,
                                                     server=server)
            queries[txid].append(query)
            if isinstance(result, str) or 'errors' in result:
                # retry the entire transaction
                rerun_thread = self.validate_error_during_commit(
                    result, collection_savepoint, savepoint)
                savepoint = []
                collection_savepoint = {}
            if write_conflict and write_conflict_result:
                collection_savepoint['last'] = copy.deepcopy(
                    write_conflict_result)
                savepoint.append("last")
    except Exception as e:
        # Fix: json-encoding an exception raised TypeError and masked the
        # original error; log its string form instead
        self.log.info(str(e))
        collection_savepoint = e
    return collection_savepoint, savepoint, queries, rerun_thread
def __init__(self):
    # Acquire the shared "test" logger for this instance
    self.log = logger.get("test")
def __init__(self, user_id=None, payload=None, host=None):
    """
    :param user_id: identifier of the user this object acts for
    :param payload: request payload to send (shape depends on caller)
    :param host: target host for the requests
    """
    self.user_id = user_id
    # Password intentionally starts empty; set by the caller when needed
    self.password = ""
    self.payload = payload
    self.host = host
    self.log = logger.get("infra")
from backup_service_client.models.task_template_merge_options import TaskTemplateMergeOptions
from backup_service_client.configuration import Configuration
from backup_service_client.api_client import ApiClient
from backup_service_client.api.plan_api import PlanApi
from backup_service_client.api.import_api import ImportApi
from backup_service_client.api.repository_api import RepositoryApi
from backup_service_client.api.configuration_api import ConfigurationApi
from backup_service_client.api.active_repository_api import ActiveRepositoryApi
from backup_service_client.models.plan import Plan
from backup_service_client.models.archive_request import ArchiveRequest
from backup_service_client.models.create_active_repository_request import CreateActiveRepositoryRequest
from nfs import NfsConnection
from membase.api.rest_client import RestConnection
from threading import Timer

# Module-level logger shared by the backup-service helpers below
log = logger.get("test")


class BackupServiceTest:
    """Facade wiring together backup service, its tasks and its monitor."""

    def __init__(self, servers):
        """
        :param servers: server list handed to the underlying BackupService
        """
        self.backup_service = BackupService(servers)
        self.backup_monitor = BackupMonitor(self.backup_service)
        self.backup_tasks = BackupTasks(self.backup_service)

    def setup(self):
        """Reset the service, create predefined plans/repos, start monitor."""
        self.backup_service.clean()
        self.backup_service.setup()
        self.backup_tasks.create_predefined_plans()
        self.backup_tasks.create_predefined_repos()
        self.backup_monitor.start()
class Cluster(object):
    """Drives cluster-level scenarios (rebalance / failover) for the app.

    Scenario methods (named 'scenario_*') are collected into
    Cluster.scenarios by get_all_scenarios() and dispatched by run().
    """
    # scenario_name -> method; populated in __init__
    scenarios = dict()
    log = logger.get("test")

    def __init__(self, task, cluster, cluster_util):
        """
        :param task: task manager used to spawn rebalance/failover tasks
        :param cluster: cluster object (master, servers, nodes_in_cluster)
        :param cluster_util: helper used to locate the orchestrator node
        """
        super(Cluster, self).__init__()
        self.task = task
        self.cluster = cluster
        self.cluster_util = cluster_util
        self.rebalance_task = None
        self.spare_nodes = list()
        self.node_service_mapping = dict()
        self.update_nodes_in_cluster()
        Cluster.scenarios = get_all_scenarios(Cluster)

    def update_nodes_in_cluster(self):
        """
        1. Fetches current master node
        2. Constructs map of node_services: node_list
        3. Filters out spare nodes to be used for rebalance swap/in scenarios
        """
        self.spare_nodes = list()
        self.node_service_mapping = dict()
        self.cluster.nodes_in_cluster = list()
        cluster_status = RestConnection(self.cluster.master).cluster_status()
        self.cluster_util.find_orchestrator(self.cluster)
        self.log.info("Current master: %s" % self.cluster.master.ip)
        for node in cluster_status["nodes"]:
            # Sorted comma-joined service list forms the mapping key
            node["services"].sort()
            map_key = ",".join(node["services"])
            if map_key not in self.node_service_mapping:
                self.node_service_mapping[map_key] = list()
            host_ip = node["hostname"].split(":")[0]
            for server in self.cluster.servers:
                if server.ip == host_ip:
                    self.cluster.nodes_in_cluster.append(server)
                    self.node_service_mapping[map_key].append(server)
                    break
        # Anything not currently in the cluster is a spare for swap/in
        for server in self.cluster.servers:
            if server not in self.cluster.nodes_in_cluster:
                self.spare_nodes.append(server)
        self.log.debug("Node service map: %s" % self.node_service_mapping)
        self.log.debug("Nodes in cluster: %s" % self.cluster.nodes_in_cluster)
        self.log.debug("Spare nodes: %s" % self.spare_nodes)

    def get_node_to_remove(self, known_node_list):
        """Pop a random node from the list and from nodes_in_cluster."""
        node_to_remove = choice(known_node_list)
        known_node_list.remove(node_to_remove)
        self.cluster.nodes_in_cluster.remove(node_to_remove)
        return node_to_remove

    def scenario_rebalance(self, kwargs):
        """Trigger a plain rebalance with no topology change."""
        self.rebalance_task = self.task.async_rebalance(
            self.cluster.nodes_in_cluster,
            to_add=[], to_remove=[], services=None,
            sleep_before_rebalance=0,
            retry_get_process_num=25)

    def scenario_rebalance_in(self, kwargs):
        """Rebalance-in one spare node per requested service set."""
        services = kwargs.get("services")
        self.rebalance_task = self.task.async_rebalance(
            self.cluster.nodes_in_cluster,
            to_add=self.spare_nodes[:len(services)],
            to_remove=[], services=services,
            sleep_before_rebalance=0,
            retry_get_process_num=25)

    def scenario_rebalance_out(self, kwargs):
        """Rebalance-out one node per requested service set."""
        services = kwargs.get("services")
        nodes_to_remove = list()
        for service in services:
            self.log.debug("Removing node with services: %s" % service)
            # Normalize to the sorted comma-joined mapping-key format
            service = service.split(",")
            service.sort()
            service = ','.join(service)
            if service in self.node_service_mapping:
                node_to_remove = \
                    self.get_node_to_remove(self.node_service_mapping[service])
                nodes_to_remove.append(node_to_remove)
            else:
                # Fall back to any node whose service string contains it
                for t_service, node_list in self.node_service_mapping.items():
                    if service in t_service:
                        node_to_remove = self.get_node_to_remove(node_list)
                        nodes_to_remove.append(node_to_remove)
                        break
        self.rebalance_task = self.task.async_rebalance(
            self.cluster.nodes_in_cluster,
            to_add=[], to_remove=nodes_to_remove,
            check_vbucket_shuffling=True,
            sleep_before_rebalance=0,
            retry_get_process_num=25)
        # Update existing node as master
        for _, node_list in self.node_service_mapping.items():
            if node_list:
                self.cluster.master = node_list[0]
                break

    def scenario_rebalance_swap(self, kwargs):
        """Swap-rebalance: spare nodes in, matching-service nodes out."""
        services = kwargs.get("services")
        nodes_to_remove = list()
        for service in services:
            self.log.debug("Swap node with services: %s" % service)
            service = service.split(",")
            service.sort()
            service = ','.join(service)
            if service in self.node_service_mapping:
                node_to_remove = \
                    self.get_node_to_remove(self.node_service_mapping[service])
                nodes_to_remove.append(node_to_remove)
            else:
                for t_service, node_list in self.node_service_mapping.items():
                    if service in t_service:
                        node_to_remove = self.get_node_to_remove(node_list)
                        nodes_to_remove.append(node_to_remove)
                        break
        self.rebalance_task = self.task.async_rebalance(
            self.cluster.nodes_in_cluster,
            to_add=self.spare_nodes[:len(services)],
            to_remove=nodes_to_remove,
            check_vbucket_shuffling=False,
            services=services,
            sleep_before_rebalance=0,
            retry_get_process_num=25)
        # Update existing node as master
        self.cluster.master = self.spare_nodes[0]

    def scenario_graceful_failover(self, kwargs):
        """Gracefully fail over one node per requested service set."""
        services = kwargs.get("services")
        nodes_to_failover = list()
        for service in services:
            self.log.debug("Fetching %s nodes to failover" % service)
            service = service.split(",")
            service.sort()
            service = ','.join(service)
            if service in self.node_service_mapping:
                node_to_remove = \
                    self.get_node_to_remove(self.node_service_mapping[service])
                nodes_to_failover.append(node_to_remove)
            else:
                for t_service, node_list in self.node_service_mapping.items():
                    if service in t_service:
                        node_to_remove = self.get_node_to_remove(node_list)
                        nodes_to_failover.append(node_to_remove)
                        break
        self.rebalance_task = self.task.async_failover([self.cluster.master],
                                                       nodes_to_failover,
                                                       graceful=True)
        # Update existing node as master
        for _, node_list in self.node_service_mapping.items():
            if node_list:
                self.cluster.master = node_list[0]
                break

    def scenario_failover_node_delta_recovery(self, kwargs):
        """Mark already failed-over nodes for delta recovery."""
        services = kwargs.get("services")
        nodes_in_failed_state = list()
        rest = RestConnection(self.cluster.master)
        nodes_status = rest.cluster_status()["nodes"]
        for node in nodes_status:
            if node["clusterMembership"] == "inactiveFailed":
                nodes_in_failed_state.append(node["otpNode"].split("@")[1])
        self.log.info("Nodes in failover state: %s" % nodes_in_failed_state)
        if len(services) > len(nodes_in_failed_state):
            self.log.warning(
                "Failover nodes '%s' < '%s' expected node to recover"
                % (len(nodes_in_failed_state), len(services)))
        for index, service in enumerate(services):
            rest.set_recovery_type(
                otpNode="ns_1@" + nodes_in_failed_state[index],
                recoveryType=CbServer.Failover.RecoveryType.DELTA)

    def run(self, op_type, **kwargs):
        """Refresh node maps, then dispatch scenario 'scenario_<op_type>'."""
        self.log.info("Running cluster scenario: %s" % op_type)
        self.update_nodes_in_cluster()
        Cluster.scenarios["scenario_%s" % op_type](self, kwargs)
class CapellaUtils(object):
    """Static helpers wrapping the Capella public/internal REST APIs."""
    # Last CIDR handed out by get_next_cidr(); mutated as clusters deploy
    cidr = "10.0.0.0"
    memcached_port = "11207"
    log = logger.get("infra")

    @staticmethod
    def get_cluster_config(environment="hosted", provider=AWS.__str__,
                           region=AWS.Region.US_WEST_2,
                           single_az=False, plan=Cluster.Plan.DEV_PRO,
                           timezone=Cluster.Timezone.PT,
                           cluster_name="taf_cluster",
                           description=""):
        """Build the cluster-deployment payload dict.

        NOTE(review): the default 'provider=AWS.__str__' is a method
        reference, not a string - confirm against the AWS constants class.
        CIDR and projectId are filled in later by create_cluster().
        """
        return {
            "environment": environment,
            "clusterName": cluster_name,
            "projectId": "",
            "description": description,
            "place": {
                "singleAZ": single_az,
                "hosted": {
                    "provider": provider,
                    "region": region,
                    "CIDR": None
                }
            },
            "servers": list(),
            "supportPackage": {
                "timezone": timezone,
                "type": plan
            }
        }

    @staticmethod
    def get_cluster_config_spec(services, count,
                                compute=AWS.ComputeNode.VCPU4_RAM16,
                                storage_type=AWS.StorageType.GP3,
                                storage_size_gb=AWS.StorageSize.MIN,
                                storage_iops=AWS.StorageIOPS.MIN):
        """Build one server-group spec for the 'servers' payload list."""
        return {
            "services": services,
            "size": count,
            "compute": compute,
            "storage": {
                "type": storage_type,
                "size": storage_size_gb,
                "iops": storage_iops,
            }
        }

    @staticmethod
    def create_project(pod, tenant, name):
        """Create a Capella project and store its id on 'tenant'."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.create_project(tenant.id, name)
        if resp.status_code != 201:
            raise Exception("Creating capella project failed: {}".format(
                resp.content))
        project_id = json.loads(resp.content).get("id")
        tenant.project_id = project_id
        CapellaUtils.log.info("Project ID: {}".format(project_id))

    @staticmethod
    def delete_project(pod, tenant):
        """Delete the tenant's current project (no status check)."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        capella_api.delete_project(tenant.id, tenant.project_id)
        CapellaUtils.log.info("Project Deleted: {}".format(tenant.project_id))

    @staticmethod
    def get_next_cidr():
        """Advance and return the class-level CIDR for the next cluster.

        NOTE(review): increments the 2nd octet until 255, then the 3rd;
        when both are exhausted the same CIDR is returned again.
        """
        addr = CapellaUtils.cidr.split(".")
        if int(addr[1]) < 255:
            addr[1] = str(int(addr[1]) + 1)
        elif int(addr[2]) < 255:
            addr[2] = str(int(addr[2]) + 1)
        CapellaUtils.cidr = ".".join(addr)
        return CapellaUtils.cidr

    @staticmethod
    def create_cluster(pod, tenant, cluster_details, timeout=1800):
        """Deploy a cluster, retrying CIDR collisions until timeout.

        :return: (cluster_id, cluster_srv, servers) once deployed
        """
        end_time = time.time() + timeout
        while time.time() < end_time:
            subnet = CapellaUtils.get_next_cidr() + "/20"
            CapellaUtils.log.info("Trying with cidr: {}".format(subnet))
            cluster_details["place"]["hosted"].update({"CIDR": subnet})
            cluster_details.update({"projectId": tenant.project_id})
            capella_api = CapellaAPI(pod.url_public,
                                     tenant.api_secret_key,
                                     tenant.api_access_key,
                                     tenant.user,
                                     tenant.pwd)
            capella_api_resp = capella_api.create_cluster(cluster_details)
            # Check resp code , 202 is success
            if capella_api_resp.status_code == 202:
                break
            else:
                CapellaUtils.log.critical("Create capella cluster failed.")
                CapellaUtils.log.critical("Capella API returned " + str(
                    capella_api_resp.status_code))
                CapellaUtils.log.critical(capella_api_resp.json()["message"])
        cluster_id = capella_api_resp.headers['Location'].split("/")[-1]
        CapellaUtils.log.info("Cluster created with cluster ID: {}"\
                              .format(cluster_id))
        CapellaUtils.wait_until_done(pod, tenant, cluster_id,
                                     "Creating Cluster {}".format(
                                         cluster_details.get("clusterName")),
                                     timeout=timeout)
        cluster_srv = CapellaUtils.get_cluster_srv(pod, tenant, cluster_id)
        CapellaUtils.allow_my_ip(pod, tenant, cluster_id)
        servers = CapellaUtils.get_nodes(pod, tenant, cluster_id)
        return cluster_id, cluster_srv, servers

    @staticmethod
    def wait_until_done(pod, tenant, cluster_id, msg="", prnt=False,
                        timeout=1800):
        """Poll deployment jobs until the cluster is healthy or timeout.

        Raises on any of the known failed deployment/rebalance states.
        """
        end_time = time.time() + timeout
        while time.time() < end_time:
            content = CapellaUtils.jobs(pod, tenant, cluster_id)
            state = CapellaUtils.get_cluster_state(pod, tenant, cluster_id)
            if state in [
                    "deployment_failed", "deploymentFailed",
                    "redeploymentFailed", "rebalance_failed"]:
                raise Exception("{} for cluster {}".format(
                    state, cluster_id))
            if prnt:
                CapellaUtils.log.info(content)
            if content.get("data") or state != "healthy":
                # Still in progress: report this cluster's job progress
                for data in content.get("data"):
                    data = data.get("data")
                    if data.get("clusterId") == cluster_id:
                        step, progress = data.get("currentStep"), \
                            data.get("completionPercentage")
                        CapellaUtils.log.info(
                            "{}: Status=={}, State=={}, Progress=={}%".format(
                                msg, state, step, progress))
                time.sleep(5)
            else:
                CapellaUtils.log.info("{} Ready!!!".format(msg))
                break

    @staticmethod
    def destroy_cluster(cluster):
        """Delete a cluster and block until it is gone.

        NOTE(review): the wait loop only exits via the 'Not Found.'
        message; other unexpected payloads make it spin - confirm.
        """
        capella_api = CapellaAPI(cluster.pod.url_public,
                                 cluster.tenant.api_secret_key,
                                 cluster.tenant.api_access_key,
                                 cluster.tenant.user,
                                 cluster.tenant.pwd)
        resp = capella_api.delete_cluster(cluster.id)
        if resp.status_code != 202:
            raise Exception("Deleting Capella Cluster Failed.")
        time.sleep(10)
        while True:
            resp = capella_api.get_cluster_internal(cluster.tenant.id,
                                                    cluster.tenant.project_id,
                                                    cluster.id)
            content = json.loads(resp.content)
            if content.get("data"):
                CapellaUtils.log.info(
                    "Cluster status %s: %s"
                    % (cluster.cluster_config.get("name"),
                       content.get("data").get("status").get("state")))
                if content.get("data").get("status").get(
                        "state") == "destroying":
                    time.sleep(5)
                    continue
            elif content.get("message") == 'Not Found.':
                CapellaUtils.log.info("Cluster is destroyed.")
                cluster.tenant.clusters.pop(cluster.id)
                break

    @staticmethod
    def get_all_buckets(cluster):
        """Return the raw API response listing the cluster's buckets."""
        capella_api = CapellaAPI(cluster.pod.url_public,
                                 cluster.tenant.api_secret_key,
                                 cluster.tenant.api_access_key,
                                 cluster.tenant.user,
                                 cluster.tenant.pwd)
        resp = capella_api.get_buckets(cluster.tenant.id,
                                       cluster.tenant.project_id,
                                       cluster.id)
        return resp

    @staticmethod
    def create_bucket(cluster, bucket_params):
        """Create a bucket once the cluster reports healthy; raise on fail."""
        while True:
            state = CapellaUtils.get_cluster_state(cluster.pod,
                                                   cluster.tenant,
                                                   cluster.id)
            if state == "healthy":
                break
            time.sleep(1)
        capella_api = CapellaAPI(cluster.pod.url_public,
                                 cluster.tenant.api_secret_key,
                                 cluster.tenant.api_access_key,
                                 cluster.tenant.user,
                                 cluster.tenant.pwd)
        resp = capella_api.create_bucket(cluster.tenant.id,
                                         cluster.tenant.project_id,
                                         cluster.id, bucket_params)
        if resp.status_code in [200, 201, 202]:
            CapellaUtils.log.info("Bucket create successfully!")
        else:
            CapellaUtils.log.critical("Bucket creation failed: {}, {}".format(
                resp.status_code, resp.content))
            raise Exception("Bucket creation failed")

    @staticmethod
    def get_bucket_id(cluster, name):
        """Return the Capella id of bucket 'name', or None if absent."""
        capella_api = CapellaAPI(cluster.pod.url_public,
                                 cluster.tenant.api_secret_key,
                                 cluster.tenant.api_access_key,
                                 cluster.tenant.user,
                                 cluster.tenant.pwd)
        resp = capella_api.get_buckets(cluster.tenant.id,
                                       cluster.tenant.project_id,
                                       cluster.id)
        content = json.loads(resp.content)
        bucket_id = None
        for bucket in content.get("buckets").get("data"):
            if bucket.get("data").get("name") == name:
                bucket_id = bucket.get("data").get("id")
        return bucket_id

    @staticmethod
    def flush_bucket(cluster, name):
        """Flush bucket 'name' if it exists; failures are only logged."""
        bucket_id = CapellaUtils.get_bucket_id(cluster, name)
        if bucket_id:
            capella_api = CapellaAPI(cluster.pod.url_public,
                                     cluster.tenant.api_secret_key,
                                     cluster.tenant.api_access_key,
                                     cluster.tenant.user,
                                     cluster.tenant.pwd)
            resp = capella_api.flush_bucket(cluster.tenant.id,
                                            cluster.tenant.project_id,
                                            cluster.id, bucket_id)
            if resp.status >= 200 and resp.status < 300:
                CapellaUtils.log.info("Bucket deleted successfully!")
            else:
                CapellaUtils.log.info(resp.content)
        else:
            CapellaUtils.log.info("Bucket not found.")

    @staticmethod
    def delete_bucket(cluster, name):
        """Delete bucket 'name'; raises if the delete is rejected."""
        bucket_id = CapellaUtils.get_bucket_id(cluster, name)
        if bucket_id:
            capella_api = CapellaAPI(cluster.pod.url_public,
                                     cluster.tenant.api_secret_key,
                                     cluster.tenant.api_access_key,
                                     cluster.tenant.user,
                                     cluster.tenant.pwd)
            resp = capella_api.delete_bucket(cluster.tenant.id,
                                             cluster.tenant.project_id,
                                             cluster.id, bucket_id)
            if resp.status_code == 204:
                CapellaUtils.log.info("Bucket deleted successfully!")
            else:
                CapellaUtils.log.critical(resp.content)
                raise Exception("Bucket {} cannot be deleted".format(name))
        else:
            CapellaUtils.log.info("Bucket not found.")

    @staticmethod
    def update_bucket_settings(cluster, bucket_id, bucket_params):
        """Update bucket settings; returns the HTTP status code."""
        capella_api = CapellaAPI(cluster.pod.url_public,
                                 cluster.tenant.api_secret_key,
                                 cluster.tenant.api_access_key,
                                 cluster.tenant.user,
                                 cluster.tenant.pwd)
        resp = capella_api.update_bucket_settings(cluster.tenant.id,
                                                  cluster.tenant.project_id,
                                                  cluster.id, bucket_id,
                                                  bucket_params)
        code = resp.status
        if 200 > code or code >= 300:
            CapellaUtils.log.critical("Bucket update failed: %s"
                                      % resp.content)
        return resp.status

    @staticmethod
    def scale(cluster, new_config):
        """Apply a new server config, waiting out invalid-state rejections.

        NOTE(review): a 202 response leaves the loop only on the next
        iteration's non-202 'other error' path being absent - it retries
        the update; confirm the intended exit condition.
        """
        capella_api = CapellaAPI(cluster.pod.url_public,
                                 cluster.tenant.api_secret_key,
                                 cluster.tenant.api_access_key,
                                 cluster.tenant.user,
                                 cluster.tenant.pwd)
        while True:
            resp = capella_api.update_cluster_servers(cluster.id, new_config)
            if resp.status_code != 202:
                result = json.loads(resp.content)
                if result["errorType"] == "ClusterModifySpecsInvalidState":
                    # Cluster busy: wait for it to settle, then retry
                    CapellaUtils.wait_until_done(
                        cluster.pod, cluster.tenant, cluster.id,
                        "Wait for healthy cluster state")
            else:
                break

    @staticmethod
    def jobs(pod, tenant, cluster_id):
        """Fetch cluster jobs; retries (recursively) on bad responses."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.jobs(tenant.project_id, tenant.id, cluster_id)
        if resp.status_code != 200:
            CapellaUtils.log.critical("LOG A BUG: Internal API returns :\
            {}".format(resp.status_code))
            print(resp.content)
            time.sleep(5)
            # NOTE(review): unbounded recursion if the API keeps failing
            return CapellaUtils.jobs(pod, tenant, cluster_id)
        try:
            content = json.loads(resp.content)
        except Exception as e:
            CapellaUtils.log.critical("LOG A BUG: Internal API returns :\
            {}".format(resp.status_code))
            print(resp.content)
            time.sleep(5)
            return CapellaUtils.jobs(pod, tenant, cluster_id)
        return content

    @staticmethod
    def get_cluster_info(pod, tenant, cluster_id):
        """Fetch cluster info dict; retries (recursively) on non-200."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.get_cluster_info(cluster_id)
        if resp.status_code != 200:
            CapellaUtils.log.critical("LOG A BUG: Fetch Cluster API returns :\
            {}".format(resp.status_code))
            print(resp.content)
            time.sleep(5)
            return CapellaUtils.get_cluster_info(pod, tenant, cluster_id)
        return json.loads(resp.content)

    @staticmethod
    def get_cluster_state(pod, tenant, cluster_id):
        """Return the cluster's 'status' field (e.g. "healthy")."""
        content = CapellaUtils.get_cluster_info(pod, tenant, cluster_id)
        return content.get("status")

    @staticmethod
    def get_cluster_srv(pod, tenant, cluster_id):
        """Return the cluster's SRV connection endpoint."""
        content = CapellaUtils.get_cluster_info(pod, tenant, cluster_id)
        return content.get("endpointsSrv")

    @staticmethod
    def get_nodes(pod, tenant, cluster_id):
        """Return the node records of a cluster; retries on non-200."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.get_nodes(tenant.id, tenant.project_id, cluster_id)
        if resp.status_code != 200:
            CapellaUtils.log.critical(
                "LOG A BUG: Fetch Cluster Node API returns :\
            {}".format(resp.status_code))
            print(resp.content)
            time.sleep(5)
            return CapellaUtils.get_nodes(pod, tenant, cluster_id)
        CapellaUtils.log.info(json.loads(resp.content))
        return [server.get("data")
                for server in json.loads(resp.content).get("data")]

    @staticmethod
    def get_db_users(pod, tenant, cluster_id, page=1, limit=100):
        """Return a page of database users for the cluster."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.get_db_users(tenant.id, tenant.project_id,
                                        cluster_id, page, limit)
        return json.loads(resp.content)

    @staticmethod
    def delete_db_user(pod, tenant, cluster_id, user_id):
        """Stub: builds and prints the delete URI; no request is sent."""
        uri = "{}/v2/organizations/{}/projects/{}/clusters/{}/users/{}" \
              .format(tenant.id, tenant.project_id, cluster_id, user_id)
        print(uri)

    @staticmethod
    def create_db_user(pod, tenant, cluster_id, user, pwd):
        """Create a database user; tolerates 'already exists' errors.

        NOTE(review): the critical-log format string "(}" is broken and
        ignores its argument; also the recursive retry's return value is
        discarded - both look like latent bugs to fix separately.
        """
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.create_db_user(tenant.id, tenant.project_id,
                                          cluster_id, user, pwd)
        if resp.status_code != 200:
            result = json.loads(resp.content)
            CapellaUtils.log.critical(
                "Add capella cluster user failed: (}".format(resp.status_code))
            CapellaUtils.log.critical(result)
            if result["errorType"] == "ErrDataplaneUserNameExists":
                CapellaUtils.log.warn("User is already added: %s"
                                      % result["message"])
                return
            CapellaUtils.create_db_user(pod, tenant, cluster_id, user, pwd)
        CapellaUtils.log.critical(json.loads(resp.content))
        CapellaUtils.log.info(json.loads(resp.content))
        return json.loads(resp.content)

    @staticmethod
    def allow_my_ip(pod, tenant, cluster_id):
        """Whitelist the caller's IP; duplicate CIDRs are tolerated."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.allow_my_ip(tenant.id, tenant.project_id,
                                       cluster_id)
        if resp.status_code != 202:
            result = json.loads(resp.content)
            if result["errorType"] == "ErrAllowListsCreateDuplicateCIDR":
                CapellaUtils.log.warn("IP is already added: %s"
                                      % result["message"])
                return
            CapellaUtils.log.critical(resp.content)
            raise Exception("Adding allowed IP failed.")

    @staticmethod
    def load_sample_bucket(pod, tenant, cluster_id, bucket_name):
        """Kick off a sample-bucket load; response is not checked."""
        capella_api = CapellaAPI(pod.url_public, tenant.api_secret_key,
                                 tenant.api_access_key, tenant.user,
                                 tenant.pwd)
        resp = capella_api.load_sample_bucket(tenant.id, tenant.project_id,
                                              cluster_id, bucket_name)
def setUp(self):
    """Base-test setUp: read all framework/cluster/bucket/doc/SDK params
    from the test input, build the per-cluster utility objects, then
    (unless skipped) clean and initialize every cluster.

    Any exception during the initialization phase shuts down the task
    manager and fails the test.
    """
    self.input = TestInputSingleton.input
    # Framework specific parameters
    self.log_level = self.input.param("log_level", "info").upper()
    self.infra_log_level = self.input.param("infra_log_level",
                                            "error").upper()
    self.skip_setup_cleanup = self.input.param("skip_setup_cleanup", False)
    self.tear_down_while_setup = self.input.param("tear_down_while_setup",
                                                  True)
    self.test_timeout = self.input.param("test_timeout", 3600)
    self.thread_to_use = self.input.param("threads_to_use", 30)
    self.case_number = self.input.param("case_number", 0)
    # End of framework parameters

    # Cluster level info settings
    self.log_info = self.input.param("log_info", None)
    self.log_location = self.input.param("log_location", None)
    self.stat_info = self.input.param("stat_info", None)
    self.port = self.input.param("port", None)
    self.port_info = self.input.param("port_info", None)
    self.servers = self.input.servers
    self.cb_clusters = OrderedDict()
    self.num_servers = self.input.param("servers", len(self.servers))
    self.primary_index_created = False
    self.index_quota_percent = self.input.param("index_quota_percent",
                                                None)
    self.gsi_type = self.input.param("gsi_type", 'plasma')
    # CBAS setting
    self.jre_path = self.input.param("jre_path", None)
    self.enable_dp = self.input.param("enable_dp", False)
    # End of cluster info parameters

    # Bucket specific params
    self.bucket_type = self.input.param("bucket_type", Bucket.Type.MEMBASE)
    self.bucket_ttl = self.input.param("bucket_ttl", 0)
    self.bucket_size = self.input.param("bucket_size", None)
    self.bucket_conflict_resolution_type = \
        self.input.param("bucket_conflict_resolution",
                         Bucket.ConflictResolution.SEQ_NO)
    self.bucket_replica_index = self.input.param("bucket_replica_index", 1)
    self.bucket_eviction_policy = \
        self.input.param("bucket_eviction_policy",
                         Bucket.EvictionPolicy.VALUE_ONLY)
    self.flush_enabled = self.input.param("flushEnabled",
                                          Bucket.FlushBucket.DISABLED)
    self.bucket_time_sync = self.input.param("bucket_time_sync", False)
    self.standard_buckets = self.input.param("standard_buckets", 1)
    self.num_replicas = self.input.param("replicas", Bucket.ReplicaNum.ONE)
    self.active_resident_threshold = \
        int(self.input.param("active_resident_threshold", 100))
    self.compression_mode = \
        self.input.param("compression_mode", Bucket.CompressionMode.PASSIVE)
    self.bucket_storage = \
        self.input.param("bucket_storage", Bucket.StorageBackend.couchstore)
    # Magma only supports full eviction, so the eviction param is
    # overridden for that backend
    if self.bucket_storage == Bucket.StorageBackend.magma:
        self.bucket_eviction_policy = Bucket.EvictionPolicy.FULL_EVICTION
    self.scope_name = self.input.param("scope", CbServer.default_scope)
    self.collection_name = self.input.param("collection",
                                            CbServer.default_collection)
    self.bucket_durability_level = self.input.param(
        "bucket_durability", Bucket.DurabilityLevel.NONE).upper()
    self.bucket_purge_interval = self.input.param("bucket_purge_interval",
                                                  1)
    # Map the textual durability name to its enum/int form
    self.bucket_durability_level = \
        BucketDurability[self.bucket_durability_level]
    # End of bucket parameters

    # Doc specific params
    self.key = self.input.param("key", "test_docs")
    self.key_size = self.input.param("key_size", 8)
    self.doc_size = self.input.param("doc_size", 256)
    self.sub_doc_size = self.input.param("sub_doc_size", 10)
    self.doc_type = self.input.param("doc_type", "json")
    self.num_items = self.input.param("num_items", 100000)
    self.target_vbucket = self.input.param("target_vbucket", None)
    self.maxttl = self.input.param("maxttl", 0)
    self.random_exp = self.input.param("random_exp", False)
    self.randomize_doc_size = self.input.param("randomize_doc_size", False)
    self.randomize_value = self.input.param("randomize_value", False)
    self.rev_write = self.input.param("rev_write", False)
    self.rev_read = self.input.param("rev_read", False)
    self.rev_update = self.input.param("rev_update", False)
    self.rev_del = self.input.param("rev_del", False)
    self.random_key = self.input.param("random_key", False)
    self.mix_key_size = self.input.param("mix_key_size", False)
    # End of doc specific parameters

    # Transactions parameters
    self.transaction_timeout = self.input.param("transaction_timeout", 100)
    self.transaction_commit = self.input.param("transaction_commit", True)
    self.update_count = self.input.param("update_count", 1)
    self.sync = self.input.param("sync", True)
    self.default_bucket = self.input.param("default_bucket", True)
    self.num_buckets = self.input.param("num_buckets", 0)
    self.atomicity = self.input.param("atomicity", False)
    self.defer = self.input.param("defer", False)
    # end of transaction parameters

    # Client specific params
    self.sdk_client_type = self.input.param("sdk_client_type", "java")
    self.replicate_to = self.input.param("replicate_to", 0)
    self.persist_to = self.input.param("persist_to", 0)
    self.sdk_retries = self.input.param("sdk_retries", 5)
    self.sdk_timeout = self.input.param("sdk_timeout", 5)
    self.time_unit = self.input.param("time_unit", "seconds")
    self.durability_level = self.input.param("durability", "").upper()
    self.sdk_client_pool = self.input.param("sdk_client_pool", None)
    self.sdk_pool_capacity = self.input.param("sdk_pool_capacity", 1)
    # Client compression settings
    self.sdk_compression = self.input.param("sdk_compression", None)
    compression_min_ratio = self.input.param("min_ratio", None)
    compression_min_size = self.input.param("min_size", None)
    # A bare boolean is promoted to the dict form the SDK expects;
    # min size/ratio only apply in that case
    if type(self.sdk_compression) is bool:
        self.sdk_compression = {"enabled": self.sdk_compression}
        if compression_min_size:
            self.sdk_compression["minSize"] = compression_min_size
        if compression_min_ratio:
            self.sdk_compression["minRatio"] = compression_min_ratio

    # Doc Loader Params
    self.process_concurrency = self.input.param("process_concurrency", 20)
    self.batch_size = self.input.param("batch_size", 2000)
    self.dgm_batch = self.input.param("dgm_batch", 5000)
    self.ryow = self.input.param("ryow", False)
    self.check_persistence = self.input.param("check_persistence", False)
    # End of client specific parameters

    # initial number of items in the cluster
    self.services_init = self.input.param("services_init", None)
    self.nodes_init = self.input.param("nodes_init", 1)
    self.nodes_in = self.input.param("nodes_in", 1)
    self.nodes_out = self.input.param("nodes_out", 1)
    self.services_in = self.input.param("services_in", None)
    self.forceEject = self.input.param("forceEject", False)
    self.wait_timeout = self.input.param("wait_timeout", 120)
    self.verify_unacked_bytes = \
        self.input.param("verify_unacked_bytes", False)
    self.disabled_consistent_view = \
        self.input.param("disabled_consistent_view", None)
    self.rebalanceIndexWaitingDisabled = \
        self.input.param("rebalanceIndexWaitingDisabled", None)
    self.rebalanceIndexPausingDisabled = \
        self.input.param("rebalanceIndexPausingDisabled", None)
    self.maxParallelIndexers = \
        self.input.param("maxParallelIndexers", None)
    self.maxParallelReplicaIndexers = \
        self.input.param("maxParallelReplicaIndexers", None)
    self.quota_percent = self.input.param("quota_percent", 90)
    self.skip_buckets_handle = self.input.param("skip_buckets_handle",
                                                False)

    # SDKClientPool object for creating generic clients across tasks
    if self.sdk_client_pool is True:
        self.init_sdk_pool_object()

    # Initiate logging variables
    self.log = logger.get("test")
    self.infra_log = logger.get("infra")

    self.cleanup_pcaps()
    self.collect_pcaps = self.input.param("collect_pcaps", False)
    if self.collect_pcaps:
        self.start_collect_pcaps()

    # variable for log collection using cbCollect
    self.get_cbcollect_info = self.input.param("get-cbcollect-info", False)

    # Variable for initializing the current (start of test) timestamp
    self.start_timestamp = datetime.now()

    '''
    Be careful while using this flag.
    This is only and only for stand-alone tests.
    During bugs reproductions, when a crash is seen
    stop_server_on_crash will stop the server
    so that we can collect data/logs/dumps at the right time
    '''
    self.stop_server_on_crash = self.input.param("stop_server_on_crash",
                                                 False)
    self.collect_data = self.input.param("collect_data", False)

    # Configure loggers
    self.log.setLevel(self.log_level)
    self.infra_log.setLevel(self.infra_log_level)

    # Support lib objects for testcase execution
    self.task_manager = TaskManager(self.thread_to_use)
    self.task = ServerTasks(self.task_manager)
    # End of library object creation

    self.sleep = sleep

    self.cleanup = False
    self.nonroot = False
    self.test_failure = None
    self.crash_warning = self.input.param("crash_warning", False)
    self.summary = TestSummary(self.log)

    # Populate memcached_port in case of cluster_run
    cluster_run_base_port = ClusterRun.port
    if int(self.input.servers[0].port) == ClusterRun.port:
        for server in self.input.servers:
            server.port = cluster_run_base_port
            cluster_run_base_port += 1
            # If not defined in node.ini under 'memcached_port' section
            # NOTE(review): 'is' comparison on ints relies on small-int
            # interning for the default port constant - confirm intent
            if server.memcached_port is CbServer.memcached_port:
                server.memcached_port = \
                    ClusterRun.memcached_port \
                    + (2 * (int(server.port) - ClusterRun.port))

    self.log_setup_status(self.__class__.__name__, "started")
    cluster_name_format = "C%s"
    default_cluster_index = counter_index = 1
    if len(self.input.clusters) > 1:
        # Multi cluster setup
        # NOTE(review): .iteritems() is Python2/Jython-only
        for _, nodes in self.input.clusters.iteritems():
            cluster_name = cluster_name_format % counter_index
            tem_cluster = CBCluster(name=cluster_name, servers=nodes)
            self.cb_clusters[cluster_name] = tem_cluster
            counter_index += 1
    else:
        # Single cluster
        cluster_name = cluster_name_format % counter_index
        self.cb_clusters[cluster_name] = CBCluster(name=cluster_name,
                                                   servers=self.servers)

    # Initialize self.cluster with first available cluster as default
    self.cluster = self.cb_clusters[cluster_name_format
                                    % default_cluster_index]
    self.cluster_util = ClusterUtils(self.cluster, self.task_manager)
    self.bucket_util = BucketUtils(self.cluster_util, self.task)

    if self.standard_buckets > 10:
        self.bucket_util.change_max_buckets(self.cluster.master,
                                            self.standard_buckets)

    # Detect OS / non-root setups; the first non-windows non-root master
    # flips self.nonroot for every cluster
    for cluster_name, cluster in self.cb_clusters.items():
        shell = RemoteMachineShellConnection(cluster.master)
        self.os_info = shell.extract_remote_info().type.lower()
        if self.os_info != 'windows':
            if cluster.master.ssh_username != "root":
                self.nonroot = True
                shell.disconnect()
                break
        shell.disconnect()

    """ some tests need to bypass checking cb server at set up
        to run installation """
    self.skip_init_check_cbserver = \
        self.input.param("skip_init_check_cbserver", False)

    try:
        if self.skip_setup_cleanup:
            # Reuse whatever buckets the previous test left behind
            self.cluster.buckets = self.bucket_util.get_all_buckets(
                self.cluster)
            return
        self.services_map = None

        self.log_setup_status("BaseTestCase", "started")
        for cluster_name, cluster in self.cb_clusters.items():
            if not self.skip_buckets_handle \
                    and not self.skip_init_check_cbserver:
                self.log.debug("Cleaning up cluster")
                cluster_util = ClusterUtils(cluster, self.task_manager)
                bucket_util = BucketUtils(cluster_util, self.task)
                cluster_util.cluster_cleanup(bucket_util)

        # Avoid cluster operations in setup for new upgrade / upgradeXDCR
        if str(self.__class__).find('newupgradetests') != -1 or \
                str(self.__class__).find('upgradeXDCR') != -1 or \
                str(self.__class__).find('Upgrade_EpTests') != -1 or \
                self.skip_buckets_handle:
            self.log.warning("Cluster operation in setup will be skipped")
            self.primary_index_created = True
            self.log_setup_status("BaseTestCase", "finished")
            return
        # avoid clean up if the previous test has been tear down
        # (case_number > 1000 encodes "previous tearDown failed")
        if self.case_number == 1 or self.case_number > 1000:
            if self.case_number > 1000:
                self.log.warn("TearDown for prev test failed. Will retry")
                self.case_number -= 1000
            self.cleanup = True
            if not self.skip_init_check_cbserver:
                self.tearDownEverything()
                self.tear_down_while_setup = False
        if not self.skip_init_check_cbserver:
            for cluster_name, cluster in self.cb_clusters.items():
                self.log.info("Initializing cluster")
                cluster_util = ClusterUtils(cluster, self.task_manager)
                cluster_util.reset_cluster()
                master_services = cluster_util.get_services(
                    cluster.servers[:1], self.services_init, start_node=0)
                if master_services is not None:
                    master_services = master_services[0].split(",")
                self.quota = self._initialize_nodes(
                    self.task, cluster, self.disabled_consistent_view,
                    self.rebalanceIndexWaitingDisabled,
                    self.rebalanceIndexPausingDisabled,
                    self.maxParallelIndexers,
                    self.maxParallelReplicaIndexers,
                    self.port, self.quota_percent,
                    services=master_services)
                cluster_util.change_env_variables()
                cluster_util.change_checkpoint_params()
                self.log.info("{0} initialized".format(cluster))
        else:
            self.quota = ""

        # Enable dp_version since we need collections enabled
        if self.enable_dp:
            for server in self.cluster.servers:
                shell_conn = RemoteMachineShellConnection(server)
                cb_cli = CbCli(shell_conn)
                cb_cli.enable_dp()
                shell_conn.disconnect()

        for cluster_name, cluster in self.cb_clusters.items():
            cluster_util = ClusterUtils(cluster, self.task_manager)
            if self.log_info:
                cluster_util.change_log_info()
            if self.log_location:
                cluster_util.change_log_location()
            if self.stat_info:
                cluster_util.change_stat_info()
            if self.port_info:
                cluster_util.change_port_info()
            if self.port:
                self.port = str(self.port)

        self.log_setup_status("BaseTestCase", "finished")

        if not self.skip_init_check_cbserver:
            self.__log("started")
    except Exception as e:
        traceback.print_exc()
        self.task.shutdown(force=True)
        self.fail(e)
def __init__(self, cluster, server_task, n1ql_node): self.cluster = cluster self.task = server_task self.task_manager = self.task.jython_task_manager self.n1ql_node = n1ql_node self.log = logger.get("test")
def rebalance_in(servers, how_many, do_shuffle=True, monitor=True, do_check=True): log = logger.get("infra") servers_rebalanced = [] rest = RestConnection(servers[0]) nodes = rest.node_statuses() # are all ips the same nodes_on_same_ip = True firstIp = nodes[0].ip if len(nodes) == 1: nodes_on_same_ip = False else: for node in nodes: if node.ip != firstIp: nodes_on_same_ip = False break nodeIps = ["{0}:{1}".format(node.ip, node.port) for node in nodes] log.info("current nodes : {0}".format(nodeIps)) toBeAdded = [] master = servers[0] selection = servers[1:] if do_shuffle: shuffle(selection) for server in selection: if nodes_on_same_ip: if not "{0}:{1}".format(firstIp, server.port) in nodeIps: toBeAdded.append(server) servers_rebalanced.append(server) log.info("choosing {0}:{1}".format(server.ip, server.port)) elif not "{0}:{1}".format(server.ip, server.port) in nodeIps: toBeAdded.append(server) servers_rebalanced.append(server) log.info("choosing {0}:{1}".format(server.ip, server.port)) if len(toBeAdded) == int(how_many): break if do_check and len(toBeAdded) < how_many: raise Exception( "unable to find {0} nodes to rebalance_in".format(how_many)) for server in toBeAdded: otpNode = rest.add_node(master.rest_username, master.rest_password, server.ip, server.port) otpNodes = [node.id for node in rest.node_statuses()] started = rest.rebalance(otpNodes, []) msg = "rebalance operation started ? {0}" log.info(msg.format(started)) if monitor is not True: return True, servers_rebalanced if started: try: result = rest.monitorRebalance() except RebalanceFailedException as e: log.error("rebalance failed: {0}".format(e)) return False, servers_rebalanced msg = "successfully rebalanced in selected nodes from the cluster ? {0}" log.info(msg.format(result)) return result, servers_rebalanced return False, servers_rebalanced
def verify_items_count(master, bucket, num_attempt=3, timeout=2):
    """Cross-check per-node curr_items against the bucket-level
    curr_items_tot.

    Sums curr_items from every node, scales by (replica_factor + 1)
    (or by the kv-node count when there are fewer kv nodes than
    replicas require) and compares against the master's
    curr_items_tot. Returns True only when the two totals match
    exactly (delta == 0).

    :raises StatsUnavailableException: when any node fails to return
        bucket stats
    """
    # get the #of buckets from rest
    log = logger.get("infra")
    rest = RestConnection(master)
    if isinstance(bucket, Bucket):
        bucket = bucket.name
    bucket_info = rest.get_bucket(bucket, num_attempt, timeout)
    replica_factor = bucket_info.replicaNumber
    vbucket_active_sum = 0
    vbucket_replica_sum = 0
    vbucket_pending_sum = 0
    kv_nodes = 0
    all_server_stats = []
    stats_received = True
    nodes = rest.get_nodes()
    nodes_services = rest.get_nodes_services()
    # Count how many nodes run the kv service
    for node in nodes_services:
        if 'kv' in nodes_services[node]:
            kv_nodes += 1
    for server in nodes:
        # get the stats
        server_stats = rest.get_bucket_stats_for_node(bucket, server)
        if not server_stats:
            log.info("unable to get stats from {0}:{1}".format(
                server.ip, server.port))
            stats_received = False
        all_server_stats.append((server, server_stats))
    if not stats_received:
        raise StatsUnavailableException()
    # NOTE(review): 'sum' shadows the builtin for the rest of this scope
    sum = 0
    for server, single_stats in all_server_stats:
        if not single_stats or "curr_items" not in single_stats:
            continue
        sum += single_stats["curr_items"]
        log.info("curr_items from {0}:{1} : {2}"
                 .format(server.ip, server.port,
                         single_stats["curr_items"]))
        if 'vb_pending_num' in single_stats:
            vbucket_pending_sum += single_stats['vb_pending_num']
            log.info("vb_pending_num from {0}:{1} : {2}"
                     .format(server.ip, server.port,
                             single_stats["vb_pending_num"]))
        if 'vb_active_num' in single_stats:
            vbucket_active_sum += single_stats['vb_active_num']
            log.info("vb_active_num from {0}:{1} : {2}"
                     .format(server.ip, server.port,
                             single_stats["vb_active_num"]))
        if 'vb_replica_num' in single_stats:
            vbucket_replica_sum += single_stats['vb_replica_num']
            log.info("vb_replica_num from {0}:{1} : {2}"
                     .format(server.ip, server.port,
                             single_stats["vb_replica_num"]))
    msg = "summation of vb_active_num : {0} vb_pending_num : {1} vb_replica_num : {2}"
    log.info(msg.format(vbucket_active_sum, vbucket_pending_sum,
                        vbucket_replica_sum))
    msg = 'sum : {0} and sum * (replica_factor + 1) ({1}) : {2}'
    log.info(msg.format(sum, replica_factor + 1,
                        (sum * (replica_factor + 1))))
    master_stats = rest.get_bucket_stats(bucket)
    if "curr_items_tot" in master_stats:
        log.info('curr_items_tot from master: {0}'.format(
            master_stats["curr_items_tot"]))
    else:
        raise Exception(
            "bucket {0} stats doesnt contain 'curr_items_tot':".format(
                bucket))
    # With fewer kv nodes than replicas require, each item can only
    # exist kv_nodes times instead of (replica_factor + 1) times
    if replica_factor >= kv_nodes:
        log.warn("the number of nodes is less than replica requires")
        delta = sum * (kv_nodes) - master_stats["curr_items_tot"]
    else:
        delta = sum * (replica_factor + 1) - master_stats["curr_items_tot"]
    delta = abs(delta)

    if delta > 0:
        if sum == 0:
            missing_percentage = 0
        else:
            missing_percentage = delta * 1.0 / (sum * (replica_factor + 1))
        log.info("Nodes stats are: {0}".format(
            [node.ip for node in nodes]))
    else:
        # NOTE(review): reporting 100% "missing" when nothing is missing
        # looks inverted - the value is only used in the log line below
        missing_percentage = 1
    log.info("delta : {0} missing_percentage : {1} replica_factor : {2}"
             .format(delta, missing_percentage, replica_factor))
    # If no items missing then, return True
    # NOTE(review): this reset of kv_nodes is a dead store
    kv_nodes = 0
    if not delta:
        return True
    return False
def wait_for_mc_stats_all_nodes(master, bucket, stat_key, stat_value, timeout_in_seconds=120, verbose=True): log = logger.get("infra") log.info( "waiting for bucket {0} stat : {1} to match {2} on {3}".format( bucket, stat_key, stat_value, master.ip)) time_to_timeout = 0 previous_stat_value = -1 curr_stat_value = -1 verified = False all_stats = {} while not verified: rest = RestConnection(master) nodes = rest.node_statuses() for node in nodes: _server = { "ip": node.ip, "port": node.port, "username": master.rest_username, "password": master.rest_password } # Failed over node is part of node_statuses but since # its failed over memcached connections to this node will fail node_self = RestConnection(_server).get_nodes_self() if node_self.clusterMembership == 'active': mc = MemcachedClientHelper.direct_client(_server, bucket) n_stats = mc.stats("") mc.close() all_stats[node.id] = n_stats actual_stat_value = -1 for k in all_stats: if all_stats[k] and stat_key in all_stats[k]: if actual_stat_value == -1: log.info(all_stats[k][stat_key]) actual_stat_value = int(all_stats[k][stat_key]) else: actual_stat_value += int(all_stats[k][stat_key]) if actual_stat_value == stat_value: log.info("{0} : {1}".format(stat_key, actual_stat_value)) verified = True break else: if verbose: log.info("{0} : {1}".format(stat_key, actual_stat_value)) curr_stat_value = actual_stat_value # values are changing so clear any timeout if curr_stat_value != previous_stat_value: time_to_timeout = 0 else: if time_to_timeout == 0: time_to_timeout = time.time() + timeout_in_seconds if time_to_timeout < time.time(): log.info( "no change in {0} stat after {1} seconds (value = {2})" .format(stat_key, timeout_in_seconds, curr_stat_value)) break previous_stat_value = curr_stat_value sleep_time = 2 if not verbose: sleep_time = 0.1 sleep(sleep_time) return verified
def __init__(self, server_task=None): self.task = server_task self.task_manager = self.task.jython_task_manager self.log = logger.get("test")
def __init__(self, task_manager): self.jython_task_manager = task_manager self.log = logger.get("infra") self.test_log = logger.get("test") self.log.debug("Initiating NodeTasks")
def __init__(self, servers, test): self.servers = servers self.test = test self.log = logger.get("test")
def wait_for_vbuckets_ready_state(node, bucket, timeout_in_seconds=300,
                                  log_msg='', admin_user='******',
                                  admin_pass='******'):
    """Poll memcached on every server until all of the bucket's vbuckets
    report a ready state, or the timeout expires.

    A vbucket is considered ready when its state string contains a
    \\x01 or \\x02 marker byte (per get_vbucket_state); a vbucket that
    later leaves that state is removed from the ready set again.

    :return: True when every vbucket became ready within the timeout
    """
    start_time = time.time()
    end_time = start_time + timeout_in_seconds
    ready_vbuckets = {}
    log = logger.get("infra")
    rest = RestConnection(node)
    # servers = rest.get_nodes()
    bucket_conn = BucketHelper(node)
    bucket_conn.vbucket_map_ready(bucket, 60)
    vbucket_count = len(bucket_conn.get_vbuckets(bucket))
    vbuckets = bucket_conn.get_vbuckets(bucket)
    obj = VBucketAwareMemcached(rest, bucket, info=node)
    memcacheds, vbucket_map, vbucket_map_replica = obj.request_map(
        rest, bucket)
    # Create dictionary with key:"ip:port" and value: a list of vbuckets
    server_dict = defaultdict(list)
    for everyID in range(0, vbucket_count):
        memcached_ip_port = str(vbucket_map[everyID])
        server_dict[memcached_ip_port].append(everyID)
    while time.time() < end_time and len(ready_vbuckets) < vbucket_count:
        for every_ip_port in server_dict:
            # Retrieve memcached ip and port
            ip, port = every_ip_port.split(":")
            client = MemcachedClient(ip, int(port), timeout=30)
            client.vbucket_count = len(vbuckets)
            bucket_info = bucket_conn.get_bucket(bucket)
            versions = rest.get_nodes_versions(logging=False)
            pre_spock = False
            for version in versions:
                # NOTE(review): lexicographic string compare - assumes
                # single-digit major versions; confirm for 10.x+
                if "5" > version:
                    pre_spock = True
            if pre_spock:
                log.info("Atleast 1 of the server is on pre-spock "
                         "version. Using the old ssl auth to connect to "
                         "bucket.")
                client.sasl_auth_plain(
                    bucket_info.name.encode('ascii'),
                    bucket_info.saslPassword.encode('ascii'))
            else:
                client.sasl_auth_plain(admin_user, admin_pass)
            # Rebinds the bucket name to its ascii-encoded form on every
            # outer iteration
            bucket = bucket.encode('ascii')
            client.bucket_select(bucket)
            for i in server_dict[every_ip_port]:
                try:
                    (a, b, c) = client.get_vbucket_state(i)
                except mc_bin_client.MemcachedError as e:
                    ex_msg = str(e)
                    # NOTE(review): checking the caller-supplied log_msg
                    # for "Not my vbucket" looks like it was meant to
                    # check ex_msg - confirm before relying on the
                    # truncation below
                    if "Not my vbucket" in log_msg:
                        log_msg = log_msg[:log_msg.find("vBucketMap")
                                          + 12] + "..."
                    if e.status == memcacheConstants.ERR_NOT_MY_VBUCKET:
                        # May receive this while waiting for vbuckets, continue and retry...S
                        continue
                    log.error("%s: %s" % (log_msg, ex_msg))
                    continue
                except exceptions.EOFError:
                    # The client was disconnected for some reason. This can
                    # happen just after the bucket REST API is returned (before
                    # the buckets are created in each of the memcached processes.)
                    # See here for some details: http://review.couchbase.org/#/c/49781/
                    # Longer term when we don't disconnect clients in this state we
                    # should probably remove this code.
                    log.error(
                        "got disconnected from the server, reconnecting")
                    continue
                if c.find("\x01") > 0 or c.find("\x02") > 0:
                    ready_vbuckets[i] = True
                elif i in ready_vbuckets:
                    log.warning(
                        "vbucket state changed from active to {0}".format(
                            c))
                    del ready_vbuckets[i]
            client.close()
    return len(ready_vbuckets) == vbucket_count
class Guest(Thread):
    """Workload thread simulating an unauthenticated 'guest' user that
    runs read-only travel-app query scenarios.

    Each instance executes `op_count` scenarios (a named one, or random
    picks when op_type == "random"), storing the last summary in
    self.result and any fatal error in self.exception.
    """
    scenarios = dict()
    log = logger.get("test")

    def __init__(self, bucket, op_type, **kwargs):
        super(Guest, self).__init__()
        self.bucket = bucket
        self.op_type = op_type
        self.op_count = 1
        self.result = None
        self.exception = None
        if 'op_count' in kwargs:
            self.op_count = kwargs['op_count']
        # Refresh the scenario registry from the class' scenario_* methods
        Guest.scenarios = get_all_scenarios(Guest)

    @staticmethod
    def __get_airline_query_summary(result):
        """Format a one-string summary for an airline route query result."""
        q_metrics = result["q_result"].metaData().metrics().get()
        return "Total src_airports: %s ,\n" \
               "Total dest_airports: %s (from %s),\n" \
               "Query: %s -> %s on days %s, time <%s>, stops <%s>,\n" \
               "Total hits: %s, \n" \
               "Time: elapsed: %s, execution: %s" \
               % (len(result["src_airports"]),
                  len(result["dest_airports"]), result["dest_airport"],
                  result["src_airport"], result["dest_airport"],
                  result["days"], result["time_clause"],
                  result["stop_clause"],
                  q_metrics.resultCount(),
                  q_metrics.elapsedTime(), q_metrics.executionTime())

    @staticmethod
    def __get_hotel_query_summary(result):
        """Format a one-string summary for a hotel query result."""
        q_metrics = result["q_result"].metaData().metrics().get()
        return "Total Country: %d, Total Cities in selected country: %d,\n" \
               "Query: Hotels with avg_rating in %s::%s, %s \n" \
               "Total hits: %s, \n" \
               "Time: elapsed: %s, execution: %s" \
               % (len(result["countries"]), len(result["cities"]),
                  result["country"], result["city"],
                  result["with_ratings"],
                  q_metrics.resultCount(),
                  q_metrics.elapsedTime(), q_metrics.executionTime())

    @staticmethod
    def scenario_query_routes_on_days():
        result = query_util.Airline.query_for_routes(sdk_clients["guest"])
        q_summary = "Guest - scenario_query_routes_on_days\n"
        q_summary += Guest.__get_airline_query_summary(result)
        return q_summary

    @staticmethod
    def scenario_query_routes_on_days_time():
        result = query_util.Airline.query_for_routes(sdk_clients["guest"],
                                                     with_time=True)
        q_summary = "Guest - scenario_query_routes_on_days_time\n"
        q_summary += Guest.__get_airline_query_summary(result)
        return q_summary

    @staticmethod
    def scenario_query_fights_with_stop_count():
        result = query_util.Airline.query_for_routes(
            sdk_clients["guest"],
            with_time=choice([True, False]),
            with_stop_count=True)
        q_summary = "Guest - scenario_query_fights_with_stop_count\n"
        q_summary += Guest.__get_airline_query_summary(result)
        return q_summary

    @staticmethod
    def scenario_query_search_available_hotels():
        result = query_util.Hotel.query_for_hotels(sdk_clients["guest"])
        q_summary = "Guest - scenario_search_available_hotels\n"
        q_summary += Guest.__get_hotel_query_summary(result)
        return q_summary

    @staticmethod
    def scenario_query_hotel_based_on_ratings():
        result = query_util.Hotel.query_for_hotels(sdk_clients["guest"],
                                                   with_ratings=True)
        q_summary = "Guest - scenario_query_hotel_based_on_ratings\n"
        q_summary += Guest.__get_hotel_query_summary(result)
        return q_summary

    @staticmethod
    def scenario_read_hotel_reviews():
        result = query_util.Hotel.query_for_hotels(sdk_clients["guest"],
                                                   with_ratings=True,
                                                   read_reviews=True)
        q_summary = "Guest - scenario_read_hotel_reviews\n"
        q_summary += Guest.__get_hotel_query_summary(result)
        return q_summary

    def run(self):
        """Execute op_count scenarios.

        Bug fix: the generic `except Exception` handler used to come
        first, which made the IndexFailureException and
        Java_base_exception handlers unreachable. Specific handlers now
        come first, so an index failure is logged as a warning and the
        loop continues, while any other failure records the exception
        and stops the thread.
        """
        while self.op_count > 0:
            try:
                if self.op_type == "random":
                    self.result = \
                        Guest.scenarios[get_random_scenario(Guest)]()
                else:
                    self.result = Guest.scenarios[self.op_type]()
                Guest.log.info(self.result)
            except IndexFailureException as e:
                self.log.warning(e)
            except Java_base_exception as e:
                self.exception = e
                traceback.print_exc()
                break
            except Exception as e:
                self.exception = e
                traceback.print_exc()
                break
            self.op_count -= 1