def load_data_in_bucket(self, folders, filenames, missing_field, operation,
                        bucket, start_key=0, end_key=1000, batch_size=10,
                        exp=0, durability="", mutation_num=0, key=None):
    """
    Loads data into a CB bucket.
    :param folders: list, folder paths in the AWS bucket
    :param filenames: list, file names in the bucket
    :param missing_field: list of booleans; if an entry is True, the
    corresponding field's value will be omitted while creating the S3 file.
    :param operation: create/update/delete
    :param bucket: name of the bucket on the CB server
    :param start_key: Doc Key to start the operation with
    :param end_key: Doc Key to end the operation with
    :param batch_size: Batch size of doc_ops
    :param exp: MaxTTL used for doc operations
    :param durability: Durability level to use for doc operation
    :param mutation_num: Mutation count to keep track per doc_loading
    :param key: doc key prefix
    :return: doc-loading task returned by async_load_bucket
    """
    self.log.info("Loading data into bucket")
    template_obj = JsonObject.create()
    template_obj.put("filename", "")
    template_obj.put("folder", "")
    template_obj.put("mutated", mutation_num)
    template_obj.put("null_key", None)
    template_obj.put("missing_field", "")
    if not key:
        key = "test_docs"
    doc_gen = DocumentGenerator(key, template_obj,
                                start=start_key, end=end_key,
                                randomize=True,
                                filename=filenames,
                                folder=folders,
                                missing_field=missing_field)
    return self.bucket_util.async_load_bucket(
        self.cluster, bucket, doc_gen, operation, exp,
        durability=durability, batch_size=batch_size,
        suppress_error_table=True)

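# Illustrative usage sketch (not part of the suite): start an async load of
# S3 path-metadata docs. The test instance `test`, the Bucket object
# `cb_bucket`, and the folder/file names are hypothetical; how the returned
# task is awaited is framework-specific.
def example_load_s3_docs(test, cb_bucket):
    folders = ["level_1/level_2", "level_1"]    # folder paths in the bucket
    filenames = ["data_1.json", "data_2.json"]  # file names to reference
    missing_field = [False, False]              # keep every field populated
    task = test.load_data_in_bucket(folders, filenames, missing_field,
                                    "create", cb_bucket,
                                    start_key=0, end_key=500)
    return task
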
def load_document_until_ram_percentage(self):
    self.start = 0
    doc_batch_size = 5000
    self.end = doc_batch_size
    bucket_helper = BucketHelper(self.cluster.master)
    mem_cap = (self.document_ram_percentage
               * self.bucket_ram
               * 1000000)
    first = ['james', 'sharon', 'dave', 'bill', 'mike', 'steve']
    profession = ['doctor', 'lawyer']

    template_obj = JsonObject.create()
    template_obj.put("number", 0)
    template_obj.put("first_name", "")
    template_obj.put("profession", "")
    template_obj.put("mutated", 0)
    template_obj.put("mutation_type", "ADD")

    while True:
        self.log.info("Add documents to bucket")
        doc_gen = DocumentGenerator("test_docs", template_obj,
                                    start=self.start, end=self.end,
                                    randomize=False,
                                    first_name=first,
                                    profession=profession,
                                    number=range(70))
        try:
            self.bucket_util.sync_load_all_buckets(
                self.cluster, doc_gen, "create", 0,
                batch_size=doc_batch_size,
                durability=self.durability_level,
                suppress_error_table=True)
        except Exception as e:
            self.fail("Following error occurred while loading "
                      "bucket - {0}".format(str(e)))

        self.log.info("Calculate available free memory")
        bucket_json = bucket_helper.get_bucket_json(self.bucket_name)
        mem_used = 0
        for node_stat in bucket_json["nodes"]:
            mem_used += node_stat["interestingStats"]["mem_used"]

        if mem_used < mem_cap:
            self.log.info("Memory used: %s < %s" % (mem_used, mem_cap))
            self.start = self.end
            self.end = self.end + doc_batch_size
            self.num_items = self.end
        else:
            break

def perform_doc_ops_in_all_cb_buckets(self, operation, start_key=0,
                                      end_key=1000, batch_size=10, exp=0,
                                      _async=False, durability=""):
    """
    Create/Update/Delete docs in all CB buckets
    :param operation: String - "create", "update", "delete"
    :param start_key: Doc Key to start the operation with
    :param end_key: Doc Key to end the operation with
    :param batch_size: Batch size of doc_ops
    :param exp: MaxTTL used for doc operations
    :param _async: Boolean to decide whether to start ops in parallel
    :param durability: Durability level to use for doc operation
    :return: doc-loading task(s) when _async is True, else None
    """
    first = ['james', 'sharon', 'dave', 'bill', 'mike', 'steve']
    profession = ['doctor', 'lawyer']

    template_obj = JsonObject.create()
    template_obj.put("number", 0)
    template_obj.put("first_name", "")
    template_obj.put("profession", "")
    template_obj.put("mutated", 0)
    if operation == "update":
        template_obj.put("mutated", 1)
    template_obj.put("mutation_type", "ADD")

    doc_gen = DocumentGenerator('test_docs', template_obj,
                                start=start_key, end=end_key,
                                randomize=True,
                                first_name=first, profession=profession,
                                number=range(70))
    try:
        if _async:
            return self.bucket_util._async_load_all_buckets(
                self.cluster, doc_gen, operation, exp,
                durability=durability, batch_size=batch_size,
                suppress_error_table=True)
        else:
            self.bucket_util.sync_load_all_buckets(
                self.cluster, doc_gen, operation, exp,
                durability=durability, batch_size=batch_size,
                suppress_error_table=True)
    except Exception as e:
        # str(e) instead of e.message: the latter is gone in Python 3
        self.log.error(str(e))

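# Illustrative usage sketch (hypothetical test instance): a blocking create
# pass followed by an async update. The task-manager wait call is an
# assumption about the surrounding framework, not part of this module.
def example_doc_ops(test):
    test.perform_doc_ops_in_all_cb_buckets("create", start_key=0,
                                           end_key=10000)
    tasks = test.perform_doc_ops_in_all_cb_buckets("update", start_key=0,
                                                   end_key=5000,
                                                   _async=True)
    for task in tasks:
        test.task_manager.get_task_result(task)  # assumed wait API
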
def get_doc_generator(self, start, end):
    age = range(5)
    first = ['james', 'sharon']
    # Pad the body so each generated doc lands close to self.doc_size bytes
    body = [''.rjust(self.doc_size - 10, 'a')]

    template = JsonObject.create()
    template.put("age", None)
    template.put("first_name", None)
    template.put("body", None)

    generator = DocumentGenerator(self.key, template,
                                  randomize=True,
                                  age=age, first_name=first, body=body,
                                  start=start, end=end,
                                  key_size=self.key_size,
                                  doc_size=self.doc_size,
                                  doc_type=self.doc_type)
    return generator

def translate_to_json_object(self, value, doc_type="json"):
    if type(value) == JsonObject:
        return value

    json_obj = JsonObject.create()
    try:
        if doc_type.find("json") != -1:
            if type(value) != dict:
                value = pyJson.loads(value)
            for field, val in value.items():
                json_obj.put(field, val)
            return json_obj
        elif doc_type.find("binary") != -1:
            # Binary docs are passed through untouched
            pass
    except Exception:
        # Fall through and return the (possibly empty) JsonObject
        pass
    return json_obj

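# Illustrative usage sketch (hypothetical `sdk_client` wrapper instance):
# both a raw JSON string and a plain dict are accepted for "json" doc types.
def example_translate(sdk_client):
    from_string = sdk_client.translate_to_json_object('{"city": "SFO"}')
    from_dict = sdk_client.translate_to_json_object({"city": "SFO"})
    return from_string, from_dict
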
def __load_initial_data(self):
    # Create collection meta_data document per tenant
    sdk_client = self.sdk_clients["bucket_data_writer"]
    for tenant in self.tenants:
        sdk_client.select_collection(scope_name=tenant,
                                     collection_name="meta_data")
        app_data = JsonObject.create()
        app_data.put("date", "2001-01-01")
        result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                                 "application", app_data)
        self.assertTrue(result["status"], "App_meta creation failed")
        # Track the new doc under the tenant that actually received it
        # (not self.tenants[0], which miscounted for multi-tenant runs)
        self.bucket.scopes[tenant].collections["meta_data"].num_items += 1

        create_users = User(self.bucket, scope=tenant,
                            op_type="scenario_user_registration",
                            num_items=20000)
        create_users.start()
        create_users.join()

def get_json_object(document):
    """
    Function to convert a python dictionary into a Couchbase JsonObject.
    The dictionary is converted recursively, adding JsonArray objects for
    any lists encountered along the way.
    :param document: Dictionary to convert to json_object
    :type document: dict
    :return: Couchbase JsonObject of the dictionary
    :rtype: JsonObject
    """
    json_object = JsonObject.create()
    for field, val in document.items():
        if isinstance(val, dict):
            value = get_json_object(val)
        elif isinstance(val, list):
            value = get_json_array(val)
        else:
            value = val
        json_object.put(field, value)
    return json_object

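# Illustrative usage: nested dicts become nested JsonObjects and lists
# become JsonArrays (via the companion get_json_array helper).
profile = {
    "name": "james",
    "roles": ["admin", "reader"],
    "address": {"city": "SFO", "zip": 94105},
}
profile_json = get_json_object(profile)
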
def perform_doc_ops_in_all_cb_buckets(self, operation, start_key=0,
                                      end_key=1000, batch_size=10, exp=0,
                                      _async=False, durability="",
                                      mutation_num=0, cluster=None,
                                      buckets=[], key=None):
    """
    Create/Update/Delete docs in all CB buckets
    :param operation: String - "create", "update", "delete"
    :param start_key: Doc Key to start the operation with
    :param end_key: Doc Key to end the operation with
    :param batch_size: Batch size of doc_ops
    :param exp: MaxTTL used for doc operations
    :param _async: Boolean to decide whether to start ops in parallel
    :param durability: Durability level to use for doc operation
    :param mutation_num: Mutation count to keep track per doc_loading
    :param cluster: Cluster object for the cluster on which this doc load
    operation has to be performed.
    :param buckets: List of buckets on which the doc load operation has to
    be performed.
    :param key: Key prefix for the generated docs
    :return: doc-loading task(s) when _async is True, else None
    """
    first = ['james', 'sharon', 'dave', 'bill', 'mike', 'steve']
    profession = ['doctor', 'lawyer']

    template_obj = JsonObject.create()
    template_obj.put("number", 0)
    template_obj.put("first_name", "")
    template_obj.put("profession", "")
    template_obj.put("mutated", mutation_num)
    template_obj.put("mutation_type", "ADD")

    if not key:
        key = "test_docs"

    doc_gen = DocumentGenerator(key, template_obj,
                                start=start_key, end=end_key,
                                randomize=False,
                                first_name=first, profession=profession,
                                number=range(70))
    if cluster:
        bucket_util = cluster.bucket_util
    else:
        cluster = self.cluster
        bucket_util = self.bucket_util
    try:
        if _async:
            if buckets:
                # Collect one task per bucket; the earlier 'return' inside
                # this loop skipped all buckets after the first one
                tasks = list()
                for bucket in buckets:
                    tasks.append(bucket_util.async_load_bucket(
                        cluster, bucket, doc_gen, operation, exp,
                        durability=durability, batch_size=batch_size,
                        suppress_error_table=True))
                return tasks
            return bucket_util._async_load_all_buckets(
                cluster, doc_gen, operation, exp,
                durability=durability, batch_size=batch_size,
                suppress_error_table=True)
        else:
            bucket_util.sync_load_all_buckets(
                cluster, doc_gen, operation, exp,
                durability=durability, batch_size=batch_size,
                suppress_error_table=True)
    except Exception as e:
        # str(e) instead of e.message: the latter is gone in Python 3
        self.log.error(str(e))

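# Illustrative usage sketch (hypothetical): unlike the simpler overload
# above, this variant can target a specific cluster and bucket subset while
# tagging docs with a mutation counter. The wait call is an assumed API.
def example_targeted_update(test, remote_cluster):
    tasks = test.perform_doc_ops_in_all_cb_buckets(
        "update", start_key=0, end_key=1000, _async=True,
        mutation_num=2, cluster=remote_cluster,
        buckets=remote_cluster.buckets[:1], key="remote_docs")
    for task in tasks:
        test.task_manager.get_task_result(task)  # assumed wait API
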
def get_template():
    template = JsonObject.create()
    template.put("mutated", 0)
    template.put("createdDate", "01/31/1970")
    template.put("preferences", JsonObject.create())
    return template

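# Illustrative pairing with DocumentGenerator (key prefix is hypothetical):
# the template seeds each doc with a nested, initially empty "preferences"
# sub-object that later mutations can populate.
gen = DocumentGenerator("template_docs", get_template(), start=0, end=100)
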
def book_flight(u_id, tenant_scope, src_airport=None, dest_airport=None):
    summary = dict()
    required_seats = choice(range(1, 7))
    ticket_type = "normal"
    checkout_cart_collection = "checkout_cart"
    d_level = Bucket.DurabilityLevel

    # Both endpoints given means a round trip is being booked
    if [src_airport, dest_airport].count(None) == 0:
        ticket_type = "return"

    result = query_util.Airline.query_for_routes(
        sdk_clients["airline_booking"],
        src_airport=src_airport, dest_airport=dest_airport)
    flights = list()
    for row in result["q_result"].rowsAsObject():
        src_airport = row.get("sourceairport")
        dest_airport = row.get("destinationairport")
        for flight in row.get("flights"):
            flights.append(flight)

    summary["src_airport"] = src_airport
    summary["dest_airport"] = dest_airport
    summary["required_seats"] = required_seats

    if not flights:
        summary["status"] = "No flights available"
        return summary

    flight_to_book = choice(flights)
    reservation_date = get_random_reservation_date()

    checkout_doc = JsonObject.create()
    passenger_data = list()
    for _ in range(required_seats):
        gender = choice(["M", "F"])
        first_name = choice(FIRST_NAMES[gender])
        last_name = choice(LAST_NAMES)
        age = randint(3, 90)
        passenger_info = JsonObject.create()
        passenger_info.put("first_name", first_name)
        passenger_info.put("last_name", last_name)
        passenger_info.put("gender", gender)
        passenger_info.put("age", age)
        passenger_data.append(passenger_info)

    client = sdk_clients["airline_booking"]
    client.select_collection(tenant_scope, checkout_cart_collection)
    cart_id = query_util.CommonUtil.get_next_id(tenant_scope,
                                                checkout_cart_collection)
    cart_key = "cart_%s" % cart_id
    checkout_doc.put("id", cart_id)
    checkout_doc.put("user_id", u_id)
    checkout_doc.put("flight_name", flight_to_book.get("flight"))
    checkout_doc.put("flight_time", flight_to_book.get("utc"))
    checkout_doc.put("travel_date", reservation_date)
    checkout_doc.put("from", src_airport)
    checkout_doc.put("to", dest_airport)
    checkout_doc.put("day_of_week", flight_to_book.get("day"))
    checkout_doc.put("seat_count", required_seats)
    checkout_doc.put("passengers", passenger_data)

    retry = 1
    while retry <= User.max_retries:
        result = client.crud(DocLoading.Bucket.DocOps.CREATE,
                             cart_key, checkout_doc,
                             durability=d_level.MAJORITY)
        if result["status"] is False:
            if SDKException.DurabilityImpossibleException \
                    in str(result["error"]):
                User.log.debug("Retrying due to d_impossible")
            else:
                raise Exception("Flight cart add failed: %s" % result)
        else:
            # Cart doc created; stop retrying (a retry on success would
            # fail with DocumentExistsException)
            break
        retry += 1

    if choice([True, False]):
        # Booking confirmed scenario, add ticket under flight booking
        c_name = "booking_data"
        booking_id = query_util.CommonUtil.get_next_id(tenant_scope, c_name)
        ticket_key = "ticket_%s" % booking_id
        checkout_doc.put("id", booking_id)
        retry = 1
        while retry <= User.max_retries:
            client.select_collection(tenant_scope, c_name)
            result = client.crud(
                DocLoading.Bucket.DocOps.CREATE, ticket_key, checkout_doc,
                durability=d_level.MAJORITY_AND_PERSIST_TO_ACTIVE)
            if result["status"] is False:
                if SDKException.DurabilityImpossibleException \
                        in str(result["error"]):
                    User.log.debug("Retrying due to d_impossible")
                else:
                    raise Exception("Ticket booking failed: %s" % result)
            else:
                # Ticket doc created; stop retrying
                break
            retry += 1

        # Add confirmed ticket under user profile
        f_booking_id = query_util.CommonUtil.get_next_id(tenant_scope,
                                                         "flight_booking")
        f_booking_key = "booking_%s" % f_booking_id
        f_booking_doc = JsonObject.create()
        f_booking_doc.put("id", f_booking_id)
        f_booking_doc.put("user_id", u_id)
        f_booking_doc.put("ticket_id", booking_id)
        f_booking_doc.put("status", "active")
        f_booking_doc.put("booked_on", global_vars.app_current_date)
        f_booking_doc.put("ticket_type", ticket_type)
        client.select_collection(tenant_scope, "flight_booking")
        result = client.crud(DocLoading.Bucket.DocOps.CREATE,
                             f_booking_key, f_booking_doc)
        if result["status"] is False:
            raise Exception("User flight_booking add failed: %s" % result)

        # Remove booked ticket from cart
        retry = 1
        while retry <= User.max_retries:
            client.select_collection(tenant_scope,
                                     checkout_cart_collection)
            result = client.crud(DocLoading.Bucket.DocOps.DELETE, cart_key,
                                 durability=d_level.MAJORITY)
            if result["status"] is False \
                    and SDKException.DurabilityImpossibleException \
                    in str(result["error"]):
                User.log.debug("Retrying due to d_impossible")
                retry += 1
                continue
            # Deleted (or hit a non-retryable error); stop retrying
            break
        summary["status"] = "Booking success"
    else:
        summary["status"] = "cancelled"
    return summary

def test_ingestion_after_kv_rollback(self):
    try:
        for kv_node in self.cluster.kv_nodes:
            for bucket in self.cluster.buckets:
                self.log.info("Stopping persistence on {0} for bucket {1}"
                              .format(kv_node.ip, bucket.name))
                mem_client = MemcachedClientHelper.direct_client(
                    kv_node, bucket)
                mem_client.stop_persistence()

        # Perform Create, Update, Delete ops in the CB bucket
        self.log.info("Performing Mutations")
        first = ['james', 'sharon', 'dave', 'bill', 'mike', 'steve']
        profession = ['doctor', 'lawyer']

        template_obj = JsonObject.create()
        template_obj.put("number", 0)
        template_obj.put("first_name", "")
        template_obj.put("profession", "")
        template_obj.put("mutated", 0)
        template_obj.put("mutation_type", "ADD")

        doc_gen = DocumentGenerator("test_docs", template_obj,
                                    start=0, end=1000,
                                    randomize=False,
                                    first_name=first, profession=profession,
                                    number=range(70))
        try:
            self.bucket_util.sync_load_all_buckets(
                self.cluster, doc_gen, "create", 0,
                batch_size=1000,
                durability=self.durability_level,
                suppress_error_table=True)
        except Exception as e:
            self.fail("Following error occurred while loading "
                      "bucket - {0}".format(str(e)))

        datasets = list()
        for dataset in self.cbas_util.list_all_dataset_objs():
            if dataset.kv_scope and dataset.kv_scope.name == "_default" \
                    and dataset.kv_collection.name == "_default":
                dataset.num_of_items = dataset.num_of_items + 1000
                datasets.append(dataset)

        for dataset in datasets:
            if not self.cbas_util.validate_cbas_dataset_items_count(
                    self.cluster, dataset.full_name, dataset.num_of_items):
                raise Exception(
                    "Dataset doc count does not match the actual doc "
                    "count in associated KV collection")

        # Kill memcached on Master node so that another KV node
        # becomes master
        self.log.info("Kill Memcached process on Master node")
        shell = RemoteMachineShellConnection(self.cluster.master)
        shell.kill_memcached()

        failover_nodes = list()
        # Start persistence on the non-master KV nodes
        for kv_node in self.cluster.kv_nodes:
            if kv_node.ip != self.cluster.master.ip:
                failover_nodes.append(kv_node)
                for bucket in self.cluster.buckets:
                    self.log.info(
                        "Starting persistence on {0} for bucket {1}"
                        .format(kv_node.ip, bucket.name))
                    mem_client = MemcachedClientHelper.direct_client(
                        kv_node, bucket)
                    mem_client.start_persistence()

        # Failover the nodes on which persistence was started
        self.log.info("Failing over Nodes - {0}".format(str(failover_nodes)))
        result = self.task.failover(servers=self.cluster.servers,
                                    failover_nodes=failover_nodes,
                                    graceful=False, use_hostnames=False,
                                    wait_for_pending=0, allow_unsafe=False,
                                    all_at_once=False)
        self.log.info(str(result))

        # Wait for Failover & CBAS rollback to complete
        self.sleep(120)

        bucket_wise_collection_item_count = dict()
        for bucket in self.cluster.buckets:
            bucket_wise_collection_item_count[bucket.name] = \
                self.bucket_util.get_doc_count_per_collection(
                    self.cluster, bucket)
        self.log.info(str(bucket_wise_collection_item_count))

        # Verify the doc count in datasets
        for dataset in self.cbas_util.list_all_dataset_objs():
            if not self.cbas_util.validate_cbas_dataset_items_count(
                    self.cluster, dataset.full_name,
                    bucket_wise_collection_item_count[
                        dataset.kv_bucket.name][dataset.kv_scope.name][
                        dataset.kv_collection.name]["items"]):
                raise Exception(
                    "Dataset doc count does not match the actual doc "
                    "count in associated KV collection after KV rollback")
    except Exception as e:
        for kv_node in self.cluster.kv_nodes:
            for bucket in self.cluster.buckets:
                self.log.info("Starting persistence on {0} for bucket {1}"
                              .format(kv_node.ip, bucket.name))
                mem_client = MemcachedClientHelper.direct_client(
                    kv_node, bucket)
                mem_client.start_persistence()
        self.fail(str(e))

def doc_generator(key, start, end, key_size=8, mix_key_size=False,
                  doc_size=256, doc_type="json", target_vbucket=None,
                  vbuckets=1024, mutation_type="ADD", mutate=0,
                  randomize_doc_size=False, randomize_value=False,
                  randomize=False, deep_copy=False):
    # Defaults to JSON doc_type
    template_obj = JsonObject.create()
    template_obj.put("mutated", mutate)

    # Shrink the remaining doc_size budget as each field is added, so the
    # generated documents stay close to the requested size
    _l = len('''{ "mutated": %s }''' % mutate)
    doc_size -= _l

    _l = len('"age ": 5,')
    if doc_size > _l:
        template_obj.put("age", 5)
        doc_size -= _l

    _l = len('"name ": "james",')
    if doc_size > _l:
        template_obj.put("name", "james")
        doc_size -= _l

    _l = len('"mutation_type ": {},'.format(mutation_type))
    if doc_size > _l:
        template_obj.put("mutation_type", mutation_type)
        doc_size -= _l

    _l = len('"body ": ')
    if doc_size > _l:
        template_obj.put("body", "b")
        doc_size -= _l

    # if doc_type in ["string", "binary"]:
    #     template_obj = 'age:{0}, first_name: "{1}", body: "{2}", ' \
    #                    'mutated: %s, mutation_type: "%s"' \
    #                    % (mutate, mutation_type)

    if target_vbucket:
        return DocumentGeneratorForTargetVbucket(
            key, template_obj, start=start, end=end,
            key_size=key_size, mix_key_size=mix_key_size,
            doc_size=doc_size, doc_type=doc_type,
            target_vbucket=target_vbucket, vbuckets=vbuckets,
            randomize_doc_size=randomize_doc_size,
            randomize_value=randomize_value,
            randomize=randomize, deep_copy=deep_copy)
    return DocumentGenerator(key, template_obj, start=start, end=end,
                             key_size=key_size, mix_key_size=mix_key_size,
                             doc_size=doc_size, doc_type=doc_type,
                             target_vbucket=target_vbucket,
                             vbuckets=vbuckets,
                             randomize_doc_size=randomize_doc_size,
                             randomize_value=randomize_value,
                             randomize=randomize, deep_copy=deep_copy)

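# Illustrative calls (key prefixes and vbucket numbers are hypothetical):
# a ~512-byte JSON generator, and one pinned to specific target vBuckets.
gen = doc_generator("test_docs", 0, 1000, key_size=12, doc_size=512,
                    mutate=1, randomize_value=True)
pinned_gen = doc_generator("pinned_docs", 0, 100,
                           target_vbucket=[0, 1, 2])
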
def load_initial_collection_data(self):
    self.__print_step("Loading initial data into collections")
    # Maps doc 'type' to its (scope, collection) pair
    type_collection_map = dict()
    type_collection_map["airline"] = ("airlines", "airline")
    type_collection_map["airport"] = ("airlines", "airport")
    type_collection_map["route"] = ("airlines", "routes")
    type_collection_map["hotel"] = ("hotels", "hotels")
    type_collection_map["landmark"] = ("hotels", "landmark")

    meta_data = dict()
    for scope in collection_spec["scopes"]:
        meta_data[scope["name"]] = dict()
        for collection in scope["collections"]:
            meta_data[scope["name"]][collection["name"]] = dict()
            meta_data[scope["name"]][collection["name"]]["doc_counter"] = 0
            meta_data[scope["name"]][collection["name"]]["num_items"] = 0

    hotel_review_rows = list()
    sdk_client = self.sdk_clients["bucket_data_writer"]
    query = "SELECT * FROM `travel-sample`.`_default`.`_default` " \
            "WHERE type='%s'"
    for d_type, collection_info in type_collection_map.items():
        s_name, c_name = collection_info[0], collection_info[1]
        sdk_client.select_collection(s_name, c_name)

        # TODO: Remove this retry logic once MB-41535 is fixed
        retry_index = 0
        query_result = None
        while retry_index < 5:
            try:
                query_result = sdk_client.cluster.query(query % d_type)
                break
            except IndexFailureException:
                retry_index += 1
                self.sleep(5, "Retrying due to IndexFailure (MB-41535)")
                continue

        rows_inserted = 0
        for row in query_result.rowsAsObject():
            value = row.getObject(CbServer.default_collection) \
                .removeKey("type")
            doc_id = value.getInt("id")
            if doc_id > meta_data[s_name][c_name]["doc_counter"]:
                meta_data[s_name][c_name]["doc_counter"] = doc_id
            if d_type == "hotel":
                # Segregate 'reviews' from hotel collection
                review = JsonObject.create()
                review.put("id", doc_id)
                review.put("reviews", value.getArray("reviews"))
                hotel_review_rows.append(review)
                value = value.removeKey("reviews")
            key = d_type + "_" + str(doc_id)
            result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                                     key, value)
            if result["status"] is False:
                self.fail("Loading collections failed")
            rows_inserted += 1
        self.bucket.scopes[s_name].collections[c_name].num_items \
            += rows_inserted
        meta_data[s_name][c_name]["num_items"] = rows_inserted

    # Write hotel reviews into respective collection
    rows_inserted = 0
    s_name, c_name = "hotels", "reviews"
    sdk_client.select_collection(s_name, c_name)
    for review in hotel_review_rows:
        doc_id = review.getInt("id")
        if doc_id > meta_data[s_name][c_name]["doc_counter"]:
            meta_data[s_name][c_name]["doc_counter"] = doc_id
        key = "review_" + str(doc_id)
        result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                                 key, review)
        if result["status"] is False:
            self.fail("Loading reviews collection failed")
        rows_inserted += 1
    self.bucket.scopes[s_name].collections[c_name].num_items \
        += rows_inserted
    meta_data[s_name][c_name]["num_items"] = rows_inserted

    # Create collection meta_data document
    sdk_client.select_collection(scope_name=CbServer.default_scope,
                                 collection_name="meta_data")
    app_data = JsonObject.create()
    app_data.put("date", "2001-01-01")
    result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                             "application", app_data)
    self.assertTrue(result["status"], "App_meta creation failed")
    self.bucket.scopes[CbServer.default_scope].collections[
        "meta_data"].num_items += 1

    for s_name, scope_data in meta_data.items():
        for c_name, c_data in scope_data.items():
            c_meta = JsonObject.create()
            for meta_key, meta_val in c_data.items():
                c_meta.put(meta_key, meta_val)
            key = "%s.%s" % (s_name, c_name)
            result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                                     key, c_meta)
            if result["status"] is False:
                self.fail("Meta creation failed")
            self.bucket.scopes[CbServer.default_scope].collections[
                "meta_data"].num_items += 1

    create_users = User(self.bucket, op_type="scenario_user_registration",
                        num_items=10000)
    create_users.start()
    create_users.join()
    self.sleep(30, "Wait for num_items to get updated")