def setUp(self):
    super(QueryCollectionsUseKeys, self).setUp()
    self.log.info("============== QueryCollectionsUseKeys setup has started ==============")
    self.skip_load = True
    self.collection_bucket_name = 'default_1'
    self.bucket_params = self._create_bucket_params(
        server=self.master, size=100, replicas=self.num_replicas, bucket_type=self.bucket_type,
        enable_replica_index=self.enable_replica_index, eviction_policy=self.eviction_policy, lww=self.lww)
    self.cluster.create_standard_bucket(name=self.collection_bucket_name, port=11222,
                                        bucket_params=self.bucket_params)
    self.scope_name = "my_scope"
    self.my_collection_name = "my_collection"
    self.col_rest = CollectionsRest(self.master)
    self.buckets = self.rest.get_buckets()
    result = self.col_rest.create_scope_collection(bucket=self.collection_bucket_name,
                                                   scope=self.scope_name,
                                                   collection=self.my_collection_name)
    self.assertTrue(result, "Failed to create scope and collection")
    self.sleep(20)
    self.num_items = 1000
    self.gen_create = SDKDataLoader(num_ops=self.num_items, percent_create=100, percent_update=0,
                                    percent_delete=0, scope=self.scope_name,
                                    collection=self.my_collection_name)
    self._load_all_buckets(self.master, self.gen_create)
    if not self.query_context:
        self.col_namespace = f"default:{self.collection_bucket_name}.{self.scope_name}.{self.my_collection_name}"
    else:
        if '.' in self.query_context:
            _, bucket_space = self.query_context.split(':')
            bucket, scope = bucket_space.split('.')
            if bucket != self.collection_bucket_name:
                self.fail("Bucket name in query_context does not match the bucket created in setup")
            if scope != self.scope_name:
                self.fail("Scope name in query_context does not match the scope created in setup")
            self.col_namespace = self.my_collection_name
        else:
            self.fail("Invalid query_context value")
    self.query_params = {"query_context": self.query_context}
    self.log.info("============== QueryCollectionsUseKeys setup has ended ==============")
def test_delete_bsc_while_index_updates_mutation(self): num_of_docs = 10**4 self.prepare_collection_for_indexing( num_of_docs_per_collection=num_of_docs) collection_namespace = self.namespaces[0] _, keyspace = collection_namespace.split(':') bucket, scope, collection = keyspace.split('.') index_gen = QueryDefinition(index_name='idx', index_fields=['age', 'city', 'country']) query = index_gen.generate_primary_index_create_query( namespace=collection_namespace, defer_build=self.defer_build) self.run_cbq_query(query=query) self.wait_until_indexes_online() select_query = f'select count(age) from {collection_namespace} where age >= 0' result = self.run_cbq_query(query=select_query)['results'][0]['$1'] self.assertEqual(result, num_of_docs) gen_create = SDKDataLoader(num_ops=num_of_docs * 10, percent_create=80, percent_update=10, percent_delete=10, scope=scope, collection=collection, start_seq_num=num_of_docs + 1) try: # deleting BSC while indexes catching up with new mutations with ThreadPoolExecutor() as executor: executor.submit(self._load_all_buckets, self.master, gen_create) self.sleep(30) select_task = executor.submit(self.run_cbq_query, query=select_query) delete_bsc = executor.submit( self.delete_bucket_scope_collection, server=self.servers[0], delete_item=self.item_to_delete, bucket=bucket, scope=scope, collection=collection) delete_result = delete_bsc.result() self.assertTrue(delete_result, f"Failed to delete: {self.item_to_delete}") count = select_task.result()['results'][0]['$1'] self.assertTrue( count > num_of_docs, "Delete bucket happened before mutation operation began") self.sleep(30) index_status = self.rest.get_index_status() self.assertFalse(index_status) except Exception as err: if self.item_to_delete == 'scope' or self.item_to_delete == 'collection': self.log.info(str(err)) # err_msg = "Unknown scope or collection in operation" # self.assertTrue(err_msg in str(err), "Error msg not matching") else: self.fail(str(err)) index_status = self.rest.get_index_status() self.assertFalse(index_status)
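# --- Illustrative sketch (not part of the suite) ---------------------------------
# The test above goes through self.delete_bucket_scope_collection() to drop the
# bucket/scope/collection while the index is still catching up. As a rough,
# hedged reference, a scope or collection drop can also be issued directly against
# the cluster manager's collections management REST API. The endpoint path, port
# and the `requests`-based helper below are assumptions for illustration only and
# are not the helper this suite actually uses.
import requests


def _drop_collection_sketch(host, user, password, bucket, scope, collection):
    """Drop a single collection via the collections management REST API (sketch)."""
    url = (f"http://{host}:8091/pools/default/buckets/{bucket}"
           f"/scopes/{scope}/collections/{collection}")
    resp = requests.delete(url, auth=(user, password))
    # A 200 response means the drop was accepted; the manifest uid in the response
    # body can be polled afterwards to confirm the change has propagated.
    return resp.status_code == 200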
def _get_generator(self, prefix, docsize, numitems): if self._use_java_sdk: gen_create = SDKDataLoader(num_ops=numitems, percent_create=100, key_prefix=prefix, doc_size=docsize, timeout=1000) else: gen_create = BlobGenerator(prefix, prefix, docsize, end=numitems) return gen_create
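# Example usage of the helper above (values are illustrative, not taken from any test):
#   gen = self._get_generator(prefix="doc_", docsize=256, numitems=10000)
#   self._load_all_buckets(self.master, gen)
# When self._use_java_sdk is set, documents are produced through the Java SDK data
# loader; otherwise the in-process BlobGenerator is used with the same prefix,
# document size and item count.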
def test_create_index_with_missing_with_at_plus_scan(self): collection_namespace = self.namespaces[0] index_gen = QueryDefinition(index_name='idx1', index_fields=['age', 'city', 'country']) # Checking the select query runs with where clause on leading key select_age_query = f'select count(age) from {collection_namespace} where age >= 0' select_country_query = f'select count(country) from {collection_namespace} where country is not null' # Checking the select query runs with where clause on non-leading key if self.partitoned_index: query = index_gen.generate_index_create_query( namespace=collection_namespace, defer_build=False, missing_indexes=True, num_replica=self.num_index_replicas, missing_field_desc=self.missing_field_desc, partition_by_fields=self.partition_fields, num_partition=self.num_partition, index_where_clause=self.index_where_clause) else: query = index_gen.generate_index_create_query( namespace=collection_namespace, defer_build=False, missing_indexes=True, num_replica=self.num_index_replicas, missing_field_desc=self.missing_field_desc, index_where_clause=self.index_where_clause) self.run_cbq_query(query=query) self.wait_until_indexes_online() scan_vectors_before_mutations = self.get_mutation_vectors() gen_create = SDKDataLoader(num_ops=self.num_of_docs_per_collection * 2, percent_create=100, json_template="Person", percent_update=0, percent_delete=0, scope='test_scope_1', collection='test_collection_1', output=True) task = self.cluster.async_load_gen_docs(self.master, self.test_bucket, gen_create) out = task.result() self.log.info(out) self.sleep(15, "Waiting some time before checking for mutation vectors") scan_vectors_after_mutations = self.get_mutation_vectors() new_scan_vectors = scan_vectors_after_mutations - scan_vectors_before_mutations scan_vector = self.convert_mutation_vector_to_scan_vector( new_scan_vectors) # Checking the select query runs with where clause on leading key result = self.run_cbq_query( query=select_age_query, scan_consistency='at_plus', scan_vector=scan_vector)['results'][0]['$1'] self.assertEqual(result, self.num_of_docs_per_collection * 2, "Doc count not matching") # Checking the select query runs with where clause on non-leading key result = self.run_cbq_query( query=select_country_query, scan_consistency='at_plus', scan_vector=scan_vector)['results'][0]['$1'] self.assertEqual(result, self.num_of_docs_per_collection * 2, "Doc count not matching")
def test_at_plus_index_consistency_with_paused_state(self):
    if self.gsi_type != 'memory_optimized':
        self.skipTest("This test runs only with GSI type MOI")
    curr_index_quota = self.rest.get_index_official_stats()['indexer']['memory_quota']
    num_of_docs = self.num_of_docs_per_collection
    self.prepare_collection_for_indexing(num_of_docs_per_collection=num_of_docs, json_template="Hotel")
    collection_namespace = self.namespaces[0]
    _, keyspace = collection_namespace.split(':')
    bucket, scope, collection = keyspace.split('.')
    index_gen = QueryDefinition(index_name='idx', index_fields=['price', 'country', 'city'],
                                partition_by_fields=['price'])
    meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id'])
    query = index_gen.generate_index_create_query(namespace=collection_namespace)
    self.run_cbq_query(query=query)
    query = meta_index_gen.generate_index_create_query(namespace=collection_namespace)
    self.run_cbq_query(query=query)
    is_paused = False
    new_inserts = 10 ** 4
    while not is_paused:
        gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100, json_template="Hotel",
                                   percent_update=0, percent_delete=0, scope=scope, collection=collection,
                                   output=True, start_seq_num=num_of_docs + 1)
        task = self.cluster.async_load_gen_docs(self.master, bucket, gen_create)
        task.result()
        # Updating the doc counts
        num_of_docs = num_of_docs + new_inserts
        index_metadata = self.index_rest.get_indexer_metadata()['status']
        for index in index_metadata:
            if index['status'] == 'Paused':
                is_paused = True
    scan_vectors_before_mutations = self.get_mutation_vectors()
    # Adding more data so that the indexer has to catch up after the indexer quota is increased
    for i in range(5):
        gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100, json_template="Hotel",
                                   percent_update=0, percent_delete=0, scope=scope, collection=collection,
                                   output=True, start_seq_num=num_of_docs + 1)
        task = self.cluster.async_load_gen_docs(self.master, bucket, gen_create)
        task.result()
        # Updating the doc counts
        num_of_docs = num_of_docs + new_inserts
    scan_vectors_after_mutations = self.get_mutation_vectors()
    new_scan_vectors = sorted(list(scan_vectors_after_mutations - scan_vectors_before_mutations))
    scan_vector = self.convert_mutation_vector_to_scan_vector(new_scan_vectors)
    self.rest.set_service_memoryQuota(service='indexMemoryQuota',
                                      memoryQuota=int(curr_index_quota / 1024 / 1024) + 100)
    self.sleep(60)
    index_metadata = self.index_rest.get_indexer_metadata()['status']
    for index in index_metadata:
        if index['status'] == 'Paused':
            self.fail("Indexer is still in Paused state. Either increase memory or sleep time")
    select_query = f'Select * from {collection_namespace} where price >= 0;'
    select_meta_id_query = f'Select meta().id,* from {collection} where meta().id like "doc_100%";'
    count_query = f'Select count(*) from {collection_namespace} where price >= 0;'
    named_collection_query_context = f'default:{bucket}.{scope}'
    try:
        with ThreadPoolExecutor() as executor:
            select_task = executor.submit(self.run_cbq_query, query=select_query,
                                          scan_consistency='at_plus', scan_vector=scan_vector)
            meta_task = executor.submit(self.run_cbq_query, query=select_meta_id_query,
                                        scan_consistency='at_plus', scan_vector=scan_vector,
                                        query_context=named_collection_query_context)
            count_task = executor.submit(self.run_cbq_query, query=count_query)
            result = select_task.result()['results']
            meta_id_result_after_new_inserts = meta_task.result()['results']
            count_result = count_task.result()['results'][0]['$1']
            self.assertTrue(len(meta_id_result_after_new_inserts) > 0,
                            "at_plus didn't wait for the indexer to catch up")
            self.assertEqual(count_result, num_of_docs, "at_plus didn't wait for the indexer to catch up")
            self.assertEqual(len(result), num_of_docs, "at_plus didn't wait for the indexer to catch up")
    except Exception as err:
        self.fail(err)
def test_at_plus_scans_with_catching_up_indexer(self): num_of_docs = self.num_of_docs_per_collection scan_vectors_before_mutations = self.get_mutation_vectors() self.prepare_collection_for_indexing( num_of_docs_per_collection=num_of_docs, json_template="Hotel") collection_namespace = self.namespaces[0] _, keyspace = collection_namespace.split(':') bucket, scope, collection = keyspace.split('.') index_gen = QueryDefinition(index_name='idx', index_fields=['price', 'country', 'city'], partition_by_fields=['price']) meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id']) query = index_gen.generate_index_create_query( namespace=collection_namespace, defer_build=True) self.run_cbq_query(query=query) query = meta_index_gen.generate_index_create_query( namespace=collection_namespace, num_replica=1, defer_build=True) self.run_cbq_query(query=query) scan_vectors_after_mutations = self.get_mutation_vectors() new_scan_vectors = sorted( list(scan_vectors_after_mutations - scan_vectors_before_mutations)) scan_vector = self.convert_mutation_vector_to_scan_vector( new_scan_vectors) select_query = f'Select * from {collection_namespace} where price >= 0;' select_meta_id_query = f'Select meta().id,* from {collection} where meta().id like "doc_100%";' count_query = f'Select count(*) from {collection_namespace} where price >= 0;' named_collection_query_context = f'default:{bucket}.{scope}' gen_create = SDKDataLoader(num_ops=num_of_docs, percent_create=100, json_template="Hotel", percent_update=0, percent_delete=0, scope=scope, collection=collection, output=True, start_seq_num=num_of_docs + 1) try: with ThreadPoolExecutor() as executor: build_query = f"BUILD INDEX ON {collection_namespace} (idx, meta_idx)" tasks = self.data_ops_javasdk_loader_in_batches( sdk_data_loader=gen_create, batch_size=10000) executor.submit(self.run_cbq_query, query=build_query) self.sleep(30, "Giving some time to build indexes") select_task = executor.submit(self.run_cbq_query, query=select_query, scan_consistency='at_plus', scan_vector=scan_vector) meta_task = executor.submit( self.run_cbq_query, query=select_meta_id_query, scan_consistency='at_plus', scan_vector=scan_vector, query_context=named_collection_query_context) count_task = executor.submit(self.run_cbq_query, query=count_query) result = select_task.result()['results'] meta_id_result_after_new_inserts = meta_task.result( )['results'] count_result = count_task.result()['results'][0]['$1'] self.assertTrue( len(meta_id_result_after_new_inserts) > 0, "at_plus didn't wait for all catchup") self.assertTrue(count_result > num_of_docs, "at_plus didn't wait for all catchup") self.assertTrue( len(result) >= num_of_docs, "at_plus didn't wait for all catchup") for task in tasks: task.result() except Exception as err: self.fail(err)
def test_request_plus_index_consistency(self):
    """
    Summary: This test validates request_plus scan consistency by flooding the cluster with a
    heavy data load while, at the same time, issuing select queries that must see the new docs.
    """
    num_of_docs = 10 ** 5
    self.prepare_collection_for_indexing(num_of_docs_per_collection=num_of_docs)
    collection_namespace = self.namespaces[0]
    _, keyspace = collection_namespace.split(':')
    bucket, scope, collection = keyspace.split('.')
    index_gen = QueryDefinition(index_name='idx', index_fields=['age', 'country', 'city'])
    meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id'])
    doc_body = {
        'age': 34,
        'country': 'test_country',
        'city': 'test_city',
        'filler1': ['ut', 'distinctio', 'sit', 'inventore', 'quo', 'quos', 'saepe', 'doloremque', 'sed', 'omnis'],
        'firstName': 'Mitch',
        'lastName': 'Funk',
        'streetAddress': '66877 Williamson Terrace',
        'suffix': 'V',
        'title': 'International Solutions Coordinator'
    }
    insert_query = f'INSERT INTO {collection_namespace} (KEY, VALUE) VALUES ("scan_doc_1", {doc_body})'
    query = index_gen.generate_index_create_query(namespace=collection_namespace)
    self.run_cbq_query(query=query)
    query = meta_index_gen.generate_index_create_query(namespace=collection_namespace)
    self.run_cbq_query(query=query)
    self.wait_until_indexes_online()
    gen_create = SDKDataLoader(num_ops=2 * num_of_docs, percent_create=100, percent_update=0,
                               percent_delete=0, scope=scope, collection=collection,
                               start_seq_num=num_of_docs + 1)
    select_query = f'Select country, city from {collection_namespace} where meta().id = "scan_doc_1"'
    count_query = f'Select count(meta().id) from {collection_namespace} where age >= 0'
    result = self.run_cbq_query(query=count_query)['results'][0]['$1']
    self.assertEqual(result, num_of_docs)
    try:
        with ThreadPoolExecutor() as executor:
            executor.submit(self._load_all_buckets, self.master, gen_create)
            executor.submit(self.run_cbq_query, query=insert_query)
            self.sleep(30, "Giving some time so the mutations start")
            select_task = executor.submit(self.run_cbq_query, query=select_query,
                                          scan_consistency='request_plus')
            count_task = executor.submit(self.run_cbq_query, query=count_query,
                                         scan_consistency='request_plus')
            result1 = select_task.result()['results'][0]
            result2 = count_task.result()['results'][0]['$1']
            self.assertEqual(result1, {'city': 'test_city', 'country': 'test_country'},
                             "scan_doc_1, which was inserted before the scan request, is not in the "
                             "request_plus result")
            self.assertTrue(result2 > num_of_docs + 1,
                            "request_plus scan did not wait for the newly inserted docs")
    except Exception as err:
        self.fail(str(err))
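# --- Illustrative sketch (not part of the suite) ---------------------------------
# run_cbq_query() hides the transport details; conceptually, request_plus is just a
# parameter on the N1QL request that makes the scan wait for all mutations observed
# at request time. The direct REST call below (query service on port 8093, using the
# `requests` library) is an assumption for illustration only, not how this suite
# issues queries.
def _request_plus_query_sketch(query_node, user, password, statement):
    """Run a statement with scan_consistency=request_plus against the query REST API (sketch)."""
    import requests
    payload = {"statement": statement, "scan_consistency": "request_plus"}
    resp = requests.post(f"http://{query_node}:8093/query/service",
                         data=payload, auth=(user, password))
    return resp.json().get("results", [])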
def test_at_plus_index_consistency_with_multiple_buckets(self):
    """ This test runs queries with scan vectors against multiple buckets """
    num_of_docs = 10 ** 3
    self.rest.delete_all_buckets()
    bucket_1 = 'test_bucket_1'
    bucket_2 = 'test_bucket_2'
    self.cluster.create_standard_bucket(name=bucket_1, port=11222, bucket_params=self.bucket_params)
    self.cluster.create_standard_bucket(name=bucket_2, port=11222, bucket_params=self.bucket_params)
    collection_namespaces = []
    scope_prefix = 'test_scope'
    collection_prefix = 'test_collection'
    data_load_tasks = []
    for bucket in (bucket_1, bucket_2):
        for s_item in range(self.num_scopes):
            scope = f'{scope_prefix}_{s_item}'
            self.collection_rest.create_scope(bucket=bucket, scope=scope)
            for c_item in range(self.num_collections):
                collection = f'{collection_prefix}_{c_item}'
                self.collection_rest.create_collection(bucket=bucket, scope=scope, collection=collection)
                self.sleep(10)
                gen_create = SDKDataLoader(num_ops=num_of_docs, percent_create=100, percent_update=0,
                                           percent_delete=0, scope=scope, collection=collection,
                                           json_template='Hotel')
                task = self.cluster.async_load_gen_docs(self.master, bucket, gen_create, timeout_secs=300)
                data_load_tasks.append(task)
                collection_namespaces.append(f'default:{bucket}.{scope}.{collection}')
    for task in data_load_tasks:
        task.result()
    for collection_namespace in collection_namespaces:
        index_gen = QueryDefinition(index_name='idx', index_fields=['price', 'country', 'city'])
        meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id'])
        query = index_gen.generate_index_create_query(namespace=collection_namespace)
        self.run_cbq_query(query=query)
        query = meta_index_gen.generate_index_create_query(namespace=collection_namespace)
        self.run_cbq_query(query=query)
        self.wait_until_indexes_online()
    select_query1 = f'Select * from {collection_namespaces[0]} where price >100 and country like "A%";'
    select_query2 = f'Select * from {collection_namespaces[1]} where price >100 and country like "A%";'
    select_meta_id_query = f'Select * from test_collection_0 where meta().id like "doc_100%";'
    count_query1 = f'Select count(*) from {collection_namespaces[0]} where price >= 0;'
    count_query2 = f'Select count(*) from {collection_namespaces[1]} where price >= 0;'
    named_collection_query_context1 = f'default:{bucket_1}.test_scope_0'
    named_collection_query_context2 = f'default:{bucket_2}.test_scope_0'
    meta_id_result_before_inserts1 = self.run_cbq_query(
        query=select_meta_id_query, query_context=named_collection_query_context1)['results']
    meta_id_result_before_inserts2 = self.run_cbq_query(
        query=select_meta_id_query, query_context=named_collection_query_context2)['results']
    scan_vectors_before_mutations = self.get_mutation_vectors()
    new_insert_docs_num = 2
    gen_create = SDKDataLoader(num_ops=new_insert_docs_num, percent_create=100, json_template="Hotel",
                               percent_update=0, percent_delete=0, scope='test_scope_0',
                               collection='test_collection_0', output=True, start_seq_num=num_of_docs + 1)
    scan_vectors = {}
    task = self.cluster.async_load_gen_docs(self.master, bucket_1, gen_create)
    out = task.result()
    self.log.info(out)
    self.sleep(15, "Waiting some time before checking for mutation vectors")
    scan_vectors_after_mutations = self.get_mutation_vectors()
    new_scan_vectors = scan_vectors_after_mutations - scan_vectors_before_mutations
    scan_vector = self.convert_mutation_vector_to_scan_vector(new_scan_vectors)
    scan_vectors[bucket_1] = scan_vector
    scan_vectors_before_mutations = self.get_mutation_vectors()
    task = self.cluster.async_load_gen_docs(self.master, bucket_2, gen_create)
    out = task.result()
    self.log.info(out)
    self.sleep(15, "Waiting some time before checking for mutation vectors")
    scan_vectors_after_mutations = self.get_mutation_vectors()
    new_scan_vectors = scan_vectors_after_mutations - scan_vectors_before_mutations
    scan_vector = self.convert_mutation_vector_to_scan_vector(new_scan_vectors)
    scan_vectors[bucket_2] = scan_vector
    result = self.run_cbq_query(query=count_query1)['results'][0]['$1']
    self.assertEqual(result, num_of_docs + new_insert_docs_num)
    result = self.run_cbq_query(query=count_query2)['results'][0]['$1']
    self.assertEqual(result, num_of_docs + new_insert_docs_num)
    try:
        with ThreadPoolExecutor() as executor:
            select_task1 = executor.submit(self.run_cbq_query, query=select_query1,
                                           scan_consistency='at_plus', scan_vectors=scan_vectors)
            meta_task1 = executor.submit(self.run_cbq_query, query=select_meta_id_query,
                                         scan_consistency='at_plus', scan_vectors=scan_vectors,
                                         query_context=named_collection_query_context1)
            result1 = select_task1.result()['results']
            meta_id_result_after_inserts1 = meta_task1.result()['results']
            select_task2 = executor.submit(self.run_cbq_query, query=select_query2,
                                           scan_consistency='at_plus', scan_vectors=scan_vectors)
            meta_task2 = executor.submit(self.run_cbq_query, query=select_meta_id_query,
                                         scan_consistency='at_plus', scan_vectors=scan_vectors,
                                         query_context=named_collection_query_context2)
            result2 = select_task2.result()['results']
            meta_id_result_after_inserts2 = meta_task2.result()['results']
            self.assertTrue(len(result1) > 0, "at_plus scan on bucket_1 returned no rows")
            self.assertEqual(len(meta_id_result_after_inserts1),
                             len(meta_id_result_before_inserts1) + new_insert_docs_num,
                             "at_plus scan did not wait for the newly inserted docs")
            self.assertTrue(len(result2) > 0, "at_plus scan on bucket_2 returned no rows")
            self.assertEqual(len(meta_id_result_after_inserts2),
                             len(meta_id_result_before_inserts2) + new_insert_docs_num,
                             "at_plus scan did not wait for the newly inserted docs")
    except Exception as err:
        self.fail(str(err))
def test_at_plus_index_consistency(self):
    num_of_docs = 10 ** 3
    self.prepare_collection_for_indexing(num_of_docs_per_collection=num_of_docs, json_template="Hotel")
    collection_namespace = self.namespaces[0]
    _, keyspace = collection_namespace.split(':')
    bucket, scope, collection = keyspace.split('.')
    index_gen = QueryDefinition(index_name='idx', index_fields=['price', 'country', 'city'])
    meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id'])
    query = index_gen.generate_index_create_query(namespace=collection_namespace)
    self.run_cbq_query(query=query)
    query = meta_index_gen.generate_index_create_query(namespace=collection_namespace)
    self.run_cbq_query(query=query)
    self.wait_until_indexes_online()
    select_query = f'Select * from {collection_namespace} where price >100 and country like "A%";'
    select_meta_id_query = f'Select * from {collection} where meta().id like "doc_100%";'
    count_query = f'Select count(*) from {collection_namespace} where price >= 0;'
    named_collection_query_context = f'default:{bucket}.{scope}'
    meta_id_result_before_new_inserts = self.run_cbq_query(
        query=select_meta_id_query, query_context=named_collection_query_context)['results']
    scan_vectors_before_mutations = self.get_mutation_vectors()
    new_insert_docs_num = 2
    gen_create = SDKDataLoader(num_ops=new_insert_docs_num, percent_create=100, json_template="Hotel",
                               percent_update=0, percent_delete=0, scope=scope, collection=collection,
                               output=True, start_seq_num=num_of_docs + 1)
    tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=1000)
    for task in tasks:
        out = task.result()
        self.log.info(out)
    self.sleep(15, "Waiting some time before checking for mutation vectors")
    scan_vectors_after_mutations = self.get_mutation_vectors()
    new_scan_vectors = scan_vectors_after_mutations - scan_vectors_before_mutations
    scan_vector = self.convert_mutation_vector_to_scan_vector(new_scan_vectors)
    result = self.run_cbq_query(query=count_query)['results'][0]['$1']
    self.assertEqual(result, num_of_docs + new_insert_docs_num)
    try:
        # Test with inserts on a named collection
        with ThreadPoolExecutor() as executor:
            select_task = executor.submit(self.run_cbq_query, query=select_query,
                                          scan_consistency='at_plus', scan_vector=scan_vector)
            meta_task = executor.submit(self.run_cbq_query, query=select_meta_id_query,
                                        scan_consistency='at_plus', scan_vector=scan_vector,
                                        query_context=named_collection_query_context)
            result = select_task.result()['results']
            meta_id_result_after_new_inserts = meta_task.result()['results']
            self.assertTrue(len(result) > 0,
                            "at_plus scan returned no rows for docs inserted before the scan request")
            self.assertEqual(len(meta_id_result_after_new_inserts),
                             len(meta_id_result_before_new_inserts) + new_insert_docs_num,
                             "at_plus scan did not wait for the newly inserted docs")

        # Test with update mutations on a named collection
        result1 = self.run_cbq_query(
            query=f'Select * from {collection_namespace} where meta().id = "doc_1001"')['results'][0][collection]
        result2 = self.run_cbq_query(
            query=f'Select * from {collection_namespace} where meta().id = "doc_1002"')['results'][0][collection]
        scan_vectors_before_mutations = self.get_mutation_vectors()
        gen_create = SDKDataLoader(num_ops=new_insert_docs_num, percent_create=0, json_template="Hotel",
                                   percent_update=100, percent_delete=0, scope=scope,
                                   fields_to_update=["price"], collection=collection, output=True,
                                   start_seq_num=num_of_docs + 1, op_type="update")
        tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=1000)
        for task in tasks:
            out = task.result()
            self.log.info(out)
        self.sleep(15, "Waiting some time before checking for mutation vectors")
        scan_vectors_after_mutations = self.get_mutation_vectors()
        new_scan_vectors = scan_vectors_after_mutations - scan_vectors_before_mutations
        scan_vector = self.convert_mutation_vector_to_scan_vector(new_scan_vectors)
        with ThreadPoolExecutor() as executor:
            select_task = executor.submit(self.run_cbq_query, query=select_query,
                                          scan_consistency='at_plus', scan_vector=scan_vector)
            meta_task = executor.submit(self.run_cbq_query, query=select_meta_id_query,
                                        scan_consistency='at_plus', scan_vector=scan_vector,
                                        query_context=named_collection_query_context)
            result = select_task.result()['results']
            meta_id_result_after_new_inserts = meta_task.result()['results']
            result3 = self.run_cbq_query(
                query=f'Select * from {collection_namespace} where meta().id = "doc_1001"')['results'][0][collection]
            result4 = self.run_cbq_query(
                query=f'Select * from {collection_namespace} where meta().id = "doc_1002"')['results'][0][collection]
            diff1 = DeepDiff(result1, result3, ignore_order=True)
            diff2 = DeepDiff(result2, result4, ignore_order=True)
            self.assertTrue(len(result) > 0,
                            "at_plus scan returned no rows for docs inserted before the scan request")
            self.assertEqual(len(meta_id_result_after_new_inserts),
                             len(meta_id_result_before_new_inserts) + new_insert_docs_num,
                             "at_plus scan did not wait for the newly inserted docs")
            if len(diff1['values_changed']) == 1 and "root['price']" in diff1['values_changed']:
                self.log.info("Price field mutated for doc_1001")
                self.log.info(diff1)
            else:
                self.log.info(diff1)
                self.log.info(f"Before mutation: {result1}")
                self.log.info(f"After mutation: {result3}")
                self.fail("Unexpected mutation found for doc_1001")
            if len(diff2['values_changed']) == 1 and "root['price']" in diff2['values_changed']:
                self.log.info("Price field mutated for doc_1002")
                self.log.info(diff2)
            else:
                self.log.info(diff1)
                self.log.info(f"Before mutation: {result2}")
                self.log.info(f"After mutation: {result4}")
                self.fail("Unexpected mutation found for doc_1002")

        # Test with delete mutations on a named collection
        scan_vectors_before_mutations = self.get_mutation_vectors()
        gen_create = SDKDataLoader(num_ops=new_insert_docs_num, percent_create=0, json_template="Hotel",
                                   percent_update=0, percent_delete=100, scope=scope, collection=collection,
                                   output=True, start_seq_num=num_of_docs + 1)
        tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=1000)
        for task in tasks:
            out = task.result()
            self.log.info(out)
        self.sleep(30, "Waiting some time before checking for mutation vectors")
        scan_vectors_after_mutations = self.get_mutation_vectors()
        new_scan_vectors = scan_vectors_after_mutations - scan_vectors_before_mutations
        scan_vector = self.convert_mutation_vector_to_scan_vector(new_scan_vectors)
        with ThreadPoolExecutor() as executor:
            select_task = executor.submit(self.run_cbq_query, query=select_query,
                                          scan_consistency='at_plus', scan_vector=scan_vector)
            meta_task = executor.submit(self.run_cbq_query, query=select_meta_id_query,
                                        scan_consistency='at_plus', scan_vector=scan_vector,
                                        query_context=named_collection_query_context)
            count_task = executor.submit(self.run_cbq_query, query=count_query)
            result = select_task.result()['results']
            meta_id_result_after_new_inserts = meta_task.result()['results']
            count_result = count_task.result()['results'][0]['$1']
            self.assertTrue(len(result) > 0,
                            "at_plus scan returned no rows for docs inserted before the scan request")
            self.assertEqual(len(meta_id_result_after_new_inserts),
                             len(meta_id_result_before_new_inserts),
                             "at_plus scan did not wait for the delete mutations")
            self.assertEqual(count_result, num_of_docs, "Docs count not matching.")

        # Test with new mutations on the default collection
        select_query = f'Select * from {bucket} where price > 100 and country like "A%";'
        select_meta_id_query = f'Select meta().id,* from {bucket} where meta().id like "doc_100%";'
        count_query = f'Select count(*) from {bucket} where price >= 0;'
        named_collection_query_context = f'default:'
        scan_vectors_before_mutations = self.get_mutation_vectors()
        gen_create = SDKDataLoader(num_ops=10 ** 3, percent_create=100, json_template="Hotel",
                                   percent_update=0, percent_delete=0, scope='_default',
                                   collection='_default', output=True)
        tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=1000)
        for task in tasks:
            out = task.result()
            self.log.info(out)
        scan_vectors_after_mutations = self.get_mutation_vectors()
        new_scan_vectors = scan_vectors_after_mutations - scan_vectors_before_mutations
        scan_vector = self.convert_mutation_vector_to_scan_vector(new_scan_vectors)
        default_index_gen = QueryDefinition(index_name='default_idx',
                                            index_fields=['price', 'country', 'city'])
        default_meta_index_gen = QueryDefinition(index_name='default_meta_idx', index_fields=['meta().id'])
        query = default_index_gen.generate_index_create_query(namespace=bucket)
        self.run_cbq_query(query=query)
        query = default_meta_index_gen.generate_index_create_query(namespace=bucket)
        self.run_cbq_query(query=query)
        with ThreadPoolExecutor() as executor:
            select_task = executor.submit(self.run_cbq_query, query=select_query,
                                          scan_consistency='at_plus', scan_vector=scan_vector)
            meta_task = executor.submit(self.run_cbq_query, query=select_meta_id_query,
                                        scan_consistency='at_plus', scan_vector=scan_vector,
                                        query_context=named_collection_query_context)
            count_task = executor.submit(self.run_cbq_query, query=count_query)
            result = select_task.result()['results']
            meta_id_result_after_new_inserts = meta_task.result()['results']
            count_result = count_task.result()['results'][0]['$1']
            self.assertTrue(len(result) > 0,
                            "at_plus scan returned no rows for docs inserted before the scan request")
            self.assertEqual(len(meta_id_result_after_new_inserts), 2,
                             "at_plus scan did not wait for the newly inserted docs")
            self.assertEqual(count_result, num_of_docs, "Docs count not matching.")
    except Exception as err:
        self.fail(str(err))
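# --- Note on at_plus scan vectors (hedged) ----------------------------------------
# convert_mutation_vector_to_scan_vector() is expected to turn the observed mutation
# tokens into the sparse "scan_vector" form that the query service accepts for
# scan_consistency=at_plus: a map keyed by vBucket id whose values are
# [sequence_number, "vbucket_uuid"] pairs, for example
#   {"102": [5, "340845586124769"], "421": [7, "130982439042615"]}
# The exact shape produced by the helper in this suite is not shown here, so treat
# this as an assumption based on the public query API rather than a statement about
# the helper's output.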
def test_gsi_on_ephemeral_with_partial_KV_node(self):
    kv_nodes = self.get_kv_nodes()
    index_node = self.get_nodes_from_services_map(service_type="index")
    if len(kv_nodes) < 2:
        self.fail("This test requires at least 2 KV nodes")
    self.prepare_collection_for_indexing(num_of_docs_per_collection=self.num_of_docs_per_collection)
    collection_namespace = self.namespaces[0]
    _, keyspace = collection_namespace.split(':')
    bucket, scope, collection = keyspace.split('.')
    index_gen = QueryDefinition(index_name='idx', index_fields=['age', 'country', 'city'])
    meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id'])
    query = index_gen.generate_index_create_query(namespace=collection_namespace, defer_build=self.defer_build)
    self.run_cbq_query(query)
    if self.defer_build:
        build_query = index_gen.generate_build_query(namespace=collection_namespace)
        self.run_cbq_query(build_query)
    self.wait_until_indexes_online()
    query = meta_index_gen.generate_index_create_query(namespace=collection_namespace,
                                                       defer_build=self.defer_build)
    self.run_cbq_query(query)
    if self.defer_build:
        build_query = meta_index_gen.generate_build_query(namespace=collection_namespace)
        self.run_cbq_query(build_query)
    self.wait_until_indexes_online()
    select_query = f'Select * from {collection_namespace} where age >10 and country like "A%";'
    select_meta_id_query = f'Select * from {collection} where meta().id like "doc_%";'
    count_query = f'Select count(*) from {collection_namespace} where age >= 0;'
    named_collection_query_context = f'default:{bucket}.{scope}'
    select_result = self.run_cbq_query(query=select_query)['results']
    meta_result = self.run_cbq_query(query=select_meta_id_query,
                                     query_context=named_collection_query_context)['results']
    count_result = self.run_cbq_query(query=count_query)['results'][0]['$1']
    self.assertTrue(len(select_result) > 0)
    self.assertEqual(len(meta_result), self.num_of_docs_per_collection)
    self.assertEqual(count_result, self.num_of_docs_per_collection)
    # Blocking communication between one KV node and the index node to check that indexes
    # still catch up with mutations coming from the other KV node
    kv_node_a, kv_node_b = kv_nodes
    try:
        self.block_incoming_network_from_node(kv_node_b, index_node)
        self.sleep(10)
        new_insert_docs_num = 10 ** 3
        gen_create = SDKDataLoader(num_ops=new_insert_docs_num, percent_create=100, json_template="Person",
                                   percent_update=0, percent_delete=0, scope=scope, collection=collection,
                                   output=True, start_seq_num=self.num_of_docs_per_collection + 1)
        tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=10000)
        self.sleep(20, "Giving some time for data insertion")
        select_result_after_kv_block = self.run_cbq_query(query=select_query)['results']
        meta_result_after_kv_block = self.run_cbq_query(query=select_meta_id_query,
                                                        query_context=named_collection_query_context)['results']
        count_result_after_kv_block = self.run_cbq_query(query=count_query)['results'][0]['$1']
        self.assertTrue(len(select_result_after_kv_block) > len(select_result),
                        "Query result not matching expected value")
        self.assertTrue(len(meta_result_after_kv_block) > len(meta_result),
                        "Query result not matching expected value")
        self.assertTrue(count_result_after_kv_block > count_result,
                        "Query result not matching expected value")
        for task in tasks:
            out = task.result()
            self.log.info(out)
    except Exception as err:
        self.fail(err)
    finally:
        self.resume_blocked_incoming_network_from_node(kv_node_b, index_node)
        self.sleep(10)
    # Checking that the indexer continues to process mutations after a KV node failover
    failover_task = self.cluster.async_failover(self.servers[:self.nodes_init],
                                                failover_nodes=[kv_node_b], graceful=self.graceful)
    failover_task.result()
    gen_create = SDKDataLoader(num_ops=new_insert_docs_num, percent_create=0, json_template="Person",
                               percent_update=0, percent_delete=100, scope=scope, collection=collection,
                               output=True, start_seq_num=self.num_of_docs_per_collection + 1)
    tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=10000)
    for task in tasks:
        out = task.result()
        self.log.info(out)
    select_result_after_failover = self.run_cbq_query(query=select_query)['results']
    meta_result_after_failover = self.run_cbq_query(query=select_meta_id_query,
                                                    query_context=named_collection_query_context)['results']
    count_result_after_failover = self.run_cbq_query(query=count_query)['results'][0]['$1']
    self.assertEqual(len(select_result_after_failover), len(select_result),
                     "Query result not matching expected value")
    self.assertEqual(len(meta_result_after_failover), len(meta_result),
                     "Query result not matching expected value")
    self.assertEqual(count_result_after_failover, count_result, "Query result not matching expected value")
def test_gsi_on_ephemeral_with_eviction_policy(self):
    num_of_docs = self.num_of_docs_per_collection
    self.prepare_collection_for_indexing(num_of_docs_per_collection=self.num_of_docs_per_collection)
    collection_namespace = self.namespaces[0]
    _, keyspace = collection_namespace.split(':')
    bucket, scope, collection = keyspace.split('.')
    index_gen = QueryDefinition(index_name='idx', index_fields=['age', 'country', 'city'])
    meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id'])
    query = index_gen.generate_index_create_query(namespace=collection_namespace, defer_build=self.defer_build)
    self.run_cbq_query(query)
    if self.defer_build:
        build_query = index_gen.generate_build_query(namespace=collection_namespace)
        self.run_cbq_query(build_query)
    self.wait_until_indexes_online()
    query = meta_index_gen.generate_index_create_query(namespace=collection_namespace,
                                                       defer_build=self.defer_build)
    self.run_cbq_query(query)
    if self.defer_build:
        build_query = meta_index_gen.generate_build_query(namespace=collection_namespace)
        self.run_cbq_query(build_query)
    self.wait_until_indexes_online()
    select_query = f'Select * from {collection_namespace} where age >10 and country like "A%";'
    select_meta_id_query = f'Select meta().id from {collection} where meta().id like "doc_%";'
    count_query = f'Select count(*) from {collection_namespace} where age >= 0;'
    named_collection_query_context = f'default:{bucket}.{scope}'
    select_result = self.run_cbq_query(query=select_query)['results']
    meta_result = self.run_cbq_query(query=select_meta_id_query,
                                     query_context=named_collection_query_context)['results']
    count_result = self.run_cbq_query(query=count_query)['results'][0]['$1']
    self.assertTrue(len(select_result) > 0)
    self.assertEqual(len(meta_result), self.num_of_docs_per_collection)
    self.assertEqual(count_result, self.num_of_docs_per_collection)
    new_inserts = 10 ** 4
    is_memory_full = False
    stats_all_buckets = {}
    for bucket in self.buckets:
        stats_all_buckets[bucket.name] = StatsCommon()
    threshold = 0.93
    last_memory_used_val = 0
    while not is_memory_full:
        gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100, percent_update=0,
                                   percent_delete=0, scope=scope, collection=collection, output=True,
                                   start_seq_num=num_of_docs + 1)
        task = self.cluster.async_load_gen_docs(self.master, bucket, gen_create)
        task.result()
        # Updating the doc counts
        num_of_docs = num_of_docs + new_inserts
        self.sleep(30)
        memory_used = int(stats_all_buckets[bucket.name].get_stats([self.master], bucket, '',
                                                                   'mem_used')[self.master])
        self.log.info(f"Current memory usage: {memory_used}")
        if self.eviction_policy == 'noEviction':
            # Memory is considered full once mem_used reaches ~93% of the available memory
            if memory_used > threshold * self.bucket_size * 1000000:
                # Just filling the leftover memory to be doubly sure
                gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100, percent_update=0,
                                           percent_delete=0, scope=scope, collection=collection,
                                           output=True, start_seq_num=num_of_docs + 1)
                task = self.cluster.async_load_gen_docs(self.master, bucket, gen_create)
                task.result()
                num_of_docs = num_of_docs + new_inserts
                memory_used = int(stats_all_buckets[bucket.name].get_stats([self.master], bucket, '',
                                                                           'mem_used')[self.master])
                self.log.info(f"Current memory usage: {memory_used}")
                is_memory_full = True
        else:
            if memory_used < last_memory_used_val:
                break
            last_memory_used_val = memory_used
    meta_ids = self.run_cbq_query(query=select_meta_id_query,
                                  query_context=named_collection_query_context)['results']
    ids_at_threshold = sorted([item['id'] for item in meta_ids])
    # Pushing new docs to check the eviction policy
    new_inserts = 10 ** 4
    gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100, json_template="Employee",
                               percent_update=0, percent_delete=0, scope=scope, collection=collection,
                               output=True, start_seq_num=num_of_docs + 1)
    tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=10000)
    for task in tasks:
        out = task.result()
        self.log.info(out)
    meta_ids_with_eviction_enforced = self.run_cbq_query(query=select_meta_id_query,
                                                         query_context=named_collection_query_context)['results']
    ids_after_threshold = sorted([item['id'] for item in meta_ids_with_eviction_enforced])
    if self.eviction_policy == 'noEviction':
        self.assertEqual(len(meta_ids_with_eviction_enforced), len(meta_ids))
        self.assertEqual(ids_at_threshold, ids_after_threshold)
    else:
        self.assertTrue(len(meta_ids_with_eviction_enforced) != len(meta_ids))
        self.assertTrue(ids_after_threshold != ids_at_threshold)
def test_gsi_on_ephemeral_with_bucket_flush(self): self.prepare_collection_for_indexing(num_of_docs_per_collection=self.num_of_docs_per_collection) collection_namespace = self.namespaces[0] _, keyspace = collection_namespace.split(':') bucket, scope, collection = keyspace.split('.') index_gen = QueryDefinition(index_name='idx', index_fields=['age', 'country', 'city']) meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id']) query = index_gen.generate_index_create_query(namespace=collection_namespace, defer_build=self.defer_build) self.run_cbq_query(query) if self.defer_build: build_query = index_gen.generate_build_query(namespace=collection_namespace) self.run_cbq_query(build_query) self.wait_until_indexes_online() query = meta_index_gen.generate_index_create_query(namespace=collection_namespace, defer_build=self.defer_build) self.run_cbq_query(query) if self.defer_build: build_query = meta_index_gen.generate_build_query(namespace=collection_namespace) self.run_cbq_query(build_query) self.wait_until_indexes_online() select_query = f'Select * from {collection_namespace} where age >10 and country like "A%";' select_meta_id_query = f'Select * from {collection} where meta().id like "doc_%";' count_query = f'Select count(*) from {collection_namespace} where age >= 0;' named_collection_query_context = f'default:{bucket}.{scope}' select_result = self.run_cbq_query(query=select_query)['results'] meta_result = self.run_cbq_query(query=select_meta_id_query, query_context=named_collection_query_context)['results'] count_result = self.run_cbq_query(query=count_query)['results'][0]['$1'] self.assertTrue(len(select_result) > 0) self.assertEqual(len(meta_result), self.num_of_docs_per_collection) self.assertEqual(count_result, self.num_of_docs_per_collection) # flushing the bucket task = self.cluster.async_bucket_flush(server=self.master, bucket=self.test_bucket) result = task.result() self.log.info(result) self.sleep(30, "Giving some time to indexer to update doc counts") select_result = self.run_cbq_query(query=select_query)['results'] meta_result = self.run_cbq_query(query=select_meta_id_query, query_context=named_collection_query_context)['results'] count_result = self.run_cbq_query(query=count_query)['results'][0]['$1'] self.assertEqual(len(select_result), 0) self.assertEqual(len(meta_result), 0) self.assertEqual(count_result, 0) new_inserts = 1000 gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100, json_template="Person", percent_update=0, percent_delete=0, scope=scope, collection=collection, output=True) tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=1000) for task in tasks: out = task.result() self.log.info(out) self.sleep(30) select_result = self.run_cbq_query(query=select_query)['results'] meta_result = self.run_cbq_query(query=select_meta_id_query, query_context=named_collection_query_context)['results'] count_result = self.run_cbq_query(query=count_query)['results'][0]['$1'] self.assertTrue(len(select_result) > 0) self.assertEqual(len(meta_result), new_inserts) self.assertEqual(count_result, new_inserts)
def test_delete_multiple_bsc(self): bucket_prefix = self.test_bucket buckets_list = [] collection_namespaces_list = [] self.rest.delete_bucket(bucket=self.test_bucket) self.sleep(10) for bucket_num in range(5): self.test_bucket = f'{bucket_prefix}_{bucket_num}' buckets_list.append(self.test_bucket) self.cluster.create_standard_bucket( name=self.test_bucket, port=11222, bucket_params=self.bucket_params) # self.prepare_collection_for_indexing(num_of_docs_per_collection=10 ** 4) self.collection_rest.create_scope_collection_count( scope_num=1, collection_num=1, scope_prefix=self.scope_prefix, collection_prefix=self.collection_prefix, bucket=self.test_bucket) scope, collection = f'{self.scope_prefix}_1', f'{self.collection_prefix}_1' gen_create = SDKDataLoader(num_ops=10**3, percent_create=100, percent_update=0, percent_delete=0, scope=scope, collection=collection, json_template='Person') task = self.cluster.async_load_gen_docs(server=self.master, generator=gen_create, bucket=self.test_bucket, scope=scope, collection=collection) task.result() collection_namespace = f'default:{self.test_bucket}.{scope}.{collection}' collection_namespaces_list.append(collection_namespace) index_list = [] for count, collection_namespace in enumerate( collection_namespaces_list): index = f'idx_{count}' index_list.append(index) index_gen = QueryDefinition( index_name=index, index_fields=['age', 'city', 'country']) query = index_gen.generate_primary_index_create_query( namespace=collection_namespace, defer_build=self.defer_build) self.run_cbq_query(query=query) self.wait_until_indexes_online() # deleting multiple indexes across multiple bucket collection_namespace_to_be_deleted = random.sample( collection_namespaces_list, 3) with ThreadPoolExecutor() as executor: task_list = [] for collection_namespace in collection_namespace_to_be_deleted: _, keyspace = collection_namespace.split(':') bucket, scope, collection = keyspace.split('.') index_list.remove(f'idx_{bucket.split("_")[-1]}') task = executor.submit(self.delete_bucket_scope_collection, server=self.servers[0], delete_item=self.item_to_delete, bucket=bucket, scope=scope, collection=collection) task_list.append(task) for task in task_list: result = task.result() self.assertTrue(result) index_status = self.rest.get_index_status() for index in index_list: idx = index_status[f'test_bucket_{index.split("_")[-1]}'] self.assertTrue(index in idx, 'Index of available bucket is missing')