def test_expand_query_node(self):
    """
    target: test search after expanding queryNode from 1 to 2
    method: 1.deploy a cluster milvus, create a collection, insert and search
            2.expand queryNode from 1 to 2
            3.create a second collection, insert the same data and search
    expected: the two searches return the same ids
    """
    release_name = "scale-query"
    env = HelmEnv(release_name=release_name)
    env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": '10.98.0.8', "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = "query_scale_one"
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # # create index
    # collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    # assert collection_w.has_index()[0]
    # assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
    #                                         default_index_params)

    collection_w.load()
    # vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(5)]
    res1, _ = collection_w.search(data[-1][:5], ct.default_float_vec_field_name,
                                  ct.default_search_params, ct.default_limit)

    # scale queryNode pod from 1 to 2
    env.helm_upgrade_cluster_milvus(queryNode=2)

    # verify that search on a new collection returns the same results after scaling
    c_name_2 = "query_scale_two"
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
    collection_w2.insert(data)
    assert collection_w2.num_entities == ct.default_nb
    collection_w2.load()
    res2, _ = collection_w2.search(data[-1][:5], ct.default_float_vec_field_name,
                                   ct.default_search_params, ct.default_limit)

    assert res1[0].ids == res2[0].ids
def test_connection_init_collection_connection(self, host, port):
    """
    target: create a collection, then remove the connection
    method: connect, init collection, then remove the connection
    expected: drop fails without a connection and succeeds after reconnecting
    """
    # successfully create default connection
    self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=host, port=port,
                                 check_task=ct.CheckTasks.ccr)

    # init collection successfully
    collection_name = cf.gen_unique_str('connection_test_')
    schema = cf.gen_default_collection_schema()
    self.collection_wrap.init_collection(name=collection_name, schema=schema,
                                         _using=DefaultConfig.DEFAULT_USING)

    # remove connection
    self.connection_wrap.remove_connection(alias=DefaultConfig.DEFAULT_USING)

    # drop collection failed
    self.collection_wrap.drop(check_task=ct.CheckTasks.err_res,
                              check_items={ct.err_code: 0,
                                           ct.err_msg: "should create connect first"})

    # successfully create default connection again
    self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=host, port=port,
                                 check_task=ct.CheckTasks.ccr)

    # drop collection success
    self.collection_wrap.drop()
def e2e_milvus(host, c_name):
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(data[-1][:ct.default_nq], ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
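# A minimal usage sketch for e2e_milvus, assuming a reachable Milvus instance.
# The host below is a placeholder, not a value from the original suite; in the
# scale/chaos tests the host comes from the deployed service. The helper runs
# the full create/insert/index/search/query path, so it works as a smoke check
# after a deploy or upgrade.
if __name__ == '__main__':
    e2e_milvus(host='127.0.0.1', c_name=cf.gen_unique_str('e2e_smoke_'))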
def test_collection_dup_name_new_dim(self):
    """
    target: test collection with dup name and new dim schema
    method: 1. create collection with default schema
            2. create collection with the same name and a new-dim schema
    expected: raise exception
    """
    self._connect()
    new_dim = 120
    c_name = cf.gen_unique_str(prefix)
    collection_w = self.init_collection_wrap(name=c_name,
                                             check_task=CheckTasks.check_collection_property,
                                             check_items={exp_name: c_name,
                                                          exp_schema: default_schema})
    schema = cf.gen_default_collection_schema()
    new_field = cf.gen_float_vec_field(dim=new_dim)
    schema.fields[-1] = new_field
    error = {ct.err_code: 1,
             ct.err_msg: "The collection already exist, but the schema is not the same as the "
                         "passed in"}
    self.collection_wrap.init_collection(c_name, schema=schema,
                                         check_task=CheckTasks.err_res, check_items=error)
    assert collection_w.primary_field is None
def init_collection_general(self, prefix="test", insert_data=False, nb=ct.default_nb,
                            partition_num=0, is_binary=False, is_all_data_type=False,
                            auto_id=False, dim=ct.default_dim, is_index=False,
                            primary_field=ct.default_int64_field_name, is_flush=True,
                            name=None, **kwargs):
    """
    target: create specified collections
    method: 1. create collections (binary/non-binary, default/all data types, auto_id or not)
            2. create partitions if specified
            3. insert the specified (binary/non-binary, default/all data types) data
               into each partition if any
            4. do not load if is_index is True
    expected: return the collection, the raw data and the inserted ids
    """
    log.info("Test case of search interface: initialize before test case")
    self._connect()
    collection_name = cf.gen_unique_str(prefix)
    if name is not None:
        collection_name = name
    vectors = []
    binary_raw_vectors = []
    insert_ids = []
    time_stamp = 0
    # 1 create collection
    default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim,
                                                      primary_field=primary_field)
    if is_binary:
        default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim,
                                                                 primary_field=primary_field)
    if is_all_data_type:
        default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
                                                               primary_field=primary_field)
    log.info("init_collection_general: collection creation")
    collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
    # 2 add extra partitions if specified (default is 1 partition named "_default")
    if partition_num > 0:
        cf.gen_partitions(collection_w, partition_num)
    # 3 insert data if specified
    if insert_data:
        collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \
            cf.insert_data(collection_w, nb, is_binary, is_all_data_type,
                           auto_id=auto_id, dim=dim)
        if is_flush:
            assert collection_w.is_empty is False
            assert collection_w.num_entities == nb
    # This condition will be removed after the auto-index feature
    if not is_index:
        collection_w.load()
    return collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp
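# A minimal usage sketch for init_collection_general, assuming it is called from
# a test method on the same base class (argument values are illustrative):
#
#     collection_w, vectors, _, insert_ids, time_stamp = self.init_collection_general(
#         prefix="search_collection", insert_data=True, nb=3000, partition_num=1, dim=128)
#
# Unless is_index=True, the returned wrapper is already loaded, so search and
# query calls can follow immediately.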
def test_collection_dup_name_new_primary(self):
    """
    target: test collection with dup name and new primary_field schema
    method: 1. create collection with default schema
            2. create collection with the same fields and a new primary_field schema
    expected: raise exception
    """
    self._connect()
    c_name = cf.gen_unique_str(prefix)
    collection_w = self.init_collection_wrap(name=c_name,
                                             check_task=CheckTasks.check_collection_property,
                                             check_items={exp_name: c_name,
                                                          exp_schema: default_schema})
    schema = cf.gen_default_collection_schema(primary_field=ct.default_int64_field_name)
    error = {ct.err_code: 1,
             ct.err_msg: "The collection already exist, but the schema is not the same as the "
                         "passed in"}
    self.collection_wrap.init_collection(c_name, schema=schema,
                                         check_task=CheckTasks.err_res, check_items=error)
    assert collection_w.primary_field is None
def _collection(self, name=None, data=None, schema=None, check_res=None, **kwargs):
    """ Testing func """
    self._connect()
    name = cf.gen_unique_str("ApiReq") if name is None else name
    schema = cf.gen_default_collection_schema() if schema is None else schema
    collection = self.collection.collection_init(name=name, data=data, schema=schema,
                                                 check_res=check_res, **kwargs)
    return name, collection
def collection_wrap_4_search(self, connection):
    c_wrap = ApiCollectionWrapper()
    c_wrap.init_collection(name=cf.gen_unique_str("collection_4_search_"),
                           schema=cf.gen_default_collection_schema(),
                           check_task="check_nothing")
    c_wrap.insert(data=cf.gen_default_dataframe_data(nb=10000))
    return c_wrap
def test_collection_dup_name_with_desc(self):
    """
    target: test collection with dup name and description
    method: 1. create collection with a default schema carrying a description
            2. create collection with the same name
    expected: the descriptions are consistent
    """
    self._connect()
    c_name = cf.gen_unique_str(prefix)
    schema = cf.gen_default_collection_schema(description=ct.collection_desc)
    collection_w = self.init_collection_wrap(name=c_name, schema=schema,
                                             check_task=CheckTasks.check_collection_property,
                                             check_items={exp_name: c_name, exp_schema: schema})
    self.collection_wrap.init_collection(c_name,
                                         check_task=CheckTasks.check_collection_property,
                                         check_items={exp_name: c_name, exp_schema: schema})
    assert collection_w.description == self.collection_wrap.description
def test_shrink_index_node(self):
    """
    target: test shrink indexNode from 2 to 1
    method: 1. deploy milvus with two indexNodes
            2. create index with two indexNodes
            3. shrink indexNode from 2 to 1
            4. create index with one indexNode
    expected: the cost of one indexNode is about twice that of two indexNodes
    """
    release_name = "scale-index"
    env = HelmEnv(release_name=release_name, indexNode=2)
    env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": '10.98.0.8', "port": 19530})
    connections.connect(alias='default')

    data = cf.gen_default_dataframe_data(nb)

    # create
    c_name = "index_scale_one"
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    loop = 10
    for i in range(loop):
        collection_w.insert(data)
    assert collection_w.num_entities == nb * loop

    # create index with two indexNodes and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t0 = datetime.datetime.now() - start
    log.debug(f'two indexNodes: {t0}')

    collection_w.drop_index()
    assert not collection_w.has_index()[0]

    # shrink indexNode from 2 to 1
    env.helm_upgrade_cluster_milvus(indexNode=1)

    # create index with one indexNode and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t1 = datetime.datetime.now() - start
    log.debug(f'one indexNode: {t1}')
    log.debug(t1 / t0)
    assert round(t1 / t0) == 2
def init_collection(self):
    res, result = self.c_wrap.init_collection(
        name=cf.gen_unique_str("CreateChecker_"),
        schema=cf.gen_default_collection_schema(),
        timeout=timeout,
        enable_traceback=enable_traceback,
        check_task=CheckTasks.check_nothing)
    return res, result
def test_chaos_memory_stress_indexnode(self, connection, chaos_yaml):
    """
    target: test inject memory stress into indexnode
    method: 1. deploy milvus and limit indexnode memory resource to 3 / 4Gi
            2. create collection and insert some data
            3. inject 512Mi memory stress chaos
            4. create index
    expected: create index successfully under memory stress
    """
    # init collection and insert
    nb = 256000  # vector size: 512*4*nb is about 512Mi; creating the index needs about 2.8Gi memory
    dim = 512
    # c_name = cf.gen_unique_str('chaos_memory')
    c_name = 'chaos_memory_gKs8aSUu'
    index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 128}}
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name,
                                 schema=cf.gen_default_collection_schema(dim=dim),
                                 shards_num=1)

    # insert 256000 512-dim entities, about 512Mi in total
    for i in range(2):
        t0_insert = datetime.datetime.now()
        df = cf.gen_default_dataframe_data(nb=nb // 2, dim=dim)
        res = collection_w.insert(df)[0]
        assert res.insert_count == nb // 2
        # log.info(f'After {i + 1} insert, num_entities: {collection_w.num_entities}')
        tt_insert = datetime.datetime.now() - t0_insert
        log.info(f"{i} insert data cost: {tt_insert}")

    # flush
    t0_flush = datetime.datetime.now()
    assert collection_w.num_entities == nb
    tt_flush = datetime.datetime.now() - t0_flush
    log.info(f'flush {nb} entities cost: {tt_flush}')

    # drop any index left over from a previous run of this fixed-name collection
    if collection_w.has_index()[0]:
        log.info(collection_w.indexes[0].params)
        collection_w.drop_index()

    # indexNode starts building the index; inject memory stress chaos
    chaos_config = gen_experiment_config(chaos_yaml)
    log.debug(chaos_config)
    chaos_res = CusResource(kind=chaos_config['kind'],
                            group=constants.CHAOS_GROUP,
                            version=constants.CHAOS_VERSION,
                            namespace=constants.CHAOS_NAMESPACE)
    chaos_res.create(chaos_config)
    log.debug("inject chaos")

    # create index
    t0_index = datetime.datetime.now()
    index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
                                         index_params=index_params)
    tt_index = datetime.datetime.now() - t0_index
    log.info(f"create index cost: {tt_index}")
    log.info(collection_w.indexes[0].params)
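# A hedged cleanup sketch: the chaos suites generally remove the injected chaos
# object after the stressed step finishes so later cases start from a clean
# cluster. This assumes CusResource exposes a delete() that takes the chaos
# object's metadata name, mirroring the create() call above:
#
#     meta_name = chaos_config.get('metadata', {}).get('name')
#     chaos_res.delete(meta_name)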
def test_expand_data_node(self):
    """
    target: test create and insert api after expand dataNode pod
    method: 1. create collection a and insert df
            2. expand dataNode pod from 1 to 2
            3. verify collection a property and verify create and insert of new collection
    expected: create and insert ops on both collections are correct
    """
    # deploy a cluster milvus with one pod per node via helm
    release_name = "scale-data"
    env = HelmEnv(release_name=release_name)
    host = env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    env.helm_upgrade_cluster_milvus(dataNode=2)

    # after scale, assert data consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def init_collection_wrap(self, name=None, schema=None, check_task=None, check_items=None, **kwargs):
    name = cf.gen_unique_str('coll_') if name is None else name
    schema = cf.gen_default_collection_schema() if schema is None else schema
    if self.connection_wrap.get_connection(alias=DefaultConfig.DEFAULT_USING)[0] is None:
        self._connect()
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=name, schema=schema,
                                 check_task=check_task, check_items=check_items, **kwargs)
    self.collection_object_list.append(collection_w)
    return collection_w
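# A minimal usage sketch for init_collection_wrap, assuming the calling test
# class defines prefix and the usual check constants (values are illustrative):
#
#     c_name = cf.gen_unique_str(prefix)
#     collection_w = self.init_collection_wrap(
#         name=c_name,
#         check_task=CheckTasks.check_collection_property,
#         check_items={exp_name: c_name, exp_schema: default_schema})
#
# Every wrapper is appended to collection_object_list, which lets a teardown
# hook drop all collections created during the test.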
def __init__(self, flush=False):
    super().__init__()
    self.utility_wrap = ApiUtilityWrapper()
    self.schema = cf.gen_default_collection_schema()
    self.flush = flush
    self.files = ["bulk_load_data_source.json"]
    self.row_based = True
    self.recheck_failed_task = False
    self.failed_tasks = []
def test_expand_data_node(self):
    """
    target: test create and insert api after expand dataNode pod
    method: 1. create collection a and insert df
            2. expand dataNode pod from 1 to 2
            3. verify collection a property and verify create and insert of new collection
    expected: create and insert ops on both collections are correct
    """
    release_name = "scale-data"
    milvusOp, host, port = scale_common.deploy_default_milvus(release_name)

    # connect
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data()
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    milvusOp.upgrade(release_name,
                     {'spec.components.dataNode.replicas': 2},
                     constants.NAMESPACE)
    milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

    # after scale, assert data consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def test_expand_index_node(self):
    """
    target: test expand indexNode from 1 to 2
    method: 1. deploy milvus with one indexNode
            2. create index with one indexNode
            3. expand indexNode from 1 to 2
            4. create index with two indexNodes
    expected: the cost of one indexNode is about twice that of two indexNodes
    """
    release_name = "scale-index"
    milvusOp, host, port = scale_common.deploy_default_milvus(release_name)

    # connect
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')

    data = cf.gen_default_dataframe_data(nb)

    # create
    c_name = "index_scale_one"
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    loop = 10
    for i in range(loop):
        collection_w.insert(data)
    assert collection_w.num_entities == nb * loop

    # create index with one indexNode and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t0 = datetime.datetime.now() - start
    log.debug(f't0: {t0}')

    collection_w.drop_index()
    assert not collection_w.has_index()[0]

    # expand indexNode from 1 to 2
    milvusOp.upgrade(release_name,
                     {'spec.components.indexNode.replicas': 2},
                     constants.NAMESPACE)
    milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

    # create index with two indexNodes and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t1 = datetime.datetime.now() - start
    log.debug(f't1: {t1}')
    assert round(t0 / t1) == 2
def __init__(self):
    self._succ = 0
    self._fail = 0
    self._running = True
    self.c_wrap = ApiCollectionWrapper()
    self.c_wrap.init_collection(name=cf.gen_unique_str('Checker_'),
                                schema=cf.gen_default_collection_schema())
    self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.ENTITIES_FOR_SEARCH),
                       check_task='check_nothing')
    self.initial_entities = self.c_wrap.num_entities  # accessing num_entities triggers a flush
def keep_running(self):
    while self._keep_running:
        res, result = self.run_task()
        if result:
            self.c_wrap.init_collection(
                name=cf.gen_unique_str("CreateChecker_"),
                schema=cf.gen_default_collection_schema(),
                timeout=timeout,
                check_task=CheckTasks.check_nothing)
        sleep(constants.WAIT_PER_OP / 10)
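# A minimal sketch of how a checker like this is typically driven during a
# chaos experiment; the CreateChecker class name matches the collection-name
# prefix above, and the threading usage is illustrative rather than lifted
# from this file:
#
#     checker = CreateChecker()
#     threading.Thread(target=checker.keep_running, daemon=True).start()
#     # ...inject chaos, then compare the checker's succ/fail counts before and after...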
def coll_wrapper_4_insert(self):
    connections.configure(default={"host": "192.168.1.239", "port": 19530})
    res = connections.create_connection(alias='default')
    if res is None:
        raise Exception("no connections")
    c_wrapper = ApiCollectionWrapper()
    c_wrapper.init_collection(name=cf.gen_unique_str(),
                              schema=cf.gen_default_collection_schema(),
                              check_task="check_nothing")
    return c_wrapper
def __init__(self, collection_name=None, files=None):
    if collection_name is None:
        collection_name = cf.gen_unique_str("BulkLoadChecker_")
    super().__init__(collection_name=collection_name)
    self.utility_wrap = ApiUtilityWrapper()
    self.schema = cf.gen_default_collection_schema()
    self.files = files if files is not None else []  # avoid a shared mutable default argument
    self.row_based = True
    self.recheck_failed_task = False
    self.failed_tasks = []
    self.c_name = None
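# A minimal instantiation sketch for BulkLoadChecker, assuming row-based JSON
# source files are already available to the bulk-load service (the file name
# reuses the one seen elsewhere in this suite):
#
#     checker = BulkLoadChecker(files=["bulk_load_data_source.json"])
#     checker.keep_running()  # loops bulk-load tasks until the checker is stopped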
def keep_running(self):
    while self._running is True:
        collection, result = self.c_wrapper.init_collection(
            name=cf.gen_unique_str(),
            schema=cf.gen_default_collection_schema(),
            check_task="check_nothing")
        if result is True:
            self._succ += 1
            self.c_wrapper.drop(check_task="check_nothing")
        else:
            self._fail += 1
def test_collection_primary_field(self):
    """
    target: test collection with primary field
    method: specify primary field
    expected: collection.primary_field
    """
    self._connect()
    c_name = cf.gen_unique_str(prefix)
    schema = cf.gen_default_collection_schema(primary_field=ct.default_int64_field_name)
    self.collection_wrap.init_collection(c_name, schema=schema)
    assert self.collection_wrap.primary_field.name == ct.default_int64_field_name
def test_collection_desc(self):
    """
    target: test collection with description
    method: create collection with description
    expected: assert default description
    """
    self._connect()
    c_name = cf.gen_unique_str(prefix)
    schema = cf.gen_default_collection_schema(description=ct.collection_desc)
    self.collection_wrap.init_collection(c_name, schema=schema,
                                         check_task=CheckTasks.check_collection_property,
                                         check_items={exp_name: c_name, exp_schema: schema})
def test_collection_none_desc(self):
    """
    target: test collection with none description
    method: create collection with a None description
    expected: raise exception
    """
    self._connect()
    c_name = cf.gen_unique_str(prefix)
    schema = cf.gen_default_collection_schema(description=None)
    error = {ct.err_code: 0, ct.err_msg: "expected one of: bytes, unicode"}
    self.collection_wrap.init_collection(c_name, schema=schema,
                                         check_task=CheckTasks.err_res, check_items=error)
def __init__(self):
    self._succ = 0
    self._fail = 0
    self.average_time = 0
    self.c_wrap = ApiCollectionWrapper()
    self.c_wrap.init_collection(name=cf.gen_unique_str('Checker_'),
                                schema=cf.gen_default_collection_schema(),
                                timeout=timeout,
                                enable_traceback=enable_traceback)
    self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.ENTITIES_FOR_SEARCH),
                       timeout=timeout,
                       enable_traceback=enable_traceback)
    self.initial_entities = self.c_wrap.num_entities  # accessing num_entities triggers a flush
def test_insert_auto_id_true_with_list_values(self):
    """
    target: test insert with auto_id=True
    method: create collection with auto_id=True, then insert list data that still
            carries primary-key values
    expected: raise exception and the collection stays empty
    """
    c_name = cf.gen_unique_str(prefix)
    schema = cf.gen_default_collection_schema(auto_id=True)
    collection_w = self.init_collection_wrap(name=c_name, schema=schema)
    data = cf.gen_default_list_data(nb=100)
    error = {ct.err_code: 0, ct.err_msg: 'The data fields number is not match with schema'}
    collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
    assert collection_w.is_empty
def keep_running(self):
    while self._running is True:
        _, result = self.c_wrap.init_collection(
            name=cf.gen_unique_str("CreateChecker_"),
            schema=cf.gen_default_collection_schema(),
            timeout=timeout,
            check_task=CheckTasks.check_nothing)
        if result:
            self._succ += 1
            self.c_wrap.drop(timeout=timeout)
        else:
            self._fail += 1
        sleep(constants.WAIT_PER_OP / 10)
def test_shrink_data_node(self):
    """
    target: test shrink dataNode from 2 to 1
    method: 1. create two collections and insert df into each
            2. shrink dataNode from 2 to 1
            3. insert df again
    expected: verify the property of the collection whose channel was on the shrunk pod
    """
    release_name = "scale-data"
    env = HelmEnv(release_name=release_name, dataNode=2)
    host = env.helm_install_cluster_milvus(image_pull_policy=constants.IF_NOT_PRESENT)

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    c_name = "data_scale_one"
    data = cf.gen_default_list_data(ct.default_nb)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb

    c_name_2 = "data_scale_two"
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
    mutation_res2, _ = collection_w2.insert(data)
    assert mutation_res2.insert_count == ct.default_nb
    assert collection_w2.num_entities == ct.default_nb

    # shrink dataNode from 2 to 1
    env.helm_upgrade_cluster_milvus(dataNode=1)

    assert collection_w.num_entities == ct.default_nb
    mutation_res2, _ = collection_w2.insert(data)
    assert collection_w2.num_entities == ct.default_nb * 2

    collection_w.drop()
    collection_w2.drop()
def test_collection_long_desc(self):
    """
    target: test collection with long description
    method: create collection with a long description
    expected: create successfully and the schema (including the description) matches
    """
    self._connect()
    c_name = cf.gen_unique_str(prefix)
    desc = "a".join("a" for _ in range(256))
    schema = cf.gen_default_collection_schema(description=desc)
    self.collection_wrap.init_collection(c_name, schema=schema,
                                         check_task=CheckTasks.check_collection_property,
                                         check_items={exp_name: c_name, exp_schema: schema})