def test_insert_after_create_index(self):
    """
    target: test insert after create index
    method: 1. create index
            2. insert data
    expected: verify index and num entities
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    index, _ = collection_w.index()
    assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params)
    assert collection_w.indexes[0] == index
    df = cf.gen_default_dataframe_data(ct.default_nb)
    collection_w.insert(data=df)
    assert collection_w.num_entities == ct.default_nb
def test_insert_binary_after_index(self):
    """
    target: test insert binary after index
    method: 1. create index
            2. insert binary data
    expected: 1. index ok
              2. num entities correct
    """
    schema = cf.gen_default_binary_collection_schema()
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
    collection_w.create_index(ct.default_binary_vec_field_name, default_binary_index_params)
    assert collection_w.has_index()[0]
    index, _ = collection_w.index()
    assert index == Index(collection_w.collection, ct.default_binary_vec_field_name, default_binary_index_params)
    assert collection_w.indexes[0] == index
    df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    collection_w.insert(data=df)
    assert collection_w.num_entities == ct.default_nb
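# NOTE: `default_binary_index_params` above comes from the shared test constants and is not
# shown in this section. The values below are only an illustrative assumption of what a
# binary-vector index config typically looks like in Milvus (BIN_IVF_FLAT with a binary
# metric such as JACCARD), not a copy of the real constant.
default_binary_index_params = {
    "index_type": "BIN_IVF_FLAT",   # index type for binary vectors
    "metric_type": "JACCARD",       # binary metrics include JACCARD and HAMMING
    "params": {"nlist": 64},
}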
def test_insert_auto_id_create_index(self):
    """
    target: test create index in auto_id=True collection
    method: 1. create auto_id=True collection and insert
            2. create index
    expected: index correct
    """
    schema = cf.gen_default_collection_schema(auto_id=True)
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
    df = cf.gen_default_dataframe_data(ct.default_nb)
    df.drop(ct.default_int64_field_name, axis=1, inplace=True)
    mutation_res, _ = collection_w.insert(data=df)
    assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb)
    assert collection_w.num_entities == ct.default_nb
    # create index
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    index, _ = collection_w.index()
    assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params)
    assert collection_w.indexes[0] == index
def e2e_milvus(host, c_name, collection_exist=False):
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    if collection_exist:
        collection_w.init_collection(name=c_name)
    else:
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(data[-1][:ct.default_nq], ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0][0].id)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
def e2e_milvus(host, c_name):
    """ e2e milvus """
    log.debug(f'pid: {os.getpid()}')
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    df = cf.gen_default_dataframe_data()
    mutation_res, _ = collection_w.insert(df)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(cf.gen_vectors(1, dim=ct.default_dim), ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0].ids)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1. deploy milvus cluster with 1 queryNode
            2. prepare work (connect, create, insert, index and load)
            3. continuously search (daemon thread)
            4. expand queryNode from 1 to 5
            5. continuously insert new data (daemon thread)
            6. shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    release_name = "scale-query"
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': 'harbor.zilliz.cc/milvus/milvus:master-20211202-ed546d0',
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(query_config)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy: {healthy}")
    host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    # host = "10.98.0.8"

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str("scale_query")
    # c_name = 'scale_query_DymS7kI4'
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

    # insert data in several batches
    for i in range(3):
        df = cf.gen_default_dataframe_data(nb)
        collection_w.insert(df)
        log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            default_index_params)

    # load
    collection_w.load()

    # scale queryNode to 5
    mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

    # continuously search
    def do_search():
        while True:
            search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                ct.default_float_vec_field_name,
                                                ct.default_search_params, ct.default_limit)
            log.debug(search_res[0].ids)
            assert len(search_res[0].ids) == ct.default_limit

    t_search = threading.Thread(target=do_search, args=(), daemon=True)
    t_search.start()

    # wait new QN running, continuously insert
    # time.sleep(10)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy after scale up: {healthy}")
    # wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

    def do_insert():
        while True:
            tmp_df = cf.gen_default_dataframe_data(1000)
            collection_w.insert(tmp_df)

    t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
    t_insert.start()

    log.debug(collection_w.num_entities)
    time.sleep(20)
    log.debug("Expand querynode test finished")

    mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
    time.sleep(60)
    wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

    log.debug(collection_w.num_entities)
    time.sleep(60)
    log.debug("Shrink querynode test finished")
def index(self, name, field_name, collection_name, schema, get_simple_index):
    connections.connect()
    collection = Collection(collection_name, schema=schema)
    return Index(collection, field_name, get_simple_index)
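# A minimal sketch of how the `index` fixture above might be consumed in a test. The
# fixture names (`field_name`, `get_simple_index`, etc.) are assumed to be provided by
# the surrounding test class or conftest; this test body is illustrative, not from the repo.
def test_index_describes_field(self, index, field_name):
    # the Index object built by the fixture should point at the indexed field
    assert index.field_name == field_name
    assert index.params is not None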
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1. deploy milvus cluster with 1 queryNode
            2. prepare work (connect, create, insert, index and load)
            3. continuously search (daemon thread)
            4. expand queryNode from 1 to 5
            5. continuously insert new data (daemon thread)
            6. shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    release_name = "scale-query"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.mode': 'cluster',
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.common.retentionDuration': 60
    }
    mic = MilvusOperator()
    mic.install(query_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        raise MilvusException(message='Milvus healthy timeout 1800s')
    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        # insert data in several batches
        for i in range(3):
            df = cf.gen_default_dataframe_data(nb)
            collection_w.insert(df)
            log.debug(collection_w.num_entities)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                default_index_params)

        # load
        collection_w.load()

        # scale queryNode to 5
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

        @counter
        def do_search():
            """ do search """
            search_res, is_succ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                      ct.default_float_vec_field_name,
                                                      ct.default_search_params, ct.default_limit,
                                                      check_task=CheckTasks.check_nothing)
            assert len(search_res) == 1
            return search_res, is_succ

        def loop_search():
            """ continuously search """
            while True:
                do_search()

        threading.Thread(target=loop_search, args=(), daemon=True).start()

        # wait new QN running, continuously insert
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        @counter
        def do_insert():
            """ do insert """
            return collection_w.insert(cf.gen_default_dataframe_data(1000),
                                       check_task=CheckTasks.check_nothing)

        def loop_insert():
            """ loop insert """
            while True:
                do_insert()

        threading.Thread(target=loop_insert, args=(), daemon=True).start()

        log.debug(collection_w.num_entities)
        time.sleep(20)
        log.debug("Expand querynode test finished")

        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(60)

        scale_common.check_succ_rate(do_search)
        scale_common.check_succ_rate(do_insert)
        log.debug("Shrink querynode test finished")
    except Exception as e:
        raise Exception(str(e))
    finally:
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
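# The `@counter` decorator and `scale_common.check_succ_rate` used above are helpers from
# the scale-test utilities and are not shown in this section. A rough, purely illustrative
# sketch of what such helpers might look like (an assumption, not the actual implementation):
import functools

def counter(func):
    """Hypothetical sketch: count total and successful calls so a success
    rate can be checked after the scaling operations finish."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        wrapper.total += 1
        res = func(*args, **kwargs)
        # assume the wrapped call returns (result, succ_flag), like the ApiWrapper methods
        if isinstance(res, tuple) and len(res) == 2 and res[1]:
            wrapper.succ += 1
        return res
    wrapper.total = 0
    wrapper.succ = 0
    return wrapper

def check_succ_rate(func, threshold=0.9):
    """Hypothetical sketch: assert that most requests issued during scaling succeeded."""
    rate = func.succ / func.total if func.total else 0
    assert rate >= threshold, f"success rate {rate:.2f} below {threshold}"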
                     dtype=DataType.INT64, description="int64", is_primary=True)
field2 = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR,
                     description="float vector", dim=128, is_primary=False)
schema = CollectionSchema(fields=[field1, field2], description="collection description")
collection = Collection(name='demo_film_tutorial', data=None, schema=schema)

print(f"\nCreate index...")
index_params = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 100}
}
index = Index(collection, "embedding", index_params)
print(index.params)
print([index.params for index in collection.indexes])

print(f"\nDrop index...")
index.drop()
print([index.params for index in collection.indexes])
collection.drop()
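# The tutorial snippet above only creates and drops the index. Before the final
# `collection.drop()`, one could also exercise the index by inserting a few vectors,
# loading, and searching. This is an illustrative sketch only: the entity values and
# search parameters are assumptions, not part of the original tutorial.
import random

entities = [
    [i for i in range(10)],                                      # primary keys (field1)
    [[random.random() for _ in range(128)] for _ in range(10)],  # 128-dim embeddings (field2)
]
collection.insert(entities)
collection.create_index("embedding", index_params)

collection.load()
results = collection.search(entities[1][:1], "embedding",
                            param={"metric_type": "L2", "params": {"nprobe": 10}},
                            limit=3)
print(results[0].ids)
collection.release()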
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1. deploy milvus cluster with 1 queryNode
            2. prepare work (connect, create, insert, index and load)
            3. continuously search (daemon thread)
            4. expand queryNode from 1 to 5
            5. continuously insert new data (daemon thread)
            6. shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    fail_count = 0
    release_name = "scale-query"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(query_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
        # mic.uninstall(release_name, namespace=constants.NAMESPACE)
        raise BaseException('Milvus healthy timeout 1200s')
    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        # insert data in several batches
        for i in range(3):
            df = cf.gen_default_dataframe_data(nb)
            collection_w.insert(df)
            log.debug(collection_w.num_entities)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                default_index_params)

        # load
        collection_w.load()

        # scale queryNode to 5
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

        # continuously search
        def do_search():
            while True:
                search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                    ct.default_float_vec_field_name,
                                                    ct.default_search_params, ct.default_limit)
                log.debug(search_res[0].ids)
                assert len(search_res[0].ids) == ct.default_limit

        t_search = threading.Thread(target=do_search, args=(), daemon=True)
        t_search.start()

        # wait new QN running, continuously insert
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        def do_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(1000)
                collection_w.insert(tmp_df)

        t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
        t_insert.start()

        log.debug(collection_w.num_entities)
        time.sleep(20)
        log.debug("Expand querynode test finished")

        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(60)
        log.debug("Shrink querynode test finished")
    except Exception as e:
        log.error(str(e))
        fail_count += 1
        # raise Exception(str(e))
    finally:
        log.info(f'Test finished with {fail_count} fail request')
        assert fail_count <= 1
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
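# `wait_pods_ready` (used by both scale tests above) is a helper that is not shown in this
# section. Assuming it polls the Kubernetes API for pods matching the release label, a
# minimal sketch with the official Python client could look like the following; the
# function body is an assumption for illustration, not the actual implementation.
import time
from kubernetes import client, config

def wait_pods_ready(namespace, label_selector, timeout=600):
    """Hypothetical sketch: block until every pod matching label_selector reports Ready."""
    config.load_kube_config()  # assumes the test runs outside the cluster with a kubeconfig
    v1 = client.CoreV1Api()
    deadline = time.time() + timeout
    while time.time() < deadline:
        pods = v1.list_namespaced_pod(namespace, label_selector=label_selector).items
        if pods and all(
            any(c.type == "Ready" and c.status == "True" for c in (p.status.conditions or []))
            for p in pods
        ):
            return True
        time.sleep(5)
    raise TimeoutError(f"pods with {label_selector} not ready within {timeout}s")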