def test_expand_data_node(self):
    """
    target: test create and insert api after expanding dataNode pods
    method: 1. create collection A and insert data
            2. expand dataNode pods from 1 to 2
            3. verify collection A's properties, then verify create and insert on a new collection
    expected: create and insert operations on both collections succeed
    """
    # deploy cluster milvus with one pod per node type via helm
    release_name = "scale-data"
    env = HelmEnv(release_name=release_name)
    host = env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    env.helm_upgrade_cluster_milvus(dataNode=2)

    # after scale, assert data is consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def test_expand_data_node(self):
    """
    target: test create and insert api after expanding dataNode pods
    method: 1. create collection A and insert data
            2. expand dataNode pods from 1 to 2
            3. verify collection A's properties, then verify create and insert on a new collection
    expected: create and insert operations on both collections succeed
    """
    release_name = "scale-data"
    milvusOp, host, port = scale_common.deploy_default_milvus(release_name)

    # connect
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data()
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    milvusOp.upgrade(release_name, {'spec.components.dataNode.replicas': 2}, constants.NAMESPACE)
    milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

    # after scale, assert data is consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def hello_milvus(collection_name):
    import time
    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="float", dtype=DataType.FLOAT),
        FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields, description="test collection")
    if utility.has_collection(collection_name):
        print("collection already exists")
        collection = Collection(name=collection_name)
        default_schema = collection.schema
        # reuse the dim of the existing vector field (dtype codes 101/102 are vector types)
        dim = [field.params['dim'] for field in default_schema.fields
               if field.dtype in [101, 102]][0]
    print("\nCreate collection...")
    collection = Collection(name=collection_name, schema=default_schema)

    # insert data
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    t0 = time.time()
    collection.insert([
        [i for i in range(nb)],
        [np.float32(i) for i in range(nb)],
        [str(i) for i in range(nb)],
        vectors
    ])
    t1 = time.time()
    print(f"\nInsert {nb} vectors cost {t1 - t0:.4f} seconds")

    t0 = time.time()
    print("\nGet collection entities...")
    print(collection.num_entities)
    t1 = time.time()
    print(f"\nGet collection entities cost {t1 - t0:.4f} seconds")

    # create index and load collection
    default_index = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
    print("\nCreate index...")
    t0 = time.time()
    collection.create_index(field_name="float_vector", index_params=default_index)
    t1 = time.time()
    print(f"\nCreate index cost {t1 - t0:.4f} seconds")

    print("\nGet replicas number")
    try:
        replicas_info = collection.get_replicas()
        replica_number = len(replicas_info.groups)
        print(f"\nReplicas number is {replica_number}")
    except Exception as e:
        print(str(e))
        replica_number = 1

    print("\nLoad collection...")
    t0 = time.time()
    collection.load(replica_number=replica_number)
    t1 = time.time()
    print(f"\nLoad collection cost {t1 - t0:.4f} seconds")

    # search
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    t0 = time.time()
    print("\nSearch...")
    # define output_fields of search result
    res = collection.search(
        vectors[-2:], "float_vector", search_params, topK,
        "int64 > 100", output_fields=["int64", "float"], timeout=TIMEOUT
    )
    t1 = time.time()
    print(f"search cost {t1 - t0:.4f} seconds")

    # show result
    for hits in res:
        for hit in hits:
            # print each hit together with the value of its "float" output field
            print(hit, hit.entity.get("float"))

    # query
    expr = "int64 in [2,4,6,8]"
    output_fields = ["int64", "float"]
    res = collection.query(expr, output_fields, timeout=TIMEOUT)
    sorted_res = sorted(res, key=lambda k: k['int64'])
    for r in sorted_res:
        print(r)
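# A minimal usage sketch for the function above, assuming a local Milvus at
# localhost:19530. TIMEOUT is a module-level constant in the original script;
# the value and the collection name "hello_milvus_demo" here are illustrative.
TIMEOUT = 120
connections.connect("default", host="localhost", port="19530")
hello_milvus("hello_milvus_demo")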
def test_expand_data_node(self):
    """
    target: test create and insert api after expanding dataNode pods
    method: 1. create collection A and insert data
            2. expand dataNode pods from 1 to 2
            3. verify collection A's properties, then verify create and insert on a new collection
    expected: create and insert operations on both collections succeed
    """
    release_name = "scale-data"
    # env = HelmEnv(release_name=release_name)
    # host = env.helm_install_cluster_milvus()

    # deploy cluster milvus with 1 dataNode replica
    default_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': 'milvusdb/milvus-dev:master-20211020-b40513b',
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'dependencies.etcd.inCluster.deletionPolicy': 'Delete',
        'dependencies.etcd.inCluster.pvcDeletion': 'true',
        'dependencies.pulsar.inCluster.deletionPolicy': 'Delete',
        'dependencies.pulsar.inCluster.pvcDeletion': 'true',
        'dependencies.storage.inCluster.deletionPolicy': 'Delete',
        'dependencies.storage.inCluster.pvcDeletion': 'true',
    }
    milvusOp = MilvusOperator()
    milvusOp.install(default_config)
    if milvusOp.wait_for_healthy(release_name, namespace=constants.NAMESPACE):
        endpoint = milvusOp.endpoint(release_name, constants.NAMESPACE)
        endpoint = endpoint.split(':')
        host = endpoint[0]
        port = int(endpoint[-1])
    else:
        raise Exception(f"Failed to install {release_name}")

    # connect
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    milvusOp.upgrade(release_name, {'spec.components.dataNode.replicas': 2}, constants.NAMESPACE)
    # env.helm_upgrade_cluster_milvus(dataNode=2)

    # after scale, assert data is consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
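# Cleanup sketch for the operator-based deployment above. The `uninstall`
# method is assumed by analogy with `install`/`upgrade`; verify it against
# the MilvusOperator helper in this repo before relying on it.
def teardown_scale_data(milvusOp, release_name):
    # remove the operator-managed Milvus cluster; its dependencies and PVCs
    # are deleted per the deletionPolicy/pvcDeletion settings in the config
    milvusOp.uninstall(release_name, namespace=constants.NAMESPACE)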
def test_exist_collection(name):
    assert utility.has_collection(name) is True
    collection = Collection(name)
    collection.drop()
def test_chaos_data_consist(self, connection, chaos_yaml):
    """
    target: verify data consistency after a chaos object is injected and recovered
    method: 1. create a collection, insert some data, search and query
            2. inject a chaos object
            3. reconnect to the service
            4. verify that a) data entities and the index persist,
                           b) search and query results persist
    expected: collection data and results persist
    """
    c_name = cf.gen_unique_str('chaos_collection_')
    nb = 5000
    i_name = cf.gen_unique_str('chaos_index_')
    index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}

    # create
    t0 = datetime.datetime.now()
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
    tt = datetime.datetime.now() - t0
    log.info(f"assert create: {tt}")
    assert collection_w.name == c_name

    # insert
    data = cf.gen_default_list_data(nb=nb)
    t0 = datetime.datetime.now()
    _, res = collection_w.insert(data)
    tt = datetime.datetime.now() - t0
    log.info(f"assert insert: {tt}")
    assert res

    # flush
    t0 = datetime.datetime.now()
    assert collection_w.num_entities == nb
    tt = datetime.datetime.now() - t0
    log.info(f"assert flush: {tt}")

    # search
    collection_w.load()
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    t0 = datetime.datetime.now()
    search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
    search_res, _ = collection_w.search(data=search_vectors,
                                        anns_field=ct.default_float_vec_field_name,
                                        param=search_params, limit=1)
    tt = datetime.datetime.now() - t0
    log.info(f"assert search: {tt}")
    assert len(search_res) == 1

    # index
    t0 = datetime.datetime.now()
    index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
                                         index_params=index_params,
                                         name=i_name)
    tt = datetime.datetime.now() - t0
    log.info(f"assert index: {tt}")
    assert len(collection_w.indexes) == 1

    # query
    term_expr = f'{ct.default_int64_field_name} in [1001,1201,999,99]'
    t0 = datetime.datetime.now()
    query_res, _ = collection_w.query(term_expr)
    tt = datetime.datetime.now() - t0
    log.info(f"assert query: {tt}")
    assert len(query_res) == 4

    # reboot a pod
    reboot_pod(chaos_yaml)

    # parse chaos object
    chaos_config = cc.gen_experiment_config(chaos_yaml)
    meta_name = chaos_config.get('metadata', None).get('name', None)

    # wait for all pods to be ready
    log.info(f"wait for pods in namespace {constants.CHAOS_NAMESPACE} "
             f"with label app.kubernetes.io/instance={meta_name}")
    wait_pods_ready(constants.CHAOS_NAMESPACE, f"app.kubernetes.io/instance={meta_name}")
    log.info(f"wait for pods in namespace {constants.CHAOS_NAMESPACE} "
             f"with label release={meta_name}")
    wait_pods_ready(constants.CHAOS_NAMESPACE, f"release={meta_name}")
    log.info("all pods are ready")

    # reconnect if needed
    sleep(constants.WAIT_PER_OP * 3)
    reconnect(connections, alias='default')

    # verify collection persists
    assert utility.has_collection(c_name)
    log.info("assert collection persists")
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(c_name)

    # verify data persists
    assert collection_w2.num_entities == nb
    log.info("assert data persists")

    # verify index persists
    assert collection_w2.has_index(i_name)
    log.info("assert index persists")

    # verify search results persist (search the re-initialized wrapper, not the pre-chaos one)
    collection_w2.load()
    t0 = datetime.datetime.now()
    search_res, _ = collection_w2.search(data=search_vectors,
                                         anns_field=ct.default_float_vec_field_name,
                                         param=search_params, limit=1)
    tt = datetime.datetime.now() - t0
    log.info(f"assert search: {tt}")
    assert len(search_res) == 1

    # verify query results persist
    query_res2, _ = collection_w2.query(term_expr)
    assert len(query_res2) == len(query_res)
    log.info("assert query result persists")
def has_collection(name):
    return utility.has_collection(name)
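# Example usage of the helper above; "demo_collection" is an illustrative name.
if has_collection("demo_collection"):
    utility.drop_collection("demo_collection")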
def test_chaos_data_consist(self, connection, chaos_yaml):
    c_name = cf.gen_unique_str('chaos_collection_')
    nb = 5000
    i_name = cf.gen_unique_str('chaos_index_')
    index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}

    # create
    t0 = datetime.datetime.now()
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
    tt = datetime.datetime.now() - t0
    log.debug(f"assert create: {tt}")
    assert collection_w.name == c_name

    # insert
    data = cf.gen_default_list_data(nb=nb)
    t0 = datetime.datetime.now()
    _, res = collection_w.insert(data)
    tt = datetime.datetime.now() - t0
    log.debug(f"assert insert: {tt}")
    assert res

    # flush
    t0 = datetime.datetime.now()
    assert collection_w.num_entities == nb
    tt = datetime.datetime.now() - t0
    log.debug(f"assert flush: {tt}")

    # search
    collection_w.load()
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    t0 = datetime.datetime.now()
    search_res, _ = collection_w.search(data=search_vectors,
                                        anns_field=ct.default_float_vec_field_name,
                                        param={"metric_type": "L2", "params": {"nprobe": 16}},
                                        limit=1)
    tt = datetime.datetime.now() - t0
    log.debug(f"assert search: {tt}")
    assert len(search_res) == 1

    # index
    t0 = datetime.datetime.now()
    index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
                                         index_params=index_params,
                                         name=i_name)
    tt = datetime.datetime.now() - t0
    log.debug(f"assert index: {tt}")
    assert len(collection_w.indexes) == 1

    # query
    term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
    t0 = datetime.datetime.now()
    query_res, _ = collection_w.query(term_expr)
    tt = datetime.datetime.now() - t0
    log.debug(f"assert query: {tt}")
    assert len(query_res) == 4

    # reboot a pod
    reboot_pod(chaos_yaml)

    # reconnect if needed
    sleep(constants.WAIT_PER_OP * 4)
    reconnect(connections, self.host, self.port)

    # verify collection persists
    assert utility.has_collection(c_name)
    log.debug("assert collection persists")
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(c_name)

    # verify data persists
    assert collection_w2.num_entities == nb
    log.debug("assert data persists")

    # verify index persists
    assert collection_w2.has_index(i_name)
    log.debug("assert index persists")

    # verify query results persist
    query_res2, _ = collection_w2.query(term_expr)
    assert query_res2 == query_res
    log.debug("assert query result persists")
fmt = "\n=== {:30} ===\n" search_latency_fmt = "search latency = {:.4f}s" num_entities, dim = 3000, 8 ################################################################################# # 1. connect to Milvus # Add a new connection alias `default` for Milvus server in `localhost:19530` # Actually the "default" alias is a buildin in PyMilvus. # If the address of Milvus is the same as `localhost:19530`, you can omit all # parameters and call the method as: `connections.connect()`. # # Note: the `using` parameter of the following methods is default to "default". print(fmt.format("start connecting to Milvus")) connections.connect("default", host="localhost", port="19530") has = utility.has_collection("hello_milvus") print(f"Does collection hello_milvus exist in Milvus: {has}") ################################################################################# # 2. create collection # We're going to create a collection with 3 fields. # +-+------------+------------+------------------+------------------------------+ # | | field name | field type | other attributes | field description | # +-+------------+------------+------------------+------------------------------+ # |1| "pk" | Int64 | is_primary=True | "primary field" | # | | | | auto_id=False | | # +-+------------+------------+------------------+------------------------------+ # |2| "random" | Double | | "a double field" | # +-+------------+------------+------------------+------------------------------+ # |3|"embeddings"| FloatVector| dim=8 | "float vector with dim 8" | # +-+------------+------------+------------------+------------------------------+
def teardown():
    if utility.has_collection(name_A):
        utility.drop_collection(name_A)
    if utility.has_collection(name_B):
        utility.drop_collection(name_B)
def test_has_collection(self):
    assert utility.has_collection(gen_collection_name()) is False