def connection(self, host, port):
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')
    if connections.has_connection("default") is False:
        raise Exception("no connections")
    self.host = host
    self.port = port
def connection(self, host, port):
    connections.add_connection(default={"host": host, "port": port})
    conn = connections.connect(alias='default')
    if conn is None:
        raise Exception("no connections")
    self.host = host
    self.port = port
    return conn
def test_expand_index_node(self):
    """
    target: test expand indexNode from 1 to 2
    method: 1.deploy a cluster with one indexNode
            2.create index with one indexNode and record the cost
            3.expand indexNode from 1 to 2
            4.create index again with two indexNodes and record the cost
    expected: the cost with one indexNode is about twice the cost with two indexNodes
    """
    release_name = "scale-index"
    milvusOp, host, port = scale_common.deploy_default_milvus(release_name)

    # connect
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')

    data = cf.gen_default_dataframe_data(nb)

    # create
    c_name = "index_scale_one"
    collection_w = ApiCollectionWrapper()
    # collection_w.init_collection(name=c_name)
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    loop = 10
    for i in range(loop):
        collection_w.insert(data)
    assert collection_w.num_entities == nb * loop

    # create index with one indexNode and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t0 = datetime.datetime.now() - start
    log.debug(f't0: {t0}')

    collection_w.drop_index()
    assert not collection_w.has_index()[0]

    # expand indexNode from 1 to 2
    milvusOp.upgrade(release_name, {'spec.components.indexNode.replicas': 2}, constants.NAMESPACE)
    milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

    # create index with two indexNodes and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t1 = datetime.datetime.now() - start
    log.debug(f't1: {t1}')
    assert round(t0 / t1) == 2
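# A minimal sketch (not part of the test suite) of why `round(t0 / t1) == 2` works:
# datetime.timedelta supports true division by another timedelta and returns a
# float, so the assertion checks that indexing on one node took roughly twice as
# long as on two nodes. The durations below are hypothetical.
import datetime

t0 = datetime.timedelta(seconds=95)   # assumed cost on 1 indexNode
t1 = datetime.timedelta(seconds=50)   # assumed cost on 2 indexNodes
assert round(t0 / t1) == 2            # timedelta / timedelta -> 1.9, rounds to 2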
def connection(self, host, port):
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')
    if connections.has_connection("default") is False:
        raise Exception("no connections")
    self.host = host
    self.port = port
    self.instance_name = get_milvus_instance_name(constants.CHAOS_NAMESPACE, host)
def test_shrink_index_node(self):
    """
    target: test shrink indexNode from 2 to 1
    method: 1.deploy two indexNode
            2.create index with two indexNode
            3.shrink indexNode from 2 to 1
            4.create index with one indexNode
    expected: the cost with one indexNode is about twice the cost with two indexNodes
    """
    release_name = "scale-index"
    env = HelmEnv(release_name=release_name, indexNode=2)
    host = env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    data = cf.gen_default_dataframe_data(nb)

    # create
    c_name = "index_scale_one"
    collection_w = ApiCollectionWrapper()
    # collection_w.init_collection(name=c_name)
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    loop = 10
    for i in range(loop):
        collection_w.insert(data)
    assert collection_w.num_entities == nb * loop

    # create index with two indexNodes and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t0 = datetime.datetime.now() - start
    log.debug(f'two indexNodes: {t0}')

    collection_w.drop_index()
    assert not collection_w.has_index()[0]

    # shrink indexNode from 2 to 1
    env.helm_upgrade_cluster_milvus(indexNode=1)

    # create index with one indexNode and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t1 = datetime.datetime.now() - start
    log.debug(f'one indexNode: {t1}')
    log.debug(t1 / t0)
    assert round(t1 / t0) == 2
def test_expand_query_node(self):
    release_name = "scale-query"
    env = HelmEnv(release_name=release_name)
    host = env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = "query_scale_one"
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # # create index
    # collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    # assert collection_w.has_index()[0]
    # assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
    #                                         default_index_params)

    collection_w.load()
    # vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(5)]
    res1, _ = collection_w.search(data[-1][:5], ct.default_float_vec_field_name,
                                  ct.default_search_params, ct.default_limit)

    # scale queryNode pod
    env.helm_upgrade_cluster_milvus(queryNode=2)

    c_name_2 = "query_scale_two"
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
    collection_w2.insert(data)
    assert collection_w2.num_entities == ct.default_nb
    collection_w2.load()
    res2, _ = collection_w2.search(data[-1][:5], ct.default_float_vec_field_name,
                                   ct.default_search_params, ct.default_limit)

    assert res1[0].ids == res2[0].ids
def test_expand_data_node(self):
    """
    target: test create and insert api after expand dataNode pod
    method: 1.create collection a and insert df
            2.expand dataNode pod from 1 to 2
            3.verify collection a property and verify create and insert of new collection
    expected: create and insert operations on both collections work correctly
    """
    # deploy all nodes one pod cluster milvus with helm
    release_name = "scale-data"
    env = HelmEnv(release_name=release_name)
    host = env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    env.helm_upgrade_cluster_milvus(dataNode=2)

    # after scale, assert data consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def get_milvus_instance_name(namespace, host, port="19530"):
    """
    get milvus instance name after connection

    :param namespace: the namespace where the release is deployed
    :type namespace: str

    :param host: milvus host ip
    :type host: str

    :param port: milvus port
    :type port: str

    :example:
        >>> milvus_instance_name = get_milvus_instance_name("chaos-testing", "10.96.250.111")
        "milvus-multi-querynode"
    """
    connections.add_connection(_default={"host": host, "port": port})
    connections.connect(alias='_default')
    ms = MilvusSys()
    query_node_ip = ms.query_nodes[0]["infos"]['hardware_infos']["ip"].split(":")[0]
    pod_name = ""
    if ms.deploy_mode == "STANDALONE":
        # get all pods whose labels are app.kubernetes.io/name=milvus and component=standalone
        ip_name_pairs = get_pod_ip_name_pairs(namespace, "app.kubernetes.io/name=milvus, component=standalone")
        pod_name = ip_name_pairs[query_node_ip]
    if ms.deploy_mode == "DISTRIBUTED":
        # get all pods whose labels are app.kubernetes.io/name=milvus and component=querynode
        ip_name_pairs = get_pod_ip_name_pairs(namespace, "app.kubernetes.io/name=milvus, component=querynode")
        pod_name = ip_name_pairs[query_node_ip]
    init_k8s_client_config()
    api_instance = client.CoreV1Api()
    try:
        api_response = api_instance.read_namespaced_pod(namespace=namespace, name=pod_name)
    except ApiException as e:
        log.error("Exception when calling CoreV1Api->read_namespaced_pod: %s\n" % e)
        raise Exception(str(e))
    milvus_instance_name = api_response.metadata.labels["app.kubernetes.io/instance"]
    return milvus_instance_name
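# A minimal sketch of what a helper like `get_pod_ip_name_pairs` above could look
# like. The real implementation lives elsewhere in this repo, so treat this as an
# illustrative assumption, not the repo's code: it maps pod IP -> pod name for all
# pods matching a label selector, using the standard kubernetes client.
from kubernetes import client, config

def get_pod_ip_name_pairs_sketch(namespace, label_selector):
    config.load_kube_config()  # or in-cluster config, depending on the environment
    pods = client.CoreV1Api().list_namespaced_pod(namespace, label_selector=label_selector)
    return {pod.status.pod_ip: pod.metadata.name for pod in pods.items}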
def test_expand_data_node(self):
    """
    target: test create and insert api after expand dataNode pod
    method: 1.create collection a and insert df
            2.expand dataNode pod from 1 to 2
            3.verify collection a property and verify create and insert of new collection
    expected: create and insert operations on both collections work correctly
    """
    release_name = "scale-data"
    milvusOp, host, port = scale_common.deploy_default_milvus(release_name)

    # connect
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data()
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    milvusOp.upgrade(release_name, {'spec.components.dataNode.replicas': 2}, constants.NAMESPACE)
    milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

    # after scale, assert data consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def e2e_milvus(host, c_name, collection_exist=False):
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    if collection_exist:
        collection_w.init_collection(name=c_name)
    else:
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(data[-1][:ct.default_nq], ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0][0].id)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
def e2e_milvus(host, c_name):
    """ e2e milvus """
    log.debug(f'pid: {os.getpid()}')

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    df = cf.gen_default_dataframe_data()
    mutation_res, _ = collection_w.insert(df)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(cf.gen_vectors(1, dim=ct.default_dim),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0].ids)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
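# The pid logging in e2e_milvus suggests it is meant to run in parallel worker
# processes (e.g. while a cluster is scaling or recovering). A minimal usage
# sketch under that assumption; the worker count and collection names below are
# illustrative, not taken from the repo.
import multiprocessing

def run_parallel_e2e(host, num_workers=4):
    procs = [multiprocessing.Process(target=e2e_milvus, args=(host, f"e2e_{i}"))
             for i in range(num_workers)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()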
def test_shrink_data_node(self):
    """
    target: test shrink dataNode from 2 to 1
    method: 1.create collection and insert df
            2.shrink dataNode
            3.insert df
    expected: verify the properties of the collections whose channels were on the shrunk pod
    """
    release_name = "scale-data"
    env = HelmEnv(release_name=release_name, dataNode=2)
    host = env.helm_install_cluster_milvus(image_pull_policy=constants.IF_NOT_PRESENT)

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    c_name = "data_scale_one"
    data = cf.gen_default_list_data(ct.default_nb)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb

    c_name_2 = "data_scale_two"
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
    mutation_res2, _ = collection_w2.insert(data)
    assert mutation_res2.insert_count == ct.default_nb
    assert collection_w2.num_entities == ct.default_nb

    # shrink dataNode from 2 to 1
    env.helm_upgrade_cluster_milvus(dataNode=1)

    assert collection_w.num_entities == ct.default_nb
    mutation_res2, _ = collection_w2.insert(data)
    assert collection_w2.num_entities == ct.default_nb * 2

    collection_w.drop()
    collection_w2.drop()
def test_scale_data_node(self):
    """
    target: test scale dataNode
    method: 1.deploy milvus cluster with 2 dataNode
            2.create collection with shards_num=5
            3.continuously insert new data (daemon thread)
            4.expand dataNode from 2 to 5
            5.create new collection with shards_num=2
            6.continuously insert new collection new data (daemon thread)
            7.shrink dataNode from 5 to 3
    expected: Verify milvus remains healthy, insert and flush succeed during scale
              Average dataNode memory usage
    """
    release_name = "scale-data"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    fail_count = 0
    data_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.dataNode.replicas': 2,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(data_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
        # mic.uninstall(release_name, namespace=constants.NAMESPACE)
        raise BaseException('Milvus healthy timeout 1200s')

    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=5)

        tmp_nb = 10000

        def do_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(tmp_nb)
                collection_w.insert(tmp_df)
                log.debug(collection_w.num_entities)

        t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
        t_insert.start()

        # scale dataNode to 5
        mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")
        log.debug("Expand dataNode test finished")

        # create new collection and insert
        new_c_name = cf.gen_unique_str("scale_query")
        collection_w_new = ApiCollectionWrapper()
        collection_w_new.init_collection(name=new_c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        def do_new_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(tmp_nb)
                collection_w_new.insert(tmp_df)
                log.debug(collection_w_new.num_entities)

        t_insert_new = threading.Thread(target=do_new_insert, args=(), daemon=True)
        t_insert_new.start()

        # scale dataNode to 3
        mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(300)
        log.debug("Shrink dataNode test finished")

    except Exception as e:
        log.error(str(e))
        fail_count += 1
        # raise Exception(str(e))

    finally:
        log.info(f'Test finished with {fail_count} failed request(s)')
        assert fail_count <= 1
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
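# The dotted keys passed to MilvusOperator.install()/upgrade() read like flattened
# paths into the Milvus CRD spec. A sketch of how such keys could be expanded into
# a nested patch body; this is an assumption about the operator wrapper for
# illustration, not its actual implementation.
def expand_dotted_config(flat):
    nested = {}
    for path, value in flat.items():
        node = nested
        keys = path.split('.')
        for key in keys[:-1]:
            node = node.setdefault(key, {})
        node[keys[-1]] = value
    return nested

# expand_dotted_config({'spec.components.dataNode.replicas': 5})
# -> {'spec': {'components': {'dataNode': {'replicas': 5}}}}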
def connection(self, host, port):
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')
def test_expand_index_node(self):
    """
    target: test expand indexNode from 1 to 2
    method: 1.deploy a cluster with one indexNode
            2.create index with one indexNode and record the cost
            3.expand indexNode from 1 to 2
            4.create index again with two indexNodes and record the cost
    expected: the cost with one indexNode is about twice the cost with two indexNodes
    """
    release_name = "expand-index"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    init_replicas = 1
    expand_replicas = 2
    data_config = {
        'metadata.namespace': constants.NAMESPACE,
        'spec.mode': 'cluster',
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.indexNode.replicas': init_replicas,
        'spec.components.dataNode.replicas': 2,
        'spec.config.common.retentionDuration': 60
    }
    mic = MilvusOperator()
    mic.install(data_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        # If deploy failed and want to uninstall mic
        # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
        # mic.uninstall(release_name, namespace=constants.NAMESPACE)
        raise MilvusException(message='Milvus healthy timeout 1800s')

    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create collection
        c_name = "index_scale_one"
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

        # insert data
        data = cf.gen_default_dataframe_data(nb)
        loop = 100
        for i in range(loop):
            collection_w.insert(data, timeout=60)
        assert collection_w.num_entities == nb * loop

        # create index
        # Note that the num of segments and the num of indexNode are related to indexing time
        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        t0 = datetime.datetime.now() - start
        log.info(f'Create index on {init_replicas} indexNode cost t0: {t0}')

        # drop index
        collection_w.drop_index()
        assert not collection_w.has_index()[0]

        # expand indexNode
        mic.upgrade(release_name, {'spec.components.indexNode.replicas': expand_replicas}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        # create index again
        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        t1 = datetime.datetime.now() - start
        log.info(f'Create index on {expand_replicas} indexNode cost t1: {t1}')

        collection_w.drop_index()

        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        t2 = datetime.datetime.now() - start
        log.info(f'Create index on {expand_replicas} indexNode cost t2: {t2}')
        log.debug(f't2 is {t2}, t0 is {t0}, t0/t2 is {t0 / t2}')
        # assert round(t0 / t2) == 2

    except Exception as e:
        raise Exception(str(e))

    finally:
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1.deploy milvus cluster with 1 queryNode
            2.prepare work (connect, create, insert, index and load)
            3.continuously search (daemon thread)
            4.expand queryNode from 1 to 5
            5.continuously insert new data (daemon thread)
            6.shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    fail_count = 0
    release_name = "scale-query"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(query_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
        # mic.uninstall(release_name, namespace=constants.NAMESPACE)
        raise BaseException('Milvus healthy timeout 1200s')

    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        # insert two segments
        for i in range(3):
            df = cf.gen_default_dataframe_data(nb)
            collection_w.insert(df)
            log.debug(collection_w.num_entities)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                default_index_params)

        # load
        collection_w.load()

        # scale queryNode to 5
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

        # continuously search
        def do_search():
            while True:
                search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                    ct.default_float_vec_field_name,
                                                    ct.default_search_params, ct.default_limit)
                log.debug(search_res[0].ids)
                assert len(search_res[0].ids) == ct.default_limit

        t_search = threading.Thread(target=do_search, args=(), daemon=True)
        t_search.start()

        # wait new QN running, continuously insert
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        def do_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(1000)
                collection_w.insert(tmp_df)

        t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
        t_insert.start()

        log.debug(collection_w.num_entities)
        time.sleep(20)
        log.debug("Expand querynode test finished")

        # shrink queryNode to 3
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(60)
        log.debug("Shrink querynode test finished")

    except Exception as e:
        log.error(str(e))
        fail_count += 1
        # raise Exception(str(e))

    finally:
        log.info(f'Test finished with {fail_count} failed request(s)')
        assert fail_count <= 1
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
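# A minimal sketch of what a readiness helper like `wait_pods_ready` (used above)
# might do; the repo's actual helper may differ, so treat this as an assumption
# for illustration. It polls all pods matching the label selector until every one
# reports phase Running, or the timeout expires.
import time
from kubernetes import client, config

def wait_pods_ready_sketch(namespace, label_selector, timeout=360):
    config.load_kube_config()
    v1 = client.CoreV1Api()
    deadline = time.time() + timeout
    while time.time() < deadline:
        pods = v1.list_namespaced_pod(namespace, label_selector=label_selector).items
        if pods and all(p.status.phase == "Running" for p in pods):
            return True
        time.sleep(5)  # poll interval
    return False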
def test_shrink_query_node(self):
    """
    target: test shrink queryNode from 2 to 1
    method: 1.deploy two queryNode
            2.search two collections in two queryNode
            3.shrink queryNode from 2 to 1
            4.search second collection
    expected: search result is correct
    """
    # deploy
    release_name = "scale-query"
    env = HelmEnv(release_name=release_name, queryNode=2)
    host = env.helm_install_cluster_milvus(image_pull_policy=constants.IF_NOT_PRESENT)

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # collection one
    data = cf.gen_default_list_data(nb)
    c_name = "query_scale_one"
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
    collection_w.insert(data)
    assert collection_w.num_entities == nb
    collection_w.load()
    res1, _ = collection_w.search(data[-1][:nq], ct.default_float_vec_field_name,
                                  ct.default_search_params, ct.default_limit)
    assert res1[0].ids[0] == data[0][0]

    # collection two
    c_name_2 = "query_scale_two"
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
    collection_w2.insert(data)
    assert collection_w2.num_entities == nb
    collection_w2.load()
    res2, _ = collection_w2.search(data[-1][:nq], ct.default_float_vec_field_name,
                                   ct.default_search_params, ct.default_limit)
    assert res2[0].ids[0] == data[0][0]

    # shrink queryNode pod from 2 to 1
    env.helm_upgrade_cluster_milvus(queryNode=1)

    # search both collections after shrink
    res1, _ = collection_w.search(data[-1][:nq], ct.default_float_vec_field_name,
                                  ct.default_search_params, ct.default_limit)
    assert res1[0].ids[0] == data[0][0]
    res2, _ = collection_w2.search(data[-1][:nq], ct.default_float_vec_field_name,
                                   ct.default_search_params, ct.default_limit)
    assert res2[0].ids[0] == data[0][0]
def test_expand_index_node(self):
    """
    target: test expand indexNode from 1 to 2
    method: 1.deploy a cluster with one indexNode
            2.create index with one indexNode and record the cost
            3.expand indexNode from 1 to 2
            4.create index again with two indexNodes and record the cost
    expected: the cost with one indexNode is about twice the cost with two indexNodes
    """
    release_name = "scale-index"
    image = f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}'
    data_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.indexNode.replicas': 1,
        'spec.components.dataNode.replicas': 2,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(data_config)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy: {healthy}")
    host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    # host = '10.98.0.8'

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    data = cf.gen_default_dataframe_data(nb)

    # create
    c_name = "index_scale_one"
    collection_w = ApiCollectionWrapper()
    # collection_w.init_collection(name=c_name)
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    loop = 100
    for i in range(loop):
        collection_w.insert(data, timeout=60)
    assert collection_w.num_entities == nb * loop

    # create index with one indexNode and record the cost
    # note that the num of segments and the num of indexNode are related to indexing time
    collection_w.drop_index()
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t0 = datetime.datetime.now() - start
    log.debug(f't0: {t0}')

    collection_w.drop_index()
    assert not collection_w.has_index()[0]

    # expand indexNode from 1 to 2
    mic.upgrade(release_name, {'spec.components.indexNode.replicas': 2}, constants.NAMESPACE)
    time.sleep(60)
    mic.wait_for_healthy(release_name, constants.NAMESPACE)

    # create index with two indexNodes and record the cost
    start = datetime.datetime.now()
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    t1 = datetime.datetime.now() - start
    log.debug(f't1: {t1}')
    assert round(t0 / t1) == 2
def test_scale_data_node(self):
    """
    target: test scale dataNode
    method: 1.deploy milvus cluster with 2 dataNode
            2.create collection with shards_num=5
            3.continuously insert new data (daemon thread)
            4.expand dataNode from 2 to 5
            5.create new collection with shards_num=2
            6.continuously insert new collection new data (daemon thread)
            7.shrink dataNode from 5 to 3
    expected: Verify milvus remains healthy, insert and flush succeed during scale
              Average dataNode memory usage
    """
    release_name = "scale-data"
    image = f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}'
    data_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.dataNode.replicas': 2,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(data_config)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy: {healthy}")
    host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    # host = '10.98.0.4'

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str("scale_query")
    # c_name = 'scale_query_DymS7kI4'
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=5)

    tmp_nb = 10000

    def do_insert():
        while True:
            tmp_df = cf.gen_default_dataframe_data(tmp_nb)
            collection_w.insert(tmp_df)
            log.debug(collection_w.num_entities)

    t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
    t_insert.start()

    # scale dataNode to 5
    mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5}, constants.NAMESPACE)
    time.sleep(300)
    log.debug("Expand dataNode test finished")

    # create new collection and insert
    new_c_name = cf.gen_unique_str("scale_query")
    collection_w_new = ApiCollectionWrapper()
    collection_w_new.init_collection(name=new_c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

    def do_new_insert():
        while True:
            tmp_df = cf.gen_default_dataframe_data(tmp_nb)
            collection_w_new.insert(tmp_df)
            log.debug(collection_w_new.num_entities)

    t_insert_new = threading.Thread(target=do_new_insert, args=(), daemon=True)
    t_insert_new.start()

    # scale dataNode to 3
    mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3}, constants.NAMESPACE)
    wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

    log.debug(collection_w.num_entities)
    time.sleep(300)
    log.debug("Shrink dataNode test finished")
def test_expand_data_node(self):
    """
    target: test create and insert api after expand dataNode pod
    method: 1.create collection a and insert df
            2.expand dataNode pod from 1 to 2
            3.verify collection a property and verify create and insert of new collection
    expected: create and insert operations on both collections work correctly
    """
    release_name = "scale-data"
    # env = HelmEnv(release_name=release_name)
    # host = env.helm_install_cluster_milvus()

    # deploy cluster milvus with dataNode 1 replicas
    default_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': 'milvusdb/milvus-dev:master-20211020-b40513b',
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'dependencies.etcd.inCluster.deletionPolicy': 'Delete',
        'dependencies.etcd.inCluster.pvcDeletion': 'true',
        'dependencies.pulsar.inCluster.deletionPolicy': 'Delete',
        'dependencies.pulsar.inCluster.pvcDeletion': 'true',
        'dependencies.storage.inCluster.deletionPolicy': 'Delete',
        'dependencies.storage.inCluster.pvcDeletion': 'true',
    }
    milvusOp = MilvusOperator()
    milvusOp.install(default_config)
    if milvusOp.wait_for_healthy(release_name, namespace=constants.NAMESPACE):
        endpoint = milvusOp.endpoint(release_name, constants.NAMESPACE)
        endpoint = endpoint.split(':')
        host = endpoint[0]
        port = int(endpoint[-1])
    else:
        raise Exception(f"Failed to install {release_name}")

    # connect
    connections.add_connection(default={"host": host, "port": port})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    milvusOp.upgrade(release_name, {'spec.components.dataNode.replicas': 2}, constants.NAMESPACE)
    # env.helm_upgrade_cluster_milvus(dataNode=2)

    # after scale, assert data consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert old collection ddl
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1.deploy milvus cluster with 1 queryNode
            2.prepare work (connect, create, insert, index and load)
            3.continuously search (daemon thread)
            4.expand queryNode from 1 to 5
            5.continuously insert new data (daemon thread)
            6.shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    release_name = "scale-query"
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': 'harbor.zilliz.cc/milvus/milvus:master-20211202-ed546d0',
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(query_config)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy: {healthy}")
    host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    # host = "10.98.0.8"

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str("scale_query")
    # c_name = 'scale_query_DymS7kI4'
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

    # insert two segments
    for i in range(3):
        df = cf.gen_default_dataframe_data(nb)
        collection_w.insert(df)
        log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            default_index_params)

    # load
    collection_w.load()

    # scale queryNode to 5
    mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

    # continuously search
    def do_search():
        while True:
            search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                ct.default_float_vec_field_name,
                                                ct.default_search_params, ct.default_limit)
            log.debug(search_res[0].ids)
            assert len(search_res[0].ids) == ct.default_limit

    t_search = threading.Thread(target=do_search, args=(), daemon=True)
    t_search.start()

    # wait new QN running, continuously insert
    # time.sleep(10)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy after scale up: {healthy}")
    # wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

    def do_insert():
        while True:
            tmp_df = cf.gen_default_dataframe_data(1000)
            collection_w.insert(tmp_df)

    t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
    t_insert.start()

    log.debug(collection_w.num_entities)
    time.sleep(20)
    log.debug("Expand querynode test finished")

    # shrink queryNode to 3
    mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
    time.sleep(60)
    wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

    log.debug(collection_w.num_entities)
    time.sleep(60)
    log.debug("Shrink querynode test finished")
def test_shrink_index_node(self):
    """
    target: test shrink indexNode from 2 to 1
    method: 1.deploy two indexNode
            2.create index with two indexNode
            3.shrink indexNode from 2 to 1
            4.create index with one indexNode
    expected: the cost with one indexNode is about twice the cost with two indexNodes
    """
    release_name = "shrink-index"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    data_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.indexNode.replicas': 2,
        'spec.components.dataNode.replicas': 2,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(data_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        raise MilvusException(message='Milvus healthy timeout 1800s')

    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        data = cf.gen_default_dataframe_data(nb)

        # create
        c_name = "index_scale_one"
        collection_w = ApiCollectionWrapper()
        # collection_w.init_collection(name=c_name)
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

        # insert
        loop = 10
        for i in range(loop):
            collection_w.insert(data)
        assert collection_w.num_entities == nb * loop

        # create index with two indexNodes and record the cost
        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        t0 = datetime.datetime.now() - start
        log.info(f'Create index on 2 indexNode cost t0: {t0}')

        collection_w.drop_index()
        assert not collection_w.has_index()[0]

        # shrink indexNode from 2 to 1
        mic.upgrade(release_name, {'spec.components.indexNode.replicas': 1}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        t1 = datetime.datetime.now() - start
        log.info(f'Create index on 1 indexNode cost t1: {t1}')

        collection_w.drop_index()

        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        t2 = datetime.datetime.now() - start
        log.info(f'Create index on 1 indexNode cost t2: {t2}')
        log.debug(f'one indexNode: {t2}')
        log.debug(f't2 is {t2}, t0 is {t0}, t2/t0 is {t2 / t0}')
        # assert round(t2 / t0) == 2

    except Exception as e:
        raise Exception(str(e))

    finally:
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_data_node(self):
    """
    target: test scale dataNode
    method: 1.deploy milvus cluster with 2 dataNode
            2.create collection with shards_num=4
            3.continuously insert new data (daemon thread)
            4.expand dataNode from 2 to 5
            5.create new collection with shards_num=3
            6.continuously insert new collection new data (daemon thread)
            7.shrink dataNode from 5 to 3
    expected: Verify milvus remains healthy, insert and flush succeed during scale
              Average dataNode memory usage
    """
    release_name = "scale-data"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    data_config = {
        'metadata.namespace': constants.NAMESPACE,
        'spec.mode': 'cluster',
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.dataNode.replicas': 2,
        'spec.config.common.retentionDuration': 60
    }
    mic = MilvusOperator()
    mic.install(data_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        raise MilvusException(message='Milvus healthy timeout 1800s')

    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_data")
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=4)

        tmp_nb = 10000

        @counter
        def do_insert():
            """ do insert and flush """
            insert_res, is_succ = collection_w.insert(cf.gen_default_dataframe_data(tmp_nb))
            log.debug(collection_w.num_entities)
            return insert_res, is_succ

        def loop_insert():
            """ loop do insert """
            while True:
                do_insert()

        threading.Thread(target=loop_insert, args=(), daemon=True).start()

        # scale dataNode to 5
        mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")
        log.debug("Expand dataNode test finished")

        # create new collection and insert
        new_c_name = cf.gen_unique_str("scale_data")
        collection_w_new = ApiCollectionWrapper()
        collection_w_new.init_collection(name=new_c_name, schema=cf.gen_default_collection_schema(), shards_num=3)

        @counter
        def do_new_insert():
            """ do new insert """
            insert_res, is_succ = collection_w_new.insert(cf.gen_default_dataframe_data(tmp_nb))
            log.debug(collection_w_new.num_entities)
            return insert_res, is_succ

        def loop_new_insert():
            """ loop new insert """
            while True:
                do_new_insert()

        threading.Thread(target=loop_new_insert, args=(), daemon=True).start()

        # scale dataNode to 3
        mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(300)
        scale_common.check_succ_rate(do_insert)
        scale_common.check_succ_rate(do_new_insert)
        log.debug("Shrink dataNode test finished")

    except Exception as e:
        log.error(str(e))
        # raise Exception(str(e))

    finally:
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
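# `@counter` and `scale_common.check_succ_rate` are repo helpers whose source is
# not shown here. A plausible sketch of the decorator, assuming it tallies total
# vs. failed calls so the success rate can be checked after scaling finishes;
# this is an illustrative assumption, not the repo's implementation.
import functools

def counter_sketch(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        wrapper.total += 1
        try:
            return func(*args, **kwargs)
        except Exception:
            wrapper.failed += 1
            raise
    wrapper.total = 0
    wrapper.failed = 0
    return wrapper

# A check_succ_rate counterpart could then assert, e.g.:
#   assert (do_insert.total - do_insert.failed) / do_insert.total >= expected_rate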
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1.deploy milvus cluster with 1 queryNode
            2.prepare work (connect, create, insert, index and load)
            3.continuously search (daemon thread)
            4.expand queryNode from 1 to 5
            5.continuously insert new data (daemon thread)
            6.shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    release_name = "scale-query"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'spec.mode': 'cluster',
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.common.retentionDuration': 60
    }
    mic = MilvusOperator()
    mic.install(query_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        raise MilvusException(message='Milvus healthy timeout 1800s')

    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        # insert two segments
        for i in range(3):
            df = cf.gen_default_dataframe_data(nb)
            collection_w.insert(df)
            log.debug(collection_w.num_entities)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                default_index_params)

        # load
        collection_w.load()

        # scale queryNode to 5
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

        @counter
        def do_search():
            """ do search """
            search_res, is_succ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                      ct.default_float_vec_field_name,
                                                      ct.default_search_params, ct.default_limit,
                                                      check_task=CheckTasks.check_nothing)
            assert len(search_res) == 1
            return search_res, is_succ

        def loop_search():
            """ continuously search """
            while True:
                do_search()

        threading.Thread(target=loop_search, args=(), daemon=True).start()

        # wait new QN running, continuously insert
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        @counter
        def do_insert():
            """ do insert """
            return collection_w.insert(cf.gen_default_dataframe_data(1000),
                                       check_task=CheckTasks.check_nothing)

        def loop_insert():
            """ loop insert """
            while True:
                do_insert()

        threading.Thread(target=loop_insert, args=(), daemon=True).start()

        log.debug(collection_w.num_entities)
        time.sleep(20)
        log.debug("Expand querynode test finished")

        # shrink queryNode to 3
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(60)
        scale_common.check_succ_rate(do_search)
        scale_common.check_succ_rate(do_insert)
        log.debug("Shrink querynode test finished")

    except Exception as e:
        raise Exception(str(e))

    finally:
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)