def get_querynode_id_pod_pairs(namespace, label_selector):
    """
    get milvus node id and corresponding pod name pairs with label selector

    :param namespace: the namespace where the release is installed
    :type namespace: str

    :param label_selector: labels to restrict which pods to list
    :type label_selector: str

    :example:
        >>> querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", "app.kubernetes.io/instance=milvus-multi-querynode, component=querynode")
        {
            5: 'milvus-multi-querynode-querynode-7b8f4b5c5-4pn42',
            9: 'milvus-multi-querynode-querynode-7b8f4b5c5-99tx7',
            1: 'milvus-multi-querynode-querynode-7b8f4b5c5-w9sk8',
            3: 'milvus-multi-querynode-querynode-7b8f4b5c5-xx84j',
            6: 'milvus-multi-querynode-querynode-7b8f4b5c5-x95dp'
        }
    """
    # TODO: extend this function to other worker nodes, not only querynode
    querynode_ip_pod_pair = get_pod_ip_name_pairs(namespace, label_selector)
    querynode_id_pod_pair = {}
    ms = MilvusSys()
    for node in ms.query_nodes:
        ip = node["infos"]['hardware_infos']["ip"].split(":")[0]
        querynode_id_pod_pair[node["identifier"]] = querynode_ip_pod_pair[ip]
    return querynode_id_pod_pair
def test_task_all(self, index_type, is_compacted, segment_status, is_vector_indexed,
                  is_string_indexed, replica_number, is_deleted, data_size):
    """
    before reinstall: create collection and insert data, load and search
    after reinstall: get collection, search, create index, load, and search
    """
    name = f"index_type_{index_type}_segment_status_{segment_status}" \
           f"_is_vector_indexed_{is_vector_indexed}_is_string_indexed_{is_string_indexed}" \
           f"_is_compacted_{is_compacted}_is_deleted_{is_deleted}" \
           f"_replica_number_{replica_number}_data_size_{data_size}"
    ms = MilvusSys()
    is_binary = True if "BIN" in index_type else False
    # insert with small size data without flush to get growing segment
    collection_w = self.init_collection_general(insert_data=True, is_binary=is_binary,
                                                nb=3000, is_flush=False, name=name)[0]
    # load for growing segment
    if replica_number > 0:
        collection_w.load(replica_number=replica_number)
    delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
    # delete data for growing segment
    if is_deleted:
        collection_w.delete(expr=delete_expr)
    if segment_status == "only_growing":
        pytest.skip("already get growing segment, skip testcase")
    # insert with flush multiple times to generate multiple sealed segments
    for i in range(5):
        self.init_collection_general(insert_data=True, is_binary=is_binary,
                                     nb=data_size, is_flush=False, name=name)[0]
    if is_binary:
        default_index_field = ct.default_binary_vec_field_name
    else:
        default_index_field = ct.default_float_vec_field_name
    if is_vector_indexed:
        # create index
        default_index_param = gen_index_param(index_type)
        collection_w.create_index(default_index_field, default_index_param)
    if is_string_indexed:
        # create index
        default_string_index_params = {}
        collection_w.create_index(default_string_field_name, default_string_index_params)
    # delete data for sealed segment
    delete_expr = f"{ct.default_int64_field_name} in [10,11,12,13,14,15,16,17,18,19]"
    if is_deleted:
        collection_w.delete(expr=delete_expr)
    if is_compacted:
        collection_w.compact()
    # reload after flush and create index
    if replica_number > 0:
        collection_w.release()
        collection_w.load(replica_number=replica_number)
def get_querynode_info(release_name):
    querynode_id_pod_pair = {}
    querynode_ip_pod_pair = get_pod_ip_name_pairs(
        "chaos-testing", f"app.kubernetes.io/instance={release_name}, component=querynode")
    ms = MilvusSys()
    for node in ms.query_nodes:
        ip = node["infos"]['hardware_infos']["ip"].split(":")[0]
        querynode_id_pod_pair[node["identifier"]] = querynode_ip_pod_pair[ip]
    return querynode_id_pod_pair
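# Hedged usage sketch for get_querynode_info: the release name below is a placeholder,
# and get_pod_ip_name_pairs/MilvusSys/log are assumed to be importable from the same
# test utilities as the helpers above. A chaos test could use the mapping like this to
# pick the pod backing a specific query node before injecting a fault.
def _example_pick_target_querynode_pod():
    querynode_id_pod_pair = get_querynode_info("milvus-multi-querynode")
    log.info(f"querynode id to pod mapping: {querynode_id_pod_pair}")
    # pick any node id and return the pod that hosts it
    some_node_id = list(querynode_id_pod_pair.keys())[0]
    return querynode_id_pod_pair[some_node_id]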
def test_customize_segment_size(self, size_id):
    """
    steps
    1. [test_milvus_install]: set up milvus with customized segment size configured
    2. [test_simd_compat_e2e]: verify milvus is working well
    3. [test_milvus_cleanup]: delete milvus instances in teardown
    """
    size = customize_segment_sizes[size_id]
    log.info(f"start to install milvus with segment size {size}")
    release_name, host, port = _install_milvus(size)
    self.release_name = release_name
    assert host is not None
    conn = connections.connect("default", host=host, port=port)
    assert conn is not None
    mil = MilvusSys(alias="default")
    log.info(f"milvus build version: {mil.build_version}")
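# Hedged sketch of what _install_milvus(size) is assumed to do for this test: deploy a
# release through MilvusOperator with a customized segment size. The config key
# 'spec.config.dataCoord.segment.maxSize' (MB), the image tag, and the helper name are
# assumptions for illustration, not taken from the original snippet.
def _install_milvus_sketch(seg_size, namespace="chaos-testing"):
    release_name = "mil-segsize-" + cf.gen_digits_by_length(6)
    cus_configs = {
        'spec.components.image': 'milvusdb/milvus-dev:master-latest',
        'metadata.namespace': namespace,
        'metadata.name': release_name,
        'spec.config.dataCoord.segment.maxSize': seg_size,  # assumed key for segment size (MB)
        'spec.components.proxy.serviceType': 'LoadBalancer',
    }
    milvus_op = MilvusOperator()
    milvus_op.install(cus_configs)
    milvus_op.wait_for_healthy(release_name, namespace)
    host, port = milvus_op.endpoint(release_name, namespace).split(':')
    return release_name, host, port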
def get_milvus_instance_name(namespace, host, port="19530"):
    """
    get milvus instance name after connection

    :param namespace: the namespace where the release is installed
    :type namespace: str

    :param host: milvus host ip
    :type host: str

    :param port: milvus port
    :type port: str

    :example:
        >>> milvus_instance_name = get_milvus_instance_name("chaos-testing", "10.96.250.111")
        "milvus-multi-querynode"
    """
    connections.add_connection(_default={"host": host, "port": port})
    connections.connect(alias='_default')
    ms = MilvusSys()
    query_node_ip = ms.query_nodes[0]["infos"]['hardware_infos']["ip"].split(":")[0]
    pod_name = ""
    if ms.deploy_mode == "STANDALONE":
        # get all pods whose labels are app.kubernetes.io/name=milvus and component=standalone
        ip_name_pairs = get_pod_ip_name_pairs(
            namespace, "app.kubernetes.io/name=milvus, component=standalone")
        pod_name = ip_name_pairs[query_node_ip]
    if ms.deploy_mode == "DISTRIBUTED":
        # get all pods whose labels are app.kubernetes.io/name=milvus and component=querynode
        ip_name_pairs = get_pod_ip_name_pairs(
            namespace, "app.kubernetes.io/name=milvus, component=querynode")
        pod_name = ip_name_pairs[query_node_ip]
    init_k8s_client_config()
    api_instance = client.CoreV1Api()
    try:
        api_response = api_instance.read_namespaced_pod(namespace=namespace, name=pod_name)
    except ApiException as e:
        log.error("Exception when calling CoreV1Api->read_namespaced_pod: %s\n" % e)
        raise Exception(str(e))
    milvus_instance_name = api_response.metadata.labels["app.kubernetes.io/instance"]
    return milvus_instance_name
def prepare_bulk_load(self, nb=1000, row_based=True):
    if Op.bulk_load not in self.health_checkers:
        log.info("bulk_load checker is not in health checkers, skip prepare bulk load")
        return
    log.info("bulk_load checker is in health checkers, prepare data firstly")
    release_name = self.instance_name
    minio_ip_pod_pair = get_pod_ip_name_pairs(
        "chaos-testing", f"release={release_name}, app=minio")
    ms = MilvusSys()
    minio_ip = list(minio_ip_pod_pair.keys())[0]
    minio_port = "9000"
    minio_endpoint = f"{minio_ip}:{minio_port}"
    bucket_name = ms.index_nodes[0]["infos"]["system_configurations"]["minio_bucket_name"]
    schema = cf.gen_default_collection_schema()
    data = cf.gen_default_list_data_for_bulk_load(nb=nb)
    fields_name = [field.name for field in schema.fields]
    if not row_based:
        data_dict = dict(zip(fields_name, data))
    if row_based:
        entities = []
        for i in range(nb):
            entity_value = [field_values[i] for field_values in data]
            entity = dict(zip(fields_name, entity_value))
            entities.append(entity)
        data_dict = {"rows": entities}
    file_name = "bulk_load_data_source.json"
    files = [file_name]
    # TODO: npy file type is not supported so far
    log.info("generate bulk load file")
    with open(file_name, "w") as f:
        f.write(json.dumps(data_dict))
    log.info("upload file to minio")
    client = Minio(minio_endpoint, access_key="minioadmin", secret_key="minioadmin", secure=False)
    client.fput_object(bucket_name, file_name, file_name)
    self.health_checkers[Op.bulk_load].update(schema=schema, files=files, row_based=row_based)
    log.info("prepare data for bulk load done")
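# Hedged illustration of the two JSON layouts prepare_bulk_load() writes, using a tiny
# hypothetical two-field schema; the real file is generated from the default collection
# schema and cf.gen_default_list_data_for_bulk_load().
import json

fields_name = ["int64", "float_vector"]
data = [[0, 1], [[0.1, 0.2], [0.3, 0.4]]]  # column-major: one list of values per field

# column-based layout (row_based=False): {field_name: [values...]}
column_based = dict(zip(fields_name, data))

# row-based layout (row_based=True): {"rows": [{field_name: value, ...}, ...]}
rows = [dict(zip(fields_name, [col[i] for col in data])) for i in range(2)]
row_based = {"rows": rows}

print(json.dumps(column_based))  # {"int64": [0, 1], "float_vector": [[0.1, 0.2], [0.3, 0.4]]}
print(json.dumps(row_based))     # {"rows": [{"int64": 0, ...}, {"int64": 1, ...}]}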
def parser_testcase_config(self, chaos_yaml):
    tests_yaml = constants.TESTS_CONFIG_LOCATION + 'testcases.yaml'
    tests_config = cc.gen_experiment_config(tests_yaml)
    test_collections = tests_config.get('Collections', None)
    ms = MilvusSys(alias="default")
    node_map = {
        "querynode": "query_nodes",
        "datanode": "data_nodes",
        "indexnode": "index_nodes",
        "proxy": "proxy_nodes"
    }
    for t in test_collections:
        test_chaos = t.get('testcase', {}).get('chaos', {})
        if test_chaos in chaos_yaml:
            expects = t.get('testcase', {}).get('expectation', {}).get('cluster_1_node', {})
            # for cluster_n_node mode
            for node in node_map.keys():
                if node in test_chaos and len(getattr(ms, node_map[node])) > 1:
                    expects = t.get('testcase', {}).get('expectation', {}).get('cluster_n_node', {})
            log.info(f"yaml.expects: {expects}")
            self.expect_create = expects.get(Op.create.value, constants.SUCC)
            self.expect_insert = expects.get(Op.insert.value, constants.SUCC)
            self.expect_flush = expects.get(Op.flush.value, constants.SUCC)
            self.expect_index = expects.get(Op.index.value, constants.SUCC)
            self.expect_search = expects.get(Op.search.value, constants.SUCC)
            self.expect_query = expects.get(Op.query.value, constants.SUCC)
            log.info(
                f"self.expects: create:{self.expect_create}, insert:{self.expect_insert}, "
                f"flush:{self.expect_flush}, index:{self.expect_index}, "
                f"search:{self.expect_search}, query:{self.expect_query}")
            return True
    return False
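# Hedged sketch of the structure parser_testcase_config() expects from testcases.yaml,
# shown as the equivalent parsed Python dict. The chaos file name and the expectation
# values below are placeholders, not taken from the real config.
example_tests_config = {
    "Collections": [
        {
            "testcase": {
                "chaos": "chaos_querynode_pod_kill.yaml",  # matched against the chaos_yaml argument
                "expectation": {
                    "cluster_1_node": {"search": "fail", "query": "fail"},
                    "cluster_n_node": {"search": "succ", "query": "succ"},
                },
            }
        }
    ]
}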
def test_milvus_install(self, request, simd):
    release_name = "mil-simd-" + cf.gen_digits_by_length(6)
    namespace = 'chaos-testing'
    cus_configs = {
        'spec.components.image': 'milvusdb/milvus-dev:master-latest',
        'metadata.namespace': namespace,
        'metadata.name': release_name,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        # TODO: use simd config instead of replicas
        'spec.components.queryNode.replicas': 2
    }
    milvus_op = MilvusOperator()
    log.info(f"install milvus with configs: {cus_configs}")
    milvus_op.install(cus_configs)
    healthy = milvus_op.wait_for_healthy(release_name, namespace)
    log.info(f"milvus healthy: {healthy}")
    assert healthy
    endpoint = milvus_op.endpoint(release_name, namespace).split(':')
    log.info(f"milvus endpoint: {endpoint}")
    host = endpoint[0]
    port = endpoint[1]
    conn = connections.connect(simd, host=host, port=port)
    assert conn is not None
    mil = MilvusSys(alias=simd)
    log.info(f"milvus build version: {mil.build_version}")
    # TODO: verify simd config instead of replicas
    assert len(mil.query_nodes) == 2
    # cache results for dependent tests
    cache = {
        'release_name': release_name,
        'namespace': namespace,
        'alias': simd,
        'simd': simd
    }
    request.config.cache.set(simd, cache)
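# Hedged sketch of how a dependent test is assumed to read back the values cached by
# test_milvus_install through pytest's config cache; the test name is hypothetical and
# the key is the same simd fixture value used for cache.set() above.
def test_use_cached_milvus(request, simd):
    cache = request.config.cache.get(simd, None)
    assert cache is not None, "run test_milvus_install first"
    release_name = cache['release_name']
    namespace = cache['namespace']
    log.info(f"reuse milvus release {release_name} in namespace {namespace}")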
    after upgrade: get collection, load with multi replicas, search, insert data with flush,
    load with multi replicas and search
    """
    prefix = "task_5_"
    connections.connect(host=host, port=19530, timeout=60)
    get_collections(prefix)
    create_collections_and_insert_data(prefix, flush=False, count=data_size)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='config for deploy test')
    parser.add_argument('--host', type=str, default="127.0.0.1", help='milvus server ip')
    parser.add_argument('--data_size', type=int, default=3000, help='data size')
    args = parser.parse_args()
    data_size = args.data_size
    host = args.host
    print(f"data size: {data_size}")
    connections.connect(host=host, port=19530, timeout=60)
    ms = MilvusSys()
    task_1(data_size, host)
    task_2(data_size, host)
    if len(ms.query_nodes) >= NUM_REPLICAS:
        task_3(data_size, host)
    task_4(data_size, host)
    task_5(data_size, host)
def test_simd_compat_e2e(self, simd_id):
    """
    steps
    1. [test_milvus_install]: set up milvus with customized simd configured
    2. [test_simd_compat_e2e]: verify milvus is working well
    3. [test_milvus_cleanup]: delete milvus instances in teardown
    """
    simd = supported_simd_types[simd_id]
    log.info(f"start to install milvus with simd {simd}")
    release_name, host, port = _install_milvus(simd)
    self.release_name = release_name
    assert host is not None
    conn = connections.connect("default", host=host, port=port)
    assert conn is not None
    mil = MilvusSys(alias="default")
    log.info(f"milvus build version: {mil.build_version}")
    log.info(f"milvus simdType: {mil.simd_type}")
    assert str(mil.simd_type).lower() in [
        simd_type.lower() for simd_type in supported_simd_types[simd_id:]
    ]

    log.info(f"start to e2e verification: {simd}")
    # create
    name = cf.gen_unique_str("compat")
    t0 = time.time()
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=name,
                                 schema=cf.gen_default_collection_schema(),
                                 timeout=40)
    tt = time.time() - t0
    assert collection_w.name == name
    entities = collection_w.num_entities
    log.info(f"assert create collection: {tt}, init_entities: {entities}")

    # insert
    data = cf.gen_default_list_data()
    t0 = time.time()
    _, res = collection_w.insert(data)
    tt = time.time() - t0
    log.info(f"assert insert: {tt}")
    assert res

    # flush
    t0 = time.time()
    assert collection_w.num_entities == len(data[0]) + entities
    tt = time.time() - t0
    entities = collection_w.num_entities
    log.info(f"assert flush: {tt}, entities: {entities}")

    # search
    collection_w.load()
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
    t0 = time.time()
    res_1, _ = collection_w.search(data=search_vectors,
                                   anns_field=ct.default_float_vec_field_name,
                                   param=search_params, limit=1)
    tt = time.time() - t0
    log.info(f"assert search: {tt}")
    assert len(res_1) == 1
    collection_w.release()

    # index
    d = cf.gen_default_list_data()
    collection_w.insert(d)
    log.info(f"assert index entities: {collection_w.num_entities}")
    _index_params = {"index_type": "IVF_SQ8", "params": {"nlist": 64}, "metric_type": "L2"}
    t0 = time.time()
    index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
                                         index_params=_index_params,
                                         name=cf.gen_unique_str())
    tt = time.time() - t0
    log.info(f"assert index: {tt}")
    assert len(collection_w.indexes) == 1

    # search
    t0 = time.time()
    collection_w.load()
    tt = time.time() - t0
    log.info(f"assert load: {tt}")
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    t0 = time.time()
    res_1, _ = collection_w.search(data=search_vectors,
                                   anns_field=ct.default_float_vec_field_name,
                                   param=search_params, limit=1)
    tt = time.time() - t0
    log.info(f"assert search: {tt}")

    # query
    term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
    t0 = time.time()
    res, _ = collection_w.query(term_expr)
    tt = time.time() - t0
    log.info(f"assert query result {len(res)}: {tt}")
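# Hedged worked example of the simd assertion above: the check accepts any simd type at
# or after the requested index in supported_simd_types, compared case-insensitively.
# The example list and values below are assumptions for illustration only.
supported_simd_types_example = ["sse4_2", "avx", "avx2", "avx512"]
simd_id_example = 1            # requested "avx"
reported_simd_type = "AVX512"  # what MilvusSys().simd_type might report
assert reported_simd_type.lower() in [
    t.lower() for t in supported_simd_types_example[simd_id_example:]
]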
def test_check(self, collection_name, data_size):
    """
    before reinstall: create collection
    """
    self._connect()
    ms = MilvusSys()
    name = collection_name
    collection_w = self.init_collection_general(
        insert_data=False, name=name, active_trace=True)[0]
    schema = collection_w.schema
    data_type = [field.dtype.name for field in schema.fields]
    field_name = [field.name for field in schema.fields]
    type_field_map = dict(zip(data_type, field_name))
    is_binary = False
    if "BINARY_VECTOR" in data_type:
        is_binary = True
    if is_binary:
        default_index_field = ct.default_binary_vec_field_name
        vector_index_type = "BIN_FLAT"
    else:
        default_index_field = ct.default_float_vec_field_name
        vector_index_type = "IVF_FLAT"

    is_vector_indexed = False
    is_string_indexed = False
    indexed_fields = [index.field_name for index in collection_w.indexes]
    binary_vector_index_types = [index.params["index_type"] for index in collection_w.indexes
                                 if index.field_name == type_field_map.get("BINARY_VECTOR", "")]
    float_vector_index_types = [index.params["index_type"] for index in collection_w.indexes
                                if index.field_name == type_field_map.get("FLOAT_VECTOR", "")]
    string_index_types = [index.params["index_type"] for index in collection_w.indexes
                          if index.field_name == type_field_map.get("VARCHAR", "")]
    index_names = [index.index_name for index in collection_w.indexes]  # used to drop index
    vector_index_types = binary_vector_index_types + float_vector_index_types
    if len(vector_index_types) > 0:
        is_vector_indexed = True
        vector_index_type = vector_index_types[0]
    if len(string_index_types) > 0:
        is_string_indexed = True

    try:
        replicas, _ = collection_w.get_replicas(enable_traceback=False)
        replicas_loaded = len(replicas.groups)
    except Exception as e:
        log.info("get replicas failed")
        replicas_loaded = 0

    # params for search and query
    if is_binary:
        _, vectors_to_search = cf.gen_binary_vectors(default_nb, default_dim)
        default_search_field = ct.default_binary_vec_field_name
    else:
        vectors_to_search = cf.gen_vectors(default_nb, default_dim)
        default_search_field = ct.default_float_vec_field_name
    search_params = gen_search_param(vector_index_type)[0]

    # load if not loaded
    if replicas_loaded == 0:
        collection_w.load()

    # search and query
    collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit, default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq, "limit": default_limit})
    collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)

    # flush
    collection_w.num_entities

    # search and query
    collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit, default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq, "limit": default_limit})
    collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)

    # insert data and flush
    for i in range(2):
        self.init_collection_general(insert_data=True, is_binary=is_binary, nb=data_size,
                                     is_flush=False, is_index=True, name=name)
    collection_w.num_entities

    # delete data
    delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
    collection_w.delete(expr=delete_expr)

    # search and query
    collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit, default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq, "limit": default_limit})
    collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)

    # drop index if exist
    if len(index_names) > 0:
        for index_name in index_names:
            collection_w.drop_index(index_name=index_name)

    # search and query after dropping index
    collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit, default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq, "limit": default_limit})
    collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)

    # create index
    default_index_param = gen_index_param(vector_index_type)
    collection_w.create_index(default_index_field, default_index_param,
                              index_name=cf.gen_unique_str())
    collection_w.create_index(default_string_field_name, {},
                              index_name=cf.gen_unique_str())

    # search and query
    collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit, default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq, "limit": default_limit})
    collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)

    # release and reload with changed replicas
    collection_w.release()
    replica_number = 1
    if replicas_loaded in [0, 1] and len(ms.query_nodes) >= 2:
        replica_number = 2
    collection_w.load(replica_number=replica_number)

    # search and query
    collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit, default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq, "limit": default_limit})
    collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)
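# Hedged refactor sketch: test_check repeats the same search + query pair after every
# milestone; a small helper like the one below (hypothetical, not part of the original
# test class) would keep each checkpoint to one call. The module-level constants
# (default_nq, default_limit, default_search_exp, default_term_expr) are assumed to come
# from the same test module.
def _search_and_query_checkpoint(collection_w, vectors_to_search, default_search_field, search_params):
    collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit, default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq, "limit": default_limit})
    collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)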
def test_customize_segment_size(self, seg_size, seg_count):
    """
    steps
    1. install milvus with a customized segment size
    2. insert ~2 million entities, then load
    3. verify the number of query segments matches the expected count
    """
    log.info(f"start to install milvus with segment size {seg_size}")
    release_name, host, port = _install_milvus(seg_size)
    self.release_name = release_name
    assert host is not None
    conn = connections.connect("default", host=host, port=port)
    assert conn is not None
    mil = MilvusSys(alias="default")
    log.info(f"milvus build version: {mil.build_version}")

    log.info(f"start to e2e verification: {seg_size}")
    # create
    name = cf.gen_unique_str("segsiz")
    t0 = time.time()
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=name,
                                 schema=cf.gen_default_collection_schema(),
                                 timeout=40)
    tt = time.time() - t0
    assert collection_w.name == name
    entities = collection_w.num_entities
    log.info(f"assert create collection: {tt}, init_entities: {entities}")

    # insert
    nb = 50000
    data = cf.gen_default_list_data(nb=nb)
    t0 = time.time()
    _, res = collection_w.insert(data)
    tt = time.time() - t0
    log.info(f"assert insert: {tt}")
    assert res
    # insert 2 million entities
    rounds = 40
    for _ in range(rounds - 1):
        _, res = collection_w.insert(data)
    entities = collection_w.num_entities
    assert entities == nb * rounds

    # load
    collection_w.load()
    utility_wrap = ApiUtilityWrapper()
    segs, _ = utility_wrap.get_query_segment_info(collection_w.name)
    log.info(f"assert segments: {len(segs)}")
    assert len(segs) == seg_count

    # search
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
    t0 = time.time()
    res_1, _ = collection_w.search(data=search_vectors,
                                   anns_field=ct.default_float_vec_field_name,
                                   param=search_params, limit=1, timeout=30)
    tt = time.time() - t0
    log.info(f"assert search: {tt}")
    assert len(res_1) == 1
    collection_w.release()

    # index
    d = cf.gen_default_list_data()
    collection_w.insert(d)
    log.info(f"assert index entities: {collection_w.num_entities}")
    _index_params = {"index_type": "IVF_SQ8", "params": {"nlist": 64}, "metric_type": "L2"}
    t0 = time.time()
    index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
                                         index_params=_index_params,
                                         name=cf.gen_unique_str(), timeout=120)
    tt = time.time() - t0
    log.info(f"assert index: {tt}")
    assert len(collection_w.indexes) == 1

    # search
    t0 = time.time()
    collection_w.load()
    tt = time.time() - t0
    log.info(f"assert load: {tt}")
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    t0 = time.time()
    res_1, _ = collection_w.search(data=search_vectors,
                                   anns_field=ct.default_float_vec_field_name,
                                   param=search_params, limit=1, timeout=30)
    tt = time.time() - t0
    log.info(f"assert search: {tt}")

    # query
    term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
    t0 = time.time()
    res, _ = collection_w.query(term_expr, timeout=30)
    tt = time.time() - t0
    log.info(f"assert query result {len(res)}: {tt}")
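# Hedged back-of-envelope for the seg_count expectation: 40 rounds of 50,000 rows gives
# ~2,000,000 entities; assuming 128-dim float32 vectors (the default dim is an
# assumption here, and non-vector fields add some overhead), the vector data alone is
# roughly 1 GB, so a 512 MB segment size would yield on the order of 2 sealed segments.
# The parametrized seg_count is assumed to be derived this way.
nb, rounds, dim = 50000, 40, 128
vector_bytes = nb * rounds * dim * 4                      # float32 vectors
approx_mb = vector_bytes / (1024 * 1024)                  # ~976 MB of raw vector data
seg_size_mb = 512                                         # hypothetical segment size
expected_segments = -(-vector_bytes // (seg_size_mb * 1024 * 1024))  # ceil division -> 2
print(approx_mb, expected_segments)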
def test_task_all(self, index_type, is_compacted, segment_status, is_vector_indexed,
                  is_string_indexed, replica_number, is_deleted, data_size):
    """
    before reinstall: create collection and insert data, load and search
    """
    name = ""
    for k, v in locals().items():
        if k in ["self", "name"]:
            continue
        name += f"_{k}_{v}"
    name = prefix + name
    self._connect()
    ms = MilvusSys()
    if len(ms.query_nodes) < replica_number:
        # this step is to make sure this testcase can run on standalone mode
        # or cluster mode which has only one querynode
        pytest.skip("skip test, not enough nodes")
    log.info(f"collection name: {name}, replica_number: {replica_number}, is_compacted: {is_compacted},"
             f"is_deleted: {is_deleted}, is_vector_indexed: {is_vector_indexed}, is_string_indexed: {is_string_indexed},"
             f"segment_status: {segment_status}, index_type: {index_type}")
    is_binary = True if "BIN" in index_type else False

    # params for search and query
    if is_binary:
        _, vectors_to_search = cf.gen_binary_vectors(default_nb, default_dim)
        default_search_field = ct.default_binary_vec_field_name
    else:
        vectors_to_search = cf.gen_vectors(default_nb, default_dim)
        default_search_field = ct.default_float_vec_field_name
    search_params = gen_search_param(index_type)[0]

    # init collection and insert with small size data without flush to get growing segment
    collection_w = self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                                is_flush=False, is_index=True, name=name)[0]

    # load for growing segment
    if replica_number >= 1:
        try:
            collection_w.release()
        except Exception as e:
            log.error(f"release collection failed: {e} maybe the collection is not loaded")
        collection_w.load(replica_number=replica_number)

    # delete data for growing segment
    delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
    if is_deleted == "is_deleted":
        collection_w.delete(expr=delete_expr)

    # search and query for growing segment
    if replica_number >= 1:
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit, default_search_exp,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq, "limit": default_limit})
        output_fields = [ct.default_int64_field_name]
        collection_w.query(default_term_expr, output_fields=output_fields,
                           check_task=CheckTasks.check_query_not_empty)

    # skip subsequent operations when segment_status is set to only_growing
    if segment_status == "only_growing":
        pytest.skip("already get growing segment, skip subsequent operations")

    # insert with flush multiple times to generate multiple sealed segments
    for i in range(2):
        self.init_collection_general(insert_data=True, is_binary=is_binary, nb=data_size,
                                     is_flush=False, is_index=True, name=name)
    collection_w.flush()

    # params for creating index
    if is_binary:
        default_index_field = ct.default_binary_vec_field_name
    else:
        default_index_field = ct.default_float_vec_field_name

    # create index for vector
    if is_vector_indexed == "is_vector_indexed":
        default_index_param = gen_index_param(index_type)
        collection_w.create_index(default_index_field, default_index_param)

    # create index for string
    if is_string_indexed == "is_string_indexed":
        default_string_index_params = {}
        default_string_index_name = "_default_string_idx"
        collection_w.create_index(default_string_field_name, default_string_index_params,
                                  index_name=default_string_index_name)

    # delete data for sealed segment
    delete_expr = f"{ct.default_int64_field_name} in [10,11,12,13,14,15,16,17,18,19]"
    if is_deleted == "is_deleted":
        collection_w.delete(expr=delete_expr)
    if is_compacted == "is_compacted":
        collection_w.compact()
    if segment_status == "all":
        self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                     is_flush=False, is_index=True, name=name)

    # reload after flush and creating index
    if replica_number > 0:
        collection_w.release()
        collection_w.load(replica_number=replica_number)

    # insert data to get growing segment
    if segment_status == "all":
        self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                     is_flush=False, is_index=True, name=name)

    # search and query for sealed and growing segment
    if replica_number > 0:
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit, default_search_exp,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq, "limit": default_limit})
        output_fields = [ct.default_int64_field_name]
        collection_w.query(default_term_expr, output_fields=output_fields,
                           check_task=CheckTasks.check_query_not_empty)