def test_memory_stress_replicas_group_load_balance(self, prepare_collection):
    """
    target: test apply memory stress on replicas and load balance inside group
    method: 1.Deploy milvus and limit querynode memory 6Gi
            2.Insert 1000,000 entities (500Mb), load 2 replicas (memory usage 1.5Gb)
            3.Apply memory stress 4Gi on querynode
    expected: Verify that load balancing occurs
    """
    collection_w = prepare_collection
    utility_w = ApiUtilityWrapper()
    release_name = "mic-memory"
    # load with 2 replicas and confirm loading finished
    collection_w.load(replica_number=2)
    progress, _ = utility_w.loading_progress(collection_w.name)
    assert progress["loading_progress"] == "100%"
    # pick the first querynode of the first replica group as the chaos target
    replicas, _ = collection_w.get_replicas()
    chaos_querynode_id = replicas.groups[0].group_nodes[0]
    label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode"
    querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label)
    chaos_querynode_pod = querynode_id_pod_pair[chaos_querynode_id]
    # record the sealed segment count on the target node before chaos
    seg_info_before, _ = utility_w.get_query_segment_info(collection_w.name)
    seg_distribution_before = cf.get_segment_distribution(seg_info_before)
    segments_num_before = len(seg_distribution_before[chaos_querynode_id]["sealed"])
    log.debug(segments_num_before)
    log.debug(seg_distribution_before[chaos_querynode_id]["sealed"])
    # apply memory stress to just that pod
    chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_replicas_memory_stress_pods.yaml")
    chaos_config['spec']['selector']['pods']['chaos-testing'] = [chaos_querynode_pod]
    log.debug(chaos_config)
    chaos_res = CusResource(kind=chaos_config['kind'],
                            group=constants.CHAOS_GROUP,
                            version=constants.CHAOS_VERSION,
                            namespace=constants.CHAOS_NAMESPACE)
    chaos_res.create(chaos_config)
    log.debug(f"Apply memory stress on querynode {chaos_querynode_id}, pod {chaos_querynode_pod}")
    # convert a duration string like "3m20s" into an arithmetic expression
    # ("3*60+20*1+" + "+0") and evaluate it to get seconds to sleep
    duration = chaos_config.get('spec').get('duration')
    duration = duration.replace('h', '*3600+').replace('m', '*60+').replace('s', '*1+') + '+0'
    sleep(eval(duration))
    chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None))
    # verify segments were balanced away from the stressed node
    seg_info_after, _ = utility_w.get_query_segment_info(collection_w.name)
    seg_distribution_after = cf.get_segment_distribution(seg_info_after)
    segments_num_after = len(seg_distribution_after[chaos_querynode_id]["sealed"])
    log.debug(segments_num_after)
    log.debug(seg_distribution_after[chaos_querynode_id]["sealed"])
    assert segments_num_after < segments_num_before
    # the collection must still be searchable after the chaos is removed
    search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params,
                                        ct.default_limit, timeout=120)
    assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
class LoadBalanceChecker(Checker): """check loadbalance operations in a dependent thread""" def __init__(self, collection_name=None): super().__init__(collection_name=collection_name) self.utility_wrap = ApiUtilityWrapper() self.c_wrap.load(enable_traceback=enable_traceback) def keep_running(self): while True: c_name = self.c_wrap.name res, _ = self.c_wrap.get_replicas() # prepare load balance params # find a group which has multi nodes group_nodes = [] for g in res.groups: if len(g.group_nodes) >= 2: group_nodes = list(g.group_nodes) break src_node_id = group_nodes[0] dst_node_ids = group_nodes[1:] res, _ = self.utility_wrap.get_query_segment_info(c_name) segment_distribution = cf.get_segment_distribution(res) sealed_segment_ids = segment_distribution[src_node_id]["sealed"] # load balance t0 = time.time() _, result = self.utility_wrap.load_balance(c_name, src_node_id, dst_node_ids, sealed_segment_ids) t1 = time.time() # get segments distribution after load balance time.sleep(3) res, _ = self.utility_wrap.get_query_segment_info(c_name) segment_distribution = cf.get_segment_distribution(res) sealed_segment_ids_after_load_banalce = segment_distribution[ src_node_id]["sealed"] check_1 = len( set(sealed_segment_ids) & set(sealed_segment_ids_after_load_banalce)) == 0 des_sealed_segment_ids = [] for des_node_id in dst_node_ids: des_sealed_segment_ids += segment_distribution[des_node_id][ "sealed"] # assert sealed_segment_ids is subset of des_sealed_segment_ids check_2 = set(sealed_segment_ids).issubset( set(des_sealed_segment_ids)) if result and (check_1 and check_2): self.rsp_times.append(t1 - t0) self.average_time = ( (t1 - t0) + self.average_time * self._succ) / (self._succ + 1) self._succ += 1 log.debug( f"load balance success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}" ) else: self._fail += 1 sleep(10)
def __init__(self, collection_name=None): if collection_name is None: collection_name = cf.gen_unique_str("LoadBalanceChecker_") super().__init__(collection_name=collection_name) self.utility_wrap = ApiUtilityWrapper() self.c_wrap.load() self.sealed_segment_ids = None self.dst_node_ids = None self.src_node_id = None
def __init__(self, flush=False): super().__init__() self.utility_wrap = ApiUtilityWrapper() self.schema = cf.gen_default_collection_schema() self.flush = flush self.files = ["bulk_load_data_source.json"] self.row_based = True self.recheck_failed_task = False self.failed_tasks = []
def setup(self): log.info(("*" * 35) + " setup " + ("*" * 35)) self.connection_wrap = ApiConnectionsWrapper() self.utility_wrap = ApiUtilityWrapper() self.collection_wrap = ApiCollectionWrapper() self.partition_wrap = ApiPartitionWrapper() self.index_wrap = ApiIndexWrapper() self.collection_schema_wrap = ApiCollectionSchemaWrapper() self.field_schema_wrap = ApiFieldSchemaWrapper()
def setup_method(self, method): log.info(("*" * 35) + " setup " + ("*" * 35)) log.info("[setup_method] Start setup test case %s." % method.__name__) self.connection_wrap = ApiConnectionsWrapper() self.utility_wrap = ApiUtilityWrapper() self.collection_wrap = ApiCollectionWrapper() self.partition_wrap = ApiPartitionWrapper() self.index_wrap = ApiIndexWrapper() self.collection_schema_wrap = ApiCollectionSchemaWrapper() self.field_schema_wrap = ApiFieldSchemaWrapper()
def test_memory_stress_replicas_cross_group_load_balance(self, prepare_collection):
    """
    target: test apply memory stress on one group and no load balance cross replica groups
    method: 1.Limit all querynodes memory 6Gi
            2.Create and insert 1000,000 entities
            3.Load collection with two replicas
            4.Apply memory stress on one group 80%
    expected: Verify that load balancing across groups is not occurring
    """
    collection_w = prepare_collection
    utility_w = ApiUtilityWrapper()
    release_name = "mic-memory"
    # load with 2 replicas and confirm loading finished
    collection_w.load(replica_number=2)
    progress, _ = utility_w.loading_progress(collection_w.name)
    assert progress["loading_progress"] == "100%"
    # snapshot segment distribution before chaos
    seg_info_before, _ = utility_w.get_query_segment_info(collection_w.name)
    # stress every node of the first replica group
    replicas, _ = collection_w.get_replicas()
    group_nodes = list(replicas.groups[0].group_nodes)
    label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode"
    querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label)
    group_nodes_pod = [querynode_id_pod_pair[node_id] for node_id in group_nodes]
    # apply memory stress
    chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_replicas_memory_stress_pods.yaml")
    chaos_config['spec']['selector']['pods']['chaos-testing'] = group_nodes_pod
    log.debug(chaos_config)
    chaos_res = CusResource(kind=chaos_config['kind'],
                            group=constants.CHAOS_GROUP,
                            version=constants.CHAOS_VERSION,
                            namespace=constants.CHAOS_NAMESPACE)
    chaos_res.create(chaos_config)
    log.debug(f"Apply memory stress on querynode {group_nodes}, pod {group_nodes_pod}")
    # convert the duration string (e.g. "3m") into seconds and wait out the chaos
    duration = chaos_config.get('spec').get('duration')
    duration = duration.replace('h', '*3600+').replace('m', '*60+').replace('s', '*1+') + '+0'
    sleep(eval(duration))
    chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None))
    # verify no cross-group balancing happened: per-node distribution unchanged
    seg_info_after, _ = utility_w.get_query_segment_info(collection_w.name)
    seg_distribution_before = cf.get_segment_distribution(seg_info_before)
    seg_distribution_after = cf.get_segment_distribution(seg_info_after)
    for node_id in group_nodes:
        assert len(seg_distribution_before[node_id]) == len(seg_distribution_after[node_id])
    # collection remains searchable
    search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params,
                                        ct.default_limit, timeout=120)
    assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
def test_memory_stress_replicas_group_insufficient(self, prepare_collection, mode): """ target: test apply stress memory on different number querynodes and the group failed to load, bacause of the memory is insufficient method: 1.Limit querynodes memory 5Gi 2.Create collection and insert 1000,000 entities 3.Apply memory stress on querynodes and it's memory is not enough to load replicas expected: Verify load raise exception, and after delete chaos, load and search successfully """ collection_w = prepare_collection utility_w = ApiUtilityWrapper() chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml") # Update config chaos_config['spec']['mode'] = mode chaos_config['spec']['stressors']['memory']['size'] = '5Gi' log.debug(chaos_config) chaos_res = CusResource(kind=chaos_config['kind'], group=constants.CHAOS_GROUP, version=constants.CHAOS_VERSION, namespace=constants.CHAOS_NAMESPACE) chaos_res.create(chaos_config) # chaos_start = time.time() log.debug("chaos injected") sleep(10) try: # load failed err = {"err_code": 1, "err_msg": "shuffleSegmentsToQueryNodeV2: insufficient memory of available node"} collection_w.load(replica_number=5, timeout=60, check_task=CheckTasks.err_res, check_items=err) # query failed because not loaded err = {"err_code": 1, "err_msg": "not loaded into memory"} collection_w.query("int64 in [0]", check_task=CheckTasks.err_res, check_items=err) # delete chaos meta_name = chaos_config.get('metadata', None).get('name', None) chaos_res.delete(metadata_name=meta_name) sleep(10) # after delete chaos load and query successfully collection_w.load(replica_number=5, timeout=60) progress, _ = utility_w.loading_progress(collection_w.name) # assert progress["loading_progress"] == "100%" query_res, _ = collection_w.query("int64 in [0]") assert len(query_res) != 0 collection_w.release() except Exception as e: raise Exception(str(e)) finally: log.debug("Test finished")
def __init__(self, collection_name=None, files=[]): if collection_name is None: collection_name = cf.gen_unique_str("BulkLoadChecker_") super().__init__(collection_name=collection_name) self.utility_wrap = ApiUtilityWrapper() self.schema = cf.gen_default_collection_schema() self.files = files self.row_based = True self.recheck_failed_task = False self.failed_tasks = [] self.c_name = None
def test_wait_index_invalid_name(self, get_invalid_collection_name): """ target: test wait_index method: input invalid name expected: raise exception """ self._connect() c_name = get_invalid_collection_name ut = ApiUtilityWrapper() ex, _ = ut.wait_for_index_building_complete(c_name) log.error(str(ex)) assert "invalid" or "illegal" in str(ex)
def test_list_collections_using_invalid(self): """ target: test list_collections with invalid using method: input invalid name expected: raise exception """ self._connect() using = "empty" ut = ApiUtilityWrapper(using=using) ex, _ = ut.list_collections() log.error(str(ex)) assert "invalid" or "illegal" in str(ex)
def test_index_process_invalid_name(self, get_invalid_collection_name): """ target: test building_process method: input invalid name expected: raise exception """ self._connect() c_name = get_invalid_collection_name ut = ApiUtilityWrapper() ex, _ = ut.index_building_progress(c_name) log.error(str(ex)) assert "invalid" or "illegal" in str(ex)
def test_has_partition_name_invalid(self, get_invalid_partition_name): """ target: test has_partition with error partition name method: input invalid name expected: raise exception """ self._connect() ut = ApiUtilityWrapper() c_name = cf.gen_unique_str(prefix) p_name = get_invalid_partition_name ex, _ = ut.has_partition(c_name, p_name) log.error(str(ex)) assert "invalid" or "illegal" in str(ex)
def _test_list_collections_using_invalid(self): """ target: test list_collections with invalid using method: input invalid name expected: raise exception """ self._connect() using = "empty" ut = ApiUtilityWrapper() ex, _ = ut.list_collections(using=using, check_items={ ct.err_code: 0, ct.err_msg: "should create connect" })
class BulkLoadChecker(Checker): """check bulk load operations in a dependent thread""" def __init__(self, collection_name=None, files=[]): if collection_name is None: collection_name = cf.gen_unique_str("BulkLoadChecker_") super().__init__(collection_name=collection_name) self.utility_wrap = ApiUtilityWrapper() self.schema = cf.gen_default_collection_schema() self.files = files self.row_based = True self.recheck_failed_task = False self.failed_tasks = [] self.c_name = None def update(self, files=None, schema=None, row_based=None): if files is not None: self.files = files if schema is not None: self.schema = schema if row_based is not None: self.row_based = row_based @trace() def bulk_load(self): task_ids, result = self.utility_wrap.bulk_load(collection_name=self.c_name, row_based=self.row_based, files=self.files) completed, result = self.utility_wrap.wait_for_bulk_load_tasks_completed(task_ids=task_ids, timeout=30) return task_ids, completed @exception_handler() def run_task(self): if self.recheck_failed_task and self.failed_tasks: self.c_name = self.failed_tasks.pop(0) log.debug(f"check failed task: {self.c_name}") else: self.c_name = cf.gen_unique_str("BulkLoadChecker_") self.c_wrap.init_collection(name=self.c_name, schema=self.schema) # import data task_ids, completed = self.bulk_load() if not completed: self.failed_tasks.append(self.c_name) return task_ids, completed def keep_running(self): while self._keep_running: self.run_task() sleep(constants.WAIT_PER_OP / 10)
class Base:
    """ Initialize class object """
    # API wrapper handles, re-created per test in setup_method
    connection_wrap = None
    collection_wrap = None
    partition_wrap = None
    index_wrap = None
    utility_wrap = None
    collection_schema_wrap = None
    field_schema_wrap = None
    # NOTE(review): class-level mutable list — shared across all subclasses
    # that register collection objects into it
    collection_object_list = []

    def setup_class(self):
        log.info("[setup_class] Start setup class...")

    def teardown_class(self):
        log.info("[teardown_class] Start teardown class...")

    def setup_method(self, method):
        # recreate all wrappers so each test starts from a clean state
        log.info(("*" * 35) + " setup " + ("*" * 35))
        log.info("[setup_method] Start setup test case %s." % method.__name__)
        self.connection_wrap = ApiConnectionsWrapper()
        self.utility_wrap = ApiUtilityWrapper()
        self.collection_wrap = ApiCollectionWrapper()
        self.partition_wrap = ApiPartitionWrapper()
        self.index_wrap = ApiIndexWrapper()
        self.collection_schema_wrap = ApiCollectionSchemaWrapper()
        self.field_schema_wrap = ApiFieldSchemaWrapper()

    def teardown_method(self, method):
        log.info(("*" * 35) + " teardown " + ("*" * 35))
        log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
        # best-effort cleanup: failures here are logged, never raised,
        # so one test's teardown cannot fail the next test
        try:
            """ Drop collection before disconnect """
            # reconnect if the default connection was dropped by the test
            if self.connection_wrap.get_connection(alias=DefaultConfig.DEFAULT_USING)[0] is None:
                self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING,
                                             host=param_info.param_host,
                                             port=param_info.param_port)
            if self.collection_wrap.collection is not None:
                self.collection_wrap.drop(check_task=ct.CheckTasks.check_nothing)
            # drop any extra collections tests registered on the class list
            collection_list = self.utility_wrap.list_collections()[0]
            for collection_object in self.collection_object_list:
                if collection_object.collection is not None and collection_object.name in collection_list:
                    collection_object.drop(check_task=ct.CheckTasks.check_nothing)
        except Exception as e:
            log.debug(str(e))
        try:
            """ Delete connection and reset configuration"""
            res = self.connection_wrap.list_connections()
            for i in res[0]:
                self.connection_wrap.remove_connection(i[0])
            # because the connection is in singleton mode, it needs to be restored to the original state after teardown
            self.connection_wrap.add_connection(default={"host": DefaultConfig.DEFAULT_HOST,
                                                         "port": DefaultConfig.DEFAULT_PORT})
        except Exception as e:
            log.debug(str(e))
class CompactChecker(Checker): """check compact operations in a dependent thread""" def __init__(self, collection_name=None): super().__init__(collection_name=collection_name) self.ut = ApiUtilityWrapper() self.c_wrap.load( enable_traceback=enable_traceback) # load before compact def keep_running(self): while True: seg_info = self.ut.get_query_segment_info(self.c_wrap.name) t0 = time.time() res, result = self.c_wrap.compact(timeout=timeout) print(f"compact done: res {res}") self.c_wrap.wait_for_compaction_completed() self.c_wrap.get_compaction_plans() t1 = time.time() if result: self.rsp_times.append(t1 - t0) self.average_time = ( (t1 - t0) + self.average_time * self._succ) / (self._succ + 1) self._succ += 1 log.debug( f"compact success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}" ) else: self._fail += 1 sleep(constants.WAIT_PER_OP / 10)
def test_memory_stress_replicas_befor_load(self, prepare_collection): """ target: test querynode group load with insufficient memory method: 1.Limit querynode memory ? 2Gi 2.Load sealed data (needed memory > memory limit) expected: Raise an exception """ collection_w = prepare_collection utility_w = ApiUtilityWrapper() err = {"err_code": 1, "err_msg": "xxxxxxxxx"} # collection_w.load(replica_number=2, timeout=60, check_task=CheckTasks.err_res, check_items=err) collection_w.load(replica_number=5) utility_w.loading_progress(collection_w.name) search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, timeout=60)
def test_memory_stress_replicas_load_balance_single_node(self, prepare_collection): """ target: test apply memory stress on single node replica, and it OOMKilled method: 1.Deploy 2 querynodes and limit memory 6Gi 2.Loading 1000,000 entities (data_size=500Mb) with 2 replicas (memory_usage=1.5Gb) 3.Apply memory stress on one querynode and make it OOMKilled expected: After deleting chaos, querynode turns running, search successfully """ collection_w = prepare_collection utility_w = ApiUtilityWrapper() # load and searchc collection_w.load(replica_number=2) progress, _ = utility_w.loading_progress(collection_w.name) assert progress["loading_progress"] == "100%" query_res, _ = collection_w.query("int64 in [0]") assert len(query_res) != 0 # apply memory stress chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml") # Update config chaos_config['spec']['mode'] = "one" chaos_config['spec']['stressors']['memory']['size'] = '6Gi' chaos_config['spec']['duration'] = "1m" log.debug(chaos_config) duration = chaos_config.get('spec').get('duration') duration = duration.replace('h', '*3600+').replace('m', '*60+').replace('s', '*1+') + '+0' chaos_res = CusResource(kind=chaos_config['kind'], group=constants.CHAOS_GROUP, version=constants.CHAOS_VERSION, namespace=constants.CHAOS_NAMESPACE) chaos_res.create(chaos_config) sleep(eval(duration)) chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None)) # release and load again collection_w.release() collection_w.load(replica_number=2) progress, _ = utility_w.loading_progress(collection_w.name) assert progress["loading_progress"] == "100%" search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, timeout=120) assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
def test_has_partition_name_invalid(self, get_invalid_partition_name): """ target: test has_partition with error partition name method: input invalid name expected: raise exception """ self._connect() ut = ApiUtilityWrapper() c_name = cf.gen_unique_str(prefix) p_name = get_invalid_partition_name if isinstance(p_name, str) and p_name: ex, _ = ut.has_partition(c_name, p_name, check_task=CheckTasks.err_res, check_items={ ct.err_code: 1, ct.err_msg: "Invalid" })
def test_chaos_memory_stress_replicas_OOM(self, prepare_collection, mode): """ target: test apply memory stress during loading, and querynode OOMKilled method: 1.Deploy and limit querynode memory limit 6Gi 2.Create collection and insert 1000,000 entities 3.Apply memory stress and querynode OOMKilled during loading replicas expected: Verify the mic is available to load and search querynode restart """ collection_w = prepare_collection utility_w = ApiUtilityWrapper() chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml") chaos_config['spec']['mode'] = mode chaos_config['spec']['duration'] = '3m' chaos_config['spec']['stressors']['memory']['size'] = '6Gi' log.debug(chaos_config) chaos_res = CusResource(kind=chaos_config['kind'], group=constants.CHAOS_GROUP, version=constants.CHAOS_VERSION, namespace=constants.CHAOS_NAMESPACE) chaos_res.create(chaos_config) log.debug("chaos injected") collection_w.load(replica_number=2, timeout=60, _async=True) utility_w.wait_for_loading_complete(collection_w.name) progress, _ = utility_w.loading_progress(collection_w.name) assert progress["loading_progress"] == '100%' sleep(180) chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None)) # TODO search failed search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, timeout=120) assert 1 == len(search_res) and ct.default_limit == len(search_res[0]) collection_w.release() collection_w.load(replica_number=2) search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, timeout=120) assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
class LoadBalanceChecker(Checker): """check loadbalance operations in a dependent thread""" def __init__(self, collection_name=None): if collection_name is None: collection_name = cf.gen_unique_str("LoadBalanceChecker_") super().__init__(collection_name=collection_name) self.utility_wrap = ApiUtilityWrapper() self.c_wrap.load() self.sealed_segment_ids = None self.dst_node_ids = None self.src_node_id = None @trace() def load_balance(self): res, result = self.utility_wrap.load_balance( self.c_wrap.name, self.src_node_id, self.dst_node_ids, self.sealed_segment_ids) return res, result def prepare(self): """prepare load balance params""" res, _ = self.c_wrap.get_replicas() # find a group which has multi nodes group_nodes = [] for g in res.groups: if len(g.group_nodes) >= 2: group_nodes = list(g.group_nodes) break self.src_node_id = group_nodes[0] self.dst_node_ids = group_nodes[1:] res, _ = self.utility_wrap.get_query_segment_info(self.c_wrap.name) segment_distribution = cf.get_segment_distribution(res) self.sealed_segment_ids = segment_distribution[self.src_node_id]["sealed"] @exception_handler() def run_task(self): self.prepare() res, result = self.load_balance() return res, result def keep_running(self): while self._keep_running: self.run_task() sleep(constants.WAIT_PER_OP / 10)
def test_memory_stress_replicas_group_sufficient(self, prepare_collection, mode): """ target: test apply stress memory on one querynode and the memory is enough to load replicas method: 1.Limit all querynodes memory 6Gi 2.Apply 3Gi memory stress on different number of querynodes (load whole collection need about 1.5GB) expected: Verify load successfully and search result are correct """ collection_w = prepare_collection utility_w = ApiUtilityWrapper() # # apply memory stress chaos chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml") chaos_config['spec']['mode'] = mode chaos_config['spec']['duration'] = '3m' chaos_config['spec']['stressors']['memory']['size'] = '3Gi' log.debug(chaos_config) chaos_res = CusResource(kind=chaos_config['kind'], group=constants.CHAOS_GROUP, version=constants.CHAOS_VERSION, namespace=constants.CHAOS_NAMESPACE) chaos_res.create(chaos_config) log.debug("chaos injected") sleep(20) # try: collection_w.load(replica_number=2, timeout=60) utility_w.loading_progress(collection_w.name) replicas, _ = collection_w.get_replicas() log.debug(replicas) search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, timeout=120) assert 1 == len(search_res) and ct.default_limit == len(search_res[0]) collection_w.release() except Exception as e: raise Exception(str(e)) finally: # delete chaos meta_name = chaos_config.get('metadata', None).get('name', None) chaos_res.delete(metadata_name=meta_name) log.debug("Test finished")
class Base:
    """ Initialize class object """
    # API wrapper handles, re-created per test in setup_method
    connection_wrap = None
    collection_wrap = None
    partition_wrap = None
    index_wrap = None
    utility_wrap = None
    collection_schema_wrap = None
    field_schema_wrap = None
    # NOTE(review): class-level mutable list — shared across all subclasses
    # that register collection objects into it
    collection_object_list = []

    def setup_class(self):
        log.info("[setup_class] Start setup class...")

    def teardown_class(self):
        log.info("[teardown_class] Start teardown class...")
        pass

    def setup_method(self, method):
        # recreate all wrappers so each test starts from a clean state
        log.info(("*" * 35) + " setup " + ("*" * 35))
        log.info("[setup_method] Start setup test case %s..." % method.__name__)
        self.connection_wrap = ApiConnectionsWrapper()
        self.utility_wrap = ApiUtilityWrapper()
        self.collection_wrap = ApiCollectionWrapper()
        self.partition_wrap = ApiPartitionWrapper()
        self.index_wrap = ApiIndexWrapper()
        self.collection_schema_wrap = ApiCollectionSchemaWrapper()
        self.field_schema_wrap = ApiFieldSchemaWrapper()

    def teardown_method(self, method):
        log.info(("*" * 35) + " teardown " + ("*" * 35))
        log.info("[teardown_method] Start teardown test case %s..."
                 % method.__name__)
        # best-effort cleanup: failures here are logged, never raised,
        # so one test's teardown cannot fail the next test
        try:
            """ Drop collection before disconnect """
            # reconnect if the default connection was dropped by the test
            if self.connection_wrap.get_connection(alias=DefaultConfig.DEFAULT_USING)[0] is None:
                self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING,
                                             host=param_info.param_host,
                                             port=param_info.param_port)
            if self.collection_wrap.collection is not None:
                self.collection_wrap.drop(check_task=ct.CheckTasks.check_nothing)
            # drop any extra collections tests registered on the class list
            for collection_object in self.collection_object_list:
                if collection_object.collection is not None \
                        and collection_object.name in self.utility_wrap.list_collections()[0]:
                    collection_object.drop(check_task=ct.CheckTasks.check_nothing)
        except Exception as e:
            log.debug(str(e))
        try:
            """ Delete connection and reset configuration"""
            res = self.connection_wrap.list_connections()
            for i in res[0]:
                self.connection_wrap.remove_connection(i[0])
            # because the connection is in singleton mode, it needs to be restored to the original state after teardown
            self.connection_wrap.add_connection(default={"host": DefaultConfig.DEFAULT_HOST,
                                                         "port": DefaultConfig.DEFAULT_PORT})
        except Exception as e:
            log.debug(str(e))

    @pytest.fixture(scope="module", autouse=True)
    def initialize_env(self, request):
        """ clean log before testing """
        # pull connection/log options from the pytest command line
        host = request.config.getoption("--host")
        port = request.config.getoption("--port")
        handler = request.config.getoption("--handler")
        clean_log = request.config.getoption("--clean_log")
        """ params check """
        assert ip_check(host) and number_check(port)
        """ modify log files """
        cf.modify_file(file_path_list=[log_config.log_debug, log_config.log_info, log_config.log_err],
                       is_modify=clean_log)
        log.info("#" * 80)
        log.info("[initialize_milvus] Log cleaned up, start testing...")
        param_info.prepare_param_info(host, port, handler)
def __init__(self, collection_name=None): super().__init__(collection_name=collection_name) self.utility_wrap = ApiUtilityWrapper() self.c_wrap.load(enable_traceback=enable_traceback)
class BulkLoadChecker(Checker):
    """check bulk load operations in a dependent thread"""

    def __init__(self, flush=False):
        super().__init__()
        self.utility_wrap = ApiUtilityWrapper()
        self.schema = cf.gen_default_collection_schema()
        # when True, flush (via num_entities) before and after each load
        self.flush = flush
        self.files = ["bulk_load_data_source.json"]
        self.row_based = True
        # when True, keep_running re-checks collections whose task failed
        self.recheck_failed_task = False
        self.failed_tasks = []

    def update(self, files=None, schema=None, row_based=None):
        # override the data files, schema, or row_based flag for later tasks
        if files is not None:
            self.files = files
        if schema is not None:
            self.schema = schema
        if row_based is not None:
            self.row_based = row_based

    def keep_running(self):
        # runs forever in its own thread; each pass performs one bulk load
        while True:
            if self.recheck_failed_task and self.failed_tasks:
                # re-check a collection whose earlier bulk load failed
                c_name = self.failed_tasks.pop(0)
                log.info(f"check failed task: {c_name}")
            else:
                c_name = cf.gen_unique_str("BulkLoadChecker_")
                self.c_wrap.init_collection(name=c_name, schema=self.schema)
            if self.flush:
                # num_entities triggers a flush; time it separately
                t0 = time.time()
                pre_entities_num = self.c_wrap.num_entities
                tt = time.time() - t0
                log.info(f"flush before bulk load, cost time: {tt:.4f}")
            # import data
            t0 = time.time()
            task_ids, res_1 = self.utility_wrap.bulk_load(collection_name=c_name,
                                                          row_based=self.row_based,
                                                          files=self.files)
            log.info(f"bulk load task ids:{task_ids}")
            completed, res_2 = self.utility_wrap.wait_for_bulk_load_tasks_completed(task_ids=task_ids,
                                                                                   timeout=30)
            tt = time.time() - t0
            # added_num = sum(res_2[task_id].row_count for task_id in task_ids)
            if completed:
                # maintain a running average of successful bulk-load latency
                self.rsp_times.append(tt)
                self.average_time = (tt + self.average_time * self._succ) / (self._succ + 1)
                self._succ += 1
                log.info(
                    f"bulk load success for collection {c_name}, time: {tt:.4f}, average_time: {self.average_time:4f}"
                )
                if self.flush:
                    t0 = time.time()
                    cur_entities_num = self.c_wrap.num_entities
                    tt = time.time() - t0
                    log.info(f"flush after bulk load, cost time: {tt:.4f}")
            else:
                self._fail += 1
                # if the task failed, store the failed collection name for further checking after chaos
                self.failed_tasks.append(c_name)
                log.info(
                    f"bulk load failed for collection {c_name} time: {tt:.4f}, average_time: {self.average_time:4f}"
                )
            sleep(constants.WAIT_PER_OP / 10)
def test_scale_in_query_node_less_than_replicas(self):
    """
    target: test scale in cluster and querynode < replica
    method: 1.Deploy cluster with 2 querynodes
            2.Create and insert data, flush
            3.Load collection with 2 replica number
            4.Scale in querynode from 2 to 1 and query
            5.Scale out querynode from 1 back to 2
    expected: Verify search successfully after scale out
    """
    # NOTE: docstring fixed — the deployment below uses 2 querynodes and
    # scales 2 -> 1 -> 2, not 3 -> 1 -> 3 as the old docstring claimed.
    release_name = "scale-in-query"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.mode': 'cluster',
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 2,
        'spec.config.common.retentionDuration': 60
    }
    mic = MilvusOperator()
    mic.install(query_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        raise MilvusException(message=f'Milvus healthy timeout 1800s')
    # bug fix: the former `except Exception as e: raise Exception(str(e))`
    # re-wrapped every failure in a bare Exception, discarding the original
    # exception type and traceback; try/finally alone keeps both while still
    # exporting logs and uninstalling the release.
    try:
        # prepare collection
        connections.connect("scale-in", host=host, port=19530)
        utility_w = ApiUtilityWrapper()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=cf.gen_unique_str("scale_in"),
                                     schema=cf.gen_default_collection_schema(),
                                     using="scale-in")
        collection_w.insert(cf.gen_default_dataframe_data())
        assert collection_w.num_entities == ct.default_nb
        # load multi replicas and search success
        collection_w.load(replica_number=2)
        search_res, is_succ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                  ct.default_float_vec_field_name,
                                                  ct.default_search_params,
                                                  ct.default_limit)
        assert len(search_res[0].ids) == ct.default_limit
        log.info("Search successfully after load with 2 replicas")
        log.debug(collection_w.get_replicas()[0])
        log.debug(utility_w.get_query_segment_info(collection_w.name, using="scale-in"))
        # scale in querynode from 2 to 1, less than replica number
        log.debug("Scale in querynode from 2 to 1")
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 1}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")
        # search and not assure success
        collection_w.search(cf.gen_vectors(1, ct.default_dim),
                            ct.default_float_vec_field_name,
                            ct.default_search_params, ct.default_limit,
                            check_task=CheckTasks.check_nothing)
        log.debug(collection_w.get_replicas(check_task=CheckTasks.check_nothing)[0])
        # scale querynode from 1 back to 2
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 2}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")
        # verify search success
        collection_w.search(cf.gen_vectors(1, ct.default_dim),
                            ct.default_float_vec_field_name,
                            ct.default_search_params, ct.default_limit)
        # Verify replica info is correct
        replicas = collection_w.get_replicas()[0]
        assert len(replicas.groups) == 2
        for group in replicas.groups:
            assert len(group.group_nodes) == 1
        # Verify loaded segment info is correct
        seg_info = utility_w.get_query_segment_info(collection_w.name, using="scale-in")[0]
        num_entities = 0
        for seg in seg_info:
            assert len(seg.nodeIds) == 2
            num_entities += seg.num_rows
        assert num_entities == ct.default_nb
    finally:
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_customize_segment_size(self, seg_size, seg_count):
    """
    target: test that a customized segment size takes effect
    method: 1.Install milvus with the given segment size
            2.Create a collection and insert 2 million entities
            3.Load and check the number of sealed segments
            4.Build an IVF_SQ8 index, reload, then search and query
    expected: loaded segment count equals seg_count and all e2e
              operations (insert/load/search/index/query) succeed
    """
    log.info(f"start to install milvus with segment size {seg_size}")
    release_name, host, port = _install_milvus(seg_size)
    # keep the release name on self so teardown can uninstall the deployment
    self.release_name = release_name
    assert host is not None
    conn = connections.connect("default", host=host, port=port)
    assert conn is not None
    mil = MilvusSys(alias="default")
    log.info(f"milvus build version: {mil.build_version}")
    log.info(f"start to e2e verification: {seg_size}")

    # create
    name = cf.gen_unique_str("segsiz")
    t0 = time.time()
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=name,
                                 schema=cf.gen_default_collection_schema(),
                                 timeout=40)
    tt = time.time() - t0
    assert collection_w.name == name
    entities = collection_w.num_entities
    log.info(f"assert create collection: {tt}, init_entities: {entities}")

    # insert one batch and verify the insert succeeded
    nb = 50000
    data = cf.gen_default_list_data(nb=nb)
    t0 = time.time()
    _, res = collection_w.insert(data)
    tt = time.time() - t0
    log.info(f"assert insert: {tt}")
    assert res

    # insert the remaining batches for 2 million entities in total
    rounds = 40
    for _ in range(rounds - 1):
        _, res = collection_w.insert(data)
    entities = collection_w.num_entities
    assert entities == nb * rounds

    # load and check that the sealed segment count matches the expected
    # value for this segment-size configuration
    collection_w.load()
    utility_wrap = ApiUtilityWrapper()
    segs, _ = utility_wrap.get_query_segment_info(collection_w.name)
    log.info(f"assert segments: {len(segs)}")
    assert len(segs) == seg_count

    # search
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
    t0 = time.time()
    res_1, _ = collection_w.search(data=search_vectors,
                                   anns_field=ct.default_float_vec_field_name,
                                   param=search_params,
                                   limit=1,
                                   timeout=30)
    tt = time.time() - t0
    log.info(f"assert search: {tt}")
    assert len(res_1) == 1
    collection_w.release()

    # index (insert one more batch first so there is growing data to seal)
    d = cf.gen_default_list_data()
    collection_w.insert(d)
    log.info(f"assert index entities: {collection_w.num_entities}")
    _index_params = {
        "index_type": "IVF_SQ8",
        "params": {"nlist": 64},
        "metric_type": "L2"
    }
    t0 = time.time()
    # return value is not needed; only the index count is verified below
    collection_w.create_index(field_name=ct.default_float_vec_field_name,
                              index_params=_index_params,
                              name=cf.gen_unique_str(),
                              timeout=120)
    tt = time.time() - t0
    log.info(f"assert index: {tt}")
    assert len(collection_w.indexes) == 1

    # search again after indexing and reloading
    t0 = time.time()
    collection_w.load()
    tt = time.time() - t0
    log.info(f"assert load: {tt}")
    search_vectors = cf.gen_vectors(1, ct.default_dim)
    t0 = time.time()
    res_1, _ = collection_w.search(data=search_vectors,
                                   anns_field=ct.default_float_vec_field_name,
                                   param=search_params,
                                   limit=1,
                                   timeout=30)
    tt = time.time() - t0
    log.info(f"assert search: {tt}")

    # query by primary-key term expression
    term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
    t0 = time.time()
    res, _ = collection_w.query(term_expr, timeout=30)
    tt = time.time() - t0
    log.info(f"assert query result {len(res)}: {tt}")
def __init__(self, collection_name=None): if collection_name is None: collection_name = cf.gen_unique_str("CompactChecker_") super().__init__(collection_name=collection_name) self.ut = ApiUtilityWrapper() self.c_wrap.load() # load before compact