def test_search_binary_tanimoto_flat_index(self):
    """
    target: search a binary collection and check the returned distance
    method: compare the returned distance with the Tanimoto distance
            computed locally from the raw vectors
    expected: the returned distance equals the computed value
    """
    # 1. build a collection pre-populated with binary vectors
    collection_w, _, binary_raw_vector = \
        self.init_collection_general(prefix, True, is_binary=True)
    # 2. attach a BIN_IVF_FLAT index using the TANIMOTO metric
    index_params = {
        "index_type": "BIN_IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "TANIMOTO"
    }
    collection_w.create_index("binary_vector", index_params)
    # 3. compute reference distances on the client side for the first
    #    two inserted raw vectors
    query_raw_vector, binary_vectors = cf.gen_binary_vectors(
        3000, default_dim)
    reference = [cf.tanimoto(query_raw_vector[0], binary_raw_vector[i])
                 for i in range(2)]
    # 4. search and check the top hit against the closer reference value
    search_params = {"metric_type": "TANIMOTO", "params": {"nprobe": 10}}
    res, _ = collection_w.search(binary_vectors[:default_nq],
                                 "binary_vector", search_params,
                                 default_limit, "int64 >= 0")
    assert abs(res[0]._distances[0] - min(reference)) <= epsilon
def test_search_param_invalid_binary(self):
    """
    target: test search within binary data (invalid parameter)
    method: search with wrong metric type
    expected: raise exception and report the error
    """
    # fix: "paramter" -> "parameter" in the log message
    log.info(
        "test_search_param_invalid_binary: test invalid parameter with binary data"
    )
    # 1. initialize with binary data
    collection_w = self.init_collection_general(prefix, True,
                                                is_binary=True)[0]
    # 2. create a JACCARD-metric binary index
    default_index = {
        "index_type": "BIN_IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "JACCARD"
    }
    collection_w.create_index("binary_vector", default_index)
    # 3. searching with a mismatched (L2) metric must raise; the error
    #    message is expected to mention the metric is unsupported
    binary_vectors = cf.gen_binary_vectors(3000, default_dim)[1]
    wrong_search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    collection_w.search(binary_vectors[:default_nq], "binary_vector",
                        wrong_search_params, default_limit,
                        default_search_exp,
                        check_task=CheckTasks.err_res,
                        check_items={
                            "err_code": 1,
                            "err_msg": "unsupported"
                        })
def test_search_binary_flat_with_L2(self):
    """
    target: search a binary collection with the L2 metric
    method: run a search using L2 search params against binary vectors
    expected: the search fails and reports an error
    """
    log.info(
        "Test case of search interface: test_search_binary_flat_with_L2")
    # build a collection pre-populated with binary vectors
    collection_w = self.init_collection_general(prefix, True,
                                                is_binary=True)[0]
    # L2 is not a valid metric for binary vectors, so the search must
    # come back as an error
    _, binary_vectors = cf.gen_binary_vectors(3000, default_dim)
    l2_search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    collection_w.search(binary_vectors[:default_nq], "binary_vector",
                        l2_search_params, default_limit, "int64 >= 0",
                        check_task=CheckTasks.err_res,
                        check_items={
                            "err_code": 1,
                            "err_msg": "Search failed"
                        })
def test_calc_distance_binary_metric(self, metric_field, metric_binary):
    """
    target: test calculate distance with binary vectors
    method: calculate distance between two batches of binary vectors
    expected: distance calculated successfully
    """
    self._connect()
    num_vectors = 10
    # raw_* are the plain bit lists used for the client-side check;
    # packed_* are the byte-packed forms passed to the server
    raw_left, packed_left = cf.gen_binary_vectors(num_vectors, default_dim)
    raw_right, packed_right = cf.gen_binary_vectors(num_vectors, default_dim)
    op_l = {"bin_vectors": packed_left}
    op_r = {"bin_vectors": packed_right}
    params = {metric_field: metric_binary}
    self.utility_wrap.calc_distance(op_l, op_r, params,
                                    check_task=CheckTasks.check_distance,
                                    check_items={
                                        "vectors_l": raw_left,
                                        "vectors_r": raw_right,
                                        "metric": metric_binary
                                    })
def test_task_2(self, index_type, data_size):
    """
    before reinstall: create collection, insert data and create index,
    load and search
    after reinstall: get collection, load, search, insert data, create
    index, load, and search
    """
    name = "task_2_" + index_type
    insert_data = True
    # idiom fix: a direct boolean expression instead of
    # `True if ... else False`
    is_binary = "BIN" in index_type
    is_flush = False
    # create collection and insert data
    collection_w = self.init_collection_general(insert_data=insert_data,
                                                is_binary=is_binary,
                                                nb=data_size,
                                                is_flush=is_flush,
                                                name=name,
                                                active_trace=True)[0]
    # pick search vectors and field matching the collection's vector type
    vectors_to_search = cf.gen_vectors(default_nb, default_dim)
    default_search_field = ct.default_float_vec_field_name
    if is_binary:
        _, vectors_to_search = cf.gen_binary_vectors(
            default_nb, default_dim)
        default_search_field = ct.default_binary_vec_field_name
    # create index
    default_index = gen_index_param(index_type)
    collection_w.create_index(default_search_field, default_index)
    # load
    collection_w.load()
    # search
    search_params = gen_search_param(index_type)[0]
    collection_w.search(vectors_to_search[:default_nq],
                        default_search_field, search_params,
                        default_limit, default_search_exp,
                        check_task=CheckTasks.check_search_results,
                        check_items={
                            "nq": default_nq,
                            "limit": default_limit
                        })
    # query
    output_fields = [ct.default_int64_field_name]
    collection_w.query(default_term_expr, output_fields=output_fields,
                       check_task=CheckTasks.check_query_not_empty)
def test_check(self, collection_name, data_size):
    """
    before reinstall: create collection

    Reconnects to an existing collection by name and exercises the full
    read/write path: search, query, flush, insert, delete, drop/create
    index, and release/reload with a possibly changed replica number.

    NOTE(review): reconstructed from whitespace-mangled source; statement
    grouping (notably which statements sit inside the insert loop) was
    inferred from the inline comments — confirm against upstream.
    """
    self._connect()
    ms = MilvusSys()
    name = collection_name
    # open the pre-existing collection; no new data is inserted here
    collection_w = self.init_collection_general(
        insert_data=False, name=name, active_trace=True)[0]
    schema = collection_w.schema
    data_type = [field.dtype.name for field in schema.fields]
    field_name = [field.name for field in schema.fields]
    # dtype-name -> field-name lookup; presumably at most one field per
    # dtype in this schema, otherwise later entries win — verify
    type_field_map = dict(zip(data_type, field_name))
    is_binary = False
    if "BINARY_VECTOR" in data_type:
        is_binary = True
    # fallback index settings used when no vector index exists yet
    if is_binary:
        default_index_field = ct.default_binary_vec_field_name
        vector_index_type = "BIN_FLAT"
    else:
        default_index_field = ct.default_float_vec_field_name
        vector_index_type = "IVF_FLAT"
    is_vector_indexed = False
    is_string_indexed = False
    # NOTE(review): indexed_fields is computed but never read below
    indexed_fields = [index.field_name for index in collection_w.indexes]
    binary_vector_index_types = [
        index.params["index_type"] for index in collection_w.indexes
        if index.field_name == type_field_map.get("BINARY_VECTOR", "")]
    float_vector_index_types = [
        index.params["index_type"] for index in collection_w.indexes
        if index.field_name == type_field_map.get("FLOAT_VECTOR", "")]
    string_index_types = [
        index.params["index_type"] for index in collection_w.indexes
        if index.field_name == type_field_map.get("VARCHAR", "")]
    index_names = [index.index_name
                   for index in collection_w.indexes]  # used to drop index
    vector_index_types = binary_vector_index_types + float_vector_index_types
    if len(vector_index_types) > 0:
        is_vector_indexed = True
        # prefer the index type already present on the collection
        vector_index_type = vector_index_types[0]
    if len(string_index_types) > 0:
        is_string_indexed = True
    try:
        replicas, _ = collection_w.get_replicas(enable_traceback=False)
        replicas_loaded = len(replicas.groups)
    except Exception as e:
        # presumably get_replicas raises when the collection is not
        # loaded — treat that as "0 replicas loaded"
        log.info("get replicas failed")
        replicas_loaded = 0
    # params for search and query
    if is_binary:
        _, vectors_to_search = cf.gen_binary_vectors(
            default_nb, default_dim)
        default_search_field = ct.default_binary_vec_field_name
    else:
        vectors_to_search = cf.gen_vectors(default_nb,
                                           default_dim)
        default_search_field = ct.default_float_vec_field_name
    search_params = gen_search_param(vector_index_type)[0]
    # load if not loaded
    if replicas_loaded == 0:
        collection_w.load()
    # search and query
    collection_w.search(vectors_to_search[:default_nq],
                        default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "limit": default_limit})
    collection_w.query(default_term_expr,
                       output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)
    # flush (accessing num_entities is used here for its flush side
    # effect, per the original "# flush" comment)
    collection_w.num_entities
    # search and query
    collection_w.search(vectors_to_search[:default_nq],
                        default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "limit": default_limit})
    collection_w.query(default_term_expr,
                       output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)
    # insert data and flush
    for i in range(2):
        self.init_collection_general(insert_data=True,
                                     is_binary=is_binary,
                                     nb=data_size,
                                     is_flush=False,
                                     is_index=True,
                                     name=name)
        collection_w.num_entities
    # delete data
    delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
    collection_w.delete(expr=delete_expr)
    # search and query
    collection_w.search(vectors_to_search[:default_nq],
                        default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "limit": default_limit})
    collection_w.query(default_term_expr,
                       output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)
    # drop index if exist
    if len(index_names) > 0:
        for index_name in index_names:
            collection_w.drop_index(index_name=index_name)
    # search and query after dropping index
    collection_w.search(vectors_to_search[:default_nq],
                        default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "limit": default_limit})
    collection_w.query(default_term_expr,
                       output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)
    # create index (vector index plus a string index, each with a
    # freshly generated unique name)
    default_index_param = gen_index_param(vector_index_type)
    collection_w.create_index(default_index_field, default_index_param,
                              index_name=cf.gen_unique_str())
    collection_w.create_index(default_string_field_name, {},
                              index_name=cf.gen_unique_str())
    # search and query
    collection_w.search(vectors_to_search[:default_nq],
                        default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "limit": default_limit})
    collection_w.query(default_term_expr,
                       output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)
    # release and reload with changed replicas: bump to 2 replicas only
    # when the cluster actually has >= 2 query nodes
    collection_w.release()
    replica_number = 1
    if replicas_loaded in [0, 1] and len(ms.query_nodes) >= 2:
        replica_number = 2
    collection_w.load(replica_number=replica_number)
    # search and query
    collection_w.search(vectors_to_search[:default_nq],
                        default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "limit": default_limit})
    collection_w.query(default_term_expr,
                       output_fields=[ct.default_int64_field_name],
                       check_task=CheckTasks.check_query_not_empty)
def test_compact_after_binary_index(self):
    """
    target: test compact after create index
    method: 1.insert binary data into two segments
            2.create binary index
            3.compact
            4.search
    expected: Verify segment info and index info
    """
    # single-shard binary collection so both inserts land in one channel
    binary_schema = cf.gen_default_binary_collection_schema()
    collection_w = self.init_collection_wrap(
        name=cf.gen_unique_str(prefix), shards_num=1, schema=binary_schema)
    # two flushed inserts to produce two sealed segments
    for seg in range(2):
        binary_df, _ = cf.gen_default_binary_dataframe_data()
        collection_w.insert(data=binary_df)
        assert collection_w.num_entities == (seg + 1) * ct.default_nb
    # build the binary index and load for search
    collection_w.create_index(ct.default_binary_vec_field_name,
                              ct.default_binary_index)
    log.debug(collection_w.index())
    collection_w.load()
    jaccard_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
    vectors = cf.gen_binary_vectors(ct.default_nq, ct.default_dim)[1]
    search_res_one, _ = collection_w.search(
        vectors, ct.default_binary_vec_field_name, jaccard_params,
        ct.default_limit)
    assert len(search_res_one) == ct.default_nq
    for hits in search_res_one:
        assert len(hits) == ct.default_limit
    # compact and verify the segments were merged
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_plans(
        check_task=CheckTasks.check_merge_compact)
    # after compaction an unsupported metric must still be rejected
    unsupported_params = {"metric_type": "L1", "params": {"nprobe": 10}}
    search_res_two, _ = collection_w.search(
        vectors, ct.default_binary_vec_field_name, unsupported_params,
        ct.default_limit,
        check_task=CheckTasks.err_res,
        check_items={
            ct.err_code: 1,
            ct.err_msg: "metric type not found: (L1)"
        })
    # post-compaction results must match the pre-compaction ones
    search_res_two, _ = collection_w.search(
        vectors, ct.default_binary_vec_field_name, jaccard_params,
        ct.default_limit)
    for qi in range(ct.default_nq):
        for ri in range(ct.default_limit):
            assert search_res_two[qi][ri].id == search_res_one[qi][ri].id
def test_task_all(self, index_type, is_compacted,
                  segment_status, is_vector_indexed, is_string_indexed,
                  replica_number, is_deleted, data_size):
    """
    before reinstall: create collection and insert data, load and search

    Parameterized scenario builder: drives growing/sealed segments,
    optional vector/string indexes, optional deletes and compaction, and
    a configurable replica count, verifying search/query at each stage.

    NOTE(review): reconstructed from whitespace-mangled source; loop
    bodies follow the inline comments — confirm against upstream.
    """
    # derive a unique collection name from all parameter values so each
    # parameter combination gets its own collection
    name = ""
    for k, v in locals().items():
        if k in ["self", "name"]:
            continue
        name += f"_{k}_{v}"
    name = prefix + name
    self._connect()
    ms = MilvusSys()
    if len(ms.query_nodes) < replica_number:
        # this step is to make sure this testcase can run on standalone mode
        # or cluster mode which has only one querynode
        pytest.skip("skip test, not enough nodes")
    log.info(f"collection name: {name}, replica_number: {replica_number}, is_compacted: {is_compacted},"
             f"is_deleted: {is_deleted}, is_vector_indexed: {is_vector_indexed}, is_string_indexed: {is_string_indexed},"
             f"segment_status: {segment_status}, index_type: {index_type}")
    is_binary = True if "BIN" in index_type else False
    # params for search and query
    if is_binary:
        _, vectors_to_search = cf.gen_binary_vectors(
            default_nb, default_dim)
        default_search_field = ct.default_binary_vec_field_name
    else:
        vectors_to_search = cf.gen_vectors(default_nb, default_dim)
        default_search_field = ct.default_float_vec_field_name
    search_params = gen_search_param(index_type)[0]
    # init collection and insert with small size data without flush to get growing segment
    collection_w = self.init_collection_general(insert_data=True,
                                                is_binary=is_binary,
                                                nb=3000,
                                                is_flush=False,
                                                is_index=True,
                                                name=name)[0]
    # load for growing segment
    if replica_number >= 1:
        try:
            collection_w.release()
        except Exception as e:
            # release fails when the collection was never loaded; treat
            # it as best-effort and carry on
            log.error(
                f"release collection failed: {e} maybe the collection is not loaded")
        collection_w.load(replica_number=replica_number)
    # delete data for growing segment
    delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
    if is_deleted == "is_deleted":
        collection_w.delete(expr=delete_expr)
    # search and query for growing segment
    if replica_number >= 1:
        collection_w.search(vectors_to_search[:default_nq],
                            default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": default_limit})
        output_fields = [ct.default_int64_field_name]
        collection_w.query(default_term_expr, output_fields=output_fields,
                           check_task=CheckTasks.check_query_not_empty)
    # skip subsequent operations when segment_status is set to only_growing
    if segment_status == "only_growing":
        pytest.skip(
            "already get growing segment, skip subsequent operations")
    # insert with flush multiple times to generate multiple sealed segment
    for i in range(2):
        self.init_collection_general(insert_data=True,
                                     is_binary=is_binary,
                                     nb=data_size,
                                     is_flush=False,
                                     is_index=True,
                                     name=name)
        collection_w.flush()
    # params for creating index
    if is_binary:
        default_index_field = ct.default_binary_vec_field_name
    else:
        default_index_field = ct.default_float_vec_field_name
    # create index for vector
    if is_vector_indexed == "is_vector_indexed":
        default_index_param = gen_index_param(index_type)
        collection_w.create_index(default_index_field,
                                  default_index_param)
    # create index for string
    if is_string_indexed == "is_string_indexed":
        default_string_index_params = {}
        default_string_index_name = "_default_string_idx"
        collection_w.create_index(
            default_string_field_name, default_string_index_params,
            index_name=default_string_index_name)
    # delete data for sealed segment
    delete_expr = f"{ct.default_int64_field_name} in [10,11,12,13,14,15,16,17,18,19]"
    if is_deleted == "is_deleted":
        collection_w.delete(expr=delete_expr)
    if is_compacted == "is_compacted":
        collection_w.compact()
    if segment_status == "all":
        self.init_collection_general(insert_data=True,
                                     is_binary=is_binary,
                                     nb=3000,
                                     is_flush=False,
                                     is_index=True,
                                     name=name)
    # reload after flush and creating index
    if replica_number > 0:
        collection_w.release()
        collection_w.load(replica_number=replica_number)
    # insert data to get growing segment
    if segment_status == "all":
        self.init_collection_general(insert_data=True,
                                     is_binary=is_binary,
                                     nb=3000,
                                     is_flush=False,
                                     is_index=True,
                                     name=name)
    # search and query for sealed and growing segment
    if replica_number > 0:
        collection_w.search(vectors_to_search[:default_nq],
                            default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": default_limit})
        output_fields = [ct.default_int64_field_name]
        collection_w.query(default_term_expr, output_fields=output_fields,
                           check_task=CheckTasks.check_query_not_empty)