def init_collection_general(self, prefix="test", insert_data=False, nb=ct.default_nb,
                            partition_num=0, is_binary=False, is_all_data_type=False,
                            auto_id=False, dim=ct.default_dim, is_index=False,
                            primary_field=ct.default_int64_field_name, is_flush=True,
                            name=None, **kwargs):
    """
    target: create specified collections
    method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)
            2. create partitions if specified
            3. insert specified (binary/non-binary, default/all data type) data
               into each partition if any
            4. not load if specifying is_index as True
    expected: return collection and raw data, insert ids
    """
    log.info("Test case of search interface: initialize before test case")
    self._connect()
    # an explicit name overrides the generated unique one
    collection_name = name if name is not None else cf.gen_unique_str(prefix)
    vectors, binary_raw_vectors, insert_ids = [], [], []
    time_stamp = 0
    # 1 create collection: the most specific schema flag wins
    # (all-data-type overrides binary, binary overrides default)
    default_schema = cf.gen_default_collection_schema(
        auto_id=auto_id, dim=dim, primary_field=primary_field)
    if is_binary:
        default_schema = cf.gen_default_binary_collection_schema(
            auto_id=auto_id, dim=dim, primary_field=primary_field)
    if is_all_data_type:
        default_schema = cf.gen_collection_schema_all_datatype(
            auto_id=auto_id, dim=dim, primary_field=primary_field)
    log.info("init_collection_general: collection creation")
    collection_w = self.init_collection_wrap(name=collection_name,
                                             schema=default_schema, **kwargs)
    # 2 add extra partitions if specified (default is 1 partition named "_default")
    if partition_num > 0:
        cf.gen_partitions(collection_w, partition_num)
    # 3 insert data if specified
    if insert_data:
        collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \
            cf.insert_data(collection_w, nb, is_binary, is_all_data_type,
                           auto_id=auto_id, dim=dim)
        if is_flush:
            assert collection_w.is_empty is False
            assert collection_w.num_entities == nb
        # This condition will be removed after auto index feature
        if not is_index:
            collection_w.load()
    return collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp
def test_collection_binary_created_by_dataframe(self):
    """
    target: test collection with dataframe
    method: create collection with dataframe
    expected: create successfully
    """
    connection = self._connect()
    c_name = cf.gen_unique_str(prefix)
    expected_schema = cf.gen_default_binary_collection_schema()
    binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    # creating from a dataframe should yield the default binary schema
    self.collection_wrap.init_collection(
        name=c_name, data=binary_df,
        check_task=CheckTasks.check_collection_property,
        check_items={exp_name: c_name, exp_schema: expected_schema})
    connection.flush([c_name])
def init_collection_general(self, prefix, insert_data=False, nb=ct.default_nb,
                            partition_num=0, is_binary=False, is_all_data_type=False,
                            auto_id=False, dim=ct.default_dim, is_index=False):
    """
    target: create specified collections
    method: 1. create collections (binary/non-binary)
            2. create partitions if specified
            3. insert specified binary/non-binary data
               into each partition if any
    expected: return collection and raw data
    """
    log.info("Test case of search interface: initialize before test case")
    self._connect()
    collection_name = cf.gen_unique_str(prefix)
    vectors, binary_raw_vectors, insert_ids = [], [], []
    # 1 create collection: the most specific schema flag wins
    # (all-data-type overrides binary, binary overrides default)
    default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim)
    if is_binary:
        default_schema = cf.gen_default_binary_collection_schema(
            auto_id=auto_id, dim=dim)
    if is_all_data_type:
        default_schema = cf.gen_collection_schema_all_datatype(
            auto_id=auto_id, dim=dim)
    log.info("init_collection_general: collection creation")
    collection_w = self.init_collection_wrap(name=collection_name,
                                             schema=default_schema)
    # 2 add extra partitions if specified (default is 1 partition named "_default")
    if partition_num > 0:
        cf.gen_partitions(collection_w, partition_num)
    # 3 insert data if specified
    if insert_data:
        collection_w, vectors, binary_raw_vectors, insert_ids = \
            cf.insert_data(collection_w, nb, is_binary, is_all_data_type,
                           auto_id=auto_id, dim=dim)
        assert collection_w.is_empty is False
        assert collection_w.num_entities == nb
        # This condition will be removed after auto index feature
        if not is_index:
            collection_w.load()
    return collection_w, vectors, binary_raw_vectors, insert_ids
def test_compact_after_binary_index(self):
    """
    target: test compact after create index
    method: 1.insert binary data into two segments
            2.create binary index
            3.compact
            4.search
    expected: Verify segment info and index info
    """
    # create collection with 1 shard and insert 2 segments
    collection_w = self.init_collection_wrap(
        name=cf.gen_unique_str(prefix), shards_num=1,
        schema=cf.gen_default_binary_collection_schema())
    for i in range(2):
        df, _ = cf.gen_default_binary_dataframe_data()
        collection_w.insert(data=df)
        # num_entities triggers a flush, sealing one segment per round
        assert collection_w.num_entities == (i + 1) * ct.default_nb
    # create index
    collection_w.create_index(ct.default_binary_vec_field_name,
                              ct.default_binary_index)
    log.debug(collection_w.index())
    # load and search before compaction
    collection_w.load()
    search_params = {"metric_type": "JACCARD", "params": {"nprobe": 32}}
    search_res_one, _ = collection_w.search(
        df[ct.default_binary_vec_field_name][:ct.default_nq].to_list(),
        ct.default_binary_vec_field_name, search_params, ct.default_limit)
    assert len(search_res_one) == ct.default_nq
    for hits in search_res_one:
        assert len(hits) == ct.default_limit
    # compact
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans = collection_w.get_compaction_plans(
        check_task=CheckTasks.check_merge_compact)[0]
    # waiting for handoff completed: poll until the merged target segment is queryable
    cost = 30
    start = time()
    while True:
        sleep(5)
        segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        if len(segment_info) != 0 and segment_info[0].segmentID == c_plans.plans[0].target:
            log.debug(segment_info)
            break
        if time() - start > cost:
            raise MilvusException(1, f"Handoff after compact and index cost more than {cost}s")
    # verify search result after compaction
    search_res_two, _ = collection_w.search(
        df[ct.default_binary_vec_field_name][:ct.default_nq].to_list(),
        ct.default_binary_vec_field_name, search_params, ct.default_limit)
    # BUGFIX: the original asserted on search_res_one here, so the
    # post-compaction search result was never actually verified
    assert len(search_res_two) == ct.default_nq
    for hits in search_res_two:
        assert len(hits) == ct.default_limit
def test_insert_binary_after_index(self):
    """
    target: test insert binary after index
    method: 1.create index 2.insert binary data
    expected: 1.index ok 2.num entities correct
    """
    binary_schema = cf.gen_default_binary_collection_schema()
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                             schema=binary_schema)
    collection_w.create_index(ct.default_binary_vec_field_name,
                              default_binary_index_params)
    # the index must exist and match what was requested
    assert collection_w.has_index()[0]
    built_index, _ = collection_w.index()
    expected_index = Index(collection_w.collection,
                           ct.default_binary_vec_field_name,
                           default_binary_index_params)
    assert built_index == expected_index
    assert collection_w.indexes[0] == built_index
    # inserting after index creation still lands all rows
    binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    collection_w.insert(data=binary_df)
    assert collection_w.num_entities == ct.default_nb
def init_collection_general(self, prefix, insert_data=False, nb=ct.default_nb,
                            partition_num=0, is_binary=False):
    """
    target: create specified collections
    method: 1. create collections (binary/non-binary)
            2. create partitions if specified
            3. insert specified binary/non-binary data
               into each partition if any
    expected: return collection and raw data
    """
    log.info("Test case of search interface: initialize before test case")
    conn = self._connect()
    collection_name = cf.gen_unique_str(prefix)
    vectors = []
    binary_raw_vectors = []
    # 1 create collection
    if is_binary:
        default_schema = cf.gen_default_binary_collection_schema()
    else:
        default_schema = cf.gen_default_collection_schema()
    log.info("init_collection_general: collection creation")
    collection_w = self.init_collection_wrap(name=collection_name,
                                             schema=default_schema)
    # 2 add extra partitions if specified (default is 1 partition named "_default")
    if partition_num > 0:
        cf.gen_partitions(collection_w, partition_num)
    # 3 insert data if specified
    if insert_data:
        collection_w, vectors, binary_raw_vectors = cf.insert_data(
            collection_w, nb, is_binary)
        if nb <= 32000:
            conn.flush([collection_w.name])
        # FIX: use identity comparison with the False singleton (PEP 8 E712)
        # instead of `== False`, consistent with the other helper variants
        assert collection_w.is_empty is False
        assert collection_w.num_entities == nb
        collection_w.load()
    return collection_w, vectors, binary_raw_vectors
import pytest from pymilvus import Index from base.client_base import TestcaseBase from utils.util_log import test_log as log from common import common_func as cf from common import common_type as ct from common.common_type import CaseLabel, CheckTasks prefix = "insert" exp_name = "name" exp_schema = "schema" exp_num = "num_entities" exp_primary = "primary" default_schema = cf.gen_default_collection_schema() default_binary_schema = cf.gen_default_binary_collection_schema() default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} default_binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}} class TestInsertParams(TestcaseBase): """ Test case of Insert interface """ @pytest.fixture(scope="function", params=ct.get_invalid_strs) def get_non_data_type(self, request): if isinstance(request.param, list) or request.param is None: pytest.skip("list and None type is valid data type") yield request.param @pytest.fixture(scope="module", params=ct.get_invalid_strs) def get_invalid_field_name(self, request):
def test_compact_after_binary_index(self):
    """
    target: test compact after create index
    method: 1.insert binary data into two segments
            2.create binary index
            3.compact
            4.search
    expected: Verify segment info and index info
    """
    collection_w = self.init_collection_wrap(
        name=cf.gen_unique_str(prefix), shards_num=1,
        schema=cf.gen_default_binary_collection_schema())
    # seal two segments, one per insert round
    for seg in range(2):
        binary_df, _ = cf.gen_default_binary_dataframe_data()
        collection_w.insert(data=binary_df)
        assert collection_w.num_entities == (seg + 1) * ct.default_nb
    # create index
    collection_w.create_index(ct.default_binary_vec_field_name,
                              ct.default_binary_index)
    log.debug(collection_w.index())
    collection_w.load()
    jaccard_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
    query_vectors = cf.gen_binary_vectors(ct.default_nq, ct.default_dim)[1]
    # baseline search before compaction
    res_before, _ = collection_w.search(query_vectors,
                                        ct.default_binary_vec_field_name,
                                        jaccard_params, ct.default_limit)
    assert len(res_before) == ct.default_nq
    for hits in res_before:
        assert len(hits) == ct.default_limit
    # compact
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_plans(
        check_task=CheckTasks.check_merge_compact)
    # verify index re-build and re-load: an unsupported metric must be rejected
    bad_params = {"metric_type": "L1", "params": {"nprobe": 10}}
    res_after, _ = collection_w.search(
        query_vectors, ct.default_binary_vec_field_name,
        bad_params, ct.default_limit,
        check_task=CheckTasks.err_res,
        check_items={
            ct.err_code: 1,
            ct.err_msg: "metric type not found: (L1)"
        })
    # verify search result: ids are unchanged by compaction
    jaccard_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
    res_after, _ = collection_w.search(query_vectors,
                                       ct.default_binary_vec_field_name,
                                       jaccard_params, ct.default_limit)
    for nq_idx in range(ct.default_nq):
        for limit_idx in range(ct.default_limit):
            assert res_after[nq_idx][limit_idx].id == res_before[nq_idx][limit_idx].id