def test_insert_binary_dim_not_match(self):
    """
    target: test insert binary with dim not match
    method: insert binary data dim not equal to schema
    expected: raise exception
    """
    # Collection is created from the default binary schema (dim = ct.default_dim).
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                             schema=default_binary_schema)
    # Generate entities whose vector dim deliberately disagrees with the schema.
    dim = 120
    df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb, dim=dim)
    error = {ct.err_code: 1,
             ct.err_msg: f'Collection field dim is {ct.default_dim}, but entities field dim is {dim}'}
    # The insert must be rejected with the dim-mismatch error above.
    collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)
def test_insert_binary_dataframe(self):
    """
    target: test insert binary dataframe
    method: 1. create by schema 2. insert dataframe
    expected: assert num_entities
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                             schema=default_binary_schema)
    df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    insert_res, _ = collection_w.insert(data=df)
    # The mutation result must report every row as inserted, with the
    # dataframe's int64 column echoed back as the primary keys.
    assert insert_res.insert_count == ct.default_nb
    assert insert_res.primary_keys == df[ct.default_int64_field_name].values.tolist()
    assert collection_w.num_entities == ct.default_nb
def test_collection_binary_created_by_dataframe(self):
    """
    target: test collection with dataframe
    method: create collection with dataframe
    expected: create successfully
    """
    conn = self._connect()
    name = cf.gen_unique_str(prefix)
    df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    expected_schema = cf.gen_default_binary_collection_schema()
    # Construction from a dataframe should both create the collection and
    # infer a schema identical to the default binary one.
    self.collection_wrap.init_collection(name=name, data=df,
                                         check_task=CheckTasks.check_collection_property,
                                         check_items={exp_name: name,
                                                      exp_schema: expected_schema})
    conn.flush([name])
def test_insert_binary_dataframe(self):
    """
    target: test insert binary dataframe
    method: 1. create by schema 2. insert dataframe
    expected: assert num_entities
    """
    # NOTE(review): a test method with this exact name appears elsewhere in
    # this source — confirm only one definition ends up in any single class,
    # otherwise the later def silently shadows the earlier one.
    name = cf.gen_unique_str(prefix)
    collection_w = self.init_collection_wrap(name=name, schema=default_binary_schema)
    df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    collection_w.insert(data=df)
    # Flush so num_entities reflects the freshly inserted rows.
    conn, _ = self.connection_wrap.get_connection()
    conn.flush([name])
    assert collection_w.num_entities == ct.default_nb
def test_compact_after_binary_index(self):
    """
    target: test compact after create index
    method: 1.insert binary data into two segments 2.create binary index
            3.compact 4.search
    expected: Verify segment info and index info
    """
    # create collection with 1 shard and insert 2 segments
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1,
                                             schema=cf.gen_default_binary_collection_schema())
    for i in range(2):
        df, _ = cf.gen_default_binary_dataframe_data()
        collection_w.insert(data=df)
        assert collection_w.num_entities == (i + 1) * ct.default_nb

    # create index
    collection_w.create_index(ct.default_binary_vec_field_name, ct.default_binary_index)
    log.debug(collection_w.index())

    # load and search before compaction
    collection_w.load()
    search_params = {"metric_type": "JACCARD", "params": {"nprobe": 32}}
    search_res_one, _ = collection_w.search(df[ct.default_binary_vec_field_name][:ct.default_nq].to_list(),
                                            ct.default_binary_vec_field_name,
                                            search_params, ct.default_limit)
    # sanity-check the pre-compaction result so the post-compaction check
    # below is meaningful
    assert len(search_res_one) == ct.default_nq
    for hits in search_res_one:
        assert len(hits) == ct.default_limit

    # compact
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans = collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)[0]

    # waiting for handoff completed and search
    cost = 30
    start = time()
    while True:
        sleep(5)
        segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        if len(segment_info) != 0 and segment_info[0].segmentID == c_plans.plans[0].target:
            log.debug(segment_info)
            break
        if time() - start > cost:
            raise MilvusException(1, f"Handoff after compact and index cost more than {cost}s")

    # verify search result after compaction
    # BUG FIX: the original asserted on search_res_one here, leaving the
    # post-compaction result (search_res_two) completely unchecked.
    search_res_two, _ = collection_w.search(df[ct.default_binary_vec_field_name][:ct.default_nq].to_list(),
                                            ct.default_binary_vec_field_name,
                                            search_params, ct.default_limit)
    assert len(search_res_two) == ct.default_nq
    for hits in search_res_two:
        assert len(hits) == ct.default_limit
def test_collection_binary_with_dataframe(self):
    """
    target: test binary collection with dataframe
    method: create binary collection with dataframe
    expected: collection num entities equal to nb
    """
    conn = self._connect()
    name = cf.gen_unique_str(prefix)
    df, _ = cf.gen_default_binary_dataframe_data(nb=ct.default_nb)
    # Creating with data should insert the dataframe on the fly while the
    # collection properties still match the default binary schema.
    self.collection_wrap.init_collection(name, schema=default_binary_schema, data=df,
                                         check_task=CheckTasks.check_collection_property,
                                         check_items={exp_name: name,
                                                      exp_schema: default_binary_schema})
    conn.flush([name])
    assert self.collection_wrap.num_entities == ct.default_nb
def test_insert_binary_after_index(self):
    """
    target: test insert binary after index
    method: 1.create index 2.insert binary data
    expected: 1.index ok 2.num entities correct
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                             schema=cf.gen_default_binary_collection_schema())
    collection_w.create_index(ct.default_binary_vec_field_name, default_binary_index_params)
    # The index must be visible via has_index(), index() and the indexes list.
    assert collection_w.has_index()[0]
    index, _ = collection_w.index()
    expected_index = Index(collection_w.collection, ct.default_binary_vec_field_name,
                           default_binary_index_params)
    assert index == expected_index
    assert collection_w.indexes[0] == index
    # Inserting after index creation must still land every row.
    df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    collection_w.insert(data=df)
    assert collection_w.num_entities == ct.default_nb
def test_compact_after_binary_index(self):
    """
    target: test compact after create index
    method: 1.insert binary data into two segments 2.create binary index
            3.compact 4.search
    expected: Verify segment info and index info
    """
    # one shard so both inserts land in the same channel and can be merged
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1,
                                             schema=cf.gen_default_binary_collection_schema())
    for seg in range(2):
        df, _ = cf.gen_default_binary_dataframe_data()
        collection_w.insert(data=df)
        assert collection_w.num_entities == (seg + 1) * ct.default_nb

    # build the binary index, then load and run a baseline search
    collection_w.create_index(ct.default_binary_vec_field_name, ct.default_binary_index)
    log.debug(collection_w.index())
    collection_w.load()
    jaccard_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
    vectors = cf.gen_binary_vectors(ct.default_nq, ct.default_dim)[1]
    res_before, _ = collection_w.search(vectors, ct.default_binary_vec_field_name,
                                        jaccard_params, ct.default_limit)
    assert len(res_before) == ct.default_nq
    for hits in res_before:
        assert len(hits) == ct.default_limit

    # compact the two segments into one
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)

    # verify index re-build and re-load: an unsupported metric must be rejected
    bad_params = {"metric_type": "L1", "params": {"nprobe": 10}}
    collection_w.search(vectors, ct.default_binary_vec_field_name,
                        bad_params, ct.default_limit,
                        check_task=CheckTasks.err_res,
                        check_items={
                            ct.err_code: 1,
                            ct.err_msg: "metric type not found: (L1)"
                        })

    # searching with the original metric must reproduce the baseline ids
    res_after, _ = collection_w.search(vectors, ct.default_binary_vec_field_name,
                                       jaccard_params, ct.default_limit)
    for nq_idx in range(ct.default_nq):
        for rank in range(ct.default_limit):
            assert res_after[nq_idx][rank].id == res_before[nq_idx][rank].id