def test_partition_replicas_change_cross_partitions(self):
    """
    target: test load with different replicas between partitions
    method: 1.Create two partitions and insert data
            2.Load two partitions with different replicas
    expected: Verify the partitions share replica groups and the segments are loaded with 2 replicas
    """
    # Create two partitions and insert data
    collection_w = self.init_collection_wrap()
    partition_w1 = self.init_partition_wrap(collection_w)
    partition_w2 = self.init_partition_wrap(collection_w)
    partition_w1.insert(cf.gen_default_dataframe_data())
    partition_w2.insert(cf.gen_default_dataframe_data(start=ct.default_nb))
    assert collection_w.num_entities == ct.default_nb * 2

    # load with different replicas
    partition_w1.load(replica_number=1)
    partition_w1.release()
    partition_w2.load(replica_number=2)

    # verify the two partitions have the same replica groups
    replicas_1, _ = partition_w1.get_replicas()
    replicas_2, _ = partition_w2.get_replicas()
    group1_ids = list(map(lambda g: g.id, replicas_1.groups))
    group2_ids = list(map(lambda g: g.id, replicas_2.groups))
    assert sorted(group1_ids) == sorted(group2_ids)

    # verify loaded segments include 2 replicas and 1 partition
    seg_info, _ = self.utility_wrap.get_query_segment_info(collection_w.name)
    seg_ids = list(map(lambda seg: seg.segmentID, seg_info))
    num_entities = list(map(lambda seg: seg.num_rows, seg_info))
    assert reduce(lambda x, y: x ^ y, seg_ids) == 0
    assert reduce(lambda x, y: x + y, num_entities) == ct.default_nb * 2
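# Why the XOR reduction above verifies the 2-replica layout -- a minimal sketch,
# assuming every segment ID occurs exactly twice in seg_ids (reduce is assumed
# to be imported at module level from functools):
#
#   from functools import reduce
#   assert reduce(lambda x, y: x ^ y, [7, 9, 9, 7]) == 0  # paired IDs cancel, since x ^ x == 0
#   assert reduce(lambda x, y: x ^ y, [7, 9, 9]) == 7     # an unpaired ID survives the reduction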
def test_delete_query_ids_both_sealed_and_channel(self):
    """
    target: test query after deleting ids from both channel and sealed data
    method: 1.create and insert
            2.delete id 0 and flush
            3.load and query id 0
            4.insert new id and delete the id
            5.query id 0 and new id
    expected: Empty query result
    """
    # init collection and insert data without flush
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)

    # delete id 0 and flush
    del_res, _ = collection_w.delete(tmp_expr)
    assert del_res.delete_count == 1
    assert collection_w.num_entities == tmp_nb

    # load and query id 0
    collection_w.load()
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

    # insert id tmp_nb and delete id 0 and tmp_nb
    df_new = cf.gen_default_dataframe_data(nb=1, start=tmp_nb)
    collection_w.insert(df_new)
    collection_w.delete(expr=f'{ct.default_int64_field_name} in {[tmp_nb]}')

    # query with id 0 and tmp_nb
    collection_w.query(expr=f'{ct.default_int64_field_name} in {[0, tmp_nb]}',
                       check_task=CheckTasks.check_query_empty)
def test_compact_delete_ratio(self):
    """
    target: test that deleted entities reaching the ratio triggers auto-compaction
    method: 1.create with shard_num=1
            2.insert (compact load delta log, not from dmlChannel)
            3.delete 20% of nb, flush
    expected: Verify auto compaction, merge insert log and delta log
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
    df = cf.gen_default_dataframe_data(tmp_nb)
    insert_res, _ = collection_w.insert(df)

    # delete 20% of the entities
    ratio_expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:tmp_nb // ct.compact_delta_ratio_reciprocal]}'
    collection_w.delete(ratio_expr)
    assert collection_w.num_entities == tmp_nb

    # Flush a new segment; meeting the 20%-deleted condition triggers compaction,
    # but there is no way to get the plan
    collection_w.insert(cf.gen_default_dataframe_data(1, start=tmp_nb))
    assert collection_w.num_entities == tmp_nb + 1

    collection_w.load()
    collection_w.query(ratio_expr, check_task=CheckTasks.check_query_empty)

    res = df.iloc[-1:, :1].to_dict('records')
    collection_w.query(f'{ct.default_int64_field_name} in {insert_res.primary_keys[-1:]}',
                       check_task=CheckTasks.check_query_results,
                       check_items={'exp_res': res})
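# A hedged aside on the ratio above: with ct.compact_delta_ratio_reciprocal
# assumed to be 5, tmp_nb // 5 primary keys are deleted, i.e. 20% of the
# segment, which is the deletion ratio expected to trigger automatic
# delta-compaction once the flush seals the segment.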
def test_delete_sealed_segment_with_twice_flush(self):
    """
    target: test delete data from sealed segment and flush delta log
    method: 1.create and insert and flush data
            2.delete entities and flush (insert and flush)
            3.load collection (load data and delta log)
            4.query deleted ids
    expected: No query result
    """
    # create collection
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))

    # insert and flush data
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)
    assert collection_w.num_entities == tmp_nb

    # delete id 0 and flush
    del_res = collection_w.delete(tmp_expr)[0]
    assert del_res.delete_count == 1
    collection_w.insert(cf.gen_default_dataframe_data(nb=1, start=tmp_nb))
    log.info(collection_w.num_entities)

    # load and query id 0
    collection_w.load()
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)
def test_compact_merge_inside_time_travel(self):
    """
    target: test compact and merge segments inside time_travel range
    method: search with time travel after merge compact
    expected: Verify segments inside the time_travel range are merged
    """
    from pymilvus import utility

    # create collection with shard_num=1, insert 2 segments, each with tmp_nb entities
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)

    # insert twice
    df1 = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df1)
    assert collection_w.num_entities == tmp_nb
    df2 = cf.gen_default_dataframe_data(tmp_nb, start=tmp_nb)
    insert_two = collection_w.insert(df2)[0]
    assert collection_w.num_entities == tmp_nb * 2

    tt = utility.mkts_from_hybridts(insert_two.timestamp, milliseconds=0.1)

    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)

    collection_w.load()
    search_res, _ = collection_w.search(df2[ct.default_float_vec_field_name][:1].to_list(),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit,
                                        travel_timestamp=tt)
    assert tmp_nb in search_res[0].ids
    assert len(search_res[0]) == ct.default_limit
def test_delete_insert_same_id_sealed(self, to_query):
    """
    target: test insert same id entity after delete from sealed data
    method: 1.create and insert with flush
            2.load and query with the id
            3.delete the id entity
            4.insert new entity with the same id and flush
            5.query the id
    expected: Verify that the query gets the newly inserted entity
    """
    # init collection and insert data
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))

    # insert
    df = cf.gen_default_dataframe_data(1000)
    collection_w.insert(df)
    log.debug(collection_w.num_entities)

    # load and query
    collection_w.load()
    res = df.iloc[:1, :1].to_dict('records')
    default_search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
    collection_w.search(data=[df[ct.default_float_vec_field_name][0]],
                        anns_field=ct.default_float_vec_field_name,
                        param=default_search_params, limit=1)
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_results,
                       check_items={'exp_res': res})

    # delete
    collection_w.delete(tmp_expr)
    if to_query:
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

    # re-insert
    df_new = cf.gen_default_dataframe_data(nb=1)
    collection_w.insert(df_new)
    log.debug(collection_w.num_entities)

    # re-query
    res = df_new.iloc[[0], [0, 2]].to_dict('records')
    collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name],
                       check_task=CheckTasks.check_query_results,
                       check_items={'exp_res': res, 'with_vec': True})
    collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]],
                        anns_field=ct.default_float_vec_field_name,
                        param=default_search_params, limit=1)
def test_compact_both_delete_merge(self):
    """
    target: test compact with both delete and merge
    method: 1.create collection with shard_num=1
            2.insert data into two segments
            3.delete and flush (new insert)
            4.compact
            5.load and search
    expected: Trigger both types of compaction
    """
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)
    ids = []
    for i in range(2):
        df = cf.gen_default_dataframe_data(tmp_nb, start=i * tmp_nb)
        insert_res, _ = collection_w.insert(df)
        assert collection_w.num_entities == (i + 1) * tmp_nb
        ids.extend(insert_res.primary_keys)

    expr = f'{ct.default_int64_field_name} in {[0, 2 * tmp_nb - 1]}'
    collection_w.delete(expr)
    collection_w.insert(cf.gen_default_dataframe_data(1, start=2 * tmp_nb))
    assert collection_w.num_entities == 2 * tmp_nb + 1

    sleep(ct.compact_retention_duration + 1)
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans = collection_w.get_compaction_plans()[0]
    assert len(c_plans.plans) == 2
    # todo assert the two types of compaction plan

    # search
    ids.pop(0)
    ids.pop(-1)
    collection_w.load()
    search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit,
                                        check_task=CheckTasks.check_search_results,
                                        check_items={"nq": ct.default_nq,
                                                     "ids": ids,
                                                     "limit": ct.default_limit})
def test_delete_sealed_only(self):
    """
    target: test delete sealed-only
    method: 1.deploy sealed-only: two dmlChannel and three queryNodes
            2.create and insert with flush
            3.load
            4.delete all data
            5.query
    expected: Empty query result
    """
    # init collection with two shards
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=2)

    # insert 6000 entities into 3 segments, each with 2000 entities
    segment_num = 3
    segment_per_count = 2000
    ids = []
    for i in range(segment_num):
        df = cf.gen_default_dataframe_data(nb=segment_per_count, start=(i * segment_per_count))
        res, _ = collection_w.insert(df)
        assert collection_w.num_entities == (i + 1) * segment_per_count
        ids.extend(res.primary_keys)

    collection_w.load()
    expr = f'{ct.default_int64_field_name} in {ids}'
    collection_w.delete(expr)
    collection_w.query(expr, check_task=CheckTasks.check_query_empty)
def test_delete_partition(self):
    """
    target: test delete from partition
    method: delete with partition names
    expected: verify partition entities are deleted
    """
    # init collection and partition
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    partition_w = self.init_partition_wrap(collection_wrap=collection_w)

    # load collection and insert data to partition
    collection_w.load()
    df = cf.gen_default_dataframe_data(tmp_nb)
    partition_w.insert(df)

    # delete ids from partition
    del_res, _ = collection_w.delete(tmp_expr, partition_name=partition_w.name)
    assert del_res.delete_count == 1

    # query the deleted id and an existing id
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty,
                       partition_names=[partition_w.name])
    res = df.iloc[1:2, :1].to_dict('records')
    collection_w.query(f'{ct.default_int64_field_name} in [1]',
                       check_task=CheckTasks.check_query_results,
                       check_items={exp_res: res})
def test_delete_time_travel(self):
    """
    target: test search with time travel after delete
    method: 1.insert and flush
            2.delete
            3.load and search with time travel
    expected: search successfully
    """
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(tmp_nb)
    insert_res, _ = collection_w.insert(df)
    collection_w.load()

    tt = self.utility_wrap.mkts_from_hybridts(insert_res.timestamp, milliseconds=0.)

    res_before, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)

    expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:tmp_nb // 2]}'
    delete_res, _ = collection_w.delete(expr)

    res_travel, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit,
                                        travel_timestamp=tt)
    assert res_before[0].ids == res_travel[0].ids
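# A hedged note on the travel timestamp above: mkts_from_hybridts derives a new
# hybrid timestamp offset from the insert's timestamp, so a search with
# travel_timestamp=tt observes the collection as of the insert, i.e. before the
# delete, which is why res_travel matches res_before.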
def test_delete_insert_multi(self):
    """
    target: test delete after multi insert
    method: 1.create
            2.insert multi times, no flush
            3.load
            4.delete even numbers
            5.search and query
    expected: Verify result
    """
    # create collection and insert multiple times, each with tmp_nb entities
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    multi = 3
    for i in range(multi):
        start = i * tmp_nb
        df = cf.gen_default_dataframe_data(tmp_nb, start=start)
        collection_w.insert(df)

    # delete even numbers
    ids = [i for i in range(0, tmp_nb * multi, 2)]
    expr = f'{ct.default_int64_field_name} in {ids}'
    collection_w.delete(expr)

    collection_w.load()
    collection_w.query(expr, check_task=CheckTasks.check_query_empty)
    search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    for res_id in search_res[0].ids:
        assert res_id not in ids
def test_compact_delete_and_search(self):
    """
    target: test delete and compact segment, and search
    method: 1.create collection and insert
            2.delete part of the entities
            3.compact
            4.load and search
    expected: Verify search result
    """
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)
    df = cf.gen_default_dataframe_data()
    insert_res, _ = collection_w.insert(df)

    expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:ct.default_nb // 2]}'
    collection_w.delete(expr)
    assert collection_w.num_entities == ct.default_nb

    sleep(ct.compact_retention_duration + 1)
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_plans(check_task=CheckTasks.check_delete_compact)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit,
                                        check_task=CheckTasks.check_search_results,
                                        check_items={"nq": ct.default_nq,
                                                     "ids": insert_res.primary_keys[ct.default_nb // 2:],
                                                     "limit": ct.default_limit})
    collection_w.query("int64 in [0]", check_task=CheckTasks.check_query_empty)
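# A hedged note on the sleep above: deleted entities are assumed to be retained
# for ct.compact_retention_duration seconds before they become eligible for
# delete-compaction, so the test waits out that window (plus one second of
# slack) to ensure compact() actually produces a delete-compaction plan.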
def test_partition_insert_dropped_collection(self):
    """
    target: verify insert data into dropped collection
    method: 1.create a collection and a partition
            2.drop the collection
            3.insert data into the partition
    expected: raise exception
    """
    # create collection
    collection_w = self.init_collection_wrap()

    # create partition
    partition_name = cf.gen_unique_str(prefix)
    partition_w = self.init_partition_wrap(collection_w, partition_name)
    assert collection_w.has_partition(partition_name)[0]

    # drop collection
    collection_w.drop()

    # insert data to partition
    partition_w.insert(cf.gen_default_dataframe_data(),
                       check_task=CheckTasks.err_res,
                       check_items={ct.err_code: 1, ct.err_msg: "None Type"})
def test_delete_merge_same_id_channel_and_sealed(self):
    """
    target: test merge same delete ids from channel and sealed
    method: 1.create, insert
            2.delete id and flush (data and deleted become sealed)
            3.load and query (verify delete successfully)
            4.insert entity with deleted id
            5.delete id
            6.query with id
    expected: Empty query result
    """
    # init collection and insert data without flush
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)

    # delete id 0 and flush
    del_res, _ = collection_w.delete(tmp_expr)
    assert del_res.delete_count == 1
    assert collection_w.num_entities == tmp_nb

    # load and query id 0
    collection_w.load()
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

    # re-insert id 0 and re-delete id 0
    collection_w.insert(df[:1])
    collection_w.delete(tmp_expr)
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)
def test_delete_from_partitions_with_same_ids(self):
    """
    target: test delete same ids from two partitions with same data
    method: 1.insert same nb data into two partitions
            2.delete same ids from one partition
    expected: Only the data in the partition specified by delete is removed
    """
    # init collection and partition
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    partition_w = self.init_partition_wrap(collection_wrap=collection_w)

    # insert same data into partition_w and default partition
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)
    partition_w.insert(df)

    # delete id 0 from the default partition, and query on it gets an empty result
    collection_w.delete(tmp_expr, partition_name=ct.default_partition_name)
    assert collection_w.num_entities == tmp_nb * 2
    collection_w.load()
    collection_w.query(tmp_expr, partition_names=[ct.default_partition_name],
                       check_task=CheckTasks.check_query_empty)

    # query on partition_w with id 0 and get a result
    collection_w.query(tmp_expr, partition_names=[partition_w.name],
                       check_task=CheckTasks.check_query_results,
                       check_items={exp_res: query_res_tmp_expr})
def test_compact_no_merge(self):
    """
    target: test compact when no segments merge
    method: 1.create with shard_num=1
            2.insert and flush
            3.compact and search
    expected: No exception and a single compaction plan
    """
    # create collection
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)
    assert collection_w.num_entities == tmp_nb

    collection_w.load()
    seg_before, _ = self.utility_wrap.get_query_segment_info(collection_w.name)

    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans, _ = collection_w.get_compaction_plans()
    assert len(c_plans.plans) == 1
    assert [seg_before[0].segmentID] == c_plans.plans[0].sources

    collection_w.release()
    collection_w.load()
    seg_after, _ = self.utility_wrap.get_query_segment_info(collection_w.name)
    assert seg_after[0].segmentID == c_plans.plans[0].target
def test_compact_delete_multi_segments(self):
    """
    target: test compact multi delete segments
    method: 1.create collection with shard_num=2
            2.insert data into two segments
            3.delete entities from two segments
            4.compact
            5.load and search
    expected: Verify two delta compaction plans
    """
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(2 * tmp_nb)
    insert_res, _ = collection_w.insert(df)
    assert collection_w.num_entities == 2 * tmp_nb

    collection_w.load()
    log.debug(self.utility_wrap.get_query_segment_info(collection_w.name))

    delete_ids = [i for i in range(10)]
    expr = f'{ct.default_int64_field_name} in {delete_ids}'
    collection_w.delete(expr)

    sleep(ct.compact_retention_duration + 1)
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans = collection_w.get_compaction_plans()[0]
    assert len(c_plans.plans) == 2
    for plan in c_plans.plans:
        assert len(plan.sources) == 1

    collection_w.query(f"{ct.default_int64_field_name} in {delete_ids}",
                       check_task=CheckTasks.check_query_empty)
def test_compact_partition(self):
    """
    target: test compact partition
    method: compact partition
    expected: Verify partition segments merged
    """
    # create collection with shard_num=1, and create partition
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
    partition_w = self.init_partition_wrap(collection_wrap=collection_w)

    # insert and flush twice
    for i in range(2):
        df = cf.gen_default_dataframe_data(tmp_nb)
        partition_w.insert(df)
        assert partition_w.num_entities == tmp_nb * (i + 1)

    # compact
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans = collection_w.get_compaction_plans()[0]
    assert len(c_plans.plans) == 1
    assert len(c_plans.plans[0].sources) == 2
    target = c_plans.plans[0].target

    # verify queryNode loads the compacted segment
    collection_w.load()
    segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
    assert target == segment_info[0].segmentID
def test_compact_delete_inside_time_travel(self):
    """
    target: test compact inside time_travel range
    method: 1.insert data and get ts
            2.delete all ids
            3.compact
            4.search with ts
    expected: Verify search result
    """
    from pymilvus import utility
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)

    # insert and get tt
    df = cf.gen_default_dataframe_data(tmp_nb)
    insert_res, _ = collection_w.insert(df)
    tt = utility.mkts_from_hybridts(insert_res.timestamp, milliseconds=0.)

    # delete all
    expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys}'
    delete_res, _ = collection_w.delete(expr)
    log.debug(collection_w.num_entities)

    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_plans()

    collection_w.load()
    search_one, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit,
                                        travel_timestamp=tt)
    assert 0 in search_one[0].ids
def test_compact_repeatedly_delete_same_id(self):
    """
    target: test compact after repeatedly deleting the same entity
    method: 1.create and insert entities
            2.repeatedly delete the same id
            3.compact
    expected: No exception, and the delta log keeps just one delete
    """
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)
    df = cf.gen_default_dataframe_data()
    insert_res, _ = collection_w.insert(df)

    expr = f'{ct.default_int64_field_name} in [0]'
    for _ in range(100):
        collection_w.delete(expr=expr)
    assert collection_w.num_entities == ct.default_nb

    sleep(ct.compact_retention_duration + 1)
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_plans(check_task=CheckTasks.check_delete_compact)

    collection_w.load()
    collection_w.query(expr, check_task=CheckTasks.check_query_empty)
def test_compact_max_time_interval(self):
    """
    target: test auto compact with max interval 60s
    method: 1.create with shard_num=1
            2.insert and flush twice (two segments)
            3.wait max_compaction_interval (60s)
    expected: Verify compaction results
    """
    # create collection with shard_num=1, insert 2 segments, each with tmp_nb entities
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
    collection_w.compact()

    # Notice: the merge compaction triggered by max_compaction_interval also needs
    # to meet ct.compact_segment_num_threshold
    for i in range(ct.compact_segment_num_threshold):
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb * (i + 1)

    sleep(ct.max_compaction_interval + 1)

    # verify queryNode loads the compacted segments
    collection_w.load()
    replicas = collection_w.get_replicas()[0]
    replica_num = len(replicas.groups)
    segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
    assert len(segment_info) == 1 * replica_num
def test_compact_twice(self):
    """
    target: test compact twice
    method: 1.create with shard_num=1
            2.insert and flush twice (two segments)
            3.compact
            4.insert new data
            5.compact
    expected: Merge into one segment
    """
    # init collection with one shard, insert into two segments
    collection_w = self.collection_insert_multi_segments_one_shard(prefix, nb_of_segment=tmp_nb)

    # first compact two segments
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans1 = collection_w.get_compaction_plans()[0]
    target_1 = c_plans1.plans[0].target

    # insert new data
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)
    log.debug(collection_w.num_entities)

    # second compact
    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    collection_w.get_compaction_state()
    c_plans2 = collection_w.get_compaction_plans()[0]
    assert target_1 in c_plans2.plans[0].sources
    log.debug(c_plans2.plans[0].target)
def test_delete_duplicate_primary_keys(self):
    """
    target: test delete with duplicate primary keys
    method: 1.insert data with dup ids
            2.delete with repeated or non-repeated values
    expected: currently only one entity is deleted, and query gets one entity
              todo: delete all entities
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(nb=tmp_nb)
    df[ct.default_int64_field_name] = 0
    collection_w.insert(df)
    assert collection_w.num_entities == tmp_nb
    del_res, _ = collection_w.delete(tmp_expr)
    collection_w.load()

    # just one query result and one search result, because the results are de-duplicated
    res, _ = collection_w.query(tmp_expr, output_fields=["*"])
    assert len(res) == 1
    search_res, _ = collection_w.search([df[ct.default_float_vec_field_name][1]],
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit,
                                        output_fields=[ct.default_int64_field_name,
                                                       ct.default_float_field_name])
    assert len(search_res) == 1
def test_partition_drop_non_empty_partition(self):
    """
    target: verify drop a partition which has data inserted
    method: 1.create a partition with default schema
            2.insert some data
            3.flush / not flush
            4.drop the partition
    expected: drop successfully
    """
    # create collection
    collection_w = self.init_collection_wrap()

    # create partition
    partition_name = cf.gen_unique_str(prefix)
    partition_w = self.init_partition_wrap(collection_w, partition_name)
    assert collection_w.has_partition(partition_name)[0]

    # insert data to partition
    partition_w.insert(cf.gen_default_dataframe_data())

    # # flush removed for issue #5837
    # if flush:
    #     self._connect().flush([collection_w.name])

    # drop partition
    partition_w.drop()
    assert not collection_w.has_partition(partition_name)[0]
def test_delete_query_without_loading(self):
    """
    target: test delete and query without loading
    method: 1.insert and flush data
            2.delete ids
            3.query without loading
    expected: Raise exception
    """
    # create collection and insert data with flush
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)
    assert collection_w.num_entities == tmp_nb

    # delete
    res = collection_w.delete(tmp_expr)[0]
    assert res.delete_count == 1

    # query without loading raises an exception
    error = {ct.err_code: 1,
             ct.err_msg: f"collection {collection_w.name} was not loaded into memory"}
    collection_w.query(expr=tmp_expr, check_task=CheckTasks.err_res, check_items=error)
def test_compact_merge_multi_shards(self):
    """
    target: test compact merge multi shards
    method: 1.Create a collection with 2 shards
            2.Insert twice and generate 4 segments
            3.Compact and wait until completed
    expected: Verify there are 2 merge-type compaction plans
    """
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
    for i in range(2):
        df = cf.gen_default_dataframe_data(2 * tmp_nb)
        insert_res, _ = collection_w.insert(df)
        log.debug(collection_w.num_entities)
    collection_w.load()
    log.debug(self.utility_wrap.get_query_segment_info(collection_w.name))

    collection_w.compact()
    collection_w.wait_for_compaction_completed()
    c_plans = collection_w.get_compaction_plans()[0]
    assert len(c_plans.plans) == 2
    targets = []
    for plan in c_plans.plans:
        assert len(plan.sources) == 2
        targets.append(plan.target)

    collection_w.release()
    collection_w.load()
    seg_info, _ = self.utility_wrap.get_query_segment_info(collection_w.name)
    for seg in seg_info:
        assert seg.segmentID in targets
def test_delete_insert_same_entity(self):
    """
    target: test delete and insert same entity
    method: 1.delete entity one
            2.insert entity one
            3.query entity one
    expected: verify query result
    """
    # init collection, load, and insert data without flush
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    collection_w.load()
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)

    # delete
    del_res, _ = collection_w.delete(tmp_expr)
    assert del_res.delete_count == 1
    # assert collection_w.num_entities == tmp_nb
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

    # insert entity with primary key 0
    collection_w.insert(df[:1])

    # query entity one
    res = df.iloc[0:1, :1].to_dict('records')
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_results,
                       check_items={'exp_res': res})
def test_delete_query_after_handoff(self):
    """
    target: test search after delete and handoff
    method: 1.create and load collection
            2.insert entities and delete id 0
            3.flush entities
            4.query deleted id after handoff completed
    expected: Delete successfully, query gets empty result
    """
    # init collection and load
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
    collection_w.load()

    # insert data and delete id 0
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)
    del_res, _ = collection_w.delete(tmp_expr)

    # flush
    assert collection_w.num_entities == tmp_nb

    # wait for the handoff to complete
    while True:
        time.sleep(0.5)
        segment_infos = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        if segment_infos[0].state == SegmentState.Sealed:
            break

    # query deleted id
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)
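# A possible hardening of the wait loop above, not taken from the original: the
# same polling with a deadline, so the test fails instead of hanging if handoff
# never completes (the 60s bound and the pytest.fail message are assumptions):
#
#   deadline = time.time() + 60
#   while time.time() < deadline:
#       time.sleep(0.5)
#       segment_infos = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
#       if segment_infos[0].state == SegmentState.Sealed:
#           break
#   else:
#       pytest.fail("handoff did not complete within 60s")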
def test_delete_merge_ids_channel_and_sealed(self):
    """
    target: test merge deleted ids come from both channel and sealed
    method: 1.create, insert ids [0, tmp_nb) with shard_num=1
            2.delete id 0 and flush
            3.load and query with id 0
            4.delete id 1 (merge same segment deleted ids 0 and 1)
            5.query with id 0 and 1
    expected: Empty query result
    """
    # init collection and insert data without flush
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
    df = cf.gen_default_dataframe_data(tmp_nb)
    collection_w.insert(df)

    # delete id 0 and flush
    del_res, _ = collection_w.delete(tmp_expr)
    assert del_res.delete_count == 1
    assert collection_w.num_entities == tmp_nb

    # load and query id 0
    collection_w.load()
    collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

    # delete id 1 and query id 0 and 1
    collection_w.delete(expr=f'{ct.default_int64_field_name} in {[1]}')
    collection_w.query(expr=f'{ct.default_int64_field_name} in {[0, 1]}',
                       check_task=CheckTasks.check_query_empty)
def test_insert_multi_threading(self):
    """
    target: test concurrent insert
    method: multi threads insert
    expected: verify num entities
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(ct.default_nb)
    thread_num = 4
    threads = []
    primary_keys = df[ct.default_int64_field_name].values.tolist()

    def insert(thread_i):
        log.debug(f'In thread-{thread_i}')
        mutation_res, _ = collection_w.insert(df)
        assert mutation_res.insert_count == ct.default_nb
        assert mutation_res.primary_keys == primary_keys

    for i in range(thread_num):
        x = threading.Thread(target=insert, args=(i,))
        threads.append(x)
        x.start()
    for t in threads:
        t.join()
    assert collection_w.num_entities == ct.default_nb * thread_num