Example No. 1
    def test_partition_replicas_change_cross_partitions(self):
        """
        target: test load with different replicas between partitions
        method: 1.Create two partitions and insert data
                2.Load the partitions with different replica numbers, releasing in between
        expected: Verify replica groups and loaded segments
        """
        # Create two partitions and insert data
        collection_w = self.init_collection_wrap()
        partition_w1 = self.init_partition_wrap(collection_w)
        partition_w2 = self.init_partition_wrap(collection_w)
        partition_w1.insert(cf.gen_default_dataframe_data())
        partition_w2.insert(cf.gen_default_dataframe_data(start=ct.default_nb))
        assert collection_w.num_entities == ct.default_nb * 2

        # load with different replicas
        partition_w1.load(replica_number=1)
        partition_w1.release()
        partition_w2.load(replica_number=2)

        # verify both partitions report the same replica groups
        replicas_1, _ = partition_w1.get_replicas()
        replicas_2, _ = partition_w2.get_replicas()
        group1_ids = list(map(lambda g: g.id, replicas_1.groups))
        group2_ids = list(map(lambda g: g.id, replicas_2.groups))
        assert sorted(group1_ids) == sorted(group2_ids)

        # verify the loaded segments cover 2 replicas of 1 partition
        seg_info, _ = self.utility_wrap.get_query_segment_info(
            collection_w.name)
        seg_ids = list(map(lambda seg: seg.segmentID, seg_info))
        num_entities = list(map(lambda seg: seg.num_rows, seg_info))
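        # with 2 replicas of one partition loaded, each segment id appears twice, so XOR-ing all ids yields 0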
        assert reduce(lambda x, y: x ^ y, seg_ids) == 0
        assert reduce(lambda x, y: x + y, num_entities) == ct.default_nb * 2
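
The wrappers above (init_partition_wrap, load(replica_number=...), get_replicas()) map onto plain pymilvus calls. A minimal sketch, assuming pymilvus 2.1+, a Milvus instance at localhost:19530 with enough query nodes for two replicas; the collection, partition, and field names are illustrative only:

# Per-partition load with different replica numbers (hedged sketch).
import random
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      Partition, connections)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("replica_demo", schema)
part1, part2 = Partition(coll, "p1"), Partition(coll, "p2")

def rows(start, nb=100):
    return [list(range(start, start + nb)),
            [[random.random() for _ in range(8)] for _ in range(nb)]]

part1.insert(rows(0))
part2.insert(rows(100))
coll.flush()
coll.create_index("vec", {"index_type": "IVF_FLAT",
                          "metric_type": "L2", "params": {"nlist": 64}})

# A partition can be loaded with its own replica number; get_replicas()
# reports the in-memory replica groups for whatever is currently loaded.
part1.load(replica_number=1)
part1.release()
part2.load(replica_number=2)
print(coll.get_replicas())
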
Example No. 2
    def test_delete_query_ids_both_sealed_and_channel(self):
        """
        target: test query that delete ids from both channel and sealed
        method: 1.create and insert
                2.delete id 0 and flush
                3.load and query id 0
                4.insert new id and delete the id
                5.query id 0 and new id
        expected: Empty query result
        """
        # init collection and insert data without flush
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)

        # delete id 0 and flush
        del_res, _ = collection_w.delete(tmp_expr)
        assert del_res.delete_count == 1
        assert collection_w.num_entities == tmp_nb

        # load and query id 0
        collection_w.load()
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

        # insert a new entity with id tmp_nb and delete it
        df_new = cf.gen_default_dataframe_data(nb=1, start=tmp_nb)
        collection_w.insert(df_new)
        collection_w.delete(
            expr=f'{ct.default_int64_field_name} in {[tmp_nb]}')

        # query with id 0 and tmp_nb
        collection_w.query(
            expr=f'{ct.default_int64_field_name} in {[0, tmp_nb]}',
            check_task=CheckTasks.check_query_empty)
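
For reference, the same sealed-plus-channel delete flow without the test wrappers looks roughly like the following. A hedged sketch assuming pymilvus 2.1+; the connection parameters and the "pk"/"vec" field names are placeholders:

import random
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      connections)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("delete_demo", schema)
coll.insert([list(range(100)),
             [[random.random() for _ in range(8)] for _ in range(100)]])

coll.delete("pk in [0]")   # delete against the growing (channel) data
coll.flush()               # seal the segment; the delete now lives in the delta log

coll.create_index("vec", {"index_type": "IVF_FLAT",
                          "metric_type": "L2", "params": {"nlist": 64}})
coll.load()
coll.insert([[100], [[random.random() for _ in range(8)]]])
coll.delete("pk in [100]")            # delete against channel data after load
print(coll.query("pk in [0, 100]"))   # expected: []
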
Example No. 3
    def test_compact_delete_ratio(self):
        """
        target: test delete entities reaches ratio and auto-compact
        method: 1.create with shard_num=1
                2.insert (compact load delta log, not from dmlChannel)
                3.delete 20% of nb, flush
        expected: Verify auto compaction, merge insert log and delta log
        """
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)

        # delete 20% entities
        ratio_expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:tmp_nb // ct.compact_delta_ratio_reciprocal]}'
        collection_w.delete(ratio_expr)
        assert collection_w.num_entities == tmp_nb

        # Flush a new segment so the 20%-deleted condition is met; this triggers auto compaction, but there is no way to fetch its plan
        collection_w.insert(cf.gen_default_dataframe_data(1, start=tmp_nb))
        assert collection_w.num_entities == tmp_nb + 1

        collection_w.load()
        collection_w.query(ratio_expr, check_task=CheckTasks.check_query_empty)

        res = df.iloc[-1:, :1].to_dict('records')
        collection_w.query(f'{ct.default_int64_field_name} in {insert_res.primary_keys[-1:]}',
                           check_task=CheckTasks.check_query_results,
                           check_items={'exp_res': res})
Example No. 4
    def test_delete_sealed_segment_with_twice_flush(self):
        """
        target: test delete data from sealed segment and flush delta log
        method: 1.create and insert and flush data
                2.delete entities and flush (insert and flush)
                3.load collection (load data and delta log)
                4.query deleted ids
        expected: No query result
        """
        # create collection
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        # insert and flush data
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb

        # delete id 0 and flush
        del_res = collection_w.delete(tmp_expr)[0]
        assert del_res.delete_count == 1
        collection_w.insert(cf.gen_default_dataframe_data(nb=1, start=tmp_nb))
        log.info(collection_w.num_entities)
        # load and query id 0
        collection_w.load()
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)
Example No. 5
    def test_compact_merge_inside_time_travel(self):
        """
        target: test compact and merge segments inside time_travel range
        method: search with time travel after merge compact
        expected: Verify segments inside time_travel merged
        """
        from pymilvus import utility
        # create collection shard_num=1, insert 2 segments, each with tmp_nb entities
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)

        # insert twice
        df1 = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df1)[0]
        assert collection_w.num_entities == tmp_nb

        df2 = cf.gen_default_dataframe_data(tmp_nb, start=tmp_nb)
        insert_two = collection_w.insert(df2)[0]
        assert collection_w.num_entities == tmp_nb * 2

        tt = utility.mkts_from_hybridts(insert_two.timestamp, milliseconds=0.1)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)

        collection_w.load()
        search_res, _ = collection_w.search(df2[ct.default_float_vec_field_name][:1].to_list(),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            travel_timestamp=tt)
        assert tmp_nb in search_res[0].ids
        assert len(search_res[0]) == ct.default_limit
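
Outside the harness, the time-travel search used here relies on utility.mkts_from_hybridts and the travel_timestamp search parameter, which exist in pymilvus 2.0/2.1 (the feature was removed in later Milvus releases). A rough sketch under those assumptions, with placeholder connection parameters and names:

import random
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      connections, utility)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("tt_demo", schema)
vectors = [[random.random() for _ in range(8)] for _ in range(100)]
mutation = coll.insert([list(range(100)), vectors])

# Derive a travel timestamp slightly after the insert's hybrid timestamp.
tt = utility.mkts_from_hybridts(mutation.timestamp, milliseconds=1.0)

coll.create_index("vec", {"index_type": "IVF_FLAT",
                          "metric_type": "L2", "params": {"nlist": 64}})
coll.load()
res = coll.search(vectors[:1], "vec",
                  {"metric_type": "L2", "params": {"nprobe": 8}},
                  limit=5, travel_timestamp=tt)
print(res[0].ids)   # the search sees the collection as of timestamp tt
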
Example No. 6
    def test_delete_insert_same_id_sealed(self, to_query):
        """
        target: test insert same id entity after delete from sealed data
        method: 1.create and insert with flush
                2.load and query with the id
                3.delete the entity with that id
                4.insert new entity with the same id and flush
                5.query the id
        expected: Verify that the query gets the newly inserted entity
        """
        # init collection and insert data without flush
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))

        # insert
        df = cf.gen_default_dataframe_data(1000)
        collection_w.insert(df)
        log.debug(collection_w.num_entities)

        # load and query
        collection_w.load()
        res = df.iloc[:1, :1].to_dict('records')
        default_search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        collection_w.search(data=[df[ct.default_float_vec_field_name][0]],
                            anns_field=ct.default_float_vec_field_name,
                            param=default_search_params,
                            limit=1)
        collection_w.query(tmp_expr,
                           check_task=CheckTasks.check_query_results,
                           check_items={'exp_res': res})

        # delete
        collection_w.delete(tmp_expr)
        if to_query:
            collection_w.query(tmp_expr,
                               check_task=CheckTasks.check_query_empty)

        # re-insert
        df_new = cf.gen_default_dataframe_data(nb=1)
        collection_w.insert(df_new)
        log.debug(collection_w.num_entities)

        # re-query
        res = df_new.iloc[[0], [0, 2]].to_dict('records')
        collection_w.query(tmp_expr,
                           output_fields=[ct.default_float_vec_field_name],
                           check_task=CheckTasks.check_query_results,
                           check_items={
                               'exp_res': res,
                               'with_vec': True
                           })
        collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]],
                            anns_field=ct.default_float_vec_field_name,
                            param=default_search_params,
                            limit=1)
Example No. 7
    def test_compact_both_delete_merge(self):
        """
        target: test compact both delete and merge
        method: 1.create collection with shard_num=1
                2.insert data into two segments
                3.delete and flush (new insert)
                4.compact
                5.load and search
        expected: Trigger both types of compaction (delete and merge)
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix),
                                                 shards_num=1)
        ids = []
        for i in range(2):
            df = cf.gen_default_dataframe_data(tmp_nb, start=i * tmp_nb)
            insert_res, _ = collection_w.insert(df)
            assert collection_w.num_entities == (i + 1) * tmp_nb
            ids.extend(insert_res.primary_keys)

        expr = f'{ct.default_int64_field_name} in {[0, 2 * tmp_nb - 1]}'
        collection_w.delete(expr)

        collection_w.insert(cf.gen_default_dataframe_data(1, start=2 * tmp_nb))
        assert collection_w.num_entities == 2 * tmp_nb + 1

        sleep(ct.compact_retention_duration + 1)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]

        assert len(c_plans.plans) == 2
        # todo assert two types compaction plan

        # search
        ids.pop(0)
        ids.pop(-1)
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params,
                                            ct.default_limit,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": ct.default_nq,
                                                         "ids": ids,
                                                         "limit": ct.default_limit})
Example No. 8
    def test_delete_sealed_only(self):
        """
        target: test delete sealed-only
        method: 1.deploy sealed-only: two dmlChannel and three queryNodes
                2.create and insert with flush
                3.load
                4.delete all data
                5.query
        expected: Query gets empty result
        """
        # init collection and insert data without flush
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix), shards_num=2)
        # insert 6000 entities into 3 segments (2000 per segment)
        segment_num = 3
        segment_per_count = 2000
        ids = []
        for i in range(segment_num):
            df = cf.gen_default_dataframe_data(nb=segment_per_count,
                                               start=(i * segment_per_count))
            res, _ = collection_w.insert(df)
            assert collection_w.num_entities == (i + 1) * segment_per_count
            ids.extend(res.primary_keys)

        collection_w.load()

        expr = f'{ct.default_int64_field_name} in {ids}'
        collection_w.delete(expr)

        collection_w.query(expr, check_task=CheckTasks.check_query_empty)
Example No. 9
    def test_delete_partition(self):
        """
        target: test delete from partition
        method: delete with partition names
        expected: verify partition entities are deleted
        """
        # init collection and partition
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        partition_w = self.init_partition_wrap(collection_wrap=collection_w)

        # load collection and insert data to partition
        collection_w.load()
        df = cf.gen_default_dataframe_data(tmp_nb)
        partition_w.insert(df)

        # delete ids from partition
        del_res, _ = collection_w.delete(tmp_expr,
                                         partition_name=partition_w.name)
        assert del_res.delete_count == 1

        # query the deleted id, then query an existing id
        collection_w.query(tmp_expr,
                           check_task=CheckTasks.check_query_empty,
                           partition_names=[partition_w.name])
        res = df.iloc[1:2, :1].to_dict('records')
        collection_w.query(f'{ct.default_int64_field_name} in [1]',
                           check_task=CheckTasks.check_query_results,
                           check_items={exp_res: res})
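
The partition_name / partition_names arguments shown above scope the delete and the query to a single partition in plain pymilvus as well. A minimal sketch, assuming pymilvus 2.1+ (connection parameters and names are illustrative):

import random
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      Partition, connections)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("partition_delete_demo", schema)
part = Partition(coll, "p1")
part.insert([list(range(100)),
             [[random.random() for _ in range(8)] for _ in range(100)]])

coll.create_index("vec", {"index_type": "IVF_FLAT",
                          "metric_type": "L2", "params": {"nlist": 64}})
coll.load()
coll.delete("pk in [0]", partition_name="p1")           # delete only within p1
print(coll.query("pk in [0]", partition_names=["p1"]))  # expected: []
print(coll.query("pk in [1]", partition_names=["p1"]))  # entity 1 is still there
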
Example No. 10
    def test_delete_time_travel(self):
        """
        target: test search with time travel after delete
        method: 1.insert and flush
                2.delete
                3.load and search with time travel
        expected: search successfully
        """

        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)
        collection_w.load()

        tt = self.utility_wrap.mkts_from_hybridts(insert_res.timestamp,
                                                  milliseconds=0.)

        res_before, _ = collection_w.search(
            df[ct.default_float_vec_field_name][:1].to_list(),
            ct.default_float_vec_field_name, ct.default_search_params,
            ct.default_limit)

        expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:tmp_nb // 2]}'
        delete_res, _ = collection_w.delete(expr)

        res_travel, _ = collection_w.search(
            df[ct.default_float_vec_field_name][:1].to_list(),
            ct.default_float_vec_field_name,
            ct.default_search_params,
            ct.default_limit,
            travel_timestamp=tt)
        assert res_before[0].ids == res_travel[0].ids
Example No. 11
    def test_delete_insert_multi(self):
        """
        target: test delete after multi insert
        method: 1.create
                2.insert multi times, no flush
                3.load
                4.delete even-numbered ids
                5.search and query
        expected: Verify result
        """
        # create collection, insert multi times, each with tmp_nb entities
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        multi = 3
        for i in range(multi):
            start = i * tmp_nb
            df = cf.gen_default_dataframe_data(tmp_nb, start=start)
            collection_w.insert(df)

        # delete even numbers
        ids = [i for i in range(0, tmp_nb * multi, 2)]
        expr = f'{ct.default_int64_field_name} in {ids}'
        collection_w.delete(expr)

        collection_w.load()
        collection_w.query(expr, check_task=CheckTasks.check_query_empty)
        search_res, _ = collection_w.search(
            cf.gen_vectors(ct.default_nq,
                           ct.default_dim), ct.default_float_vec_field_name,
            ct.default_search_params, ct.default_limit)
        for res_id in search_res[0].ids:
            assert res_id not in ids
Example No. 12
    def test_compact_delete_and_search(self):
        """
        target: test delete and compact segment, and search
        method: 1.create collection and insert
                2.delete part entities
                3.compact
                4.load and search
        expected: Verify search result
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data()
        insert_res, _ = collection_w.insert(df)

        expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:ct.default_nb // 2]}'
        collection_w.delete(expr)
        assert collection_w.num_entities == ct.default_nb

        sleep(ct.compact_retention_duration + 1)
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans(check_task=CheckTasks.check_delete_compact)

        # search
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": ct.default_nq,
                                                         "ids": insert_res.primary_keys[ct.default_nb // 2:],
                                                         "limit": ct.default_limit}
                                            )
        collection_w.query("int64 in [0]", check_task=CheckTasks.check_query_empty)
Example No. 13
    def test_partition_insert_dropped_collection(self):
        """
        target: verify insert data into dropped collection
        method: 1.create a collection and a partition
                2.drop the collection
                3.insert data into the partition of the dropped collection
        expected: raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # drop collection
        collection_w.drop()

        # insert data to partition
        partition_w.insert(cf.gen_default_dataframe_data(),
                           check_task=CheckTasks.err_res,
                           check_items={
                               ct.err_code: 1,
                               ct.err_msg: "None Type"
                           })
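
Outside the test harness the same negative case looks roughly like this; a hedged sketch (pymilvus 2.1+, placeholder names), catching a broad Exception because the exact error type and message vary between client versions:

from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      Partition, connections)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("drop_demo", schema)
part = Partition(coll, "p1")
coll.drop()

try:
    part.insert([[0], [[0.0] * 8]])   # the parent collection no longer exists
except Exception as exc:              # broad on purpose: error type differs by version
    print(f"insert after drop failed as expected: {exc}")
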
Example No. 14
    def test_delete_merge_same_id_channel_and_sealed(self):
        """
        target: test merge same delete ids from channel and sealed
        method: 1.create, insert
                2.delete id and flush (data and deleted become sealed)
                3.load and query (verify delete successfully)
                4.insert entity with deleted id
                5.delete id
                6.query with id
        expected: Empty query result
        """
        # init collection and insert data without flush
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)

        # delete id 0 and flush
        del_res, _ = collection_w.delete(tmp_expr)
        assert del_res.delete_count == 1
        assert collection_w.num_entities == tmp_nb

        # load and query id 0
        collection_w.load()
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

        # re-insert id 0 and re-delete id 0
        collection_w.insert(df[:1])
        collection_w.delete(tmp_expr)
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)
Example No. 15
    def test_delete_from_partitions_with_same_ids(self):
        """
        target: test delete same ids from two partitions with same data
        method: 1.insert same nb data into two partitions
                2.delete the ids from the default partition only
        expected: Only the data in the default partition is deleted
        """
        # init collection and partition
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        partition_w = self.init_partition_wrap(collection_wrap=collection_w)

        # insert same data into partition_w and default partition
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        partition_w.insert(df)

        # delete same id 0 from default_partition, and query on it get empty result
        collection_w.delete(tmp_expr, partition_name=ct.default_partition_name)
        assert collection_w.num_entities == tmp_nb * 2
        collection_w.load()
        collection_w.query(tmp_expr,
                           partition_names=[ct.default_partition_name],
                           check_task=CheckTasks.check_query_empty)

        # query on partition_w with id 0 and get an result
        collection_w.query(tmp_expr,
                           partition_names=[partition_w.name],
                           check_task=CheckTasks.check_query_results,
                           check_items={exp_res: query_res_tmp_expr})
Example No. 16
    def test_compact_no_merge(self):
        """
        target: test compact when no segments merge
        method: 1.create with shard_num=1
                2.insert and flush
                3.compact and search
        expected: No exception and compact plans
        """
        # create collection
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb

        collection_w.load()

        seg_before, _ = self.utility_wrap.get_query_segment_info(collection_w.name)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans, _ = collection_w.get_compaction_plans()
        assert len(c_plans.plans) == 1
        assert [seg_before[0].segmentID] == c_plans.plans[0].sources

        collection_w.release()
        collection_w.load()
        seg_after, _ = self.utility_wrap.get_query_segment_info(collection_w.name)
        assert seg_after[0].segmentID == c_plans.plans[0].target
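
The manual-compaction API exercised throughout these examples is available directly on a pymilvus Collection (2.1+ assumed). A minimal sketch with placeholder names that flushes two segments, compacts them, and prints the merge plan:

import random
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      connections, utility)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("compact_demo", schema, shards_num=1)

for start in (0, 100):   # two flushed inserts -> two sealed segments
    coll.insert([list(range(start, start + 100)),
                 [[random.random() for _ in range(8)] for _ in range(100)]])
    coll.flush()

coll.compact()
coll.wait_for_compaction_completed()
for plan in coll.get_compaction_plans().plans:
    print("merged segments", plan.sources, "->", plan.target)

# after loading, the query nodes should serve the compacted target segment
coll.create_index("vec", {"index_type": "IVF_FLAT",
                          "metric_type": "L2", "params": {"nlist": 64}})
coll.load()
print(utility.get_query_segment_info(coll.name))
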
Example No. 17
    def test_compact_delete_multi_segments(self):
        """
        target: test compact multi delete segments
        method: 1.create collection with shard_num=2
                2.insert data into two segments
                3.delete entities from two segments
                4.compact
                5.load and search
        expected: Verify two delta compaction plans
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(2*tmp_nb)
        insert_res, _ = collection_w.insert(df)
        assert collection_w.num_entities == 2 * tmp_nb

        collection_w.load()
        log.debug(self.utility_wrap.get_query_segment_info(collection_w.name))

        delete_ids = [i for i in range(10)]
        expr = f'{ct.default_int64_field_name} in {delete_ids}'
        collection_w.delete(expr)

        sleep(ct.compact_retention_duration + 1)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]
        assert len(c_plans.plans) == 2
        for plan in c_plans.plans:
            assert len(plan.sources) == 1

        collection_w.query(f"{ct.default_int64_field_name} in {delete_ids}", check_task=CheckTasks.check_query_empty)
Example No. 18
    def test_compact_partition(self):
        """
        target: test compact partition
        method: compact partition
        expected: Verify partition segments merged
        """
        # create collection with shard_num=1, and create partition
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        partition_w = self.init_partition_wrap(collection_wrap=collection_w)

        # insert flush twice
        for i in range(2):
            df = cf.gen_default_dataframe_data(tmp_nb)
            partition_w.insert(df)
            assert partition_w.num_entities == tmp_nb * (i + 1)

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]

        assert len(c_plans.plans) == 1
        assert len(c_plans.plans[0].sources) == 2
        target = c_plans.plans[0].target

        # verify queryNode load the compacted segments
        collection_w.load()
        segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        assert target == segment_info[0].segmentID
Example No. 19
    def test_compact_delete_inside_time_travel(self):
        """
        target: test compact inside time_travel range
        method: 1.insert data and get ts
                2.delete all ids
                3.compact
                4.search with ts
        expected: Verify search result
        """
        from pymilvus import utility
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)

        # insert and get tt
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)
        tt = utility.mkts_from_hybridts(insert_res.timestamp, milliseconds=0.)

        # delete all
        expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys}'
        delete_res, _ = collection_w.delete(expr)
        log.debug(collection_w.num_entities)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        collection_w.load()
        search_one, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            travel_timestamp=tt)
        assert 0 in search_one[0].ids
Example No. 20
    def test_compact_repeatedly_delete_same_id(self):
        """
        target: test compact after repeatedly delete same entity
        method: 1.Create and insert entities
                2.repeatedly delete the same id
                3.compact
        expected: No exception; the delta log records only one delete
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)

        df = cf.gen_default_dataframe_data()
        insert_res, _ = collection_w.insert(df)
        expr = f'{ct.default_int64_field_name} in [0]'

        for _ in range(100):
            collection_w.delete(expr=expr)
        assert collection_w.num_entities == ct.default_nb

        sleep(ct.compact_retention_duration + 1)
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans(check_task=CheckTasks.check_delete_compact)

        collection_w.load()
        collection_w.query(expr, check_task=CheckTasks.check_query_empty)
Example No. 21
    def test_compact_max_time_interval(self):
        """
        target: test auto compact with max interval 60s
        method: 1.create with shard_num=1
                2.insert flush twice (two segments)
                3.wait max_compaction_interval (60s)
        expected: Verify compaction results
        """
        # create collection shard_num=1, insert 2 segments, each with tmp_nb entities
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        collection_w.compact()

        # Notice: the merge compaction triggered by max_compaction_interval also needs to meet
        # ct.compact_segment_num_threshold
        for i in range(ct.compact_segment_num_threshold):
            df = cf.gen_default_dataframe_data(tmp_nb)
            collection_w.insert(df)
            assert collection_w.num_entities == tmp_nb * (i + 1)

        sleep(ct.max_compaction_interval + 1)

        # verify queryNode load the compacted segments
        collection_w.load()
        replicas = collection_w.get_replicas()[0]
        replica_num = len(replicas.groups)
        segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        assert len(segment_info) == 1*replica_num
Example No. 22
    def test_compact_twice(self):
        """
        target: test compact twice
        method: 1.create with shard_num=1
                2.insert and flush twice (two segments)
                3.compact
                4.insert new data
                5.compact
        expected: Merge into one segment
        """
        # init collection with one shard, insert into two segments
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, nb_of_segment=tmp_nb)

        # first compact two segments
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans1 = collection_w.get_compaction_plans()[0]
        target_1 = c_plans1.plans[0].target

        # insert new data
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        log.debug(collection_w.num_entities)

        # second compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_state()
        c_plans2 = collection_w.get_compaction_plans()[0]

        assert target_1 in c_plans2.plans[0].sources
        log.debug(c_plans2.plans[0].target)
Example No. 23
    def test_delete_duplicate_primary_keys(self):
        """
        target: test delete from duplicate primary keys
        method: 1.insert data with dup ids
                2.delete with repeated or not values
        expected: currently only one entity is deleted and the query returns one entity
        todo: delete all entities
        """
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(nb=tmp_nb)
        df[ct.default_int64_field_name] = 0
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb
        del_res, _ = collection_w.delete(tmp_expr)
        collection_w.load()

        # Only one query/search result is returned, because duplicate primary keys are de-duplicated
        res, _ = collection_w.query(tmp_expr, output_fields=["*"])
        assert len(res) == 1

        search_res, _ = collection_w.search(
            [df[ct.default_float_vec_field_name][1]],
            ct.default_float_vec_field_name,
            ct.default_search_params,
            ct.default_limit,
            output_fields=[
                ct.default_int64_field_name, ct.default_float_field_name
            ])
        assert len(search_res) == 1
Example No. 24
    def test_partition_drop_non_empty_partition(self):
        """
        target: verify drop a partition which has data inserted
        method: 1.create a partition with default schema
                2.insert some data
                3.flush / not flush
                4.drop the partition
        expected: drop successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # insert data to partition
        partition_w.insert(cf.gen_default_dataframe_data())

        # # flush   remove flush for issue #5837
        # if flush:
        #      self._connect().flush([collection_w.name])

        # drop partition
        partition_w.drop()
        assert not collection_w.has_partition(partition_name)[0]
Example No. 25
    def test_delete_query_without_loading(self):
        """
        target: test delete and query without loading
        method: 1.insert and flush data
                2.delete ids
                3.query without loading
        expected: Raise exception
        """
        # create collection, insert data without flush
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb

        # delete
        res = collection_w.delete(tmp_expr)[0]
        assert res.delete_count == 1

        # query without loading and raise exception
        error = {ct.err_code: 1,
                 ct.err_msg: f"collection {collection_w.name} was not loaded into memory"}
        collection_w.query(expr=tmp_expr,
                           check_task=CheckTasks.err_res,
                           check_items=error)
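
In plain pymilvus the same misuse raises an exception from query() because nothing has been loaded onto the query nodes. A hedged sketch (pymilvus 2.1+, placeholder names), catching a broad Exception since the message wording differs across versions:

import random
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      connections)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("not_loaded_demo", schema)
coll.insert([list(range(100)),
             [[random.random() for _ in range(8)] for _ in range(100)]])
coll.flush()
coll.delete("pk in [0]")

try:
    coll.query("pk in [0]")   # no coll.load() was issued
except Exception as exc:
    print(f"query without load failed as expected: {exc}")
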
Example No. 26
    def test_compact_merge_multi_shards(self):
        """
        target: test compact merge multi shards
        method: 1.Create a collection with 2 shards
                2.Insert twice and generate 4 segments
                3.Compact and wait it completed
        expected: Verify there are 2 merge-type compaction plans
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
        for i in range(2):
            df = cf.gen_default_dataframe_data(2 * tmp_nb)
            insert_res, _ = collection_w.insert(df)
            log.debug(collection_w.num_entities)

        collection_w.load()
        log.debug(self.utility_wrap.get_query_segment_info(collection_w.name))

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]
        assert len(c_plans.plans) == 2
        targets = []
        for plan in c_plans.plans:
            assert len(plan.sources) == 2
            targets.append(plan.target)

        collection_w.release()
        collection_w.load()
        seg_info, _ = self.utility_wrap.get_query_segment_info(collection_w.name)
        for seg in seg_info:
            assert seg.segmentID in targets
Example No. 27
    def test_delete_insert_same_entity(self):
        """
        target: test delete and insert same entity
        method: 1.delete entity one
                2.insert entity one
                3.query  entity one
        expected: verify query result
        """
        # init collection and insert data without flush
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix))
        collection_w.load()
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)

        # delete
        del_res, _ = collection_w.delete(tmp_expr)
        assert del_res.delete_count == 1
        # assert collection_w.num_entities == tmp_nb
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

        # insert entity with primary key 0
        collection_w.insert(df[:1])

        # query entity one
        res = df.iloc[0:1, :1].to_dict('records')
        collection_w.query(tmp_expr,
                           check_task=CheckTasks.check_query_results,
                           check_items={'exp_res': res})
Example No. 28
    def test_delete_query_after_handoff(self):
        """
        target: test search after delete and handoff
        method: 1.create and load collection
                2.insert entities and delete id 0
                3.flush entities
                4.query deleted id after handoff completed
        expected: Delete successfully, query get empty result
        """
        # init collection and load
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix), shards_num=1)
        collection_w.load()

        # insert data and delete id 0
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        del_res, _ = collection_w.delete(tmp_expr)

        # flush
        assert collection_w.num_entities == tmp_nb

        # wait for the handoff to complete
        while True:
            time.sleep(0.5)
            segment_infos = self.utility_wrap.get_query_segment_info(
                collection_w.name)[0]
            if segment_infos[0].state == SegmentState.Sealed:
                break
        # query deleted id
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)
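
The wait loop above is just polling utility.get_query_segment_info until the flushed segment shows up as Sealed on the query nodes. A standalone sketch of that pattern, assuming pymilvus 2.1+; the import path of SegmentState (here the generated common_pb2 enum) is an assumption and may differ between client versions:

import time
from pymilvus import connections, utility
# assumption: SegmentState is exposed via the generated protos in this client version
from pymilvus.grpc_gen.common_pb2 import SegmentState

connections.connect(host="localhost", port="19530")

def wait_for_handoff(collection_name: str, timeout: float = 60.0):
    """Block until every queryable segment of the collection is Sealed."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        seg_infos = utility.get_query_segment_info(collection_name)
        if seg_infos and all(s.state == SegmentState.Sealed for s in seg_infos):
            return seg_infos
        time.sleep(0.5)
    raise TimeoutError(f"handoff of {collection_name} did not finish in {timeout}s")
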
Example No. 29
    def test_delete_merge_ids_channel_and_sealed(self):
        """
        target: test merge deleted ids come from both channel and sealed
        method: 1.create, insert ids [0, tmp_nb) with shard_num=1
                2.delete id 0 and flush
                3.load and query with id 0
                4.delete id 1 (merge same segment deleted ids 0 and 1)
                5.query with id 0 and 1
        expected: Empty query result
        """
        # init collection and insert data without flush
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)

        # delete id 0 and flush
        del_res, _ = collection_w.delete(tmp_expr)
        assert del_res.delete_count == 1
        assert collection_w.num_entities == tmp_nb

        # load and query id 0
        collection_w.load()
        collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty)

        # delete id 1 and query id 0 and 1
        collection_w.delete(expr=f'{ct.default_int64_field_name} in {[1]}')
        collection_w.query(expr=f'{ct.default_int64_field_name} in {[0, 1]}',
                           check_task=CheckTasks.check_query_empty)
Example No. 30
    def test_insert_multi_threading(self):
        """
        target: test concurrent insert
        method: multi threads insert
        expected: verify num entities
        """
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(ct.default_nb)
        thread_num = 4
        threads = []
        primary_keys = df[ct.default_int64_field_name].values.tolist()

        def insert(thread_i):
            log.debug(f'In thread-{thread_i}')
            mutation_res, _ = collection_w.insert(df)
            assert mutation_res.insert_count == ct.default_nb
            assert mutation_res.primary_keys == primary_keys

        for i in range(thread_num):
            x = threading.Thread(target=insert, args=(i,))
            threads.append(x)
            x.start()
        for t in threads:
            t.join()
        assert collection_w.num_entities == ct.default_nb * thread_num
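
The threaded pattern above translates directly to plain pymilvus. A hedged sketch (2.1+ assumed, placeholder names) where four threads insert the same batch, so num_entities ends up at batch size times thread count:

import random
import threading
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      connections)

connections.connect(host="localhost", port="19530")
schema = CollectionSchema([
    FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8),
])
coll = Collection("threaded_insert_demo", schema)
batch = [list(range(100)),
         [[random.random() for _ in range(8)] for _ in range(100)]]

def worker(i):
    res = coll.insert(batch)
    print(f"thread-{i} inserted {res.insert_count} rows")

threads = [threading.Thread(target=worker, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()

coll.flush()
print(coll.num_entities)   # expected: 100 * 4 (duplicate pks still count here)
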