Example #1
def e2e_milvus(host, c_name, collection_exist=False):
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    if collection_exist:
        collection_w.init_collection(name=c_name)
    else:
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name,
                              ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection,
                                            ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(data[-1][:ct.default_nq],
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params,
                                        ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0][0].id)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
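A minimal usage sketch for the helper above (the host value and collection name are illustrative, assuming a reachable Milvus proxy):

host = "127.0.0.1"  # assumed Milvus proxy address
c_name = cf.gen_unique_str("e2e_")
e2e_milvus(host, c_name, collection_exist=False)  # first run creates the collection
e2e_milvus(host, c_name, collection_exist=True)   # second run reuses it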
Example #2
    def test_load_replica_greater_than_querynodes(self):
        """
        target: test load with more replicas than querynodes
        method: load with 3 replicas (only 2 querynodes deployed)
        expected: load fails with an insufficient-nodes error
        """
        # create, insert
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w = self.init_partition_wrap(collection_w)
        partition_w.insert(cf.gen_default_list_data())
        assert partition_w.num_entities == ct.default_nb

        # load with 3 replicas (more than the 2 querynodes) and expect an error
        error = {
            ct.err_code: 1,
            ct.err_msg: "no enough nodes to create replicas"
        }
        partition_w.load(replica_number=3,
                         check_task=CheckTasks.err_res,
                         check_items=error)
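The constraint exercised here, spelled out as a sketch (semantics inferred from the error message; exact scheduling rules may differ by version): each replica needs at least one querynode to host it, so loading fails when replica_number exceeds the querynode count.

querynodes = 2       # deployed in this test
replica_number = 3   # requested
assert replica_number > querynodes  # hence "no enough nodes to create replicas"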
Example #3
 def keep_running(self):
     while True:
         t0 = time.time()
         _, insert_result = \
             self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.DELTA_PER_INS),
                                timeout=timeout,
                                enable_traceback=enable_traceback,
                                check_task=CheckTasks.check_nothing)
         t1 = time.time()
         if not self._flush:
             if insert_result:
                 self.rsp_times.append(t1 - t0)
                 self.average_time = (
                     (t1 - t0) +
                     self.average_time * self._succ) / (self._succ + 1)
                 self._succ += 1
                 log.debug(
                     f"insert success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}"
                 )
             else:
                 self._fail += 1
             sleep(constants.WAIT_PER_OP / 10)
         else:
             # call flush in property num_entities
             t0 = time.time()
             num_entities = self.c_wrap.num_entities
             t1 = time.time()
             if num_entities == (self.initial_entities +
                                 constants.DELTA_PER_INS):
                 self.rsp_times.append(t1 - t0)
                 self.average_time = (
                     (t1 - t0) +
                     self.average_time * self._succ) / (self._succ + 1)
                 self._succ += 1
                 log.debug(
                     f"flush success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}"
                 )
                 self.initial_entities += constants.DELTA_PER_INS
             else:
                 self._fail += 1
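The running average above is the standard incremental mean: with n prior successes and average avg, the update is new_avg = (latest + avg * n) / (n + 1), which is what the self.average_time lines compute (timeout and enable_traceback in the snippet are presumably module-level settings in the source file). A small standalone check of that formula:

samples = [0.12, 0.08, 0.10]
avg, n = 0.0, 0
for s in samples:
    avg = (s + avg * n) / (n + 1)  # same update as keep_running()
    n += 1
assert abs(avg - sum(samples) / len(samples)) < 1e-12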
Example #4
    def test_shrink_data_node(self):
        """
        target: test shrink dataNode from 2 to 1
        method: 1. create collections and insert data
                2. shrink dataNode from 2 to 1
                3. insert data again
        expected: verify the properties of the collection whose channel was on the shrunken pod
        """
        release_name = "scale-data"
        env = HelmEnv(release_name=release_name, dataNode=2)
        host = env.helm_install_cluster_milvus(
            image_pull_policy=constants.IF_NOT_PRESENT)

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        c_name = "data_scale_one"
        data = cf.gen_default_list_data(ct.default_nb)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb

        c_name_2 = "data_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(
            name=c_name_2, schema=cf.gen_default_collection_schema())
        mutation_res2, _ = collection_w2.insert(data)
        assert mutation_res2.insert_count == ct.default_nb
        assert collection_w2.num_entities == ct.default_nb

        env.helm_upgrade_cluster_milvus(dataNode=1)

        assert collection_w.num_entities == ct.default_nb
        mutation_res2, _ = collection_w2.insert(data)
        assert collection_w2.num_entities == ct.default_nb * 2
        collection_w.drop()
        collection_w2.drop()
Example #5
 def keep_running(self):
     while True:
         _, insert_result = \
             self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.DELTA_PER_INS),
                                timeout=timeout, check_task=CheckTasks.check_nothing)
         if not self._flush:
             if insert_result:
                 self._succ += 1
             else:
                 self._fail += 1
             sleep(constants.WAIT_PER_OP / 10)
         else:
             # call flush in property num_entities
             t0 = datetime.datetime.now()
             num_entities = self.c_wrap.num_entities
             tt = datetime.datetime.now() - t0
             log.info(f"flush time cost: {tt}")
             if num_entities == (self.initial_entities +
                                 constants.DELTA_PER_INS):
                 self._succ += 1
                 self.initial_entities += constants.DELTA_PER_INS
             else:
                 self._fail += 1
Example #6
    def test_query_without_loading(self):
        """
        target: test query without loading
        method: no loading before query
        expected: raise exception
        """

        # init a collection with default connection
        collection_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=collection_name)

        # insert data to collection
        collection_w.insert(data=cf.gen_default_list_data(ct.default_nb))

        # check the number of entities; accessing num_entities triggers a flush
        assert collection_w.num_entities == ct.default_nb

        # query without load
        collection_w.query(default_term_expr,
                           check_task=CheckTasks.err_res,
                           check_items={
                               ct.err_code: 1,
                               ct.err_msg: clem.CollNotLoaded % collection_name
                           })
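For comparison, a rough equivalent of this check against the raw pymilvus API (a sketch; the exact exception type and message are assumptions and vary across pymilvus versions):

from pymilvus import Collection
from pymilvus.exceptions import MilvusException

try:
    Collection(collection_name).query(default_term_expr)
except MilvusException as e:
    assert "load" in str(e).lower()  # assumed: the error mentions the collection is not loaded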
Example #7
    def test_simd_compat_e2e(self, simd_id):
        """
       steps
       1. [test_milvus_install]: set up milvus with customized simd configured
       2. [test_simd_compat_e2e]: verify milvus is working well
       4. [test_milvus_cleanup]: delete milvus instances in teardown
       """
        simd = supported_simd_types[simd_id]
        log.info(f"start to install milvus with simd {simd}")
        release_name, host, port = _install_milvus(simd)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")
        log.info(f"milvus simdType: {mil.simd_type}")
        assert str(mil.simd_type).lower() in [
            simd_type.lower() for simd_type in supported_simd_types[simd_id:]
        ]

        log.info(f"start to e2e verification: {simd}")
        # create
        name = cf.gen_unique_str("compat")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        data = cf.gen_default_list_data()
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = time.time()
        assert collection_w.num_entities == len(data[0]) + entities
        tt = time.time() - t0
        entities = collection_w.num_entities
        log.info(f"assert flush: {tt}, entities: {entities}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str())
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #8
    def test_chaos_data_consist(self, connection, chaos_yaml):
        """
        target: verify data consistency after chaos injected and recovered
        method: 1. create a collection, insert some data, search and query
                2. inject a chaos object
                3. reconnect to service
                4. verify a) data entities persists, index persists,
                          b) search and query results persist
        expected: collection data and results persist
        """
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.info(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.info(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,999,99]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.info(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # parse chaos object
        chaos_config = cc.gen_experiment_config(chaos_yaml)
        meta_name = chaos_config.get('metadata', None).get('name', None)

        # wait all pods ready
        log.info(
            f"wait for pods in namespace {constants.CHAOS_NAMESPACE} with label app.kubernetes.io/instance={meta_name}"
        )
        wait_pods_ready(constants.CHAOS_NAMESPACE,
                        f"app.kubernetes.io/instance={meta_name}")
        log.info(
            f"wait for pods in namespace {constants.CHAOS_NAMESPACE} with label release={meta_name}"
        )
        wait_pods_ready(constants.CHAOS_NAMESPACE, f"release={meta_name}")
        log.info("all pods are ready")

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 3)
        reconnect(connections, alias='default')

        # verify collection persists
        assert utility.has_collection(c_name)
        log.info("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.info("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.info("assert index persists")
        # verify search results persist
        collection_w2.load()
        t0 = datetime.datetime.now()
        search_res, _ = collection_w2.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1
        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert len(query_res2) == len(query_res)
        log.info("assert query result persists")
Example #9
    def test_shrink_query_node(self):
        """
        target: test shrink queryNode from 2 to 1
        method: 1. deploy two queryNodes
                2. search two collections on the two queryNodes
                3. shrink queryNode from 2 to 1
                4. search both collections again
        expected: search result is correct
        """
        # deploy
        release_name = "scale-query"
        env = HelmEnv(release_name=release_name, queryNode=2)
        host = env.helm_install_cluster_milvus(
            image_pull_policy=constants.IF_NOT_PRESENT)

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # collection one
        data = cf.gen_default_list_data(nb)
        c_name = "query_scale_one"
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        collection_w.insert(data)
        assert collection_w.num_entities == nb
        collection_w.load()
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]

        # collection two
        c_name_2 = "query_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(
            name=c_name_2, schema=cf.gen_default_collection_schema())
        collection_w2.insert(data)
        assert collection_w2.num_entities == nb
        collection_w2.load()
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]

        # scale queryNode pod
        env.helm_upgrade_cluster_milvus(queryNode=1)

        # search
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]
Example #10
class TestPartitionOperations(TestcaseBase):
    """ Test case of partition interface in operations """
    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_dropped_collection(self):
        """
        target: verify create partition against a dropped collection
        method: 1. create collection1
                2. drop collection1
                3. create partition in collection1
        expected: 1. raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # drop collection
        collection_w.drop()

        # create partition failed
        self.partition_wrap.init_partition(collection_w.collection,
                                           cf.gen_unique_str(prefix),
                                           check_task=CheckTasks.err_res,
                                           check_items={
                                               ct.err_code: 1,
                                               ct.err_msg:
                                               "can't find collection"
                                           })

    @pytest.mark.tags(CaseLabel.L2)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_same_name_in_diff_collections(self):
        """
        target: verify create partitions with same name in diff collections
        method: 1. create a partition in collection1
                2. create a partition in collection2
        expected: 1. create successfully
        """
        # create two collections
        collection_w1 = self.init_collection_wrap()
        collection_w2 = self.init_collection_wrap()

        # create 2 partitions in 2 diff collections
        partition_name = cf.gen_unique_str(prefix)
        self.init_partition_wrap(collection_wrap=collection_w1,
                                 name=partition_name)
        self.init_partition_wrap(collection_wrap=collection_w2,
                                 name=partition_name)

        # check result
        assert collection_w1.has_partition(partition_name)[0]
        assert collection_w2.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
    def test_partition_multi_partitions_in_collection(self):
        """
        target: verify create multiple partitions in one collection
        method: 1. create multiple partitions in one collection
        expected: 1. create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        for _ in range(10):
            partition_name = cf.gen_unique_str(prefix)
            # create partition with different names and check the partition exists
            self.init_partition_wrap(collection_w, partition_name)
            assert collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip(reason="skip for memory issue check")
    def test_partition_maximum_partitions(self):
        """
        target: verify create maximum partitions
        method: 1. create maximum partitions
                2. create one more partition
        expected: 1. raise exception
        """

        threads_num = 8
        threads = []

        def create_partition(collection, threads_n):
            for _ in range(ct.max_partition_num // threads_n):
                name = cf.gen_unique_str(prefix)
                par_wrap = ApiPartitionWrapper()
                par_wrap.init_partition(collection,
                                        name,
                                        check_task=CheckTasks.check_nothing)

        collection_w = self.init_collection_wrap()
        for _ in range(threads_num):
            t = threading.Thread(target=create_partition,
                                 args=(collection_w.collection, threads_num))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
        p_name = cf.gen_unique_str()
        self.partition_wrap.init_partition(
            collection_w.collection,
            p_name,
            check_task=CheckTasks.err_res,
            check_items={
                ct.err_code: 1,
                ct.err_msg:
                "maximum partition's number should be limit to 4096"
            })

    @pytest.mark.tags(CaseLabel.L0)
    def test_partition_drop_default_partition(self):
        """
        target: verify drop the _default partition
        method: 1. drop the _default partition
        expected: 1. raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # get the default partition
        default_partition, _ = collection_w.partition(
            ct.default_partition_name)
        partition_w = self.init_partition_wrap(collection_w,
                                               ct.default_partition_name)
        assert default_partition.name == partition_w.name

        # verify that drop partition with error
        partition_w.drop(check_task=CheckTasks.err_res,
                         check_items={
                             ct.err_code: 1,
                             ct.err_msg: "default partition cannot be deleted"
                         })

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_drop_partition_twice(self):
        """
        target: verify drop the same partition twice
        method: 1.create a partition with default schema
                2. drop the partition
                3. drop the same partition again
        expected: raise exception on the 2nd drop
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # drop partition
        partition_w.drop()
        assert not collection_w.has_partition(partition_name)[0]

        # verify that drop the partition again with exception
        partition_w.drop(check_task=CheckTasks.err_res,
                         check_items={
                             ct.err_code: 1,
                             ct.err_msg:
                             PartitionErrorMessage.PartitionNotExist
                         })

    @pytest.mark.tags(CaseLabel.L2)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_create_and_drop_multi_times(self):
        """
        target: verify create and drop a partition multiple times
        method: 1. create a partition with default schema
                2. drop the partition
                3. repeat steps 1 and 2 several times
        expected: create and drop successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create and drop the partition 5 times
        partition_name = cf.gen_unique_str(prefix)
        for i in range(5):
            # create partition and check that the partition exists
            partition_w = self.init_partition_wrap(collection_w,
                                                   partition_name)
            assert collection_w.has_partition(partition_name)[0]

            # drop partition and check that the partition not exists
            partition_w.drop()
            assert not collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
    # @pytest.mark.parametrize("flush", [True, False])
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_drop_non_empty_partition(self):
        """
        target: verify drop a partition which has data inserted
        method: 1. create a partition with default schema
                2. insert some data
                3. flush or not flush
                4. drop the partition
        expected: drop successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # insert data to partition
        partition_w.insert(cf.gen_default_dataframe_data())

        # # flush   remove flush for issue #5837
        # if flush:
        #      self._connect().flush([collection_w.name])

        # drop partition
        partition_w.drop()
        assert not collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
    # @pytest.mark.parametrize("flush", [True, False])
    @pytest.mark.parametrize("data", [cf.gen_default_list_data(nb=3000)])
    @pytest.mark.parametrize("index_param", cf.gen_simple_index())
    def test_partition_drop_indexed_partition(self, data, index_param):
        """
        target: verify drop an indexed partition
        method: 1.create a partition
                2. insert some data
                3. create an index
                4. flush or not flush (remove flush step for issue # 5837)
                5. drop the partition
        expected: drop successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # insert data to partition
        ins_res, _ = partition_w.insert(data)
        assert len(ins_res.primary_keys) == len(data[0])

        # create index of collection
        collection_w.create_index(ct.default_float_vec_field_name, index_param)

        # # flush
        # if flush:
        #     self._connect().flush([collection_w.name])

        # drop partition
        partition_w.drop()
        assert not collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_release_empty_partition(self):
        """
        target: verify release an empty partition
        method: 1.create a partition
                2. release the partition
        expected: release successfully
        """
        # create partition
        partition_w = self.init_partition_wrap()
        assert partition_w.is_empty

        # release partition
        partition_w.release()
        # TODO: assert no more memory consumed

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_release_dropped_partition(self):
        """
        target: verify release a dropped partition
        method: 1. create a partition
                2. drop the partition
                3. release the partition
        expected: raise exception
        """
        # create partition
        partition_w = self.init_partition_wrap()

        # drop partition
        partition_w.drop()

        # release the dropped partition and check err response
        partition_w.release(check_task=CheckTasks.err_res,
                            check_items={
                                ct.err_code: 1,
                                ct.err_msg:
                                PartitionErrorMessage.PartitionNotExist
                            })

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_release_dropped_collection(self):
        """
        target: verify release a partition of a dropped collection
        method: 1. create a collection and partition
                2. drop the collection
                3. release the partition
        expected: raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # drop collection
        collection_w.drop()

        # release the partition and check err response
        partition_w.release(check_task=CheckTasks.err_res,
                            check_items={
                                ct.err_code: 1,
                                ct.err_msg: "can't find collection"
                            })

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name, search_vectors",
    #                          [(cf.gen_unique_str(prefix), cf.gen_vectors(1, ct.default_dim))])
    def test_partition_release_after_collection_released(self):
        """
        target: verify release a partition after the collection is released
        method: 1. create a collection and partition
                2. insert some data
                3. release the collection
                4. release the partition
        expected: partition released successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # insert data to partition
        data = cf.gen_default_list_data()
        partition_w.insert(data)
        assert partition_w.num_entities == len(data[0])
        assert collection_w.num_entities == len(data[0])

        # load partition
        partition_w.load()

        # search of partition
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        res_1, _ = partition_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1)
        assert len(res_1) == 1

        # release collection
        collection_w.release()

        # search of partition
        res_2, _ = partition_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1,
            check_task=ct.CheckTasks.err_res,
            check_items={
                ct.err_code: 0,
                ct.err_msg: "not loaded into memory"
            })
        # release partition
        partition_w.release()

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name, data", [(ct.default_partition_name, cf.gen_default_dataframe_data())])
    def test_partition_insert_default_partition(self):
        """
        target: verify insert data into _default partition
        method: 1.create a collection
                2. insert some data into _default partition
        expected: insert successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # get the default partition
        partition_name = ct.default_partition_name
        assert collection_w.has_partition(partition_name)[0]
        partition_w = self.init_partition_wrap(collection_w, partition_name)

        # insert data to partition
        data = cf.gen_default_dataframe_data()
        partition_w.insert(data)
        # self._connect().flush([collection_w.name])
        assert partition_w.num_entities == len(data)

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_insert_dropped_partition(self):
        """
        target: verify insert data into a dropped partition
        method: 1. create a partition
                2. drop the partition
                3. insert data into the dropped partition
        expected: raise exception
        """
        # create partition
        partition_w = self.init_partition_wrap()

        # drop partition
        partition_w.drop()

        # insert data to partition
        partition_w.insert(cf.gen_default_dataframe_data(),
                           check_task=CheckTasks.err_res,
                           check_items={
                               ct.err_code: 1,
                               ct.err_msg: "Partition not exist"
                           })
        # TODO: update the assert error

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_insert_dropped_collection(self):
        """
        target: verify insert data into a partition of a dropped collection
        method: 1. create a collection and a partition
                2. drop the collection
                3. insert data into the partition
        expected: raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)
        assert collection_w.has_partition(partition_name)[0]

        # drop collection
        collection_w.drop()

        # insert data to partition
        partition_w.insert(cf.gen_default_dataframe_data(),
                           check_task=CheckTasks.err_res,
                           check_items={
                               ct.err_code: 1,
                               ct.err_msg: "None Type"
                           })

    @pytest.mark.tags(CaseLabel.L2)
    def test_partition_insert_maximum_size_data(self):
        """
        target: verify insert maximum-size data (256M?) at a time
        method: 1.create a partition
                2. insert maximum size data
        expected: insert successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_w = self.init_partition_wrap(collection_w)

        # insert data to partition
        max_size = 100000  # TODO: clarify the max size of data
        ins_res, _ = partition_w.insert(
            cf.gen_default_dataframe_data(max_size), timeout=40)
        assert len(ins_res.primary_keys) == max_size
        # self._connect().flush([collection_w.name])
        assert partition_w.num_entities == max_size

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("dim", [ct.default_dim - 1, ct.default_dim + 1])
    def test_partition_insert_mismatched_dimensions(self, dim):
        """
        target: verify insert data with mismatched dimensions
        method: 1. create a partition with default dim
                2. insert data with a mismatched dim
        expected: raise exception
        """
        # create partition
        partition_w = self.init_partition_wrap()

        data = cf.gen_default_list_data(nb=10, dim=dim)
        # insert data to partition
        partition_w.insert(data,
                           check_task=CheckTasks.err_res,
                           check_items={
                               ct.err_code: 1,
                               ct.err_msg: "but entities field dim"
                           })

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("sync", [True, False])
    def test_partition_insert_sync(self, sync):
        """
        target: verify insert sync
        method: 1.create a partition
                2. insert data in sync
        expected: insert successfully
        """
        pass
Example #11
    def test_customize_segment_size(self, seg_size, seg_count):
        """
       steps
       """
        log.info(f"start to install milvus with segment size {seg_size}")
        release_name, host, port = _install_milvus(seg_size)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")

        log.info(f"start to e2e verification: {seg_size}")
        # create
        name = cf.gen_unique_str("segsiz")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        nb = 50000
        data = cf.gen_default_list_data(nb=nb)
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res
        # insert 2 million entities
        rounds = 40
        for _ in range(rounds - 1):
            _, res = collection_w.insert(data)
        entities = collection_w.num_entities
        assert entities == nb * rounds

        # load
        collection_w.load()
        utility_wrap = ApiUtilityWrapper()
        segs, _ = utility_wrap.get_query_segment_info(collection_w.name)
        log.info(f"assert segments: {len(segs)}")
        assert len(segs) == seg_count

        # search
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str(),
            timeout=120)
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr, timeout=30)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #12
 def __init__(self):
     super().__init__()
     self.c_wrap.insert(data=cf.gen_default_list_data(nb=5 * constants.ENTITIES_FOR_SEARCH),
                        timeout=timeout, enable_traceback=enable_traceback)
     log.debug(f"Index ready entities: {self.c_wrap.num_entities}")  # do as a flush before indexing
Example #13
    def test_milvus_default(self):
        # create
        name = cf.gen_unique_str(prefix)
        t0 = time.time()
        collection_w = self.init_collection_wrap(name=name, active_trace=True)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        data = cf.gen_default_list_data()
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = time.time()
        num_entities, check_result = collection_w.flush(timeout=180)
        assert check_result
        assert num_entities == len(data[0]) + entities
        tt = time.time() - t0
        entities = collection_w.num_entities
        log.info(f"assert flush: {tt}, entities: {entities}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str())
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #14
    def test_chaos_data_consist(self, connection, chaos_yaml):
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.debug(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.debug(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param={"nprobe": 16},
            limit=1)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 4)
        reconnect(connections, self.host, self.port)

        # verify collection persists
        assert utility.has_collection(c_name)
        log.debug("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.debug("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.debug("assert index persists")
        # verify search results persist
        collection_w2.load()
        search_res2, _ = collection_w2.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param={"nprobe": 16},
            limit=1)
        assert len(search_res2) == 1

        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert query_res2 == query_res
        log.debug("assert query result persists")
Example #15
 def insert(self):
     res, result = self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.DELTA_PER_INS),
                                      timeout=timeout,
                                      enable_traceback=enable_traceback,
                                      check_task=CheckTasks.check_nothing)
     return res, result
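A hedged usage sketch ("checker" is a hypothetical instance of the class this method belongs to): because check_task is CheckTasks.check_nothing, insert() returns a (response, result_flag) pair instead of raising, so callers can count failures and retry.

res, ok = checker.insert()
if not ok:
    log.debug("insert failed; will retry on the next tick")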
Example #16
    def test_expand_data_node(self):
        """
        target: test create and insert api after expand dataNode pod
        method: 1.create collection a and insert df
                2.expand dataNode pod from 1 to 2
                3.verify collection a property and verify create and insert of new collection
        expected: create and insert operations on both collections work correctly
        """
        # deploy all nodes one pod cluster milvus with helm
        release_name = "scale-data"
        # env = HelmEnv(release_name=release_name)
        # host = env.helm_install_cluster_milvus()

        # deploy cluster milvus with dataNode 1 replicas
        default_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image':
            'milvusdb/milvus-dev:master-20211020-b40513b',
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'dependencies.etcd.inCluster.deletionPolicy': 'Delete',
            'dependencies.etcd.inCluster.pvcDeletion': 'true',
            'dependencies.pulsar.inCluster.deletionPolicy': 'Delete',
            'dependencies.pulsar.inCluster.pvcDeletion': 'true',
            'dependencies.storage.inCluster.deletionPolicy': 'Delete',
            'dependencies.storage.inCluster.pvcDeletion': 'true',
        }
        milvusOp = MilvusOperator()
        milvusOp.install(default_config)
        if milvusOp.wait_for_healthy(release_name,
                                     namespace=constants.NAMESPACE):
            endpoint = milvusOp.endpoint(release_name, constants.NAMESPACE)
            endpoint = endpoint.split(':')
            host = endpoint[0]
            port = int(endpoint[-1])
        else:
            raise Exception(f"Failed to install {release_name}")

        # connect
        connections.add_connection(default={"host": host, "port": port})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        milvusOp.upgrade(release_name,
                         {'spec.components.dataNode.replicas': 2},
                         constants.NAMESPACE)
        # env.helm_upgrade_cluster_milvus(dataNode=2)
        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(
            name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # assert old collection ddl
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb * 2

        collection_w.drop()
        new_collection_w.drop()
Example #17
class TestQueryOperation(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test query interface operations
    ******************************************************************
    """

    @pytest.mark.tags(ct.CaseLabel.L3)
    @pytest.mark.parametrize("collection_name", [cf.gen_unique_str(prefix)])
    def test_query_without_connection(self, collection_name):
        """
        target: test query without connection
        method: close connect and query
        expected: raise exception
        """

        # init a collection with default connection
        collection_w = self.init_collection_wrap(name=collection_name)

        # remove default connection
        self.connection_wrap.remove_connection(alias=DefaultConfig.DEFAULT_USING)

        # list connection to check
        self.connection_wrap.list_connections(check_task=ct.CheckTasks.ccr, check_items={ct.list_content: []})

        # query after remove default connection
        collection_w.query(default_term_expr, check_task=CheckTasks.err_res,
                           check_items={ct.err_code: 0, ct.err_msg: cem.ConnectFirst})

    @pytest.mark.tags(ct.CaseLabel.L3)
    @pytest.mark.parametrize("collection_name, data",
                             [(cf.gen_unique_str(prefix), cf.gen_default_list_data(ct.default_nb))])
    def test_query_without_loading(self, collection_name, data):
        """
        target: test query without loading
        method: no loading before query
        expected: raise exception
        """

        # init a collection with default connection
        collection_w = self.init_collection_wrap(name=collection_name)

        # insert data to collection
        collection_w.insert(data=data)

        # check the number of entities; accessing num_entities triggers a flush
        assert collection_w.num_entities == ct.default_nb

        # query without load
        collection_w.query(default_term_expr, check_task=CheckTasks.err_res,
                           check_items={ct.err_code: 1, ct.err_msg: clem.CollNotLoaded % collection_name})

    @pytest.mark.tags(ct.CaseLabel.L3)
    @pytest.mark.parametrize("term_expr", [f'{ct.default_int64_field_name} in [0]'])
    def test_query_expr_single_term_array(self, term_expr):
        """
        target: test query with single array term expr
        method: query with single array value
        expected: query result is one entity
        """

        # init a collection and insert data
        collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True)

        # query the first row of data
        check_vec = vectors[0].iloc[:, [0, 1]][0:1].to_dict('records')
        collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec})
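
    # What the pandas slice above yields, illustrated on a toy frame (a sketch;
    # column names are assumed, the real frames come from the default schema):
    #
    #   import pandas as pd
    #   df = pd.DataFrame({"int64": [0, 1], "float": [0.0, 1.0]})
    #   records = df.iloc[:, [0, 1]][0:1].to_dict('records')
    #   assert records == [{"int64": 0, "float": 0.0}]  # first row, first two columns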

    @pytest.mark.tags(ct.CaseLabel.L3)
    @pytest.mark.parametrize("term_expr", [f'{ct.default_int64_field_name} in [0]'])
    def test_query_binary_expr_single_term_array(self, term_expr, check_content):
        """
        target: test query with single array term expr
        method: query with single array value
        expected: query result is one entity
        """

        # init a collection and insert data
        collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True,
                                                                                 is_binary=True)

        # query the first row of data
        check_vec = vectors[0].iloc[:, [0, 1]][0:1].to_dict('records')
        collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec})

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_expr_all_term_array(self):
        """
        target: test query with all array term expr
        method: query with all array value
        expected: verify query result
        """

        # init a collection and insert data
        collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True)

        # data preparation
        int_values = vectors[0][ct.default_int64_field_name].values.tolist()
        term_expr = f'{ct.default_int64_field_name} in {int_values}'
        check_vec = vectors[0].iloc[:, [0, 1]][0:len(int_values)].to_dict('records')

        # query all array value
        collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec})

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_expr_half_term_array(self):
        """
        target: test query with half array term expr
        method: query with half array value
        expected: verify query result
        """

        half = ct.default_nb // 2
        collection_w, partition_w, df_partition, df_default = self.insert_entities_into_two_partitions_in_half(half)

        int_values = df_default[ct.default_int64_field_name].values.tolist()
        term_expr = f'{ct.default_int64_field_name} in {int_values}'
        res, _ = collection_w.query(term_expr)
        assert len(res) == len(int_values)

    @pytest.mark.xfail(reason="fail")
    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_expr_repeated_term_array(self):
        """
        target: test query with repeated term array on primary field with unique value
        method: query with repeated array value
        expected: verify query result
        """
        collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True)
        int_values = [0, 0, 0, 0]
        term_expr = f'{ct.default_int64_field_name} in {int_values}'
        res, _ = collection_w.query(term_expr)
        assert len(res) == 1
        assert res[0][ct.default_int64_field_name] == int_values[0]

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_after_index(self):
        """
        target: test query after creating index
        method: query after index
        expected: query result is correct
        """
        collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True)

        default_field_name = ct.default_float_vec_field_name
        default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
        index_name = ct.default_index_name
        collection_w.create_index(default_field_name, default_index_params, index_name=index_name)

        collection_w.load()

        int_values = [0]
        term_expr = f'{ct.default_int64_field_name} in {int_values}'
        check_vec = vectors[0].iloc[:, [0, 1]][0:len(int_values)].to_dict('records')
        collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec})

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_after_search(self):
        """
        target: test query after search
        method: query after search
        expected: query result is correct
        """

        limit = 1000
        nb_old = 500
        collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, True, nb_old)

        # search for original data after load; only nb_old entities exist, so hits are capped at nb_old
        vectors_s = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)]
        collection_w.search(vectors_s[:ct.default_nq], ct.default_float_vec_field_name,
                            ct.default_search_params, limit, "int64 >= 0",
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": ct.default_nq, "limit": nb_old})

        # check the number of entities (accessing num_entities triggers a flush)
        assert collection_w.num_entities == nb_old

        term_expr = f'{ct.default_int64_field_name} in [0, 1]'
        check_vec = vectors[0].iloc[:, [0, 1]][0:2].to_dict('records')
        collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec})

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_partition_repeatedly(self):
        """
        target: test query repeatedly on partition
        method: query on partition twice
        expected: verify query result
        """

        # create connection
        self._connect()

        # init collection
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))

        # init partition
        partition_w = self.init_partition_wrap(collection_wrap=collection_w)

        # insert data to partition
        df = cf.gen_default_dataframe_data(ct.default_nb)
        partition_w.insert(df)

        # check the number of entities (accessing num_entities triggers a flush)
        assert collection_w.num_entities == ct.default_nb

        # load partition
        partition_w.load()

        # query twice
        res_one, _ = collection_w.query(default_term_expr, partition_names=[partition_w.name])
        res_two, _ = collection_w.query(default_term_expr, partition_names=[partition_w.name])
        assert res_one == res_two

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_another_partition(self):
        """
        target: test query on another partition
        method: 1. insert entities into two partitions
                2. query on the partition that does not hold the target entity
        expected: query result is empty
        """
        half = ct.default_nb // 2
        collection_w, partition_w, _, _ = self.insert_entities_into_two_partitions_in_half(half)

        term_expr = f'{ct.default_int64_field_name} in [{half}]'
        # half entity in _default partition rather than partition_w
        collection_w.query(term_expr, partition_names=[partition_w.name], check_task=CheckTasks.check_query_results,
                           check_items={exp_res: []})

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_multi_partitions_multi_results(self):
        """
        target: test query on multiple partitions and get multiple results
        method: 1. insert entities into two partitions
                2. query across both partitions for entities from each
        expected: query returns results from both partitions
        """
        half = ct.default_nb // 2
        collection_w, partition_w, _, _ = self.insert_entities_into_two_partitions_in_half(half)

        term_expr = f'{ct.default_int64_field_name} in [{half - 1}, {half}]'
        # half entity in _default, half-1 entity in partition_w
        res, _ = collection_w.query(term_expr, partition_names=[ct.default_partition_name, partition_w.name])
        assert len(res) == 2

    @pytest.mark.tags(ct.CaseLabel.L3)
    def test_query_multi_partitions_single_result(self):
        """
        target: test query on multiple partitions and get a single result
        method: 1. insert into two partitions
                2. query across both partitions for a single entity
        expected: query returns a single result
        """
        half = ct.default_nb // 2
        collection_w, partition_w, df_partition, df_default = self.insert_entities_into_two_partitions_in_half(half)

        term_expr = f'{ct.default_int64_field_name} in [{half}]'
        # half entity in _default
        res, _ = collection_w.query(term_expr, partition_names=[ct.default_partition_name, partition_w.name])
        assert len(res) == 1
        assert res[0][ct.default_int64_field_name] == half

    def insert_entities_into_two_partitions_in_half(self, half):
        """
        insert default entities into two partitions(partition_w and _default) in half(int64 and float fields values)
        :param half: half of nb
        :return: collection wrap and partition wrap
        """
        conn = self._connect()
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        partition_w = self.init_partition_wrap(collection_wrap=collection_w)
        # insert [0, half) into partition_w
        df_partition = cf.gen_default_dataframe_data(nb=half, start=0)
        partition_w.insert(df_partition)
        # insert [half, nb) into _default
        df_default = cf.gen_default_dataframe_data(nb=half, start=half)
        collection_w.insert(df_default)
        conn.flush([collection_w.name])
        collection_w.load()
        return collection_w, partition_w, df_partition, df_default
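
    # Usage sketch for the helper above (hedged; mirrors the partition query
    # tests earlier in this class):
    #
    #     half = ct.default_nb // 2
    #     collection_w, partition_w, df_p, df_d = \
    #         self.insert_entities_into_two_partitions_in_half(half)
    #     res, _ = collection_w.query(f'{ct.default_int64_field_name} in [{half}]',
    #                                 partition_names=[ct.default_partition_name])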
Example No. 18
class TestPartitionParams(TestcaseBase):
    """ Test case of partition interface in parameters"""
    @pytest.mark.tags(CaseLabel.L0)
    # @pytest.mark.parametrize("partition_name, description",
    # [(cf.gen_unique_str(prefix), cf.gen_unique_str("desc_"))])
    def test_partition_default(self):
        """
        target: verify create a partition
        method: 1. create a partition
        expected: 1. create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        description = cf.gen_unique_str("desc_")
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True,
                "num_entities": 0
            })

        # check that the partition has been created
        assert collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.xfail(reason="issue #5375")
    @pytest.mark.parametrize("partition_name", [""])
    def test_partition_empty_name(self, partition_name):
        """
        target: verify create a partition with empty name
        method: 1. create a partition with an empty name
        expected: 1. raise exception
        """
        # create a collection
        collection_w = self.init_collection_wrap()

        # create partition
        self.partition_wrap.init_partition(
            collection_w.collection,
            partition_name,
            check_task=CheckTasks.err_res,
            check_items={
                ct.err_code: 1,
                ct.err_msg: "Partition name should not be empty"
            })

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name, description", [(cf.gen_unique_str(prefix), "")])
    def test_partition_empty_description(self):
        """
        target: verify create a partition with empty description
        method: 1. create a partition with empty description
        expected: 1. create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # init partition
        partition_name = cf.gen_unique_str(prefix)
        description = ""
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True,
                "num_entities": 0
            })

        # check that the partition has been created
        assert collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name, description",
    # [(cf.gen_str_by_length(255), cf.gen_str_by_length(2048))])
    def test_partition_max_description_length(self):
        """
        target: verify create a partition with a 255-character name and a 2048-character description
        method: 1. create a partition with a 255-character name and a 2048-character description
        expected: 1. create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # init partition
        partition_name = cf.gen_str_by_length(255)
        description = cf.gen_str_by_length(2048)
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True
            })

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("collection_name, partition_name, description",
    # [(cf.gen_unique_str(), cf.gen_unique_str(prefix), cf.gen_unique_str())])
    def test_partition_dup_name(self):
        """
        target: verify create partitions with duplicate name
        method: 1. create partitions with duplicate name
        expected: 1. create successfully
                  2. the same partition returned with diff object id
        """
        # create a collection
        collection_w = self.init_collection_wrap()

        # create two partitions
        partition_name = cf.gen_unique_str(prefix)
        description = cf.gen_unique_str()
        partition_w1 = self.init_partition_wrap(collection_w, partition_name,
                                                description)
        partition_w2 = self.init_partition_wrap(collection_w, partition_name,
                                                description)

        # public check func to be extracted
        assert id(partition_w1.partition) != id(partition_w2.partition)
        assert partition_w1.name == partition_w2.name
        assert partition_w1.description == partition_w2.description
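
    # Side note: the two wrappers are distinct client-side objects that refer to
    # the same server-side partition; a sketch of the equivalent check in raw
    # pymilvus (hypothetical Collection instance `coll` and partition "part_a"):
    #
    #     from pymilvus import Partition
    #     p1 = Partition(coll, "part_a")
    #     p2 = Partition(coll, "part_a")
    #     assert p1 is not p2 and p1.name == p2.name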

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("description", ct.get_invalid_strs)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_special_chars_description(self, description):
        """
        target: verify create a partition with special characters in description
        method: 1. create a partition with special characters in description
        expected: 1. create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True,
                "num_entities": 0
            })
        assert collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L0)
    def test_partition_default_name(self):
        """
        target: verify create a partition with default name
        method: 1. get the _default partition
                2. create a partition with _default name
        expected: 1. the same partition returned
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # check that the default partition exists
        assert collection_w.has_partition(ct.default_partition_name)[0]

        # check that can get the _default partition
        collection, _ = collection_w.partition(ct.default_partition_name)

        # check that init the _default partition object
        partition_w = self.init_partition_wrap(collection_w,
                                               ct.default_partition_name)
        assert collection.name == partition_w.name

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_str_by_length(256)])
    def test_partition_maxlength_name(self):
        """
        target: verify create a partition with a name exceeding the max length (256 characters)
        method: 1. create a partition with a 256-character name
        expected: 1. raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_str_by_length(256)
        self.partition_wrap.init_partition(collection_w.collection,
                                           partition_name,
                                           check_task=CheckTasks.err_res,
                                           check_items={
                                               ct.err_code: 1,
                                               ct.err_msg: "is illegal"
                                           })

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("partition_name", ct.get_invalid_strs)
    def test_partition_invalid_name(self, partition_name):
        """
        target: verify create a partition with invalid name
        method: 1. create a partition with invalid names
        expected: 1. raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        self.partition_wrap.init_partition(collection_w.collection,
                                           partition_name,
                                           check_task=CheckTasks.err_res,
                                           check_items={
                                               ct.err_code: 1,
                                               ct.err_msg: "is illegal"
                                           })
        # TODO: assert the error code independently once issue #5144 assigns one

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_none_collection(self):
        """
        target: verify create a partition with none collection
        method: 1. create a partition with none collection
        expected: 1. raise exception
        """
        # create partition with collection is None
        partition_name = cf.gen_unique_str(prefix)
        self.partition_wrap.init_partition(collection=None,
                                           name=partition_name,
                                           check_task=CheckTasks.err_res,
                                           check_items={
                                               ct.err_code: 1,
                                               ct.err_msg: "must be pymilvus.Collection"
                                           })

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    def test_partition_drop(self):
        """
        target: verify drop a partition in one collection
        method: 1. create a partition in one collection
                2. drop the partition
        expected: 1. drop successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)

        # check that the partition exists
        assert collection_w.has_partition(partition_name)[0]

        # drop partition
        partition_w.drop()

        # check that the partition not exists
        assert not collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("search_vectors", [cf.gen_vectors(1, ct.default_dim)])
    def test_partition_release(self):
        """
        target: verify release partition
        method: 1. create a collection and two partitions
                2. insert data into each partition
                3. load the partitions
                4. release partition1
                5. search both partitions
        expected: 1. search on the released partition raises an exception
                  2. search on the other partition still succeeds
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create two partitions
        partition_w1 = self.init_partition_wrap(collection_w)
        partition_w2 = self.init_partition_wrap(collection_w)

        # insert data into the two partitions
        partition_w1.insert(cf.gen_default_list_data())
        partition_w2.insert(cf.gen_default_list_data())

        # load two partitions
        partition_w1.load()
        partition_w2.load()

        # search two partitions
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        res1, _ = partition_w1.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1)
        res2, _ = partition_w2.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1)
        assert len(res1) == 1 and len(res2) == 1

        # release the first partition
        partition_w1.release()

        # check result
        res1, _ = partition_w1.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1,
            check_task=ct.CheckTasks.err_res,
            check_items={
                ct.err_code: 1,
                ct.err_msg: "partitions have been released"
            })
        res2, _ = partition_w2.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1)
        assert len(res2) == 1

    @pytest.mark.tags(CaseLabel.L1)
    # @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
    @pytest.mark.parametrize("data", [
        cf.gen_default_dataframe_data(10),
        cf.gen_default_list_data(10),
        cf.gen_default_tuple_data(10)
    ])
    def test_partition_insert(self, data):
        """
        target: verify insert multiple entities in various formats (dataframe, list, tuple)
        method: 1. create a collection and a partition
                2. partition.insert(data)
                3. insert data again
        expected: 1. insert data successfully
        """
        nums = 10
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(
            collection_w,
            partition_name,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "is_empty": True,
                "num_entities": 0
            })

        # insert data
        partition_w.insert(data)
        # self._connect().flush([collection_w.name])     # don't need flush for issue #5737
        assert not partition_w.is_empty
        assert partition_w.num_entities == nums

        # insert data
        partition_w.insert(data)
        # self._connect().flush([collection_w.name])
        assert not partition_w.is_empty
        assert partition_w.num_entities == (nums + nums)
Example No. 19
class TestPartitionParams(TestcaseBase):
    """ Test case of partition interface in parameters"""
    @pytest.mark.tags(CaseLabel.L0)
    def test_partition_default(self):
        """
        target: verify create a partition
        method: create a partition
        expected: create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        description = cf.gen_unique_str("desc_")
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True,
                "num_entities": 0
            })

        # check that the partition has been created
        assert collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("partition_name", [""])
    def test_partition_empty_name(self, partition_name):
        """
        target: verify create a partition with empty name
        method: create a partition with empty name
        expected: raise exception
        """
        # create a collection
        collection_w = self.init_collection_wrap()

        # create partition
        self.partition_wrap.init_partition(
            collection_w.collection,
            partition_name,
            check_task=CheckTasks.err_res,
            check_items={
                ct.err_code: 1,
                ct.err_msg: "Partition name should not be empty"
            })

    @pytest.mark.tags(CaseLabel.L2)
    def test_partition_empty_description(self):
        """
        target: verify create a partition with empty description
        method: create a partition with empty description
        expected: create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # init partition
        partition_name = cf.gen_unique_str(prefix)
        description = ""
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True,
                "num_entities": 0
            })

        # check that the partition has been created
        assert collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
    def test_partition_max_description_length(self):
        """
        target: verify create a partition with a 255-character name and a 2048-character description
        method: create a partition with a 255-character name and a 2048-character description
        expected: create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # init partition
        partition_name = cf.gen_str_by_length(255)
        description = cf.gen_str_by_length(2048)
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True
            })

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_dup_name(self):
        """
        target: verify create partitions with duplicate names
        method: create partitions with duplicate names
        expected: 1. create successfully
                  2. the same partition returned with diff object ids
        """
        # create a collection
        collection_w = self.init_collection_wrap()

        # create two partitions
        partition_name = cf.gen_unique_str(prefix)
        description = cf.gen_unique_str()
        partition_w1 = self.init_partition_wrap(collection_w, partition_name,
                                                description)
        partition_w2 = self.init_partition_wrap(collection_w, partition_name,
                                                description)

        # public check func to be extracted
        assert id(partition_w1.partition) != id(partition_w2.partition)
        assert partition_w1.name == partition_w2.name
        assert partition_w1.description == partition_w2.description

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("description", ct.get_invalid_strs)
    def test_partition_special_chars_description(self, description):
        """
        target: verify create a partition with special characters in description
        method: create a partition with special characters in description
        expected: create successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        self.init_partition_wrap(
            collection_w,
            partition_name,
            description=description,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "description": description,
                "is_empty": True,
                "num_entities": 0
            })
        assert collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L0)
    def test_partition_default_name(self):
        """
        target: verify create a partition with default name
        method: 1. get the _default partition
                2. create a partition with _default name
        expected: the same partition returned
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # check that the default partition exists
        assert collection_w.has_partition(ct.default_partition_name)[0]

        # check that can get the _default partition
        collection, _ = collection_w.partition(ct.default_partition_name)

        # check that init the _default partition object
        partition_w = self.init_partition_wrap(collection_w,
                                               ct.default_partition_name)
        assert collection.name == partition_w.name

    @pytest.mark.tags(CaseLabel.L2)
    def test_partition_max_length_name(self):
        """
        target: verify create a partition with a name exceeding the max length (256 characters)
        method: create a partition with a 256-character name
        expected: raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_str_by_length(256)
        self.partition_wrap.init_partition(collection_w.collection,
                                           partition_name,
                                           check_task=CheckTasks.err_res,
                                           check_items={
                                               ct.err_code: 1,
                                               ct.err_msg: "is illegal"
                                           })

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("partition_name", ct.get_invalid_strs)
    def test_partition_invalid_name(self, partition_name):
        """
        target: verify create a partition with invalid name
        method: create a partition with invalid names
        expected: raise exception
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        self.partition_wrap.init_partition(collection_w.collection,
                                           partition_name,
                                           check_task=CheckTasks.err_res,
                                           check_items={
                                               ct.err_code: 1,
                                               ct.err_msg: "is illegal"
                                           })
        # TODO: assert the error code independently once issue #5144 assigns one

    @pytest.mark.tags(CaseLabel.L2)
    def test_partition_none_collection(self):
        """
        target: verify create a partition with none collection
        method: create a partition with none collection
        expected: raise exception
        """
        # create partition with collection is None
        partition_name = cf.gen_unique_str(prefix)
        self.partition_wrap.init_partition(collection=None,
                                           name=partition_name,
                                           check_task=CheckTasks.err_res,
                                           check_items={
                                               ct.err_code: 1,
                                               ct.err_msg: "must be pymilvus.Collection"
                                           })

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_drop(self):
        """
        target: verify drop a partition in one collection
        method: 1. create a partition in one collection
                2. drop the partition
        expected: drop successfully
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(collection_w, partition_name)

        # check that the partition exists
        assert collection_w.has_partition(partition_name)[0]

        # drop partition
        partition_w.drop()

        # check that the partition not exists
        assert not collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
    def test_load_partition_respectively(self):
        """
        target: test loading a second partition after one is already loaded
        method: load partition1 and then load another partition
        expected: raise exception
        """
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w1 = self.init_partition_wrap(collection_w)
        partition_w2 = self.init_partition_wrap(collection_w)
        partition_w1.insert(cf.gen_default_list_data())
        partition_w2.insert(cf.gen_default_list_data())
        partition_w1.load()
        error = {ct.err_code: 1,
                 ct.err_msg: "load the partition after load collection is not supported"}
        partition_w2.load(check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_load_partitions_after_release(self):
        """
        target: test releasing partitions after loading them
        method: load partitions and then release them
        expected: no exception
        """
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w1 = self.init_partition_wrap(collection_w,
                                                name="partition_w1")
        partition_w2 = self.init_partition_wrap(collection_w,
                                                name="partition_w2")
        partition_w1.insert(cf.gen_default_list_data())
        partition_w2.insert(cf.gen_default_list_data())
        partition_names = ["partition_w1", "partition_w2"]
        collection_w.load(partition_names)
        collection_w.release(partition_names)

    @pytest.mark.tags(CaseLabel.L2)
    def test_load_partition_after_load_partition(self):
        """
        target: test loading a partition after another was loaded and released
        method: load partition1 and release partition1,
                then load partition2
        expected: no exception
        """
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w1 = self.init_partition_wrap(collection_w)
        partition_w2 = self.init_partition_wrap(collection_w)
        partition_w1.insert(cf.gen_default_list_data())
        partition_w2.insert(cf.gen_default_list_data())
        partition_w1.load()
        partition_w1.release()
        partition_w2.load()

    @pytest.fixture(scope="function", params=ct.get_invalid_strs)
    def get_non_number_replicas(self, request):
        if request.param == 1:
            pytest.skip("1 is valid replica number")
        if request.param is None:
            pytest.skip("None is valid replica number")
        yield request.param
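
    # The fixture above filters ct.get_invalid_strs down to values that are
    # genuinely invalid as replica numbers; a minimal standalone sketch of the
    # same parametrized-fixture-with-skip pattern (hypothetical values):
    #
    #     @pytest.fixture(scope="function", params=[1, None, "x", 1.5])
    #     def bad_value(request):
    #         if request.param in (1, None):
    #             pytest.skip("valid replica number")
    #         yield request.param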

    @pytest.mark.tags(CaseLabel.L2)
    def test_load_partition_replica_non_number(self, get_non_number_replicas):
        """
        target: test load partition with non-number replicas
        method: load with non-number replicas
        expected: raise exception
        """
        # create, insert
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w = self.init_partition_wrap(collection_w)
        partition_w.insert(cf.gen_default_list_data(nb=100))

        # load with non-number replicas
        error = {ct.err_code: 0, ct.err_msg: f"but expected one of: int, long"}
        partition_w.load(replica_number=get_non_number_replicas,
                         check_task=CheckTasks.err_res,
                         check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("replicas", [0, -1, None])
    def test_load_replica_invalid_number(self, replicas):
        """
        target: test load partition with non-positive or None replica number
        method: load with replica number 0, -1, or None
        expected: load succeeds, falling back to 1 replica
        """
        # create, insert
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w = self.init_partition_wrap(collection_w)
        partition_w.insert(cf.gen_default_list_data())
        assert partition_w.num_entities == ct.default_nb

        partition_w.load(replica_number=replicas)
        p_replicas = partition_w.get_replicas()[0]
        assert len(p_replicas.groups) == 1
        query_res, _ = partition_w.query(
            expr=f"{ct.default_int64_field_name} in [0]")
        assert len(query_res) == 1

    @pytest.mark.tags(CaseLabel.L2)
    def test_load_replica_greater_than_querynodes(self):
        """
        target: test load with more replicas than available querynodes
        method: load with 3 replicas (only 2 querynodes)
        expected: raise exception: not enough nodes to create replicas
        """
        # create, insert
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w = self.init_partition_wrap(collection_w)
        partition_w.insert(cf.gen_default_list_data())
        assert partition_w.num_entities == ct.default_nb

        # load with 3 replicas, expecting failure
        error = {
            ct.err_code: 1,
            ct.err_msg: f"no enough nodes to create replicas"
        }
        partition_w.load(replica_number=3,
                         check_task=CheckTasks.err_res,
                         check_items=error)

    @pytest.mark.tags(CaseLabel.ClusterOnly)
    def test_load_replica_change(self):
        """
        target: test load replica change
        method: 1. load with replica number 1
                2. load again with a new replica number (expected to fail)
                3. release the partition
                4. load again with a new replica number
        expected: the reload after release succeeds with the new replica number
        """
        # create, insert
        self._connect()
        collection_w = self.init_collection_wrap()
        partition_w = self.init_partition_wrap(collection_w)
        partition_w.insert(cf.gen_default_list_data())
        assert partition_w.num_entities == ct.default_nb

        partition_w.load(replica_number=1)
        collection_w.query(expr=f"{ct.default_int64_field_name} in [0]",
                           check_task=CheckTasks.check_query_results,
                           check_items={'exp_res': [{
                               'int64': 0
                           }]})
        error = {ct.err_code: 5,
                 ct.err_msg: "Should release first then reload with the new number of replicas"}
        partition_w.load(replica_number=2,
                         check_task=CheckTasks.err_res,
                         check_items=error)

        partition_w.release()
        partition_w.load(replica_number=2)
        collection_w.query(expr=f"{ct.default_int64_field_name} in [0]",
                           check_task=CheckTasks.check_query_results,
                           check_items={'exp_res': [{
                               'int64': 0
                           }]})

        two_replicas, _ = collection_w.get_replicas()
        assert len(two_replicas.groups) == 2

        # verify the loaded segments cover 2 replicas: every segment ID appears
        # twice (so the XOR reduction cancels to 0) and the row count is doubled
        seg_info, _ = self.utility_wrap.get_query_segment_info(
            collection_w.name)
        seg_ids = list(map(lambda seg: seg.segmentID, seg_info))
        num_entities = list(map(lambda seg: seg.num_rows, seg_info))
        assert reduce(lambda x, y: x ^ y, seg_ids) == 0
        assert reduce(lambda x, y: x + y, num_entities) == ct.default_nb * 2
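
        # Why the XOR reduction works: with 2 replicas every segment ID occurs an
        # even number of times, and x ^ x == 0, so duplicates cancel out. Sketch:
        #
        #     from functools import reduce
        #     seg_ids = [101, 102, 101, 102]   # each segment loaded by 2 replicas
        #     assert reduce(lambda x, y: x ^ y, seg_ids) == 0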

    @pytest.mark.tags(CaseLabel.ClusterOnly)
    def test_partition_replicas_change_cross_partitions(self):
        """
        target: test load with different replicas between partitions
        method: 1.Create two partitions and insert data
                2.Load two partitions with different replicas
        expected: Raise an exception
        """
        # Create two partitions and insert data
        collection_w = self.init_collection_wrap()
        partition_w1 = self.init_partition_wrap(collection_w)
        partition_w2 = self.init_partition_wrap(collection_w)
        partition_w1.insert(cf.gen_default_dataframe_data())
        partition_w2.insert(cf.gen_default_dataframe_data(start=ct.default_nb))
        assert collection_w.num_entities == ct.default_nb * 2

        # load with different replicas
        partition_w1.load(replica_number=1)
        partition_w1.release()
        partition_w2.load(replica_number=2)

        # verify both partitions report the same replica groups
        replicas_1, _ = partition_w1.get_replicas()
        replicas_2, _ = partition_w2.get_replicas()
        group1_ids = list(map(lambda g: g.id, replicas_1.groups))
        group2_ids = list(map(lambda g: g.id, replicas_2.groups))
        # list.sort() returns None, so compare sorted copies instead
        assert sorted(group1_ids) == sorted(group2_ids)

        # verify the loaded segments cover 2 replicas of a single partition
        seg_info, _ = self.utility_wrap.get_query_segment_info(
            collection_w.name)
        seg_ids = list(map(lambda seg: seg.segmentID, seg_info))
        num_entities = list(map(lambda seg: seg.num_rows, seg_info))
        assert reduce(lambda x, y: x ^ y, seg_ids) == 0
        assert reduce(lambda x, y: x + y, num_entities) == ct.default_nb * 2

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_release(self):
        """
        target: verify release partition
        method: 1. create a collection and two partitions
                2. insert data into each partition
                3. load partition1
                4. release partition1
                5. release partition2
        expected: 1. the 1st partition is released
                  2. the 2nd partition is released
        """
        # create collection
        collection_w = self.init_collection_wrap()

        # create two partitions
        partition_w1 = self.init_partition_wrap(collection_w)
        partition_w2 = self.init_partition_wrap(collection_w)

        # insert data to two partition
        partition_w1.insert(cf.gen_default_list_data())
        partition_w2.insert(cf.gen_default_list_data())

        # load the first partition only
        partition_w1.load()

        # search partition1
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        res1, _ = partition_w1.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1)
        assert len(res1) == 1

        # release the first partition
        partition_w1.release()
        partition_w2.release()

        # check result
        res1, _ = partition_w1.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            params={"nprobe": 32},
            limit=1,
            check_task=ct.CheckTasks.err_res,
            check_items={
                ct.err_code: 1,
                ct.err_msg: "partitions have been released"
            })

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("data", [
        cf.gen_default_dataframe_data(10),
        cf.gen_default_list_data(10),
        cf.gen_default_tuple_data(10)
    ])
    def test_partition_insert(self, data):
        """
        target: verify insert entities multiple times
        method: 1. create a collection and a partition
                2. partition.insert(data)
                3. insert data again
        expected: insert data successfully
        """
        nums = 10
        # create collection
        collection_w = self.init_collection_wrap()

        # create partition
        partition_name = cf.gen_unique_str(prefix)
        partition_w = self.init_partition_wrap(
            collection_w,
            partition_name,
            check_task=CheckTasks.check_partition_property,
            check_items={
                "name": partition_name,
                "is_empty": True,
                "num_entities": 0
            })

        # insert data
        partition_w.insert(data)
        # self._connect().flush([collection_w.name])     # don't need flush for issue #5737
        assert not partition_w.is_empty
        assert partition_w.num_entities == nums

        # insert data
        partition_w.insert(data)
        # self._connect().flush([collection_w.name])
        assert not partition_w.is_empty
        assert partition_w.num_entities == (nums + nums)
Example No. 20
    def test_simd_compat_e2e(self, request, simd):
        log.info(f"start to e2e verification: {simd}")
        # read the connection info cached by the previous step
        results = request.config.cache.get(simd, None)
        alias = results.get('alias', simd)
        conn = connections.connect(alias=alias)
        assert conn is not None
        simd_cache = request.config.cache.get(simd, None)
        log.info(f"simd_cache: {simd_cache}")
        # create
        name = cf.gen_unique_str("compat")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     using=alias,
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        data = cf.gen_default_list_data()
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush (accessing num_entities triggers a flush)
        t0 = time.time()
        assert collection_w.num_entities == len(data[0]) + entities
        tt = time.time() - t0
        entities = collection_w.num_entities
        log.info(f"assert flush: {tt}, entities: {entities}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            index_name=cf.gen_unique_str())
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example No. 21
    def __init__(self):
        super().__init__()
        self.c_wrap.insert(data=cf.gen_default_list_data(nb=5 * constants.ENTITIES_FOR_SEARCH),
                           check_task='check_nothing')
        log.debug(f"Index ready entities: {self.c_wrap.num_entities}")  # num_entities flushes before indexing