def test_shrink_query_node(self):
        """
        target: test shrink queryNode from 2 to 1
        method: 1.deploy two queryNode
                2.search two collections in two queryNode
                3.upgrade queryNode from 2 to 1
                4.search second collection
        expected: search result is correct
        """
        # deploy
        release_name = "scale-query"
        env = HelmEnv(release_name=release_name, queryNode=2)
        host = env.helm_install_cluster_milvus(
            image_pull_policy=constants.IF_NOT_PRESENT)

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # collection one
        data = cf.gen_default_list_data(nb)
        c_name = "query_scale_one"
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        collection_w.insert(data)
        assert collection_w.num_entities == nb
        collection_w.load()
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]

        # collection two
        c_name_2 = "query_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(
            name=c_name_2, schema=cf.gen_default_collection_schema())
        collection_w2.insert(data)
        assert collection_w2.num_entities == nb
        collection_w2.load()
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]

        # scale queryNode pod
        env.helm_upgrade_cluster_milvus(queryNode=1)

        # search
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]
    def test_customize_segment_size(self, seg_size, seg_count):
        """
       steps
       """
        log.info(f"start to install milvus with segment size {seg_size}")
        release_name, host, port = _install_milvus(seg_size)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")

        log.info(f"start to e2e verification: {seg_size}")
        # create
        name = cf.gen_unique_str("segsiz")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        nb = 50000
        data = cf.gen_default_list_data(nb=nb)
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res
        # insert 2 million entities
        rounds = 40
        for _ in range(rounds-1):
            _, res = collection_w.insert(data)
        entities = collection_w.num_entities
        assert entities == nb * rounds

        # load
        collection_w.load()
        utility_wrap = ApiUtilityWrapper()
        segs, _ = utility_wrap.get_query_segment_info(collection_w.name)
        log.info(f"assert segments: {len(segs)}")
        assert len(segs) == seg_count

        # search
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(data=search_vectors,
                                       anns_field=ct.default_float_vec_field_name,
                                       param=search_params, limit=1, timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {"index_type": "IVF_SQ8", "params": {"nlist": 64}, "metric_type": "L2"}
        t0 = time.time()
        index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
                                             index_params=_index_params,
                                             name=cf.gen_unique_str(), timeout=120)
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(data=search_vectors,
                                       anns_field=ct.default_float_vec_field_name,
                                       param=search_params, limit=1, timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr, timeout=30)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Beispiel #3
0
    def test_scale_in_query_node_less_than_replicas(self):
        """
        target: test scale in cluster and querynode < replica
        method: 1.Deploy cluster with 3 querynodes
                2.Create and insert data, flush
                3.Load collection with 2 replica number
                4.Scale in querynode from 3 to 1 and query
                5.Scale out querynode from 1 back to 3
        expected: Verify search successfully after scale out
        """
        release_name = "scale-in-query"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.mode': 'cluster',
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 2,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message=f'Milvus healthy timeout 1800s')
        try:
            # prepare collection
            connections.connect("scale-in", host=host, port=19530)
            utility_w = ApiUtilityWrapper()
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=cf.gen_unique_str("scale_in"),
                schema=cf.gen_default_collection_schema(),
                using="scale-in")
            collection_w.insert(cf.gen_default_dataframe_data())
            assert collection_w.num_entities == ct.default_nb

            # load multi replicas and search success
            collection_w.load(replica_number=2)
            search_res, is_succ = collection_w.search(
                cf.gen_vectors(1, ct.default_dim),
                ct.default_float_vec_field_name, ct.default_search_params,
                ct.default_limit)
            assert len(search_res[0].ids) == ct.default_limit
            log.info("Search successfully after load with 2 replicas")
            log.debug(collection_w.get_replicas()[0])
            log.debug(
                utility_w.get_query_segment_info(collection_w.name,
                                                 using="scale-in"))

            # scale in querynode from 2 to 1, less than replica number
            log.debug("Scale in querynode from 2 to 1")
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 1},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            # search and not assure success
            collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                ct.default_float_vec_field_name,
                                ct.default_search_params,
                                ct.default_limit,
                                check_task=CheckTasks.check_nothing)
            log.debug(
                collection_w.get_replicas(
                    check_task=CheckTasks.check_nothing)[0])

            # scale querynode from 1 back to 2
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 2},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            # verify search success
            collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                ct.default_float_vec_field_name,
                                ct.default_search_params, ct.default_limit)
            # Verify replica info is correct
            replicas = collection_w.get_replicas()[0]
            assert len(replicas.groups) == 2
            for group in replicas.groups:
                assert len(group.group_nodes) == 1
            # Verify loaded segment info is correct
            seg_info = utility_w.get_query_segment_info(collection_w.name,
                                                        using="scale-in")[0]
            num_entities = 0
            for seg in seg_info:
                assert len(seg.nodeIds) == 2
                num_entities += seg.num_rows
            assert num_entities == ct.default_nb

        except Exception as e:
            raise Exception(str(e))

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Beispiel #4
0
    def test_scale_query_node(self):
        """
        target: test scale queryNode
        method: 1.deploy milvus cluster with 1 queryNode
                2.prepare work (connect, create, insert, index and load)
                3.continuously search (daemon thread)
                4.expand queryNode from 2 to 5
                5.continuously insert new data (daemon thread)
                6.shrink queryNode from 5 to 3
        expected: Verify milvus remains healthy and search successfully during scale
        """
        release_name = "scale-query"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'spec.mode': 'cluster',
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 1,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message=f'Milvus healthy timeout 1800s')

        try:
            # connect
            connections.add_connection(default={"host": host, "port": 19530})
            connections.connect(alias='default')

            # create
            c_name = cf.gen_unique_str("scale_query")
            # c_name = 'scale_query_DymS7kI4'
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=c_name,
                schema=cf.gen_default_collection_schema(),
                shards_num=2)

            # insert two segments
            for i in range(3):
                df = cf.gen_default_dataframe_data(nb)
                collection_w.insert(df)
                log.debug(collection_w.num_entities)

            # create index
            collection_w.create_index(ct.default_float_vec_field_name,
                                      default_index_params)
            assert collection_w.has_index()[0]
            assert collection_w.index()[0] == Index(
                collection_w.collection, ct.default_float_vec_field_name,
                default_index_params)

            # load
            collection_w.load()

            # scale queryNode to 5
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 5},
                        constants.NAMESPACE)

            @counter
            def do_search():
                """ do search """
                search_res, is_succ = collection_w.search(
                    cf.gen_vectors(1, ct.default_dim),
                    ct.default_float_vec_field_name,
                    ct.default_search_params,
                    ct.default_limit,
                    check_task=CheckTasks.check_nothing)
                assert len(search_res) == 1
                return search_res, is_succ

            def loop_search():
                """ continuously search """
                while True:
                    do_search()

            threading.Thread(target=loop_search, args=(), daemon=True).start()

            # wait new QN running, continuously insert
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            @counter
            def do_insert():
                """ do insert """
                return collection_w.insert(cf.gen_default_dataframe_data(1000),
                                           check_task=CheckTasks.check_nothing)

            def loop_insert():
                """ loop insert """
                while True:
                    do_insert()

            threading.Thread(target=loop_insert, args=(), daemon=True).start()

            log.debug(collection_w.num_entities)
            time.sleep(20)
            log.debug("Expand querynode test finished")

            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 3},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            log.debug(collection_w.num_entities)
            time.sleep(60)
            scale_common.check_succ_rate(do_search)
            scale_common.check_succ_rate(do_insert)
            log.debug("Shrink querynode test finished")

        except Exception as e:
            raise Exception(str(e))

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Beispiel #5
0
    def test_scale_query_node_replicas(self):
        """
        target: test scale out querynode when load multi replicas
        method: 1.Deploy cluster with 5 querynodes
                2.Create collection with 2 shards
                3.Insert 10 segments and flushed
                4.Load collection with 2 replicas
                5.Scale out querynode from 5 to 6 while search and insert growing data
        expected: Verify search succ rate is 100%
        """
        release_name = "scale-replica"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.mode': 'cluster',
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 5,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message=f'Milvus healthy timeout 1800s')

        try:
            scale_querynode = random.choice([6, 7, 4, 3])
            connections.connect("scale-replica", host=host, port=19530)

            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=cf.gen_unique_str("scale_out"),
                schema=cf.gen_default_collection_schema(),
                using='scale-replica',
                shards_num=3)

            # insert 10 sealed segments
            for i in range(5):
                df = cf.gen_default_dataframe_data(nb=nb, start=i * nb)
                collection_w.insert(df)
                assert collection_w.num_entities == (i + 1) * nb

            collection_w.load(replica_number=2)

            @counter
            def do_search():
                """ do search """
                search_res, is_succ = collection_w.search(
                    cf.gen_vectors(1, ct.default_dim),
                    ct.default_float_vec_field_name,
                    ct.default_search_params,
                    ct.default_limit,
                    check_task=CheckTasks.check_nothing)
                assert len(search_res) == 1
                return search_res, is_succ

            def loop_search():
                """ continuously search """
                while True:
                    do_search()

            threading.Thread(target=loop_search, args=(), daemon=True).start()

            # scale out
            mic.upgrade(
                release_name,
                {'spec.components.queryNode.replicas': scale_querynode},
                constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")
            log.debug("Scale out querynode success")

            time.sleep(100)
            scale_common.check_succ_rate(do_search)
            log.debug("Scale out test finished")

        except Exception as e:
            raise Exception(str(e))

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
    def test_chaos_data_consist(self, connection, chaos_yaml):
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.debug(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.debug(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param={"nprobe": 16},
            limit=1)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 4)
        reconnect(connections, self.host, self.port)

        # verify collection persists
        assert utility.has_collection(c_name)
        log.debug("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.debug("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.debug("assert index persists")
        # verify search results persist

        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert query_res2 == query_res
        log.debug("assert query result persists")
Beispiel #7
0
    def test_chaos_data_consist(self, connection, chaos_yaml):
        """
        target: verify data consistence after chaos injected and recovered
        method: 1. create a collection, insert some data, search and query
                2. inject a chaos object
                3. reconnect to service
                4. verify a) data entities persists, index persists,
                          b) search and query results persist
        expected: collection data and results persist
        """
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.info(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.info(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,999,99]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.info(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 3)
        reconnect(connections, alias='default')

        # verify collection persists
        assert utility.has_collection(c_name)
        log.info("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.info("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.info("assert index persists")
        # verify search results persist
        collection_w2.load()
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1
        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert len(query_res2) == len(query_res)
        log.info("assert query result persists")
Beispiel #8
0
    def test_scale_query_node(self):
        """
        target: test scale queryNode
        method: 1.deploy milvus cluster with 1 queryNode
                2.prepare work (connect, create, insert, index and load)
                3.continuously search (daemon thread)
                4.expand queryNode from 2 to 5
                5.continuously insert new data (daemon thread)
                6.shrink queryNode from 5 to 3
        expected: Verify milvus remains healthy and search successfully during scale
        """
        fail_count = 0
        release_name = "scale-query"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 1,
            'spec.config.dataCoord.enableCompaction': True,
            'spec.config.dataCoord.enableGarbageCollection': True
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200):
            host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
        else:
            # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
            # mic.uninstall(release_name, namespace=constants.NAMESPACE)
            raise BaseException(f'Milvus healthy timeout 1200s')

        try:
            # connect
            connections.add_connection(default={"host": host, "port": 19530})
            connections.connect(alias='default')

            # create
            c_name = cf.gen_unique_str("scale_query")
            # c_name = 'scale_query_DymS7kI4'
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

            # insert two segments
            for i in range(3):
                df = cf.gen_default_dataframe_data(nb)
                collection_w.insert(df)
                log.debug(collection_w.num_entities)

            # create index
            collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
            assert collection_w.has_index()[0]
            assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                    default_index_params)

            # load
            collection_w.load()

            # scale queryNode to 5
            mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

            # continuously search
            def do_search():
                while True:
                    search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                        ct.default_float_vec_field_name,
                                                        ct.default_search_params, ct.default_limit)
                    log.debug(search_res[0].ids)
                    assert len(search_res[0].ids) == ct.default_limit

            t_search = threading.Thread(target=do_search, args=(), daemon=True)
            t_search.start()

            # wait new QN running, continuously insert
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

            def do_insert():
                while True:
                    tmp_df = cf.gen_default_dataframe_data(1000)
                    collection_w.insert(tmp_df)

            t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
            t_insert.start()

            log.debug(collection_w.num_entities)
            time.sleep(20)
            log.debug("Expand querynode test finished")

            mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

            log.debug(collection_w.num_entities)
            time.sleep(60)
            log.debug("Shrink querynode test finished")

        except Exception as e:
            log.error(str(e))
            fail_count += 1
            # raise Exception(str(e))

        finally:
            log.info(f'Test finished with {fail_count} fail request')
            assert fail_count <= 1
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Beispiel #9
0
    def test_simd_compat_e2e(self, simd_id):
        """
       steps
       1. [test_milvus_install]: set up milvus with customized simd configured
       2. [test_simd_compat_e2e]: verify milvus is working well
       4. [test_milvus_cleanup]: delete milvus instances in teardown
       """
        simd = supported_simd_types[simd_id]
        log.info(f"start to install milvus with simd {simd}")
        release_name, host, port = _install_milvus(simd)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")
        log.info(f"milvus simdType: {mil.simd_type}")
        assert str(mil.simd_type).lower() in [
            simd_type.lower() for simd_type in supported_simd_types[simd_id:]
        ]

        log.info(f"start to e2e verification: {simd}")
        # create
        name = cf.gen_unique_str("compat")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        data = cf.gen_default_list_data()
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = time.time()
        assert collection_w.num_entities == len(data[0]) + entities
        tt = time.time() - t0
        entities = collection_w.num_entities
        log.info(f"assert flush: {tt}, entities: {entities}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str())
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")