def test_insert_after_create_index(self):
    """
    target: test insert after create index
    method: 1. create index
            2. insert data
    expected: verify index and num entities
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    index, _ = collection_w.index()
    assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params)
    assert collection_w.indexes[0] == index
    df = cf.gen_default_dataframe_data(ct.default_nb)
    collection_w.insert(data=df)
    assert collection_w.num_entities == ct.default_nb
def test_insert_binary_after_index(self):
    """
    target: test insert binary after index
    method: 1. create index
            2. insert binary data
    expected: 1. index ok
              2. num entities correct
    """
    schema = cf.gen_default_binary_collection_schema()
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
    collection_w.create_index(ct.default_binary_vec_field_name, default_binary_index_params)
    assert collection_w.has_index()[0]
    index, _ = collection_w.index()
    assert index == Index(collection_w.collection, ct.default_binary_vec_field_name, default_binary_index_params)
    assert collection_w.indexes[0] == index
    df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    collection_w.insert(data=df)
    assert collection_w.num_entities == ct.default_nb
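# NOTE: `default_binary_index_params` above comes from the shared test constants and is not
# shown in this section. The values below are only an illustrative assumption of what a
# binary-vector index config typically looks like in Milvus (BIN_IVF_FLAT with a binary
# metric such as JACCARD), not a copy of the real constant.
default_binary_index_params = {
    "index_type": "BIN_IVF_FLAT",   # index type for binary vectors
    "metric_type": "JACCARD",       # binary metrics include JACCARD and HAMMING
    "params": {"nlist": 64},
}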
def test_insert_auto_id_create_index(self):
    """
    target: test create index in auto_id=True collection
    method: 1. create auto_id=True collection and insert
            2. create index
    expected: index correct
    """
    schema = cf.gen_default_collection_schema(auto_id=True)
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
    df = cf.gen_default_dataframe_data(ct.default_nb)
    df.drop(ct.default_int64_field_name, axis=1, inplace=True)
    mutation_res, _ = collection_w.insert(data=df)
    assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb)
    assert collection_w.num_entities == ct.default_nb
    # create index
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    index, _ = collection_w.index()
    assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params)
    assert collection_w.indexes[0] == index
def e2e_milvus(host, c_name, collection_exist=False):
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    if collection_exist:
        collection_w.init_collection(name=c_name)
    else:
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(data[-1][:ct.default_nq], ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0][0].id)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
def e2e_milvus(host, c_name):
    """ e2e milvus """
    log.debug(f'pid: {os.getpid()}')
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    df = cf.gen_default_dataframe_data()
    mutation_res, _ = collection_w.insert(df)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(cf.gen_vectors(1, dim=ct.default_dim), ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0].ids)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1. deploy milvus cluster with 1 queryNode
            2. prepare work (connect, create, insert, index and load)
            3. continuously search (daemon thread)
            4. expand queryNode from 1 to 5
            5. continuously insert new data (daemon thread)
            6. shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    release_name = "scale-query"
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': 'harbor.zilliz.cc/milvus/milvus:master-20211202-ed546d0',
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(query_config)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy: {healthy}")
    host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    # host = "10.98.0.8"

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str("scale_query")
    # c_name = 'scale_query_DymS7kI4'
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

    # insert data in several batches
    for i in range(3):
        df = cf.gen_default_dataframe_data(nb)
        collection_w.insert(df)
        log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            default_index_params)

    # load
    collection_w.load()

    # scale queryNode to 5
    mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

    # continuously search
    def do_search():
        while True:
            search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                ct.default_float_vec_field_name,
                                                ct.default_search_params, ct.default_limit)
            log.debug(search_res[0].ids)
            assert len(search_res[0].ids) == ct.default_limit

    t_search = threading.Thread(target=do_search, args=(), daemon=True)
    t_search.start()

    # wait new QN running, continuously insert
    # time.sleep(10)
    healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
    log.info(f"milvus healthy after scale up: {healthy}")
    # wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

    def do_insert():
        while True:
            tmp_df = cf.gen_default_dataframe_data(1000)
            collection_w.insert(tmp_df)

    t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
    t_insert.start()

    log.debug(collection_w.num_entities)
    time.sleep(20)
    log.debug("Expand querynode test finished")

    mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
    time.sleep(60)
    wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

    log.debug(collection_w.num_entities)
    time.sleep(60)
    log.debug("Shrink querynode test finished")
def index(self, name, field_name, collection_name, schema, get_simple_index):
    connections.connect()
    collection = Collection(collection_name, schema=schema)
    return Index(collection, field_name, get_simple_index)
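# A minimal sketch of how the `index` fixture above might be consumed in a test. The
# fixture names (`field_name`, `get_simple_index`, etc.) are assumed to be provided by
# the surrounding test class or conftest; this test body is illustrative, not from the repo.
def test_index_describes_field(self, index, field_name):
    # the Index object built by the fixture should point at the indexed field
    assert index.field_name == field_name
    assert index.params is not None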
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1. deploy milvus cluster with 1 queryNode
            2. prepare work (connect, create, insert, index and load)
            3. continuously search (daemon thread)
            4. expand queryNode from 1 to 5
            5. continuously insert new data (daemon thread)
            6. shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    release_name = "scale-query"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.mode': 'cluster',
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.common.retentionDuration': 60
    }
    mic = MilvusOperator()
    mic.install(query_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        raise MilvusException(message='Milvus healthy timeout 1800s')
    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        # insert data in several batches
        for i in range(3):
            df = cf.gen_default_dataframe_data(nb)
            collection_w.insert(df)
            log.debug(collection_w.num_entities)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                default_index_params)

        # load
        collection_w.load()

        # scale queryNode to 5
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

        @counter
        def do_search():
            """ do search """
            search_res, is_succ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                      ct.default_float_vec_field_name,
                                                      ct.default_search_params, ct.default_limit,
                                                      check_task=CheckTasks.check_nothing)
            assert len(search_res) == 1
            return search_res, is_succ

        def loop_search():
            """ continuously search """
            while True:
                do_search()

        threading.Thread(target=loop_search, args=(), daemon=True).start()

        # wait new QN running, continuously insert
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        @counter
        def do_insert():
            """ do insert """
            return collection_w.insert(cf.gen_default_dataframe_data(1000),
                                       check_task=CheckTasks.check_nothing)

        def loop_insert():
            """ loop insert """
            while True:
                do_insert()

        threading.Thread(target=loop_insert, args=(), daemon=True).start()

        log.debug(collection_w.num_entities)
        time.sleep(20)
        log.debug("Expand querynode test finished")

        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(60)

        scale_common.check_succ_rate(do_search)
        scale_common.check_succ_rate(do_insert)
        log.debug("Shrink querynode test finished")
    except Exception as e:
        raise Exception(str(e))
    finally:
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
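# The `@counter` decorator and `scale_common.check_succ_rate` used above are helpers from
# the scale-test utilities and are not shown in this section. A rough, purely illustrative
# sketch of what such helpers might look like (an assumption, not the actual implementation):
import functools

def counter(func):
    """Hypothetical sketch: count total and successful calls so a success
    rate can be checked after the scaling operations finish."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        wrapper.total += 1
        res = func(*args, **kwargs)
        # assume the wrapped call returns (result, succ_flag), like the ApiWrapper methods
        if isinstance(res, tuple) and len(res) == 2 and res[1]:
            wrapper.succ += 1
        return res
    wrapper.total = 0
    wrapper.succ = 0
    return wrapper

def check_succ_rate(func, threshold=0.9):
    """Hypothetical sketch: assert that most requests issued during scaling succeeded."""
    rate = func.succ / func.total if func.total else 0
    assert rate >= threshold, f"success rate {rate:.2f} below {threshold}"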
                     dtype=DataType.INT64, description="int64", is_primary=True)
field2 = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR,
                     description="float vector", dim=128, is_primary=False)
schema = CollectionSchema(fields=[field1, field2], description="collection description")
collection = Collection(name='demo_film_tutorial', data=None, schema=schema)

print(f"\nCreate index...")
index_params = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 100}
}
index = Index(collection, "embedding", index_params)
print(index.params)
print([index.params for index in collection.indexes])

print(f"\nDrop index...")
index.drop()
print([index.params for index in collection.indexes])
collection.drop()
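# The tutorial snippet above only creates and drops the index. Before the final
# `collection.drop()`, one could also exercise the index by inserting a few vectors,
# loading, and searching. This is an illustrative sketch only: the entity values and
# search parameters are assumptions, not part of the original tutorial.
import random

entities = [
    [i for i in range(10)],                                      # primary keys (field1)
    [[random.random() for _ in range(128)] for _ in range(10)],  # 128-dim embeddings (field2)
]
collection.insert(entities)
collection.create_index("embedding", index_params)

collection.load()
results = collection.search(entities[1][:1], "embedding",
                            param={"metric_type": "L2", "params": {"nprobe": 10}},
                            limit=3)
print(results[0].ids)
collection.release()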
def test_scale_query_node(self):
    """
    target: test scale queryNode
    method: 1. deploy milvus cluster with 1 queryNode
            2. prepare work (connect, create, insert, index and load)
            3. continuously search (daemon thread)
            4. expand queryNode from 1 to 5
            5. continuously insert new data (daemon thread)
            6. shrink queryNode from 5 to 3
    expected: Verify milvus remains healthy and search succeeds during scale
    """
    fail_count = 0
    release_name = "scale-query"
    image_tag = get_latest_tag()
    image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
    query_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': image,
        'spec.components.proxy.serviceType': 'LoadBalancer',
        'spec.components.queryNode.replicas': 1,
        'spec.config.dataCoord.enableCompaction': True,
        'spec.config.dataCoord.enableGarbageCollection': True
    }
    mic = MilvusOperator()
    mic.install(query_config)
    if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200):
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
    else:
        # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
        # mic.uninstall(release_name, namespace=constants.NAMESPACE)
        raise BaseException('Milvus healthy timeout 1200s')
    try:
        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        # insert data in several batches
        for i in range(3):
            df = cf.gen_default_dataframe_data(nb)
            collection_w.insert(df)
            log.debug(collection_w.num_entities)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                default_index_params)

        # load
        collection_w.load()

        # scale queryNode to 5
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

        # continuously search
        def do_search():
            while True:
                search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                    ct.default_float_vec_field_name,
                                                    ct.default_search_params, ct.default_limit)
                log.debug(search_res[0].ids)
                assert len(search_res[0].ids) == ct.default_limit

        t_search = threading.Thread(target=do_search, args=(), daemon=True)
        t_search.start()

        # wait new QN running, continuously insert
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        def do_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(1000)
                collection_w.insert(tmp_df)

        t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
        t_insert.start()

        log.debug(collection_w.num_entities)
        time.sleep(20)
        log.debug("Expand querynode test finished")

        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(60)
        log.debug("Shrink querynode test finished")
    except Exception as e:
        log.error(str(e))
        fail_count += 1
        # raise Exception(str(e))
    finally:
        log.info(f'Test finished with {fail_count} fail request')
        assert fail_count <= 1
        label = f"app.kubernetes.io/instance={release_name}"
        log.info('Start to export milvus pod logs')
        read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
        mic.uninstall(release_name, namespace=constants.NAMESPACE)
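# `wait_pods_ready` (used by both scale tests above) is a helper that is not shown in this
# section. Assuming it polls the Kubernetes API for pods matching the release label, a
# minimal sketch with the official Python client could look like the following; the
# function body is an assumption for illustration, not the actual implementation.
import time
from kubernetes import client, config

def wait_pods_ready(namespace, label_selector, timeout=600):
    """Hypothetical sketch: block until every pod matching label_selector reports Ready."""
    config.load_kube_config()  # assumes the test runs outside the cluster with a kubeconfig
    v1 = client.CoreV1Api()
    deadline = time.time() + timeout
    while time.time() < deadline:
        pods = v1.list_namespaced_pod(namespace, label_selector=label_selector).items
        if pods and all(
            any(c.type == "Ready" and c.status == "True" for c in (p.status.conditions or []))
            for p in pods
        ):
            return True
        time.sleep(5)
    raise TimeoutError(f"pods with {label_selector} not ready within {timeout}s")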