Example #1
def e2e_milvus(host, c_name):
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    # c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
    # collection_w.init_collection(name=c_name)

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # create index
    collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(data[-1][:ct.default_nq], ct.default_float_vec_field_name,
                                        ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
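
A minimal invocation sketch for the helper above, assuming a locally reachable Milvus; both argument values are placeholders, not from the original:

# Hypothetical usage: host and collection name are assumed example values.
e2e_milvus(host='127.0.0.1', c_name='e2e_demo_collection')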
Example #2
    def test_shrink_index_node(self):
        """
        target: test shrink indexNode from 2 to 1
        method: 1.deploy two indexNode
                2.create index with two indexNode
                3.shrink indexNode from 2 to 1
                4.create index with 1 indexNode
        expected: creating the index with one indexNode takes roughly twice as long as with two
        """
        release_name = "scale-index"
        env = HelmEnv(release_name=release_name, indexNode=2)
        env.helm_install_cluster_milvus()

        # connect
        connections.add_connection(default={
            "host": '10.98.0.8',
            "port": 19530
        })
        connections.connect(alias='default')

        data = cf.gen_default_dataframe_data(nb)

        # create
        c_name = "index_scale_one"
        collection_w = ApiCollectionWrapper()
        # collection_w.init_collection(name=c_name)
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        loop = 10
        for i in range(loop):
            collection_w.insert(data)
        assert collection_w.num_entities == nb * loop

        # create index with two indexNodes
        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t0 = datetime.datetime.now() - start

        log.debug(f'two indexNodes: {t0}')

        collection_w.drop_index()
        assert not collection_w.has_index()[0]

        # shrink indexNode from 2 to 1
        env.helm_upgrade_cluster_milvus(indexNode=1)

        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t1 = datetime.datetime.now() - start

        log.debug(f'one indexNode: {t1}')
        log.debug(t1 / t0)
        assert round(t1 / t0) == 2
Example #3
 def __init__(self):
     try:
         self.collection = None
         connections.connect(host=MILVUS_HOST, port=MILVUS_PORT)
         LOGGER.debug("Successfully connect to Milvus with IP:{} and PORT:{}".format(MILVUS_HOST, MILVUS_PORT))
     except Exception as e:
         LOGGER.error("Failed to connect Milvus: {}".format(e))
         sys.exit(1)
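
This excerpt depends on module-level names defined elsewhere in its source file. A plausible sketch of those definitions, with values that are assumptions made only to keep the excerpt self-contained:

import logging
import sys

from pymilvus_orm import connections  # or pymilvus, depending on the source file

# Assumed configuration; the real file defines its own values.
MILVUS_HOST = '127.0.0.1'
MILVUS_PORT = 19530
LOGGER = logging.getLogger(__name__)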
Example #4
 def test_collection_by_DataFrame(self):
     from pymilvus_orm import Collection, connections
     from pymilvus_orm.schema import FieldSchema, CollectionSchema
     from pymilvus_orm.types import DataType
     fields = [
         FieldSchema("int64", DataType.INT64),
         FieldSchema("float", DataType.FLOAT),
         FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=128)
     ]
     collection_schema = CollectionSchema(fields, primary_field="int64")
     connections.connect()
     collection = Collection(name=gen_collection_name(),
                             schema=collection_schema)
     collection.drop()
Example #5
    def test_expand_query_node(self):
        """
        target: test expand queryNode from 1 to 2
        method: 1.deploy a single-queryNode cluster and search a collection
                2.expand queryNode from 1 to 2
                3.create a second collection with the same data and search it
        expected: search results of the two collections are consistent
        """
        release_name = "scale-query"
        env = HelmEnv(release_name=release_name)
        env.helm_install_cluster_milvus()

        # connect
        connections.add_connection(default={
            "host": '10.98.0.8',
            "port": 19530
        })
        connections.connect(alias='default')

        # create
        c_name = "query_scale_one"
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # # create index
        # collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        # assert collection_w.has_index()[0]
        # assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
        #                                         default_index_params)
        collection_w.load()
        # vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(5)]
        res1, _ = collection_w.search(data[-1][:5],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)

        # scale queryNode pod
        env.helm_upgrade_cluster_milvus(queryNode=2)

        c_name_2 = "query_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(
            name=c_name_2, schema=cf.gen_default_collection_schema())
        collection_w2.insert(data)
        assert collection_w2.num_entities == ct.default_nb
        collection_w2.load()
        res2, _ = collection_w2.search(data[-1][:5],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)

        assert res1[0].ids == res2[0].ids
Example #6
 def connection(self, host, port):
     connections.add_connection(default={"host": host, "port": port})
     conn = connections.connect(alias='default')
     if conn is None:
         raise Exception("no connections")
     self.host = host
     self.port = port
     return conn
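
The self parameter shows this is a method on a wrapper class. A hedged usage sketch, where MilvusHelper is a hypothetical name for that class:

helper = MilvusHelper()                        # hypothetical host class
conn = helper.connection('127.0.0.1', 19530)   # assumed local deployment
print(helper.host, helper.port)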
Example #7
 def connection(self):
     connections.add_connection(default={
         "host": "192.168.1.239",
         "port": 19530
     })
     conn = connections.connect(alias='default')
     if conn is None:
         raise Exception("no connections")
     return conn
Example #8
    def __init__(self, name):
        connections.add_connection(default={
            'host': '127.0.0.1',
            'port': '19530',
        })
        connections.connect(alias='default')  # connect via the alias registered above
        id_field = FieldSchema(name='id',
                               is_primary=True,
                               dtype=DataType.INT64,
                               description='id')
        image_field = FieldSchema(name='image',
                                  dtype=DataType.FLOAT_VECTOR,
                                  dim=512,
                                  description='image')

        schema = CollectionSchema(fields=[id_field, image_field],
                                  primary_field='id')
        self.collection = Collection(name=name, schema=schema)  # keep a handle for later use
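
A hedged follow-up sketch: indexing the 512-dim image field with the same IVF_FLAT parameters used in Example #15. ImageStore is a hypothetical name for the class hosting the __init__ above:

store = ImageStore('images')  # hypothetical class and collection name
index_params = {
    'index_type': 'IVF_FLAT',
    'params': {'nlist': 128},
    'metric_type': 'L2',
}
store.collection.create_index(field_name='image', index_params=index_params)
store.collection.load()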
Example #9
    def test_expand_data_node(self):
        """
        target: test create and insert api after expand dataNode pod
        method: 1.create collection a and insert df
                2.expand dataNode pod from 1 to 2
                3.verify collection a property and verify create and insert of new collection
        expected: create and insert operations succeed on both collections
        """
        # deploy all nodes one pod cluster milvus with helm
        release_name = "scale-test"
        env = HelmEnv(release_name=release_name)
        env.helm_install_cluster_milvus()
        host = env.get_svc_external_ip()

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        env.helm_upgrade_cluster_milvus(dataNode=2)
        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # verify insert into the old collection still works
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb * 2

        collection_w.drop()
        new_collection_w.drop()
Example #10
    def test_shrink_data_node(self):
        """
        target: test shrink dataNode from 2 to 1
        method: 1.create collection and insert df 2. shrink dataNode 3.insert df
        expected: verify the property of collection which channel on shrink pod
        """
        release_name = "scale-test"
        env = HelmEnv(release_name=release_name, dataNode=2)
        env.helm_install_cluster_milvus(image_pull_policy=constants.IF_NOT_PRESENT)

        # connect
        connections.add_connection(default={"host": '10.98.0.8', "port": 19530})
        connections.connect(alias='default')

        c_name = "data_scale_one"
        data = cf.gen_default_list_data(ct.default_nb)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb

        c_name_2 = "data_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
        mutation_res2, _ = collection_w2.insert(data)
        assert mutation_res2.insert_count == ct.default_nb
        assert collection_w2.num_entities == ct.default_nb

        env.helm_upgrade_cluster_milvus(dataNode=1)

        assert collection_w.num_entities == ct.default_nb
        mutation_res2, _ = collection_w2.insert(data)
        assert collection_w2.num_entities == ct.default_nb * 2
        collection_w.drop()
        collection_w2.drop()
Example #11
def create_collection():
    connections.connect()
    yield
    connections.remove_connection(alias='default')
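
The yield makes this a pytest fixture body: it connects during setup and removes the connection on teardown. A hedged sketch of how it would be declared and consumed; the decorator and the test name are assumptions:

import pytest
from pymilvus_orm import connections  # assumed import, matching the other examples

@pytest.fixture
def create_collection():
    connections.connect()
    yield
    connections.remove_connection(alias='default')

def test_with_connection(create_collection):
    # Hypothetical test body; runs while the fixture's connection is open.
    pass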
Example #12
    def test_shrink_query_node(self):
        """
        target: test shrink queryNode from 2 to 1
        method: 1.deploy two queryNode
                2.search two collections in two queryNode
                3.shrink queryNode from 2 to 1
                4.search second collection
        expected: search result is correct
        """
        # deploy
        release_name = "scale-query"
        env = HelmEnv(release_name=release_name, queryNode=2)
        env.helm_install_cluster_milvus(
            image_pull_policy=constants.IF_NOT_PRESENT)

        # connect
        connections.add_connection(default={
            "host": '10.98.0.8',
            "port": 19530
        })
        connections.connect(alias='default')

        # collection one
        data = cf.gen_default_list_data(nb)
        c_name = "query_scale_one"
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        collection_w.insert(data)
        assert collection_w.num_entities == nb
        collection_w.load()
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]

        # collection two
        c_name_2 = "query_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(
            name=c_name_2, schema=cf.gen_default_collection_schema())
        collection_w2.insert(data)
        assert collection_w2.num_entities == nb
        collection_w2.load()
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]

        # scale queryNode pod from 2 to 1
        env.helm_upgrade_cluster_milvus(queryNode=1)

        # search
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]
Example #13
 def __init__(self, logger, milvus_host='127.0.0.1', milvus_port=19530):
     self.logger = logger
     self.host = milvus_host
     self.port = milvus_port
     self.collection = None
     connections.connect(host=self.host, port=self.port)
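
A hedged construction sketch for the helper above; MilvusOps is a hypothetical name for the enclosing class, and the logger setup is an assumption:

import logging

logging.basicConfig(level=logging.INFO)
client = MilvusOps(logging.getLogger(__name__),
                   milvus_host='127.0.0.1', milvus_port=19530)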
Example #14
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.

import logging

try:
    from pymilvus_orm import connections
except ImportError:
    from os.path import dirname, abspath
    import sys

    sys.path.append(dirname(dirname(abspath(__file__))))

    from pymilvus_orm import connections

LOGGER = logging.getLogger(__name__)

print("start connection")
conn = connections.connect()
LOGGER.info(conn.list_collections())
print("end connection")

Example #15
import random
import time

# NOTE: imports assumed for a standalone run; adjust to the pymilvus_orm
# version in use.
from pymilvus_orm import Collection, connections, list_collections
from pymilvus_orm.schema import CollectionSchema, FieldSchema
from pymilvus_orm.types import DataType


def hello_milvus():
    # create connection
    connections.connect()

    print(f"\nList collections...")
    print(list_collections())

    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="random_value", dtype=DataType.DOUBLE),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")

    print(f"\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=default_schema)

    print(f"\nList collections...")
    print(list_collections())

    #  insert data
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    collection.insert([[i for i in range(nb)],
                       [float(random.randrange(-20, -10)) for _ in range(nb)],
                       vectors])

    print(f"\nGet collection entities...")
    print(collection.num_entities)

    # create index and load table
    default_index = {
        "index_type": "IVF_FLAT",
        "params": {
            "nlist": 128
        },
        "metric_type": "L2"
    }
    print(f"\nCreate index...")
    collection.create_index(field_name="float_vector",
                            index_params=default_index)
    print(f"\nload collection...")
    collection.load()

    # load and search
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    start_time = time.time()
    print("\nSearch...")
    # define output_fields of search result
    res = collection.search(vectors[-2:],
                            "float_vector",
                            search_params,
                            topK,
                            "count > 100",
                            output_fields=["count", "random_value"])
    end_time = time.time()

    # show result
    for hits in res:
        for hit in hits:
            # Get value of the random value field for search result
            print(hit, hit.entity.get("random_value"))
    print("search latency = %.4fs" % (end_time - start_time))

    # drop collection
    collection.drop()
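
A minimal entry point for running the walkthrough directly, assuming the imports added at the top of the example:

if __name__ == '__main__':
    hello_milvus()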