def e2e_milvus(host, c_name):
    """Run a connect/create/insert/index/search/query round trip.

    :param host: address registered for the ``default`` connection (port 19530)
    :param c_name: name passed to ``init_collection``
    """
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    wrapper = ApiCollectionWrapper()
    wrapper.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    rows = cf.gen_default_list_data(ct.default_nb)
    insert_res, _ = wrapper.insert(rows)
    assert insert_res.insert_count == ct.default_nb

    # create index
    wrapper.create_index(ct.default_float_vec_field_name, ct.default_index)
    assert wrapper.has_index()[0]
    actual_index = wrapper.index()[0]
    expected_index = Index(wrapper.collection, ct.default_float_vec_field_name, ct.default_index)
    assert actual_index == expected_index

    # search
    wrapper.load()
    query_vectors = rows[-1][:ct.default_nq]
    search_res, _ = wrapper.search(query_vectors, ct.default_float_vec_field_name,
                                   ct.default_search_params, ct.default_limit)
    assert len(search_res[0]) == ct.default_limit

    # query: look the first hit back up by its primary key
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = wrapper.query(term_expr, output_fields=["*", "%"])
    first_row = query_res[0]
    assert first_row[ct.default_int64_field_name] == ids
def test_shrink_index_node(self):
    """
    target: test shrink indexNode from 2 to 1
    method: 1.deploy two indexNode
            2.create index with two indexNode
            3.shrink indexNode from 2 to 1
            4.create index with 1 indexNode
    expected: The cost of one indexNode is about twice that of two indexNodes
    """
    env = HelmEnv(release_name="scale-index", indexNode=2)
    env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": '10.98.0.8', "port": 19530})
    connections.connect(alias='default')

    frame = cf.gen_default_dataframe_data(nb)

    # create
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name="index_scale_one",
                                 schema=cf.gen_default_collection_schema())

    def timed_index_build():
        """Create the vector index, check it exists, return the elapsed time."""
        began = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        return datetime.datetime.now() - began

    # insert the same frame several times
    rounds = 10
    for _ in range(rounds):
        collection_w.insert(frame)
    assert collection_w.num_entities == nb * rounds

    # build the index while two indexNodes are deployed
    t0 = timed_index_build()
    log.debug(f'two indexNodes: {t0}')

    collection_w.drop_index()
    assert not collection_w.has_index()[0]

    # shrink indexNode from 2 to 1, then rebuild the index
    env.helm_upgrade_cluster_milvus(indexNode=1)
    t1 = timed_index_build()

    log.debug(f'one indexNode: {t1}')
    ratio = t1 / t0
    log.debug(ratio)
    assert round(ratio) == 2
def __init__(self):
    """Connect to Milvus at MILVUS_HOST:MILVUS_PORT; exit with status 1 on failure."""
    try:
        self.collection = None
        connections.connect(host=MILVUS_HOST, port=MILVUS_PORT)
        success_msg = "Successfully connect to Milvus with IP:{} and PORT:{}".format(
            MILVUS_HOST, MILVUS_PORT)
        LOGGER.debug(success_msg)
    except Exception as err:
        # Any failure in the block above is logged and terminates the process.
        failure_msg = "Failed to connect Milvus: {}".format(err)
        LOGGER.error(failure_msg)
        sys.exit(1)
def test_collection_by_DataFrame(self):
    """Create a collection from an explicit three-field schema, then drop it."""
    from pymilvus_orm import Collection, connections
    from pymilvus_orm.schema import FieldSchema, CollectionSchema
    from pymilvus_orm.types import DataType

    # int64 primary key, a float scalar and a 128-dim float vector
    pk_field = FieldSchema("int64", DataType.INT64)
    scalar_field = FieldSchema("float", DataType.FLOAT)
    vector_field = FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=128)
    schema = CollectionSchema([pk_field, scalar_field, vector_field], primary_field="int64")

    connections.connect()
    created = Collection(name=gen_collection_name(), schema=schema)
    created.drop()
def test_expand_query_node(self):
    """
    target: test search after expand queryNode to 2
    method: 1.deploy cluster, create collection one, insert, load and search
            2.upgrade the release with queryNode=2
            3.create collection two with the same data, load and search
    expected: both searches return the same ids for the first query vector
    """
    env = HelmEnv(release_name="scale-query")
    env.helm_install_cluster_milvus()

    # connect
    connections.add_connection(default={"host": '10.98.0.8', "port": 19530})
    connections.connect(alias='default')

    def search_first_five(wrapper):
        """Search ``wrapper`` with the first five inserted vectors."""
        hits, _ = wrapper.search(data[-1][:5], ct.default_float_vec_field_name,
                                 ct.default_search_params, ct.default_limit)
        return hits

    # create
    first_w = ApiCollectionWrapper()
    first_w.init_collection(name="query_scale_one",
                            schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    insert_res, _ = first_w.insert(data)
    assert insert_res.insert_count == ct.default_nb

    # no index is created here; the collection is loaded and searched directly
    first_w.load()
    res1 = search_first_five(first_w)

    # scale queryNode pod
    env.helm_upgrade_cluster_milvus(queryNode=2)

    second_w = ApiCollectionWrapper()
    second_w.init_collection(name="query_scale_two",
                             schema=cf.gen_default_collection_schema())
    second_w.insert(data)
    assert second_w.num_entities == ct.default_nb
    second_w.load()
    res2 = search_first_five(second_w)

    assert res1[0].ids == res2[0].ids
def connection(self, host, port):
    """Register ``host``/``port`` as the ``default`` alias and connect to it.

    :param host: server address stored in the connection config and on ``self.host``
    :param port: server port stored in the connection config and on ``self.port``
    :return: the object returned by ``connections.connect``
    :raises Exception: when ``connections.connect`` returns ``None``
    """
    endpoint = {"host": host, "port": port}
    connections.add_connection(default=endpoint)
    handle = connections.connect(alias='default')
    if handle is not None:
        # Remember the endpoint only once the connection exists.
        self.host = host
        self.port = port
        return handle
    raise Exception("no connections")
def connection(self):
    """Connect the ``default`` alias to the fixed endpoint 192.168.1.239:19530.

    :return: the object returned by ``connections.connect``
    :raises Exception: when ``connections.connect`` returns ``None``
    """
    endpoint = {"host": "192.168.1.239", "port": 19530}
    connections.add_connection(default=endpoint)
    handle = connections.connect(alias='default')
    if handle is not None:
        return handle
    raise Exception("no connections")
def __init__(self, name):
    """Connect to the local Milvus server and create the image collection.

    :param name: name of the collection to create; it has an INT64 primary
                 key ``id`` and a 512-dim FLOAT_VECTOR field ``image``
    """
    connections.add_connection(default={
        'host': '127.0.0.1',
        'port': '19530',
    })
    # The configuration above is registered under the 'default' alias, so
    # that is the alias to connect with. The collection name is not a
    # connection alias and has no configuration registered for it.
    connections.connect(alias='default')

    id_field = FieldSchema(name='id', is_primary=True, dtype=DataType.INT64,
                           description='id')
    image_field = FieldSchema(name='image', dtype=DataType.FLOAT_VECTOR, dim=512,
                              description='image')
    schema = CollectionSchema(fields=[id_field, image_field], primary_field='id')

    # Keep the handle on the instance instead of discarding it in a local.
    self.collection = Collection(name=name, schema=schema)
def test_expand_data_node(self):
    """
    target: test create and insert api after expand dataNode pod
    method: 1.create collection a and insert df
            2.expand dataNode pod from 1 to 2
            3.verify collection a property and verify create and insert of new collection
    expected: two collection create and insert op are both correctly
    """
    # deploy all nodes one pod cluster milvus with helm
    release_name = "scale-test"
    env = HelmEnv(release_name=release_name)
    env.helm_install_cluster_milvus()
    host = env.get_svc_external_ip()

    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    c_name = cf.gen_unique_str(prefix)
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb

    # scale dataNode to 2 pods
    env.helm_upgrade_cluster_milvus(dataNode=2)

    # after scale, assert data consistent
    assert utility.has_collection(c_name)
    assert collection_w.num_entities == ct.default_nb

    # assert new operations
    new_cname = cf.gen_unique_str(prefix)
    new_collection_w = ApiCollectionWrapper()
    new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
    new_mutation_res, _ = new_collection_w.insert(data)
    assert new_mutation_res.insert_count == ct.default_nb
    assert new_collection_w.num_entities == ct.default_nb

    # assert insert into the old collection still works after scaling;
    # check the result of THIS insert, not the one made before scaling
    mutation_res_2, _ = collection_w.insert(data)
    assert mutation_res_2.insert_count == ct.default_nb
    assert collection_w.num_entities == ct.default_nb * 2

    collection_w.drop()
    new_collection_w.drop()
def test_shrink_data_node(self):
    """
    target: test shrink dataNode from 2 to 1
    method: 1.create collection and insert df
            2.shrink dataNode
            3.insert df
    expected: verify the property of collection which channel on shrink pod
    """
    env = HelmEnv(release_name="scale-test", dataNode=2)
    env.helm_install_cluster_milvus(image_pull_policy=constants.IF_NOT_PRESENT)

    # connect
    connections.add_connection(default={"host": '10.98.0.8', "port": 19530})
    connections.connect(alias='default')

    data = cf.gen_default_list_data(ct.default_nb)

    def seeded_collection(name):
        """Create collection ``name``, insert ``data`` once and verify the counts."""
        wrapper = ApiCollectionWrapper()
        wrapper.init_collection(name=name, schema=cf.gen_default_collection_schema())
        insert_res, _ = wrapper.insert(data)
        assert insert_res.insert_count == ct.default_nb
        assert wrapper.num_entities == ct.default_nb
        return wrapper

    first_w = seeded_collection("data_scale_one")
    second_w = seeded_collection("data_scale_two")

    # shrink dataNode from 2 to 1
    env.helm_upgrade_cluster_milvus(dataNode=1)

    # the first collection keeps its rows; the second accepts another insert
    assert first_w.num_entities == ct.default_nb
    second_w.insert(data)
    assert second_w.num_entities == ct.default_nb * 2

    first_w.drop()
    second_w.drop()
def create_collection():
    """Open the default connection, yield to the caller, then remove it.

    Generator with a single bare ``yield``. The ``default`` connection is
    removed in a ``finally`` clause, so the teardown also runs when an
    exception is thrown into the generator or when it is closed early.
    """
    connections.connect()
    try:
        yield
    finally:
        connections.remove_connection(alias='default')
def test_shrink_query_node(self):
    """
    target: test shrink queryNode from 2 to 1
    method: 1.deploy two queryNode
            2.search two collections in two queryNode
            3.upgrade queryNode from 2 to 1
            4.search second collection
    expected: search result is correct
    """
    # deploy
    release_name = "scale-query"
    env = HelmEnv(release_name=release_name, queryNode=2)
    env.helm_install_cluster_milvus(image_pull_policy=constants.IF_NOT_PRESENT)

    # connect
    connections.add_connection(default={"host": '10.98.0.8', "port": 19530})
    connections.connect(alias='default')

    data = cf.gen_default_list_data(nb)

    def _assert_top_hit(wrapper):
        """Search with the first nq inserted vectors; the best hit for the
        first query vector must be the first inserted primary key."""
        res, _ = wrapper.search(data[-1][:nq], ct.default_float_vec_field_name,
                                ct.default_search_params, ct.default_limit)
        assert res[0].ids[0] == data[0][0]

    # collection one
    c_name = "query_scale_one"
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
    collection_w.insert(data)
    assert collection_w.num_entities == nb
    collection_w.load()
    _assert_top_hit(collection_w)

    # collection two
    c_name_2 = "query_scale_two"
    collection_w2 = ApiCollectionWrapper()
    collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
    collection_w2.insert(data)
    assert collection_w2.num_entities == nb
    collection_w2.load()
    _assert_top_hit(collection_w2)

    # scale queryNode pod from 2 to 1; no interactive breakpoint here, so
    # the test can run unattended
    env.helm_upgrade_cluster_milvus(queryNode=1)

    # search both collections again after shrinking
    _assert_top_hit(collection_w)
    _assert_top_hit(collection_w2)
def __init__(self, logger, milvus_host='127.0.0.1', milvus_port=19530):
    """Store the logger and endpoint, then connect to Milvus.

    :param logger: stored on ``self.logger``
    :param milvus_host: server address, stored on ``self.host``
    :param milvus_port: server port, stored on ``self.port``
    """
    self.logger = logger
    self.host, self.port = milvus_host, milvus_port
    # No collection is selected at construction time.
    self.collection = None
    connections.connect(host=self.host, port=self.port)
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.

import logging

try:
    from pymilvus_orm import connections
except ImportError:
    # pymilvus_orm is not importable as-is: append the directory two levels
    # above this file to sys.path and retry the import from there.
    import sys
    from os.path import abspath, dirname

    sys.path.append(dirname(dirname(abspath(__file__))))

    from pymilvus_orm import connections

LOGGER = logging.getLogger(__name__)

# Open the default connection and log the collections it reports.
print("start connection")
conn = connections.connect()
LOGGER.info(conn.list_collections())
print("end connection")
def hello_milvus():
    """Walk through connect, create, insert, index, load, search and drop.

    Every step prints its progress; the collection ``hello_milvus`` is
    dropped at the end.
    """
    import time

    # create connection
    connections.connect()

    print("\nList collections...")
    print(list_collections())

    # create collection: INT64 primary key, DOUBLE scalar, float vector
    dim = 128
    count_field = FieldSchema(name="count", dtype=DataType.INT64, is_primary=True)
    value_field = FieldSchema(name="random_value", dtype=DataType.DOUBLE)
    vector_field = FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    schema = CollectionSchema(fields=[count_field, value_field, vector_field],
                              description="test collection")

    print("\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=schema)

    print("\nList collections...")
    print(list_collections())

    # insert data, one list per field in schema order
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    counts = list(range(nb))
    random_values = [float(random.randrange(-20, -10)) for _ in range(nb)]
    collection.insert([counts, random_values, vectors])

    print("\nGet collection entities...")
    print(collection.num_entities)

    # create index and load table
    index_params = {
        "index_type": "IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "L2",
    }
    print("\nCreate index...")
    collection.create_index(field_name="float_vector", index_params=index_params)
    print("\nload collection...")
    collection.load()

    # load and search
    top_k = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}

    start_time = time.time()
    print("\nSearch...")
    # define output_fields of search result
    results = collection.search(vectors[-2:], "float_vector", search_params, top_k,
                                "count > 100", output_fields=["count", "random_value"])
    end_time = time.time()

    # show result
    for query_hits in results:
        for hit in query_hits:
            # Get value of the random value field for search result
            print(hit, hit.entity.get("random_value"))
    print("search latency = %.4fs" % (end_time - start_time))

    # drop collection
    collection.drop()