def milvus_test(usr_features, mov_features, ids): _HOST = '127.0.0.1' _PORT = '19530' # default value milvus = Milvus() param = {'host': _HOST, 'port': _PORT} status = milvus.connect(**param) if status.OK(): print("\nServer connected.") else: print("\nServer connect fail.") sys.exit(1) table_name = 'paddle_demo1' status, ok = milvus.has_collection(table_name) if not ok: param = { 'collection_name': table_name, 'dimension': 200, 'index_file_size': 1024, # optional 'metric_type': MetricType.IP # optional } milvus.create_collection(param) insert_vectors = normaliz_data([usr_features.tolist()]) status, ids = milvus.insert(collection_name=table_name, records=insert_vectors, ids = ids) time.sleep(1) status, result = milvus.count_collection(table_name) print("rows in table paddle_demo1:", result) status, table = milvus.count_collection(table_name) search_vectors = normaliz_data([mov_features.tolist()]) param = { 'collection_name': table_name, 'query_records': search_vectors, 'top_k': 1, 'params': {'nprobe': 16} } status, results = milvus.search(**param) print("Searched ids:", results[0][0].id) print("Score:", float(results[0][0].distance)*5) status = milvus.drop_collection(table_name)
def validate_insert(_collection_name): milvus = Milvus() milvus.connect(**server_config) status, count = milvus.count_collection(_collection_name) assert count == 10 * 10000, "Insert validate fail. Vectors num is not matched." milvus.disconnect()
def milvus_test(usr_features, IS_INFER, mov_features=None, ids=None): _HOST = '127.0.0.1' _PORT = '19530' # default value table_name = 'recommender_demo' milvus = Milvus() param = {'host': _HOST, 'port': _PORT} status = milvus.connect(**param) if status.OK(): print("Server connected.") else: print("Server connect fail.") sys.exit(1) if IS_INFER: status = milvus.drop_collection(table_name) time.sleep(3) status, ok = milvus.has_collection(table_name) if not ok: if mov_features is None: print("Insert vectors is none!") sys.exit(1) param = { 'collection_name': table_name, 'dimension': 200, 'index_file_size': 1024, # optional 'metric_type': MetricType.IP # optional } print(milvus.create_collection(param)) insert_vectors = normaliz_data(mov_features) status, ids = milvus.insert(collection_name=table_name, records=insert_vectors, ids=ids) time.sleep(1) status, result = milvus.count_collection(table_name) print("rows in table recommender_demo:", result) search_vectors = normaliz_data(usr_features) param = { 'collection_name': table_name, 'query_records': search_vectors, 'top_k': 5, 'params': { 'nprobe': 16 } } time1 = time.time() status, results = milvus.search(**param) time2 = time.time() print("Top\t", "Ids\t", "Title\t", "Score") for i, re in enumerate(results[0]): title = paddle.dataset.movielens.movie_info()[int(re.id)].title print(i, "\t", re.id, "\t", title, "\t", float(re.distance) * 5)
def test_not_connect(self): client = Milvus() with pytest.raises(NotConnectError): client.create_collection({}) with pytest.raises(NotConnectError): client.has_collection("a") with pytest.raises(NotConnectError): client.describe_collection("a") with pytest.raises(NotConnectError): client.drop_collection("a") with pytest.raises(NotConnectError): client.create_index("a") with pytest.raises(NotConnectError): client.insert("a", [], None) with pytest.raises(NotConnectError): client.count_collection("a") with pytest.raises(NotConnectError): client.show_collections() with pytest.raises(NotConnectError): client.search("a", 1, 2, [], None) with pytest.raises(NotConnectError): client.search_in_files("a", [], [], 2, 1, None) with pytest.raises(NotConnectError): client._cmd("") with pytest.raises(NotConnectError): client.preload_collection("a") with pytest.raises(NotConnectError): client.describe_index("a") with pytest.raises(NotConnectError): client.drop_index("")
def main(): milvus = Milvus() # Connect to Milvus server # You may need to change _HOST and _PORT accordingly param = {'host': _HOST, 'port': _PORT} status = milvus.connect(**param) if status.OK(): print("Server connected.") else: print("Server connect fail.") sys.exit(1) # Create collection demo_collection if it dosen't exist. collection_name = 'example_collection' status, ok = milvus.has_collection(collection_name) if not ok: param = { 'collection_name': collection_name, 'dimension': _DIM, 'index_file_size': _INDEX_FILE_SIZE, # optional 'metric_type': MetricType.L2 # optional } milvus.create_collection(param) # Show collections in Milvus server _, collections = milvus.show_collections() # present collection info _, info = milvus.collection_info(collection_name) print(info) # Describe demo_collection _, collection = milvus.describe_collection(collection_name) print(collection) # 10000 vectors with 16 dimension # element per dimension is float32 type # vectors should be a 2-D array vectors = [[random.random() for _ in range(_DIM)] for _ in range(10000)] # You can also use numpy to generate random vectors: # `vectors = np.random.rand(10000, 16).astype(np.float32)` # Insert vectors into demo_collection, return status and vectors id list status, ids = milvus.insert(collection_name=collection_name, records=vectors) # Flush collection inserted data to disk. milvus.flush([collection_name]) # Get demo_collection row count status, result = milvus.count_collection(collection_name) # create index of vectors, search more rapidly index_param = { 'nlist': 2048 } # Create ivflat index in demo_collection # You can search vectors without creating index. however, Creating index help to # search faster print("Creating index: {}".format(index_param)) status = milvus.create_index(collection_name, IndexType.IVF_FLAT, index_param) # describe index, get information of index status, index = milvus.describe_index(collection_name) print(index) # Use the top 10 vectors for similarity search query_vectors = vectors[0:10] # execute vector similarity search search_param = { "nprobe": 16 } param = { 'collection_name': collection_name, 'query_records': query_vectors, 'top_k': 1, 'params': search_param } print("Searching ... ") status, results = milvus.search(**param) if status.OK(): # indicate search result # also use by: # `results.distance_array[0][0] == 0.0 or results.id_array[0][0] == ids[0]` if results[0][0].distance == 0.0 or results[0][0].id == ids[0]: print('Query result is correct') else: print('Query result isn\'t correct') # print results print(results) # Delete demo_collection status = milvus.drop_collection(collection_name) # Disconnect from Milvus status = milvus.disconnect()
def main(): # Connect to Milvus server # You may need to change _HOST and _PORT accordingly param = {'host': _HOST, 'port': _PORT} # You can create a instance specified server addr and # invoke rpc method directly client = Milvus(**param) # Create collection demo_collection if it dosen't exist. collection_name = 'demo_partition_collection' partition_tag = "random" status, ok = client.has_collection(collection_name) # if collection exists, then drop it if status.OK() and ok: client.drop_collection(collection_name) param = { 'collection_name': collection_name, 'dimension': _DIM, 'index_file_size': _INDEX_FILE_SIZE, # optional 'metric_type': MetricType.L2 # optional } client.create_collection(param) # Show collections in Milvus server _, collections = client.show_collections() # Describe collection _, collection = client.describe_collection(collection_name) print(collection) # create partition client.create_partition(collection_name, partition_tag=partition_tag) # display partitions _, partitions = client.show_partitions(collection_name) # 10000 vectors with 16 dimension # element per dimension is float32 type # vectors should be a 2-D array vectors = [[random.random() for _ in range(_DIM)] for _ in range(10000)] # You can also use numpy to generate random vectors: # `vectors = np.random.rand(10000, 16).astype(np.float32).tolist()` # Insert vectors into partition of collection, return status and vectors id list status, ids = client.insert(collection_name=collection_name, records=vectors, partition_tag=partition_tag) # Wait for 6 seconds, until Milvus server persist vector data. time.sleep(6) # Get demo_collection row count status, num = client.count_collection(collection_name) # create index of vectors, search more rapidly index_param = { 'nlist': 2048 } # Create ivflat index in demo_collection # You can search vectors without creating index. however, Creating index help to # search faster status = client.create_index(collection_name, IndexType.IVF_FLAT, index_param) # describe index, get information of index status, index = client.describe_index(collection_name) print(index) # Use the top 10 vectors for similarity search query_vectors = vectors[0:10] # execute vector similarity search, search range in partition `partition1` search_param = { "nprobe": 10 } param = { 'collection_name': collection_name, 'query_records': query_vectors, 'top_k': 1, 'partition_tags': ["random"], 'params': search_param } status, results = client.search(**param) if status.OK(): # indicate search result # also use by: # `results.distance_array[0][0] == 0.0 or results.id_array[0][0] == ids[0]` if results[0][0].distance == 0.0 or results[0][0].id == ids[0]: print('Query result is correct') else: print('Query result isn\'t correct') # print results print(results) # Delete partition. You can also invoke `drop_collection()`, so that all of partitions belongs to # designated collections will be deleted. # status = client.drop_partition(collection_name, partition_tag) # Delete collection. All of partitions of this collection will be dropped. status = client.drop_collection(collection_name)
class MilvusClient(object): def __init__(self, collection_name=None, ip=None, port=None, timeout=60): self._collection_name = collection_name try: i = 1 start_time = time.time() if not ip: self._milvus = Milvus(host=SERVER_HOST_DEFAULT, port=SERVER_PORT_DEFAULT) else: # retry connect for remote server while time.time() < start_time + timeout: try: self._milvus = Milvus(host=ip, port=port) if self._milvus.server_status(): logger.debug( "Try connect times: %d, %s" % (i, round(time.time() - start_time, 2))) break except Exception as e: logger.debug("Milvus connect failed") i = i + 1 except Exception as e: raise e def __str__(self): return 'Milvus collection %s' % self._collection_name def check_status(self, status): if not status.OK(): logger.error(status.message) raise Exception("Status not ok") def check_result_ids(self, result): for index, item in enumerate(result): if item[0].distance >= epsilon: logger.error(index) logger.error(item[0].distance) raise Exception("Distance wrong") def create_collection(self, collection_name, dimension, index_file_size, metric_type): if not self._collection_name: self._collection_name = collection_name if metric_type == "l2": metric_type = MetricType.L2 elif metric_type == "ip": metric_type = MetricType.IP elif metric_type == "jaccard": metric_type = MetricType.JACCARD elif metric_type == "hamming": metric_type = MetricType.HAMMING elif metric_type == "sub": metric_type = MetricType.SUBSTRUCTURE elif metric_type == "super": metric_type = MetricType.SUPERSTRUCTURE else: logger.error("Not supported metric_type: %s" % metric_type) create_param = { 'collection_name': collection_name, 'dimension': dimension, 'index_file_size': index_file_size, "metric_type": metric_type } status = self._milvus.create_collection(create_param) self.check_status(status) @time_wrapper def insert(self, X, ids=None): status, result = self._milvus.add_vectors(self._collection_name, X, ids) self.check_status(status) return status, result @time_wrapper def delete_vectors(self, ids): status = self._milvus.delete_by_id(self._collection_name, ids) self.check_status(status) @time_wrapper def flush(self): status = self._milvus.flush([self._collection_name]) self.check_status(status) @time_wrapper def compact(self): status = self._milvus.compact(self._collection_name) self.check_status(status) @time_wrapper def create_index(self, index_type, index_param=None): index_type = INDEX_MAP[index_type] logger.info( "Building index start, collection_name: %s, index_type: %s" % (self._collection_name, index_type)) if index_param: logger.info(index_param) status = self._milvus.create_index(self._collection_name, index_type, index_param) self.check_status(status) def describe_index(self): status, result = self._milvus.describe_index(self._collection_name) self.check_status(status) index_type = None for k, v in INDEX_MAP.items(): if result._index_type == v: index_type = k break return {"index_type": index_type, "index_param": result._params} def drop_index(self): logger.info("Drop index: %s" % self._collection_name) return self._milvus.drop_index(self._collection_name) @time_wrapper def query(self, X, top_k, search_param=None): status, result = self._milvus.search_vectors(self._collection_name, top_k, query_records=X, params=search_param) self.check_status(status) return result @time_wrapper def query_ids(self, top_k, ids, search_param=None): status, result = self._milvus.search_by_ids(self._collection_name, ids, top_k, params=search_param) self.check_result_ids(result) return result def count(self): return self._milvus.count_collection(self._collection_name)[1] def delete(self, timeout=120): timeout = int(timeout) logger.info("Start delete collection: %s" % self._collection_name) self._milvus.drop_collection(self._collection_name) i = 0 while i < timeout: if self.count(): time.sleep(1) i = i + 1 continue else: break if i >= timeout: logger.error("Delete collection timeout") def describe(self): return self._milvus.describe_collection(self._collection_name) def show_collections(self): return self._milvus.show_collections() def exists_collection(self, collection_name=None): if collection_name is None: collection_name = self._collection_name status, res = self._milvus.has_collection(collection_name) # self.check_status(status) return res @time_wrapper def preload_collection(self): status = self._milvus.preload_collection(self._collection_name, timeout=3000) self.check_status(status) return status def get_server_version(self): status, res = self._milvus.server_version() return res def get_server_mode(self): return self.cmd("mode") def get_server_commit(self): return self.cmd("build_commit_id") def get_server_config(self): return json.loads(self.cmd("get_config *")) def get_mem_info(self): result = json.loads(self.cmd("get_system_info")) result_human = { # unit: Gb "memory_used": round(int(result["memory_used"]) / (1024 * 1024 * 1024), 2) } return result_human def cmd(self, command): status, res = self._milvus._cmd(command) logger.info("Server command: %s, result: %s" % (command, res)) self.check_status(status) return res