def delete_table(self, table_name):
    """Drop the MySQL table ``table_name`` if it exists.

    The statement runs on ``self.cursor``. Any exception is logged together
    with the SQL text and the process then exits with status 1.
    """
    # The table name is spliced into the statement text unchanged.
    sql = "".join(("drop table if exists ", table_name, ";"))
    try:
        self.cursor.execute(sql)
        LOGGER.debug("MYSQL delete table:{}".format(table_name))
    except Exception as err:
        LOGGER.error("MYSQL ERROR: {} with sql: {}".format(err, sql))
        sys.exit(1)
def create_mysql_table(self, table_name):
    """Create ``table_name`` with two TEXT columns unless it already exists.

    The columns are ``milvus_id`` and ``data_path``. Any exception is logged
    together with the SQL text and the process then exits with status 1.
    """
    # The table name is spliced into the statement text unchanged.
    sql = "".join((
        "create table if not exists ",
        table_name,
        "(milvus_id TEXT, data_path TEXT);",
    ))
    try:
        self.cursor.execute(sql)
        LOGGER.debug(
            "MYSQL create table: {} with sql: {}".format(table_name, sql))
    except Exception as err:
        LOGGER.error("MYSQL ERROR: {} with sql: {}".format(err, sql))
        sys.exit(1)
def __init__(self):
    """Build the Milvus client and keep it on ``self.client``.

    Host and port come from ``MILVUS_HOST`` and ``MILVUS_PORT``. If the
    client cannot be constructed, the error is logged and the process exits
    with status 1.
    """
    try:
        self.client = Milvus(host=MILVUS_HOST, port=MILVUS_PORT)
        connected = "Successfully connect to Milvus with IP:{} and PORT:{}"
        LOGGER.debug(connected.format(MILVUS_HOST, MILVUS_PORT))
    except Exception as err:
        LOGGER.error("Failed to connect Milvus: {}".format(err))
        sys.exit(1)
def delete_collection(self, collection_name):
    """Drop the collection selected by ``collection_name``.

    ``self.set_collection`` is called first, then ``drop()`` is invoked on
    ``self.collection``. Returns the string ``"ok"`` on success. Any
    exception is logged and the process exits with status 1.
    """
    try:
        self.set_collection(collection_name)
        target = self.collection
        target.drop()
        LOGGER.debug("Successfully drop collection!")
        return "ok"
    except Exception as err:
        LOGGER.error("Failed to drop collection: {}".format(err))
        sys.exit(1)
def count_table(self, table_name):
    """Return ``count(milvus_id)`` for the MySQL table ``table_name``.

    The value is the first column of the first fetched row. Any exception
    is logged together with the SQL text and the process then exits with
    status 1.
    """
    # The table name is spliced into the statement text unchanged.
    sql = "".join(("select count(milvus_id) from ", table_name, ";"))
    try:
        self.cursor.execute(sql)
        rows = self.cursor.fetchall()
        LOGGER.debug("MYSQL count table:{}".format(table_name))
        first_row = rows[0]
        return first_row[0]
    except Exception as err:
        LOGGER.error("MYSQL ERROR: {} with sql: {}".format(err, sql))
        sys.exit(1)
def delete_all_data(self, table_name):
    """Delete every row of the MySQL table ``table_name`` and commit.

    The statement runs on ``self.cursor`` and is committed through
    ``self.conn``. Any exception is logged together with the SQL text and
    the process then exits with status 1.
    """
    # The table name is spliced into the statement text unchanged.
    sql = "".join(("delete from ", table_name, ";"))
    try:
        self.cursor.execute(sql)
        self.conn.commit()
        LOGGER.debug("MYSQL delete all data in table:{}".format(table_name))
    except Exception as err:
        LOGGER.error("MYSQL ERROR: {} with sql: {}".format(err, sql))
        sys.exit(1)
def load_data_to_mysql(self, table_name, data):
    """Bulk-insert ``data`` into ``table_name`` and commit.

    ``data`` is handed to ``cursor.executemany`` and supplies the values for
    the ``milvus_id`` and ``data_path`` placeholders. Any exception is
    logged together with the SQL text and the process then exits with
    status 1.
    """
    # Only the table name is spliced in; row values go through %s binding.
    sql = "".join((
        "insert into ",
        table_name,
        " (milvus_id,data_path) values (%s,%s);",
    ))
    try:
        self.cursor.executemany(sql, data)
        self.conn.commit()
        LOGGER.debug(
            "MYSQL loads data to table: {} successfully".format(table_name))
    except Exception as err:
        LOGGER.error("MYSQL ERROR: {} with sql: {}".format(err, sql))
        sys.exit(1)
def count(self, collection_name):
    """Return ``num_entities`` of the collection ``collection_name``.

    ``self.set_collection`` is called first and the value is then read from
    ``self.collection``. Any exception is logged and the process exits with
    status 1.
    """
    try:
        self.set_collection(collection_name)
        total = self.collection.num_entities
        LOGGER.debug(
            "Successfully get the num:{} of the collection:{}".format(
                total, collection_name))
        return total
    except Exception as err:
        LOGGER.error("Failed to count vectors in Milvus: {}".format(err))
        sys.exit(1)
def search_by_milvus_ids(self, ids, table_name):
    """Return the ``data_path`` values stored for the given Milvus ids.

    Args:
        ids: Iterable of Milvus ids to look up. ``None`` or an empty
            iterable yields an empty list without touching the database.
        table_name: MySQL table to query; spliced into the statement text.

    Returns:
        A list with the first column (``data_path``) of every fetched row.
        The ``order by field(milvus_id, ...)`` clause asks MySQL to return
        rows in the order the ids were passed in.

    Any exception raised while querying is logged together with the SQL
    text and the process then exits with status 1.
    """
    id_list = [] if ids is None else list(ids)
    if not id_list:
        # "in ()" is not valid SQL; with nothing to look up, skip the query.
        return []

    # Bind ids as driver parameters rather than pasting str(list) into the
    # statement, so quoting and escaping are done by the driver.
    placeholders = ",".join(["%s"] * len(id_list))
    sql = ("select data_path from " + table_name
           + " where milvus_id in (" + placeholders
           + ") order by field (milvus_id," + placeholders + ");")
    try:
        # The id list is needed twice: once for IN, once for FIELD.
        self.cursor.execute(sql, id_list + id_list)
        rows = self.cursor.fetchall()
        paths = [row[0] for row in rows]
        LOGGER.debug("MYSQL search by milvus id.")
        return paths
    except Exception as e:
        LOGGER.error("MYSQL ERROR: {} with sql: {}".format(e, sql))
        sys.exit(1)
def delete_collection(self, collection_name):
    """Drop ``collection_name`` through ``self.client``.

    Returns the status object from ``drop_collection`` when its ``code`` is
    falsy. A truthy code is raised as an exception carrying the status
    message. Any exception is logged and the process exits with status 1.
    """
    try:
        status = self.client.drop_collection(collection_name=collection_name)
        if status.code:
            raise Exception(status.message)
        LOGGER.debug(
            "Successfully drop collection: {}".format(collection_name))
        return status
    except Exception as err:
        LOGGER.error("Failed to drop collection: {}".format(err))
        sys.exit(1)
def count(self, collection_name):
    """Return the entity count reported for ``collection_name``.

    ``self.client.count_entities`` yields a ``(status, num)`` pair and
    ``num`` is returned when ``status.code`` is falsy. A truthy code is
    raised as an exception carrying the status message. Any exception is
    logged and the process exits with status 1.
    """
    try:
        status, total = self.client.count_entities(
            collection_name=collection_name)
        if status.code:
            raise Exception(status.message)
        LOGGER.debug(
            "Successfully get the num:{} of the collection:{}".format(
                total, collection_name))
        return total
    except Exception as err:
        LOGGER.error("Failed to count vectors in Milvus: {}".format(err))
        sys.exit(1)
def insert(self, collection_name, vectors):
    """Insert ``vectors`` into ``collection_name`` and return the new ids.

    ``self.create_collection`` is called first. ``vectors`` is wrapped in a
    one-element list before being passed to ``self.collection.insert``, and
    ``self.collection.load()`` is called afterwards. The returned ids are
    the ``primary_keys`` of the insert result. Any exception is logged and
    the process exits with status 1.
    """
    try:
        self.create_collection(collection_name)
        result = self.collection.insert([vectors])
        ids = result.primary_keys
        self.collection.load()
        LOGGER.debug(
            "Insert vectors to Milvus in collection: {} with {} rows".format(
                collection_name, len(vectors)))
        return ids
    except Exception as err:
        LOGGER.error("Failed to load data to Milvus: {}".format(err))
        sys.exit(1)
def insert(self, collection_name, vectors):
    """Insert ``vectors`` into ``collection_name`` and return their ids.

    ``self.create_colllection`` is called first (the spelling matches the
    method name used by this class). ``self.client.insert`` yields a
    ``(status, ids)`` pair and ``ids`` is returned when ``status.code`` is
    falsy. A truthy code is raised as an exception carrying the status
    message. Any exception is logged and the process exits with status 1.
    """
    try:
        self.create_colllection(collection_name)
        status, ids = self.client.insert(
            collection_name=collection_name, records=vectors)
        if status.code:
            raise Exception(status.message)
        LOGGER.debug(
            "Insert vectors to Milvus in collection: {} with {} rows".format(
                collection_name, len(vectors)))
        return ids
    except Exception as err:
        LOGGER.error("Failed to load data to Milvus: {}".format(err))
        sys.exit(1)
def do_count(table_name, milvus_cli, mysql_cli):
    """Return the row count of ``table_name`` after cross-checking stores.

    Args:
        table_name: Collection/table to count; a falsy value selects
            ``DEFAULT_TABLE``.
        milvus_cli: Object providing ``has_collection`` and ``count``.
        mysql_cli: Object providing ``count_table``.

    Returns:
        ``None`` when Milvus has no such collection, otherwise the Milvus
        count, which has been verified to equal the MySQL count.

    Any exception, including a count mismatch, is logged and the process
    exits with status 1.
    """
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        if not milvus_cli.has_collection(table_name):
            return None
        milvus_num = milvus_cli.count(table_name)
        mysql_num = mysql_cli.count_table(table_name)
        LOGGER.debug("The num of Milvus: {} and Mysql: {}".format(
            milvus_num, mysql_num))
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and would log an empty message on failure.
        if milvus_num != mysql_num:
            raise ValueError(
                "count mismatch, Milvus: {} and Mysql: {}".format(
                    milvus_num, mysql_num))
        return milvus_num
    except Exception as e:
        LOGGER.error(" Error with count table {}".format(e))
        sys.exit(1)
def create_index(self, collection_name):
    """Create an ``IndexType.IVF_FLAT`` index on ``collection_name``.

    The index parameters are ``{'nlist': 16384}``. Returns the status object
    from ``self.client.create_index`` when its ``code`` is falsy. A truthy
    code is raised as an exception carrying the status message. Any
    exception is logged and the process exits with status 1.
    """
    try:
        index_param = {'nlist': 16384}
        status = self.client.create_index(
            collection_name, IndexType.IVF_FLAT, index_param)
        if status.code:
            raise Exception(status.message)
        LOGGER.debug(
            "Successfully create index in collection:{} with param:{}".format(
                collection_name, index_param))
        return status
    except Exception as err:
        LOGGER.error("Failed to create index: {}".format(err))
        sys.exit(1)
def create_colllection(self, collection_name):
    """Create the Milvus collection ``collection_name`` if it is missing.

    Nothing is done when ``self.has_collection`` reports the collection
    already exists. Otherwise it is created through ``self.client`` with the
    module-level ``VECTOR_DIMENSION``, ``INDEX_FILE_SIZE`` and
    ``METRIC_TYPE`` settings. A non-zero status code is raised as an
    exception carrying the status message.

    Any exception is logged and the process exits with status 1. The method
    name keeps its historical spelling because callers use it.
    """
    try:
        if not self.has_collection(collection_name):
            collection_param = {
                'collection_name': collection_name,
                'dimension': VECTOR_DIMENSION,
                'index_file_size': INDEX_FILE_SIZE,
                'metric_type': METRIC_TYPE
            }
            status = self.client.create_collection(collection_param)
            if status.code != 0:
                raise Exception(status.message)
            LOGGER.debug(
                "Create Milvus collection: {}".format(collection_name))
    except Exception as e:
        # The message used to say "load data", copied from insert().
        LOGGER.error("Failed to create Milvus collection: {}".format(e))
        sys.exit(1)
def search_vectors(self, collection_name, vectors, top_k):
    """Search ``collection_name`` for the ``top_k`` matches of ``vectors``.

    The search runs through ``self.client.search`` with ``{'nprobe': 16}``
    as parameters and yields a ``(status, result)`` pair. ``result`` is
    returned when ``status.code`` is falsy. A truthy code is raised as an
    exception carrying the status message. Any exception is logged and the
    process exits with status 1.
    """
    try:
        search_param = {'nprobe': 16}
        status, hits = self.client.search(
            collection_name=collection_name,
            query_records=vectors,
            top_k=top_k,
            params=search_param,
        )
        if status.code:
            raise Exception(status.message)
        LOGGER.debug(
            "Successfully search in collection: {}".format(collection_name))
        return hits
    except Exception as err:
        LOGGER.error("Failed to search vectors in Milvus: {}".format(err))
        sys.exit(1)
def search_vectors(self, collection_name, vectors, top_k):
    """Search the ``embedding`` field of ``collection_name``.

    Args:
        collection_name: Collection to select via ``self.set_collection``.
        vectors: Query data passed as the first argument of
            ``self.collection.search``.
        top_k: Passed as ``limit`` to the search call.

    Returns:
        Whatever ``self.collection.search`` returns.

    The search uses ``METRIC_TYPE`` with ``nprobe`` 16. Any exception is
    logged and the process exits with status 1.
    """
    try:
        self.set_collection(collection_name)
        search_params = {
            "metric_type": METRIC_TYPE,
            "params": {
                "nprobe": 16
            }
        }
        res = self.collection.search(vectors,
                                     anns_field="embedding",
                                     param=search_params,
                                     limit=top_k)
        # A leftover ``print(res[0])`` used to sit here. It wrote to stdout
        # on every call and turned an empty result into an IndexError and
        # process exit. The debug log below already records the result.
        LOGGER.debug("Successfully search in collection: {}".format(res))
        return res
    except Exception as e:
        LOGGER.error("Failed to search vectors in Milvus: {}".format(e))
        sys.exit(1)
def create_index(self, collection_name):
    """Create an ``IVF_SQ8`` index on the ``embedding`` field.

    ``self.set_collection`` is called first. The index uses ``METRIC_TYPE``
    and ``nlist`` 16384. Returns the value from
    ``self.collection.create_index`` when its ``code`` is falsy. A truthy
    code is raised as an exception carrying its ``message``. Any exception
    is logged and the process exits with status 1.
    """
    try:
        self.set_collection(collection_name)
        default_index = {
            "index_type": "IVF_SQ8",
            "metric_type": METRIC_TYPE,
            "params": {"nlist": 16384},
        }
        status = self.collection.create_index(
            field_name="embedding", index_params=default_index)
        if status.code:
            raise Exception(status.message)
        LOGGER.debug(
            "Successfully create index in collection:{} with param:{}".format(
                collection_name, default_index))
        return status
    except Exception as err:
        LOGGER.error("Failed to create index: {}".format(err))
        sys.exit(1)
def create_collection(self, collection_name):
    """Create the collection ``collection_name`` if it is missing.

    When ``self.has_collection`` reports the collection is absent, a schema
    with an auto-id INT64 primary key ``id`` and a BINARY_VECTOR field
    ``embedding`` of dimension ``VECTOR_DIMENSION`` is built, and the new
    ``Collection`` is stored on ``self.collection``.

    Returns:
        The string ``"OK"``.

    Any exception is logged and the process exits with status 1.
    """
    try:
        if not self.has_collection(collection_name):
            # The keyword was misspelled ``descrition``, so the field
            # descriptions were never actually set.
            field1 = FieldSchema(name="id",
                                 dtype=DataType.INT64,
                                 description="int64",
                                 is_primary=True,
                                 auto_id=True)
            field2 = FieldSchema(name="embedding",
                                 dtype=DataType.BINARY_VECTOR,
                                 description="binary vector",
                                 dim=VECTOR_DIMENSION,
                                 is_primary=False)
            schema = CollectionSchema(fields=[field1, field2],
                                      description="collection description")
            self.collection = Collection(name=collection_name, schema=schema)
            LOGGER.debug("Create Milvus collection: {}".format(
                self.collection))
        return "OK"
    except Exception as e:
        # The message used to say "load data", copied from insert().
        LOGGER.error("Failed to create Milvus collection: {}".format(e))
        sys.exit(1)