def create_collection(self):
    """Return the collection named ``self.collection_name``, creating it if absent.

    When the name is already reported by ``list_collections()`` the existing
    collection is opened. Otherwise a new one is created with an auto-id
    INT64 primary key (``self.primary_name``) and a 512-dimensional float
    vector field (``self.embedding_vector_field``).

    If ``self.partition_name`` is set and the collection reports that it has
    no such partition, the partition is created before returning.
    """
    existing_names = list_collections()

    if self.collection_name in existing_names:
        collection = Collection(name=self.collection_name)
    else:
        primary_key = FieldSchema(
            name=self.primary_name,
            dtype=DataType.INT64,
            is_primary=True,
            auto_id=True,
        )
        embedding = FieldSchema(
            name=self.embedding_vector_field,
            dtype=DataType.FLOAT_VECTOR,
            dim=512,
        )
        schema = CollectionSchema(
            fields=[primary_key, embedding],
            description="clover_staging collection",
        )
        collection = Collection(name=self.collection_name, schema=schema)

    # No partition requested: nothing more to set up.
    if self.partition_name is None:
        return collection

    if collection.has_partition(self.partition_name) is False:
        collection.create_partition(self.partition_name)
    return collection
def test_collection_by_DataFrame(self):
    """Create a collection from an explicit three-field schema, then drop it.

    The schema has an INT64 field (declared as the primary field), a FLOAT
    field and a 128-dimensional FLOAT_VECTOR field.
    """
    from pymilvus_orm import Collection, connections
    from pymilvus_orm.schema import FieldSchema, CollectionSchema
    from pymilvus_orm.types import DataType

    int_field = FieldSchema("int64", DataType.INT64)
    float_field = FieldSchema("float", DataType.FLOAT)
    vector_field = FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=128)
    collection_schema = CollectionSchema(
        [int_field, float_field, vector_field],
        primary_field="int64",
    )

    connections.connect()
    created = Collection(name=gen_collection_name(), schema=collection_schema)
    created.drop()
def __init__(self, name):
    """Connect to the local Milvus server and create the collection ``name``.

    A connection configuration for ``127.0.0.1:19530`` is registered under
    the alias ``default`` and then opened. The collection is built with an
    INT64 primary key field ``id`` and a 512-dimensional float vector field
    ``image``.

    Args:
        name: Name of the collection to create or open.
    """
    connections.add_connection(default={
        'host': '127.0.0.1',
        'port': '19530',
    })
    # Connect under the alias that was just registered. The previous code
    # passed the collection name as the alias, for which no configuration
    # exists; ``Collection`` below is also created without a ``using``
    # argument, so it relies on the ``default`` connection being open.
    connections.connect(alias='default')

    id_field = FieldSchema(
        name='id',
        is_primary=True,
        dtype=DataType.INT64,
        description='id',
    )
    image_field = FieldSchema(
        name='image',
        dtype=DataType.FLOAT_VECTOR,
        dim=512,
        description='image',
    )
    schema = CollectionSchema(fields=[id_field, image_field], primary_field='id')

    # NOTE(review): the collection handle is not stored on ``self`` in the
    # visible code — confirm whether callers need it kept as an attribute.
    collection = Collection(name=name, schema=schema)
def test_construct_from_dataframe(self):
    """Check that ``construct_from_dataframe`` yields a ``Collection`` first.

    The first element of the returned value must be exactly of type
    ``Collection`` (not merely an instance of a subclass).
    """
    result = Collection.construct_from_dataframe(
        gen_collection_name(),
        gen_pd_data(default_nb),
        primary_field="int64",
    )
    first_item = result[0]
    assert type(first_item) is Collection
def collection(self):
    """Yield a collection with a generated name and schema, then clean it up.

    After the consumer resumes the generator, the collection is dropped
    through the current connection, but only if that connection still
    reports that it exists.
    """
    name = gen_collection_name()
    schema = gen_schema()
    created = Collection(name, schema=schema)
    yield created

    # Teardown: skip the drop when the collection is already gone.
    if not connections.get_connection().has_collection(name):
        return
    connections.get_connection().drop_collection(name)
def hello_milvus():
    """Run an end-to-end Milvus walkthrough on a throwaway collection.

    Steps, in order: connect, create the ``hello_milvus`` collection, insert
    3000 random rows, build an IVF_FLAT index, load the collection, run a
    filtered vector search, print the hits and the search latency, and
    finally drop the collection.
    """
    import time

    # Connect and show what already exists.
    connections.connect()
    print("\nList collections...")
    print(list_collections())

    # Schema: integer primary key, a double scalar, and a float vector.
    dim = 128
    count_field = FieldSchema(name="count", dtype=DataType.INT64, is_primary=True)
    value_field = FieldSchema(name="random_value", dtype=DataType.DOUBLE)
    vector_field = FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    default_schema = CollectionSchema(
        fields=[count_field, value_field, vector_field],
        description="test collection",
    )

    print("\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=default_schema)

    print("\nList collections...")
    print(list_collections())

    # Build the rows column by column; vectors are generated before the
    # scalar column so the sequence of random draws stays the same.
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    counts = list(range(nb))
    random_values = [float(random.randrange(-20, -10)) for _ in range(nb)]
    collection.insert([counts, random_values, vectors])

    print("\nGet collection entities...")
    print(collection.num_entities)

    # Index the vector field, then load the collection for searching.
    default_index = {
        "index_type": "IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "L2",
    }
    print("\nCreate index...")
    collection.create_index(field_name="float_vector", index_params=default_index)
    print("\nload collection...")
    collection.load()

    # Search with the last two inserted vectors, keeping rows with count > 100.
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    query_vectors = vectors[-2:]
    start_time = time.time()
    print("\nSearch...")
    # output_fields selects which scalar fields come back with each hit.
    res = collection.search(
        query_vectors,
        "float_vector",
        search_params,
        topK,
        "count > 100",
        output_fields=["count", "random_value"],
    )
    end_time = time.time()

    # Print each hit together with its random_value field.
    for query_hits in res:
        for hit in query_hits:
            print(hit, hit.entity.get("random_value"))
    elapsed = end_time - start_time
    print("search latency = %.4fs" % elapsed)

    # Remove the demo collection.
    collection.drop()
def index(self, name, field_name, collection_name, schema, get_simple_index):
    """Return an ``Index`` on ``field_name`` of a freshly constructed collection.

    A ``Collection`` is built from ``collection_name`` and ``schema`` and
    passed to ``Index`` together with ``field_name`` and ``get_simple_index``
    (presumably the index parameters — confirm against the fixture that
    supplies it). ``name`` is accepted but not used here.
    """
    target = Collection(collection_name, schema=schema)
    built_index = Index(target, field_name, get_simple_index)
    return built_index
def collection(self, collection_name, schema):
    """Yield a ``Collection`` for ``collection_name`` and drop it afterwards.

    The drop runs unconditionally once the consumer resumes the generator.
    """
    created = Collection(collection_name, schema=schema)
    yield created
    created.drop()