def main():
    """Load the base vectors into Milvus in batches and mirror them to PostgreSQL.

    Steps, in order:

    1. Connect to Milvus, then create and build the collection.
    2. Connect to PostgreSQL and create the attribute table.
    3. For each of the ``VEC_NUM // BASE_LEN`` batches:
       load ``BASE_LEN`` vectors, insert them into Milvus under sequential
       ids, read the inserted entities back, write one
       ``id|sex|'get_time'|is_glasses`` row per entity to ``fname`` and call
       ``copy_data_to_pg``.
    4. Build the PostgreSQL index once every batch has been processed.

    Timing for the Milvus insert and for the PostgreSQL load of each batch is
    printed to stdout.
    """
    # connect_milvus_server()
    milvus = Milvus(host=SERVER_ADDR, port=SERVER_PORT)
    create_milvus_collection(milvus)
    build_collection(milvus)

    conn = connect_postgres_server()
    cur = conn.cursor()
    create_pg_table(conn, cur)

    count = 0
    while count < (VEC_NUM // BASE_LEN):
        vectors = load_bvecs_data(FILE_PATH, BASE_LEN, count)
        # Batch `count` owns the contiguous id range
        # [count * BASE_LEN, (count + 1) * BASE_LEN).
        vectors_ids = list(range(count * BASE_LEN, (count + 1) * BASE_LEN))

        sex = random.choice(['female', 'male'])
        get_time = fake.past_datetime(start_date="-120d", tzinfo=None)
        is_glasses = random.choice(['True', 'False'])

        # Each field carries the value generated for it (the "is_glasses" and
        # "get_time" values used to be swapped).
        # NOTE(review): every scalar field is a single string/datetime yet is
        # declared INT32, and one value is shared by the whole batch. Confirm
        # the intended per-entity encoding against the collection schema.
        hybrid_entities = [
            {"name": "sex", "values": sex, "type": DataType.INT32},
            {"name": "is_glasses", "values": is_glasses, "type": DataType.INT32},
            {"name": "get_time", "values": get_time, "type": DataType.INT32},
            {"name": "Vec", "values": vectors, "type": DataType.FLOAT_VECTOR},
        ]

        time_start = time.time()
        ids = milvus.bulk_insert(MILVUS_collection,
                                 hybrid_entities,
                                 ids=vectors_ids)
        time_end = time.time()
        print(count, "insert milvue time: ", time_end - time_start)

        time_start = time.time()
        # Read the batch back once, from the same connection and collection it
        # was inserted into, instead of re-requesting every id on each
        # iteration of the write loop.
        entities = milvus.get_entity_by_id(MILVUS_collection, ids)
        with open(fname, 'w+') as f:
            for entity in entities:
                # Fields are looked up by their literal names, not by the
                # values generated above.
                f.write("{}|{}|'{}'|{}\n".format(
                    entity.id,
                    entity.get("sex"),
                    entity.get("get_time"),
                    entity.get("is_glasses"),
                ))
        copy_data_to_pg(conn, cur)
        time_end = time.time()
        print(count, "insert pg time: ", time_end - time_start)

        count += 1

    build_pg_index(conn, cur)
# for example: # '{"name": "duration", "values": durations, "type": DataType.INT32}' # ------ entities = [ # Milvus doesn't support string type yet, so we cannot insert "title". {"name": "duration", "values": durations}, {"name": "release_year", "values": release_years}, {"name": "embedding", "values": embeddings}, ] # ------ # Basic insert entities: # We insert the `hybrid_entities` into our collection, into partition `American`, with ids we provide. # If succeed, ids we provide will be returned. # ------ ids = client.bulk_insert(collection_name, entities, ids, partition_tag="American") print("\n----------insert----------") print("Films are inserted and the ids are: {}".format(ids)) # ------ # Basic insert entities: # We can insert `The_Lord_of_the_Rings` directly. # Here we define a group of data called `Batmans` in form like `The_Lord_of_the_Rings`. # Note that, milvus doesn't support string data, so we comment "title" key-value, and # the key of id is required to be `_id`. # ------ Batmans = [ { # "title": "Batman_Begins", "_id": 4,
{ "name": "release_year", "values": release_years }, { "name": "embedding", "values": embeddings }, ] # ------ # Basic insert: # After preparing the data, we are going to insert them into our collection. # The number of films inserted should be 8657. # ------ ids = client.bulk_insert(collection_name, entities, ids) client.flush([collection_name]) after_flush_counts = client.count_entities(collection_name) print(" > There are {} films in collection `{}` after flush".format( after_flush_counts, collection_name)) # ------ # Basic create index: # Now that we have inserted all the films into Milvus, we are going to build index with these data. # # While building index, we have to indicate which `field` to build index for, the `index_type`, # `metric_type` and params for the specific index type. In our case, we want to build a `IVF_FLAT` # index, so the specific params are "nlist". See pymilvus documentation # (https://milvus-io.github.io/milvus-sdk-python/pythondoc/v0.3.0/param.html) for `index_type` we # support and the params accordingly.