Beispiel #1
0
def main():
    """Insert base vectors into Milvus in batches and mirror their attributes into PostgreSQL.

    For each of the ``VEC_NUM // BASE_LEN`` batches this:

    1. loads ``BASE_LEN`` vectors with ``load_bvecs_data`` and assigns them the
       consecutive ids ``count * BASE_LEN .. (count + 1) * BASE_LEN - 1``;
    2. draws one random ``sex`` / ``get_time`` / ``is_glasses`` value for the batch;
    3. bulk-inserts the batch into ``MILVUS_collection`` and prints the elapsed time;
    4. reads the inserted entities back from Milvus, writes one
       ``id|sex|'get_time'|is_glasses`` line per entity to ``fname``, calls
       ``copy_data_to_pg`` and prints the elapsed time.

    After the last batch ``build_pg_index`` is called once.
    """
    # connect_milvus_server()
    milvus = Milvus(host=SERVER_ADDR, port=SERVER_PORT)
    create_milvus_collection(milvus)
    build_collection(milvus)
    conn = connect_postgres_server()
    cur = conn.cursor()
    create_pg_table(conn, cur)

    for count in range(VEC_NUM // BASE_LEN):
        vectors = load_bvecs_data(FILE_PATH, BASE_LEN, count)
        # Explicit ids so the Milvus rows and the PostgreSQL rows share keys.
        vectors_ids = list(range(count * BASE_LEN, (count + 1) * BASE_LEN))

        sex = random.choice(['female', 'male'])
        get_time = fake.past_datetime(start_date="-120d", tzinfo=None)
        is_glasses = random.choice(['True', 'False'])

        # Each field carries its own value (the "is_glasses" and "get_time"
        # entries previously had each other's values).
        # NOTE(review): the three scalar fields are declared INT32 but hold a
        # string / datetime / string, and each holds one value for the whole
        # batch rather than one per vector -- confirm against the collection
        # schema created by create_milvus_collection().
        hybrid_entities = [
            {"name": "sex", "values": sex, "type": DataType.INT32},
            {"name": "is_glasses", "values": is_glasses, "type": DataType.INT32},
            {"name": "get_time", "values": get_time, "type": DataType.INT32},
            {"name": "Vec", "values": vectors, "type": DataType.FLOAT_VECTOR},
        ]

        time_start = time.time()
        ids = milvus.bulk_insert(MILVUS_collection,
                                 hybrid_entities,
                                 ids=vectors_ids)
        time_end = time.time()
        print(count, "insert milvue time: ", time_end - time_start)

        time_start = time.time()
        # Fetch the whole batch once, through the same connection and
        # collection used for the insert, instead of once per id.
        # NOTE(review): assumes get_entity_by_id() returns an iterable of
        # entities exposing `.id` and `.get(field_name)` -- confirm against
        # the installed pymilvus version.
        inserted = milvus.get_entity_by_id(MILVUS_collection, ids)
        with open(fname, 'w+') as f:
            for res in inserted:
                # Look fields up by name, not by the random values drawn above.
                row_sex = res.get("sex")
                row_time = res.get("get_time")
                row_glasses = res.get("is_glasses")
                f.write(f"{res.id}|{row_sex}|'{row_time}'|{row_glasses}\n")
        # Presumably loads the file just written into PostgreSQL -- verify
        # against copy_data_to_pg().
        copy_data_to_pg(conn, cur)
        time_end = time.time()
        print(count, "insert pg time: ", time_end - time_start)

    build_pg_index(conn, cur)
Beispiel #2
0
#     for example:
#           '{"name": "duration", "values": durations, "type": DataType.INT32}'
# ------
# One {"name", "values"} record per field of the collection.
# "title" is left out because Milvus doesn't support string type yet.
entities = [
    {"name": field_name, "values": field_values}
    for field_name, field_values in (
        ("duration", durations),
        ("release_year", release_years),
        ("embedding", embeddings),
    )
]

# ------
# Basic insert entities:
#     We insert `entities` into our collection, into partition `American`, with the ids we provide.
#     If the insert succeeds, the ids we provided are returned.
# ------
# Insert into partition "American" with the ids supplied by the caller; the
# value returned by the call is rebound to `ids` and reported below.
ids = client.bulk_insert(
    collection_name,
    entities,
    ids,
    partition_tag="American",
)
print(
    "\n----------insert----------",
    "Films are inserted and the ids are: {}".format(ids),
    sep="\n",
)


# ------
# Basic insert entities:
#     We can insert `The_Lord_of_the_Rings` directly.
#     Here we define a group of data called `Batmans`, in the same form as `The_Lord_of_the_Rings`.
#     Note that Milvus doesn't support string data, so the "title" key-value pair is commented out,
#     and the id key is required to be `_id`.
# ------
# NOTE(review): the first entry was missing its closing brace, which made the
# whole literal a syntax error. It is closed here right after `_id`; the other
# per-film fields of that entry are not visible, so confirm whether any are
# missing before passing `Batmans` to an insert call.
Batmans = [
    {
        # "title": "Batman_Begins",
        "_id": 4,
    },
    {
        "name": "release_year",
        "values": release_years,
    },
    {
        "name": "embedding",
        "values": embeddings,
    },
]

# ------
# Basic insert:
#     After preparing the data, we are going to insert them into our collection.
#     The number of films inserted should be 8657.
# ------
ids = client.bulk_insert(collection_name, entities, ids)

# Flush the collection, then read back its entity count and report it.
client.flush([collection_name])
after_flush_counts = client.count_entities(collection_name)
print(
    " > There are {} films in collection `{}` after flush".format(
        after_flush_counts,
        collection_name,
    )
)

# ------
# Basic create index:
#     Now that we have inserted all the films into Milvus, we are going to build index with these data.
#
#     While building the index, we have to indicate which `field` to build the index for, the `index_type`,
#     the `metric_type` and the params for the specific index type. In our case, we want to build an `IVF_FLAT`
#     index, so the specific params are "nlist". See pymilvus documentation
#     (https://milvus-io.github.io/milvus-sdk-python/pythondoc/v0.3.0/param.html) for `index_type` we
#     support and the params accordingly.