Ejemplo n.º 1
0
def main():
    milvus = Milvus()

    # Connect to Milvus server
    # You may need to change _HOST and _PORT accordingly
    param = {'host': _HOST, 'port': _PORT}
    status = milvus.connect(**param)
    if status.OK():
        print("Server connected.")
    else:
        print("Server connect fail.")
        sys.exit(1)

    # Create collection demo_collection if it dosen't exist.
    collection_name = 'example_collection'

    status, ok = milvus.has_collection(collection_name)
    if not ok:
        param = {
            'collection_name': collection_name,
            'dimension': _DIM,
            'index_file_size': _INDEX_FILE_SIZE,  # optional
            'metric_type': MetricType.L2  # optional
        }

        milvus.create_collection(param)

    # Show collections in Milvus server
    _, collections = milvus.show_collections()

    # present collection info
    _, info = milvus.collection_info(collection_name)
    print(info)

    # Describe demo_collection
    _, collection = milvus.describe_collection(collection_name)
    print(collection)

    # 10000 vectors with 16 dimension
    # element per dimension is float32 type
    # vectors should be a 2-D array
    vectors = [[random.random() for _ in range(_DIM)] for _ in range(10000)]
    # You can also use numpy to generate random vectors:
    #     `vectors = np.random.rand(10000, 16).astype(np.float32)`

    # Insert vectors into demo_collection, return status and vectors id list
    status, ids = milvus.insert(collection_name=collection_name, records=vectors)

    # Flush collection  inserted data to disk.
    milvus.flush([collection_name])

    # Get demo_collection row count
    status, result = milvus.count_collection(collection_name)

    # create index of vectors, search more rapidly
    index_param = {
        'nlist': 2048
    }

    # Create ivflat index in demo_collection
    # You can search vectors without creating index. however, Creating index help to
    # search faster
    print("Creating index: {}".format(index_param))
    status = milvus.create_index(collection_name, IndexType.IVF_FLAT, index_param)

    # describe index, get information of index
    status, index = milvus.describe_index(collection_name)
    print(index)

    # Use the top 10 vectors for similarity search
    query_vectors = vectors[0:10]

    # execute vector similarity search
    search_param = {
        "nprobe": 16
    }
    param = {
        'collection_name': collection_name,
        'query_records': query_vectors,
        'top_k': 1,
        'params': search_param
    }
    print("Searching ... ")
    status, results = milvus.search(**param)

    if status.OK():
        # indicate search result
        # also use by:
        #   `results.distance_array[0][0] == 0.0 or results.id_array[0][0] == ids[0]`
        if results[0][0].distance == 0.0 or results[0][0].id == ids[0]:
            print('Query result is correct')
        else:
            print('Query result isn\'t correct')

    # print results
    print(results)

    # Delete demo_collection
    status = milvus.drop_collection(collection_name)

    # Disconnect from Milvus
    status = milvus.disconnect()
Ejemplo n.º 2
0
        'dimension': _DIM,
        'index_file_size': 10,  # optional
        'metric_type': MetricType.L2  # optional
    }

    client.create_collection(param)

    # randomly generate 100000 vectors and insert collection
    vectors = [[random.random() for _ in range(_DIM)] for _ in range(10000)]
    client.insert(collection_name, vectors)

    # flush data to disk
    client.flush([collection_name])

    # query collection's statistical information.
    status, info = client.collection_info(collection_name)
    if not status.OK():
        print("Query collection statistical information fail. exiting ....")
        sys.exit(1)

    # show collection information
    print("Total amount of vectors in collection {} is {}".format(
        collection_name, info.count))
    for par in info.partitions_stat:
        print("\tpartition tag: {}, vector count: {}".format(
            par.tag, par.count))
        # show segment information
        for seg in par.segments_stat:
            print(
                "\t\tsegment name: {}, vector count: {}, index: {}, storage size {:.3f} MB"
                .format(seg.segment_name, seg.count, seg.index_name,