Example #1
    def alias_cases():
        teardown()
        A, B = setup()

        latest_coll_alias = "latest_collection"

        utility.create_alias(A.name, latest_coll_alias)

        alias_collection = Collection(latest_coll_alias)
        assert alias_collection.description == A.description

        utility.alter_alias(B.name, latest_coll_alias)

        alias_collection = Collection(latest_coll_alias)
        assert alias_collection.description == B.description

        utility.drop_alias(latest_coll_alias)
        try:
            alias_collection = Collection(latest_coll_alias)
        except Exception as e:
            print(
                f" - Alias [{latest_coll_alias}] dropped, cannot get collection from it. Error msg: {e}"
            )
        finally:
            teardown()
Example #2
    def test_collection_by_DataFrame(self):
        from pymilvus import Collection
        from pymilvus import FieldSchema, CollectionSchema
        from pymilvus import DataType
        coll_name = gen_collection_name()
        fields = [
            FieldSchema("int64", DataType.INT64),
            FieldSchema("float", DataType.FLOAT),
            FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=128)
        ]

        prefix = "pymilvus.client.grpc_handler.GrpcHandler"

        collection_schema = CollectionSchema(fields, primary_field="int64")
        with mock.patch(f"{prefix}.__init__", return_value=None):
            with mock.patch(f"{prefix}._wait_for_channel_ready",
                            return_value=None):
                connections.connect()

        with mock.patch(f"{prefix}.create_collection", return_value=None):
            with mock.patch(f"{prefix}.has_collection", return_value=False):
                collection = Collection(name=coll_name,
                                        schema=collection_schema)

        with mock.patch(f"{prefix}.create_collection", return_value=None):
            with mock.patch(f"{prefix}.has_collection", return_value=True):
                with mock.patch(f"{prefix}.describe_collection",
                                return_value=collection_schema.to_dict()):
                    collection = Collection(name=coll_name)

        with mock.patch(f"{prefix}.drop_collection", return_value=None):
            with mock.patch(f"{prefix}.describe_index", return_value=None):
                collection.drop()
Example #3
    def test_collection_by_DataFrame(self):
        from pymilvus import Collection, connections
        from pymilvus import FieldSchema, CollectionSchema
        from pymilvus import DataType
        coll_name = gen_collection_name()
        fields = [
            FieldSchema("int64", DataType.INT64),
            FieldSchema("float", DataType.FLOAT),
            FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=128)
        ]

        collection_schema = CollectionSchema(fields, primary_field="int64")
        with mock.patch("pymilvus.Milvus.__init__", return_value=None):
            connections.connect()

        with mock.patch("pymilvus.Milvus.create_collection",
                        return_value=None):
            with mock.patch("pymilvus.Milvus.has_collection",
                            return_value=False):
                collection = Collection(name=coll_name,
                                        schema=collection_schema)

        with mock.patch("pymilvus.Milvus.create_collection",
                        return_value=None):
            with mock.patch("pymilvus.Milvus.has_collection",
                            return_value=True):
                with mock.patch("pymilvus.Milvus.describe_collection",
                                return_value=collection_schema.to_dict()):
                    collection = Collection(name=coll_name)

        with mock.patch("pymilvus.Milvus.drop_collection", return_value=None):
            with mock.patch("pymilvus.Milvus.describe_index",
                            return_value=None):
                collection.drop()
Example #4
def test_collection_only_name():
    name = gen_unique_str()
    collection_temp = Collection(name=name, schema=gen_default_fields())
    collection = Collection(name=name)
    data = gen_float_data(default_nb)
    collection.insert(data)
    collection.load()
    assert collection.is_empty is False
    assert collection.num_entities == default_nb
    collection.drop()
Example #5
def test_specify_primary_key():
    data = gen_float_data(default_nb)
    collection = Collection(name=gen_unique_str(), data=data, schema=gen_default_fields_with_primary_key_1())
    for index_param in gen_simple_index():
        collection.create_index(field_name=default_float_vec_field_name, index_params=index_param)
    assert len(collection.indexes) != 0
    collection.drop()

    collection2 = Collection(name=gen_unique_str(), data=data, schema=gen_default_fields_with_primary_key_2())
    for index_param in gen_simple_index():
        collection2.create_index(field_name=default_float_vec_field_name, index_params=index_param)
    assert len(collection2.indexes) != 0
    collection2.drop()
Example #6
def test_partition():
    connections.connect(alias="default")
    print("create collection")
    collection = Collection(name=gen_unique_str(), schema=gen_default_fields())
    print("create partition")
    partition = Partition(collection, name=gen_unique_str())
    print(list_collections())
    assert has_partition(collection.name, partition.name) is True

    data = gen_data(default_nb)
    print("insert data to partition")
    partition.insert(data)
    assert partition.is_empty is False
    assert partition.num_entities == default_nb

    print("load partition")
    partition.load()
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    print("search partition")
    res = partition.search(data[2][-2:], "float_vector", search_params, topK, "count > 100")
    for hits in res:
        for hit in hits:
            print(hit)

    print("release partition")
    partition.release()
    print("drop partition")
    partition.drop()
    print("drop collection")
    collection.drop()
Example #7
def create_collection(name, id_field, vector_field, attr1_name, attr2_name):
    field1 = FieldSchema(name=id_field,
                         dtype=DataType.INT64,
                         description="int64",
                         is_primary=True)
    field2 = FieldSchema(name=vector_field,
                         dtype=DataType.FLOAT_VECTOR,
                         description="float vector",
                         dim=_DIM,
                         is_primary=False)
    # TODO: remove dim.
    field3 = FieldSchema(name=attr1_name,
                         dtype=DataType.INT64,
                         description="attr1",
                         is_primary=False,
                         dim=_DIM)
    field4 = FieldSchema(name=attr2_name,
                         dtype=DataType.DOUBLE,
                         description="attr2",
                         is_primary=False,
                         dim=_DIM)
    schema = CollectionSchema(fields=[field1, field2, field3, field4],
                              description="collection description")
    collection = Collection(name=name, data=None, schema=schema)
    print("\ncollection created:", name)
    return collection
Example #8
def test_create_index_binary_vector():
    collection = Collection(name=gen_unique_str(), schema=gen_binary_schema())
    data = gen_binary_data(default_nb)
    collection.insert(data)
    collection.create_index(field_name=default_binary_vec_field_name, index_params=default_binary_index)
    assert len(collection.indexes) != 0
    collection.drop()
Example #9
def test_create_index_float_vector():
    data = gen_float_data(default_nb)
    collection = Collection(name=gen_unique_str(), data=data, schema=gen_default_fields())
    for index_param in gen_simple_index():
        collection.create_index(field_name=default_float_vec_field_name, index_params=index_param)
    assert len(collection.indexes) != 0
    collection.drop()
Example #10
def create_collections_and_insert_data():
    import random
    import time
    dim = 128
    default_fields = [
        FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="random_value", dtype=DataType.DOUBLE),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")
    print(f"\nList collections...")
    print(list_collections())
    for col_name in all_index_types:
        print(f"\nCreate collection...")
        collection = Collection(name=col_name, schema=default_schema)
        #  insert data
        nb = 3000
        vectors = [[i / nb for _ in range(dim)] for i in range(nb)]
        collection.insert(
            [[i for i in range(nb)],
             [float(random.randrange(-20, -10)) for _ in range(nb)], vectors])
        print(f"collection name: {col_name}")
        print("Get collection entities")
        start_time = time.time()
        print(f"collection entities: {collection.num_entities}")
        end_time = time.time()
        print("Get collection entities time = %.4fs" % (end_time - start_time))
    print(f"\nList collections...")
    print(list_collections())
Example #11
def test_collection_with_dataframe():
    data = gen_dataframe(default_nb)
    collection, _ = Collection.construct_from_dataframe(name=gen_unique_str(), dataframe=data, primary_field="int64")
    collection.load()
    assert collection.is_empty is False
    assert collection.num_entities == default_nb
    collection.drop()
Example #12
def get_collections():
    print(f"\nList collections...")
    col_list = list_collections()
    print(f"collections_nums: {len(col_list)}")
    # list the number of entities in each collection
    for name in col_list:
        c = Collection(name=name)
        print(f"{name}: {c.num_entities}")
Example #13
def get_collections(prefix):
    print(f"\nList collections...")
    col_list = filter_collections_by_prefix(prefix)
    print(f"collections_nums: {len(col_list)}")
    # list the number of entities in each collection
    for name in col_list:
        c = Collection(name=name)
        print(f"{name}: {c.num_entities}")
    return col_list
Example #14
def load_and_search(prefix, replicas=1):
    print("search data starts")
    col_list = get_collections(prefix)
    for col_name in col_list:
        c = Collection(name=col_name)
        print(f"collection name: {col_name}")
        print("release collection")
        c.release()
        print("load collection")
        t0 = time.time()
        if replicas == 1:
            c.load()
        if replicas > 1:
            c.load(replica_number=replicas)
            print(c.get_replicas())
        print(f"load time: {time.time() - t0:.4f}")
        topK = 5
        vectors = [[1.0 for _ in range(128)] for _ in range(3000)]
        index_name = col_name.replace(prefix, "")
        search_params = gen_search_param(index_name)[0]
        print(search_params)
        # search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
        start_time = time.time()
        print(f"\nSearch...")
        # define output_fields of search result
        res = c.search(vectors[:1],
                       "float_vector",
                       search_params,
                       topK,
                       "count > 500",
                       output_fields=["count", "random_value"],
                       timeout=120)
        end_time = time.time()
        # show result
        for hits in res:
            for hit in hits:
                # Get value of the random value field for search result
                print(hit, hit.entity.get("random_value"))
            ids = hits.ids
            print(ids)
        print("search latency: %.4fs" % (end_time - start_time))
        t0 = time.time()
        expr = "count in [2,4,6,8]"
        output_fields = ["count", "random_value"]
        res = c.query(expr, output_fields, timeout=20)
        sorted_res = sorted(res, key=lambda k: k['count'])
        for r in sorted_res:
            print(r)
        t1 = time.time()
        print("query latency: %.4fs" % (t1 - t0))
        # c.release()
        print("###########")
    print("search data ends")
Example #15
def get_collections(prefix, check=False):
    print("\nList collections...")
    col_list = filter_collections_by_prefix(prefix)
    print(f"collections_nums: {len(col_list)}")
    # list the number of entities in each collection
    for name in col_list:
        c = Collection(name=name)
        num_entities = c.num_entities
        print(f"{name}: {num_entities}")
        if check:
            assert num_entities >= 3000
    return col_list
Example #16
def create_collection(name, id_field, vector_field):
    field1 = FieldSchema(name=id_field,
                         dtype=DataType.INT64,
                         description="int64",
                         is_primary=True)
    field2 = FieldSchema(name=vector_field,
                         dtype=DataType.FLOAT_VECTOR,
                         description="float vector",
                         dim=_DIM,
                         is_primary=False)
    schema = CollectionSchema(fields=[field1, field2],
                              description="collection description")
    collection = Collection(name=name, data=None, schema=schema)
    print("\ncollection created:", name)
    return collection
Example #17
def create_collections_and_insert_data(prefix,
                                       flush=True,
                                       count=3000,
                                       collection_cnt=11):
    import random
    dim = 128
    nb = count // 10
    default_fields = [
        FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="random_value", dtype=DataType.DOUBLE),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")
    for index_name in all_index_types[:collection_cnt]:
        print("\nCreate collection...")
        col_name = prefix + index_name
        collection = Collection(name=col_name, schema=default_schema)
        print(f"collection name: {col_name}")
        print(f"begin insert, count: {count} nb: {nb}")
        times = int(count // nb)
        total_time = 0.0
        vectors = [[random.random() for _ in range(dim)] for _ in range(count)]
        for j in range(times):
            start_time = time.time()
            collection.insert(
                [[i for i in range(nb * j, nb * j + nb)],
                 [float(random.randrange(-20, -10)) for _ in range(nb)],
                 vectors[nb * j:nb * j + nb]])
            end_time = time.time()
            print(
                f"[{j+1}/{times}] insert {nb} data, time: {end_time - start_time:.4f}"
            )
            total_time += end_time - start_time

        print(f"end insert, time: {total_time:.4f}")
        if flush:
            print("Get collection entities")
            start_time = time.time()
            print(f"collection entities: {collection.num_entities}")
            end_time = time.time()
            print("Get collection entities time = %.4fs" %
                  (end_time - start_time))
    print("\nList collections...")
    print(get_collections(prefix))
Example #18
def create_collection(name, id_field, vector_field, str_field):
    field1 = FieldSchema(name=id_field,
                         dtype=DataType.INT64,
                         description="int64",
                         is_primary=True)
    field2 = FieldSchema(name=vector_field,
                         dtype=DataType.FLOAT_VECTOR,
                         description="float vector",
                         dim=_DIM,
                         is_primary=False)
    field3 = FieldSchema(name=str_field,
                         dtype=DataType.VARCHAR,
                         description="string",
                         max_len_per_row=_MAX_LEN_PER_ROW,
                         is_primary=False)
    schema = CollectionSchema(fields=[field1, field2, field3],
                              description="collection description")
    collection = Collection(name=name, data=None, schema=schema)
    print("\ncollection created:", name)
    return collection
Example #19
def create_index():
    # create index
    default_index = {
        "index_type": "IVF_FLAT",
        "params": {
            "nlist": 128
        },
        "metric_type": "L2"
    }
    col_list = list_collections()
    print(f"\nCreate index...")
    for name in col_list:
        c = Collection(name=name)

        print(name)
        print(c)
        index = copy.deepcopy(default_index)
        index["index_type"] = name
        index["params"] = index_params_map[name]
        if name in ["BIN_FLAT", "BIN_IVF_FLAT"]:
            index["metric_type"] = "HAMMING"
        c.create_index(field_name="float_vector", index_params=index)
Example #20
def load_and_search():
    print("search data starts")
    col_list = list_collections()
    for name in col_list:
        c = Collection(name=name)
        print(f"collection name: {name}")
        c.load()
        topK = 5
        vectors = [[0.0 for _ in range(128)] for _ in range(3000)]
        index_type = name
        search_params = gen_search_param(index_type)[0]
        print(search_params)
        # search_params = {"metric_type": "L2", "params": {"nprobe": 10}}

        import time
        start_time = time.time()
        print(f"\nSearch...")
        # define output_fields of search result
        res = c.search(vectors[:1],
                       "float_vector",
                       search_params,
                       topK,
                       "count > 500",
                       output_fields=["count", "random_value"],
                       timeout=20)
        end_time = time.time()
        # show result
        for hits in res:
            for hit in hits:
                # Get value of the random value field for search result
                print(hit, hit.entity.get("random_value"))
            ids = hits.ids
            print(ids)

            print("###########")
        print("search latency = %.4fs" % (end_time - start_time))
        c.release()
    print("search data ends")
Example #21
def create_index(prefix):
    # create index
    default_index = {
        "index_type": "IVF_FLAT",
        "params": {
            "nlist": 128
        },
        "metric_type": "L2"
    }
    col_list = get_collections(prefix)
    print("\nCreate index...")
    for col_name in col_list:
        c = Collection(name=col_name)
        index_name = col_name.replace(prefix, "")
        print(index_name)
        print(c)
        index = copy.deepcopy(default_index)
        index["index_type"] = index_name
        index["params"] = index_params_map[index_name]
        if index_name in ["BIN_FLAT", "BIN_IVF_FLAT"]:
            index["metric_type"] = "HAMMING"
        t0 = time.time()
        c.create_index(field_name="float_vector", index_params=index)
        print(f"create index time: {time.time() - t0:.4f}")
def hello_milvus(collection_name):
    import time
    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="float", dtype=DataType.FLOAT),
        FieldSchema(name="varchar",
                    dtype=DataType.VARCHAR,
                    max_length_per_row=65535),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")
    if utility.has_collection(collection_name):
        print("collection is exist")
        collection = Collection(name=collection_name)
        default_schema = collection.schema
        # reuse the existing collection's schema and infer dim from its vector field
        dim = [
            field.params['dim'] for field in default_schema.fields
            if field.dtype in [101, 102]
        ][0]
    print(f"\nCreate collection...")
    collection = Collection(name=collection_name, schema=default_schema)
    #  insert data
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    t0 = time.time()

    collection.insert([[i for i in range(nb)],
                       [np.float32(i) for i in range(nb)],
                       [str(i) for i in range(nb)], vectors])
    t1 = time.time()
    print(f"\nInsert {nb} vectors cost {t1 - t0:.4f} seconds")

    t0 = time.time()
    print(f"\nGet collection entities...")
    print(collection.num_entities)
    t1 = time.time()
    print(f"\nGet collection entities cost {t1 - t0:.4f} seconds")

    # create index and load table
    default_index = {
        "index_type": "IVF_SQ8",
        "metric_type": "L2",
        "params": {
            "nlist": 64
        }
    }
    print(f"\nCreate index...")
    t0 = time.time()
    collection.create_index(field_name="float_vector",
                            index_params=default_index)
    t1 = time.time()
    print(f"\nCreate index cost {t1 - t0:.4f} seconds")
    print(f"\nload collection...")
    t0 = time.time()
    collection.load()
    t1 = time.time()
    print(f"\nload collection cost {t1 - t0:.4f} seconds")

    # load and search
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    t0 = time.time()
    print(f"\nSearch...")
    # define output_fields of search result
    res = collection.search(vectors[-2:],
                            "float_vector",
                            search_params,
                            topK,
                            "int64 > 100",
                            output_fields=["int64", "float"],
                            timeout=TIMEOUT)
    t1 = time.time()
    print(f"search cost  {t1 - t0:.4f} seconds")
    # show result
    for hits in res:
        for hit in hits:
            # Get value of the random value field for search result
            print(hit, hit.entity.get("float"))

    # query
    expr = "int64 in [2,4,6,8]"
    output_fields = ["int64", "float"]
    res = collection.query(expr, output_fields, timeout=TIMEOUT)
    sorted_res = sorted(res, key=lambda k: k['int64'])
    for r in sorted_res:
        print(r)
    collection.release()
Example #23
print(list_collections())

# Create a collection named 'demo_film_tutorial'
print(f"\nCreate collection...")
field1 = FieldSchema(name="release_year",
                     dtype=DataType.INT64,
                     description="int64",
                     is_primary=True)
field2 = FieldSchema(name="embedding",
                     dtype=DataType.FLOAT_VECTOR,
                     description="float vector",
                     dim=8,
                     is_primary=False)
schema = CollectionSchema(fields=[field1, field2],
                          description="collection description")
collection = Collection(name='demo_film_tutorial', data=None, schema=schema)

# List all collection names
print(f"\nList collections...")
print(list_collections())

print(f"\nGet collection name, schema and description...")
print(collection.name)
print(collection.schema)
print(collection.description)

# List all partition names in demo collection
print(f"\nList partitions...")
print(collection.partitions)

# Create a partition named 'American'
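# The original example is truncated here. As a minimal, hypothetical sketch of
# the announced step (reusing the Partition API shown in Example #6 above),
# the partition could be created and then listed like this:
partition = Partition(collection, "American")
print(collection.partitions)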
Example #24
def test_exist_collection(name):
    assert utility.has_collection(name) is True
    collection = Collection(name)
    collection.drop()
Example #25
def test_create_collection():
    name = gen_unique_str()
    collection = Collection(name=name, schema=gen_default_fields())
    assert collection.is_empty is True
    assert collection.num_entities == 0
    return name
Example #26
 def index(self, name, field_name, collection_name, schema,
           get_simple_index):
     connections.connect()
     collection = Collection(collection_name, schema=schema)
     return Index(collection, field_name, get_simple_index)
Example #27
def hello_milvus(host="127.0.0.1"):
    import time
    # create connection
    connections.connect(host=host, port="19530")

    print(f"\nList collections...")
    print(list_collections())

    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="random_value", dtype=DataType.DOUBLE),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")

    print(f"\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=default_schema)

    print(f"\nList collections...")
    print(list_collections())

    #  insert data
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    t0 = time.time()
    collection.insert([[i for i in range(nb)],
                       [float(random.randrange(-20, -10)) for _ in range(nb)],
                       vectors])
    t1 = time.time()
    print(f"\nInsert {nb} vectors cost {t1 - t0} seconds")

    t0 = time.time()
    print(f"\nGet collection entities...")
    print(collection.num_entities)
    t1 = time.time()
    print(f"\nGet collection entities cost {t1 - t0} seconds")

    # create index and load table
    default_index = {
        "index_type": "IVF_FLAT",
        "params": {
            "nlist": 128
        },
        "metric_type": "L2"
    }
    print(f"\nCreate index...")
    t0 = time.time()
    collection.create_index(field_name="float_vector",
                            index_params=default_index)
    t1 = time.time()
    print(f"\nCreate index cost {t1 - t0} seconds")
    print(f"\nload collection...")
    t0 = time.time()
    collection.load()
    t1 = time.time()
    print(f"\nload collection cost {t1 - t0} seconds")

    # load and search
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    start_time = time.time()
    print(f"\nSearch...")
    # define output_fields of search result
    res = collection.search(vectors[-2:],
                            "float_vector",
                            search_params,
                            topK,
                            "count > 100",
                            output_fields=["count", "random_value"])
    end_time = time.time()

    # show result
    for hits in res:
        for hit in hits:
            # Get value of the random value field for search result
            print(hit, hit.entity.get("random_value"))
    print("search latency = %.4fs" % (end_time - start_time))

    # query
    expr = "count in [2,4,6,8]"
    output_fields = ["count", "random_value"]
    res = collection.query(expr, output_fields)
    sorted_res = sorted(res, key=lambda k: k['count'])
    for r in sorted_res:
        print(r)
Example #28
def hello_milvus():
    # create connection
    connections.connect()

    print(f"\nList collections...")
    print(list_collections())

    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="random_value", dtype=DataType.DOUBLE),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")

    print(f"\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=default_schema)

    print(f"\nList collections...")
    print(list_collections())

    #  insert data
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    collection.insert([[i for i in range(nb)],
                       [float(random.randrange(-20, -10)) for _ in range(nb)],
                       vectors])

    print(f"\nGet collection entities...")
    print(collection.num_entities)

    # create index and load table
    default_index = {
        "index_type": "IVF_FLAT",
        "params": {
            "nlist": 128
        },
        "metric_type": "L2"
    }
    print(f"\nCreate index...")
    collection.create_index(field_name="float_vector",
                            index_params=default_index)
    print(f"\nload collection...")
    collection.load()

    # load and search
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    import time
    start_time = time.time()
    print(f"\nSearch...")
    # define output_fields of search result
    res = collection.search(vectors[-2:],
                            "float_vector",
                            search_params,
                            topK,
                            "count > 100",
                            output_fields=["count", "random_value"])
    end_time = time.time()

    # show result
    for hits in res:
        for hit in hits:
            # Get value of the random value field for search result
            print(hit, hit.entity.get("random_value"))
    print("search latency = %.4fs" % (end_time - start_time))

    # drop collection
    collection.drop()
Example #29
def hello_milvus(host="127.0.0.1"):
    import time
    # create connection
    connections.connect(host=host, port="19530")

    print(f"\nList collections...")
    print(list_collections())

    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="float", dtype=DataType.FLOAT),
        FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")

    print(f"\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=default_schema)

    print(f"\nList collections...")
    print(list_collections())

    #  insert data
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    t0 = time.time()
    collection.insert([[i for i in range(nb)],
                       [np.float32(i) for i in range(nb)],
                       [str(i) for i in range(nb)], vectors])
    t1 = time.time()
    print(f"\nInsert {nb} vectors cost {t1 - t0:.4f} seconds")

    t0 = time.time()
    print(f"\nGet collection entities...")
    print(collection.num_entities)
    t1 = time.time()
    print(f"\nGet collection entities cost {t1 - t0:.4f} seconds")

    # create index and load table
    default_index = {
        "index_type": "IVF_SQ8",
        "metric_type": "L2",
        "params": {
            "nlist": 64
        }
    }
    print(f"\nCreate index...")
    t0 = time.time()
    collection.create_index(field_name="float_vector",
                            index_params=default_index)
    t1 = time.time()
    print(f"\nCreate index cost {t1 - t0:.4f} seconds")
    print("\nGet replicas number")
    try:
        replicas_info = collection.get_replicas()
        replica_number = len(replicas_info.groups)
        print(f"\nReplicas number is {replica_number}")
    except Exception as e:
        print(str(e))
        replica_number = 1
    print(f"\nload collection...")
    t0 = time.time()
    collection.load(replica_number=replica_number)
    t1 = time.time()
    print(f"\nload collection cost {t1 - t0:.4f} seconds")

    # load and search
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    t0 = time.time()
    print(f"\nSearch...")
    # define output_fields of search result
    res = collection.search(vectors[-2:],
                            "float_vector",
                            search_params,
                            topK,
                            "int64 > 100",
                            output_fields=["int64", "float"],
                            timeout=TIMEOUT)
    t1 = time.time()
    print(f"search cost  {t1 - t0:.4f} seconds")
    # show result
    for hits in res:
        for hit in hits:
            # Get value of the random value field for search result
            print(hit, hit.entity.get("float"))

    # query
    expr = "int64 in [2,4,6,8]"
    output_fields = ["int64", "float"]
    res = collection.query(expr, output_fields, timeout=TIMEOUT)
    sorted_res = sorted(res, key=lambda k: k['int64'])
    for r in sorted_res:
        print(r)
Example #30
 def test_construct_from_dataframe(self):
     assert type(
         Collection.construct_from_dataframe(
             gen_collection_name(),
             gen_pd_data(default_nb),
             primary_field="int64")[0]) is Collection