Example 1
    def test_expand_data_node(self):
        """
        target: test the create and insert APIs after expanding the dataNode pods
        method: 1. create collection A and insert a dataframe
                2. expand the dataNode pods from 1 to 2
                3. verify collection A's data, then verify create and insert on a new collection
        expected: create and insert operations succeed on both collections
        """
        # deploy a cluster Milvus (one pod per node type) with helm
        release_name = "scale-data"
        env = HelmEnv(release_name=release_name)
        host = env.helm_install_cluster_milvus()

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        env.helm_upgrade_cluster_milvus(dataNode=2)
        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(
            name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # assert the old collection still accepts inserts
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb * 2

        collection_w.drop()
        new_collection_w.drop()
Example 2
    def test_expand_data_node(self):
        """
        target: test the create and insert APIs after expanding the dataNode pods
        method: 1. create collection A and insert a dataframe
                2. expand the dataNode pods from 1 to 2
                3. verify collection A's data, then verify create and insert on a new collection
        expected: create and insert operations succeed on both collections
        """
        release_name = "scale-data"
        milvusOp, host, port = scale_common.deploy_default_milvus(release_name)


        # connect
        connections.add_connection(default={"host": host, "port": port})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data()
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        milvusOp.upgrade(release_name, {'spec.components.dataNode.replicas': 2}, constants.NAMESPACE)
        milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # assert the old collection still accepts inserts
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb*2

        collection_w.drop()
        new_collection_w.drop()
Example 3
def hello_milvus(collection_name):
    import time
    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="float", dtype=DataType.FLOAT),
        FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")
    if utility.has_collection(collection_name):
        print("collection is exist")
        collection = Collection(name=collection_name)
        default_schema = collection.schema
        dim = [
            field.params['dim'] for field in default_schema.fields
            if field.dtype in [101, 102]
        ][0]
    print(f"\nCreate collection...")
    collection = Collection(name=collection_name, schema=default_schema)
    #  insert data
    nb = 3000
    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    t0 = time.time()

    collection.insert([[i for i in range(nb)],
                       [np.float32(i) for i in range(nb)],
                       [str(i) for i in range(nb)], vectors])
    t1 = time.time()
    print(f"\nInsert {nb} vectors cost {t1 - t0:.4f} seconds")

    t0 = time.time()
    print(f"\nGet collection entities...")
    print(collection.num_entities)
    t1 = time.time()
    print(f"\nGet collection entities cost {t1 - t0:.4f} seconds")

    # create index and load the collection
    default_index = {
        "index_type": "IVF_SQ8",
        "metric_type": "L2",
        "params": {
            "nlist": 64
        }
    }
    print(f"\nCreate index...")
    t0 = time.time()
    collection.create_index(field_name="float_vector",
                            index_params=default_index)
    t1 = time.time()
    print(f"\nCreate index cost {t1 - t0:.4f} seconds")
    print("\nGet replicas number")
    try:
        replicas_info = collection.get_replicas()
        replica_number = len(replicas_info.groups)
        print(f"\nReplicas number is {replica_number}")
    except Exception as e:
        print(str(e))
        replica_number = 1
    print(f"\nload collection...")
    t0 = time.time()
    collection.load(replica_number=replica_number)
    t1 = time.time()
    print(f"\nload collection cost {t1 - t0:.4f} seconds")

    # load and search
    topK = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    t0 = time.time()
    print(f"\nSearch...")
    # specify which fields to return in the search results
    res = collection.search(vectors[-2:],
                            "float_vector",
                            search_params,
                            topK,
                            "int64 > 100",
                            output_fields=["int64", "float"],
                            timeout=TIMEOUT)
    t1 = time.time()
    print(f"search cost  {t1 - t0:.4f} seconds")
    # show result
    for hits in res:
        for hit in hits:
            # print the hit and the value of its "float" output field
            print(hit, hit.entity.get("float"))

    # query
    expr = "int64 in [2,4,6,8]"
    output_fields = ["int64", "float"]
    res = collection.query(expr, output_fields, timeout=TIMEOUT)
    sorted_res = sorted(res, key=lambda k: k['int64'])
    for r in sorted_res:
        print(r)
Example 4
    def test_expand_data_node(self):
        """
        target: test the create and insert APIs after expanding the dataNode pods
        method: 1. create collection A and insert a dataframe
                2. expand the dataNode pods from 1 to 2
                3. verify collection A's data, then verify create and insert on a new collection
        expected: create and insert operations succeed on both collections
        """
        # deploy a cluster Milvus (one pod per node type) with helm
        release_name = "scale-data"
        # env = HelmEnv(release_name=release_name)
        # host = env.helm_install_cluster_milvus()

        # deploy a cluster Milvus with 1 dataNode replica via MilvusOperator
        default_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image':
            'milvusdb/milvus-dev:master-20211020-b40513b',
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'dependencies.etcd.inCluster.deletionPolicy': 'Delete',
            'dependencies.etcd.inCluster.pvcDeletion': 'true',
            'dependencies.pulsar.inCluster.deletionPolicy': 'Delete',
            'dependencies.pulsar.inCluster.pvcDeletion': 'true',
            'dependencies.storage.inCluster.deletionPolicy': 'Delete',
            'dependencies.storage.inCluster.pvcDeletion': 'true',
        }
        milvusOp = MilvusOperator()
        milvusOp.install(default_config)
        if milvusOp.wait_for_healthy(release_name,
                                     namespace=constants.NAMESPACE):
            endpoint = milvusOp.endpoint(release_name, constants.NAMESPACE)
            endpoint = endpoint.split(':')
            host = endpoint[0]
            port = int(endpoint[-1])
        else:
            raise Exception(f"Failed to install {release_name}")

        # connect
        connections.add_connection(default={"host": host, "port": port})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        milvusOp.upgrade(release_name,
                         {'spec.components.dataNode.replicas': 2},
                         constants.NAMESPACE)
        # env.helm_upgrade_cluster_milvus(dataNode=2)
        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(
            name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # assert the old collection still accepts inserts
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb * 2

        collection_w.drop()
        new_collection_w.drop()
Example 5
def test_exist_collection(name):
    assert utility.has_collection(name) is True
    collection = Collection(name)
    collection.drop()
    def test_chaos_data_consist(self, connection, chaos_yaml):
        """
        target: verify data consistence after chaos injected and recovered
        method: 1. create a collection, insert some data, search and query
                2. inject a chaos object
                3. reconnect to service
                4. verify a) data entities persists, index persists,
                          b) search and query results persist
        expected: collection data and results persist
        """
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.info(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.info(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,999,99]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.info(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # parse chaos object
        chaos_config = cc.gen_experiment_config(chaos_yaml)
        meta_name = chaos_config.get('metadata', None).get('name', None)

        # wait all pods ready
        log.info(
            f"wait for pods in namespace {constants.CHAOS_NAMESPACE} with label app.kubernetes.io/instance={meta_name}"
        )
        wait_pods_ready(constants.CHAOS_NAMESPACE,
                        f"app.kubernetes.io/instance={meta_name}")
        log.info(
            f"wait for pods in namespace {constants.CHAOS_NAMESPACE} with label release={meta_name}"
        )
        wait_pods_ready(constants.CHAOS_NAMESPACE, f"release={meta_name}")
        log.info("all pods are ready")

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 3)
        reconnect(connections, alias='default')

        # verify collection persists
        assert utility.has_collection(c_name)
        log.info("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.info("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.info("assert index persists")
        # verify search results persist
        collection_w2.load()
        t0 = datetime.datetime.now()
        search_res, _ = collection_w2.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1
        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert len(query_res2) == len(query_res)
        log.info("assert query result persists")
Example 7
def has_collection(name):
    return utility.has_collection(name)
Example 8
    def test_chaos_data_consist(self, connection, chaos_yaml):
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.debug(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.debug(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param={"nprobe": 16},
            limit=1)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 4)
        reconnect(connections, self.host, self.port)

        # verify collection persists
        assert utility.has_collection(c_name)
        log.debug("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.debug("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.debug("assert index persists")
        # verify search results persist

        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert query_res2 == query_res
        log.debug("assert query result persists")
Example 9
fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
num_entities, dim = 3000, 8

#################################################################################
# 1. connect to Milvus
# Add a new connection alias `default` for the Milvus server at `localhost:19530`.
# The "default" alias is actually built into PyMilvus, so if the Milvus address
# is `localhost:19530` you can omit all parameters and simply call
# `connections.connect()`.
#
# Note: the `using` parameter of the following methods defaults to "default".
print(fmt.format("start connecting to Milvus"))
connections.connect("default", host="localhost", port="19530")

has = utility.has_collection("hello_milvus")
print(f"Does collection hello_milvus exist in Milvus: {has}")

#################################################################################
# 2. create collection
# We're going to create a collection with 3 fields.
# +-+------------+------------+------------------+------------------------------+
# | | field name | field type | other attributes |       field description      |
# +-+------------+------------+------------------+------------------------------+
# |1|    "pk"    |    Int64   |  is_primary=True |      "primary field"         |
# | |            |            |   auto_id=False  |                              |
# +-+------------+------------+------------------+------------------------------+
# |2|  "random"  |    Double  |                  |      "a double field"        |
# +-+------------+------------+------------------+------------------------------+
# |3|"embeddings"| FloatVector|     dim=8        |  "float vector with dim 8"   |
# +-+------------+------------+------------------+------------------------------+
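# A minimal sketch of how the three fields in the table above could be declared
# with PyMilvus (assuming FieldSchema, CollectionSchema, DataType and Collection
# are imported from pymilvus). Field names, types and descriptions follow the
# table; the exact code of the original script may differ.
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False,
                description="primary field"),
    FieldSchema(name="random", dtype=DataType.DOUBLE, description="a double field"),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim,
                description="float vector with dim 8"),
]
schema = CollectionSchema(fields, description="hello_milvus demo collection")
hello_milvus = Collection("hello_milvus", schema)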
Example 10
def teardown():
    if utility.has_collection(name_A):
        utility.drop_collection(name_A)
    if utility.has_collection(name_B):
        utility.drop_collection(name_B)
Example 11
    def test_has_collection(self):
        assert utility.has_collection(gen_collection_name()) is False