Example #1
def get_querynode_id_pod_pairs(namespace, label_selector):
    """
    get milvus node id and corresponding pod name pairs with label selector

    :param namespace: the namespace where the release is deployed
    :type namespace: str

    :param label_selector: labels to restrict which pods to list
    :type label_selector: str

    :example:
            >>> querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", "app.kubernetes.io/instance=milvus-multi-querynode, component=querynode")
            {
             5: 'milvus-multi-querynode-querynode-7b8f4b5c5-4pn42',
             9: 'milvus-multi-querynode-querynode-7b8f4b5c5-99tx7',
             1: 'milvus-multi-querynode-querynode-7b8f4b5c5-w9sk8',
             3: 'milvus-multi-querynode-querynode-7b8f4b5c5-xx84j',
             6: 'milvus-multi-querynode-querynode-7b8f4b5c5-x95dp'
            }
    """
    # TODO: extend this function to other worker nodes, not only querynode
    querynode_ip_pod_pair = get_pod_ip_name_pairs(namespace, label_selector)
    querynode_id_pod_pair = {}
    ms = MilvusSys()
    for node in ms.query_nodes:
        ip = node["infos"]['hardware_infos']["ip"].split(":")[0]
        querynode_id_pod_pair[node["identifier"]] = querynode_ip_pod_pair[ip]
    return querynode_id_pod_pair
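
The TODO above can be generalized beyond query nodes. A minimal sketch, assuming MilvusSys also exposes `data_nodes` and `index_nodes` lists with the same "infos"/"identifier" layout as `query_nodes` (the node_map in the chaos testcase parser further below suggests it does); this is illustrative, not the project's actual helper.

def get_worker_node_id_pod_pairs(namespace, label_selector, node_type="querynode"):
    # reuses get_pod_ip_name_pairs and MilvusSys from the snippet above
    node_attr_map = {
        "querynode": "query_nodes",
        "datanode": "data_nodes",
        "indexnode": "index_nodes",
    }
    ip_pod_pair = get_pod_ip_name_pairs(namespace, label_selector)
    id_pod_pair = {}
    ms = MilvusSys()
    for node in getattr(ms, node_attr_map[node_type]):
        ip = node["infos"]["hardware_infos"]["ip"].split(":")[0]
        id_pod_pair[node["identifier"]] = ip_pod_pair[ip]
    return id_pod_pair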
Example #2
    def test_task_all(self, index_type, is_compacted, segment_status,
                      is_vector_indexed, is_string_indexed, replica_number,
                      is_deleted, data_size):
        """
        before reinstall: create collection and insert data, load and search
        after reinstall: get collection, search, create index, load, and search
        """
        name = f"index_type_{index_type}_segment_status_{segment_status}_is_vector_indexed_{is_vector_indexed}_is_string_indexed_{is_string_indexed}_is_compacted_{is_compacted}_is_deleted_{is_deleted}_replica_number_{replica_number}_data_size_{data_size}"
        ms = MilvusSys()
        is_binary = "BIN" in index_type
        # insert with small size data without flush to get growing segment
        collection_w = self.init_collection_general(insert_data=True,
                                                    is_binary=is_binary,
                                                    nb=3000,
                                                    is_flush=False,
                                                    name=name)[0]

        # load for growing segment
        if replica_number > 0:
            collection_w.load(replica_number=replica_number)

        delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
        # delete data for growing segment
        if is_deleted:
            collection_w.delete(expr=delete_expr)
        if segment_status == "only_growing":
            pytest.skip("already get growing segment, skip testcase")
        # insert with flush multiple times to generate multiple sealed segments
        for i in range(5):
            self.init_collection_general(insert_data=True,
                                         is_binary=is_binary,
                                         nb=data_size,
                                         is_flush=False,
                                         name=name)[0]
            collection_w.flush()
        if is_binary:
            default_index_field = ct.default_binary_vec_field_name
        else:
            default_index_field = ct.default_float_vec_field_name
        if is_vector_indexed:
            # create index
            default_index_param = gen_index_param(index_type)
            collection_w.create_index(default_index_field, default_index_param)
        if is_string_indexed:
            # create index
            default_string_index_params = {}
            collection_w.create_index(default_string_field_name,
                                      default_string_index_params)
        # delete data for sealed segment
        delete_expr = f"{ct.default_int64_field_name} in [10,11,12,13,14,15,16,17,18,19]"
        if is_deleted:
            collection_w.delete(expr=delete_expr)
        if is_compacted:
            collection_w.compact()
        # reload after flush and create index
        if replica_number > 0:
            collection_w.release()
            collection_w.load(replica_number=replica_number)
Example #3
def get_querynode_info(release_name):
    querynode_id_pod_pair = {}
    querynode_ip_pod_pair = get_pod_ip_name_pairs(
        "chaos-testing",
        f"app.kubernetes.io/instance={release_name}, component=querynode")
    ms = MilvusSys()
    for node in ms.query_nodes:
        ip = node["infos"]['hardware_infos']["ip"].split(":")[0]
        querynode_id_pod_pair[node["identifier"]] = querynode_ip_pod_pair[ip]
    return querynode_id_pod_pair
Example #4
    def test_customize_segment_size(self, size_id):
        """
        steps
        1. [test_milvus_install]: set up milvus with customized segment size configured
        2. [test_customize_segment_size]: verify milvus is working well
        3. [test_milvus_cleanup]: delete milvus instances in teardown
        """
        size = customize_segment_sizes[size_id]
        log.info(f"start to install milvus with segment size {size}")
        release_name, host, port = _install_milvus(size)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")
Example #5
def get_milvus_instance_name(namespace, host, port="19530"):
    """
    get milvus instance name after connection

    :param namespace: the namespace where the release is deployed
    :type namespace: str

    :param host: milvus host ip
    :type host: str

    :param port: milvus port
    :type port: str
    :example:
            >>> milvus_instance_name = get_milvus_instance_name("chaos-testing", "10.96.250.111")
            "milvus-multi-querynode"

    """
    connections.add_connection(_default={"host": host, "port": port})
    connections.connect(alias='_default')
    ms = MilvusSys()
    query_node_ip = ms.query_nodes[0]["infos"]['hardware_infos']["ip"].split(
        ":")[0]
    pod_name = ""
    if ms.deploy_mode == "STANDALONE":
        # get all pods which label is app.kubernetes.io/name=milvus and component=standalone
        ip_name_pairs = get_pod_ip_name_pairs(
            namespace, "app.kubernetes.io/name=milvus, component=standalone")
        pod_name = ip_name_pairs[query_node_ip]
    if ms.deploy_mode == "DISTRIBUTED":
        # get all pods which label is app.kubernetes.io/name=milvus and component=querynode
        ip_name_pairs = get_pod_ip_name_pairs(
            namespace, "app.kubernetes.io/name=milvus, component=querynode")
        pod_name = ip_name_pairs[query_node_ip]
    init_k8s_client_config()
    api_instance = client.CoreV1Api()
    try:
        api_response = api_instance.read_namespaced_pod(namespace=namespace,
                                                        name=pod_name)
    except ApiException as e:
        log.error(
            "Exception when calling CoreV1Api->list_namespaced_pod: %s\n" % e)
        raise Exception(str(e))
    milvus_instance_name = api_response.metadata.labels[
        "app.kubernetes.io/instance"]
    return milvus_instance_name
Example #6
    def prepare_bulk_load(self, nb=1000, row_based=True):
        if Op.bulk_load not in self.health_checkers:
            log.info(
                "bulk_load checker is not in health checkers, skip prepare bulk load"
            )
            return
        log.info(
            "bulk_load checker is in health checkers, prepare data firstly")
        release_name = self.instance_name
        minio_ip_pod_pair = get_pod_ip_name_pairs(
            "chaos-testing", f"release={release_name}, app=minio")
        ms = MilvusSys()
        minio_ip = list(minio_ip_pod_pair.keys())[0]
        minio_port = "9000"
        minio_endpoint = f"{minio_ip}:{minio_port}"
        bucket_name = ms.index_nodes[0]["infos"]["system_configurations"][
            "minio_bucket_name"]
        schema = cf.gen_default_collection_schema()
        data = cf.gen_default_list_data_for_bulk_load(nb=nb)
        fields_name = [field.name for field in schema.fields]
        if row_based:
            entities = []
            for i in range(nb):
                entity_value = [field_values[i] for field_values in data]
                entity = dict(zip(fields_name, entity_value))
                entities.append(entity)
            data_dict = {"rows": entities}
        else:
            data_dict = dict(zip(fields_name, data))
        file_name = "bulk_load_data_source.json"
        files = [file_name]
        # TODO: npy file type is not supported so far
        log.info("generate bulk load file")
        with open(file_name, "w") as f:
            f.write(json.dumps(data_dict))
        log.info("upload file to minio")
        client = Minio(minio_endpoint,
                       access_key="minioadmin",
                       secret_key="minioadmin",
                       secure=False)
        client.fput_object(bucket_name, file_name, file_name)
        self.health_checkers[Op.bulk_load].update(schema=schema,
                                                  files=files,
                                                  row_based=row_based)
        log.info("prepare data for bulk load done")
Example #7
    def parser_testcase_config(self, chaos_yaml):
        tests_yaml = constants.TESTS_CONFIG_LOCATION + 'testcases.yaml'
        tests_config = cc.gen_experiment_config(tests_yaml)
        test_collections = tests_config.get('Collections', None)
        ms = MilvusSys(alias="default")
        node_map = {
            "querynode": "query_nodes",
            "datanode": "data_nodes",
            "indexnode": "index_nodes",
            "proxy": "proxy_nodes"
        }
        for t in test_collections:
            test_chaos = t.get('testcase', {}).get('chaos', {})
            if test_chaos in chaos_yaml:
                expects = t.get('testcase',
                                {}).get('expectation',
                                        {}).get('cluster_1_node', {})
                # for cluster_n_node mode
                for node in node_map.keys():
                    if node in test_chaos and len(getattr(ms,
                                                          node_map[node])) > 1:
                        expects = t.get('testcase',
                                        {}).get('expectation',
                                                {}).get('cluster_n_node', {})
                log.info(f"yaml.expects: {expects}")
                self.expect_create = expects.get(Op.create.value,
                                                 constants.SUCC)
                self.expect_insert = expects.get(Op.insert.value,
                                                 constants.SUCC)
                self.expect_flush = expects.get(Op.flush.value, constants.SUCC)
                self.expect_index = expects.get(Op.index.value, constants.SUCC)
                self.expect_search = expects.get(Op.search.value,
                                                 constants.SUCC)
                self.expect_query = expects.get(Op.query.value, constants.SUCC)
                log.info(
                    f"self.expects: create:{self.expect_create}, insert:{self.expect_insert}, "
                    f"flush:{self.expect_flush}, index:{self.expect_index}, "
                    f"search:{self.expect_search}, query:{self.expect_query}")
                return True

        return False
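
parser_testcase_config only reads a handful of keys from testcases.yaml. A hypothetical entry, shown here as the parsed dict it iterates over; the key names come from the code above, while the concrete values (chaos file name, expectation strings) are illustrative assumptions:

tests_config = {
    "Collections": [
        {
            "testcase": {
                "chaos": "chaos_querynode_pod_kill.yaml",   # matched against the chaos_yaml argument
                "expectation": {
                    # per-operation expectations; anything missing falls back to constants.SUCC
                    "cluster_1_node": {"search": "fail", "query": "fail"},
                    "cluster_n_node": {"search": "succ", "query": "succ"},
                },
            }
        }
    ]
}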
Example #8
    def test_milvus_install(self, request, simd):
        release_name = "mil-simd-" + cf.gen_digits_by_length(6)
        namespace = 'chaos-testing'
        cus_configs = {
            'spec.components.image': 'milvusdb/milvus-dev:master-latest',
            'metadata.namespace': namespace,
            'metadata.name': release_name,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            # TODO: use simd config instead of replicas
            'spec.components.queryNode.replicas': 2
        }
        milvus_op = MilvusOperator()
        log.info(f"install milvus with configs: {cus_configs}")
        milvus_op.install(cus_configs)
        healthy = milvus_op.wait_for_healthy(release_name, namespace)
        log.info(f"milvus healthy: {healthy}")
        assert healthy
        endpoint = milvus_op.endpoint(release_name, namespace).split(':')
        log.info(f"milvus endpoint: {endpoint}")
        host = endpoint[0]
        port = endpoint[1]
        conn = connections.connect(simd, host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias=simd)
        log.info(f"milvus build version: {mil.build_version}")
        # TODO: Verify simd config instead of replicas
        assert len(mil.query_nodes) == 2

        # cache results for dependent tests
        cache = {
            'release_name': release_name,
            'namespace': namespace,
            'alias': simd,
            'simd': simd
        }
        request.config.cache.set(simd, cache)
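
The values cached at the end of test_milvus_install can be read back by a dependent test in the same session. A minimal sketch of that read side (the test name is hypothetical; only request.config.cache.get/set is standard pytest API):

    def test_simd_follow_up(self, request, simd):
        # fetch the install info cached by test_milvus_install under the simd key
        cache = request.config.cache.get(simd, None)
        assert cache is not None, f"no cached install info for simd {simd}"
        release_name = cache["release_name"]
        namespace = cache["namespace"]
        log.info(f"reuse milvus release {release_name} in namespace {namespace}")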
Example #9
        after upgrade: get collection, load with multi replicas, search, insert data with flush, load with multi replicas and search
    """
    prefix = "task_5_"
    connections.connect(host=host, port=19530, timeout=60)
    get_collections(prefix)
    create_collections_and_insert_data(prefix, flush=False, count=data_size)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='config for deploy test')
    parser.add_argument('--host',
                        type=str,
                        default="127.0.0.1",
                        help='milvus server ip')
    parser.add_argument('--data_size',
                        type=int,
                        default=3000,
                        help='data size')
    args = parser.parse_args()
    data_size = args.data_size
    host = args.host
    print(f"data size: {data_size}")
    connections.connect(host=host, port=19530, timeout=60)
    ms = MilvusSys()
    task_1(data_size, host)
    task_2(data_size, host)
    if len(ms.query_nodes) >= NUM_REPLICAS:
        task_3(data_size, host)
        task_4(data_size, host)
        task_5(data_size, host)
Example #10
    def test_simd_compat_e2e(self, simd_id):
        """
       steps
       1. [test_milvus_install]: set up milvus with customized simd configured
       2. [test_simd_compat_e2e]: verify milvus is working well
       4. [test_milvus_cleanup]: delete milvus instances in teardown
       """
        simd = supported_simd_types[simd_id]
        log.info(f"start to install milvus with simd {simd}")
        release_name, host, port = _install_milvus(simd)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")
        log.info(f"milvus simdType: {mil.simd_type}")
        assert str(mil.simd_type).lower() in [
            simd_type.lower() for simd_type in supported_simd_types[simd_id:]
        ]

        log.info(f"start to e2e verification: {simd}")
        # create
        name = cf.gen_unique_str("compat")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        data = cf.gen_default_list_data()
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = time.time()
        assert collection_w.num_entities == len(data[0]) + entities
        tt = time.time() - t0
        entities = collection_w.num_entities
        log.info(f"assert flush: {tt}, entities: {entities}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str())
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #11
    def test_check(self, collection_name, data_size):
        """
        before reinstall: create collection
        """
        self._connect()
        ms = MilvusSys()
        name = collection_name
        collection_w = self.init_collection_general(
            insert_data=False, name=name, active_trace=True)[0]
        schema = collection_w.schema
        data_type = [field.dtype.name for field in schema.fields]
        field_name = [field.name for field in schema.fields]
        type_field_map = dict(zip(data_type, field_name))
        is_binary = "BINARY_VECTOR" in data_type
        
        if is_binary:
            default_index_field = ct.default_binary_vec_field_name
            vector_index_type = "BIN_FLAT"
        else:
            default_index_field = ct.default_float_vec_field_name
            vector_index_type = "IVF_FLAT"       
        
        is_vector_indexed = False
        is_string_indexed = False
        indexed_fields = [index.field_name for index in collection_w.indexes]
        binary_vector_index_types = [index.params["index_type"] for index in collection_w.indexes if index.field_name == type_field_map.get("BINARY_VECTOR", "")]
        float_vector_index_types = [index.params["index_type"] for index in collection_w.indexes if index.field_name == type_field_map.get("FLOAT_VECTOR", "")]
        string_index_types = [index.params["index_type"] for index in collection_w.indexes if index.field_name == type_field_map.get("VARCHAR", "")]
        index_names = [index.index_name for index in collection_w.indexes] # used to drop index
        vector_index_types = binary_vector_index_types + float_vector_index_types
        if len(vector_index_types) > 0:
            is_vector_indexed = True
            vector_index_type = vector_index_types[0]

        if len(string_index_types) > 0:
            is_string_indexed = True
 
        try:
            replicas, _ = collection_w.get_replicas(enable_traceback=False)
            replicas_loaded = len(replicas.groups)
        except Exception as e:
            log.info("get replicas failed")
            replicas_loaded = 0
        # params for search and query
        if is_binary:
            _, vectors_to_search = cf.gen_binary_vectors(
                default_nb, default_dim)
            default_search_field = ct.default_binary_vec_field_name
        else:
            vectors_to_search = cf.gen_vectors(default_nb, default_dim)
            default_search_field = ct.default_float_vec_field_name
        search_params = gen_search_param(vector_index_type)[0]        
        
        # load if not loaded
        if replicas_loaded == 0:
            collection_w.load()
        
        # search and query    
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)

        # flush
        collection_w.num_entities

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)
        
        # insert data and flush
        for i in range(2):
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=data_size,
                                         is_flush=False, is_index=True, name=name)
        collection_w.num_entities
        
        # delete data
        delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
        collection_w.delete(expr=delete_expr)

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)
        
        # drop index if exist
        if len(index_names) > 0:
            for index_name in index_names:
                collection_w.drop_index(index_name=index_name)
            # search and query after dropping index
            collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
            collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_query_not_empty)        

        # create index
        default_index_param = gen_index_param(vector_index_type)
        collection_w.create_index(default_index_field, default_index_param, index_name=cf.gen_unique_str())    
        collection_w.create_index(default_string_field_name, {}, index_name=cf.gen_unique_str())

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                    "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)          

        # release and reload with changed replicas
        collection_w.release()
        replica_number = 1
        if replicas_loaded in [0,1] and len(ms.query_nodes)>=2 :
            replica_number = 2
        collection_w.load(replica_number=replica_number)

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                    "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)
Example #12
    def test_customize_segment_size(self, seg_size, seg_count):
        """
       steps
       """
        log.info(f"start to install milvus with segment size {seg_size}")
        release_name, host, port = _install_milvus(seg_size)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")

        log.info(f"start to e2e verification: {seg_size}")
        # create
        name = cf.gen_unique_str("segsiz")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        nb = 50000
        data = cf.gen_default_list_data(nb=nb)
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res
        # insert 2 million entities
        rounds = 40
        for _ in range(rounds - 1):
            _, res = collection_w.insert(data)
        entities = collection_w.num_entities
        assert entities == nb * rounds

        # load
        collection_w.load()
        utility_wrap = ApiUtilityWrapper()
        segs, _ = utility_wrap.get_query_segment_info(collection_w.name)
        log.info(f"assert segments: {len(segs)}")
        assert len(segs) == seg_count

        # search
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str(),
            timeout=120)
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr, timeout=30)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #13
    def test_task_all(self, index_type, is_compacted,
                      segment_status, is_vector_indexed, is_string_indexed, replica_number, is_deleted, data_size):
        """
        before reinstall: create collection and insert data, load and search
        """
        name = ""
        for k,v in locals().items():
            if k in ["self", "name"]:
                continue
            name += f"_{k}_{v}"
        name = prefix + name
        self._connect()
        ms = MilvusSys()
        if len(ms.query_nodes) < replica_number:
            # this step is to make sure this testcase can run on standalone mode
            # or cluster mode which has only one querynode
            pytest.skip("skip test, not enough nodes")

        log.info(f"collection name: {name}, replica_number: {replica_number}, is_compacted: {is_compacted},"
                 f"is_deleted: {is_deleted}, is_vector_indexed: {is_vector_indexed}, is_string_indexed: {is_string_indexed},"
                 f"segment_status: {segment_status}, index_type: {index_type}")

        is_binary = "BIN" in index_type

        # params for search and query
        if is_binary:
            _, vectors_to_search = cf.gen_binary_vectors(
                default_nb, default_dim)
            default_search_field = ct.default_binary_vec_field_name
        else:
            vectors_to_search = cf.gen_vectors(default_nb, default_dim)
            default_search_field = ct.default_float_vec_field_name
        search_params = gen_search_param(index_type)[0]

        # init collection and insert with small size data without flush to get growing segment
        collection_w = self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                                    is_flush=False, is_index=True, name=name)[0]
        # load for growing segment
        if replica_number >= 1:
            try:
                collection_w.release()
            except Exception as e:
                log.error(
                    f"release collection failed: {e} maybe the collection is not loaded")
            collection_w.load(replica_number=replica_number)

        # delete data for growing segment
        delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
        if is_deleted == "is_deleted":
            collection_w.delete(expr=delete_expr)

        # search and query for growing segment
        if replica_number >= 1:
            collection_w.search(vectors_to_search[:default_nq], default_search_field,
                                search_params, default_limit,
                                default_search_exp,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                            "limit": default_limit})
            output_fields = [ct.default_int64_field_name]
            collection_w.query(default_term_expr, output_fields=output_fields,
                            check_task=CheckTasks.check_query_not_empty)

        # skip subsequent operations when segment_status is set to only_growing
        if segment_status == "only_growing":
            pytest.skip(
                "already get growing segment, skip subsequent operations")

        # insert with flush multiple times to generate multiple sealed segment
        for i in range(2):
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=data_size,
                                         is_flush=False, is_index=True, name=name)
            collection_w.flush()


        # params for creating index
        if is_binary:
            default_index_field = ct.default_binary_vec_field_name
        else:
            default_index_field = ct.default_float_vec_field_name

        # create index for vector
        if is_vector_indexed == "is_vector_indexed":
            default_index_param = gen_index_param(index_type)
            collection_w.create_index(default_index_field, default_index_param)

        # create index for string
        if is_string_indexed == "is_string_indexed":
            default_string_index_params = {}
            default_string_index_name = "_default_string_idx"
            collection_w.create_index(
                default_string_field_name, default_string_index_params, index_name=default_string_index_name)

        # delete data for sealed segment
        delete_expr = f"{ct.default_int64_field_name} in [10,11,12,13,14,15,16,17,18,19]"
        if is_deleted == "is_deleted":
            collection_w.delete(expr=delete_expr)
        if is_compacted == "is_compacted":
            collection_w.compact()
        if segment_status == "all":
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                         is_flush=False, is_index=True, name=name)
        # reload after flush and creating index
        if replica_number > 0:
            collection_w.release()
            collection_w.load(replica_number=replica_number)

        # insert data to get growing segment
        if segment_status == "all":
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                         is_flush=False, is_index=True, name=name)
        
        # search and query for sealed and growing segment
        if replica_number > 0:
            collection_w.search(vectors_to_search[:default_nq], default_search_field,
                                search_params, default_limit,
                                default_search_exp,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                            "limit": default_limit})
            output_fields = [ct.default_int64_field_name]
            collection_w.query(default_term_expr, output_fields=output_fields,
                            check_task=CheckTasks.check_query_not_empty)