Example #1
 def test_search_binary_tanimoto_flat_index(self):
     """
     target: search the binary collection and check the returned distance
     method: compare the returned distance with the value computed with Tanimoto
     expected: the returned distance equals the computed value
     """
     # 1. initialize with binary data
     collection_w, _, binary_raw_vector = \
         self.init_collection_general(prefix, True, is_binary=True)
     # 2. create index
     default_index = {
         "index_type": "BIN_IVF_FLAT",
         "params": {
             "nlist": 128
         },
         "metric_type": "TANIMOTO"
     }
     collection_w.create_index("binary_vector", default_index)
     # 3. compute the distance
     query_raw_vector, binary_vectors = cf.gen_binary_vectors(
         3000, default_dim)
     distance_0 = cf.tanimoto(query_raw_vector[0], binary_raw_vector[0])
     distance_1 = cf.tanimoto(query_raw_vector[0], binary_raw_vector[1])
     # 4. search and compare the distance
     search_params = {"metric_type": "TANIMOTO", "params": {"nprobe": 10}}
     res, _ = collection_w.search(binary_vectors[:default_nq],
                                  "binary_vector", search_params,
                                  default_limit, "int64 >= 0")
     assert abs(res[0]._distances[0] -
                min(distance_0, distance_1)) <= epsilon
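The assertion above only holds if cf.tanimoto agrees with the server-side TANIMOTO metric. For reference, here is a minimal sketch of a Tanimoto (intersection over union) distance on raw 0/1 vectors, assuming the standard definition; the helper used by the suite may differ, for example by returning the similarity rather than the distance:

import numpy as np

def tanimoto_distance(x, y):
    # x, y: equal-length sequences of 0/1 values (raw binary vectors)
    x = np.asarray(x, dtype=bool)
    y = np.asarray(y, dtype=bool)
    intersection = np.count_nonzero(x & y)
    union = np.count_nonzero(x | y)
    # Tanimoto similarity = |x AND y| / |x OR y|; distance = 1 - similarity
    return 1.0 - intersection / union if union else 0.0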
Example #2
 def test_search_param_invalid_binary(self):
     """
     target: test search on binary data with an invalid parameter
     method: search with wrong metric type
     expected: raise exception and report the error
     """
     log.info(
         "test_search_param_invalid_binary: test invalid paramter with binary data"
     )
     # 1. initialize with binary data
     collection_w = self.init_collection_general(prefix,
                                                 True,
                                                 is_binary=True)[0]
     # 2. create index
     default_index = {
         "index_type": "BIN_IVF_FLAT",
         "params": {
             "nlist": 128
         },
         "metric_type": "JACCARD"
     }
     collection_w.create_index("binary_vector", default_index)
     # 3. search with exception
     binary_vectors = cf.gen_binary_vectors(3000, default_dim)[1]
     wrong_search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
     collection_w.search(binary_vectors[:default_nq],
                         "binary_vector",
                         wrong_search_params,
                         default_limit,
                         default_search_exp,
                         check_task=CheckTasks.err_res,
                         check_items={
                             "err_code": 1,
                             "err_msg": "unsupported"
                         })
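For contrast, a search whose metric type matches the BIN_IVF_FLAT/JACCARD index created above is expected to succeed. A minimal sketch reusing the fixtures from this test, assuming the same result check used in the other examples:

# searching with the same metric type as the index avoids the "unsupported" error
valid_search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
collection_w.search(binary_vectors[:default_nq],
                    "binary_vector",
                    valid_search_params,
                    default_limit,
                    default_search_exp,
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": default_nq,
                                 "limit": default_limit})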
Example #3
 def test_search_binary_flat_with_L2(self):
     """
     target: search a binary collection with the L2 metric type
     method: search binary vectors with metric_type L2 in the search params
     expected: raise an exception and report the error, since L2 is not supported for binary vectors
     """
     log.info(
         "Test case of search interface: test_search_binary_flat_with_L2")
     # 1. initialize with binary data
     collection_w = self.init_collection_general(prefix,
                                                 True,
                                                 is_binary=True)[0]
     # 2. search and assert
     query_raw_vector, binary_vectors = cf.gen_binary_vectors(
         3000, default_dim)
     search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
     collection_w.search(binary_vectors[:default_nq],
                         "binary_vector",
                         search_params,
                         default_limit,
                         "int64 >= 0",
                         check_task=CheckTasks.err_res,
                         check_items={
                             "err_code": 1,
                             "err_msg": "Search failed"
                         })
Example #4
 def test_calc_distance_binary_metric(self, metric_field, metric_binary):
     """
     target: test calculate distance with binary vectors
     method: calculate distance between binary vectors
     expected: distance calculated successfully
     """
     self._connect()
     nb = 10
     raw_vectors_l, vectors_l = cf.gen_binary_vectors(nb, default_dim)
     raw_vectors_r, vectors_r = cf.gen_binary_vectors(nb, default_dim)
     op_l = {"bin_vectors": vectors_l}
     op_r = {"bin_vectors": vectors_r}
     params = {metric_field: metric_binary}
     # reuse the raw 0/1 vectors as the reference values for the distance check
     vectors_l = raw_vectors_l
     vectors_r = raw_vectors_r
     self.utility_wrap.calc_distance(op_l,
                                     op_r,
                                     params,
                                     check_task=CheckTasks.check_distance,
                                     check_items={
                                         "vectors_l": vectors_l,
                                         "vectors_r": vectors_r,
                                         "metric": metric_binary
                                     })
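Each of these tests relies on cf.gen_binary_vectors(nb, dim) returning two parallel representations: raw 0/1 vectors for reference distance math and packed byte strings for insert and search. A minimal sketch of such a generator, assuming dim is a multiple of 8; the actual helper may differ:

import numpy as np

def gen_binary_vectors_sketch(nb, dim):
    # raw vectors: nb rows of dim 0/1 values, used for reference distance checks
    raw_vectors = np.random.randint(0, 2, size=(nb, dim), dtype=np.uint8)
    # packed vectors: each row packed into dim // 8 bytes, the format a
    # BINARY_VECTOR field expects
    packed_vectors = [np.packbits(row).tobytes() for row in raw_vectors]
    return raw_vectors.tolist(), packed_vectors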
Example #5
    def test_task_2(self, index_type, data_size):
        """
        before reinstall: create collection, insert data, create index, load and search
        after reinstall: get collection, load, search, insert data, create index, load and search
        """
        name = "task_2_" + index_type
        insert_data = True
        is_binary = "BIN" in index_type
        is_flush = False
        # create collection and insert data
        collection_w = self.init_collection_general(insert_data=insert_data,
                                                    is_binary=is_binary,
                                                    nb=data_size,
                                                    is_flush=is_flush,
                                                    name=name,
                                                    active_trace=True)[0]
        vectors_to_search = cf.gen_vectors(default_nb, default_dim)
        default_search_field = ct.default_float_vec_field_name
        if is_binary:
            _, vectors_to_search = cf.gen_binary_vectors(
                default_nb, default_dim)
            default_search_field = ct.default_binary_vec_field_name
        # create index
        default_index = gen_index_param(index_type)
        collection_w.create_index(default_search_field, default_index)
        # load
        collection_w.load()
        # search
        search_params = gen_search_param(index_type)[0]
        collection_w.search(vectors_to_search[:default_nq],
                            default_search_field,
                            search_params,
                            default_limit,
                            default_search_exp,
                            check_task=CheckTasks.check_search_results,
                            check_items={
                                "nq": default_nq,
                                "limit": default_limit
                            })
        # query
        output_fields = [ct.default_int64_field_name]
        collection_w.query(default_term_expr,
                           output_fields=output_fields,
                           check_task=CheckTasks.check_query_not_empty)
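gen_index_param and gen_search_param are not shown in this listing. Based on the index and search dictionaries used in the other examples, a plausible sketch is given below; the real helpers cover many more index types and parameters:

def gen_index_param_sketch(index_type):
    # mirror the index dicts used in the examples above (hypothetical defaults)
    metric = "JACCARD" if "BIN" in index_type else "L2"
    return {"index_type": index_type,
            "metric_type": metric,
            "params": {"nlist": 128}}

def gen_search_param_sketch(index_type):
    # the tests take the first element of the returned list
    metric = "JACCARD" if "BIN" in index_type else "L2"
    return [{"metric_type": metric, "params": {"nprobe": 10}}]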
Example #6
    def test_check(self, collection_name, data_size):
        """
        before reinstall: create collection
        """
        self._connect()
        ms = MilvusSys()
        name = collection_name
        collection_w = self.init_collection_general(
            insert_data=False, name=name, active_trace=True)[0]
        schema = collection_w.schema
        data_type = [field.dtype.name for field in schema.fields]
        field_name = [field.name for field in schema.fields]
        type_field_map = dict(zip(data_type, field_name))
        is_binary = "BINARY_VECTOR" in data_type
        
        if is_binary:
            default_index_field = ct.default_binary_vec_field_name
            vector_index_type = "BIN_FLAT"
        else:
            default_index_field = ct.default_float_vec_field_name
            vector_index_type = "IVF_FLAT"       
        
        is_vector_indexed = False
        is_string_indexed = False
        indexed_fields = [index.field_name for index in collection_w.indexes]
        binary_vector_index_types = [index.params["index_type"] for index in collection_w.indexes
                                     if index.field_name == type_field_map.get("BINARY_VECTOR", "")]
        float_vector_index_types = [index.params["index_type"] for index in collection_w.indexes
                                    if index.field_name == type_field_map.get("FLOAT_VECTOR", "")]
        string_index_types = [index.params["index_type"] for index in collection_w.indexes
                              if index.field_name == type_field_map.get("VARCHAR", "")]
        index_names = [index.index_name for index in collection_w.indexes]  # used to drop index
        vector_index_types = binary_vector_index_types + float_vector_index_types
        if len(vector_index_types) > 0:
            is_vector_indexed = True
            vector_index_type = vector_index_types[0]

        if len(string_index_types) > 0:
            is_string_indexed = True
 
        try:
            replicas, _ = collection_w.get_replicas(enable_traceback=False)
            replicas_loaded = len(replicas.groups)
        except Exception as e:
            log.info(f"get replicas failed: {e}")
            replicas_loaded = 0
        # params for search and query
        if is_binary:
            _, vectors_to_search = cf.gen_binary_vectors(
                default_nb, default_dim)
            default_search_field = ct.default_binary_vec_field_name
        else:
            vectors_to_search = cf.gen_vectors(default_nb, default_dim)
            default_search_field = ct.default_float_vec_field_name
        search_params = gen_search_param(vector_index_type)[0]        
        
        # load if not loaded
        if replicas_loaded == 0:
            collection_w.load()
        
        # search and query    
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)

        # flush (triggered by reading num_entities)
        collection_w.num_entities

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)
        
        # insert data and flush
        for i in range(2):
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=data_size,
                                         is_flush=False, is_index=True, name=name)
        collection_w.num_entities
        
        # delete data
        delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
        collection_w.delete(expr=delete_expr)

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)
        
        # drop index if exist
        if len(index_names) > 0:
            for index_name in index_names:
                collection_w.drop_index(index_name=index_name)
            # search and query after dropping index
            collection_w.search(vectors_to_search[:default_nq], default_search_field,
                            search_params, default_limit,
                            default_search_exp,
                            output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                        "limit": default_limit})
            collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                            check_task=CheckTasks.check_query_not_empty)        

        # create index
        default_index_param = gen_index_param(vector_index_type)
        collection_w.create_index(default_index_field, default_index_param, index_name=cf.gen_unique_str())    
        collection_w.create_index(default_string_field_name, {}, index_name=cf.gen_unique_str())

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                    "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)          

        # release and reload with changed replicas
        collection_w.release()
        replica_number = 1
        if replicas_loaded in [0, 1] and len(ms.query_nodes) >= 2:
            replica_number = 2
        collection_w.load(replica_number=replica_number)

        # search and query
        collection_w.search(vectors_to_search[:default_nq], default_search_field,
                        search_params, default_limit,
                        default_search_exp,
                        output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                    "limit": default_limit})
        collection_w.query(default_term_expr, output_fields=[ct.default_int64_field_name],
                        check_task=CheckTasks.check_query_not_empty)
Example #7
    def test_compact_after_binary_index(self):
        """
        target: test compact after create index
        method: 1.insert binary data into two segments
                2.create binary index
                3.compact
                4.search
        expected: Verify segment info and index info
        """
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix),
            shards_num=1,
            schema=cf.gen_default_binary_collection_schema())
        for i in range(2):
            df, _ = cf.gen_default_binary_dataframe_data()
            collection_w.insert(data=df)
            assert collection_w.num_entities == (i + 1) * ct.default_nb

        # create index
        collection_w.create_index(ct.default_binary_vec_field_name,
                                  ct.default_binary_index)
        log.debug(collection_w.index())

        collection_w.load()

        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
        vectors = cf.gen_binary_vectors(ct.default_nq, ct.default_dim)[1]
        search_res_one, _ = collection_w.search(
            vectors, ct.default_binary_vec_field_name, search_params,
            ct.default_limit)
        assert len(search_res_one) == ct.default_nq
        for hits in search_res_one:
            assert len(hits) == ct.default_limit

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans(
            check_task=CheckTasks.check_merge_compact)

        # verify index re-build and re-load
        search_params = {"metric_type": "L1", "params": {"nprobe": 10}}
        search_res_two, _ = collection_w.search(
            vectors,
            ct.default_binary_vec_field_name,
            search_params,
            ct.default_limit,
            check_task=CheckTasks.err_res,
            check_items={
                ct.err_code: 1,
                ct.err_msg: "metric type not found: (L1)"
            })

        # verify search result
        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
        search_res_two, _ = collection_w.search(
            vectors, ct.default_binary_vec_field_name, search_params,
            ct.default_limit)
        for i in range(ct.default_nq):
            for j in range(ct.default_limit):
                assert search_res_two[i][j].id == search_res_one[i][j].id
Example #8
    def test_task_all(self, index_type, is_compacted,
                      segment_status, is_vector_indexed, is_string_indexed, replica_number, is_deleted, data_size):
        """
        before reinstall: create collection and insert data, load and search
        """
        name = ""
        for k,v in locals().items():
            if k in ["self", "name"]:
                continue
            name += f"_{k}_{v}"
        name = prefix + name
        self._connect()
        ms = MilvusSys()
        if len(ms.query_nodes) < replica_number:
            # ensure this test case can also run in standalone mode or in a
            # cluster that has fewer query nodes than the requested replica number
            pytest.skip("skip test, not enough nodes")

        log.info(f"collection name: {name}, replica_number: {replica_number}, is_compacted: {is_compacted},"
                 f"is_deleted: {is_deleted}, is_vector_indexed: {is_vector_indexed}, is_string_indexed: {is_string_indexed},"
                 f"segment_status: {segment_status}, index_type: {index_type}")

        is_binary = "BIN" in index_type

        # params for search and query
        if is_binary:
            _, vectors_to_search = cf.gen_binary_vectors(
                default_nb, default_dim)
            default_search_field = ct.default_binary_vec_field_name
        else:
            vectors_to_search = cf.gen_vectors(default_nb, default_dim)
            default_search_field = ct.default_float_vec_field_name
        search_params = gen_search_param(index_type)[0]

        # init collection and insert with small size data without flush to get growing segment
        collection_w = self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                                    is_flush=False, is_index=True, name=name)[0]
        # load for growing segment
        if replica_number >= 1:
            try:
                collection_w.release()
            except Exception as e:
                log.error(
                    f"release collection failed: {e} maybe the collection is not loaded")
            collection_w.load(replica_number=replica_number)

        # delete data for growing segment
        delete_expr = f"{ct.default_int64_field_name} in [0,1,2,3,4,5,6,7,8,9]"
        if is_deleted == "is_deleted":
            collection_w.delete(expr=delete_expr)

        # search and query for growing segment
        if replica_number >= 1:
            collection_w.search(vectors_to_search[:default_nq], default_search_field,
                                search_params, default_limit,
                                default_search_exp,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                            "limit": default_limit})
            output_fields = [ct.default_int64_field_name]
            collection_w.query(default_term_expr, output_fields=output_fields,
                            check_task=CheckTasks.check_query_not_empty)

        # skip subsequent operations when segment_status is set to only_growing
        if segment_status == "only_growing":
            pytest.skip(
                "already get growing segment, skip subsequent operations")

        # insert with flush multiple times to generate multiple sealed segments
        for i in range(2):
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=data_size,
                                         is_flush=False, is_index=True, name=name)
            collection_w.flush()


        # params for creating index
        if is_binary:
            default_index_field = ct.default_binary_vec_field_name
        else:
            default_index_field = ct.default_float_vec_field_name

        # create index for vector
        if is_vector_indexed == "is_vector_indexed":
            default_index_param = gen_index_param(index_type)
            collection_w.create_index(default_index_field, default_index_param)

        # create index for string
        if is_string_indexed == "is_string_indexed":
            default_string_index_params = {}
            default_string_index_name = "_default_string_idx"
            collection_w.create_index(
                default_string_field_name, default_string_index_params, index_name=default_string_index_name)

        # delete data for sealed segment
        delete_expr = f"{ct.default_int64_field_name} in [10,11,12,13,14,15,16,17,18,19]"
        if is_deleted == "is_deleted":
            collection_w.delete(expr=delete_expr)
        if is_compacted == "is_compacted":
            collection_w.compact()
        if segment_status == "all":
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                         is_flush=False, is_index=True, name=name)
        # reload after flush and creating index
        if replica_number > 0:
            collection_w.release()
            collection_w.load(replica_number=replica_number)

        # insert data to get growing segment
        if segment_status == "all":
            self.init_collection_general(insert_data=True, is_binary=is_binary, nb=3000,
                                         is_flush=False, is_index=True, name=name)
        
        # search and query for sealed and growing segment
        if replica_number > 0:
            collection_w.search(vectors_to_search[:default_nq], default_search_field,
                                search_params, default_limit,
                                default_search_exp,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                            "limit": default_limit})
            output_fields = [ct.default_int64_field_name]
            collection_w.query(default_term_expr, output_fields=output_fields,
                            check_task=CheckTasks.check_query_not_empty)
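The string-valued flags in test_task_all (compared as, for example, is_deleted == "is_deleted") suggest the arguments come from pytest parametrization, where the string value itself selects a branch. A hypothetical sketch of how such flags could be supplied; the actual fixture values in the suite may differ:

import pytest

# hypothetical values; the real suite may use fixtures or different strings
@pytest.mark.parametrize("is_deleted", ["is_deleted", "not_deleted"])
@pytest.mark.parametrize("segment_status", ["only_growing", "all"])
def test_flags_sketch(is_deleted, segment_status):
    if is_deleted == "is_deleted":
        pass  # the deletion branch runs only for this string value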