Example #1
 def create_collection(self, dimension, data_type=DataType.FLOAT_VECTOR, auto_id=False,
                       collection_name=None, other_fields=None):
     self._dimension = dimension
     if not collection_name:
         collection_name = self._collection_name
     vec_field_name = utils.get_default_field_name(data_type)
     fields = [
         {"name": vec_field_name, "type": data_type, "params": {"dim": dimension}},
         {"name": "id", "type": DataType.INT64, "is_primary": True}
     ]
     if other_fields:
         other_fields = other_fields.split(",")
         for other_field_name in other_fields:
             if other_field_name.startswith("int"):
                 field_type = DataType.INT64
             elif other_field_name.startswith("float"):
                 field_type = DataType.FLOAT
             elif other_field_name.startswith("double"):
                 field_type = DataType.DOUBLE
             else:
                 raise Exception("Field name not supported")
             fields.append({"name": other_field_name, "type": field_type})
     create_param = {
         "fields": fields,
         "auto_id": auto_id}
     try:
         self._milvus.create_collection(collection_name, create_param)
         logger.info("Create collection: <%s> successfully" % collection_name)
     except Exception as e:
         logger.error(str(e))
         raise
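
The other_fields argument above is a comma-separated field spec whose name prefix selects the scalar type. A minimal standalone sketch of that parsing rule; the DataType enum here is a stand-in for the pymilvus one, for illustration only:

 from enum import Enum

 class DataType(Enum):  # stand-in for pymilvus DataType
     INT64 = "INT64"
     FLOAT = "FLOAT"
     DOUBLE = "DOUBLE"

 def parse_other_fields(spec):
     # map "int_1,float_1"-style specs to field dicts by name prefix
     prefix_map = {"int": DataType.INT64, "float": DataType.FLOAT,
                   "double": DataType.DOUBLE}
     fields = []
     for name in spec.split(","):
         for prefix, field_type in prefix_map.items():
             if name.startswith(prefix):
                 fields.append({"name": name, "type": field_type})
                 break
         else:
             raise Exception("Field name %s not supported" % name)
     return fields

 print(parse_other_fields("int_1,float_1"))
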
Example #2
 def run_step(self, interface_name, interface_params):
     if interface_name == "create_collection":
         collection_name = utils.get_unique_name("chaos")
         self.data_type = interface_params["data_type"]
         self.dimension = interface_params["dimension"]
         self.milvus.set_collection(collection_name)
         vector_type = runner_utils.get_vector_type(self.data_type)
         self.milvus.create_collection(self.dimension,
                                       data_type=vector_type)
     elif interface_name == "insert":
         batch_size = interface_params["batch_size"]
         collection_size = interface_params["collection_size"]
         self.insert(self.milvus, self.milvus.collection_name,
                     self.data_type, self.dimension, collection_size,
                     batch_size)
     elif interface_name == "create_index":
         metric_type = interface_params["metric_type"]
         index_type = interface_params["index_type"]
         index_param = interface_params["index_param"]
         vector_type = runner_utils.get_vector_type(self.data_type)
         field_name = runner_utils.get_default_field_name(vector_type)
         self.milvus.create_index(field_name,
                                  index_type,
                                  metric_type,
                                  index_param=index_param)
     elif interface_name == "flush":
         self.milvus.flush()
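
run_step is driven by (interface_name, interface_params) pairs, so a chaos scenario is just an ordered list of such steps. A hedged sketch of what that list might look like; the concrete values (and the runner variable) are assumptions, only the keys come from the code above:

 steps = [
     {"interface_name": "create_collection",
      "interface_params": {"data_type": "float_vector", "dimension": 128}},
     {"interface_name": "insert",
      "interface_params": {"batch_size": 1000, "collection_size": 100000}},
     {"interface_name": "create_index",
      "interface_params": {"metric_type": "l2", "index_type": "IVF_FLAT",
                           "index_param": {"nlist": 1024}}},
     {"interface_name": "flush", "interface_params": {}},
 ]
 for step in steps:
     runner.run_step(step["interface_name"], step["interface_params"])
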
Example #3
    def extract_cases(self, collection):
        collection_name = collection.get("collection_name")
        (data_type, collection_size, dimension,
         metric_type) = parser.collection_parser(collection_name)
        ni_pers = collection["ni_pers"]
        build_index = collection.get("build_index", False)
        index_info = None
        vector_type = utils.get_vector_type(data_type)
        other_fields = collection.get("other_fields")
        index_field_name = None
        index_type = None
        index_param = None
        if build_index is True:
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            index_info = {"index_type": index_type, "index_param": index_param}
            index_field_name = utils.get_default_field_name(vector_type)
        flush = True
        if "flush" in collection and collection["flush"] == "no":
            flush = False
        case_metrics = list()
        case_params = list()

        for ni_per in ni_pers:
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name,
                "collection_size": collection_size,
                "other_fields": other_fields,
                "ni_per": ni_per
            }
            self.init_metric(self.name, collection_info, index_info, None)
            case_metric = copy.deepcopy(self.metric)
            case_metric.set_case_metric_type()
            case_metrics.append(case_metric)
            case_param = {
                "collection_name": collection_name,
                "data_type": data_type,
                "dimension": dimension,
                "collection_size": collection_size,
                "ni_per": ni_per,
                "metric_type": metric_type,
                "vector_type": vector_type,
                "other_fields": other_fields,
                "build_index": build_index,
                "flush_after_insert": flush,
                "index_field_name": index_field_name,
                "index_type": index_type,
                "index_param": index_param,
            }
            case_params.append(case_param)
        return case_params, case_metrics
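
parser.collection_parser recovers (data_type, collection_size, dimension, metric_type) from the collection name itself. A plausible reconstruction, assuming names follow a "sift_1m_128_l2" convention; the real parser may handle more size units:

 def collection_parser(collection_name):
     # assumed convention: "<data_type>_<size><unit>_<dim>_<metric>"
     data_type, size_token, dim_token, metric_type = collection_name.split("_")
     multipliers = {"m": 1000000, "b": 1000000000}
     collection_size = int(size_token[:-1]) * multipliers[size_token[-1]]
     return data_type, collection_size, int(dim_token), metric_type

 print(collection_parser("sift_1m_128_l2"))  # ('sift', 1000000, 128, 'l2')
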
Example #4
 def extract_cases(self, collection):
     collection_name = collection.get("collection_name")
     (data_type, collection_size, dimension,
      metric_type) = parser.collection_parser(collection_name)
     ni_per = collection["ni_per"]
     vector_type = utils.get_vector_type(data_type)
     other_fields = collection.get("other_fields")
     ids_length_list = collection["ids_length_list"]
     collection_info = {
         "dimension": dimension,
         "metric_type": metric_type,
         "dataset_name": collection_name,
         "collection_size": collection_size,
         "other_fields": other_fields,
         "ni_per": ni_per
     }
     index_field_name = utils.get_default_field_name(vector_type)
     index_type = collection["index_type"]
     index_param = collection["index_param"]
     index_info = {"index_type": index_type, "index_param": index_param}
     flush = True
     if "flush" in collection and collection["flush"] == "no":
         flush = False
     self.init_metric(self.name,
                      collection_info,
                      index_info,
                      search_info=None)
     case_metrics = list()
     # initialize once so every ids_length case is kept, not just the last
     case_params = list()
     for ids_length in ids_length_list:
         ids = get_ids(ids_length, collection_size)
         case_metric = copy.deepcopy(self.metric)
         case_metric.set_case_metric_type()
         case_metric.run_params = {"ids_length": ids_length}
         case_metrics.append(case_metric)
         case_param = {
             "collection_name": collection_name,
             "data_type": data_type,
             "dimension": dimension,
             "collection_size": collection_size,
             "ni_per": ni_per,
             "metric_type": metric_type,
             "vector_type": vector_type,
             "other_fields": other_fields,
             "flush_after_insert": flush,
             "index_field_name": index_field_name,
             "index_type": index_type,
             "index_param": index_param,
             "ids": ids
         }
         case_params.append(case_param)
     return case_params, case_metrics
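
get_ids presumably draws ids_length distinct ids from the collection's id space for the get-by-id cases. A minimal sketch under that assumption:

 import random

 def get_ids(ids_length, collection_size):
     # sample without replacement from the assumed id space [0, collection_size)
     return random.sample(range(collection_size), ids_length)
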
Example #5
 def load_query_rand(self, nq_max=100, timeout=None):
     # randomized IVF-style search: random nq, top_k and nprobe per call
     dimension = 128
     top_k = random.randint(1, 100)
     nq = random.randint(1, nq_max)
     nprobe = random.randint(1, 100)
     search_param = {"nprobe": nprobe}
     query_vectors = [[random.random() for _ in range(dimension)] for _ in range(nq)]
     metric_type = random.choice(["l2", "ip"])
     logger.info("%s, Search nq: %d, top_k: %d, nprobe: %d" % (self._collection_name, nq, top_k, nprobe))
     vec_field_name = utils.get_default_field_name()
     vector_query = {"vector": {vec_field_name: {
         "topk": top_k,
         "query": query_vectors,
         "metric_type": utils.metric_type_trans(metric_type),
         "params": search_param}
     }}
     self.load_and_query(vector_query, timeout=timeout)
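
utils.metric_type_trans evidently converts the lower-case metric names used in collection names into the form the server expects. A hedged guess at its behavior; the real helper may support more metrics:

 def metric_type_trans(metric_type):
     mapping = {"l2": "L2", "ip": "IP"}
     if metric_type not in mapping:
         raise Exception("metric type %s not supported" % metric_type)
     return mapping[metric_type]
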
Example #6
    def extract_cases(self, collection):
        collection_name = collection.get("collection_name")

        (data_type, collection_size, dimension,
         metric_type) = parser.collection_parser(collection_name)
        ni_per = collection["ni_per"]
        build_index = collection.get("build_index", False)
        vector_type = runner_utils.get_vector_type(data_type)
        other_fields = collection.get("other_fields")
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        index_field_name = None
        index_type = None
        index_param = None
        index_info = None
        vector_field_name = runner_utils.get_default_field_name(vector_type)
        if build_index is True:
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            index_info = {"index_type": index_type, "index_param": index_param}
            index_field_name = runner_utils.get_default_field_name(vector_type)
        task = collection["task"]
        connection_type = "single"
        connection_num = task["connection_num"]
        if connection_num > 1:
            connection_type = "multi"
        run_params = {
            "task": collection["task"],
            "connection_type": connection_type,
        }
        self.init_metric(self.name, collection_info, index_info, None,
                         run_params)
        case_metric = copy.deepcopy(self.metric)
        # mark the metric as a per-case record
        case_metric.set_case_metric_type()
        case_metrics = list()
        case_params = list()
        case_metrics.append(case_metric)
        case_param = {
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "build_index": build_index,
            "index_field_name": index_field_name,
            "vector_field_name": vector_field_name,
            "index_type": index_type,
            "index_param": index_param,
            "task": collection["task"],
            "connection_type": connection_type,
        }
        case_params.append(case_param)
        return case_params, case_metrics
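
Here the task block configures the concurrent load run, and connection_num > 1 flips connection_type to "multi". A sketch of an assumed task config; only connection_num is taken from the code above, the other keys are illustrative:

 task = {
     "connection_num": 4,   # > 1 selects connection_type "multi"
     "clients_num": 20,
     "during_time": 3600,
 }
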
Example #7
    def extract_cases(self, collection):
        collection_name = collection.get("collection_name")
        (data_type, collection_size, dimension,
         metric_type) = parser.collection_parser(collection_name)
        run_count = collection["run_count"]
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        filters = collection["filters"] if "filters" in collection else []

        search_params = collection["search_params"]
        # TODO: get fields by describe_index
        # fields = self.get_fields(self.milvus, collection_name)
        fields = None
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "fields": fields
        }
        # TODO: need to get index_info
        index_info = None
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        base_query_vectors = utils.get_vectors_from_binary(
            utils.MAX_NQ, dimension, data_type)
        cases = list()
        case_metrics = list()
        self.init_metric(self.name, collection_info, index_info, None)
        for search_param in search_params:
            logger.info("Search param: %s" % json.dumps(search_param))
            # run at least once when no filters are configured
            if not filters:
                filters.append(None)
            for filter in filters:
                filter_query = []
                filter_param = []
                if filter and isinstance(filter, dict):
                    if "range" in filter:
                        filter_query.append(eval(filter["range"]))
                        filter_param.append(filter["range"])
                    elif "term" in filter:
                        filter_query.append(eval(filter["term"]))
                        filter_param.append(filter["term"])
                    else:
                        raise Exception("%s not supported" % filter)
                logger.info("filter param: %s" % json.dumps(filter_param))
                for nq in nqs:
                    query_vectors = base_query_vectors[0:nq]
                    for top_k in top_ks:
                        search_info = {
                            "topk": top_k,
                            "query": query_vectors,
                            "metric_type":
                            utils.metric_type_trans(metric_type),
                            "params": search_param
                        }
                        # TODO: only update search_info
                        case_metric = copy.deepcopy(self.metric)
                        case_metric.set_case_metric_type()
                        case_metric.search = {
                            "nq": nq,
                            "topk": top_k,
                            "search_param": search_param,
                            "filter": filter_param
                        }
                        vector_query = {
                            "vector": {
                                index_field_name: search_info
                            }
                        }
                        case = {
                            "collection_name": collection_name,
                            "index_field_name": index_field_name,
                            "run_count": run_count,
                            "filter_query": filter_query,
                            "vector_query": vector_query,
                        }
                        cases.append(case)
                        case_metrics.append(case_metric)
        return cases, case_metrics
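
Each filter entry stores its query expression as a string, and eval() turns it back into a dict before it is attached to the case. A hedged illustration of the two supported shapes; the exact expression syntax is an assumption:

 filters = [
     {"range": "{'range': {'int64': {'GT': 0, 'LT': 500000}}}"},
     {"term": "{'term': {'int64': {'values': [0, 1, 2]}}}"},
 ]
 for f in filters:
     key = "range" if "range" in f else "term"
     print(eval(f[key]))  # string -> dict, as in extract_cases above
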
Example #8
    def extract_cases(self, collection):
        collection_name = collection.get("collection_name")
        (data_type, collection_size, dimension,
         metric_type) = parser.collection_parser(collection_name)
        build_index = collection.get("build_index", False)
        index_type = collection.get("index_type")
        index_param = collection.get("index_param")
        run_count = collection["run_count"]
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        other_fields = collection.get("other_fields")
        filters = collection.get("filters", [])
        filter_query = []
        search_params = collection["search_params"]
        ni_per = collection["ni_per"]

        # TODO: get fields by describe_index
        # fields = self.get_fields(self.milvus, collection_name)
        fields = None
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "fields": fields
        }
        index_info = {"index_type": index_type, "index_param": index_param}
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        # Load query vectors from the query file stored on the NAS
        base_query_vectors = utils.get_vectors_from_binary(
            utils.MAX_NQ, dimension, data_type)
        cases = list()
        case_metrics = list()
        self.init_metric(self.name, collection_info, index_info, None)

        for search_param in search_params:
            if not filters:
                filters.append(None)
            for filter in filters:
                # filter_param = []
                filter_query = []
                if isinstance(filter, dict) and "range" in filter:
                    filter_query.append(eval(filter["range"]))
                    # filter_param.append(filter["range"])
                if isinstance(filter, dict) and "term" in filter:
                    filter_query.append(eval(filter["term"]))
                    # filter_param.append(filter["term"])
                # logger.info("filter param: %s" % json.dumps(filter_param))
                for nq in nqs:
                    # take the first nq vectors as the query set
                    query_vectors = base_query_vectors[0:nq]
                    for top_k in top_ks:
                        search_info = {
                            "topk": top_k,
                            "query": query_vectors,
                            "metric_type":
                            utils.metric_type_trans(metric_type),
                            "params": search_param
                        }
                        # TODO: only update search_info
                        case_metric = copy.deepcopy(self.metric)
                        # mark the metric as a per-case record
                        case_metric.set_case_metric_type()
                        case_metric.search = {
                            "nq": nq,
                            "topk": top_k,
                            "search_param": search_param,
                            "filter": filter_query
                        }
                        vector_query = {
                            "vector": {
                                index_field_name: search_info
                            }
                        }
                        case = {
                            "collection_name": collection_name,
                            "index_field_name": index_field_name,
                            "other_fields": other_fields,
                            "dimension": dimension,
                            "data_type": data_type,
                            "vector_type": vector_type,
                            "collection_size": collection_size,
                            "ni_per": ni_per,
                            "build_index": build_index,
                            "index_type": index_type,
                            "index_param": index_param,
                            "metric_type": metric_type,
                            "run_count": run_count,
                            "filter_query": filter_query,
                            "vector_query": vector_query,
                        }
                        cases.append(case)
                        case_metrics.append(case_metric)
        return cases, case_metrics
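
Pulling the accessed keys together, the collection argument this variant consumes looks roughly like the following; the values are illustrative, only the key names come from the code:

 collection = {
     "collection_name": "sift_1m_128_l2",
     "ni_per": 50000,
     "build_index": True,
     "index_type": "IVF_FLAT",
     "index_param": {"nlist": 1024},
     "run_count": 3,
     "top_ks": [1, 10, 100],
     "nqs": [1, 10, 100],
     "other_fields": "int_1",
     "filters": [],
     "search_params": [{"nprobe": 8}, {"nprobe": 32}],
 }
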
Example #9
 def extract_cases(self, collection):
     collection_name = collection.get("collection_name")
     (data_type, collection_size, dimension,
      metric_type) = parser.collection_parser(collection_name)
     vector_type = utils.get_vector_type(data_type)
     index_field_name = utils.get_default_field_name(vector_type)
     base_query_vectors = utils.get_vectors_from_binary(
         utils.MAX_NQ, dimension, data_type)
     collection_info = {
         "dimension": dimension,
         "metric_type": metric_type,
         "dataset_name": collection_name,
         "collection_size": collection_size
     }
     index_info = self.milvus.describe_index(index_field_name,
                                             collection_name)
     filters = collection["filters"] if "filters" in collection else []
     filter_query = []
     top_ks = collection["top_ks"]
     nqs = collection["nqs"]
     guarantee_timestamp = collection.get("guarantee_timestamp")
     search_params = collection["search_params"]
     search_params = utils.generate_combinations(search_params)
     cases = list()
     case_metrics = list()
     self.init_metric(self.name,
                      collection_info,
                      index_info,
                      search_info=None)
     for search_param in search_params:
         if not filters:
             filters.append(None)
         for filter in filters:
             filter_param = []
             filter_query = []  # reset per filter; avoids accumulating earlier filters
             if isinstance(filter, dict) and "range" in filter:
                 filter_query.append(eval(filter["range"]))
                 filter_param.append(filter["range"])
             if isinstance(filter, dict) and "term" in filter:
                 filter_query.append(eval(filter["term"]))
                 filter_param.append(filter["term"])
             for nq in nqs:
                 query_vectors = base_query_vectors[0:nq]
                 for top_k in top_ks:
                     search_info = {
                         "topk": top_k,
                         "query": query_vectors,
                         "metric_type":
                         utils.metric_type_trans(metric_type),
                         "params": search_param
                     }
                     # TODO: only update search_info
                     case_metric = copy.deepcopy(self.metric)
                     # mark the metric as a per-case record
                     case_metric.set_case_metric_type()
                     case_metric.search = {
                         "nq": nq,
                         "topk": top_k,
                         "search_param": search_param,
                         "filter": filter_param,
                         "guarantee_timestamp": guarantee_timestamp
                     }
                     vector_query = {
                         "vector": {
                             index_field_name: search_info
                         }
                     }
                     case = {
                         "collection_name": collection_name,
                         "index_field_name": index_field_name,
                         "dimension": dimension,
                         "data_type": data_type,
                         "metric_type": metric_type,
                         "vector_type": vector_type,
                         "collection_size": collection_size,
                         "filter_query": filter_query,
                         "vector_query": vector_query,
                         "guarantee_timestamp": guarantee_timestamp
                     }
                     cases.append(case)
                     case_metrics.append(case_metric)
     return cases, case_metrics
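
utils.generate_combinations expands list-valued parameter dicts into the cross product of concrete settings. A sketch with itertools.product, assuming inputs shaped like [{"nprobe": [8, 32]}]:

 import itertools

 def generate_combinations(params_list):
     combos = []
     for params in params_list:
         keys = list(params)
         values = [v if isinstance(v, list) else [v] for v in params.values()]
         for combo in itertools.product(*values):
             combos.append(dict(zip(keys, combo)))
     return combos

 print(generate_combinations([{"nprobe": [8, 32]}]))  # [{'nprobe': 8}, {'nprobe': 32}]
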
Example #10
    def extract_cases(self, collection):
        collection_name = collection.get("collection_name")
        (data_type, dimension,
         metric_type) = parser.parse_ann_collection_name(collection_name)
        # hdf5_source_file: The path of the source data file saved on the NAS
        hdf5_source_file = collection["source_file"]
        index_types = collection["index_types"]
        index_params = collection["index_params"]
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        guarantee_timestamp = collection.get("guarantee_timestamp")
        search_params = collection["search_params"]
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        dataset = utils.get_dataset(hdf5_source_file)
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name
        }
        filters = collection["filters"] if "filters" in collection else []
        filter_query = []
        # expand list-valued params into concrete parameter combinations
        search_params = utils.generate_combinations(search_params)
        index_params = utils.generate_combinations(index_params)
        cases = list()
        case_metrics = list()
        self.init_metric(self.name, collection_info, {}, search_info=None)

        # true_ids: ground-truth neighbor ids used to verify query results
        true_ids = np.array(dataset["neighbors"])
        for index_type in index_types:
            for index_param in index_params:
                index_info = {
                    "index_type": index_type,
                    "index_param": index_param
                }
                for search_param in search_params:
                    if not filters:
                        filters.append(None)
                    for filter in filters:
                        filter_param = []
                        filter_query = []  # reset per filter; avoids accumulating earlier filters
                        if isinstance(filter, dict) and "range" in filter:
                            filter_query.append(eval(filter["range"]))
                            filter_param.append(filter["range"])
                        if isinstance(filter, dict) and "term" in filter:
                            filter_query.append(eval(filter["term"]))
                            filter_param.append(filter["term"])
                        for nq in nqs:
                            query_vectors = utils.normalize(
                                metric_type, np.array(dataset["test"][:nq]))
                            for top_k in top_ks:
                                search_info = {
                                    "topk": top_k,
                                    "query": query_vectors,
                                    "metric_type": utils.metric_type_trans(metric_type),
                                    "params": search_param
                                }
                                # TODO: only update search_info
                                case_metric = copy.deepcopy(self.metric)
                                # mark the metric as a per-case record
                                case_metric.set_case_metric_type()
                                case_metric.index = index_info
                                case_metric.search = {
                                    "nq": nq,
                                    "topk": top_k,
                                    "search_param": search_param,
                                    "filter": filter_param,
                                    "guarantee_timestamp": guarantee_timestamp
                                }
                                vector_query = {
                                    "vector": {
                                        index_field_name: search_info
                                    }
                                }
                                case = {
                                    "collection_name": collection_name,
                                    "dataset": dataset,
                                    "index_field_name": index_field_name,
                                    "dimension": dimension,
                                    "data_type": data_type,
                                    "metric_type": metric_type,
                                    "vector_type": vector_type,
                                    "index_type": index_type,
                                    "index_param": index_param,
                                    "filter_query": filter_query,
                                    "vector_query": vector_query,
                                    "true_ids": true_ids,
                                    "guarantee_timestamp": guarantee_timestamp
                                }
                                # record the fully-parameterized case and its metric
                                cases.append(case)
                                case_metrics.append(case_metric)
        return cases, case_metrics
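
The HDF5 source file follows the ann-benchmarks layout: a "test" set of query vectors and a "neighbors" matrix of ground-truth ids, which is exactly what this runner reads (a "train" set normally holds the base vectors). A sketch of loading one, assuming utils.get_dataset wraps h5py:

 import h5py

 dataset = h5py.File("sift-128-euclidean.hdf5", "r")  # assumed file name
 print(dataset["test"].shape, dataset["neighbors"].shape)
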
Example #11
 def extract_cases(self, collection):
     collection_name = collection.get("collection_name")
     (data_type, dimension,
      metric_type) = parser.parse_ann_collection_name(collection_name)
     hdf5_source_file = collection["source_file"]
     index_types = collection["index_types"]
     index_params = collection["index_params"]
     top_ks = collection["top_ks"]
     nqs = collection["nqs"]
     search_params = collection["search_params"]
     vector_type = utils.get_vector_type(data_type)
     index_field_name = utils.get_default_field_name(vector_type)
     dataset = utils.get_dataset(hdf5_source_file)
     collection_info = {
         "dimension": dimension,
         "metric_type": metric_type,
         "dataset_name": collection_name
     }
     filters = collection["filters"] if "filters" in collection else []
     filter_query = []
     search_params = utils.generate_combinations(search_params)
     index_params = utils.generate_combinations(index_params)
     cases = list()
     case_metrics = list()
     self.init_metric(self.name, collection_info, {}, search_info=None)
     true_ids = np.array(dataset["neighbors"])
     for index_type in index_types:
         for index_param in index_params:
             index_info = {
                 "index_type": index_type,
                 "index_param": index_param
             }
             for search_param in search_params:
                 if not filters:
                     filters.append(None)
                 for filter in filters:
                     filter_param = []
                     filter_query = []  # reset per filter; avoids accumulating earlier filters
                     if isinstance(filter, dict) and "range" in filter:
                         filter_query.append(eval(filter["range"]))
                         filter_param.append(filter["range"])
                     if isinstance(filter, dict) and "term" in filter:
                         filter_query.append(eval(filter["term"]))
                         filter_param.append(filter["term"])
                     for nq in nqs:
                         query_vectors = utils.normalize(
                             metric_type, np.array(dataset["test"][:nq]))
                         for top_k in top_ks:
                             search_info = {
                                 "topk": top_k,
                                 "query": query_vectors,
                                 "metric_type": utils.metric_type_trans(metric_type),
                                 "params": search_param
                             }
                             # TODO: only update search_info
                             case_metric = copy.deepcopy(self.metric)
                             case_metric.index = index_info
                             case_metric.search = {
                                 "nq": nq,
                                 "topk": top_k,
                                 "search_param": search_param,
                                 "filter": filter_param
                             }
                             vector_query = {
                                 "vector": {
                                     index_field_name: search_info
                                 }
                             }
                             case = {
                                 "collection_name": collection_name,
                                 "dataset": dataset,
                                 "index_field_name": index_field_name,
                                 "dimension": dimension,
                                 "data_type": data_type,
                                 "metric_type": metric_type,
                                 "vector_type": vector_type,
                                 "index_type": index_type,
                                 "index_param": index_param,
                                 "filter_query": filter_query,
                                 "vector_query": vector_query,
                                 "true_ids": true_ids
                             }
                             cases.append(case)
                             case_metrics.append(case_metric)
     return cases, case_metrics
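
utils.normalize prepares query vectors for the chosen metric; for inner-product search the usual trick is to scale each vector to unit length so IP search behaves like cosine similarity. A minimal sketch under that assumption:

 import numpy as np

 def normalize(metric_type, X):
     X = X.astype(np.float32)
     if metric_type == "ip":
         # unit-length rows; assumes no zero vectors in the query set
         X = X / np.linalg.norm(X, axis=1, keepdims=True)
     return X
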