def extract_cases(self, collection):
    """Expand an insert-suite config into per-``ni_per`` case params/metrics.

    One ``(case_param, case_metric)`` pair is produced for every batch size
    listed under ``collection["ni_pers"]``.

    :param collection: suite config dict; "collection_name" encodes the
        data type, collection size, dimension and metric type.
    :return: tuple ``(case_params, case_metrics)`` — lists of equal length.
    """
    collection_name = collection.get("collection_name")
    data_type, collection_size, dimension, metric_type = \
        parser.collection_parser(collection_name)
    vector_type = utils.get_vector_type(data_type)
    other_fields = collection.get("other_fields")
    build_index = collection.get("build_index", False)
    # Index settings stay None unless an index build was explicitly requested.
    index_field_name = index_type = index_param = index_info = None
    if build_index is True:
        index_type = collection["index_type"]
        index_param = collection["index_param"]
        index_info = {"index_type": index_type, "index_param": index_param}
        index_field_name = utils.get_default_field_name(vector_type)
    # Flush after insert unless the config explicitly says "flush": "no".
    flush = not ("flush" in collection and collection["flush"] == "no")
    case_params = []
    case_metrics = []
    for ni_per in collection["ni_pers"]:
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        self.init_metric(self.name, collection_info, index_info, None)
        case_metric = copy.deepcopy(self.metric)
        # Mark the copied metric as a per-case metric.
        case_metric.set_case_metric_type()
        case_metrics.append(case_metric)
        case_params.append({
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "build_index": build_index,
            "flush_after_insert": flush,
            "index_field_name": index_field_name,
            "index_type": index_type,
            "index_param": index_param,
        })
    return case_params, case_metrics
def extract_cases(self, collection):
    """Expand a get-by-id suite config into per-``ids_length`` cases.

    Bug fix: ``case_params`` was previously re-created inside the loop, so
    only the case for the last ``ids_length`` was returned even though
    ``case_metrics`` contained one metric per length. It is now created
    once, before the loop, keeping both lists the same length.

    :param collection: suite config dict; "collection_name" encodes the
        data type, collection size, dimension and metric type.
    :return: tuple ``(case_params, case_metrics)`` — lists of equal length.
    """
    collection_name = collection["collection_name"] if "collection_name" in collection else None
    (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
    ni_per = collection["ni_per"]
    vector_type = utils.get_vector_type(data_type)
    other_fields = collection["other_fields"] if "other_fields" in collection else None
    ids_length_list = collection["ids_length_list"]
    collection_info = {
        "dimension": dimension,
        "metric_type": metric_type,
        "dataset_name": collection_name,
        "collection_size": collection_size,
        "other_fields": other_fields,
        "ni_per": ni_per
    }
    index_field_name = utils.get_default_field_name(vector_type)
    index_type = collection["index_type"]
    index_param = collection["index_param"]
    index_info = {"index_type": index_type, "index_param": index_param}
    # Flush after insert unless the config explicitly says "flush": "no".
    flush = True
    if "flush" in collection and collection["flush"] == "no":
        flush = False
    self.init_metric(self.name, collection_info, index_info, search_info=None)
    case_metrics = list()
    # BUG FIX: create once, outside the loop — the original re-bound this
    # every iteration and dropped all but the last case.
    case_params = list()
    for ids_length in ids_length_list:
        ids = get_ids(ids_length, collection_size)
        case_metric = copy.deepcopy(self.metric)
        # Mark the copied metric as a per-case metric.
        case_metric.set_case_metric_type()
        case_metric.run_params = {"ids_length": ids_length}
        case_metrics.append(case_metric)
        case_param = {
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "flush_after_insert": flush,
            "index_field_name": index_field_name,
            "index_type": index_type,
            "index_param": index_param,
            "ids": ids
        }
        case_params.append(case_param)
    return case_params, case_metrics
def extract_cases(self, collection):
    """Build a single task-style case (and its metric) from the config.

    :param collection: suite config dict containing a "task" sub-dict with
        a "connection_num" entry.
    :return: tuple ``(case_params, case_metrics)`` — each a one-element list.
    """
    collection_name = collection.get("collection_name")
    data_type, collection_size, dimension, metric_type = \
        parser.collection_parser(collection_name)
    ni_per = collection["ni_per"]
    build_index = collection.get("build_index", False)
    vector_type = runner_utils.get_vector_type(data_type)
    other_fields = collection.get("other_fields")
    collection_info = {
        "dimension": dimension,
        "metric_type": metric_type,
        "dataset_name": collection_name,
        "collection_size": collection_size,
        "other_fields": other_fields,
        "ni_per": ni_per
    }
    vector_field_name = runner_utils.get_default_field_name(vector_type)
    # Index settings are only filled in when an index build is requested.
    index_field_name = index_type = index_param = index_info = None
    if build_index is True:
        index_type = collection["index_type"]
        index_param = collection["index_param"]
        index_info = {"index_type": index_type, "index_param": index_param}
        index_field_name = runner_utils.get_default_field_name(vector_type)
    task = collection["task"]
    # More than one configured connection means a multi-connection run.
    connection_type = "multi" if task["connection_num"] > 1 else "single"
    run_params = {
        "task": collection["task"],
        "connection_type": connection_type,
    }
    self.init_metric(self.name, collection_info, index_info, None, run_params)
    case_metric = copy.deepcopy(self.metric)
    # set metric type as case
    case_metric.set_case_metric_type()
    case_param = {
        "collection_name": collection_name,
        "data_type": data_type,
        "dimension": dimension,
        "collection_size": collection_size,
        "ni_per": ni_per,
        "metric_type": metric_type,
        "vector_type": vector_type,
        "other_fields": other_fields,
        "build_index": build_index,
        "index_field_name": index_field_name,
        "vector_field_name": vector_field_name,
        "index_type": index_type,
        "index_param": index_param,
        "task": collection["task"],
        "connection_type": connection_type,
    }
    return [case_param], [case_metric]
def extract_cases(self, collection):
    """Expand a search suite config into the cross product of
    (search_param x filter x nq x top_k) cases with matching metrics.

    :param collection: suite config dict; "collection_name" encodes the
        data type, collection size, dimension and metric type.
    :return: tuple ``(cases, case_metrics)`` — lists of equal length.
    :raises Exception: for a filter dict with neither "range" nor "term".
    """
    collection_name = collection.get("collection_name")
    data_type, collection_size, dimension, metric_type = \
        parser.collection_parser(collection_name)
    run_count = collection["run_count"]
    top_ks = collection["top_ks"]
    nqs = collection["nqs"]
    filters = collection.get("filters", [])
    search_params = collection["search_params"]
    # TODO: get fields by describe_index
    # fields = self.get_fields(self.milvus, collection_name)
    fields = None
    collection_info = {
        "dimension": dimension,
        "metric_type": metric_type,
        "dataset_name": collection_name,
        "collection_size": collection_size,
        "fields": fields
    }
    # TODO: need to get index_info
    index_info = None
    vector_type = utils.get_vector_type(data_type)
    index_field_name = utils.get_default_field_name(vector_type)
    base_query_vectors = utils.get_vectors_from_binary(
        utils.MAX_NQ, dimension, data_type)
    cases = []
    case_metrics = []
    self.init_metric(self.name, collection_info, index_info, None)
    for search_param in search_params:
        logger.info("Search param: %s" % json.dumps(search_param))
        for filter_spec in filters:
            filter_query = []
            filter_param = []
            if filter_spec and isinstance(filter_spec, dict):
                # NOTE(review): eval() runs expression text straight from
                # the suite config — acceptable only while configs are
                # trusted input.
                if "range" in filter_spec:
                    filter_query.append(eval(filter_spec["range"]))
                    filter_param.append(filter_spec["range"])
                elif "term" in filter_spec:
                    filter_query.append(eval(filter_spec["term"]))
                    filter_param.append(filter_spec["term"])
                else:
                    raise Exception("%s not supported" % filter_spec)
            logger.info("filter param: %s" % json.dumps(filter_param))
            for nq in nqs:
                # Take the first nq vectors as this case's queries.
                query_vectors = base_query_vectors[0:nq]
                for top_k in top_ks:
                    search_info = {
                        "topk": top_k,
                        "query": query_vectors,
                        "metric_type": utils.metric_type_trans(metric_type),
                        "params": search_param
                    }
                    # TODO: only update search_info
                    case_metric = copy.deepcopy(self.metric)
                    case_metric.set_case_metric_type()
                    case_metric.search = {
                        "nq": nq,
                        "topk": top_k,
                        "search_param": search_param,
                        "filter": filter_param
                    }
                    cases.append({
                        "collection_name": collection_name,
                        "index_field_name": index_field_name,
                        "run_count": run_count,
                        "filter_query": filter_query,
                        "vector_query": {"vector": {index_field_name: search_info}},
                    })
                    case_metrics.append(case_metric)
    return cases, case_metrics
def extract_cases(self, collection):
    """Expand a search suite config into the cross product of
    (search_param x filter x nq x top_k) cases with matching metrics.

    Unlike the simpler search runner, each case also carries the collection
    build settings (dimension, index type/param, ni_per, ...).

    :param collection: suite config dict; "collection_name" encodes the
        data type, collection size, dimension and metric type.
    :return: tuple ``(cases, case_metrics)`` — lists of equal length.
    """
    collection_name = collection.get("collection_name")
    data_type, collection_size, dimension, metric_type = \
        parser.collection_parser(collection_name)
    build_index = collection.get("build_index", False)
    index_type = collection.get("index_type")
    index_param = collection.get("index_param")
    run_count = collection["run_count"]
    top_ks = collection["top_ks"]
    nqs = collection["nqs"]
    other_fields = collection.get("other_fields")
    filters = collection.get("filters", [])
    search_params = collection["search_params"]
    ni_per = collection["ni_per"]
    # TODO: get fields by describe_index
    # fields = self.get_fields(self.milvus, collection_name)
    fields = None
    collection_info = {
        "dimension": dimension,
        "metric_type": metric_type,
        "dataset_name": collection_name,
        "fields": fields
    }
    index_info = {"index_type": index_type, "index_param": index_param}
    vector_type = utils.get_vector_type(data_type)
    index_field_name = utils.get_default_field_name(vector_type)
    # Get the path of the query.npy file stored on the NAS and get its data
    base_query_vectors = utils.get_vectors_from_binary(
        utils.MAX_NQ, dimension, data_type)
    cases = []
    case_metrics = []
    self.init_metric(self.name, collection_info, index_info, None)
    for search_param in search_params:
        # An empty filter list must still produce cases: run once with None.
        if not filters:
            filters.append(None)
        for filter_spec in filters:
            filter_query = []
            # NOTE(review): eval() runs expression text straight from the
            # suite config — acceptable only while configs are trusted.
            if isinstance(filter_spec, dict) and "range" in filter_spec:
                filter_query.append(eval(filter_spec["range"]))
            if isinstance(filter_spec, dict) and "term" in filter_spec:
                filter_query.append(eval(filter_spec["term"]))
            for nq in nqs:
                # Take the first nq vectors as this case's queries.
                query_vectors = base_query_vectors[0:nq]
                for top_k in top_ks:
                    search_info = {
                        "topk": top_k,
                        "query": query_vectors,
                        "metric_type": utils.metric_type_trans(metric_type),
                        "params": search_param
                    }
                    # TODO: only update search_info
                    case_metric = copy.deepcopy(self.metric)
                    # set metric type as case
                    case_metric.set_case_metric_type()
                    case_metric.search = {
                        "nq": nq,
                        "topk": top_k,
                        "search_param": search_param,
                        "filter": filter_query
                    }
                    cases.append({
                        "collection_name": collection_name,
                        "index_field_name": index_field_name,
                        "other_fields": other_fields,
                        "dimension": dimension,
                        "data_type": data_type,
                        "vector_type": vector_type,
                        "collection_size": collection_size,
                        "ni_per": ni_per,
                        "build_index": build_index,
                        "index_type": index_type,
                        "index_param": index_param,
                        "metric_type": metric_type,
                        "run_count": run_count,
                        "filter_query": filter_query,
                        "vector_query": {"vector": {index_field_name: search_info}},
                    })
                    case_metrics.append(case_metric)
    return cases, case_metrics
def extract_cases(self, collection):
    """Expand a search suite into (search_param x filter x nq x top_k) cases.

    Bug fix: ``filter_query`` is now rebuilt for every filter. It was
    previously initialized once before the loops and only ever appended to,
    so each later case silently carried every earlier filter expression as
    well (the sibling search runner resets it per filter).

    :param collection: suite config dict; "collection_name" encodes the
        data type, collection size, dimension and metric type.
    :return: tuple ``(cases, case_metrics)`` — lists of equal length.
    """
    collection_name = collection["collection_name"] if "collection_name" in collection else None
    (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
    vector_type = utils.get_vector_type(data_type)
    index_field_name = utils.get_default_field_name(vector_type)
    base_query_vectors = utils.get_vectors_from_binary(
        utils.MAX_NQ, dimension, data_type)
    collection_info = {
        "dimension": dimension,
        "metric_type": metric_type,
        "dataset_name": collection_name,
        "collection_size": collection_size
    }
    index_info = self.milvus.describe_index(index_field_name, collection_name)
    filters = collection["filters"] if "filters" in collection else []
    top_ks = collection["top_ks"]
    nqs = collection["nqs"]
    guarantee_timestamp = collection[
        "guarantee_timestamp"] if "guarantee_timestamp" in collection else None
    search_params = collection["search_params"]
    search_params = utils.generate_combinations(search_params)
    cases = list()
    case_metrics = list()
    self.init_metric(self.name, collection_info, index_info, search_info=None)
    # An empty filter list must still produce cases: run once with None.
    if not filters:
        filters.append(None)
    for search_param in search_params:
        for filter in filters:
            filter_param = []
            # BUG FIX: reset per filter; previously this list lived outside
            # the loops and grew monotonically across iterations.
            filter_query = []
            # NOTE(review): eval() runs expression text straight from the
            # suite config — acceptable only while configs are trusted.
            if isinstance(filter, dict) and "range" in filter:
                filter_query.append(eval(filter["range"]))
                filter_param.append(filter["range"])
            if isinstance(filter, dict) and "term" in filter:
                filter_query.append(eval(filter["term"]))
                filter_param.append(filter["term"])
            for nq in nqs:
                # Take the first nq vectors as this case's queries.
                query_vectors = base_query_vectors[0:nq]
                for top_k in top_ks:
                    search_info = {
                        "topk": top_k,
                        "query": query_vectors,
                        "metric_type": utils.metric_type_trans(metric_type),
                        "params": search_param
                    }
                    # TODO: only update search_info
                    case_metric = copy.deepcopy(self.metric)
                    # set metric type as case
                    case_metric.set_case_metric_type()
                    case_metric.search = {
                        "nq": nq,
                        "topk": top_k,
                        "search_param": search_param,
                        "filter": filter_param,
                        "guarantee_timestamp": guarantee_timestamp
                    }
                    vector_query = {"vector": {index_field_name: search_info}}
                    case = {
                        "collection_name": collection_name,
                        "index_field_name": index_field_name,
                        "dimension": dimension,
                        "data_type": data_type,
                        "metric_type": metric_type,
                        "vector_type": vector_type,
                        "collection_size": collection_size,
                        "filter_query": filter_query,
                        "vector_query": vector_query,
                        "guarantee_timestamp": guarantee_timestamp
                    }
                    cases.append(case)
                    case_metrics.append(case_metric)
    return cases, case_metrics