def search(name, fields=None, topk=10, nprobe=16):
    """Search an application's vector collections using the request's object fields.

    For every application field whose type is ``"object"``, the field's
    pipeline is run on the ``data`` / ``url`` supplied in *fields*, the
    resulting vectors are searched in Milvus, and the hits of the first
    query vector are resolved through ``search_ids_from_mapping``.

    :param name: application name passed to ``application_detail``
    :param fields: mapping of field name to request value; the value for an
        object-typed field is read with ``.get('data')`` / ``.get('url')``.
        ``None`` is treated as an empty mapping.
    :param topk: ``topk`` forwarded to ``MilvusIns.search_vectors``
    :param nprobe: ``nprobe`` forwarded to ``MilvusIns.search_vectors``
    :return: list of objects built by ``new_mapping_ins``, accumulated over
        all object-typed fields of the application
    :raises RequestError: a key of *fields* is not a field of the
        application, or an object-typed field has neither ``data`` nor
        ``url`` in the request (including when the field is absent)
    :raises NoneVectorError: the pipeline returned no vectors
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if fields is None:
        fields = {}

    app = application_detail(name)
    # Object-typed fields are the ones backed by an encoding pipeline.
    pipeline_fields = {
        field_name: spec['pipeline']
        for field_name, spec in app.fields.items()
        if spec.get('type') == "object"
    }

    # Every application field is either object-typed or not, so a request
    # key is acceptable exactly when the application declares it.
    for k in fields:
        if k not in app.fields:
            raise RequestError(f"fields {k} not in application", "")

    res = []
    for n, p in pipeline_fields.items():
        pipe = pipeline_detail(p)
        # A field missing from the request is treated like one that carries
        # neither data nor url, so the caller gets a RequestError rather
        # than an AttributeError on None.
        value = fields.get(n) or {}
        file_data = value.get('data')
        url = value.get('url')
        if not file_data and not url:
            raise RequestError("can't find data or url from request", "")

        vectors = run_pipeline(pipe, data=file_data, url=url)
        if not vectors:
            raise NoneVectorError("can't encode data by encoder, check input or encoder", "")

        milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
        vids = MilvusIns.search_vectors(milvus_collection_name, vectors, topk=topk, nprobe=nprobe)
        # TODO: add a scoring function here.
        # Only the hits of the first query vector are resolved.
        dbs = search_ids_from_mapping([hit.id for hit in vids[0]])
        res.extend(
            new_mapping_ins(id=db.id, app_name=db.app_name,
                            image_url=db.image_url, fields=db.fields)
            for db in dbs
        )
    return res
def search_and_score(milvus_collection_name, mongo_name, field_name, vectors, topk, nprobe, inner_score_mode: str):
    """
    Search vectors in Milvus, score the hits by inner field score mode and
    resolve the surviving ids in MongoDB, widening the Milvus query until
    at least ``topk`` records are found or no further widening is possible.

    :param milvus_collection_name: collection name to search in Milvus
    :param mongo_name: Mongo collection name the records are selected from
    :param field_name: field name used for the MongoDB lookup
    :param vectors: vectors which will be searched in Milvus
    :param topk: number of records wanted by the caller
    :param nprobe: Milvus nprobe number
    :param inner_score_mode: value accepted by ``InnerFieldScoreMode``
    :return: at most ``topk`` items of the result of
        ``MongoIns.search_by_vector_id``
    :raises WrongInnerFieldModeError: ``inner_score_mode`` is rejected by
        ``InnerFieldScoreMode``
    :raises NoneVectorError: Milvus returned an empty result
    """
    MAX_TOPK = 2048       # largest topk this function ever sends to Milvus
    magic_number = 60     # extra hits requested on the first query
    increase_rate = 0.1   # growth factor for query_topk; doubles on score shortfall

    result_dbs = []
    query_topk = topk + magic_number
    end_flag = False

    try:
        inner_score_mode = InnerFieldScoreMode(inner_score_mode)
    except Exception as e:
        raise WrongInnerFieldModeError("Unsupported inner field mode", e) from e

    while len(result_dbs) < topk and not end_flag:
        # Never ask Milvus for more than MAX_TOPK hits.
        query_topk = min(query_topk, MAX_TOPK)
        if query_topk >= MAX_TOPK:
            # The query cannot be widened any further, so this pass has to
            # be the last one. Without this, a pass that scores enough ids
            # but resolves fewer than topk records in Mongo would re-issue
            # the identical capped query forever.
            end_flag = True

        vids = MilvusIns.search_vectors(milvus_collection_name, vectors, topk=query_topk, nprobe=nprobe)
        if len(vids) == 0:
            raise NoneVectorError("milvus search result is None", "")

        # A -1 id or a short first result row stops further widening.
        if (-1 in vids.id_array[0]) or len(vids[0]) < query_topk:
            end_flag = True

        # Inner field score function.
        res_vids = get_inner_field_score_result(vids, query_topk, inner_score_mode)

        if len(res_vids) < topk:
            if query_topk < MAX_TOPK:
                # Widen the query; skip the Mongo lookup and retry Milvus
                # directly unless this was already the final pass.
                query_topk += math.ceil(query_topk * increase_rate)
                increase_rate *= 2
                if not end_flag:
                    continue
            end_flag = True

        result_dbs = MongoIns.search_by_vector_id(mongo_name, field_name, res_vids)
        # Widen the next query in case Mongo resolved fewer than topk records.
        query_topk += math.ceil(query_topk * increase_rate)

    return result_dbs[:topk]
def test_search_vectors(self):
    """Run a single random query vector through ``MilvusIns.search_vectors``."""
    # One query row holding ``self.dimension`` random floats in [0, 1).
    single_query = [random.random() for _ in range(self.dimension)]
    q_records = [single_query]
    # NOTE(review): the positional 10 and 16 are presumably topk and nprobe,
    # matching the keyword usage at other call sites; confirm against the
    # search_vectors signature.
    rv = MilvusIns.search_vectors(self.name, q_records, 10, 16)