def delete_entity(app_name, entity_id):
    """Delete an entity and every piece of derived data it owns.

    For each field document of the entity this removes the stored object
    from S3 and the encoded vectors from Milvus, then deletes the entity
    document itself from MongoDB.

    Args:
        app_name: name of the application the entity belongs to.
        entity_id: id of the entity document in MongoDB.

    Returns:
        The mapping instance built from the (last) matched entity document.

    Raises:
        NotExistError: if no entity with ``entity_id`` exists.
    """
    try:
        mongo_ins_name = f"{app_name}_entity"
        entity = MongoIns.search_by_id(mongo_ins_name, entity_id)
        if not entity.count():
            raise NotExistError("Entity %s not exist" % entity_id, "NotExistError")
        # application detail is loop-invariant; fetch it once instead of
        # once per field of every matched document
        app = application_detail(app_name)
        for item in entity:
            en = new_mapping_ins(item)
            for name, fields in en._docs.items():
                # delete s3 object; url ends in ".../<bucket>/<object>"
                bucket_name = fields.get("url").split("/")[-2]
                object_name = fields.get("url").split("/")[-1]
                S3Ins.del_object(bucket_name, object_name)
                # delete vectors from milvus; the collection name encodes
                # app, field and encoder instance
                vids = fields.get("ids")
                pipe_name = app.fields[name]["value"]
                pipe = pipeline_detail(pipe_name)
                instance_name = pipe.encoder.get("instance")
                MilvusIns.del_vectors(f"{app_name}_{name}_{instance_name}", vids)
        # finally delete the entity document from mongodb
        MongoIns.delete_by_id(mongo_ins_name, entity_id)
        logger.info("delete entity %s in application %s", entity_id, app_name)
        return en
    except Exception as e:
        logger.error(e)
        # bare raise keeps the original traceback intact
        raise
def search(name, fields=None, topk=10, nprobe=16):
    """Search an application's entities by its pipeline ("object") fields.

    Args:
        name: application name.
        fields: request fields; non-object fields are validated against the
            application schema, object fields carry the query data/url.
        topk: number of nearest vectors to return per query.
        nprobe: Milvus nprobe search parameter.

    Returns:
        A list of mapping instances for the matched entities.

    Raises:
        RequestError: if an unknown field is given or a query field has
            neither inline data nor a url.
        NoneVectorError: if the encoder produced no vectors.
    """
    # NOTE: default was the mutable literal `{}`, shared across calls;
    # use None and create a fresh dict per call instead.
    if fields is None:
        fields = {}
    res = []
    app = application_detail(name)
    accept_fields = [x for x, y in app.fields.items() if y.get('type') != "object"]
    pipeline_fields = {x: y['pipeline'] for x, y in app.fields.items()
                       if y.get('type') == "object"}
    for k in fields:
        if k not in accept_fields and k not in pipeline_fields:
            raise RequestError(f"fields {k} not in application", "")
    for n, p in pipeline_fields.items():
        pipe = pipeline_detail(p)
        value = fields.get(n)
        file_data = value.get('data')
        url = value.get('url')
        if not file_data and not url:
            raise RequestError("can't find data or url from request", "")
        vectors = run_pipeline(pipe, data=file_data, url=url)
        if not vectors:
            raise NoneVectorError("can't encode data by encoder, check input or encoder", "")
        milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
        vids = MilvusIns.search_vectors(milvus_collection_name, vectors,
                                        topk=topk, nprobe=nprobe)
        # here add scoreling function
        dbs = search_ids_from_mapping([x.id for x in vids[0]])
        for db in dbs:
            res.append(new_mapping_ins(id=db.id, app_name=db.app_name,
                                       image_url=db.image_url, fields=db.fields))
    return res
def search(name, fields=None, topk=10, nprobe=16):
    """Search an application's entities across its pipeline fields with scoring.

    Each pipeline field in ``fields`` is encoded and searched independently;
    the per-field results are then merged by ``get_score_result`` according
    to the per-field weight/decay configuration and the overall score mode.

    Args:
        name: application name.
        fields: request fields; pipeline fields may carry data/url plus
            optional ``inner_field_score_mode``, ``weight`` and
            ``decay_function``; ``score_mode`` selects the merge strategy.
        topk: number of results to return.
        nprobe: Milvus nprobe search parameter.

    Returns:
        The merged, scored search result.

    Raises:
        RequestError: if an unknown field is given or a query field has
            neither data nor url.
        NoneVectorError: if the encoder produced no vectors.
        NoneValidFieldError: if no searchable field was supplied.
        UnexpectedError: wrapping any other failure.
    """
    # NOTE: default was the mutable literal `{}`, shared across calls;
    # use None and create a fresh dict per call instead.
    if fields is None:
        fields = {}
    fields_res = {}
    score_config = {}
    try:
        app = application_detail(name)
        accept_fields = [
            x for x, y in app.fields.items() if y.get('type') != "pipeline"
        ]
        # score_mode is a pseudo-field controlling result merging
        accept_fields.append("score_mode")
        pipeline_fields = {
            x: y['value']
            for x, y in app.fields.items() if y.get('type') == "pipeline"
        }
        for k in fields:
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        valid_field_flag = False
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            value = fields.get(n)
            if not value:
                continue
            valid_field_flag = True
            file_data = value.get('data')
            url = value.get('url')
            inner_score_mode = value.get('inner_field_score_mode',
                                         'distance_first')
            score_config[n] = {
                'weight': value.get('weight', 1),
                'decay_function': value.get('decay_function', 'linear'),
            }
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = f"{app.name}_{pipe.encoder['instance']['name'].replace('phantoscope_', '')}"
            mongo_name = f"{app.name}_entity"
            dbs = search_and_score(milvus_collection_name, mongo_name, n,
                                   vectors, topk, nprobe, inner_score_mode)
            fields_res[n] = [new_mapping_ins(db) for db in dbs]
        if not valid_field_flag:
            # fixed typo in message: "boby" -> "body"
            raise NoneValidFieldError(
                "There is none valid field in search request body", Exception())
        score_mode = fields.get('score_mode', 'first')
        return get_score_result(fields_res, topk, score_config, score_mode)
    except Exception as e:
        err_msg = f"Unexpected error happen when search, {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
def upload(name, **kwargs):
    """Upload an entity into application *name*, with per-stage timing logs.

    Stores the raw object in S3, encodes it through each pipeline field,
    inserts the vectors into Milvus and records the id mapping.

    Args:
        name: application name.
        **kwargs: request payload; ``fields`` maps field names to values,
            object fields carrying ``data`` or ``url``.

    Returns:
        A list of mapping instances for the inserted vectors.

    Raises:
        NotExistError: if the application or a pipeline does not exist.
        RequestError: if an unknown field is given or a field has neither
            data nor url.
    """
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items()
                         if y.get('type') != "object"]
        pipeline_fields = {x: y['pipeline'] for x, y in app.fields.items()
                           if y.get('type') == "object"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k in kwargs.get('fields'):
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            # begin to timing
            start = time.time()
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            upload_time = time.time()
            logger.debug("[timing] upload image to bucket costs: {:.3f}s".format(
                upload_time - start))
            vectors = run_pipeline(pipe, data=file_data, url=url)
            pipeline_time = time.time()
            logger.debug("[timing] run pipeline costs: {:.3f}s".format(
                pipeline_time - upload_time))
            milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            insert_time = time.time()
            logger.debug("[timing] insert to milvus costs: {:.3f}s".format(
                insert_time - pipeline_time))
            for vid in vids:
                m = DB(id=vid, app_name=name,
                       image_url=gen_url(bucket_name, file_name),
                       fields=new_fields)
                add_mapping_data(m)
                res.append(new_mapping_ins(
                    id=vid, app_name=name,
                    image_url=gen_url(bucket_name, file_name),
                    fields=new_fields))
            final_time = time.time()
            # original literal was split mid-string (SyntaxError); rejoined
            logger.debug("[timing] prepare result costs: {:.3f}s".format(
                final_time - insert_time))
        return res
    except Exception as e:
        # was `print(e); return e` -- returning the exception object hides
        # the failure from callers; log and re-raise instead
        logger.error(e, exc_info=True)
        raise
def upload(name, **kwargs):
    """Upload an entity into application *name*.

    Stores the raw object in S3, encodes it through each pipeline field,
    inserts the vectors into Milvus and records the id mapping.

    Args:
        name: application name.
        **kwargs: request payload; ``fields`` maps field names to values,
            object fields carrying ``data`` or ``url``.

    Returns:
        A list of mapping instances for the inserted vectors.

    Raises:
        NotExistError: if the application or a pipeline does not exist.
        RequestError: if an unknown field is given or a field has neither
            data nor url.
        NoneVectorError: if the encoder produced no vectors.
    """
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [
            x for x, y in app.fields.items() if y.get('type') != "object"
        ]
        pipeline_fields = {
            x: y['pipeline']
            for x, y in app.fields.items() if y.get('type') == "object"
        }
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k in kwargs.get('fields'):
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            for vid in vids:
                m = DB(id=vid, app_name=name,
                       image_url=gen_url(bucket_name, file_name),
                       fields=new_fields)
                add_mapping_data(m)
                res.append(
                    new_mapping_ins(id=vid, app_name=name,
                                    image_url=gen_url(bucket_name, file_name),
                                    fields=new_fields))
        return res
    except Exception as e:
        # was `print(e); return e` -- returning the exception object hides
        # the failure from callers; log and re-raise instead
        logger.error(e, exc_info=True)
        raise
def entities_list(name, num, page):
    """Return one page of entity mappings for application *name*.

    Args:
        name: application name; documents live in the ``{name}_entity``
            MongoDB collection.
        num: page size.
        page: page index.

    Returns:
        A list of mapping instances, one per stored entity document.
    """
    try:
        documents = MongoIns.list_documents(f"{name}_entity", num, page)
        entities = [new_mapping_ins(docs=document) for document in documents]
        logger.info("get application %s entity list", name)
        return entities
    except Exception as e:
        logger.error(e)
        raise e
def upload(name, **kwargs):
    """Upload an entity into application *name*.

    For each pipeline field carrying data, the raw object is stored in S3,
    encoded through the pipeline, and the resulting vectors are inserted
    into Milvus; the combined per-field documents are persisted as one
    entity document in MongoDB.

    Args:
        name: application name.
        **kwargs: request payload; ``fields`` maps field names to values,
            pipeline fields carrying ``data`` or ``url``.

    Returns:
        A list containing the mapping instance of the stored entity.

    Raises:
        NotExistError: if the application or a pipeline does not exist.
        RequestError: if an unknown field is given, a supplied field has
            neither data nor url, or no valid field was supplied at all.
        NoneVectorError: if the encoder produced no vectors.
        UnexpectedError: wrapping any other failure.
    """
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        # partition the schema: pipeline fields are encoded, the rest are
        # plain value fields
        accept_fields = []
        pipeline_fields = {}
        for field_name, field in app.fields.items():
            if field.get('type') == "pipeline":
                pipeline_fields[field_name] = field['value']
            else:
                accept_fields.append(field_name)
        new_fields = app.fields.copy()
        for key, val in kwargs.items():
            if key in accept_fields:
                new_fields[key]['value'] = val
        for key in kwargs.get('fields'):
            if key not in accept_fields and key not in pipeline_fields:
                raise RequestError(f"fields {key} not in application", "")
        res = []
        docs = {}
        valid_field_flag = False
        for field_name, pipe_name in pipeline_fields.items():
            pipe = pipeline_detail(pipe_name)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % pipe_name)
            field_value = kwargs['fields'].get(field_name)
            if not field_value:
                continue
            valid_field_flag = True
            file_data = field_value.get('data')
            url = field_value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            object_name = "{}-{}".format(name, uuid.uuid4().hex)
            tmp_path = save_tmp_file(object_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, tmp_path, object_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
            collection = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
            vids = MilvusIns.insert_vectors(collection, vectors)
            docs[field_name] = {"ids": vids,
                                "url": gen_url(bucket_name, object_name)}
        MongoIns.insert_documents(f"{app.name}_entity", docs)
        res.append(new_mapping_ins(docs))
        if not valid_field_flag:
            raise RequestError("none valid field exist", "")
        return res
    except Exception as e:
        err_msg = f"Unexpected error happen when upload: {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
def entities_list(name, num, page):
    """Return one page of entity mappings for application *name*.

    Args:
        name: application name.
        num: page size.
        page: page index (offset is ``num * page``).

    Returns:
        A list of mapping instances, one per stored entity.
    """
    res = []
    try:
        for i in search_by_application(name, num, num * page):
            res.append(new_mapping_ins(id=i.id, app_name=i.app_name,
                                       image_url=i.image_url,
                                       fields=i.fields))
        logger.info("get application %s entity list", name)
        return res
    except Exception as e:
        # was `return e` -- returning the exception object masks the
        # failure from callers; log and re-raise instead
        logger.error(e)
        raise
def delete_entity(app_name, entity_name):
    """Delete an entity: its vectors, its stored object and its mapping.

    Args:
        app_name: application (and Milvus collection) name.
        entity_name: entity id; also the Milvus vector id as a string.

    Returns:
        The mapping instance of the deleted entity.

    Raises:
        NotExistError: if the entity does not exist.
    """
    try:
        entity = search_from_mapping(entity_name)
        if not entity:
            raise NotExistError("Entity %s not exist" % entity_name,
                                "NotExistError")
        MilvusIns.del_vectors(app_name, [int(entity_name)])
        # image_url ends in ".../<bucket>/<object>"
        bucket_name = entity.image_url.split("/")[-2]
        object_name = entity.image_url.split("/")[-1]
        S3Ins.del_object(bucket_name, object_name)
        del_mapping(entity_name)
        logger.info("delete entity %s in application %s", entity_name, app_name)
        return new_mapping_ins(id=entity.id, app_name=entity.app_name,
                               image_url=entity.image_url,
                               fields=entity.fields)
    except Exception as e:
        # was `return e` -- returning the exception object masks the
        # failure from callers; log and re-raise instead
        logger.error(e)
        raise
def upload(name, **kwargs):
    """Upload an entity into application *name*.

    For each pipeline field the raw object is stored in S3, encoded through
    the pipeline, and the resulting vectors inserted into Milvus; the
    combined per-field documents are persisted as one entity document in
    MongoDB.

    Args:
        name: application name.
        **kwargs: request payload; ``fields`` maps field names to values,
            pipeline fields carrying ``data`` or ``url``.

    Returns:
        A list containing the mapping instance of the stored entity.

    Raises:
        NotExistError: if the application or a pipeline does not exist.
        RequestError: if an unknown field is given or a field has neither
            data nor url.
    """
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items()
                         if y.get('type') != "pipeline"]
        pipeline_fields = {x: y['value'] for x, y in app.fields.items()
                           if y.get('type') == "pipeline"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k in kwargs.get('fields'):
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        docs = {}
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            milvus_collection_name = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
        # original had stray tokens (`fields=new_fields))`) and a `try`
        # without `except`, which made the function a SyntaxError; the
        # unused `doc_id` binding was also dropped
        MongoIns.insert_documents(f"{app.name}_entity", docs)
        res.append(new_mapping_ins(docs))
        return res
    except Exception as e:
        logger.error(e, exc_info=True)
        raise