def delete_entity(app_name, entity_id):
    """Delete one entity and every resource attached to it.

    Removes the stored S3 object and milvus vectors for each field of the
    entity, then drops the document from the application's mongo collection.
    Raises NotExistError when no entity matches *entity_id*.
    """
    try:
        collection = f"{app_name}_entity"
        entity = MongoIns.search_by_id(collection, entity_id)
        if not entity.count():
            raise NotExistError("Entity %s not exist" % entity_id, "NotExistError")
        for item in entity:
            en = new_mapping_ins(item)
            for field_name, fields in en._docs.items():
                # delete s3 object (bucket/object are the last two url segments)
                url_parts = fields.get("url").split("/")
                S3Ins.del_object(url_parts[-2], url_parts[-1])
                # delete vector from milvus
                vids = fields.get("ids")
                app = application_detail(app_name)
                pipe_name = app.fields[field_name]["value"]
                pipe = pipeline_detail(pipe_name)
                instance_name = pipe.encoder.get("instance")
                MilvusIns.del_vectors(f"{app_name}_{field_name}_{instance_name}", vids)
        # delete from mongodb
        MongoIns.delete_by_id(collection, entity_id)
        logger.info("delete entity %s in application %s", entity_id, app_name)
        return en
    except Exception as e:
        logger.error(e)
        raise e
def new_application(app_name, fields, s3_buckets):
    """Create an application: persist its fields, create the entity mongo
    collection and the milvus collections, then save the app metadata.

    Raises ArgsCheckError on an invalid field spec and ExistError when an
    application with the same name already exists.
    """
    ok, message = fields_check(fields)
    if not ok:
        raise ArgsCheckError(message, "")
    try:
        # refuse to create the same application twice
        if search_application(app_name):
            raise ExistError(f"application <{app_name}> had exist", "")
        # persist the field definitions in metadata storage
        field_rows = [
            FieldsDB(name=field_name, type=field.get('type'),
                     value=field.get('value'), app=app_name)
            for field_name, field in fields.items()
        ]
        ids = insert_fields(field_rows)
        # create a application entity collection
        MongoIns.new_mongo_collection(f"{app_name}_entity")
        app = Application(name=app_name, fields=ids, buckets=s3_buckets)
        # create milvus collections
        create_milvus_collections_by_fields(app)
        # insert application to metadata
        app.save()
        app.fields = fields2dict(search_fields(ids))
        return app
    except Exception as e:
        logger.error("error happen during create app: %s", str(e), exc_info=True)
        raise e
def fetch_operators(url, overwrite=True):
    """Fetch operators from an origin market and merge them into the local set.

    url -- origin market url
    overwrite -- whether a remote operator replaces a local one with the same name

    Raises RequestError when the market is uncertified, returns a non-200
    status, or the request itself fails.
    """
    origin = []
    try:
        r = requests.get(url)
        if r.headers.get(MARKET_IDENTITY_HEADER) != "0.1.0":
            raise RequestError("Uncertified market", "")
        if r.status_code != 200:
            raise RequestError(r.text, r.status_code)
    except RequestError:
        # bug fix: our own RequestError used to fall into the generic handler
        # below and be re-wrapped via e.args[0], mangling the message
        raise
    except Exception as e:
        raise RequestError(e.args[0], e)
    for op in r.json():
        origin.append(Operator(op['name'], op['addr'], op['author'],
                               op['version'], op['type'], op['description']))
    local_operators = all_operators()
    local_operator_names = [x.name for x in local_operators]
    for x in origin:
        if x.name not in local_operator_names:
            local_operators.append(x)
        elif overwrite:
            # drop the stale local copy, then take the remote one
            for lop in local_operators:
                if lop.name == x.name:
                    local_operators.remove(lop)
            local_operators.append(x)
    # rewrite the whole collection with the merged result
    MongoIns.delete_mongo_collection(OPERATOR_COLLECTION_NAME)
    for x in local_operators:
        MongoIns.insert_documents(OPERATOR_COLLECTION_NAME, x.to_dict())
    return local_operators
def create_pipeline(name, processors=None, encoder=None, description=None):
    """Assemble and store a pipeline from processor/encoder operator specs.

    Raises ExistError when the name is already taken and PipelineIllegalError
    when the assembled pipeline fails the legality check.
    """
    try:
        p = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME, name)
        if p:
            raise ExistError(f"pipeline <{name}> already exists", "")
        pro = []
        for processor in processors:
            pr = operator_detail(processor["name"])
            # bug fix: the result dict must be created per processor --
            # reusing a single dict made every entry in `pro` alias the
            # same object, so all processors collapsed into the last one
            processor_res = {
                "operator": pr.to_dict(),
                "instance": pr.inspect_instance(processor["instance"]),
            }
            pro.append(processor_res)
        encoder_info = operator_detail(encoder["name"])
        encoder_res = {
            "operator": encoder_info.to_dict(),
            "instance": encoder_info.inspect_instance(encoder["instance"]),
        }
        pipe = Pipeline(name, description, pro, encoder_res)
        pipe.metadata = pipe._metadata()
        if pipeline_illegal(pipe):
            raise PipelineIllegalError("Pipeline illegal check error", "")
        MongoIns.insert_documents(PIPELINE_COLLECTION_NAME, pipe.to_dict())
        return pipe
    except Exception as e:
        logger.error(e, exc_info=True)
        raise e
def register_operators(name, addr, author, version, type, description):
    """Register a new operator and return its dict form.

    Raises ExistError when an operator with the same name is already stored.
    """
    try:
        if MongoIns.search_by_name(OPERATOR_COLLECTION_NAME, name):
            raise ExistError(f"operator {name} had exist", "")
        op = Operator(name, addr, author, version, type, description)
        op.metadata = op._metadata()
        MongoIns.insert_documents(OPERATOR_COLLECTION_NAME, op.to_dict())
        return op.to_dict()
    except Exception as e:
        logger.error(f"Unexpected error happen during register operator, {str(e)}", exc_info=True)
        raise e
def delete_operators(name):
    """Delete an operator by name and return the removed Operator object.

    Raises NotExistError when no operator with that name is stored.
    """
    try:
        docs = MongoIns.search_by_name(OPERATOR_COLLECTION_NAME, name)
        if not docs:
            raise NotExistError(f"operator {name} not exist", "")
        doc = docs[0]
        MongoIns.delete_by_name(OPERATOR_COLLECTION_NAME, name)
        operator = Operator(doc["name"], doc["addr"], doc["author"],
                            doc["version"], doc["type"], doc["description"])
        operator.metadata = doc["metadata"]
        return operator
    except Exception as e:
        logger.error(e)
        raise e
def delete_pipeline(name):
    """Delete a pipeline by name and return the removed Pipeline object.

    Raises NotExistError when no pipeline with that name is stored.
    """
    try:
        docs = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME, name)
        if not docs:
            raise NotExistError("pipeline %s is not exist" % name, "")
        doc = docs[0]
        MongoIns.delete_by_name(PIPELINE_COLLECTION_NAME, name)
        pipe = Pipeline(doc["name"], doc["description"], doc["processors"],
                        doc["encoder"], doc["input"], doc["output"])
        pipe.metadata = doc["metadata"]
        return pipe
    except Exception as e:
        logger.error(e)
        raise e
def delete_milvus_collections_by_fields(app):
    """Drop the milvus collection backing every pipeline-typed field of *app*."""
    for field in app['fields'].values():
        if field["type"] != "pipeline":
            continue
        pipe = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME,
                                       field.get("value"))[0]
        instance = pipe.get('encoder').get('instance').get('name').replace('phantoscope_', '')
        MilvusIns.del_milvus_collection(f"{app.get('name')}_{instance}")
def count_entities(name):
    """Return the number of entities of application *name*, as a string."""
    try:
        total = MongoIns.count_documents(f"{name}_entity")
        logger.info(f"get count of application {name} entities")
        return str(total)
    except Exception as e:
        logger.error(e)
        raise e
def entities_list(name, num, page):
    """Return one page (*num* items, page index *page*) of mapped entities
    for application *name*."""
    try:
        documents = MongoIns.list_documents(f"{name}_entity", num, page)
        entities = [new_mapping_ins(docs=doc) for doc in documents]
        logger.info("get application %s entity list", name)
        return entities
    except Exception as e:
        logger.error(e)
        raise e
def application_detail(name):
    """Load an application by name.

    Raises NotExistError when no application with that name is stored.
    """
    try:
        docs = MongoIns.search_by_name(APPLICATION_COLLECTION_NAME, name)
        if not docs:
            raise NotExistError(f"application {name} not exist", "")
        doc = docs[0]
        application = Application(doc["name"], doc["fields"], doc["bucket"])
        application.metadata = doc["metadata"]
        return application
    except Exception as e:
        logger.error(e)
        raise e
def upload(name, **kwargs):
    """Upload entity data into application *name*.

    kwargs['fields'] maps a field name to {"data": ..., "url": ...}; for each
    pipeline-typed field at least one of data/url must be present.  Each
    pipeline field's payload is saved to S3, encoded into vectors by its
    pipeline and inserted into milvus; the assembled document is stored in the
    application's mongo entity collection.  Returns a list of mapping
    instances for the inserted document.  Any failure is wrapped in
    UnexpectedError.
    """
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist", "application %s not exist" % name)
        # the application's first bucket receives the raw files
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items() if y.get('type') != "pipeline"]
        pipeline_fields = {x: y['value'] for x, y in app.fields.items() if y.get('type') == "pipeline"}
        # NOTE(review): new_fields is never read after this loop; also, since
        # copy() is shallow, the assignment below mutates the inner dicts
        # shared with app.fields -- confirm whether that side effect is wanted
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        # reject any request field the application does not declare
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        docs = {}
        # set once any pipeline field actually carried a value
        valid_field_flag = False
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist", "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            if not value:
                continue
            valid_field_flag = True
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            # stage the payload locally, then push it to S3
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            # encode the payload into vectors via the field's pipeline
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError("can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
        doc_id = MongoIns.insert_documents(f"{app.name}_entity", docs)
        res.append(new_mapping_ins(docs))
        if not valid_field_flag:
            raise RequestError("none valid field exist", "")
        return res
    except Exception as e:
        err_msg = f"Unexpected error happen when upload: {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
def search_and_score(milvus_collection_name, mongo_name, field_name, vectors, topk, nprobe, inner_score_mode: str):
    """
    search vectors from milvus and score by inner field score mode

    Repeatedly queries milvus with a growing topk (starting at topk + a
    60-result margin, capped at 2048) until enough scored results can be
    resolved from mongodb or milvus runs out of matches.

    :param milvus_collection_name: collection name will be search
    :param mongo_name: mongo collection name will be selected from
    :param field_name: field name for searching from mongodb
    :param vectors: vectors which will be searched in milvus
    :param topk: milvus topk number
    :param nprobe: milvus nprobe number
    :param inner_score_mode: name of an InnerFieldScoreMode member
    :return: image id of entity
    :raises WrongInnerFieldModeError: if inner_score_mode is not a valid mode
    :raises NoneVectorError: if milvus returns no results at all
    """
    result_dbs = []
    MAX_TOPK = 2048
    # extra headroom over the caller's topk for the first milvus query
    magic_number = 60
    # growth factor for query_topk; doubles each time results fall short
    increase_rate = 0.1
    query_topk = topk + magic_number
    end_flag = False
    try:
        inner_score_mode = InnerFieldScoreMode(inner_score_mode)
    except Exception as e:
        raise WrongInnerFieldModeError("Unsupported inner field mode", e)
    while (len(result_dbs) < topk) and (not end_flag):
        # check query topk max value
        query_topk = min(query_topk, MAX_TOPK)
        vids = MilvusIns.search_vectors(milvus_collection_name, vectors, topk=query_topk, nprobe=nprobe)
        if len(vids) == 0:
            raise NoneVectorError("milvus search result is None", "")
        # filter -1 and if exist -1 or len(vids) < topk
        # (-1 ids / a short result mean milvus has no more matches to give)
        if (-1 in vids.id_array[0]) or len(vids[0]) < query_topk:
            end_flag = True
        # inner field score function here
        res_vids = get_inner_field_score_result(vids, query_topk, inner_score_mode)
        if len(res_vids) < topk:
            if query_topk < MAX_TOPK:
                # calc a new query_topk and needn't to query from mysql
                query_topk += math.ceil(query_topk * increase_rate)
                increase_rate *= 2
                if not end_flag:
                    continue
            end_flag = True
        result_dbs = MongoIns.search_by_vector_id(mongo_name, field_name, res_vids)
        # calc a new query_topk if len(result_dbs) < topk
        query_topk += math.ceil(query_topk * increase_rate)
    return result_dbs[:topk]
def test_pipeline(name, data=None, url=None):
    """Run pipeline *name* once against the given data/url and return
    {"result": ...}.

    Raises NotExistError when the pipeline is not stored.
    """
    try:
        docs = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME, name)
        if not docs:
            raise NotExistError("pipeline %s is not exist" % name, "")
        doc = docs[0]
        p = Pipeline(doc["name"], doc["description"], doc["processors"],
                     doc["encoder"], doc["input"], doc["output"])
        p.metadata = doc["metadata"]
        return {"result": run_pipeline(p, data=data, url=url)}
    except Exception as e:
        raise e
def all_operators():
    """Return every registered operator as a list of Operator objects."""
    try:
        operators = []
        for doc in MongoIns.list_documents(OPERATOR_COLLECTION_NAME, 0):
            op = Operator(doc["name"], doc["addr"], doc["author"],
                          doc["version"], doc["type"], doc["description"])
            op.metadata = doc["metadata"]
            operators.append(op)
        return operators
    except Exception as e:
        logger.error(e)
        raise e
def new_application(app_name, fields, s3_bucket):
    """Create an application backed by mongo: one milvus collection per
    pipeline-typed field, an entity mongo collection, an s3 bucket and the
    application metadata document.

    Raises ArgsCheckError on an invalid field spec, ExistError when the
    application already exists, and NotExistError when a referenced pipeline
    cannot be found.
    """
    ok, message = fields_check(fields)
    if not ok:
        raise ArgsCheckError(message, "")
    # check application exist (the original try/except ExistError: raise was a
    # no-op wrapper and has been removed)
    if MongoIns.search_by_name(APPLICATION_COLLECTION_NAME, app_name):
        raise ExistError(f"application <{app_name}> had exist", "")
    try:
        for _, value in fields.items():
            if value.get("type") == "pipeline":
                pipes = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME,
                                                value.get("value"))
                # bug fix: an unknown pipeline used to crash with IndexError
                # on the unguarded [0]; raise a meaningful error instead
                if not pipes:
                    raise NotExistError(
                        f"pipeline {value.get('value')} not exist", "")
                pipe = pipes[0]
                ei = identity(
                    pipe.get("encoder").get("instance").get("endpoint"))
                name = f"{app_name}_{pipe.get('encoder').get('instance').get('name').replace('phantoscope_', '')}"
                MilvusIns.new_milvus_collection(name, int(ei["dimension"]), 1024, "l2")
        # create a application entity collection
        MongoIns.new_mongo_collection(f"{app_name}_entity")
        S3Ins.new_s3_buckets(s3_bucket)
        # insert application to metadata
        app = Application(name=app_name, fields=fields, bucket=s3_bucket)
        app.metadata = app._metadata()
        MongoIns.insert_documents(APPLICATION_COLLECTION_NAME, app.to_dict())
        return app
    except Exception as e:
        logger.error("error happen during create app: %s", str(e), exc_info=True)
        raise e
def all_pipelines():
    """Return every stored pipeline as a list of Pipeline objects."""
    try:
        pipelines = []
        for doc in MongoIns.list_documents(PIPELINE_COLLECTION_NAME, 0):
            p = Pipeline(doc["name"], doc["description"], doc["processors"],
                         doc["encoder"], doc["input"], doc["output"])
            p.metadata = doc["metadata"]
            pipelines.append(p)
        return pipelines
    except Exception as e:
        logger.error(e)
        raise e
def delete_application(name, force=False):
    """Delete an application and its milvus, s3 and mongo resources.

    Unless *force* is set, deletion is refused while the application still
    owns entities.  Raises NotExistError when the application is unknown.
    """
    try:
        if not force:
            # bug fix: the guard was inverted ("if not entities_list(...)") --
            # it blocked deletion when the application had NO entities and
            # allowed it while entities remained (compare the sibling
            # delete_application variant, which tests len(entities_list(...)))
            if entities_list(name, 100, 0):
                raise RequestError(
                    "Prevent to delete application with entity not deleted", "")
        app = MongoIns.search_by_name(APPLICATION_COLLECTION_NAME, name)
        if not app:
            raise NotExistError(f"application {name} not exist", "")
        app = app[0]
        delete_milvus_collections_by_fields(app)
        S3Ins.del_s3_buckets(app['bucket'])
        MongoIns.delete_mongo_collection(f"{name}_entity")
        MongoIns.delete_by_name(APPLICATION_COLLECTION_NAME, name)
        logger.info("delete application %s", name)
        application = Application(app["name"], app["fields"], app["bucket"])
        application.metadata = app["metadata"]
        return application
    except Exception as e:
        logger.error(e)
        raise e
def delete_application(name):
    """Delete an application (legacy metadata-db variant) along with its
    fields, milvus collections, s3 buckets and entity collection.

    Refuses to delete while the application still owns entities; raises
    NotExistError when the application is unknown.
    """
    try:
        if len(entities_list(name, 100, 0)):
            raise RequestError(
                "Prevent to delete application with entity not deleted", "")
        # TODO rewrite clean all resource before change metadata
        deleted = del_application(name)
        if not deleted:
            raise NotExistError(f"application {name} not exist", "")
        record = deleted[0]
        field_ids = json.loads(record.fields)
        app = Application(name=record.name,
                          fields=fields2dict(search_fields(field_ids)),
                          buckets=record.s3_buckets)
        delete_milvus_collections_by_fields(app)
        delete_fields(field_ids)
        S3Ins.del_s3_buckets(record.s3_buckets.split(","))
        MongoIns.delete_mongo_collection(f"{name}_entity")
        logger.info("delete application %s", name)
        return app
    except Exception as e:
        logger.error(e)
        raise e
def all_applications():
    """Return every stored application as a list of Application objects."""
    try:
        applications = []
        for doc in MongoIns.list_documents(APPLICATION_COLLECTION_NAME, 0):
            app = Application(name=doc["name"], fields=doc["fields"],
                              bucket=doc["bucket"])
            app.metadata = doc["metadata"]
            applications.append(app)
        logger.info("get all application")
        return applications
    except Exception as e:
        logger.error(e)
        raise e
def upload(name, **kwargs):
    """Upload entity data into application *name* (legacy variant).

    NOTE(review): the original source was syntactically broken -- its ``try``
    had no ``except`` clause and the final append carried a dangling
    ``fields=new_fields))`` fragment.  Reconstructed minimally: the fragment
    is folded into the new_mapping_ins call (which is why new_fields is
    built above) and the file's standard log-and-reraise handler is added.
    Confirm against version control history.
    """
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        # the application's first bucket receives the raw files
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items()
                         if y.get('type') != "pipeline"]
        pipeline_fields = {x: y['value'] for x, y in app.fields.items()
                           if y.get('type') == "pipeline"}
        # merge plain (non-pipeline) field values from the request
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        # reject any request field the application does not declare
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        docs = {}
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            # stage the payload locally, push to S3, encode and index it
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            milvus_collection_name = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
        MongoIns.insert_documents(f"{app.name}_entity", docs)
        res.append(new_mapping_ins(docs=docs, fields=new_fields))
        return res
    except Exception as e:
        logger.error("Unexpected error happen when upload: %s", str(e),
                     exc_info=True)
        raise e