예제 #1
0
def delete_entity(app_name, entity_id):
    """Delete an entity of an application together with its stored artifacts.

    For the first document returned for ``entity_id``, every field's S3
    object and Milvus vectors are removed, then the document is deleted from
    the ``<app_name>_entity`` Mongo collection.

    :param app_name: name of the application that owns the entity
    :param entity_id: id of the entity to delete
    :return: the mapping instance built from the deleted document
    :raises NotExistError: when the lookup reports zero matching documents
    """
    try:
        collection = f"{app_name}_entity"
        found = MongoIns.search_by_id(collection, entity_id)

        if not found.count():
            raise NotExistError("Entity %s not exist" % entity_id,
                                "NotExistError")
        for record in found:
            en = new_mapping_ins(record)
            for field_name, field_docs in en._docs.items():
                # delete s3 object: bucket and object are the last two
                # "/"-separated segments of the stored url
                url_parts = field_docs.get("url").split("/")
                bucket = url_parts[-2]
                key = url_parts[-1]
                S3Ins.del_object(bucket, key)
                # delete vector from milvus
                vector_ids = field_docs.get("ids")
                application = application_detail(app_name)
                pipeline_name = application.fields[field_name]["value"]
                pipeline = pipeline_detail(pipeline_name)
                instance = pipeline.encoder.get("instance")
                milvus_collection = f"{app_name}_{field_name}_{instance}"
                MilvusIns.del_vectors(milvus_collection, vector_ids)
            # delete from mongodb
            MongoIns.delete_by_id(collection, entity_id)
            logger.info("delete entity %s in application %s", entity_id,
                        app_name)
            # only the first returned document is processed
            return en
    except Exception as e:
        logger.error(e)
        raise
예제 #2
0
def new_application(app_name, fields, s3_buckets):
    """Create an application and the storage that backs it.

    The fields are validated first; then the field rows are inserted, the
    ``<app_name>_entity`` Mongo collection and the Milvus collections are
    created, and the application itself is saved.

    :param app_name: name of the application to create
    :param fields: mapping of field name to a dict with ``type`` and ``value``
    :param s3_buckets: bucket information stored on the application
    :return: the saved ``Application`` with ``fields`` expanded to a dict
    :raises ArgsCheckError: when ``fields_check`` rejects ``fields``
    :raises ExistError: when an application with this name already exists
    """
    valid, reason = fields_check(fields)
    if not valid:
        raise ArgsCheckError(reason, "")
    try:
        # check application exist
        if search_application(app_name):
            raise ExistError(f"application <{app_name}> had exist", "")
        # insert fields to metadata
        rows = [
            FieldsDB(name=field_name,
                     type=spec.get('type'),
                     value=spec.get('value'),
                     app=app_name)
            for field_name, spec in fields.items()
        ]
        field_ids = insert_fields(rows)
        # create a application entity collection
        MongoIns.new_mongo_collection(f"{app_name}_entity")
        application = Application(name=app_name,
                                  fields=field_ids,
                                  buckets=s3_buckets)
        # create milvus collections
        create_milvus_collections_by_fields(application)
        # insert application to metadata
        application.save()
        application.fields = fields2dict(search_fields(field_ids))
        return application
    except Exception as e:
        logger.error("error happen during create app: %s",
                     str(e),
                     exc_info=True)
        raise
예제 #3
0
def fetch_operators(url, overwrite=True):
    """Fetch operators from an origin market and merge them into the local set.

    The merged set replaces the content of the operator collection: the
    collection is dropped and every merged operator is inserted again.

    :param url: origin market url
    :param overwrite: whether to replace a local operator when the origin
        offers one with the same name; when false the local one is kept
    :return: list of operators after the merge (replaced and new operators
        come last, in origin order)
    :raises RequestError: when the request fails, the market identity header
        is not ``"0.1.0"`` or the status code is not 200
    """
    try:
        r = requests.get(url)
        if r.headers.get(MARKET_IDENTITY_HEADER) != "0.1.0":
            raise RequestError("Uncertified market", "")
        if r.status_code != 200:
            raise RequestError(r.text, r.status_code)
    except RequestError:
        # already the error type callers expect; do not wrap it a second time
        raise
    except Exception as e:
        # str(e) rather than e.args[0]: an exception may carry no args at all
        raise RequestError(str(e), e) from e

    origin = [
        Operator(op['name'], op['addr'], op['author'],
                 op['version'], op['type'], op['description'])
        for op in r.json()
    ]

    # Merge by name in a dict instead of removing from / appending to the
    # list while iterating over it, which skips elements.
    merged = {op.name: op for op in all_operators()}
    for candidate in origin:
        if candidate.name in merged:
            if not overwrite:
                continue
            # drop the old entry so the replacement moves to the end
            del merged[candidate.name]
        merged[candidate.name] = candidate
    local_operators = list(merged.values())

    MongoIns.delete_mongo_collection(OPERATOR_COLLECTION_NAME)
    for x in local_operators:
        MongoIns.insert_documents(OPERATOR_COLLECTION_NAME, x.to_dict())
    return local_operators
예제 #4
0
def create_pipeline(name, processors=None, encoder=None, description=None):
    """Create a pipeline from processor and encoder descriptions and store it.

    :param name: name of the new pipeline
    :param processors: iterable of dicts with ``name`` (operator name) and
        ``instance`` keys; ``None`` is treated as "no processors"
    :param encoder: dict with ``name`` and ``instance`` keys; it is indexed
        unconditionally, so leaving it as ``None`` fails
    :param description: description passed on to ``Pipeline``
    :return: the stored ``Pipeline``
    :raises ExistError: when a pipeline with this name already exists
    :raises PipelineIllegalError: when ``pipeline_illegal`` rejects the pipeline
    """
    try:
        p = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME, name)
        if p:
            raise ExistError(f"pipeline <{name}> already exists", "")
        pro = []
        for processor in processors or []:
            pr = operator_detail(processor["name"])
            # Build a fresh dict per processor: reusing one dict would make
            # every entry of ``pro`` alias the last processor.
            pro.append({
                "operator": pr.to_dict(),
                "instance": pr.inspect_instance(processor["instance"]),
            })
        encoder_info = operator_detail(encoder["name"])
        encoder_res = {
            "operator": encoder_info.to_dict(),
            "instance": encoder_info.inspect_instance(encoder["instance"]),
        }
        pipe = Pipeline(name, description, pro, encoder_res)
        pipe.metadata = pipe._metadata()
        if pipeline_illegal(pipe):
            raise PipelineIllegalError("Pipeline illegal check error", "")
        MongoIns.insert_documents(PIPELINE_COLLECTION_NAME, pipe.to_dict())
        return pipe
    except Exception as e:
        logger.error(e, exc_info=True)
        raise
예제 #5
0
def register_operators(name, addr, author, version, type, description):
    """Register a new operator in the operator collection.

    The operator object and its metadata are built before the name is checked
    for uniqueness.

    :return: the dict form of the stored operator
    :raises ExistError: when an operator with this name is already stored
    """
    try:
        operator = Operator(name, addr, author, version, type, description)
        operator.metadata = operator._metadata()
        already_there = MongoIns.search_by_name(OPERATOR_COLLECTION_NAME, name)
        if already_there:
            raise ExistError(f"operator {name} had exist", "")
        MongoIns.insert_documents(OPERATOR_COLLECTION_NAME,
                                  operator.to_dict())
        return operator.to_dict()
    except Exception as e:
        logger.error(f"Unexpected error happen during register operator, {str(e)}", exc_info=True)
        raise
예제 #6
0
def delete_operators(name):
    """Delete the operator called ``name`` and return what was stored.

    :param name: name of the operator to delete
    :return: an ``Operator`` rebuilt from the first stored document
    :raises NotExistError: when no operator with this name is stored
    """
    try:
        matches = MongoIns.search_by_name(OPERATOR_COLLECTION_NAME, name)
        if not matches:
            raise NotExistError(f"operator {name} not exist", "")
        record = matches[0]
        MongoIns.delete_by_name(OPERATOR_COLLECTION_NAME, name)
        # positional constructor arguments, in the order Operator takes them
        ctor_keys = ("name", "addr", "author", "version", "type",
                     "description")
        operator = Operator(*[record[key] for key in ctor_keys])
        operator.metadata = record["metadata"]
        return operator
    except Exception as e:
        logger.error(e)
        raise
예제 #7
0
def delete_pipeline(name):
    """Delete the pipeline called ``name`` and return what was stored.

    :param name: name of the pipeline to delete
    :return: a ``Pipeline`` rebuilt from the first stored document
    :raises NotExistError: when no pipeline with this name is stored
    """
    try:
        matches = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME, name)
        if not matches:
            raise NotExistError("pipeline %s is not exist" % name, "")
        record = matches[0]
        MongoIns.delete_by_name(PIPELINE_COLLECTION_NAME, name)
        # positional constructor arguments, in the order Pipeline takes them
        ctor_keys = ("name", "description", "processors", "encoder", "input",
                     "output")
        pipeline = Pipeline(*[record[key] for key in ctor_keys])
        pipeline.metadata = record["metadata"]
        return pipeline
    except Exception as e:
        logger.error(e)
        raise
예제 #8
0
def delete_milvus_collections_by_fields(app):
    """Drop the Milvus collection of every pipeline field of ``app``.

    The collection name is ``<app name>_<encoder instance name>`` with any
    ``phantoscope_`` substring removed from the instance name.

    :param app: application document; read via ``app['fields']`` and
        ``app.get('name')``
    """
    for field in app['fields'].values():
        if field["type"] != "pipeline":
            continue
        pipe = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME,
                                       field.get("value"))[0]
        app_name = app.get('name')
        instance_name = pipe.get('encoder').get('instance').get('name')
        collection = f"{app_name}_{instance_name.replace('phantoscope_', '')}"
        MilvusIns.del_milvus_collection(collection)
예제 #9
0
def count_entities(name):
    """Return the number of documents in ``<name>_entity`` as a string.

    :param name: application name
    :return: the document count converted with ``str``
    """
    try:
        total = MongoIns.count_documents(f"{name}_entity")
        logger.info(f"get count of application {name} entities")
    except Exception as e:
        logger.error(e)
        raise
    else:
        return str(total)
예제 #10
0
def entities_list(name, num, page):
    """List entities of an application as mapping instances.

    :param name: application name; documents come from ``<name>_entity``
    :param num: forwarded to ``MongoIns.list_documents``
    :param page: forwarded to ``MongoIns.list_documents``
    :return: list with one mapping instance per returned document
    """
    try:
        documents = MongoIns.list_documents(f"{name}_entity", num, page)
        mappings = [new_mapping_ins(docs=document) for document in documents]
        logger.info("get application %s entity list", name)
        return mappings
    except Exception as e:
        logger.error(e)
        raise
예제 #11
0
def application_detail(name):
    """Load the application called ``name`` from the application collection.

    :param name: application name
    :return: an ``Application`` rebuilt from the first stored document
    :raises NotExistError: when no application with this name is stored
    """
    try:
        matches = MongoIns.search_by_name(APPLICATION_COLLECTION_NAME, name)
        if not matches:
            raise NotExistError(f"application {name} not exist", "")
        record = matches[0]
        application = Application(record["name"], record["fields"],
                                  record["bucket"])
        application.metadata = record["metadata"]
        return application
    except Exception as e:
        logger.error(e)
        raise
예제 #12
0
def upload(name, **kwargs):
    """Upload the pipeline-field payloads of a request into application ``name``.

    For every pipeline field of the application that has a value in
    ``kwargs['fields']`` the payload is saved to a temp file, uploaded to the
    first bucket of the application, encoded by the field's pipeline, and the
    vectors are inserted into Milvus; a document is then inserted into
    ``<app name>_entity``.

    :param name: application name
    :param kwargs: must contain ``fields``, a mapping of field name to a dict
        with ``data`` and/or ``url``
    :return: list of mapping instances, one per processed pipeline field
    :raises UnexpectedError: wraps every exception raised inside, including
        the ``NotExistError`` / ``RequestError`` / ``NoneVectorError`` raised
        by the checks below
    """
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist", "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [key for key, spec in app.fields.items() if spec.get('type') != "pipeline"]
        pipeline_fields = {key: spec['value'] for key, spec in app.fields.items() if spec.get('type') == "pipeline"}
        # shallow copy: the nested field dicts are shared with app.fields
        new_fields = app.fields.copy()
        for key, val in kwargs.items():
            if key in accept_fields:
                new_fields[key]['value'] = val
        res = []
        # reject request fields the application does not declare
        for key, _ in kwargs.get('fields').items():
            if not (key in accept_fields or key in pipeline_fields):
                raise RequestError(f"fields {key} not in application", "")

        docs = {}
        valid_field_flag = False
        for field_name, pipeline_name in pipeline_fields.items():
            pipe = pipeline_detail(pipeline_name)
            if not pipe:
                raise NotExistError("pipeline not exist", "pipeline %s not exist" % pipeline_name)
            payload = kwargs['fields'].get(field_name)
            if not payload:
                # the request carries nothing for this pipeline field
                continue
            valid_field_flag = True
            file_data = payload.get('data')
            url = payload.get('url')
            if not (file_data or url):
                raise RequestError("can't find data or url from request", "")
            file_name = f"{name}-{uuid.uuid4().hex}"
            file_path = save_tmp_file(file_name, file_data, url)

            S3Ins.upload2bucket(bucket_name, file_path, file_name)

            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError("can't encode data by encoder, check input or encoder", "")

            milvus_collection_name = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)

            # docs accumulates across fields and is inserted on every pass
            docs[field_name] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
            MongoIns.insert_documents(f"{app.name}_entity", docs)
            res.append(new_mapping_ins(docs))
        if not valid_field_flag:
            raise RequestError("none valid field exist", "")
        return res
    except Exception as e:
        err_msg = f"Unexpected error happen when upload: {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
예제 #13
0
def search_and_score(milvus_collection_name, mongo_name, field_name, vectors,
                     topk, nprobe, inner_score_mode: str):
    """
    search vectors from milvus and score by inner field score mode

    The milvus query size starts at ``topk + 60`` and grows each round until
    enough mongo documents are found, milvus runs out of results, or the
    query size reaches the 2048 cap.

    :param milvus_collection_name: collection name will be search
    :param mongo_name: mongo collection name will be selected from
    :param field_name: field name for searching from mongodb
    :param vectors: vectors which will be searched in milvus
    :param topk: milvus topk number
    :param nprobe: milvus nprobe number
    :param inner_score_mode: value accepted by ``InnerFieldScoreMode``
    :return: at most ``topk`` documents returned by
        ``MongoIns.search_by_vector_id``
    :raises WrongInnerFieldModeError: when ``inner_score_mode`` is rejected
        by ``InnerFieldScoreMode``
    :raises NoneVectorError: when milvus returns an empty result
    """
    result_dbs = []
    MAX_TOPK = 2048
    magic_number = 60
    increase_rate = 0.1
    query_topk = topk + magic_number
    end_flag = False
    try:
        inner_score_mode = InnerFieldScoreMode(inner_score_mode)
    except Exception as e:
        raise WrongInnerFieldModeError("Unsupported inner field mode", e)
    while (len(result_dbs) < topk) and (not end_flag):
        # check query topk max value
        query_topk = min(query_topk, MAX_TOPK)
        if query_topk >= MAX_TOPK:
            # The query cannot grow any further, so this must be the last
            # round; otherwise a full page with too few mongo hits would
            # repeat the identical query forever.
            end_flag = True
        vids = MilvusIns.search_vectors(milvus_collection_name,
                                        vectors,
                                        topk=query_topk,
                                        nprobe=nprobe)
        if len(vids) == 0:
            raise NoneVectorError("milvus search result is None", "")
        # filter -1 and if exist -1 or len(vids) < topk
        if (-1 in vids.id_array[0]) or len(vids[0]) < query_topk:
            end_flag = True
        # inner field score function here
        res_vids = get_inner_field_score_result(vids, query_topk,
                                                inner_score_mode)

        if len(res_vids) < topk:
            if query_topk < MAX_TOPK:
                # calc a new query_topk and skip the mongo lookup this round
                query_topk += math.ceil(query_topk * increase_rate)
                increase_rate *= 2
                if not end_flag:
                    continue
            end_flag = True
        result_dbs = MongoIns.search_by_vector_id(mongo_name, field_name,
                                                  res_vids)
        # calc a new query_topk if len(result_dbs) < topk
        query_topk += math.ceil(query_topk * increase_rate)

    return result_dbs[:topk]
예제 #14
0
def test_pipeline(name, data=None, url=None):
    """Run the stored pipeline called ``name`` once on the given input.

    :param name: name of the pipeline to run
    :param data: forwarded to ``run_pipeline`` as ``data``
    :param url: forwarded to ``run_pipeline`` as ``url``
    :return: ``{"result": <run_pipeline output>}``
    :raises NotExistError: when no pipeline with this name is stored
    """
    # Exceptions propagate unchanged to the caller.
    matches = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME, name)
    if not matches:
        raise NotExistError("pipeline %s is not exist" % name, "")
    record = matches[0]
    pipeline = Pipeline(record["name"], record["description"],
                        record["processors"], record["encoder"],
                        record["input"], record["output"])
    pipeline.metadata = record["metadata"]
    outcome = run_pipeline(pipeline, data=data, url=url)
    return {"result": outcome}
예제 #15
0
def all_operators():
    """Return every stored operator as an ``Operator`` object.

    :return: list of ``Operator`` objects, one per document returned by
        ``MongoIns.list_documents(OPERATOR_COLLECTION_NAME, 0)``
    """
    # positional constructor arguments, in the order Operator takes them
    ctor_keys = ("name", "addr", "author", "version", "type", "description")
    operators = []
    try:
        for record in MongoIns.list_documents(OPERATOR_COLLECTION_NAME, 0):
            operator = Operator(*[record[key] for key in ctor_keys])
            operator.metadata = record["metadata"]
            operators.append(operator)
        return operators
    except Exception as e:
        logger.error(e)
        raise
예제 #16
0
def new_application(app_name, fields, s3_bucket):
    """Create an application and the storage that backs it.

    After validation a Milvus collection is created for every pipeline field,
    then the ``<app_name>_entity`` Mongo collection and the S3 bucket are
    created, and finally the application document is inserted.

    :param app_name: name of the application to create
    :param fields: mapping of field name to a dict with ``type`` and ``value``
    :param s3_bucket: passed to ``S3Ins.new_s3_buckets`` and stored on the
        application
    :return: the stored ``Application``
    :raises ArgsCheckError: when ``fields_check`` rejects ``fields``
    :raises ExistError: when an application with this name already exists
    """
    valid, reason = fields_check(fields)
    if not valid:
        raise ArgsCheckError(reason, "")
    # check application exist; this lookup is not logged on failure
    if MongoIns.search_by_name(APPLICATION_COLLECTION_NAME, app_name):
        raise ExistError(f"application <{app_name}> had exist", "")
    try:
        # create milvus collections, one per pipeline field
        for spec in fields.values():
            if spec.get("type") != "pipeline":
                continue
            pipe = MongoIns.search_by_name(PIPELINE_COLLECTION_NAME,
                                           spec.get("value"))[0]
            encoder_identity = identity(
                pipe.get("encoder").get("instance").get("endpoint"))
            collection = f"{app_name}_{pipe.get('encoder').get('instance').get('name').replace('phantoscope_', '')}"
            MilvusIns.new_milvus_collection(collection,
                                            int(encoder_identity["dimension"]),
                                            1024, "l2")
        # create a application entity collection
        MongoIns.new_mongo_collection(f"{app_name}_entity")
        S3Ins.new_s3_buckets(s3_bucket)
        application = Application(name=app_name,
                                  fields=fields,
                                  bucket=s3_bucket)
        application.metadata = application._metadata()
        MongoIns.insert_documents(APPLICATION_COLLECTION_NAME,
                                  application.to_dict())
        return application
    except Exception as e:
        logger.error("error happen during create app: %s",
                     str(e),
                     exc_info=True)
        raise
예제 #17
0
def all_pipelines():
    """Return every stored pipeline as a ``Pipeline`` object.

    :return: list of ``Pipeline`` objects, one per document returned by
        ``MongoIns.list_documents(PIPELINE_COLLECTION_NAME, 0)``
    """
    # positional constructor arguments, in the order Pipeline takes them
    ctor_keys = ("name", "description", "processors", "encoder", "input",
                 "output")
    pipelines = []
    try:
        for record in MongoIns.list_documents(PIPELINE_COLLECTION_NAME, 0):
            pipeline = Pipeline(*[record[key] for key in ctor_keys])
            pipeline.metadata = record["metadata"]
            pipelines.append(pipeline)
        return pipelines
    except Exception as e:
        logger.error(e)
        raise
예제 #18
0
def delete_application(name, force=False):
    """Delete an application and the storage that backs it.

    Removes the Milvus collections of the application's pipeline fields, its
    S3 bucket, the ``<name>_entity`` Mongo collection and finally the
    application document.

    :param name: name of the application to delete
    :param force: when true, delete even if the application still has entities
    :return: an ``Application`` rebuilt from the deleted document
    :raises RequestError: when entities remain and ``force`` is false
    :raises NotExistError: when no application with this name is stored
    """
    try:
        # Refuse to drop an application that still holds entities unless the
        # caller forces it. The check must fire when entities exist, not when
        # the list is empty.
        if not force and entities_list(name, 100, 0):
            raise RequestError(
                "Prevent to delete application with entity not deleted",
                "")
        app = MongoIns.search_by_name(APPLICATION_COLLECTION_NAME, name)
        if not app:
            raise NotExistError(f"application {name} not exist", "")
        app = app[0]
        delete_milvus_collections_by_fields(app)
        S3Ins.del_s3_buckets(app['bucket'])
        MongoIns.delete_mongo_collection(f"{name}_entity")
        MongoIns.delete_by_name(APPLICATION_COLLECTION_NAME, name)
        logger.info("delete application %s", name)
        application = Application(app["name"], app["fields"], app["bucket"])
        application.metadata = app["metadata"]
        return application
    except Exception as e:
        logger.error(e)
        raise
예제 #19
0
def delete_application(name):
    """Delete an application that has no entities left.

    The application metadata is removed first; afterwards the Milvus
    collections, the field rows, the S3 buckets and the ``<name>_entity``
    Mongo collection are cleaned up.

    :param name: name of the application to delete
    :return: an ``Application`` rebuilt from the deleted metadata row
    :raises RequestError: when the application still has entities
    :raises NotExistError: when ``del_application`` returns nothing
    """
    try:
        remaining = entities_list(name, 100, 0)
        if len(remaining) != 0:
            raise RequestError(
                "Prevent to delete application with entity not deleted", "")
        # TODO rewrite clean all resource before change metadata
        deleted_rows = del_application(name)
        if not deleted_rows:
            raise NotExistError(f"application {name} not exist", "")
        row = deleted_rows[0]
        field_rows = search_fields(json.loads(row.fields))
        application = Application(name=row.name,
                                  fields=fields2dict(field_rows),
                                  buckets=row.s3_buckets)
        delete_milvus_collections_by_fields(application)
        delete_fields(json.loads(row.fields))
        # s3_buckets is stored as a comma separated string
        S3Ins.del_s3_buckets(row.s3_buckets.split(","))
        MongoIns.delete_mongo_collection(f"{name}_entity")
        logger.info("delete application %s", name)
        return application
    except Exception as e:
        logger.error(e)
        raise
예제 #20
0
def all_applications():
    """Return every stored application as an ``Application`` object.

    :return: list of ``Application`` objects, one per document returned by
        ``MongoIns.list_documents(APPLICATION_COLLECTION_NAME, 0)``
    """
    applications = []
    try:
        for record in MongoIns.list_documents(APPLICATION_COLLECTION_NAME, 0):
            application = Application(name=record["name"],
                                      fields=record["fields"],
                                      bucket=record["bucket"])
            application.metadata = record["metadata"]
            applications.append(application)
        logger.info("get all application")
        return applications
    except Exception as e:
        logger.error(e)
        raise
예제 #21
0
def upload(name, **kwargs):
    """Upload the pipeline-field payloads of a request into application ``name``.

    For every pipeline field of the application the payload from
    ``kwargs['fields']`` is saved to a temp file, uploaded to the first bucket
    of the application, encoded by the field's pipeline, and the vectors are
    inserted into Milvus; a document is then inserted into
    ``<app name>_entity``.

    :param name: application name
    :param kwargs: must contain ``fields``, a mapping of field name to a dict
        with ``data`` and/or ``url``
    :return: list of mapping instances, one per pipeline field
    :raises NotExistError: when the application or a pipeline is missing
    :raises RequestError: when a request field is not declared by the
        application, or a pipeline field carries neither data nor url
    """
    # NOTE(review): the source of this block was truncated -- it opened a
    # ``try:`` without any handler and ended in a dangling
    # ``fields=new_fields))`` fragment. Both were removed so the function
    # parses; exceptions propagate unchanged. Confirm against the upstream
    # revision.
    app = application_detail(name)
    if not app:
        raise NotExistError("application not exist", "application %s not exist" % name)
    bucket_name = app.buckets.split(",")[0]
    accept_fields = [x for x, y in app.fields.items() if y.get('type') != "pipeline"]
    pipeline_fields = {x: y['value'] for x, y in app.fields.items() if y.get('type') == "pipeline"}
    # NOTE(review): new_fields is only referenced by the lost fragment; it is
    # a shallow copy, so the assignment below also writes into app.fields.
    new_fields = app.fields.copy()
    for k, v in kwargs.items():
        if k in accept_fields:
            new_fields[k]['value'] = v
    res = []
    # reject request fields the application does not declare
    for k, _ in kwargs.get('fields').items():
        if k not in accept_fields and k not in pipeline_fields:
            raise RequestError(f"fields {k} not in application", "")
    docs = {}
    for n, p in pipeline_fields.items():
        pipe = pipeline_detail(p)
        if not pipe:
            raise NotExistError("pipeline not exist", "pipeline %s not exist" % p)
        value = kwargs['fields'].get(n)
        # A pipeline field missing from the request used to crash with
        # AttributeError on None; report it like a payload without content.
        if not value:
            raise RequestError("can't find data or url from request", "")
        file_data = value.get('data')
        url = value.get('url')
        if not file_data and not url:
            raise RequestError("can't find data or url from request", "")
        file_name = "{}-{}".format(name, uuid.uuid4().hex)
        file_path = save_tmp_file(file_name, file_data, url)

        S3Ins.upload2bucket(bucket_name, file_path, file_name)

        vectors = run_pipeline(pipe, data=file_data, url=url)

        milvus_collection_name = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
        vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
        docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
        MongoIns.insert_documents(f"{app.name}_entity", docs)
        res.append(new_mapping_ins(docs))
    return res