Ejemplo n.º 1
0
def search(name, fields={}, topk=10, nprobe=16):
    res = []
    try:
        app = application_detail(name)
        accept_fields = [x for x, y in app.fields.items() if y.get('type') != "object"]
        pipeline_fields = {x: y['pipeline'] for x, y in app.fields.items() if y.get('type') == "object"}
        for k, _ in fields.items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            value = fields.get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError("can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
            vids = MilvusIns.search_vectors(milvus_collection_name, vectors, topk=topk, nprobe=nprobe)
            # here add scoreling function
            dbs = search_ids_from_mapping([x.id for x in vids[0]])
            for db in dbs:
                m = new_mapping_ins(id=db.id, app_name=db.app_name,
                                    image_url=db.image_url, fields=db.fields)
                res.append(m)
        return res
    except Exception as e:
        raise e
Ejemplo n.º 2
0
def search(name, fields={}, topk=10, nprobe=16):
    fields_res = {}
    score_config = {}
    try:
        app = application_detail(name)
        accept_fields = [
            x for x, y in app.fields.items() if y.get('type') != "pipeline"
        ]
        accept_fields.append("score_mode")
        pipeline_fields = {
            x: y['value']
            for x, y in app.fields.items() if y.get('type') == "pipeline"
        }
        for k, _ in fields.items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        valid_field_flag = False
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)

            value = fields.get(n)
            if not value:
                continue

            valid_field_flag = True
            file_data = value.get('data')
            url = value.get('url')
            inner_score_mode = value.get('inner_field_score_mode',
                                         'distance_first')
            score_config[n] = {}
            score_config[n]['weight'] = value.get('weight', 1)
            score_config[n]['decay_function'] = value.get(
                'decay_function', 'linear')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = f"{app.name}_{pipe.encoder['instance']['name'].replace('phantoscope_', '')}"
            mongo_name = f"{app.name}_entity"
            dbs = search_and_score(milvus_collection_name, mongo_name, n,
                                   vectors, topk, nprobe, inner_score_mode)
            tmp_res = []

            for db in dbs:
                m = new_mapping_ins(db)
                tmp_res.append(m)
            fields_res[n] = tmp_res
        if not valid_field_flag:
            raise NoneValidFieldError(
                "There is none valid field in search request boby",
                Exception())
        score_mode = fields.get('score_mode', 'first')
        res = get_score_result(fields_res, topk, score_config, score_mode)
        return res
    except Exception as e:
        err_msg = f"Unexpected error happen when search, {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
Ejemplo n.º 3
0
def search(name, fields={}, topk=10, nprobe=16):
    res = []
    try:
        app = application_detail(name)
        accept_fields = [
            x for x, y in app.fields.items() if y.get('type') != "object"
        ]
        pipeline_fields = {
            x: y['pipeline']
            for x, y in app.fields.items() if y.get('type') == "object"
        }
        for k, _ in fields.items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            value = fields.get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
        return res
    except Exception as e:
        raise e
Ejemplo n.º 4
0
def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist", "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items() if y.get('type') != "object"]
        pipeline_fields = {x: y['pipeline'] for x, y in app.fields.items() if y.get('type') == "object"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist", "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)

            # begin to timing
            start = time.time()
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            upload_time = time.time()
            logger.debug("[timing] upload image to bucket costs: {:.3f}s".format(upload_time - start))

            vectors = run_pipeline(pipe, data=file_data, url=url)
            pipeline_time = time.time()
            logger.debug("[timing] run pipeline costs: {:.3f}s".format(pipeline_time - upload_time))

            milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            insert_time = time.time()
            logger.debug("[timing] insert to milvus costs: {:.3f}s".format(insert_time - pipeline_time))
            for vid in vids:
                m = DB(id=vid, app_name=name,
                       image_url=gen_url(bucket_name, file_name),
                       fields=new_fields)
                add_mapping_data(m)
                res.append(new_mapping_ins(id=vid, app_name=name,
                                           image_url=gen_url(bucket_name, file_name),
                                           fields=new_fields))
            final_time = time.time()
            logger.debug("[timing] prepare result costs: {:.3f}s".format(final_time - insert_time))

        return res
    except Exception as e:
        print(e)
        return e
Ejemplo n.º 5
0
def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [
            x for x, y in app.fields.items() if y.get('type') != "object"
        ]
        pipeline_fields = {
            x: y['pipeline']
            for x, y in app.fields.items() if y.get('type') == "object"
        }
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            for vid in vids:
                m = DB(id=vid,
                       app_name=name,
                       image_url=gen_url(bucket_name, file_name),
                       fields=new_fields)
                add_mapping_data(m)
                res.append(
                    new_mapping_ins(id=vid,
                                    app_name=name,
                                    image_url=gen_url(bucket_name, file_name),
                                    fields=new_fields))
        return res
    except Exception as e:
        print(e)
        return e
Ejemplo n.º 6
0
def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist", "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items() if y.get('type') != "pipeline"]
        pipeline_fields = {x: y['value'] for x, y in app.fields.items() if y.get('type') == "pipeline"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")

        docs = {}
        valid_field_flag = False
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist", "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            if not value:
                continue
            valid_field_flag = True
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)

            S3Ins.upload2bucket(bucket_name, file_path, file_name)

            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError("can't encode data by encoder, check input or encoder", "")

            milvus_collection_name = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)

            docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
            doc_id = MongoIns.insert_documents(f"{app.name}_entity", docs)
            res.append(new_mapping_ins(docs))
        if not valid_field_flag:
            raise RequestError("none valid field exist", "")
        return res
    except Exception as e:
        err_msg = f"Unexpected error happen when upload: {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
Ejemplo n.º 7
0
def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist", "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items() if y.get('type') != "pipeline"]
        pipeline_fields = {x: y['value'] for x, y in app.fields.items() if y.get('type') == "pipeline"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        docs = {}
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist", "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)

            S3Ins.upload2bucket(bucket_name, file_path, file_name)

            vectors = run_pipeline(pipe, data=file_data, url=url)

            milvus_collection_name = f"{app.name}_{pipe.encoder['name']}_{pipe.encoder['instance']}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
            doc_id = MongoIns.insert_documents(f"{app.name}_entity", docs)
            res.append(new_mapping_ins(docs))
                         fields=new_fields))
        return res
Ejemplo n.º 8
0
def application_detail_api(name):
    return application_detail(name)