# Project-level helpers such as application_detail, pipeline_detail,
# save_tmp_file, run_pipeline, S3Ins, MilvusIns, DB, add_mapping_data,
# new_mapping_ins, gen_url, logger and the *Error classes are assumed to be
# imported from the surrounding project.
import time
import uuid


def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items()
                         if y.get('type') != "object"]
        pipeline_fields = {x: y['pipeline'] for x, y in app.fields.items()
                           if y.get('type') == "object"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            # time each stage of the upload
            start = time.time()
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            upload_time = time.time()
            logger.debug("[timing] upload image to bucket costs: "
                         "{:.3f}s".format(upload_time - start))
            vectors = run_pipeline(pipe, data=file_data, url=url)
            pipeline_time = time.time()
            logger.debug("[timing] run pipeline costs: "
                         "{:.3f}s".format(pipeline_time - upload_time))
            milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            insert_time = time.time()
            logger.debug("[timing] insert to milvus costs: "
                         "{:.3f}s".format(insert_time - pipeline_time))
            for vid in vids:
                m = DB(id=vid, app_name=name,
                       image_url=gen_url(bucket_name, file_name),
                       fields=new_fields)
                add_mapping_data(m)
                res.append(new_mapping_ins(
                    id=vid, app_name=name,
                    image_url=gen_url(bucket_name, file_name),
                    fields=new_fields))
            final_time = time.time()
            logger.debug("[timing] prepare result costs: "
                         "{:.3f}s".format(final_time - insert_time))
        return res
    except Exception as e:
        print(e)
        return e
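# A minimal sketch (hypothetical helper, not part of the source) of factoring
# the repeated time.time()/logger.debug pairs in upload() above into a
# context manager, using only the standard library:
import logging
from contextlib import contextmanager

timing_logger = logging.getLogger(__name__)


@contextmanager
def timed(step):
    # Log how long the wrapped block took, mirroring the
    # "[timing] ... costs" messages emitted by upload().
    start = time.time()
    yield
    timing_logger.debug("[timing] %s costs: %.3fs", step, time.time() - start)

# Usage inside upload() would then read:
#   with timed("upload image to bucket"):
#       S3Ins.upload2bucket(bucket_name, file_path, file_name)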
def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [
            x for x, y in app.fields.items() if y.get('type') != "object"
        ]
        pipeline_fields = {
            x: y['pipeline']
            for x, y in app.fields.items() if y.get('type') == "object"
        }
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = f"{pipe.name}_{pipe.encoder}"
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            for vid in vids:
                m = DB(id=vid, app_name=name,
                       image_url=gen_url(bucket_name, file_name),
                       fields=new_fields)
                add_mapping_data(m)
                res.append(
                    new_mapping_ins(id=vid, app_name=name,
                                    image_url=gen_url(bucket_name, file_name),
                                    fields=new_fields))
        return res
    except Exception as e:
        print(e)
        return e
def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items()
                         if y.get('type') != "pipeline"]
        pipeline_fields = {x: y['value'] for x, y in app.fields.items()
                           if y.get('type') == "pipeline"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        docs = {}
        valid_field_flag = False
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            if not value:
                continue
            valid_field_flag = True
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            if not vectors:
                raise NoneVectorError(
                    "can't encode data by encoder, check input or encoder", "")
            milvus_collection_name = (f"{app.name}_{pipe.encoder['name']}_"
                                      f"{pipe.encoder['instance']}")
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
        doc_id = MongoIns.insert_documents(f"{app.name}_entity", docs)
        res.append(new_mapping_ins(docs))
        if not valid_field_flag:
            raise RequestError("none valid field exist", "")
        return res
    except Exception as e:
        err_msg = f"Unexpected error happen when upload: {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
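# A minimal usage sketch for upload() above. The application name "my_app"
# and the pipeline-typed field "image" are hypothetical; per the code, each
# entry under `fields` must carry `data` or `url`, otherwise a RequestError
# is raised.
res = upload(
    "my_app",
    fields={
        "image": {
            "url": "https://example.com/cat.jpg",
            # or "data": raw file content (exact format is whatever
            # save_tmp_file/run_pipeline expect; assumed here)
        }
    },
)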
def upload(name, **kwargs):
    try:
        app = application_detail(name)
        if not app:
            raise NotExistError("application not exist",
                                "application %s not exist" % name)
        bucket_name = app.buckets.split(",")[0]
        accept_fields = [x for x, y in app.fields.items()
                         if y.get('type') != "pipeline"]
        pipeline_fields = {x: y['value'] for x, y in app.fields.items()
                           if y.get('type') == "pipeline"}
        new_fields = app.fields.copy()
        for k, v in kwargs.items():
            if k in accept_fields:
                new_fields[k]['value'] = v
        res = []
        for k, _ in kwargs.get('fields').items():
            if k not in accept_fields and k not in pipeline_fields:
                raise RequestError(f"fields {k} not in application", "")
        docs = {}
        for n, p in pipeline_fields.items():
            pipe = pipeline_detail(p)
            if not pipe:
                raise NotExistError("pipeline not exist",
                                    "pipeline %s not exist" % p)
            value = kwargs['fields'].get(n)
            file_data = value.get('data')
            url = value.get('url')
            if not file_data and not url:
                raise RequestError("can't find data or url from request", "")
            file_name = "{}-{}".format(name, uuid.uuid4().hex)
            file_path = save_tmp_file(file_name, file_data, url)
            S3Ins.upload2bucket(bucket_name, file_path, file_name)
            vectors = run_pipeline(pipe, data=file_data, url=url)
            milvus_collection_name = (f"{app.name}_{pipe.encoder['name']}_"
                                      f"{pipe.encoder['instance']}")
            vids = MilvusIns.insert_vectors(milvus_collection_name, vectors)
            docs[n] = {"ids": vids, "url": gen_url(bucket_name, file_name)}
        doc_id = MongoIns.insert_documents(f"{app.name}_entity", docs)
        res.append(new_mapping_ins(docs))
        return res
    except Exception as e:
        err_msg = f"Unexpected error happen when upload: {str(e)}"
        logger.error(err_msg, exc_info=True)
        raise UnexpectedError(err_msg, e)
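# Shape of the entity document that upload() above inserts into the
# "<app.name>_entity" Mongo collection: one key per pipeline-typed field.
# The field name and values below are hypothetical, and the URL format
# depends on gen_url(), which is assumed here.
example_doc = {
    "image": {
        "ids": [1594004566354951000],  # Milvus ids returned by insert_vectors
        "url": "http://<s3-host>/<bucket_name>/<file_name>",
    }
}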
def test_insert_vectors(self):
    """test insert vectors"""
    vectors = [[random.random() for _ in range(self.dimension)]
               for _ in range(20)]
    rv = MilvusIns.insert_vectors(self.name, vectors)
    assert len(rv) == 20
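# A minimal fixture sketch for the test above (values hypothetical): the
# real test class is assumed to provide self.name (an existing Milvus
# collection) and self.dimension (its vector dimension) in setUp().
import random
import unittest


class TestMilvusInsSketch(unittest.TestCase):
    def setUp(self):
        self.name = "test_collection"  # assumed: collection created elsewhere
        self.dimension = 512           # assumed vector dimension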