def infer_with_predictor(input):
    """
    infer an image with the predictor
    :param input: warped input tensors for the predictor (see data_util.warp_input)
    :return: prediction result list, "infer nothing" if the predictor produced no
             output, or None if no predictor could be acquired
    """
    predictor = predictor_manager.get_predictor()
    if predictor is not None:
        try:
            start_time = time.time()
            outputs = predictor.run(input)
            period = time.time() - start_time
            logger.info("predictor infer cost time: {}".format("%2.2f sec" % period))
            if len(outputs) > 0:
                output = outputs[0]
                logger.debug("predictor output name: {}".format(output.name))
                output_data = np.array(output.data.float_data())
                output_data = output_data.reshape(-1, 6)
                # logger.debug("predictor infer result {}".format(output_data))
                return output_data.tolist()
            return "infer nothing"
        finally:
            predictor_manager.return_predictor(predictor)
    else:
        return None
def set_task_queue(self, data, timeout=0.5):
    """
    Called by a producer to submit a task to the queue.
    :param data: the data to submit
    :param timeout: maximum time to wait for a free slot
    :return: the index at which the producer should later read its result from the
             result array, or None if the queue stayed full until the timeout
    """
    index = None
    # q_full is set while the queue still has free slots and cleared once it is full.
    # The availability check must also happen under the lock; otherwise a request may
    # stop waiting early and be reported as timed out. If the queue is currently full,
    # release the lock, wait, re-acquire it and try again; if it is still full, give up.
    self.q_lock.acquire()
    if not self.q_full.is_set():
        self.q_lock.release()
        self.q_full.wait(timeout)
        self.q_lock.acquire()
    if self.q_full.is_set():
        index = self.task_queue_index
        self.task_queue[index] = data
        self.task_queue_index += 1
        # wake up the consumer if it is not awake yet
        if not self.c_lock.is_set():
            self.c_lock.set()
        # if the task queue is now full, block further producers
        if self.task_queue_index == self.task_num:
            self.q_full.clear()
    else:
        logger.info("set: task_queue is full, try again a moment later")
    self.q_lock.release()
    return index
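For reference, the producer side pairs this call with get_result_list, exactly as the web handler further below does. A minimal usage sketch (pivot and get_result_list are taken from that handler; the timeout value is only illustrative):

    index = pivot.set_task_queue(task_data, timeout=0.5)
    if index is None:
        # the queue stayed full for the whole timeout window; ask the client to retry
        pass
    else:
        pred = pivot.get_result_list(index)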
def read_image(image_bytes, target_size):
    """
    read image
    :param image_bytes: raw input image bytes
    :param target_size: image resize target
    :return: origin image and resized image
    """
    start_time = time.time()
    img_array = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    origin = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    img = resize_img(origin, target_size)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    mean_rgb = [127.5, 127.5, 127.5]
    img = crop_image(img, target_size)
    img = np.array(img).astype('float32')
    img -= mean_rgb
    img = img.transpose((2, 0, 1))  # HWC to CHW
    img *= 0.007843
    img = img[np.newaxis, :]
    period = time.time() - start_time
    logger.info("read image and resize cost time: {}".format("%2.2f sec" % period))
    return origin, img
def __init__(self):
    """
    create predictor manager
    """
    self.get_predictor_timeout = float(
        config.get('get.predictor.timeout', default_value=0.5))
    predictor_count = 0
    enable_mkl = False
    gpu_memory = 200
    gpu_device_ids = []
    model_dir = config.get('model.dir')
    device_type = config.get('device.type')
    if device_type == PredictorManager.CPU_DEVICE:
        cpu_predictor_count = int(
            config.getint('cpu.predictor.count', default_value=0))
        predictor_count = cpu_predictor_count
        enable_mkl = config.getboolean('cpu.enable_mkl', default_value=False)
    elif device_type == PredictorManager.GPU_DEVICE:
        gpu_predictor_count = int(
            config.getint('gpu.predictor.count', default_value=0))
        predictor_count = gpu_predictor_count
        gpu_memory = config.getint('gpu.predictor.memory', default_value=200)
        gpu_device_ids = config.get('gpu.predictor.device.id').split(',')
        gpu_device_ids = map(int, gpu_device_ids)
        if PYTHON_VERSION == 3:
            gpu_device_ids = list(gpu_device_ids)
        assert len(gpu_device_ids) == gpu_predictor_count, \
            "gpu predictor count doesn't match device count"
    else:
        raise Exception("no device to run predictor!")
    assert predictor_count > 0, "no device to predict"
    logger.info(
        "device type:{} predictor count:{} model dir:{} get predictor timeout:{}s"
        .format(device_type, predictor_count, model_dir,
                self.get_predictor_timeout))
    self.predictor_queue = Queue(maxsize=predictor_count)
    for i in range(predictor_count):
        # set config
        predictor_config = AnalysisConfig(model_dir)
        # predictor_config.specify_input_name()
        if device_type == PredictorManager.CPU_DEVICE:
            predictor_config.disable_gpu()
            if enable_mkl:
                predictor_config.enable_mkldnn()
        else:
            device_id = gpu_device_ids[i]
            predictor_config.enable_use_gpu(gpu_memory, device_id)
        # create PaddlePredictor
        predictor = create_paddle_predictor(predictor_config)
        self.predictor_queue.put(predictor)
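infer_with_predictor above acquires and releases predictors through get_predictor/return_predictor, which are not shown in this section. A minimal sketch of how they could sit on top of predictor_queue and get_predictor_timeout (an assumption, not the actual implementation):

    def get_predictor(self):
        """Take a predictor from the pool, or None if none frees up within the timeout."""
        try:
            # Empty comes from the queue module (Queue on Python 2)
            return self.predictor_queue.get(timeout=self.get_predictor_timeout)
        except Empty:
            return None

    def return_predictor(self, predictor):
        """Put the predictor back into the pool so other threads can reuse it."""
        self.predictor_queue.put(predictor)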
def init_evn(cls, current_evn):
    """
    init the config environment; takes effect only once
    :param current_evn: config section (environment) to use
    :return:
    """
    if cls.env is None:
        cls.env = current_evn
        logger.info("current config section %s", cls.env,
                    extra={'logid': 'react-main-loop-logid'})
def recognize():
    """
    Handle the POST request and return the processed result.
    """
    file = request.files['imgfile']
    if not request.form:
        abort(400)
    if 'log_id' in request.form:
        log_id = request.form['log_id']
        logger.set_logid(str(log_id))
    else:
        logger.set_auto_logid()
        log_id = logger.get_logid()
    log_id = int(log_id)
    result = ApiResult(log_id=log_id)
    start_time = time.time()
    try:
        image_bytes = file.stream.read()
        task_data = process_data(image_bytes)
        start_time = time.time()
        max_request_time = float(config.get('max_request_time'))
        index = pivot.set_task_queue(task_data, max_request_time)
        if index is not None:
            pred = pivot.get_result_list(index)
            pred['log_id'] = log_id
            result.success(data=pred)
            # save the prediction record to the database
            label, score, user_id = result.top3[0][0], result.top3[0][1], 1
            sql = ("insert into record(img,label,score,user_id) "
                   "values (%s,{},{},{});").format(label, score, user_id)
            cursor.execute(sql, args=(image_bytes,))
            database.commit()
        else:
            result.error(message="busy, wait then retry")
    except Exception as err:
        logger.error("infer exception: {}".format(err))
        result.error(message=str(err))
    period = time.time() - start_time
    logger.info("request cost:{}".format("%2.2f sec" % period))
    return json.dumps(result, default=lambda o: o.__dict__)
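A hedged client-side example for this handler; the host, port and route path are assumptions (the Flask route decorator is not shown here), while the imgfile and log_id field names match the form fields read above:

    import requests

    url = "http://127.0.0.1:8000/recognize"  # hypothetical host/port/path
    with open("test.jpg", "rb") as f:
        resp = requests.post(url, files={"imgfile": f}, data={"log_id": "1"})
    print(resp.text)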
def process_data(image_bytes):
    """
    Process the data according to infer_type and return it;
    modify as needed for your own business logic.
    :param image_bytes: raw input image bytes
    :return: preprocessed data for the configured infer type
    """
    # preprocessing shared by both infer types
    input_size = config.get('input.size').split(',')
    input_size = map(int, input_size)
    if PYTHON_VERSION == 3:
        input_size = list(input_size)
    start_time = time.time()
    origin, image_data = data_util.read_image(image_bytes, input_size[1:])
    # for the predictor path, the data needs to be further wrapped into a PaddleTensor
    if PREDICTOR == infer_type:
        image_data = data_util.warp_input(image_data, input_size)
    period = time.time() - start_time
    logger.info("prepare input cost time: {}".format("%2.2f sec" % period))
    return image_data
def business_process(inputs):
    """
    Different businesses may process data differently; centralize those changes here.
    To make full use of multi-threaded parallel preprocessing, the producer first calls
    process_data and submits the result to the task queue; here we simply take the
    already-processed data from the queue, so this part should be modified together
    with process_data.
    :param inputs: preprocessed data taken from the task queue
    :return:
    """
    if PREDICTOR == infer_type:
        outputs = infer_with_predictor(inputs)
        return outputs
    elif EXECUTOR == infer_type:
        # regroup the batch data
        image_data = [data[0] for data in inputs]
        image_data = np.array(image_data)
        start_time = time.time()
        # select the model used at each stage of the cascaded pipeline by model name
        outputs = infer_with_executor("SE_ResNeXt", image_data)
        period = time.time() - start_time
        infer_num = image_data.shape[0]
        logger.info("executor infer num {} cost time: {}".format(
            str(infer_num), "%2.2f sec" % period))
        outputs = np.array(outputs[0])
        result = []
        for lod_id in range(infer_num):
            data = {}
            output_data = outputs[lod_id]
            # logger.debug("executor infer result {}".format(output_data))
            data["result"] = np.array(output_data).tolist()
            # split the output into one result per image based on the lod info of each infer
            result.append(data)
        return result
    else:
        logger.critical(
            "must set an infer type in config, executor or predictor")
        exit(1)
def __init__(self):
    """
    create executor manager
    """
    self.get_executor_timeout = float(
        config.get('get.executor.timeout', default_value=0.5))
    model_dir = config.get('model.dir')
    # parse the JSON string of model paths into a dict: key is the model name,
    # value is the model path, which makes cascaded models easy to use
    model_dir = json.loads(model_dir)
    executor_count = 0
    enable_mkl = False
    gpu_memory = 200
    gpu_device_ids = []
    self.places_list = []
    # dictionary of loaded models
    self.model_dict = {}
    for model_name in model_dir.keys():
        self._load_model(model_dir[model_name], model_name)
    device_type = config.get('device.type')
    if device_type == ExecutorManager.CPU_DEVICE:
        cpu_executor_count = int(
            config.getint('cpu.executor.count', default_value=0))
        executor_count = cpu_executor_count
    elif device_type == ExecutorManager.GPU_DEVICE:
        gpu_executor_count = int(
            config.getint('gpu.executor.count', default_value=0))
        executor_count = gpu_executor_count
        gpu_device_ids = config.get('gpu.executor.device.id').split(',')
        gpu_device_ids = map(int, gpu_device_ids)
        if PYTHON_VERSION == 3:
            gpu_device_ids = list(gpu_device_ids)
        assert len(gpu_device_ids) == gpu_executor_count, \
            "gpu executor count doesn't match device count"
    else:
        raise Exception("no device to run executor!")
    assert executor_count > 0, "no device to predict"
    logger.info(
        "device type:{} executor count:{} model dir:{} get executor timeout:{}s"
        .format(device_type, executor_count, model_dir,
                self.get_executor_timeout))
    self.executor_queue = Queue(maxsize=executor_count)
    for i in range(executor_count):
        # the executor is thread safe and supports single/multi-GPU as well as
        # single/multi-CPU running; on CPU, only one executor is created and
        # shared by all threads
        if device_type == ExecutorManager.CPU_DEVICE:
            if self.executor_queue.empty():
                place = fluid.CPUPlace()
                executor = fluid.Executor(place)
                self._temp_executor = executor
            else:
                executor = self._temp_executor
        else:
            device_id = gpu_device_ids[i]
            place = fluid.CUDAPlace(device_id)
            executor = fluid.Executor(place)
        self.executor_queue.put(executor)
def state():
    """
    state page
    """
    logger.info("visit state page")
    return '0'
def index():
    """
    root page
    """
    logger.info("visit root page")
    return 'Index Page'
from twisted.web.resource import Resource
from twisted.web.server import Site
from twisted.web.wsgi import WSGIResource

from utils.consumer import InferConsumer
from business_service import process_data

pivot = Pivot(int(config.get("max_task_num")))
infer_consumer = InferConsumer(pivot, float(config.get("max_get_time")))

port = int(config.get("flask.server.port"))
work_thread_count = int(config.get('work.thread.count'))
site_root = str(config.get('server.root.site'))

reactor.suggestThreadPoolSize(int(work_thread_count))
flask_site = WSGIResource(reactor, reactor.getThreadPool(), app)
root = Resource()
root.putChild(
    site_root if PYTHON_VERSION == 2 else bytes(site_root, encoding='utf-8'),
    flask_site)
logger.info(
    "start app listen port:{} thread pool size:{} site root:{}".format(
        port, work_thread_count, site_root))
reactor.listenTCP(port, Site(root))
# run infer_consumer as a daemon thread so that it exits together with the main thread
infer_consumer.setDaemon(True)
infer_consumer.start()
reactor.run()