def execute(self, _image):
    """Run the 106-point landmark model on ``_image``.

    The image is resized with ``resize_with_long_side(_image, 192)``, padded
    by ``center_pad_image_with_specific_base`` (192 x 192 base), converted
    BGR -> RGB and sent to the Triton helper as float32.

    Args:
        _image: input image array.

    Returns:
        dict: ``points_count`` (always 106) plus ``x_locations`` and
        ``y_locations`` arrays derived from the model output.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    shrunk = resize_with_long_side(_image, 192)
    resized_h, resized_w = shrunk.shape[:2]
    padded, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        shrunk,
        _width_base=192,
        _height_base=192,
        _output_pad_ratio=True
    )
    network_input = cv2.cvtColor(force_convert_image_to_bgr(padded), cv2.COLOR_BGR2RGB)
    candidate_h, candidate_w = network_input.shape[:2]

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for 106p landmark not implement")

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=network_input.astype(np.float32),
    )
    # One (x, y) pair per row.
    points = np.reshape(result['OUTPUT__0'].squeeze(), (-1, 2))

    # (v + 1) * (size // 2) maps the raw output onto the padded image
    # (presumably the raw values lie in [-1, 1] -- TODO confirm); the pad
    # offset is then removed and the result divided by the resized size.
    x_locations = ((points[:, 0] + 1) * (candidate_w // 2)
                   - width_pad_ratio * candidate_w) / resized_w
    y_locations = ((points[:, 1] + 1) * (candidate_h // 2)
                   - height_pad_ratio * candidate_h) / resized_h

    return {
        'points_count': 106,
        'x_locations': x_locations,
        'y_locations': y_locations,
    }
def execute(self, _image, _landmark_info=None):
    """Compute a face embedding for ``_image``.

    When ``_landmark_info`` is given, five landmark points are selected and
    passed to ``face_align`` with target size ``(96, 112)``; otherwise the
    image is simply resized to ``(96, 112)``. The face is then padded with
    ``center_pad_image_with_specific_base`` (112 x 112) and sent to the
    Triton helper as float32.

    Args:
        _image: face image array.
        _landmark_info: optional dict with ``points_count``, ``x_locations``
            and ``y_locations``. The location containers are indexed with a
            list of indices, so they presumably are numpy arrays -- confirm
            against the caller.

    Returns:
        dict: ``{'feature_vector': list}`` holding the squeezed model output.

    Raises:
        NotImplementedError: for an unsupported landmark count, or if
            ``self.inference_helper`` is not a ``TritonInferenceHelper``.
    """
    if _landmark_info is None:
        face = cv2.resize(_image, (96, 112))
    else:
        points_count = _landmark_info['points_count']
        if points_count == 0:
            keypoint_indices = list(range(5))
        elif points_count == 106:
            # Indices of the five points used for alignment in the
            # 106-point layout.
            keypoint_indices = [38, 88, 86, 52, 61]
        else:
            raise NotImplementedError(
                f"Cannot align face with {_landmark_info['points_count']} landmark points now"
            )
        xs = _landmark_info['x_locations'][keypoint_indices]
        ys = _landmark_info['y_locations'][keypoint_indices]
        face = face_align(_image, np.stack([xs, ys], axis=1), (96, 112))

    model_input = center_pad_image_with_specific_base(face, 112, 112, 0, False)

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for asian face embedding not implement"
        )

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=model_input.astype(np.float32),
    )
    embedding = result['OUTPUT__0'].squeeze()
    return {'feature_vector': embedding.tolist()}
def execute(self, _image):
    """Estimate an alpha matte for ``_image``.

    The image is resized with ``resize_with_long_side(_image, 512)``, padded
    by ``center_pad_image_with_specific_base`` (512 x 512), converted with
    ``force_convert_image_to_bgr`` and sent to the Triton helper as float32.
    The slice at index 3 of the squeezed output is used as the alpha plane;
    the padding is stripped with ``remove_image_pad`` and the plane is
    resized back to the input's height and width.

    Args:
        _image: input image array; only ``shape[:2]`` (height, width) is
            read directly here.

    Returns:
        dict: ``{'matting_alpha': alpha}`` where ``alpha`` was resized with
        ``cv2.resize`` to ``dsize=(original_w, original_h)`` using bilinear
        interpolation.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    # No zero-filled placeholder is allocated: every path below either
    # produces the real alpha plane or raises.
    original_h, original_w = _image.shape[:2]

    resized_image = resize_with_long_side(_image, 512)
    padded_image, (left_margin_ratio, top_margin_ratio) = \
        center_pad_image_with_specific_base(resized_image, 512, 512, 0, True)
    candidate_image = force_convert_image_to_bgr(padded_image)

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for bise human matting not implement"
        )

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=candidate_image.astype(np.float32),
    )
    matting_result = result['OUTPUT__0'].squeeze()

    # Index 3 along the first axis is taken as the alpha plane.
    alpha_plane = matting_result[3, ...]
    alpha_without_pad = remove_image_pad(
        alpha_plane, resized_image, left_margin_ratio, top_margin_ratio)
    # cv2.resize takes dsize as (width, height).
    alpha_full_size = cv2.resize(
        alpha_without_pad,
        (original_w, original_h),
        interpolation=cv2.INTER_LINEAR,
    )
    return {'matting_alpha': alpha_full_size}
def execute(self, _image):
    """Detect faces in ``_image`` with the ultra-light face detector.

    ``_image`` is resized to ``self.candidate_image_size``, converted
    BGR -> RGB and sent to the Triton helper as float32. Boxes whose score
    in column 1 exceeds ``self.score_threshold`` are kept, passed through
    ``nms`` and returned as centre/size boxes.

    Args:
        _image: input image array.

    Returns:
        dict: ``{'locations': [...]}`` where each entry holds ``box_info``
        (``degree``, ``center_x``, ``center_y``, ``box_height``,
        ``box_width``) and ``score``. The list is empty when no box passes
        the threshold.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    detections = {
        'locations': [],
    }
    # NOTE(review): the pad ratios come from padding `_image`, but the padded
    # image itself is never used -- the network sees the directly resized
    # `_image`. Confirm whether subtracting the ratios below is intended.
    _unused_padded, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        _image,
        _width_base=32,
        _height_base=24,
        _output_pad_ratio=True)
    resized = cv2.resize(_image, self.candidate_image_size)
    resize_h, resize_w = resized.shape[:2]
    bgr = force_convert_image_to_bgr(resized)

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for ultra light face detect not implement"
        )

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=rgb.astype(np.float32),
    )
    score_map = result['OUTPUT__0'].squeeze()
    raw_boxes = result['OUTPUT__1'].squeeze()

    # Column 0 is background, column 1 is face.
    face_scores = score_map[..., 1]
    keep_mask = face_scores > self.score_threshold
    if not np.any(keep_mask):
        return detections

    kept_scores = face_scores[keep_mask]
    kept_boxes = raw_boxes[keep_mask, :]
    # NMS runs on boxes scaled by the resized image size; the boxes that
    # are reported keep their original scale.
    scaled_boxes = kept_boxes * (resize_w, resize_h, resize_w, resize_h)
    survivors = nms(scaled_boxes, kept_scores, _nms_threshold=self.iou_threshold)

    for one_box, one_score in zip(kept_boxes[survivors], kept_scores[survivors]):
        width = one_box[2] - one_box[0]
        height = one_box[3] - one_box[1]
        detections['locations'].append({
            'box_info': {
                'degree': 0,
                'center_x': one_box[0] + width / 2 - width_pad_ratio,
                'center_y': one_box[1] + height / 2 - height_pad_ratio,
                'box_height': height,
                'box_width': width,
            },
            'score': one_score,
        })
    return detections
def execute(self, _image, _landmark_info=None):
    """Classify race, gender and age bracket of a face image.

    When ``_landmark_info`` is given, five landmark points are selected and
    passed to ``face_align`` with target size ``(192, 224)``; otherwise the
    image is resized to ``(192, 224)``. The face is padded with
    ``center_pad_image_with_specific_base`` (224 x 224), converted with
    ``force_convert_image_to_bgr`` and sent to the Triton helper as float32.

    The output vector is read as three groups: entries ``[0:7]`` race,
    ``[7:9]`` gender and ``[9:18]`` age bracket.

    Args:
        _image: face image array.
        _landmark_info: optional dict with ``points_count``, ``x_locations``
            and ``y_locations`` (presumably numpy arrays, since they are
            indexed with a list -- confirm against the caller).

    Returns:
        dict: ``age_lower_boundary``, ``age_higher_boundary``, ``race_type``
        and ``sexual``.

    Raises:
        NotImplementedError: for an unsupported landmark count, or if
            ``self.inference_helper`` is not a ``TritonInferenceHelper``.
    """
    # Lookup tables, indexed by the arg-max of each output group.
    age_lower = [0, 3, 10, 20, 30, 40, 50, 60, 70]
    age_upper = [2, 9, 19, 29, 39, 49, 59, 69, 100]
    races = [
        RaceType.WHITE,
        RaceType.BLACK,
        RaceType.LATINO_HISPANIC,
        RaceType.EAST_ASIAN,
        RaceType.SOUTHEAST_ASIAN,
        RaceType.INDIAN,
        RaceType.MIDDLE_EASTERN,
    ]
    genders = [SexualType.MALE, SexualType.FEMALE]

    if _landmark_info is None:
        face = cv2.resize(_image, (192, 224))
    else:
        points_count = _landmark_info['points_count']
        if points_count == 0:
            keypoint_indices = list(range(5))
        elif points_count == 106:
            keypoint_indices = [38, 88, 86, 52, 61]
        else:
            raise NotImplementedError(
                f"Cannot align face with {_landmark_info['points_count']} landmark points now"
            )
        xs = _landmark_info['x_locations'][keypoint_indices]
        ys = _landmark_info['y_locations'][keypoint_indices]
        face = face_align(_image, np.stack([xs, ys], axis=1), (192, 224))

    padded_face = center_pad_image_with_specific_base(face, 224, 224, 0, False)
    model_input = force_convert_image_to_bgr(padded_face)

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for fair not implement"
        )

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=model_input.astype(np.float32),
    )
    logits = result['OUTPUT__0'].squeeze(0)

    race_index = np.argmax(softmax(logits[:7], axis=0))
    gender_index = np.argmax(softmax(logits[7:9], axis=0))
    age_index = np.argmax(softmax(logits[9:18], axis=0))

    return {
        'age_lower_boundary': age_lower[age_index],
        'age_higher_boundary': age_upper[age_index],
        'race_type': races[race_index],
        'sexual': genders[gender_index],
    }
def execute(self, _image, _landmark_info=None):
    """Produce a per-pixel face-parsing label map for ``_image``.

    When ``_landmark_info`` is given, ``correct_face_orientation`` supplies
    a corrected image plus a function that maps a result back to the
    original orientation; otherwise the image is used as is. The image is
    resized with ``resize_with_long_side(..., 512)``, padded by
    ``center_pad_image_with_specific_base`` (512 x 512), converted
    BGR -> RGB and sent to the Triton helper as float32. The padding is
    then cropped off, the map is resized back with nearest-neighbour
    interpolation and finally rotated back.

    Args:
        _image: face image array.
        _landmark_info: optional landmark data forwarded to
            ``correct_face_orientation``.

    Returns:
        dict: ``{'semantic_segmentation': label_map}``.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    # No zero-filled placeholder is allocated: every path below either
    # produces the real label map or raises.
    if _landmark_info is not None:
        upright_face, rotate_back = correct_face_orientation(_image, _landmark_info)
    else:
        upright_face = _image

        def rotate_back(_label_map):
            # Nothing was rotated, so nothing has to be undone.
            return _label_map

    original_h, original_w = upright_face.shape[:2]
    resized_image = resize_with_long_side(upright_face, 512)
    resized_h, resized_w = resized_image.shape[:2]
    padded_image, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        resized_image,
        _width_base=512,
        _height_base=512,
        _output_pad_ratio=True)
    candidate_image = cv2.cvtColor(
        force_convert_image_to_bgr(padded_image), cv2.COLOR_BGR2RGB)
    candidate_h, candidate_w = candidate_image.shape[:2]

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for face parsing not implement"
        )

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=candidate_image.astype(np.float32),
    )
    semantic_index = result['OUTPUT__0'].squeeze()

    # Remove the padding.
    left = int(width_pad_ratio * candidate_w)
    top = int(height_pad_ratio * candidate_h)
    semantic_index_without_pad = semantic_index[top:top + resized_h,
                                                left:left + resized_w]

    # Undo the resize; nearest-neighbour keeps label values intact.
    resized_back = cv2.resize(
        semantic_index_without_pad,
        (original_w, original_h),
        interpolation=cv2.INTER_NEAREST,
    )

    # Restore the original image orientation.
    return {'semantic_segmentation': rotate_back(resized_back)}
def execute(self, _image):
    """Recognise the text in ``_image``.

    The image is resized with ``resize_with_height(_image, 32)``, padded by
    ``center_pad_image_with_specific_base`` (width base 4), cast to float32
    and sent to the Triton helper. ``OUTPUT__1`` and ``OUTPUT__0`` are
    handed, in that order, to ``self.ctc_decoder.decode`` and the first
    decoded entry is returned.

    Args:
        _image: text-line image array.

    Returns:
        dict: ``{'text': ..., 'probability': ...}`` taken from items 0 and 1
        of the first decoded entry.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for ncnn not implement"
        )

    fixed_height = resize_with_height(_image, 32)
    model_input = center_pad_image_with_specific_base(
        fixed_height, _width_base=4).astype(np.float32)
    result = self.inference_helper.infer(_need_tensor_check=False,
                                         INPUT__0=model_input)
    first_decoded = self.ctc_decoder.decode(result['OUTPUT__1'],
                                            result['OUTPUT__0'])[0]
    return {
        'text': first_decoded[0],
        'probability': first_decoded[1],
    }
def execute(self, _image):
    """Detect text regions in ``_image`` with the DB model.

    Images with an aspect ratio below 3 are resized by short side (at least
    736) and passed through ``resize_with_specific_base(..., 32, 32)``.
    More elongated images are resized by long side to 736 and centre-padded
    with a 736 x 736 base; for those the padded border is cropped out of
    the score map before post-processing.

    Args:
        _image: input image array.

    Returns:
        dict: ``{'locations': [...]}`` with one ``{'box_info', 'score'}``
        entry per box returned by ``db_post_process``.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    h, w = _image.shape[:2]
    short_side = min(h, w)
    elongated = not (max(h, w) / short_side < 3)
    bgr_image = force_convert_image_to_bgr(_image)

    left_pad, top_pad = 0, 0
    if elongated:
        # The most extreme aspect ratio tested so far is 30:1.
        resized_image = resize_with_long_side(bgr_image, 736)
        network_input, (left_pad, top_pad) = center_pad_image_with_specific_base(
            resized_image, 736, 736, _output_pad_ratio=True
        )
    else:
        resized_image = resize_with_specific_base(
            resize_with_short_side(bgr_image, max(736, short_side)), 32, 32)
        network_input = resized_image

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(f"{self.inference_helper.type_name} helper for db not implement")

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=network_input.astype(np.float32),
    )
    score_map = result['OUTPUT__0']

    if elongated:
        # Cut the padded border off the score map so that only the area
        # covered by the resized image is post-processed.
        resized_h, resized_w = resized_image.shape[:2]
        candidate_h, candidate_w = network_input.shape[:2]
        start_x = int(left_pad * candidate_w) if left_pad != 0 else 0
        start_y = int(top_pad * candidate_h) if top_pad != 0 else 0
        score_map = score_map[...,
                              start_y:start_y + resized_h,
                              start_x:start_x + resized_w]

    boxes, scores = db_post_process(
        score_map, self.threshold, self.bbox_scale_ratio, self.shortest_length)
    return {
        'locations': [
            {'box_info': one_box, 'score': one_score}
            for one_box, one_score in zip(boxes, scores)
        ],
    }
def execute(self, _image):
    """Recognise the text in ``_image`` with the CRNN model.

    The image is resized with ``resize_with_height(_image, 32)``, padded by
    ``center_pad_image_with_specific_base`` (width base 4) and cast to
    float32. Two-dimensional input is converted GRAY -> BGR and
    four-channel input BGRA -> BGR before inference. The model outputs are
    decoded by ``self.ctc_decoder`` and the first decoded entry is returned.

    Args:
        _image: text-line image array.

    Returns:
        dict: ``{'text': ..., 'probability': ...}`` taken from items 0 and 1
        of the first decoded entry.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    fixed_height = resize_with_height(_image, 32)
    model_input = center_pad_image_with_specific_base(
        fixed_height, _width_base=4).astype(np.float32)

    # Bring the input to three channels.
    if model_input.ndim == 2:
        model_input = cv2.cvtColor(model_input, cv2.COLOR_GRAY2BGR)
    elif model_input.shape[-1] == 4:
        model_input = cv2.cvtColor(model_input, cv2.COLOR_BGRA2BGR)

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(f"{self.inference_helper.type_name} helper for crnn not implement")

    result = self.inference_helper.infer(_need_tensor_check=False, INPUT__0=model_input)
    predict_index = result['OUTPUT__1']
    predict_score = result['OUTPUT__0']

    first_decoded = self.ctc_decoder.decode(predict_index, predict_score)[0]
    return {
        'text': first_decoded[0],
        'probability': first_decoded[1],
    }
def execute(self, _image):
    """Recognise the text in ``_image`` with the CRNN model.

    The image is resized with ``resize_with_height(_image, 32)``, padded by
    ``center_pad_image_with_specific_base`` (width base 4), converted with
    ``force_convert_image_to_bgr`` and sent to the Triton helper as float32.
    The model outputs are decoded by ``self.ctc_decoder`` and the first
    decoded entry is returned.

    Args:
        _image: text-line image array.

    Returns:
        dict: ``{'text': ..., 'probability': ...}`` taken from items 0 and 1
        of the first decoded entry.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    fixed_height = resize_with_height(_image, 32)
    padded = center_pad_image_with_specific_base(fixed_height, _width_base=4)
    model_input = force_convert_image_to_bgr(padded)

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for crnn not implement"
        )

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=model_input.astype(np.float32),
    )
    predict_index = result['OUTPUT__1']
    predict_score = result['OUTPUT__0']

    first_decoded = self.ctc_decoder.decode(predict_index, predict_score)[0]
    return {
        'text': first_decoded[0],
        'probability': first_decoded[1],
    }
def execute(self, _image):
    """Detect QR codes in ``_image``.

    The image is resized with
    ``resize_with_long_side(_image, self.candidate_size)``, centre-padded
    with ``self.candidate_size`` as both bases, converted with
    ``force_convert_image_to_gray`` and sent to the Triton helper as
    float32. Prior boxes for five stages are generated, concatenated along
    axis 1 and passed with the model outputs to ``ssd_detect``; the first
    element of its result is iterated as the detections.

    Each detection is read as ``[x_min, y_min, x_max, y_max, ...]``.

    Args:
        _image: input image array.

    Returns:
        dict: ``{'locations': [...]}`` where each entry holds ``box_width``,
        ``box_height``, ``center_x``, ``center_y`` and ``degree`` (always 0).

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    to_return_result = {
        'locations': [],
    }
    resized_image = resize_with_long_side(_image, self.candidate_size)
    resized_h, resized_w = resized_image.shape[:2]
    # Make sure the image fed to the network is rectangular.
    padded_image, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        resized_image,
        _width_base=self.candidate_size,
        _height_base=self.candidate_size,
        _output_pad_ratio=True)
    candidate_image = force_convert_image_to_gray(padded_image)
    candidate_image_h, candidate_image_w = candidate_image.shape[:2]

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for qrcode detect not implement"
        )

    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=candidate_image.astype(np.float32),
    )
    box_location = result['OUTPUT__0'].squeeze(0)
    box_confidence = result['OUTPUT__1'].squeeze(0)

    # (stride, min_size, max_size) for stages 4 to 8, in that order.
    # NOTE(review): stages 5-8 all use a stride of 32 -- confirm this
    # matches the model's feature-map layout.
    prior_box_specs = (
        (16, 50, 100),
        (32, 100, 150),
        (32, 150, 200),
        (32, 200, 300),
        (32, 300, 400),
    )
    all_stage_prior_boxes = np.concatenate([
        generate_prior_boxes(
            candidate_image_h // stride,
            candidate_image_w // stride,
            candidate_image_h,
            candidate_image_w,
            _min_size=min_size,
            _max_size=max_size,
            _aspect_ratios=[2.0, 0.5, 3.0, 0.3],
            _flip=False,
            _clip=False,
            _variance=[0.1, 0.1, 0.2, 0.2],
            _step=stride,
            _offset=0.5,
        )
        for stride, min_size, max_size in prior_box_specs
    ], axis=1)

    detect_result = ssd_detect(candidate_image_h, candidate_image_w,
                               box_location, box_confidence,
                               all_stage_prior_boxes, 2, self.variance,
                               self.score_threshold, self.iou_threshold)[0]

    height_resize_ratio = candidate_image_h / resized_h
    width_resize_ratio = candidate_image_w / resized_w
    for m_detect_qrcode in detect_result:
        x_min, y_min, x_max, y_max = (m_detect_qrcode[0], m_detect_qrcode[1],
                                      m_detect_qrcode[2], m_detect_qrcode[3])
        box_width = (x_max - x_min) * width_resize_ratio
        # Height comes from the y extent (y_max - y_min), not the x extent.
        box_height = (y_max - y_min) * height_resize_ratio
        top_left_x = (x_min - width_pad_ratio) * width_resize_ratio
        top_left_y = (y_min - height_pad_ratio) * height_resize_ratio
        to_return_result['locations'].append({
            'box_width': box_width,
            'box_height': box_height,
            'center_x': top_left_x + box_width / 2,
            'center_y': top_left_y + box_height / 2,
            'degree': 0,
        })
    return to_return_result
def execute(self, _image):
    """Detect faces in ``_image`` with the RetinaFace model.

    The image is resized with ``resize_with_long_side(_image, 512)``,
    centre-padded with a 512 x 512 base, converted BGR -> RGB and sent to
    the Triton helper as float32. Boxes are decoded with ``get_anchors`` and
    ``regress_boxes``, filtered by class and score, passed through ``nms``
    and returned as centre/size boxes.

    Args:
        _image: input image array.

    Returns:
        dict: ``{'locations': [...]}`` where each entry holds ``box_info``
        (``degree``, ``center_x``, ``center_y``, ``box_height``,
        ``box_width``) and ``score``. The list is empty when nothing passes
        the filter.

    Raises:
        NotImplementedError: if ``self.inference_helper`` is not a
            ``TritonInferenceHelper``.
    """
    detections = {
        'locations': [],
    }
    resized = resize_with_long_side(_image, 512)
    resize_h, resize_w = resized.shape[:2]
    padded, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        resized,
        _width_base=512,
        _height_base=512,
        _output_pad_ratio=True)
    bgr = force_convert_image_to_bgr(padded)
    candidate_shape = bgr.shape[:2]
    candidate_h, candidate_w = candidate_shape

    if not isinstance(self.inference_helper, TritonInferenceHelper):
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for retina face detect not implement"
        )

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    result = self.inference_helper.infer(
        _need_tensor_check=False,
        INPUT__0=rgb.astype(np.float32),
    )
    raw_scores = result['OUTPUT__0'].squeeze()
    raw_boxes = result['OUTPUT__1'].squeeze()

    anchors = get_anchors(np.array(candidate_shape))
    decoded_boxes, _ = regress_boxes(anchors, raw_boxes, None, candidate_shape)

    # The raw scores are exponentiated before use (presumably they are
    # log-probabilities -- TODO confirm).
    class_scores = np.exp(raw_scores)
    best_class = np.argmax(class_scores, axis=-1)
    best_score = np.max(class_scores, axis=-1)
    # Keep entries whose best class is index 0 and whose score is high enough.
    keep_mask = (best_class == 0) & (best_score > self.score_threshold)
    kept_scores = best_score[keep_mask]
    kept_boxes = decoded_boxes[keep_mask]
    if kept_scores.shape[0] == 0:
        return detections

    # NMS runs on boxes scaled by the resized image size; the boxes that
    # are reported keep their original scale.
    scaled_boxes = kept_boxes * (resize_w, resize_h, resize_w, resize_h)
    survivors = nms(scaled_boxes, kept_scores, _nms_threshold=self.iou_threshold)

    for one_box, one_score in zip(kept_boxes[survivors], kept_scores[survivors]):
        width = one_box[2] - one_box[0]
        height = one_box[3] - one_box[1]
        # Remove the pad offset, then rescale by padded size / resized size.
        center_x = (one_box[0] + width / 2 - width_pad_ratio) * candidate_w / resize_w
        center_y = (one_box[1] + height / 2 - height_pad_ratio) * candidate_h / resize_h
        detections['locations'].append({
            'box_info': {
                'degree': 0,
                'center_x': center_x,
                'center_y': center_y,
                'box_height': height * candidate_h / resize_h,
                'box_width': width * candidate_w / resize_w,
            },
            'score': one_score,
        })
    return detections