def execute(self, _image):
    to_return_result = {
        'points_count': 106,
        'x_locations': [0] * 106,
        'y_locations': [0] * 106,
    }
    resized_image = resize_with_long_side(_image, 192)
    resized_h, resized_w = resized_image.shape[:2]
    padded_image, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        resized_image,
        _width_base=192,
        _height_base=192,
        _output_pad_ratio=True)
    candidate_image = cv2.cvtColor(force_convert_image_to_bgr(padded_image), cv2.COLOR_BGR2RGB)
    candidate_h, candidate_w = candidate_image.shape[:2]
    if isinstance(self.inference_helper, TritonInferenceHelper):
        result = self.inference_helper.infer(_need_tensor_check=False,
                                             INPUT__0=candidate_image.astype(np.float32))
        coordinates = result['OUTPUT__0'].squeeze()
    else:
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for 106p landmark not implemented")
    # The network outputs (x, y) pairs in [-1, 1] relative to the padded image;
    # map back to pixels, strip the pad, then normalize by the resized image size.
    remapped_coordinates = np.reshape(coordinates, (-1, 2))
    to_return_result['x_locations'] = \
        ((remapped_coordinates[:, 0] + 1) * (candidate_w // 2) - width_pad_ratio * candidate_w) / resized_w
    to_return_result['y_locations'] = \
        ((remapped_coordinates[:, 1] + 1) * (candidate_h // 2) - height_pad_ratio * candidate_h) / resized_h
    return to_return_result
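# Nearly every op above relies on two geometry helpers whose implementation is
# not shown here. Minimal sketches follow, under assumptions inferred from the
# call sites: resize_with_long_side scales so the long side hits a target while
# keeping aspect ratio, and center_pad_image_with_specific_base pads
# symmetrically up to a multiple of the given bases, optionally returning the
# left/top pad as a ratio of the padded size. Signatures and rounding behavior
# are assumptions, not the repo's actual code.
import cv2
import numpy as np


def resize_with_long_side(_image, _long_side=512):
    # Choose the scale so max(h, w) becomes _long_side.
    h, w = _image.shape[:2]
    scale = _long_side / max(h, w)
    return cv2.resize(_image, (int(round(w * scale)), int(round(h * scale))),
                      interpolation=cv2.INTER_LINEAR)


def center_pad_image_with_specific_base(_image, _width_base=32, _height_base=32,
                                        _pad_value=0, _output_pad_ratio=False):
    # Pad with _pad_value so both sides become multiples of the bases,
    # splitting the pad evenly between the two borders.
    h, w = _image.shape[:2]
    target_h = int(np.ceil(h / _height_base)) * _height_base
    target_w = int(np.ceil(w / _width_base)) * _width_base
    top = (target_h - h) // 2
    left = (target_w - w) // 2
    padded = cv2.copyMakeBorder(_image, top, target_h - h - top,
                                left, target_w - w - left,
                                cv2.BORDER_CONSTANT, value=_pad_value)
    if _output_pad_ratio:
        return padded, (left / target_w, top / target_h)
    return padded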
def execute(self, _image):
    to_return_result = {
        'matting_alpha': np.zeros((_image.shape[0], _image.shape[1]), dtype=np.float32),
    }
    original_h, original_w = _image.shape[:2]
    resized_image = resize_with_long_side(_image, 512)
    padded_image, (left_margin_ratio, top_margin_ratio) = \
        center_pad_image_with_specific_base(resized_image, 512, 512, 0, True)
    candidate_image = force_convert_image_to_bgr(padded_image)
    if isinstance(self.inference_helper, TritonInferenceHelper):
        result = self.inference_helper.infer(_need_tensor_check=False,
                                             INPUT__0=candidate_image.astype(np.float32))
        matting_result = result['OUTPUT__0'].squeeze()
    else:
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for bise human matting not implemented")
    # Channel 3 of the network output holds the alpha matte.
    alpha_result = matting_result[3, ...]
    matting_result_without_pad = remove_image_pad(alpha_result, resized_image,
                                                  left_margin_ratio, top_margin_ratio)
    resize_back_matting_result = cv2.resize(matting_result_without_pad, (original_w, original_h),
                                            interpolation=cv2.INTER_LINEAR)
    to_return_result['matting_alpha'] = resize_back_matting_result
    return to_return_result
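# remove_image_pad is assumed to be the inverse of the padding step: given the
# padded network output, the pre-pad (resized) image, and the left/top pad
# ratios, it crops the output back to the resized image's extent. A sketch
# under that assumption:
def remove_image_pad(_padded_result, _resized_image, _left_pad_ratio, _top_pad_ratio):
    resized_h, resized_w = _resized_image.shape[:2]
    padded_h, padded_w = _padded_result.shape[:2]
    start_x = int(_left_pad_ratio * padded_w)
    start_y = int(_top_pad_ratio * padded_h)
    return _padded_result[start_y:start_y + resized_h, start_x:start_x + resized_w]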
def text_detect(_image_info):
    """
    Text detection

    Args:
        _image_info: info of the image to run detection on

    Returns:
        all boxes found by the detector
    """
    to_return_result = {'box_info': [], 'box_count': 0}
    oss_handler = get_oss_handler()
    img = oss_handler.download_image_file(_image_info['bucket_name'], _image_info['path'])
    # Cap the long side at 1024 to bound detection cost.
    if max(img.shape[:2]) > 1024:
        candidate_img = resize_with_long_side(img, 1024)
    else:
        candidate_img = img
    detect_result = db_res18_op.execute(candidate_img)
    for m_box in detect_result['locations']:
        m_box_info = m_box['box_info']
        m_box_score = m_box['score']
        to_return_result['box_info'].append({
            'degree': m_box_info['degree'],
            'center_x': m_box_info['center_x'],
            'center_y': m_box_info['center_y'],
            'box_height': m_box_info['box_height'],
            'box_width': m_box_info['box_width'],
            'score': m_box_score,
        })
    to_return_result['box_count'] = len(detect_result['locations'])
    return to_return_result
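# Example call. text_detect takes OSS coordinates rather than pixel data, so
# the caller only ships a reference to the stored image; the bucket name and
# object path below are hypothetical placeholders.
if __name__ == '__main__':
    demo_image_info = {
        'bucket_name': 'demo-bucket',   # hypothetical bucket
        'path': 'images/sample.jpg',    # hypothetical object path
    }
    detection = text_detect(demo_image_info)
    print(f"found {detection['box_count']} boxes")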
def execute(self, _image):
    # Cap the long side at 1024 to keep the Hough transform affordable.
    resized_img = resize_with_long_side(_image, 1024)
    gray_image = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    edges = canny(gray_image, sigma=self.sigma)
    h, a, d = hough_line(edges)
    _, candidate_angle_bins, _ = hough_line_peaks(h, a, d, num_peaks=self.num_peaks)
    if len(candidate_angle_bins) == 0:
        return _image
    absolute_deviations = [calculate_deviation(m_angle) for m_angle in candidate_angle_bins]
    average_deviation = np.mean(np.rad2deg(absolute_deviations))
    angle_degrees = [np.rad2deg(x) for x in candidate_angle_bins]
    # Bucket each peak angle into one of four quadrant bins; an angle lands in a
    # bin when its remapped value plus the average deviation falls near 45 degrees.
    bin_0_45 = []
    bin_45_90 = []
    bin_0_45n = []
    bin_45_90n = []
    low_bound_of_angle = 44
    high_bound_of_angle = 46
    for m_angle_degree in angle_degrees:
        for m_bin, m_new_angle_degree in zip(
                [bin_45_90, bin_0_45, bin_0_45n, bin_45_90n],
                [90 - m_angle_degree, m_angle_degree, -m_angle_degree, 90 + m_angle_degree]):
            deviation_sum = int(m_new_angle_degree + average_deviation)
            if low_bound_of_angle <= deviation_sum <= high_bound_of_angle:
                m_bin.append(m_angle_degree)
    # Use the most populated bin; fall back to all peak angles when every bin is empty.
    angle_bins = [bin_0_45, bin_45_90, bin_0_45n, bin_45_90n]
    selected_angle_bin = max(angle_bins, key=len)
    if len(selected_angle_bin):
        candidate_degrees = get_max_frequency_element(selected_angle_bin)
    else:
        candidate_degrees = get_max_frequency_element(angle_degrees)
    mean_degree = np.mean(candidate_degrees)
    target_to_rotate_angle = mean_degree
    if 0 <= mean_degree <= 90:
        target_to_rotate_angle = mean_degree - 90
    if -90 <= mean_degree < 0:
        target_to_rotate_angle = 90 + mean_degree
    rotated_image, _ = rotate_degree_img(_image, -target_to_rotate_angle)
    return rotated_image
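# The deskew op assumes two small helpers. Sketches under assumption:
# calculate_deviation measures how far a Hough angle (in radians) sits from the
# 45-degree diagonal, and get_max_frequency_element returns the most frequent
# value(s) in a list. Both names and signatures are inferred from the call
# sites above, not confirmed implementations.
from collections import Counter

import numpy as np


def calculate_deviation(_angle_in_radians):
    # Distance of |angle| from pi/4, the diagonal reference direction.
    return np.abs(np.pi / 4 - np.abs(_angle_in_radians))


def get_max_frequency_element(_elements):
    # All elements that share the highest occurrence count.
    counts = Counter(_elements)
    top_frequency = max(counts.values())
    return [m_element for m_element, m_count in counts.items() if m_count == top_frequency]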
def execute(self, _image, _landmark_info=None):
    to_return_result = {
        'semantic_segmentation': np.zeros((_image.shape[0], _image.shape[1]), dtype=np.uint8),
    }
    if _landmark_info is not None:
        corrected_face_image, rotate_back_function = correct_face_orientation(_image, _landmark_info)
    else:
        corrected_face_image = _image

        def rotate_back_function(_result_image):
            # No orientation correction was applied, so there is nothing to undo.
            return _result_image
    original_h, original_w = corrected_face_image.shape[:2]
    resized_image = resize_with_long_side(corrected_face_image, 512)
    resized_h, resized_w = resized_image.shape[:2]
    padded_image, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        resized_image,
        _width_base=512,
        _height_base=512,
        _output_pad_ratio=True)
    candidate_image = cv2.cvtColor(force_convert_image_to_bgr(padded_image), cv2.COLOR_BGR2RGB)
    candidate_h, candidate_w = candidate_image.shape[:2]
    if isinstance(self.inference_helper, TritonInferenceHelper):
        result = self.inference_helper.infer(_need_tensor_check=False,
                                             INPUT__0=candidate_image.astype(np.float32))
        semantic_index = result['OUTPUT__0'].squeeze()
    else:
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for face parsing not implemented")
    left_width_pad = int(width_pad_ratio * candidate_w)
    top_height_pad = int(height_pad_ratio * candidate_h)
    # Strip the pad.
    semantic_index_without_pad = semantic_index[top_height_pad:top_height_pad + resized_h,
                                                left_width_pad:left_width_pad + resized_w]
    # Undo the resize; nearest-neighbor keeps the class indices intact.
    resize_back_semantic_index = cv2.resize(semantic_index_without_pad, (original_w, original_h),
                                            interpolation=cv2.INTER_NEAREST)
    # Restore the original image orientation.
    to_return_result['semantic_segmentation'] = rotate_back_function(resize_back_semantic_index)
    return to_return_result
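# correct_face_orientation is assumed to rotate the face roughly upright and to
# return a closure that undoes the rotation, so the parsing result can be
# mapped back onto the original image. A minimal sketch of that contract,
# snapping to 90-degree steps; the 'left_eye'/'right_eye' keys, the eye
# convention, and the rotation sign are all hypothetical:
import cv2
import numpy as np


def correct_face_orientation(_image, _landmark_info):
    # Hypothetical layout: the caller precomputes one (x, y) center per eye.
    left_eye = np.asarray(_landmark_info['left_eye'])
    right_eye = np.asarray(_landmark_info['right_eye'])
    # Roll of the eye line in degrees, then snapped to the nearest quarter turn.
    roll = np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0]))
    quarter_turns = int(np.round(roll / 90)) % 4
    forward = [None, cv2.ROTATE_90_COUNTERCLOCKWISE, cv2.ROTATE_180, cv2.ROTATE_90_CLOCKWISE]
    backward = [None, cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_180, cv2.ROTATE_90_COUNTERCLOCKWISE]

    def rotate_back_function(_result_image):
        # Apply the inverse quarter-turn rotation (identity when upright).
        if backward[quarter_turns] is None:
            return _result_image
        return cv2.rotate(_result_image, backward[quarter_turns])

    corrected = _image if forward[quarter_turns] is None else cv2.rotate(_image, forward[quarter_turns])
    return corrected, rotate_back_function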
def execute(self, _image):
    to_return_result = {
        'locations': [],
    }
    h, w = _image.shape[:2]
    aspect_ratio = max(h, w) / min(h, w)
    bgr_image = force_convert_image_to_bgr(_image)
    need_crop = False
    left_pad_ratio, top_pad_ratio = 0, 0
    if aspect_ratio < 3:
        # Near-square images: bring the short side up to at least 736, then
        # round both sides to a multiple of 32 for the network.
        resized_image = resize_with_specific_base(
            resize_with_short_side(bgr_image, max(736, min(h, w))), 32, 32)
        candidate_image = resized_image
    else:
        # The most extreme aspect ratio tested so far is 30:1, so very long
        # images are fit into a fixed 736x736 padded canvas instead.
        resized_image = resize_with_long_side(bgr_image, 736)
        candidate_image, (left_pad_ratio, top_pad_ratio) = center_pad_image_with_specific_base(
            resized_image, 736, 736, _output_pad_ratio=True)
        need_crop = True
    if isinstance(self.inference_helper, TritonInferenceHelper):
        result = self.inference_helper.infer(_need_tensor_check=False,
                                             INPUT__0=candidate_image.astype(np.float32))
        score_map = result['OUTPUT__0']
    else:
        raise NotImplementedError(f"{self.inference_helper.type_name} helper for db not implemented")
    if need_crop:
        # Crop the padded border off the score map before post-processing.
        resized_h, resized_w = resized_image.shape[:2]
        candidate_h, candidate_w = candidate_image.shape[:2]
        start_x = int(left_pad_ratio * candidate_w)
        start_y = int(top_pad_ratio * candidate_h)
        score_map = score_map[..., start_y:start_y + resized_h, start_x:start_x + resized_w]
    boxes, scores = db_post_process(score_map, self.threshold, self.bbox_scale_ratio,
                                    self.shortest_length)
    for m_box, m_score in zip(boxes, scores):
        to_return_result['locations'].append({
            'box_info': m_box,
            'score': m_score,
        })
    return to_return_result
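# The DB op also uses two resize helpers not shown elsewhere. Sketches under
# assumption: resize_with_short_side scales so the short side hits a target,
# and resize_with_specific_base rounds both sides to a multiple of a base.
# Rounding to the nearest multiple is an assumption; the real helper may round
# up or down instead.
import cv2


def resize_with_short_side(_image, _short_side=736):
    h, w = _image.shape[:2]
    scale = _short_side / min(h, w)
    return cv2.resize(_image, (int(round(w * scale)), int(round(h * scale))),
                      interpolation=cv2.INTER_LINEAR)


def resize_with_specific_base(_image, _width_base=32, _height_base=32):
    h, w = _image.shape[:2]
    target_w = max(_width_base, int(round(w / _width_base)) * _width_base)
    target_h = max(_height_base, int(round(h / _height_base)) * _height_base)
    return cv2.resize(_image, (target_w, target_h), interpolation=cv2.INTER_LINEAR)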
def execute(self, _image):
    to_return_result = {
        'locations': [],
    }
    resized_image = resize_with_long_side(_image, self.candidate_size)
    resized_h, resized_w = resized_image.shape[:2]
    # Ensure the image fed to the network is square.
    padded_image, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        resized_image,
        _width_base=self.candidate_size,
        _height_base=self.candidate_size,
        _output_pad_ratio=True)
    candidate_image = force_convert_image_to_gray(padded_image)
    candidate_image_h, candidate_image_w = candidate_image.shape[:2]
    if isinstance(self.inference_helper, TritonInferenceHelper):
        result = self.inference_helper.infer(_need_tensor_check=False,
                                             INPUT__0=candidate_image.astype(np.float32))
        box_location = result['OUTPUT__0'].squeeze(0)
        box_confidence = result['OUTPUT__1'].squeeze(0)
    else:
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for qrcode detect not implemented")
    # Prior boxes for stage 4 (stride 16) and stages 5-8 (stride 32) with
    # growing anchor sizes, concatenated in the same order as the network output.
    prior_box_configs = (
        (16, 50, 100),
        (32, 100, 150),
        (32, 150, 200),
        (32, 200, 300),
        (32, 300, 400),
    )
    all_stage_prior_boxes = np.concatenate([
        generate_prior_boxes(
            candidate_image_h // m_step,
            candidate_image_w // m_step,
            candidate_image_h,
            candidate_image_w,
            _min_size=m_min_size,
            _max_size=m_max_size,
            _aspect_ratios=[2.0, 0.5, 3.0, 0.3],
            _flip=False,
            _clip=False,
            _variance=[0.1, 0.1, 0.2, 0.2],
            _step=m_step,
            _offset=0.5,
        )
        for m_step, m_min_size, m_max_size in prior_box_configs
    ], axis=1)
    detect_result = ssd_detect(candidate_image_h, candidate_image_w,
                               box_location, box_confidence, all_stage_prior_boxes,
                               2, self.variance, self.score_threshold, self.iou_threshold)[0]
    height_resize_ratio = candidate_image_h / resized_h
    width_resize_ratio = candidate_image_w / resized_w
    for m_detect_qrcode in detect_result:
        m_detect_bbox_width = (m_detect_qrcode[2] - m_detect_qrcode[0]) * width_resize_ratio
        m_detect_bbox_height = (m_detect_qrcode[3] - m_detect_qrcode[1]) * height_resize_ratio
        m_detect_bbox_top_left_x = (m_detect_qrcode[0] - width_pad_ratio) * width_resize_ratio
        m_detect_bbox_top_left_y = (m_detect_qrcode[1] - height_pad_ratio) * height_resize_ratio
        to_return_result['locations'].append({
            'box_width': m_detect_bbox_width,
            'box_height': m_detect_bbox_height,
            'center_x': m_detect_bbox_top_left_x + m_detect_bbox_width / 2,
            'center_y': m_detect_bbox_top_left_y + m_detect_bbox_height / 2,
            'degree': 0,
        })
    return to_return_result
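# The detectors here all report boxes as center/size plus a rotation degree. A
# small helper for turning that into four corner points for drawing; it assumes
# the coordinates are already in pixels and that the degree follows OpenCV's
# RotatedRect convention, neither of which is guaranteed by the ops above
# (several of them emit coordinates normalized to the resized image).
import cv2
import numpy as np


def box_info_to_corner_points(_box_info):
    rotated_rect = (
        (_box_info['center_x'], _box_info['center_y']),
        (_box_info['box_width'], _box_info['box_height']),
        _box_info['degree'],
    )
    return cv2.boxPoints(rotated_rect).astype(np.int32)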
def execute(self, _image):
    to_return_result = {
        'locations': [],
    }
    resized_image = resize_with_long_side(_image, 512)
    resized_h, resized_w = resized_image.shape[:2]
    padded_image, (width_pad_ratio, height_pad_ratio) = center_pad_image_with_specific_base(
        resized_image,
        _width_base=512,
        _height_base=512,
        _output_pad_ratio=True)
    candidate_image = force_convert_image_to_bgr(padded_image)
    candidate_h, candidate_w = candidate_image.shape[:2]
    if isinstance(self.inference_helper, TritonInferenceHelper):
        rgb_image = cv2.cvtColor(candidate_image, cv2.COLOR_BGR2RGB)
        result = self.inference_helper.infer(_need_tensor_check=False,
                                             INPUT__0=rgb_image.astype(np.float32))
        filter_scores = result['OUTPUT__0'].squeeze()
        box = result['OUTPUT__1'].squeeze()
    else:
        raise NotImplementedError(
            f"{self.inference_helper.type_name} helper for retina face detect not implemented")
    anchors = get_anchors(np.array(candidate_image.shape[:2]))
    all_boxes, _ = regress_boxes(anchors, box, None, candidate_image.shape[:2])
    # Exponentiate the class logits; class 0 is the face class, and boxes only
    # survive when the face score clears the threshold.
    exp_box_score = np.exp(filter_scores)
    face_classification_index = np.argmax(exp_box_score, axis=-1)
    max_classification_score = np.max(exp_box_score, axis=-1)
    candidate_box_index = (face_classification_index == 0) & \
                          (max_classification_score > self.score_threshold)
    filter_scores = max_classification_score[candidate_box_index]
    filtered_box = all_boxes[candidate_box_index]
    if len(filter_scores) == 0:
        return to_return_result
    filtered_box_without_normalization = filtered_box * (resized_w, resized_h, resized_w, resized_h)
    final_box_index = nms(filtered_box_without_normalization, filter_scores,
                          _nms_threshold=self.iou_threshold)
    final_boxes = filtered_box[final_box_index]
    final_scores = filter_scores[final_box_index]
    for m_box, m_score in zip(final_boxes, final_scores):
        m_box_width = m_box[2] - m_box[0]
        m_box_height = m_box[3] - m_box[1]
        # Strip the pad ratio, then rescale from the padded frame back to the
        # resized frame.
        m_box_center_x = (m_box[0] + m_box_width / 2 - width_pad_ratio) * candidate_w / resized_w
        m_box_center_y = (m_box[1] + m_box_height / 2 - height_pad_ratio) * candidate_h / resized_h
        box_info = {
            'degree': 0,
            'center_x': m_box_center_x,
            'center_y': m_box_center_y,
            'box_height': m_box_height * candidate_h / resized_h,
            'box_width': m_box_width * candidate_w / resized_w,
        }
        to_return_result['locations'].append({
            'box_info': box_info,
            'score': m_score,
        })
    return to_return_result
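# nms above is assumed to be standard greedy IoU suppression over
# (x1, y1, x2, y2) boxes, returning the indices of the boxes that survive; the
# box layout matches how the op computes widths and heights. A minimal sketch:
import numpy as np


def nms(_boxes, _scores, _nms_threshold=0.5):
    x1, y1, x2, y2 = _boxes[:, 0], _boxes[:, 1], _boxes[:, 2], _boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = np.argsort(_scores)[::-1]
    keep = []
    while order.size > 0:
        m_index = order[0]
        keep.append(m_index)
        # Intersection of the current best box with every remaining box.
        inter_w = np.maximum(0.0, np.minimum(x2[m_index], x2[order[1:]])
                             - np.maximum(x1[m_index], x1[order[1:]]))
        inter_h = np.maximum(0.0, np.minimum(y2[m_index], y2[order[1:]])
                             - np.maximum(y1[m_index], y1[order[1:]]))
        inter = inter_w * inter_h
        iou = inter / (areas[m_index] + areas[order[1:]] - inter)
        # Drop every remaining box that overlaps the kept one too much.
        order = order[1:][iou <= _nms_threshold]
    return keep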