def create_cropped_labelme(self, c_point1, c_point2, output_img, theshold: float=1.0):
    """Write a labelme-style JSON annotation for a rectangular crop.

    Every shape in ``self.l_data["shapes"]`` is converted to a ``BBox`` and
    re-expressed relative to the crop frame via
    ``BBox.coord_in_cropped_frame``. Shapes whose resulting ``xmax`` is
    greater than 0 are kept as ``"rectangle"`` shapes.

    Args:
        c_point1: First corner of the crop frame (iterable of coordinates).
        c_point2: Second corner of the crop frame (iterable of coordinates).
        output_img: Path of the cropped image. It is stored as ``imagePath``
            and the JSON file is written next to it with the extension
            replaced by ``.json``.
        theshold: Forwarded unchanged to ``coord_in_cropped_frame``
            (parameter name kept as-is for backward compatibility).

    Returns:
        ``True`` if at least one shape was kept (and the JSON file was
        written), ``False`` otherwise (nothing is written).
    """
    # Function-scope import: the file-level import block is not visible here.
    import os

    data = dict()
    data["version"] = self.l_data["version"]
    data["flags"] = dict()
    data["shapes"] = []

    # Flatten the two corner points into one coordinate list for BBox.
    frame_box = BBox.from_list([*c_point1, *c_point2]).to_int()

    for shape in self.l_data["shapes"]:
        flat_points = [coord for point in shape["points"] for coord in point]
        bbox = BBox.from_list(flat_points)
        fin_box = bbox.coord_in_cropped_frame(frame_box, theshold)
        # NOTE(review): xmax > 0 is presumably how coord_in_cropped_frame
        # signals "box belongs to this crop" - confirm against BBox.
        if fin_box.xmax > 0:
            data["shapes"].append({
                "label": shape["label"],
                "points": [[fin_box.xmin, fin_box.ymin],
                           [fin_box.xmax, fin_box.ymax]],
                "group_id": None,
                "shape_type": "rectangle",
                "flags": {},
            })

    useful_img = bool(data["shapes"])

    data["imagePath"] = output_img
    data["imageData"] = None
    data["imageHeight"] = frame_box.height
    data["imageWidth"] = frame_box.width

    if useful_img:
        # splitext only strips the final extension, so dots elsewhere in the
        # path (e.g. "./out/img.png", "run.v2/img.png") no longer corrupt the
        # target path, and extension-less names no longer raise IndexError.
        output_json = f'{os.path.splitext(output_img)[0]}.json'
        with open(output_json, 'w') as outfile:
            json.dump(data, outfile, indent=2)
    return useful_img
def filter_predictions(self, predict_dict, iou_thres1=0.5, iou_thres2=0.1):
    """
    Returns the predictions after removing overlapping bounding boxes.

    Every pair of predictions is compared by IoU:

    - iou_thres1: to remove redundant overlap. When the IoU exceeds it, the
      prediction with the lower score is dropped (the earlier one on ties).
    - iou_thres2: to remove overlap caused by "chop and merge". When the IoU
      lies between iou_thres2 and iou_thres1 and both predictions share the
      same class, the one with the smaller box area is dropped (the earlier
      one on ties).

    ``predict_dict`` is updated in place (all seven ``*_list`` entries are
    replaced by filtered lists) and also returned.
    """
    list_keys = (
        'score_list',
        'bbox_list',
        'pred_class_list',
        'pred_masks_list',
        'pred_keypoints_list',
        'vis_keypoints_list',
        'kpt_confidences_list',
    )
    score_list = predict_dict['score_list']
    bbox_list = predict_dict['bbox_list']
    pred_class_list = predict_dict['pred_class_list']
    total = len(score_list)

    # A set gives O(1) membership tests and cannot collect duplicates.
    removed = set()
    for i in range(total - 1):
        if i in removed:
            continue
        for j in range(i + 1, total):
            if j in removed:
                continue
            iou = BBox.iou(bbox_list[i], bbox_list[j])
            if iou > iou_thres1:
                loser = j if score_list[i] > score_list[j] else i
            elif iou > iou_thres2 and pred_class_list[i] == pred_class_list[j]:
                loser = j if bbox_list[i].area > bbox_list[j].area else i
            else:
                continue
            removed.add(loser)
            if loser == i:
                # Box i is gone; it must not go on suppressing later boxes.
                break

    kept = [idx for idx in range(total) if idx not in removed]
    for key in list_keys:
        values = predict_dict[key]
        predict_dict[key] = [values[idx] for idx in kept]
    return predict_dict
def rcnn_cropped(self, image, pred_class, bbox, output):
    """Run the second-stage detector on a padded crop and draw its results.

    Only acts when ``pred_class`` is ``"outlet"`` or ``"plumbing"``: the box
    is padded by 50 on every side, that region of a copy of ``image`` is
    passed to ``self.rcnn_OCR``, and every returned detection whose score
    exceeds ``self.confidence_threshold2`` is drawn on ``output`` (shifted
    back by the crop's top-left corner) together with a grey line joining the
    detection's centre to the crop's centre.

    Side effect: a falsy ``self.confidence_threshold2`` is replaced by 0.01
    the first time a detection is examined.

    Returns:
        ``output`` with the drawings applied (unchanged for other classes).
    """
    printj.yellow(bbox)
    orig_copy = image.copy()
    printj.cyan(pred_class)

    if pred_class not in ["outlet", "plumbing"]:
        return output

    crop_box = bbox.pad(pad_left=50, pad_right=50, pad_top=50, pad_bottom=50)
    crop_img = orig_copy[crop_box.ymin:crop_box.ymax,
                         crop_box.xmin:crop_box.xmax, :]
    predict_dict = self.rcnn_OCR(crop_img)
    det_scores = predict_dict['score_list']
    det_boxes = predict_dict['bbox_list']
    det_classes = predict_dict['pred_class_list']

    for det_score, det_class, det_box in zip(det_scores, det_classes, det_boxes):
        if not self.confidence_threshold2:
            self.confidence_threshold2 = 0.01
        if det_score > self.confidence_threshold2:
            # Translate crop-relative coordinates back to the full image.
            shifted = BBox(xmin=det_box.xmin + crop_box.xmin,
                           ymin=det_box.ymin + crop_box.ymin,
                           xmax=det_box.xmax + crop_box.xmin,
                           ymax=det_box.ymax + crop_box.ymin)
            output = draw_bbox(img=output, bbox=shifted, color=255,
                               show_label=True, text=RENAME2[det_class],
                               thickness=2, text_size=1)
            output = cv2.line(output, shifted.center, crop_box.center,
                              (150, 150, 150), 2)
    return output
def draw_gt(self, image_name, output):
    """Draw the ground-truth boxes of ``image_name`` and log their statistics.

    When ``self.gt_path`` is falsy nothing is done. Otherwise the image entry
    whose ``file_name`` equals ``image_name`` is looked up in
    ``self.gt_data["images"]`` (setting ``self.image_id``), every annotation
    of that image is drawn on ``output`` in colour ``[0, 0, 255]``, and one
    row of size statistics per annotation is appended to ``self.df``.

    Args:
        image_name: File name to match against the ``file_name`` entries.
        output: Image to draw on.

    Returns:
        ``(1, output)`` when ground truth is configured, ``(0, output)``
        otherwise.
    """
    if not self.gt_path:
        return 0, output

    def seperate_sizes(row, seperate_type="default"):
        # Bucket the area into small (< 32^2), medium (< 96^2) or large and
        # store both the bucket index and its one-hot encoding.
        area = row[f'bbox_area_{seperate_type}']
        if area < 32**2:
            size = 0
        elif area < 96**2:
            size = 1
        else:
            size = 2
        # The "large" case used to write 2 into bbox_area_* instead of
        # bbox_size_*, clobbering the area and leaving the size unset.
        row[f'bbox_size_{seperate_type}'] = size
        row[f'bbox_s_{seperate_type}'] = int(size == 0)
        row[f'bbox_m_{seperate_type}'] = int(size == 1)
        row[f'bbox_l_{seperate_type}'] = int(size == 2)

    def check_size(row, width, height):
        # One-hot flags for the image resolution in both orientations.
        row[f'{width} x {height}'] = int(
            row['width'] == width and row['height'] == height)
        row[f'{height} x {width}'] = int(
            row['width'] == height and row['height'] == width)

    row = dict()
    for image in self.gt_data["images"]:
        if image["file_name"] == image_name:
            self.image_id = image["id"]
            row = {'img_name': image["file_name"],
                   'width': image["width"],
                   'height': image["height"],
                   }

    # NOTE(review): the original indentation was lost; one row per matching
    # annotation is assumed here (the columns are per-box statistics) -
    # confirm against the upstream file.
    for ann in self.gt_data["annotations"]:
        if ann["image_id"] != self.image_id:
            continue
        gt_bbox = BBox.from_list(bbox=ann["bbox"], input_format='pminsize')
        output = draw_bbox(img=output, bbox=gt_bbox, show_bbox=True,
                           show_label=False, color=[0, 0, 255], thickness=2)
        row['bbox_width'] = ann["bbox"][2]
        row['bbox_height'] = ann["bbox"][3]
        row['bbox_area_default'] = ann["area"]
        # Area rescaled as if the image had side x side pixels.
        for side in (1500, 1024, 800):
            row[f'bbox_area_s{side}'] = ann["area"] * \
                (side*side)/(row['width']*row['height'])

        for size_tag in ("default", "s1500", "s1024", "s800"):
            seperate_sizes(row, size_tag)

        for width, height in ((2048, 1536), (1024, 768), (854, 640), (640, 480)):
            check_size(row, width, height)

        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent that works on old and new versions alike.
        self.df = pd.concat(
            [self.df, pd.DataFrame(row, index=[self.image_id])])
    return 1, output
def draw_infer(self, show_max_score_only, show_class_label,
               show_class_label_score_only, show_keypoint_label, show_bbox,
               show_keypoints, show_segmentation, color_bbox,
               transparent_mask, transparency_alpha, ignore_keypoint_idx,
               output, score_list, bbox_list, pred_class_list,
               pred_masks_list, pred_keypoints_list, vis_keypoints_list,
               kpt_confidences_list, show_legends=False):
    """Draw predictions on a copy of ``output`` and record them.

    Every prediction is appended to ``self.pred_dataset`` as a dict with
    ``image_id``, ``category_id``, ``bbox`` (``'pminsize'`` format),
    ``score`` and, when keypoints are present, a flat ``keypoints`` list of
    ``x, y, 1`` triples.

    Drawing modes:
        * ``show_max_score_only`` false: every prediction is drawn (mask,
          box with class and/or score label, keypoints) as controlled by the
          ``show_*`` flags.
        * ``show_max_score_only`` true: only the highest-scoring prediction
          of each class (score > 0) is drawn, after all predictions have
          been recorded.

    ``color_bbox`` overrides the per-class colour from ``self.palette``.
    When it is ``None`` and ``show_legends`` is set, the class names are
    written onto the image in their palette colours.

    Side effects: when ``self.gt_path`` is ``None`` the next value of
    ``self.counter`` is stored in ``self.img_id_without_gt``.

    Returns:
        The annotated copy of ``output``.
    """
    if self.gt_path is None:
        self.img_id_without_gt = next(self.counter)

    max_score_list = dict()
    max_score_pred_list = dict()
    output = output.copy()

    if show_max_score_only:
        for class_name in self.class_names:
            max_score_list[class_name] = -1

    # Class-name legend, stacked upwards from the bottom-left corner.
    if color_bbox is None and show_legends:
        for i, name in enumerate(self.class_names):
            cv2.putText(
                img=output, text=name,
                org=(5, output.shape[0]-(100 + 70*i)),
                fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=2,
                color=self.palette[i], thickness=2,
                bottomLeftOrigin=False)

    predictions = zip(score_list, pred_class_list, bbox_list,
                      pred_masks_list, pred_keypoints_list,
                      vis_keypoints_list, kpt_confidences_list)
    for score, pred_class, bbox, mask, keypoints, vis_keypoints, kpt_confidences in predictions:
        cat_id = self.class_names.index(pred_class)
        _color_bbox = color_bbox if color_bbox else self.palette[cat_id]

        if show_max_score_only:
            # Only remember the best prediction per class; drawn after the loop.
            if max_score_list[pred_class] < score:
                max_score_list[pred_class] = score
                max_score_pred_list[pred_class] = {
                    "score": score,
                    "pred_class": pred_class,
                    "bbox": bbox,
                    "mask": mask,
                    "keypoints": keypoints,
                    "vis_keypoints": vis_keypoints,
                    "kpt_confidences": kpt_confidences,
                }
        else:
            if mask is not None and show_segmentation:
                output = draw_mask_bool(img=output, mask_bool=mask,
                                        transparent=transparent_mask,
                                        alpha=transparency_alpha)
            if show_class_label_score_only:
                output = draw_bbox(img=output, bbox=bbox, color=_color_bbox,
                                   show_bbox=show_bbox,
                                   show_label=show_class_label,
                                   text=f'{round(score, 2)}')
            else:
                # Class name on the right-hand label, score on the default one.
                output = draw_bbox(img=output, bbox=bbox, color=_color_bbox,
                                   show_bbox=show_bbox,
                                   show_label=show_class_label,
                                   text=f'{pred_class}',
                                   label_orientation='right')
                output = draw_bbox(img=output, bbox=bbox, color=_color_bbox,
                                   show_bbox=show_bbox,
                                   show_label=show_class_label,
                                   text=f'{round(score, 2)}')
            if keypoints is not None and show_keypoints:
                output = draw_keypoints(img=output, keypoints=keypoints,
                                        show_keypoints=show_keypoints,
                                        keypoint_labels=self.keypoint_names,
                                        show_keypoints_labels=show_keypoint_label,
                                        ignore_kpt_idx=ignore_keypoint_idx)

        # Record the prediction regardless of what was drawn.
        xmin, ymin, xmax, ymax = bbox.to_int().to_list()
        pred_to_append = dict()
        if self.gt_path:
            # Prefer the category id of the ground-truth dataset when the
            # class name is known there.
            # NOTE(review): assumes the two assignments below sat after the
            # category loop (original indentation lost) - confirm.
            for category in self.gt_data["categories"]:
                if category["name"] == pred_class:
                    cat_id = category["id"]
            pred_to_append["image_id"] = self.image_id
            pred_to_append["category_id"] = cat_id
        else:
            pred_to_append["image_id"] = self.img_id_without_gt
            pred_to_append["category_id"] = cat_id
        pred_to_append["bbox"] = BBox(
            xmin, ymin, xmax, ymax).to_list(output_format='pminsize')
        # `if keypoints:` raises ValueError for multi-element numpy arrays;
        # test for presence explicitly instead.
        if keypoints is not None and len(keypoints) > 0:
            _k = []
            for x, y, _c in keypoints:
                _k.extend((int(x), int(y), 1))
            pred_to_append["keypoints"] = _k
        pred_to_append["score"] = score
        self.pred_dataset.append(pred_to_append)

    if show_max_score_only:
        for class_name in self.class_names:
            cat_id = self.class_names.index(class_name)
            _color_bbox = color_bbox if color_bbox else self.palette[cat_id]
            if max_score_list[class_name] > 0:
                max_pred = max_score_pred_list[class_name]
                if max_pred["mask"] is not None and show_segmentation:
                    output = draw_mask_bool(img=output,
                                            mask_bool=max_pred["mask"],
                                            color=_color_bbox,
                                            transparent=transparent_mask,
                                            alpha=transparency_alpha)
                output = draw_bbox(img=output, bbox=max_pred["bbox"],
                                   show_bbox=show_bbox,
                                   show_label=show_class_label,
                                   text=f'{max_pred["pred_class"]} {round(max_pred["score"], 2)}')
                if max_pred["keypoints"] is not None and show_keypoints:
                    output = draw_keypoints(img=output,
                                            keypoints=max_pred["keypoints"],
                                            show_keypoints=show_keypoints,
                                            keypoint_labels=self.keypoint_names,
                                            show_keypoints_labels=show_keypoint_label,
                                            ignore_kpt_idx=ignore_keypoint_idx)
    return output
def _infer_image(self, image_path: str,
                 show_max_score_only: bool = False,
                 show_class_label: bool = True,
                 show_class_label_score_only: bool = False,
                 show_keypoint_label: bool = True,
                 show_bbox: bool = True,
                 show_keypoints: bool = True,
                 show_segmentation: bool = True,
                 color_bbox: list = None,
                 transparent_mask: bool = True,
                 transparency_alpha: float = 0.3,
                 ignore_keypoint_idx=None,
                 show_legends: bool = False,
                 ) -> np.ndarray:
    '''Returns the Inference result of a single image.

    The image is read with ``cv2.imread`` and handled according to
    ``self.crop_mode``:

    * ``1`` - crop to the top-left square whose side is the shorter image
      side, then run ``self.infer_image``.
    * ``2`` - crop to the rectangle given by ``self.crop_rec``, then run
      ``self.infer_image``.
    * ``3`` - run ``self.chop_and_fix`` on the full image and render its
      predictions with ``self.draw_infer``.
    * anything else - run ``self.infer_image`` on the full image.

    All display options are forwarded unchanged.
    '''
    _predict_image = partial(
        self.infer_image,
        image_path=image_path,
        show_max_score_only=show_max_score_only,
        show_class_label=show_class_label,
        show_class_label_score_only=show_class_label_score_only,
        show_keypoint_label=show_keypoint_label,
        show_bbox=show_bbox,
        show_keypoints=show_keypoints,
        show_segmentation=show_segmentation,
        color_bbox=color_bbox,
        transparent_mask=transparent_mask,
        transparency_alpha=transparency_alpha,
        ignore_keypoint_idx=ignore_keypoint_idx,
        show_legends=show_legends,
    )
    img = cv2.imread(image_path)
    mode = self.crop_mode

    if mode == 3:
        # Tiled inference: predictions come back merged, so only drawing is left.
        predict_dict = self.chop_and_fix(
            img, self.crop_mode3_sizes, self.crop_mode3_overlaps)
        prediction_lists = [
            predict_dict[key]
            for key in ('score_list', 'bbox_list', 'pred_class_list',
                        'pred_masks_list', 'pred_keypoints_list',
                        'vis_keypoints_list', 'kpt_confidences_list')
        ]
        return self.draw_infer(
            show_max_score_only, show_class_label,
            show_class_label_score_only, show_keypoint_label, show_bbox,
            show_keypoints, show_segmentation, color_bbox, transparent_mask,
            transparency_alpha, ignore_keypoint_idx, img,
            *prediction_lists, show_legends)

    if mode == 1:
        height, width, _ = img.shape
        side = min(height, width)
        img = img[0:side, 0:side]
    elif mode == 2:
        corner1, corner2 = self.crop_rec
        rect = BBox.from_list([corner1, corner2])
        img = img[rect.ymin: rect.ymax, rect.xmin: rect.xmax]

    return _predict_image(img)
def predict(self, img: np.ndarray) -> dict:
    """Run the model on ``img`` and return its predictions as parallel lists.

    When ``self.gray_on`` is set, the image is first converted to gray and
    back to three channels. The returned dict holds seven equally long
    lists, one entry per detected instance:

        score_list, bbox_list, pred_class_list, pred_masks_list,
        pred_keypoints_list, vis_keypoints_list, kpt_confidences_list

    Masks are ``None`` unless ``self.cfg.MODEL.MASK_ON``; the three keypoint
    lists are ``None``-filled unless ``self.model`` contains ``'keypoint'``
    (case-insensitive).

    Typical use::

        predict_dict = self.predict(img=img)
        for score, pred_class, bbox in zip(predict_dict['score_list'],
                                           predict_dict['pred_class_list'],
                                           predict_dict['bbox_list']):
            ...
    """
    if self.gray_on:
        gray = cv2.cvtColor(img.copy(), cv2.COLOR_RGB2GRAY)
        img = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

    outputs = self.get_outputs(img)

    scores = [float(raw_score)
              for raw_score in outputs['instances'].scores.cpu().numpy()]
    boxes = [BBox.from_list(coords).to_int()
             for coords in outputs['instances'].pred_boxes.tensor.cpu().numpy()]
    class_labels = [self.class_names[class_idx]
                    for class_idx in outputs['instances'].pred_classes.cpu().numpy()]
    count = len(scores)

    if self.cfg.MODEL.MASK_ON:
        masks = list(outputs['instances'].pred_masks.cpu().numpy())
    else:
        masks = [None] * count

    if 'keypoint' in self.model.lower():
        keypoints_per_instance = list(
            outputs['instances'].pred_keypoints.cpu().numpy())
        # Each keypoint is an (x, y, confidence) triple.
        visible_points = [[[int(x), int(y)] for x, y, _conf in kpts]
                          for kpts in keypoints_per_instance]
        confidences = [[conf for _x, _y, conf in kpts]
                       for kpts in keypoints_per_instance]
    else:
        keypoints_per_instance = [None] * count
        visible_points = [None] * count
        confidences = [None] * count

    return {
        'score_list': scores,
        'bbox_list': boxes,
        'pred_class_list': class_labels,
        'pred_masks_list': masks,
        'pred_keypoints_list': keypoints_per_instance,
        'vis_keypoints_list': visible_points,
        'kpt_confidences_list': confidences,
    }