def preprocess(image, points, size=cfg.image_size):
    """Mask, crop, resize and pad a text region into a square test input.

    Pixels outside the polygon are zeroed, the image is cropped to the
    bounding rectangle of the polygon mask, the longer side of the crop is
    scaled to ``size`` (aspect ratio preserved), and the shorter side is
    zero-padded on both ends so the result is ``size`` x ``size``. The
    final values are divided by 255.

    Args:
        image: test image, indexed as (height, width, channels).
        points: text polygon vertices, convertible to a float32 array.
        size: side length of the square output.

    Returns:
        The masked, cropped, resized and padded image divided by 255.
    """
    height, width = image.shape[:2]
    mask = polygons_to_mask([np.asarray(points, np.float32)], height, width)
    x, y, w, h = cv2.boundingRect(mask)

    # Give the mask a trailing axis so it broadcasts over the channels.
    mask = np.expand_dims(np.float32(mask), axis=-1)
    image = image * mask
    image = image[y:y + h, x:x + w, :]

    # NOTE(review): a polygon whose mask is empty presumably yields
    # w == h == 0 and fails on the division below -- confirm with callers.
    # Scale the longer side to `size`. The shorter side is clamped to at
    # least one pixel: for very elongated boxes the truncation would
    # otherwise produce 0, which cv2.resize rejects.
    if h > w:
        new_height, new_width = size, max(1, int(w * size / h))
    else:
        new_height, new_width = max(1, int(h * size / w)), size
    image = cv2.resize(image, (new_width, new_height))

    # Centre the resized crop on the square canvas; any odd leftover pixel
    # goes to the bottom/right. The side already equal to `size` gets 0.
    padding_top = (size - new_height) // 2
    padding_down = size - padding_top - new_height
    padding_left = (size - new_width) // 2
    padding_right = size - padding_left - new_width
    image = cv2.copyMakeBorder(image,
                               padding_top,
                               padding_down,
                               padding_left,
                               padding_right,
                               borderType=cv2.BORDER_CONSTANT,
                               value=[0, 0, 0])
    return image / 255.
def affine_transform(image, polygon):
    """Apply one random rotation/shear to an image and its polygon, then crop.

    Used for data augmentation. Each of the two perturbations is skipped
    when its own ``np.random.uniform()`` draw exceeds 0.5:

    * rotation: angle drawn from [-20, 20) degrees, otherwise 0;
    * shear: both factors drawn from [-0.2, 0.2), otherwise (0, 0).

    The same matrix is applied to the image (via ``cv2.warpAffine``) and to
    every polygon vertex (via ``rotatedPoint``). The warped image is then
    masked by the warped polygon and cropped to the bounding rectangle of
    that mask.

    Args:
        image: input image with three axes (height, width, channels).
        polygon: iterable of points, each accepted by ``rotatedPoint``.

    Returns:
        The warped image with everything outside the polygon zeroed,
        cropped to the polygon's bounding rectangle.
    """
    height, width, _ = image.shape
    center_x, center_y = width / 2, height / 2

    # Keep the order of the random draws stable: the rotation decision and
    # angle first, then the shear decision and (shear_x, shear_y).
    angle = 0 if np.random.uniform() > 0.5 else np.random.uniform(-20., 20.)
    shear_x, shear_y = (0, 0) if np.random.uniform() > 0.5 else (
        np.random.uniform(-0.2, 0.2), np.random.uniform(-0.2, 0.2))

    rad = math.radians(angle)
    sin, cos = math.sin(rad), math.cos(rad)
    abs_sin, abs_cos = abs(sin), abs(cos)

    # Canvas that holds the rotated image, then enlarged for the shear.
    # The height enlargement deliberately uses the already-enlarged width,
    # as in the original computation.
    new_width = (height * abs_sin) + (width * abs_cos)
    new_height = (height * abs_cos) + (width * abs_sin)
    new_width += np.abs(shear_y * new_height)
    new_height += np.abs(shear_x * new_width)
    new_width = int(new_width)
    new_height = int(new_height)

    # Rotation + shear about the image centre; the translation terms move
    # the original centre to the centre of the new canvas.
    M = np.array([
        [
            cos,
            sin + shear_y,
            new_width / 2 - center_x + (1 - cos) * center_x
            - (sin + shear_y) * center_y,
        ],
        [
            -sin + shear_x,
            cos,
            new_height / 2 - center_y + (sin - shear_x) * center_x
            + (1 - cos) * center_y,
        ],
    ])

    rotated_image = cv2.warpAffine(image,
                                   M, (new_width, new_height),
                                   flags=cv2.INTER_CUBIC,
                                   borderMode=cv2.BORDER_CONSTANT,
                                   borderValue=(0, 0, 0))

    # Move the polygon with the same matrix so it stays aligned.
    rotated_points = [rotatedPoint(M, point) for point in polygon]
    mask = polygons_to_mask([np.array(rotated_points, np.float32)],
                            new_height, new_width)
    x, y, w, h = cv2.boundingRect(mask)

    # Trailing axis lets the mask broadcast over the channels.
    mask = np.expand_dims(np.float32(mask), axis=-1)
    rotated_image = rotated_image * mask
    return rotated_image[y:y + h, x:x + w, :]
def draw_annotation(img, boxes, klass, polygons=None, is_crowd=None):
    """Draw labelled boxes, and optionally polygon masks, on an image.

    Per the original note, this will not modify ``img``: the name is only
    rebound to the results of ``viz.draw_boxes`` and ``draw_mask``.

    Args:
        img: image to annotate.
        boxes: one box per annotation.
        klass: class index per box, looked up in ``cfg.DATA.CLASS_NAMES``.
        polygons: optional iterable; each item is passed to
            ``polygons_to_mask`` and the resulting mask is drawn.
        is_crowd: optional per-box flags; a value of 1 appends ``;Crowd``
            to that box's label.

    Returns:
        The annotated image.
    """
    assert len(boxes) == len(klass)

    if is_crowd is None:
        labels = [cfg.DATA.CLASS_NAMES[cls] for cls in klass]
    else:
        assert len(boxes) == len(is_crowd)
        labels = [
            cfg.DATA.CLASS_NAMES[cls] + (';Crowd' if crd == 1 else '')
            for cls, crd in zip(klass, is_crowd)
        ]

    img = viz.draw_boxes(img, boxes, labels)
    if polygons is None:
        return img

    for polygon in polygons:
        # Rasterise at the current image size, then overlay.
        mask = polygons_to_mask(polygon, img.shape[0], img.shape[1])
        img = draw_mask(img, mask)
    return img
def __call__(self, roidb):
    """Turn one roidb record into a training datapoint.

    Loads the image, scales relative boxes to pixels when
    ``cfg.DATA.ABSOLUTE_COORD`` is false, applies the augmentation from
    ``self.aug`` to the image and boxes, attaches RPN anchor targets, and
    (when ``cfg.MODE_MASK`` is set) rasterises the segmentation polygons
    into bit-packed masks.

    Args:
        roidb: mapping with keys ``"file_name"``, ``"boxes"``, ``"class"``
            and ``"is_crowd"``; also ``"segmentation"`` when
            ``cfg.MODE_MASK`` is set.

    Returns:
        A dict with ``"image"``, the anchor entries, ``"gt_boxes"``,
        ``"gt_labels"`` and optionally ``"gt_masks_packed"``; or ``None``
        when ``MalformedData`` is raised while building the anchor targets.
    """
    fname = roidb["file_name"]
    boxes = roidb["boxes"]
    klass = roidb["class"]
    is_crowd = roidb["is_crowd"]

    assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
    boxes = np.copy(boxes)  # the scaling below is in place; keep roidb intact

    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype("float32")
    # Size of the image as loaded, i.e. before augmentation.
    height, width = im.shape[:2]

    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"
    if not self.cfg.DATA.ABSOLUTE_COORD:
        # Columns 0/2 are scaled by width, columns 1/3 by height.
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # Augmentation: one transform for the image, the box corners and
    # (further down) the mask polygons.
    tfms = self.aug.get_transform(im)
    im = tfms.apply_image(im)
    corners = tfms.apply_coords(box_to_point4(boxes))
    boxes = point4_to_box(corners)

    if len(boxes) > 0:
        assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
            "Invalid category {}!".format(klass.max())
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    ret = {"image": im}

    # Add rpn data to dataflow:
    try:
        if self.cfg.MODE_FPN:
            per_level = self.get_multilevel_rpn_anchor_input(
                im, boxes, is_crowd)
            # Level keys are numbered from 2.
            for level, (anchor_labels, anchor_boxes) in enumerate(
                    per_level, start=2):
                ret["anchor_labels_lvl{}".format(level)] = anchor_labels
                ret["anchor_boxes_lvl{}".format(level)] = anchor_boxes
        else:
            anchor_labels, anchor_boxes = self.get_rpn_anchor_input(
                im, boxes, is_crowd)
            ret["anchor_labels"] = anchor_labels
            ret["anchor_boxes"] = anchor_boxes

        # skip crowd boxes in training target
        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]
        klass = klass[not_crowd]
        ret["gt_boxes"] = boxes
        ret["gt_labels"] = klass
    except MalformedData as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            "warn")
        return None

    if self.cfg.MODE_MASK:
        # augmentation will modify the polys in-place, so work on a copy
        segmentation = copy.deepcopy(roidb["segmentation"])
        segmentation = [
            polys for k, polys in enumerate(segmentation)
            if not is_crowd[k]
        ]
        assert len(segmentation) == len(boxes)

        # Apply augmentation on polygon coordinates and produce one
        # image-sized binary mask per box.
        width_height = np.asarray([width, height], dtype=np.float32)
        mask_height = im.shape[0]
        # pad to 8 in order to pack mask into bits
        gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)
        relative_coords = not self.cfg.DATA.ABSOLUTE_COORD

        masks = []
        for polys in segmentation:
            if relative_coords:
                polys = [p * width_height for p in polys]
            polys = [tfms.apply_coords(p) for p in polys]
            masks.append(
                polygons_to_mask(polys, mask_height, gt_mask_width))

        if masks:
            # values in {0, 1}, packed along the width axis
            packed = np.packbits(np.asarray(masks, dtype='uint8'), axis=-1)
        else:
            # no gt on the image
            packed = np.zeros((0, mask_height, gt_mask_width // 8),
                              dtype='uint8')
        ret['gt_masks_packed'] = packed

    return ret