def __call__(self, img, box=None, label=None, mask=None, **kwargs):
    """Randomly flip an image, its mask and its boxes top-to-bottom.

    The flip is applied when ``random.randint(1, 10) <= self.ratio * 10``;
    otherwise every input is returned unchanged.

    Args:
        img: PIL image to flip.
        box: Optional 2-D array of boxes, passed through
            ``change_box_order(box, 'xywh2xyxy')`` before mirroring and
            converted back with ``'xyxy2xywh'`` afterwards.
        label: Optional labels; returned untouched.
        mask: Optional PIL mask, flipped together with the image.
        **kwargs: Accepted and ignored.

    Returns:
        dict with the keys ``'img'``, ``'box'``, ``'label'`` and ``'mask'``.
    """
    if random.randint(1, 10) <= self.ratio * 10:
        # Flip image
        img = img.transpose(Image.FLIP_TOP_BOTTOM)

        # Flip mask
        if mask is not None:
            mask = mask.transpose(Image.FLIP_TOP_BOTTOM)

        # Flip bounding box
        if box is not None:
            new_box = change_box_order(box, 'xywh2xyxy')
            # A top/bottom flip mirrors the y axis, so the reference
            # extent is the image HEIGHT (the width is only correct for
            # square images).
            h = img.height
            # Both mirrored edges are computed before either column is
            # overwritten; the old bottom edge becomes the new top edge.
            ymin = h - new_box[:, 3]
            ymax = h - new_box[:, 1]
            new_box[:, 1] = ymin
            new_box[:, 3] = ymax
            box = change_box_order(new_box, 'xyxy2xywh')

    return {
        'img': img,
        'box': box,
        'label': label,
        'mask': mask,
    }
def __call__(self, img, box=None, label=None, mask=None, **kwargs):
    """Randomly mirror an image, its mask and its boxes left-to-right.

    One ``random.randint(1, 10)`` draw is compared against
    ``self.ratio * 10`` to decide whether the flip happens.

    Args:
        img: PIL image.
        box: Optional 2-D array of boxes, converted with
            ``change_box_order`` (``'xywh2xyxy'`` and back).
        label: Optional labels; passed through unchanged.
        mask: Optional PIL mask, mirrored with the image.
        **kwargs: Accepted and ignored.

    Returns:
        dict with the keys ``'img'``, ``'box'``, ``'label'`` and ``'mask'``.
    """
    should_flip = random.randint(1, 10) <= self.ratio * 10

    if should_flip:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

        if mask is not None:
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)

        if box is not None:
            corners = change_box_order(box, 'xywh2xyxy')
            width = img.width
            # Mirroring swaps the roles of the two x edges; compute both
            # before writing either column back.
            mirrored_left = width - corners[:, 2]
            mirrored_right = width - corners[:, 0]
            corners[:, 0] = mirrored_left
            corners[:, 2] = mirrored_right
            box = change_box_order(corners, 'xyxy2xywh')

    return {'img': img, 'box': box, 'label': label, 'mask': mask}
def __call__(self, img, box=None, label=None, mask=None, **kwargs):
    """Rotate an image (and mask / boxes) by a random angle.

    The angle is drawn with ``random.uniform(*self.angle)``. The image and
    mask are rotated with ``self.rotate_im`` and then resized back to the
    input size, so the output always has the input's width and height
    whether or not boxes were supplied.

    Args:
        img: PIL image (its ``width`` / ``height`` are read).
        box: Optional 2-D array of boxes, converted with
            ``change_box_order(box, 'xywh2xyxy')``; columns past the
            fourth are carried along unchanged.
        label: Optional labels; returned untouched.
        mask: Optional mask, rotated and resized like the image.
        **kwargs: Accepted and ignored.

    Returns:
        dict with the keys ``'img'``, ``'box'``, ``'label'`` and ``'mask'``;
        image and mask are PIL images built with ``Image.fromarray``.
    """
    angle = random.uniform(*self.angle)

    w, h = img.width, img.height
    cx, cy = w // 2, h // 2

    # rotate_im must return an array: ``.shape`` is read and the result is
    # handed to cv2.resize below.
    img = self.rotate_im(img, angle)
    if mask is not None:
        mask = self.rotate_im(mask, angle)

    # Ratio between the rotated canvas and the requested output size;
    # must be captured BEFORE the resize.
    scale_factor_x = img.shape[1] / w
    scale_factor_y = img.shape[0] / h

    # Resize unconditionally so the output size does not depend on whether
    # boxes were passed in.
    img = cv2.resize(img, (w, h))
    if mask is not None:
        mask = cv2.resize(mask, (w, h))

    if box is not None:
        new_box = change_box_order(box, 'xywh2xyxy')

        corners = self.get_corners(new_box)
        # Keep any extra per-box columns next to the 8 corner coordinates.
        corners = np.hstack((corners, new_box[:, 4:]))
        corners[:, :8] = self.rotate_box(corners[:, :8], angle, cx, cy, h, w)

        new_box = self.get_enclosing_box(corners)
        # Bring the boxes from the rotated canvas into the resized frame.
        new_box[:, :4] /= [
            scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y
        ]

        # NOTE(review): if clip_box drops rows, ``label`` is not filtered
        # to match -- confirm against clip_box's behaviour.
        new_box = self.clip_box(new_box, [0, 0, w, h], 0.25)
        new_box = change_box_order(new_box, 'xyxy2xywh')
    else:
        new_box = box

    img = Image.fromarray(img)
    mask = Image.fromarray(mask) if mask is not None else None

    return {'img': img, 'box': new_box, 'label': label, 'mask': mask}
def __call__(self, img, box=None, label=None, mask=None, **kwargs):
    """Shear an image (and mask / boxes) horizontally by a random factor.

    The factor is drawn with ``random.uniform(*self.shear_factor)``. A
    negative factor is handled by flipping horizontally, shearing with the
    absolute value, and flipping back. The sheared result is resized to
    the input's width and height.

    Args:
        img: Image convertible with ``np.array``.
        box: Optional 2-D array of boxes, converted with
            ``change_box_order`` (``'xywh2xyxy'`` and back).
        label: Optional labels; returned untouched.
        mask: Optional mask, transformed like the image.
        **kwargs: Accepted and ignored.

    Returns:
        dict with the keys ``'img'``, ``'box'``, ``'label'`` and ``'mask'``;
        image and mask are PIL images built with ``Image.fromarray``.
    """

    def mirror(image_arr, boxes, mask_arr):
        # Round-trip through PIL so the horizontal-flip transform can be
        # reused; hands back arrays again.
        pil_mask = Image.fromarray(mask_arr) if mask_arr is not None else None
        pil_img = Image.fromarray(image_arr)
        flipped = RandomHorizontalFlip(1)(img=pil_img, box=boxes, mask=pil_mask)
        out_mask = np.array(flipped['mask']) if mask_arr is not None else None
        return np.array(flipped['img']), flipped['box'], out_mask

    factor = random.uniform(*self.shear_factor)
    is_negative = factor < 0
    magnitude = abs(factor)

    img = np.array(img)
    if mask is not None:
        mask = np.array(mask)
    target_h, target_w = img.shape[0], img.shape[1]

    if is_negative:
        img, box, mask = mirror(img, box, mask)

    affine = np.array([[1, magnitude, 0], [0, 1, 0]])
    # Width of the canvas needed to hold the sheared image.
    sheared_w = img.shape[1] + abs(factor * img.shape[0])

    if box is not None:
        box = change_box_order(box, 'xywh2xyxy')
        # Each x edge moves right by (its y coordinate * shear magnitude).
        box[:, [0, 2]] += (box[:, [1, 3]] * magnitude).astype(int)
        box = change_box_order(box, 'xyxy2xywh')

    img = cv2.warpAffine(img, affine, (int(sheared_w), img.shape[0]))
    if mask is not None:
        mask = cv2.warpAffine(mask, affine, (int(sheared_w), mask.shape[0]))

    if is_negative:
        img, box, mask = mirror(img, box, mask)

    img = cv2.resize(img, (target_w, target_h))
    if mask is not None:
        mask = cv2.resize(mask, (target_w, target_h))

    x_scale = sheared_w / target_w
    if box is not None:
        box = change_box_order(box, 'xywh2xyxy')
        # Only the x axis was stretched by the shear.
        box[:, :4] /= [x_scale, 1, x_scale, 1]
        box = change_box_order(box, 'xyxy2xywh')

    out_img = Image.fromarray(img)
    out_mask = Image.fromarray(mask) if mask is not None else None

    return {'img': out_img, 'box': box, 'label': label, 'mask': out_mask}
def visualize_item(self, index=None, figsize=(15, 15)):
    """Visualize an image with its bounding boxes by index.

    Args:
        index: Position of the sample to show. When ``None`` a random
            position in ``range(len(self.coco.imgs))`` is drawn.
        figsize: Figure size forwarded to ``self.visualize``.

    The sample is fetched with ``self.__getitem__``. If any entry of
    ``self.transforms.transforms_list`` is a ``Normalize`` instance, the
    sample is first passed through ``self.transforms.denormalize``. Boxes
    and labels exposing ``.numpy()`` are converted with it, and boxes are
    converted ``'xyxy2xywh'`` when ``self.mode == 'xyxy'`` before drawing.
    """
    if index is None:
        # randrange excludes the upper bound; random.randint(0, n) could
        # return n, which is one past the last valid index.
        index = random.randrange(len(self.coco.imgs))

    item = self.__getitem__(index)
    img = item['img']
    box = item['box']
    label = item['label']

    normalize = any(
        isinstance(x, Normalize) for x in self.transforms.transforms_list)

    # Denormalize and reverse-tensorize
    if normalize:
        results = self.transforms.denormalize(img=img, box=box, label=label)
        img, label, box = results['img'], results['label'], results['box']

    # Numpify (only objects that actually offer the conversion)
    if hasattr(label, 'numpy'):
        label = label.numpy()
    if hasattr(box, 'numpy'):
        box = box.numpy()

    if self.mode == 'xyxy':
        box = change_box_order(box, 'xyxy2xywh')

    self.visualize(img, box, label, figsize=figsize)
def __getitem__(self, idx):
    """Return the sample at ``idx`` as a dict of image, boxes and labels.

    The image comes from ``self.load_image`` and the annotations from
    ``self.load_annotations``; the first four annotation columns are used
    as boxes and the last column as labels. When ``self.transforms`` is
    truthy the sample is passed through it first.

    Boxes are converted with ``change_box_order(..., 'xywh2xyxy')`` on
    every path, so the returned layout does not depend on whether
    transforms are configured.

    Returns:
        dict with the keys ``'img'``, ``'box'`` and ``'label'``.
    """
    img = self.load_image(idx)
    annot = self.load_annotations(idx)

    box = annot[:, :4]
    label = annot[:, -1]

    if self.transforms:
        item = self.transforms(img=img, box=box, label=label)
        img, box, label = item['img'], item['box'], item['label']

    # Convert once, after the optional transforms, for a consistent
    # output format.
    box = change_box_order(box, order='xywh2xyxy')

    return {'img': img, 'box': box, 'label': label}
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    We obey the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Each anchor is matched to the ground-truth box with which it has the
    highest IoU. Anchors whose best IoU is below 0.5 become background
    (class 0); those strictly between 0.4 and 0.5 are marked -1 (ignored).
    Matched anchors get ``1 + label``.

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].
      input_size: (int/tuple) model input size of (w,h).

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,].
      When ``boxes`` is empty, both are all zeros (every anchor is
      background).
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    # No ground truth: the IoU matrix would have an empty dimension and
    # ``max`` over it raises, so emit all-background targets directly.
    if boxes.numel() == 0:
        num_anchors = anchor_boxes.size(0)
        loc_targets = anchor_boxes.new_zeros((num_anchors, 4))
        cls_targets = labels.new_zeros(num_anchors)
        return loc_targets, cls_targets

    boxes = change_box_order(boxes, 'xyxy2xywh')

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    # Ground-truth box assigned to each anchor.
    boxes = boxes[max_ids]

    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    # Shift labels by one so that 0 can denote background.
    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.5] = 0
    # ignore ious in the open interval (0.4, 0.5)
    ignore = (max_ious > 0.4) & (max_ious < 0.5)
    cls_targets[ignore] = -1  # for now just mark ignored to -1
    return loc_targets, cls_targets
def __call__(self, img, box=None, label=None, mask=None, **kwargs):
    '''Randomly crop an image and keep the boxes centred in the crop.

    A minimum-overlap value is drawn from ``self.ratios``. If it is
    ``None`` the inputs are returned untouched. Otherwise up to 50 crops
    are sampled (each side in [0.3, 1] of the original, aspect ratio in
    [0.5, 2]); a crop is accepted when at least one box overlaps it by at
    least the drawn value and at least one box centre lies inside it.
    After 50 failures a new value is drawn and sampling starts over.

    Args:
        img: A PIL image.
        box: Optional boxes, converted with
            ``change_box_order(box, 'xywh2xyxy')``, sized (#objects, 4).
        label: Optional labels of the objects, sized (#objects,).
        mask: Optional PIL mask, cropped like the image.
        **kwargs: Accepted and ignored.

    Returns:
        dict with the keys ``'img'``, ``'box'``, ``'label'`` and ``'mask'``.
        With boxes, ``'box'`` / ``'label'`` hold only the objects whose
        centre is inside the crop, as NumPy arrays with the boxes clipped
        to the crop and converted back with ``'xyxy2xywh'``. Without
        boxes, ``label`` is passed through unchanged.
    '''
    image = TF.to_tensor(img)
    masks = TF.to_tensor(mask) if mask is not None else None

    original_h = image.size(1)
    original_w = image.size(2)

    # The tensor forms of the inputs do not change between attempts, so
    # build them once instead of on every sampling round.
    if box is not None:
        boxes = torch.FloatTensor(change_box_order(box, 'xywh2xyxy'))
        labels = torch.LongTensor(label) if label is not None else None
    else:
        boxes = None
        labels = None

    while True:
        mode = random.choice(self.ratios)

        if mode is None:
            return {'img': img, 'box': box, 'label': label, 'mask': mask}

        for _ in range(50):
            # Crop dimensions: [0.3, 1] of original dimensions
            new_h = random.uniform(0.3 * original_h, original_h)
            new_w = random.uniform(0.3 * original_w, original_w)

            # Aspect ratio constraint b/t .5 & 2
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            # Crop coordinates
            left = random.uniform(0, original_w - new_w)
            right = left + new_w
            top = random.uniform(0, original_h - new_h)
            bottom = top + new_h
            crop = torch.FloatTensor(
                [int(left), int(top), int(right), int(bottom)])

            if boxes is not None:
                # IoU between the crop and every bounding box -> (#objects,)
                overlap = find_jaccard_overlap(crop.unsqueeze(0), boxes)
                overlap = overlap.squeeze(0)

                # If not a single box reaches the minimum IoU, try again
                if overlap.max().item() < mode:
                    continue

                # Keep only boxes whose centre lies inside the crop
                center_bb = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                center_in_crop = (
                    (center_bb[:, 0] > left) & (center_bb[:, 0] < right)
                    & (center_bb[:, 1] > top) & (center_bb[:, 1] < bottom)
                )
                if not center_in_crop.any():
                    continue

                # Boolean indexing copies, so the edits below do not touch
                # ``boxes`` (which later attempts still need).
                new_boxes = boxes[center_in_crop, :]

                # Clip to the crop, then shift into crop coordinates
                new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2])
                new_boxes[:, :2] -= crop[:2]
                new_boxes[:, 2:] = torch.min(new_boxes[:, 2:], crop[2:])
                new_boxes[:, 2:] -= crop[:2]

                new_boxes = change_box_order(new_boxes, 'xyxy2xywh')
                new_boxes = new_boxes.numpy()
                if labels is not None:
                    new_labels = labels[center_in_crop].numpy()
                else:
                    new_labels = None
            else:
                new_boxes = None
                # Nothing to filter against: hand the labels back as given
                # instead of dropping them.
                new_labels = label

            # Crop -> (C, new_h, new_w)
            rows = slice(int(top), int(bottom))
            cols = slice(int(left), int(right))
            new_image = image[:, rows, cols]
            if masks is not None:
                new_masks = TF.to_pil_image(masks[:, rows, cols])
            else:
                new_masks = None

            return {
                'img': TF.to_pil_image(new_image),
                'box': new_boxes,
                'label': new_labels,
                'mask': new_masks,
            }
# Reduce every prediction to 7 columns: its last four columns followed by
# columns 8..10.
tmp = []
for o in outputs:
    try:
        coor_4bit = o[:,-4:]
        conf_cls = o[:,8:11]
    # NOTE(review): bare except -- any failure while slicing (presumably a
    # ``None`` prediction; confirm) is replaced by a single all-zero row.
    except:
        coor_4bit = torch.zeros((1,4))
        conf_cls = torch.zeros((1,3))
    tmp.append(torch.cat((coor_4bit,conf_cls),1))
outputs = tmp
# Targets: scale the first 8 values of every row by cfg['img_shape']
# (in place), then convert them to 4 values with 'xiyi2xyxy'.
targets[:,:,:8] *= cfg['img_shape']
coor_8bit = targets[:,:,:8]
coor_4bit = torch.zeros((coor_8bit.shape[0],coor_8bit.shape[1],4))
for b, bit_8 in enumerate(coor_8bit):
    coor_4bit[b] = change_box_order(bit_8,'xiyi2xyxy')
# Re-attach the target columns from index 10 onwards after the 4 coordinates.
tmp = torch.cat((coor_4bit, targets[:,:,10:].cpu()),2)
targets = tmp  # [xm, ym, xmax, ymax, conf, cls_conf, class]
# Shapes noted by the author: output [?, 7], annotations [50, 5], batch [b, ?, 5]
for output, annotations in zip(outputs, targets):
    # One (initially empty) detection slot per class for this image.
    all_detections.append([np.array([]) for _ in range(cfg['num_classes'])])
    if output is not None:
        # NOTE(review): ``:5`` also takes column 4, which is read as the
        # score on the next line -- confirm the overlap is intended.
        pred_boxes = output[:, :5].cpu().numpy()
        scores = output[:, 4].cpu().numpy()
        pred_labels = output[:, -1].cpu().numpy()
        # Order by confidence.
        # NOTE(review): np.argsort sorts ascending, and only pred_boxes is
        # reordered (the pred_labels reorder below is disabled) -- confirm.
        sort_i = np.argsort(scores)
        # pred_labels = pred_labels[sort_i]
        pred_boxes = pred_boxes[sort_i]
def visualize_batch(self):
    """Draw predictions against ground truth for one validation batch.

    Takes the first batch of ``self.valloader``, runs the model (through
    ``self.cfg.tta`` when it is set, otherwise ``inference_step``),
    post-processes every output and writes one image per sample to
    ``samples/{epoch}_{iters}_{idx}.jpg``. Samples without predicted boxes
    are skipped.

    The model is switched to eval mode and is not switched back here.
    """
    os.makedirs('./samples', exist_ok=True)

    self.model.eval()
    with torch.no_grad():
        batch = next(iter(self.valloader))
        targets = batch['targets']
        image_names = batch['img_names']
        imgs = batch['imgs']

        if self.cfg.tta is not None:
            outputs = self.cfg.tta.make_tta_predictions(self.model, batch)
        else:
            outputs = self.model.inference_step(batch)

        for idx, output in enumerate(outputs):
            pred = postprocessing(
                output,
                current_img_size=self.cfg.image_size,
                ori_img_size=self.cfg.image_size,
                min_iou=self.cfg.min_iou_val,
                min_conf=self.cfg.min_conf_val,
                mode=self.cfg.fusion_mode)

            boxes = pred['bboxes']
            labels = pred['classes']
            scores = pred['scores']

            # Test for None first: len(None) would raise before the None
            # check is ever reached.
            if boxes is None or len(boxes) == 0:
                continue

            target = targets[idx]
            target_boxes = target['boxes']
            target_labels = target['labels']

            if self.cfg.box_format == 'yxyx':
                target_boxes = change_box_order(target_boxes, 'yxyx2xyxy')
            target_boxes = change_box_order(target_boxes, order='xyxy2xywh')

            image_outname = os.path.join(
                'samples', f'{self.epoch}_{self.iters}_{idx}.jpg')

            # Index 0 of each pair is the prediction, index 1 the target.
            draw_pred_gt_boxes(
                image_outname=image_outname,
                img=imgs[idx],
                boxes=[boxes, target_boxes],
                labels=[labels, target_labels],
                scores=scores,
                image_name=image_names[idx],
                figsize=(15, 15))