def get_seg_masks(self, pts_score, det_pts, det_bboxes, det_labels, test_cfg,
                  ori_shape, scale_factor, rescale=False):
    """Get segmentation masks from points and scores.

    Args:
        pts_score (Tensor or ndarray): shape (n, num_pts)
        det_pts (Tensor): shape (n, num_pts*2)
        det_bboxes (Tensor): shape (n, 4)
        det_labels (Tensor): shape (n, 1)
        test_cfg (dict): rcnn testing config
        ori_shape: original image size
        scale_factor: scale factor for image
        rescale: whether to rescale to original size

    Returns:
        list[list]: encoded masks
    """
    cls_segms = [[] for _ in range(self.bbox_head.num_classes - 1)]
    bboxes = det_bboxes.cpu().numpy()[:, :4]
    labels = det_labels.cpu().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
        img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
        scale_factor = 1.0

    for i in range(bboxes.shape[0]):
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
        im_pts = det_pts[i].clone()
        im_pts = im_pts.reshape(-1, 2)
        im_pts_score = pts_score[i]

        # shift points into the box-local coordinate frame
        im_pts[:, 0] = im_pts[:, 0] - bbox[0]
        im_pts[:, 1] = im_pts[:, 1] - bbox[1]
        _h, _w = h, w
        # anchor the corners with zero score so the interpolation
        # covers the whole box
        corner_pts = im_pts.new_tensor([[0, 0], [_h - 1, 0], [0, _w - 1],
                                        [_w - 1, _h - 1]])
        corner_score = im_pts_score.new_tensor([0, 0, 0, 0])
        im_pts = torch.cat([im_pts, corner_pts], dim=0).cpu().numpy()
        im_pts_score = torch.cat([im_pts_score, corner_score],
                                 dim=0).cpu().numpy()

        # densify the sparse point scores onto the box grid
        grids = tuple(np.mgrid[0:_w:1, 0:_h:1])
        bbox_mask = scipy.interpolate.griddata(im_pts, im_pts_score, grids)
        bbox_mask = bbox_mask.transpose(1, 0)
        bbox_mask = mmcv.imresize(bbox_mask, (w, h))
        bbox_mask = bbox_mask.astype(np.float32)
        bbox_mask[np.isnan(bbox_mask)] = 0
        bbox_mask = (bbox_mask > test_cfg.get('pts_score_thr',
                                              0.5)).astype(np.uint8)

        im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
        rle = maskUtils.encode(
            np.array(im_mask[:, :, np.newaxis], order='F'))[0]
        cls_segms[label - 1].append(rle)
    return cls_segms
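# --- Usage sketch (not part of the original code) ---
# A minimal, self-contained illustration of the interpolation trick used in
# get_seg_masks() above: sparse per-point scores are densified into a binary
# mask with scipy.interpolate.griddata. Shapes, thresholds and the (x, y)
# corner layout here are my own demo assumptions, not taken from the source.
import numpy as np
import scipy.interpolate


def dense_mask_from_points(pts_xy, scores, w, h, thr=0.5):
    """Interpolate sparse point scores onto an (h, w) grid and threshold."""
    # Anchor the four box corners with score 0 so the convex hull of the
    # interpolation covers the whole grid.
    corners = np.array([[0, 0], [w - 1, 0], [0, h - 1], [w - 1, h - 1]],
                       dtype=np.float32)
    pts = np.concatenate([pts_xy, corners], axis=0)
    vals = np.concatenate([scores, np.zeros(4, dtype=np.float32)], axis=0)
    grid_x, grid_y = np.mgrid[0:w:1, 0:h:1]
    dense = scipy.interpolate.griddata(pts, vals, (grid_x, grid_y))
    dense = dense.T  # (w, h) -> (h, w)
    dense = np.nan_to_num(dense, nan=0.0)  # NaN outside the hull -> 0
    return (dense > thr).astype(np.uint8)


# Example: 8 random points with random scores inside a 32x24 box.
rng = np.random.default_rng(0)
demo_pts = rng.uniform([0, 0], [31, 23], size=(8, 2)).astype(np.float32)
demo_scores = rng.uniform(0, 1, size=8).astype(np.float32)
mask = dense_mask_from_points(demo_pts, demo_scores, w=32, h=24)
assert mask.shape == (24, 32)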
def single_gpu_test(model, data_loader, show=False, out_dir=None):
    """Test with single GPU.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool): Whether to show results during inference. Default: False.
        out_dir (str, optional): If specified, the results will be dumped
            into the directory to save output results.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results_0 = []
    results_1 = []
    results_2 = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)
        if isinstance(result, list):
            results_0.extend(result[0])
            results_1.extend(result[1])
            results_2.extend(result[2])
        else:
            # results.append(result)
            pass

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file_0 = osp.join(out_dir + '_dir',
                                          img_meta['ori_filename'])
                    out_file_1 = osp.join(out_dir + '_sty',
                                          img_meta['ori_filename'])
                    out_file_2 = osp.join(out_dir + '_type',
                                          img_meta['ori_filename'])
                else:
                    out_file_0 = None
                    out_file_1 = None
                    out_file_2 = None

                model.module.show_result(
                    img_show,
                    result[0],
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file_0)
                model.module.show_result(
                    img_show,
                    result[1],
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file_1)
                model.module.show_result(
                    img_show,
                    result[2],
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file_2)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()
    return [results_0, results_1, results_2]
def single_gpu_test_rotate_rect_img(model,
                                    data_loader,
                                    show=False,
                                    out_dir=None,
                                    show_score_thr=0.3):
    print('clw: using single_gpu_test_rotate_rect_img() !!')
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        # clw note: for debug
        # for idx, item in enumerate(result[0]):
        #     if item.size == 0:
        #         print('111')
        #     for row in item:
        #         print('boxw:', row[2] - row[0], 'boxh:', row[3] - row[1])
        #         if row[2] - row[0] == 0 or row[3] - row[1] == 0:
        #             print('aaaa')

        # The inverse perspective matrix is serialized into the file name:
        # the last 9 underscore-separated fields form a 3x3 homography.
        img_name = data['img_metas'][0].data[0][0]['ori_filename']
        # origin_name = img_name.split('CAM')[0] + 'CAM' + img_name.split('CAM')[1][0] + '.jpg'
        # data['img_metas'][0].data[0][0]['ori_filename'] = origin_name
        # data['img_metas'][0].data[0][0]['filename'] = data['img_metas'][0].data[0][0]['filename'].rsplit('/', 1)[0] + '/' + origin_name
        matrix_fields = img_name[:-4].split('_')[-9:]
        matrix_vals = [float(a) for a in matrix_fields]
        M_perspective_inv = np.array(matrix_vals).reshape(3, 3)

        for cls_id in range(len(result[0])):
            bboxes_xyxy = result[0][cls_id][:, :4]  # (n, 4)
            if bboxes_xyxy.size == 0:
                continue
            # lift each axis-aligned box to a 4-point contour
            cnts = []
            for xyxy in bboxes_xyxy:
                x1, y1, x2, y2 = xyxy
                cnts.append(np.array(((x1, y1), (x1, y2), (x2, y2),
                                      (x2, y1))))
            cnts = np.array(cnts)

            # map contours back through the inverse perspective transform,
            # then re-axis-align them with boundingRect
            restored_boxes = []
            src_pts = cv2.perspectiveTransform(cnts, M_perspective_inv)
            for cnt in src_pts:
                rect = cv2.boundingRect(cnt)
                x1 = rect[0]
                y1 = rect[1]
                x2 = rect[0] + rect[2]
                y2 = rect[1] + rect[3]
                restored_boxes.append(np.array((x1, y1, x2, y2)))
            restored_boxes = np.array(restored_boxes)
            result[0][cls_id][:, :4] = restored_boxes
            # result[0][cls_id] = np.concatenate((restored_boxes, result[0][cls_id][:, 4]), axis=1)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result[j],
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
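# --- Usage sketch (not part of the original code) ---
# Illustrates the box remapping done in single_gpu_test_rotate_rect_img():
# axis-aligned boxes become 4-point contours, are pushed through an inverse
# perspective matrix with cv2.perspectiveTransform, and are re-axis-aligned
# with cv2.boundingRect. The homography below is made up for the demo.
import cv2
import numpy as np

M_inv = np.array([[1.0, 0.1, 5.0],
                  [0.0, 1.0, 3.0],
                  [0.0, 0.0, 1.0]], dtype=np.float64)

boxes = np.array([[10, 10, 50, 40], [60, 20, 90, 80]], dtype=np.float32)
contours = np.stack([
    np.array([(x1, y1), (x1, y2), (x2, y2), (x2, y1)], dtype=np.float32)
    for x1, y1, x2, y2 in boxes
])  # (n, 4, 2), a layout perspectiveTransform accepts
src_pts = cv2.perspectiveTransform(contours, M_inv)
restored = np.array([cv2.boundingRect(c) for c in src_pts], dtype=np.float32)
# cv2.boundingRect returns (x, y, w, h); convert back to (x1, y1, x2, y2).
restored[:, 2] += restored[:, 0]
restored[:, 3] += restored[:, 1]
print(restored)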
def show_result_ins(img,
                    result,
                    class_names,
                    score_thr=0.3,
                    sort_by_density=False,
                    out_file=None):
    """Visualize the instance segmentation results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The instance segmentation result.
        class_names (list[str] or tuple[str]): A list of class names.
        score_thr (float): The threshold to visualize the masks.
        sort_by_density (bool): Sort the masks by their density.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If `out_file` is not specified, the visualized
            image is returned, otherwise None is returned.
    """
    assert isinstance(class_names, (tuple, list))
    img = mmcv.imread(img)
    img_show = img.copy()
    h, w, _ = img.shape

    cur_result = result[0]
    seg_label = cur_result[0]
    seg_label = seg_label.cpu().numpy().astype(np.uint8)
    cate_label = cur_result[1]
    cate_label = cate_label.cpu().numpy()
    score = cur_result[2].cpu().numpy()

    vis_inds = score > score_thr
    seg_label = seg_label[vis_inds]
    num_mask = seg_label.shape[0]
    cate_label = cate_label[vis_inds]
    cate_score = score[vis_inds]

    if sort_by_density:
        mask_density = []
        for idx in range(num_mask):
            cur_mask = seg_label[idx, :, :]
            cur_mask = mmcv.imresize(cur_mask, (w, h))
            cur_mask = (cur_mask > 0.5).astype(np.int32)
            mask_density.append(cur_mask.sum())
        orders = np.argsort(mask_density)
        seg_label = seg_label[orders]
        cate_label = cate_label[orders]
        cate_score = cate_score[orders]

    np.random.seed(42)
    color_masks = [
        np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        for _ in range(num_mask)
    ]
    for idx in range(num_mask):
        idx = -(idx + 1)
        cur_mask = seg_label[idx, :, :]
        cur_mask = mmcv.imresize(cur_mask, (w, h))
        cur_mask = (cur_mask > 0.5).astype(np.uint8)
        if cur_mask.sum() == 0:
            continue
        color_mask = color_masks[idx]
        cur_mask_bool = cur_mask.astype(bool)
        img_show[cur_mask_bool] = img[cur_mask_bool] * 0.5 + color_mask * 0.5

        cur_cate = cate_label[idx]
        cur_score = cate_score[idx]
        # label_text = class_names[cur_cate]
        label_text = " "
        center_y, center_x = ndimage.measurements.center_of_mass(cur_mask)
        vis_pos = (max(int(center_x) - 10, 0), int(center_y))
        cv2.putText(img_show, label_text, vis_pos, cv2.FONT_HERSHEY_COMPLEX,
                    0.3, (255, 255, 255))  # white

    if out_file is None:
        return img_show
    else:
        mmcv.imwrite(img_show, out_file)
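# --- Usage sketch (not part of the original code) ---
# show_result_ins() places the class label at the mask's center of mass.
# A minimal demo of that placement on a synthetic mask, assuming only
# numpy/scipy/cv2; the label text and geometry are made up.
import cv2
import numpy as np
from scipy import ndimage

canvas = np.zeros((64, 64, 3), dtype=np.uint8)
demo_mask = np.zeros((64, 64), dtype=np.uint8)
cv2.circle(demo_mask, (40, 24), 10, 1, -1)  # a filled disk as a fake mask
center_y, center_x = ndimage.center_of_mass(demo_mask)
vis_pos = (max(int(center_x) - 10, 0), int(center_y))  # nudge text left
cv2.putText(canvas, 'car', vis_pos, cv2.FONT_HERSHEY_COMPLEX, 0.3,
            (255, 255, 255))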
def __call__(self,
             img_group,
             scale,
             crop_history=None,
             flip=False,
             keep_ratio=True,
             div_255=False,
             is_flow=False):
    if self.resize_crop or self.rescale_crop:
        img_group, crop_quadruple = self.op_crop(img_group)
        img_shape = img_group[0].shape
        scale_factor = None
    else:
        # 1. rescale
        if keep_ratio:
            tuple_list = [
                mmcv.imrescale(img, scale, return_scale=True)
                for img in img_group
            ]
            img_group, scale_factors = list(zip(*tuple_list))
            scale_factor = scale_factors[0]
        else:
            tuple_list = [
                mmcv.imresize(img, scale, return_scale=True)
                for img in img_group
            ]
            img_group, w_scales, h_scales = list(zip(*tuple_list))
            scale_factor = np.array(
                [w_scales[0], h_scales[0], w_scales[0], h_scales[0]],
                dtype=np.float32)

        if self.pre_mean_volume is not None:
            volume_len = self.pre_mean_volume.shape[0]
            img_group = [
                img - self.pre_mean_volume[i % volume_len, ...]
                for i, img in enumerate(img_group)
            ]

        # 2. crop (if necessary)
        if crop_history is not None:
            self.op_crop = GroupCrop(crop_history)
        if self.op_crop is not None:
            img_group, crop_quadruple = self.op_crop(
                img_group, is_flow=is_flow)
        else:
            crop_quadruple = None
        img_shape = img_group[0].shape

        # 3. flip
        if flip:
            img_group = [mmcv.imflip(img) for img in img_group]
            if is_flow:
                for i in range(0, len(img_group), 2):
                    img_group[i] = mmcv.iminvert(img_group[i])

    # 4a. div_255
    if div_255:
        img_group = [
            mmcv.imnormalize(img, 0, 255, False) for img in img_group
        ]

    # 4b. normalize
    img_group = [
        mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        for img in img_group
    ]

    # 5. pad
    if self.size_divisor is not None:
        img_group = [
            mmcv.impad_to_multiple(img, self.size_divisor)
            for img in img_group
        ]
        pad_shape = img_group[0].shape
    else:
        pad_shape = img_shape

    if is_flow:
        assert len(img_group[0].shape) == 2
        img_group = [
            np.stack((flow_x, flow_y), axis=2)
            for flow_x, flow_y in zip(img_group[0::2], img_group[1::2])
        ]

    # 6. transpose
    img_group = [img.transpose(2, 0, 1) for img in img_group]

    # stack into numpy.array
    img_group = np.stack(img_group, axis=0)
    return img_group, img_shape, pad_shape, scale_factor, crop_quadruple
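# --- Usage sketch (not part of the original code) ---
# The keep_ratio branch above relies on the differing return values of
# mmcv.imrescale and mmcv.imresize: imrescale keeps the aspect ratio and
# returns a single float scale, imresize forces the target size and returns
# separate w/h scales. A quick demonstration on a dummy image:
import mmcv
import numpy as np

img = np.zeros((240, 320, 3), dtype=np.uint8)
rescaled, scale = mmcv.imrescale(img, (256, 256), return_scale=True)
print(rescaled.shape, scale)            # ratio preserved, one scalar scale
resized, w_scale, h_scale = mmcv.imresize(img, (256, 256), return_scale=True)
print(resized.shape, w_scale, h_scale)  # forced to 256x256, two scales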
def read_fn(self):
    """Main function for the read thread.

    Contains three steps:

    1) Read and preprocess (resize + norm) frames from the source.
    2) Create a task from the frames read in step 1 and the buffer.
    3) Put the task into the read queue.
    """
    was_read = True
    start_time = time.time()
    while was_read and not self.stopped:
        # init task
        task = TaskInfo()
        task.clip_vis_length = self.clip_vis_length
        task.frames_inds = self.frames_inds
        task.ratio = self.ratio

        # read buffer
        frames = []
        processed_frames = []
        if len(self.buffer) != 0:
            frames = self.buffer
        if len(self.processed_buffer) != 0:
            processed_frames = self.processed_buffer

        # read and preprocess frames from source and update task
        with self.read_lock:
            before_read = time.time()
            read_frame_cnt = self.window_size - len(frames)
            while was_read and len(frames) < self.window_size:
                was_read, frame = self.cap.read()
                if not self.webcam:
                    # Reading frames too fast may lead to unexpected
                    # performance degradation. If you have enough
                    # resources, this line could be commented out.
                    time.sleep(1 / self.output_fps)
                if was_read:
                    frames.append(mmcv.imresize(frame, self.display_size))
                    processed_frame = mmcv.imresize(
                        frame, self.stdet_input_size).astype(np.float32)
                    _ = mmcv.imnormalize_(processed_frame,
                                          **self.img_norm_cfg)
                    processed_frames.append(processed_frame)
        task.add_frames(self.read_id + 1, frames, processed_frames)

        # update buffer
        if was_read:
            self.buffer = frames[-self.buffer_size:]
            self.processed_buffer = processed_frames[-self.buffer_size:]

        # update read state
        with self.read_id_lock:
            self.read_id += 1
            self.not_end = was_read

        self.read_queue.put((was_read, copy.deepcopy(task)))
        cur_time = time.time()
        logger.debug(
            f'Read thread: {1000*(cur_time - start_time):.0f} ms, '
            f'{read_frame_cnt / (cur_time - before_read):.0f} fps')
        start_time = cur_time
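# --- Usage sketch (not part of the original code) ---
# read_fn() normalizes frames in place with mmcv.imnormalize_, which expects
# a float32 array and mutates it (note the trailing underscore). The config
# values below are the usual ImageNet statistics, assumed here for the demo.
import mmcv
import numpy as np

img_norm_cfg = dict(
    mean=np.array([123.675, 116.28, 103.53], dtype=np.float32),
    std=np.array([58.395, 57.12, 57.375], dtype=np.float32),
    to_rgb=False)
frame = np.random.randint(0, 256, (256, 340, 3)).astype(np.float32)
_ = mmcv.imnormalize_(frame, **img_norm_cfg)  # `frame` is now normalized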
def update_one_model(ori_img, diff_map, temp_fname, best_temp_fname,
                     darknet_model, frcnn_model, flag, start_yolo_num,
                     start_frcnn_num, dest_num, rate, gt_bboxes, update_mask):
    print("Updating %s..." % flag)

    # generate bbox grad mask
    grad_mask = np.zeros((500, 500, 3), dtype=float)
    for bbox in gt_bboxes:
        x1, y1, x2, y2 = bbox
        cv2.rectangle(grad_mask, (x1, y1), (x2, y2), (255, 255, 255), -1)
    grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape(
        (1, 3, 500, 500))

    step = 0
    max_steps_num = 200 if flag == 'frcnn' else 50
    best_yolo_num = start_yolo_num
    best_frcnn_num = start_frcnn_num
    min_yolo_loss = float('inf')
    min_frcnn_loss = float('inf')
    min_criterion = float('inf')
    best_diff_map = None
    gradient = np.zeros((1, 3, 500, 500), dtype=float)
    relu = torch.nn.ReLU()

    while step < max_steps_num:
        save_format_try_image(ori_img, diff_map, temp_fname)
        yolo_input, frcnn_input = get_yolo_image(temp_fname), get_frcnn_image(
            temp_fname)

        # YOLO forward: sum of positive objectness scores as the loss
        yolo_input.requires_grad = True
        list_boxes = darknet_model(yolo_input)
        yolo_results = post_process(list_boxes)
        yolo_num = len(yolo_results)
        boxes_0 = list_boxes[0].view(3, 85, -1)
        loss_0 = torch.sum(relu(boxes_0[:, 4, :]))
        boxes_1 = list_boxes[1].view(3, 85, -1)
        loss_1 = torch.sum(relu(boxes_1[:, 4, :]))
        boxes_2 = list_boxes[2].view(3, 85, -1)
        loss_2 = torch.sum(relu(boxes_2[:, 4, :]))
        yolo_loss = loss_0 + loss_1 + loss_2

        # Faster R-CNN forward
        frcnn_input.requires_grad = True
        frcnn_results, scores, _ = frcnn_model(
            img=[frcnn_input],
            img_metas=[[{
                'filename': '',
                'ori_filename': '',
                'ori_shape': (500, 500, 3),
                'img_shape': (800, 800, 3),
                'pad_shape': (800, 800, 3),
                'scale_factor': np.array([1.6, 1.6, 1.6, 1.6]),
                'flip': False,
                'flip_direction': None,
                'img_norm_cfg': {
                    'mean': np.array([123.675, 116.28, 103.53]),
                    'std': np.array([58.395, 57.12, 57.375]),
                    'to_rgb': True
                }
            }]],
            return_loss=False,
            rescale=False)
        frcnn_results = np.concatenate(frcnn_results)
        frcnn_loss = torch.sum(relu(scores[:, :-1] - 0.049))
        frcnn_num = np.sum(frcnn_results[:, 4] > 0.3)

        # # get gt bboxes
        # gt_bboxes = []
        # h = w = 500
        # for yolo_bbox in yolo_results:
        #     x1, y1, x2, y2 = yolo_bbox[:4]
        #     x1, x2 = int(x1*w), int(x2*w)
        #     y1, y2 = int(y1*h), int(y2*h)
        #     gt_bboxes.append([x1-x2//2, y1-y2//2, x1+x2//2, y1+y2//2])
        # for frcnn_bbox in frcnn_results:
        #     if frcnn_bbox[-1] > 0.3:
        #         x1, y1, x2, y2 = [int(x/1.6) for x in frcnn_bbox[:4]]
        #         gt_bboxes.append([x1, y1, x2, y2])
        # # generate bbox grad mask
        # grad_mask = np.zeros((500, 500, 3), dtype=float)
        # for bbox in gt_bboxes:
        #     x1, y1, x2, y2 = bbox
        #     cv2.rectangle(grad_mask, (x1, y1), (x2, y2), (255, 255, 255), -1)
        # grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape((1, 3, 500, 500))

        if step == 0:
            epoch_criterion = float(yolo_num) / start_yolo_num + float(
                frcnn_num) / start_frcnn_num

        # criterion = yolo_num if flag == 'yolo' else frcnn_num
        criterion = 10000 * (min(1., float(yolo_num) / start_yolo_num) +
                             min(1., float(frcnn_num) / start_frcnn_num)) + (
                                 yolo_loss if flag == 'yolo' else frcnn_loss)
        if criterion < min_criterion:
            min_criterion = criterion
            min_frcnn_loss = frcnn_loss
            min_yolo_loss = yolo_loss
            best_yolo_num = yolo_num
            best_frcnn_num = frcnn_num
            best_diff_map = diff_map.copy()
            copyfile(temp_fname, best_temp_fname)

        # check rate (`fname` is a module-level global here)
        patch_number, area_rate = get_cd_score(fname, best_temp_fname)
        print(
            "%d @ [%d,%d, %d,%d --> %d] f_loss=%g y_loss=%g min_f_loss=%g "
            "min_y_loss=%g, best patch=%d rate=%g limit=%.2f" %
            (step, yolo_num, frcnn_num, best_yolo_num, best_frcnn_num,
             dest_num, frcnn_loss, yolo_loss, min_frcnn_loss, min_yolo_loss,
             patch_number, area_rate, 100. - rate))
        if (((yolo_num == 0 and flag == 'yolo') or
             (frcnn_num == 0 and flag == 'frcnn')) and area_rate < 0.02
                and patch_number <= 10):
            break

        # backprop through YOLO and resize the gradient to the 500x500 canvas
        darknet_model.zero_grad()
        yolo_loss.backward(retain_graph=False)
        yolo_d_grad = yolo_input.grad.data.cpu().numpy().reshape(
            (1, 3, 608, 608))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad[0], 0, 1), 1, 2)
        yolo_d_grad = mmcv.imresize(yolo_d_grad, (500, 500))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad, 1, 2),
                                  0, 1).reshape((1, 3, 500, 500))
        # yolo_d_grad = blur(yolo_d_grad)

        # backprop through Faster R-CNN, undoing the normalization scaling
        frcnn_model.zero_grad()
        frcnn_loss.backward(retain_graph=False)
        frcnn_d_grad = frcnn_input.grad.data.cpu().numpy().reshape(
            (1, 3, 800, 800))
        frcnn_d_grad[:, 0, :, :] = frcnn_d_grad[:, 0, :, :] * (58.395 / 255.)
        frcnn_d_grad[:, 1, :, :] = frcnn_d_grad[:, 1, :, :] * (57.12 / 255.)
        frcnn_d_grad[:, 2, :, :] = frcnn_d_grad[:, 2, :, :] * (57.375 / 255.)
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad[0], 0, 1), 1, 2)
        frcnn_d_grad = mmcv.imresize(frcnn_d_grad, (500, 500))
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad, 1, 2),
                                   0, 1).reshape((1, 3, 500, 500))
        # frcnn_d_norm = np.linalg.norm(frcnn_d_grad, ord=2, axis=1).reshape(500, 500)
        # frcnn_d_norm = (frcnn_d_norm - np.min(frcnn_d_norm)) / (np.max(frcnn_d_norm) - np.min(frcnn_d_norm))
        # frcnn_weight = np.repeat(frcnn_d_norm.reshape(1, 1, 500, 500), 3, axis=1)
        # frcnn_d_grad = np.multiply(frcnn_weight, frcnn_d_grad)
        frcnn_d_grad = normalize(frcnn_d_grad.reshape(3, -1),
                                 axis=1).reshape((1, 3, 500, 500))
        frcnn_d_grad = frcnn_d_grad * 10
        # frcnn_d_grad = blur(frcnn_d_grad)

        # blend the two gradients
        if flag == 'yolo':
            alpha = 0.95
        else:
            alpha = 0.8
        gradient = (1. - alpha) * frcnn_d_grad + alpha * yolo_d_grad
        # if flag == 'frcnn':
        #     gradient = 0.9 * gradient + 0.1 * grad
        # else:
        #     gradient = grad

        # step size schedule: decays linearly with step
        loss = yolo_loss if flag == 'yolo' else frcnn_loss
        if loss > 10:
            step_size = 2  # 0.1 + 0.3*(float(loss)-10.)/(start_loss-10.)
        elif loss > 5:
            step_size = 2
        else:
            step_size = 0.2
        step_size = step_size * (1. - float(step) / max_steps_num)
        gradient = step_size * gradient

        # blur
        # gradient[0, 0, :, :] = gaussian_filter(gradient[0, 0, :, :], sigma=3)
        # gradient[0, 1, :, :] = gaussian_filter(gradient[0, 1, :, :], sigma=3)
        # gradient[0, 2, :, :] = gaussian_filter(gradient[0, 2, :, :], sigma=3)

        # restrict the update to the allowed region, then descend
        gradient *= update_mask.astype(float)
        diff_map -= gradient

        # # check area rate
        # diff_map[grad_mask == 0] = 0
        # diff_map_change = np.sum(np.abs(diff_map), axis=1)
        # high_thresh = np.percentile(diff_map_change, rate)
        # gray_mask = ((diff_map_change > high_thresh) * 255.).astype(np.uint8)
        # gray_mask = gray_mask.reshape(500, 500)
        # diff_map[0, 0, :, :][gray_mask == 0] = 0
        # diff_map[0, 1, :, :][gray_mask == 0] = 0
        # diff_map[0, 2, :, :][gray_mask == 0] = 0

        # check connected parts' number: keep only the 10 largest components
        save_format_try_image(ori_img, diff_map, temp_fname)
        cd_map = get_cd_map(fname, temp_fname)
        labels = measure.label(cd_map, background=0, connectivity=2)
        label_num = np.max(labels)
        if label_num > 10:
            areas = [np.sum(labels == i) for i in range(1, label_num + 1)]
            label_ids = list(range(1, label_num + 1))
            areas, label_ids = zip(*sorted(zip(areas, label_ids)))
            for i in label_ids[:-10]:
                # gray_mask[labels == i] = 0
                diff_map[0, 0, :, :][labels == i] = 0
                diff_map[0, 1, :, :][labels == i] = 0
                diff_map[0, 2, :, :][labels == i] = 0

        # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        # gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_CLOSE, kernel)
        # gray_mask = gray_mask.reshape(500, 500)
        # diff_map[0, 0, :, :][gray_mask == 0] = 0
        # diff_map[0, 1, :, :][gray_mask == 0] = 0
        # diff_map[0, 2, :, :][gray_mask == 0] = 0

        # see = check_image(diff_map)
        # cv2.imwrite('check/%03d_region.jpg' % step, see)
        # cv2.imwrite('check/%03d_region_filter.jpg' % step, cv2.medianBlur(see, 3))
        step += 1

    return (float(best_yolo_num) / start_yolo_num +
            float(best_frcnn_num) / start_frcnn_num >= epoch_criterion,
            best_diff_map)
def sample_dist(gt_bboxes, gt_masks, cfg, num_pts):
    sample_dist_p = cfg.get('sample_dist_p', 1.5)
    pts_list = []
    pts_label_list = []
    # _len = int(np.sqrt(num_pts))
    # assert _len**2 == num_pts
    for i in range(len(gt_bboxes)):
        x1, y1, x2, y2 = gt_bboxes[i].cpu().numpy().astype(np.int32)
        if cfg.get('resize_sample', True):
            w = np.maximum(x2 - x1 + 1, 1)
            h = np.maximum(y2 - y1 + 1, 1)
            mask = mmcv.imresize(gt_masks[i][y1:y1 + h, x1:x1 + w],
                                 (cfg.mask_size, cfg.mask_size))
            polygons = mask_to_poly(mask)
            # rasterize the contour: contour pixels become 0
            distance_map = np.ones(mask.shape).astype(np.uint8)
            for poly in polygons:
                poly = np.array(poly).astype(int)
                for j in range(len(poly) // 2):
                    x_0, y_0 = poly[2 * j:2 * j + 2]
                    if j == len(poly) // 2 - 1:
                        x_1, y_1 = poly[0:2]
                    else:
                        x_1, y_1 = poly[2 * j + 2:2 * j + 4]
                    cv2.line(distance_map, (x_0, y_0), (x_1, y_1), (0),
                             thickness=2)
            roi_dist_map = cv2.distanceTransform(distance_map, cv2.DIST_L2, 3)
            con_index = np.stack(np.nonzero(roi_dist_map == 0)[::-1], axis=-1)
            roi_dist_map[roi_dist_map == 0] = 1
            # pixels close to the contour get higher sampling probability
            prob_dist_map = 1 / roi_dist_map
            prob_dist_map = np.power(prob_dist_map, sample_dist_p)
            prob_dist_map = prob_dist_map / prob_dist_map.sum()
            index_y, index_x = np.nonzero(prob_dist_map > 0)
            index = np.stack([index_x, index_y], axis=-1)
            _len = index.shape[0]
            if len(con_index) == 0:
                pts = np.zeros([2 * num_pts])
            else:
                repeat = num_pts // _len
                mod = num_pts % _len
                perm = np.random.choice(
                    _len, mod, replace=False, p=prob_dist_map.reshape(-1))
                draw = [index.copy() for i in range(repeat)]
                draw.append(index[perm])
                draw = np.concatenate(draw, 0)
                # draw[:num_extreme] = extremes[:num_extreme]
                draw = draw + np.random.rand(*draw.shape)
                # map the mask-grid points back to image coordinates
                x_scale = float(w) / cfg.mask_size
                y_scale = float(h) / cfg.mask_size
                draw[:, 0] = draw[:, 0] * x_scale + x1
                draw[:, 1] = draw[:, 1] * y_scale + y1
                pts = draw.reshape(2 * num_pts)
        else:
            polygons = mask_to_poly(gt_masks[i])
            distance_map = np.ones(gt_masks[i].shape).astype(np.uint8)
            for poly in polygons:
                poly = np.array(poly).astype(int)
                for j in range(len(poly) // 2):
                    x_0, y_0 = poly[2 * j:2 * j + 2]
                    if j == len(poly) // 2 - 1:
                        x_1, y_1 = poly[0:2]
                    else:
                        x_1, y_1 = poly[2 * j + 2:2 * j + 4]
                    cv2.line(distance_map, (x_0, y_0), (x_1, y_1), (0),
                             thickness=2)
            dist = cv2.distanceTransform(distance_map, cv2.DIST_L2, 3)
            roi_dist_map = dist[y1:y2, x1:x2]
            con_index = np.stack(np.nonzero(roi_dist_map == 0)[::-1], axis=-1)
            roi_dist_map[roi_dist_map == 0] = 1
            prob_dist_map = 1 / roi_dist_map
            prob_dist_map = np.power(prob_dist_map, sample_dist_p)
            prob_dist_map = prob_dist_map / prob_dist_map.sum()
            index_y, index_x = np.nonzero(prob_dist_map > 0)
            index = np.stack([index_x, index_y], axis=-1)
            _len = index.shape[0]
            if len(con_index) == 0:
                pts = np.zeros([2 * num_pts])
            else:
                repeat = num_pts // _len
                mod = num_pts % _len
                perm = np.random.choice(
                    _len, mod, replace=False, p=prob_dist_map.reshape(-1))
                draw = [index.copy() for i in range(repeat)]
                draw.append(index[perm])
                draw = np.concatenate(draw, 0)
                draw[:, 0] = draw[:, 0] + x1
                draw[:, 1] = draw[:, 1] + y1
                pts = draw.reshape(2 * num_pts)
        pts_list.append(pts)
        # label each sampled point by the mask value under it
        pts_long = pts.astype(np.int64)
        pts_label = gt_masks[i][pts_long[1::2], pts_long[0::2]]
        pts_label_list.append(pts_label)
    pts_list = np.stack(pts_list, 0)
    pts_label_list = np.stack(pts_label_list, 0)
    return pts_list, pts_label_list
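# --- Usage sketch (not part of the original code) ---
# The core of sample_dist(): pixels near the object contour get a higher
# sampling probability via an inverse distance transform raised to
# sample_dist_p. A self-contained demo on a synthetic square mask; the mask
# geometry and the 16-point draw are made up for illustration.
import cv2
import numpy as np

mask = np.zeros((28, 28), dtype=np.uint8)
cv2.rectangle(mask, (6, 6), (21, 21), 1, -1)

distance_map = np.ones_like(mask)
cv2.rectangle(distance_map, (6, 6), (21, 21), 0, thickness=2)  # contour -> 0
dist = cv2.distanceTransform(distance_map, cv2.DIST_L2, 3)
dist[dist == 0] = 1
prob = (1.0 / dist) ** 1.5           # sample_dist_p = 1.5
prob = prob / prob.sum()

# np.nonzero walks the grid in the same row-major order as reshape(-1),
# so index[k] lines up with prob.flat[k].
index_y, index_x = np.nonzero(prob > 0)
index = np.stack([index_x, index_y], axis=-1)
picks = np.random.choice(len(index), 16, replace=False, p=prob.reshape(-1))
pts = index[picks] + np.random.rand(16, 2)  # jitter inside each pixel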
def vis_seg(img, result, score_thr, save_dir):
    class_names = [
        'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
        'bicycle'
    ]
    print(class_names)
    imgs = [img]
    if result[0]:
        for img, cur_result in zip(imgs, result):
            h, w, _ = img.shape
            img_show = img[:h, :w, :]

            seg_label = cur_result[0]
            seg_label = seg_label.cpu().numpy().astype(np.uint8)
            cate_label = cur_result[1]
            cate_label = cate_label.cpu().numpy()
            score = cur_result[2].cpu().numpy()

            vis_inds = score > score_thr
            seg_label = seg_label[vis_inds]
            num_mask = seg_label.shape[0]
            cate_label = cate_label[vis_inds]
            cate_score = score[vis_inds]

            # sort masks by density so small masks are drawn on top
            mask_density = []
            for idx in range(num_mask):
                cur_mask = seg_label[idx, :, :]
                cur_mask = mmcv.imresize(cur_mask, (w, h))
                cur_mask = (cur_mask > 0.5).astype(np.int32)
                mask_density.append(cur_mask.sum())
            orders = np.argsort(mask_density)
            seg_label = seg_label[orders]
            cate_label = cate_label[orders]
            cate_score = cate_score[orders]

            seg_show = img_show.copy()
            for idx in range(num_mask):
                idx = -(idx + 1)
                cur_mask = seg_label[idx, :, :]
                cur_mask = mmcv.imresize(cur_mask, (w, h))
                cur_mask = (cur_mask > 0.5).astype(np.uint8)
                if cur_mask.sum() == 0:
                    continue
                color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
                cur_mask_bool = cur_mask.astype(bool)
                contours, _ = cv2.findContours(cur_mask * 255, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)
                seg_show[cur_mask_bool] = img_show[
                    cur_mask_bool] * 0.4 + color_mask * 0.6
                color_mask = color_mask[0].tolist()
                cv2.drawContours(seg_show, contours, -1, tuple(color_mask), 1,
                                 lineType=cv2.LINE_AA)

                cur_cate = cate_label[idx]
                cur_score = cate_score[idx]
                label_text = class_names[cur_cate]
                center_y, center_x = ndimage.measurements.center_of_mass(
                    cur_mask)
                vis_pos = (max(int(center_x) - 10, 0), int(center_y))
                cv2.putText(seg_show, label_text, vis_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255),
                            lineType=cv2.LINE_AA)
                cv2.putText(seg_show, '{:.1f}%'.format(cur_score * 100),
                            (vis_pos[0], vis_pos[1] + 9),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.25, (255, 255, 255),
                            lineType=cv2.LINE_AA)
            mmcv.imshow(seg_show)
    else:
        print('no detections')
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    """Test model with single gpu.

    This method tests model with single gpu and gives the 'show' option.
    By setting ``show=True``, it saves the visualization results under
    ``out_dir``.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool): Whether to save visualization results. Default: False.
        out_dir (str): The path to save visualization results. Default: None.

    Returns:
        list[dict]: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        if show:
            # Visualize the results of the MMDetection3D model;
            # 'show_results' is the MMDetection3D visualization API.
            models_3d = (Base3DDetector, Base3DSegmentor,
                         SingleStageMono3DDetector)
            if isinstance(model.module, models_3d):
                model.module.show_results(data, result, out_dir=out_dir)
            # Visualize the results of the MMDetection model;
            # 'show_result' is the MMDetection visualization API.
            else:
                batch_size = len(result)
                if batch_size == 1 and isinstance(data['img'][0],
                                                  torch.Tensor):
                    img_tensor = data['img'][0]
                else:
                    img_tensor = data['img'][0].data[0]
                img_metas = data['img_metas'][0].data[0]
                imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
                assert len(imgs) == len(img_metas)

                for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                    h, w, _ = img_meta['img_shape']
                    img_show = img[:h, :w, :]

                    ori_h, ori_w = img_meta['ori_shape'][:-1]
                    img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                    if out_dir:
                        out_file = osp.join(out_dir, img_meta['ori_filename'])
                    else:
                        out_file = None

                    model.module.show_result(
                        img_show,
                        result[j],
                        show=show,
                        out_file=out_file,
                        score_thr=show_score_thr)
        results.extend(result)

        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def draw_assign_results(self, img_meta, sampling_result):
    if self.draw_assign_results_path == '':
        return
    img = imread(img_meta['ori_filename'])
    if img is None:
        return
    # local imports, kept inside the method as in the original
    import cv2
    import numpy as np

    # resize with crop: scale so the image covers the target shape,
    # then center-crop to it
    img_h, img_w, _ = img.shape
    dst_h, dst_w, _ = img_meta['img_shape']
    img_name = img_meta['ori_filename'].strip('\n').split('/')[-1]
    w_scale = dst_w / img_w
    h_scale = dst_h / img_h
    scale = max(w_scale, h_scale)
    img = imresize(img, (int(img_w * scale), int(img_h * scale)))

    # crop
    now_w = int(img_w * scale)
    now_h = int(img_h * scale)
    box = [0, 0, now_w, now_h]
    if now_w > dst_w:
        box = [(now_w - dst_w) // 2, 0, now_w - (now_w - dst_w) // 2, now_h]
        diff = box[2] - box[0] + 1 - dst_w
        if diff != 0:
            box[2] -= diff
    elif now_h > dst_h:
        box = [0, (now_h - dst_h) // 2, now_w, now_h - (now_h - dst_h) // 2]
        diff = box[3] - box[1] + 1 - dst_h
        if diff != 0:
            box[3] -= diff
    img = imcrop(img, np.array(box))

    pos_anchor = sampling_result.pos_bboxes
    pos_anchor_label = sampling_result.pos_gt_labels
    color_dict = {
        0: (0, 255, 0),
        1: (255, 153, 18),
        2: (160, 82, 45),
        3: (255, 0, 0),
        4: (3, 168, 158),
        5: (0, 255, 255),
        6: (138, 43, 226),
        7: (64, 224, 205),
        8: (122, 123, 124)
    }
    img = np.ascontiguousarray(img)
    for i in range(len(pos_anchor)):
        anchor = pos_anchor[i, :]
        x_min = max(0, int(anchor[0]))
        y_min = max(0, int(anchor[1]))
        x_max = max(0, int(anchor[2]))
        y_max = max(0, int(anchor[3]))
        label = pos_anchor_label[i].cpu().item()
        if label in self.draw_label:
            cv2.rectangle(
                img, (x_min, y_min), (x_max, y_max),
                color=color_dict[label],
                thickness=1)
    imwrite(img, self.draw_assign_results_path + img_name)
def extract_frame(vid_item):
    """Generate optical flow using dense flow.

    Args:
        vid_item (list): Video item containing video full path,
            video (short) path, video id.

    Returns:
        bool: Whether generate optical flow successfully.
    """
    full_path, vid_path, vid_id, method, task, report_file = vid_item
    if '/' in vid_path:
        act_name = osp.basename(osp.dirname(vid_path))
        out_full_path = osp.join(args.out_dir, act_name)
    else:
        out_full_path = args.out_dir

    run_success = -1

    if task == 'rgb':
        if args.use_opencv:
            # Unlike denseflow, OpenCV will not create a sub directory
            # with the video name, so do it manually.
            try:
                video_name = osp.splitext(osp.basename(vid_path))[0]
                out_full_path = osp.join(out_full_path, video_name)

                vr = mmcv.VideoReader(full_path)
                for i, vr_frame in enumerate(vr):
                    if vr_frame is not None:
                        h, w, _ = np.shape(vr_frame)  # frames are (H, W, C)
                        if args.new_short == 0:
                            if args.new_width == 0 or args.new_height == 0:
                                # Keep original shape
                                out_img = vr_frame
                            else:
                                out_img = mmcv.imresize(
                                    vr_frame,
                                    (args.new_width, args.new_height))
                        else:
                            # resize the short side to new_short
                            if min(h, w) == w:
                                new_w = args.new_short
                                new_h = int((new_w / w) * h)
                            else:
                                new_h = args.new_short
                                new_w = int((new_h / h) * w)
                            # mmcv.imresize takes the target size as (w, h)
                            out_img = mmcv.imresize(vr_frame, (new_w, new_h))
                        mmcv.imwrite(out_img,
                                     f'{out_full_path}/img_{i + 1:05d}.jpg')
                    else:
                        warnings.warn(
                            'Length inconsistent! '
                            f'Early stop with {i + 1} out of {len(vr)} '
                            'frames.')
                        break
                run_success = 0
            except Exception:
                run_success = -1
        else:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -nw={args.new_width} -nh={args.new_height} -v')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -ns={args.new_short} -v')
            run_success = os.system(cmd)
    elif task == 'flow':
        if args.input_frames:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -nw={args.new_width} -nh={args.new_height} -v --if')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -ns={args.new_short} -v --if')
        else:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -nw={args.new_width} -nh={args.new_height} -v')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -ns={args.new_short} -v')
        run_success = os.system(cmd)
    else:
        if args.new_short == 0:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -nw={args.new_width} -nh={args.new_height} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -nw={args.new_width} -nh={args.new_height} -v')
        else:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -ns={args.new_short} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -ns={args.new_short} -v')
        run_success_rgb = os.system(cmd_rgb)
        run_success_flow = os.system(cmd_flow)
        if run_success_flow == 0 and run_success_rgb == 0:
            run_success = 0

    if run_success == 0:
        print(f'{task} {vid_id} {vid_path} {method} done')
        sys.stdout.flush()
        lock.acquire()
        with open(report_file, 'a') as f:
            line = full_path + '\n'
            f.write(line)
        lock.release()
    else:
        print(f'{task} {vid_id} {vid_path} {method} got something wrong')
        sys.stdout.flush()

    return True
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    efficient_test=False,
                    opacity=0.5,
                    pre_eval=False,
                    format_only=False,
                    format_args={}):
    """Test with single GPU by progressive mode.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (utils.data.Dataloader): Pytorch data loader.
        show (bool): Whether show results during inference. Default: False.
        out_dir (str, optional): If specified, the results will be dumped
            into the directory to save output results.
        efficient_test (bool): Whether save the results as local numpy files
            to save CPU memory during evaluation. Mutually exclusive with
            pre_eval and format_results. Default: False.
        opacity(float): Opacity of painted segmentation map.
            Default 0.5. Must be in (0, 1] range.
        pre_eval (bool): Use dataset.pre_eval() function to generate
            pre_results for metric evaluation. Mutually exclusive with
            efficient_test and format_results. Default: False.
        format_only (bool): Only format result for results commit.
            Mutually exclusive with pre_eval and efficient_test.
            Default: False.
        format_args (dict): The args for format_results. Default: {}.

    Returns:
        list: list of evaluation pre-results or list of save file names.
    """
    if efficient_test:
        warnings.warn(
            'DeprecationWarning: ``efficient_test`` will be deprecated, the '
            'evaluation is CPU memory friendly with pre_eval=True')
        mmcv.mkdir_or_exist('.efficient_test')
    # when none of them is set true, return segmentation results as
    # a list of np.array.
    assert [efficient_test, pre_eval, format_only].count(True) <= 1, \
        '``efficient_test``, ``pre_eval`` and ``format_only`` are mutually ' \
        'exclusive, only one of them could be true.'

    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    # The pipeline about how the data_loader retrieves samples from dataset:
    # sampler -> batch_sampler -> indices
    # The indices are passed to dataset_fetcher to get data from dataset.
    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
    # we use batch_sampler to get correct data idx
    loader_indices = data_loader.batch_sampler

    for batch_indices, data in zip(loader_indices, data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)

        if efficient_test:
            result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]

        if format_only:
            result = dataset.format_results(
                result, indices=batch_indices, **format_args)
        if pre_eval:
            # TODO: adapt samples_per_gpu > 1.
            # only samples_per_gpu=1 valid now
            result = dataset.pre_eval(result, indices=batch_indices)

        results.extend(result)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file,
                    opacity=opacity)

        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def __call__(self, results):
    rank, _ = get_dist_info()
    if isinstance(self.height, int):
        dst_height = self.height
        dst_min_width = self.min_width
        dst_max_width = self.max_width
    else:
        # Multi-scale resize used in distributed training.
        # Choose one (height, width) pair for one rank id.
        idx = rank % len(self.height)
        dst_height = self.height[idx]
        dst_min_width = self.min_width[idx]
        dst_max_width = self.max_width[idx]

    img_shape = results['img_shape']
    ori_height, ori_width = img_shape[:2]
    valid_ratio = 1.0
    resize_shape = list(img_shape)
    pad_shape = list(img_shape)

    if self.keep_aspect_ratio:
        new_width = math.ceil(float(dst_height) / ori_height * ori_width)
        width_divisor = int(1 / self.width_downsample_ratio)
        # make sure new_width is an integral multiple of width_divisor.
        if new_width % width_divisor != 0:
            new_width = round(new_width / width_divisor) * width_divisor
        if dst_min_width is not None:
            new_width = max(dst_min_width, new_width)
        if dst_max_width is not None:
            valid_ratio = min(1.0, 1.0 * new_width / dst_max_width)
            resize_width = min(dst_max_width, new_width)
            img_resize = mmcv.imresize(
                results['img'], (resize_width, dst_height),
                backend=self.backend)
            resize_shape = img_resize.shape
            pad_shape = img_resize.shape
            if new_width < dst_max_width:
                img_resize = mmcv.impad(
                    img_resize,
                    shape=(dst_height, dst_max_width),
                    pad_val=self.img_pad_value)
                pad_shape = img_resize.shape
        else:
            img_resize = mmcv.imresize(
                results['img'], (new_width, dst_height),
                backend=self.backend)
            resize_shape = img_resize.shape
            pad_shape = img_resize.shape
    else:
        img_resize = mmcv.imresize(
            results['img'], (dst_max_width, dst_height),
            backend=self.backend)
        resize_shape = img_resize.shape
        pad_shape = img_resize.shape

    results['img'] = img_resize
    results['img_shape'] = resize_shape
    results['resize_shape'] = resize_shape
    results['pad_shape'] = pad_shape
    results['valid_ratio'] = valid_ratio
    return results
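# --- Usage sketch (not part of the original code) ---
# The keep_aspect_ratio branch above in a nutshell: resize to a fixed height,
# clamp the width to [min_width, max_width], pad up to max_width, and record
# which fraction of the padded width is valid. The numbers below are
# hypothetical demo values.
import math

import mmcv
import numpy as np

img = np.zeros((64, 100, 3), dtype=np.uint8)   # (h, w, c)
dst_height, dst_min_width, dst_max_width = 32, 32, 100

new_width = math.ceil(dst_height / img.shape[0] * img.shape[1])  # -> 50
new_width = max(dst_min_width, new_width)
valid_ratio = min(1.0, new_width / dst_max_width)                # -> 0.5
resized = mmcv.imresize(img, (min(dst_max_width, new_width), dst_height))
padded = mmcv.impad(resized, shape=(dst_height, dst_max_width), pad_val=0)
assert padded.shape == (32, 100, 3) and valid_ratio == 0.5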
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    fps=3,
                    show_score_thr=0.3):
    """Test model with single gpu.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool, optional): If True, visualize the prediction results.
            Defaults to False.
        out_dir (str, optional): Path of directory to save the visualization
            results. Defaults to None.
        fps (int, optional): FPS of the output video. Defaults to 3.
        show_score_thr (float, optional): The score threshold of
            visualization (Only used in VID for now). Defaults to 0.3.

    Returns:
        dict[str, list]: The prediction results.
    """
    model.eval()
    results = defaultdict(list)
    dataset = data_loader.dataset
    prev_img_meta = None
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        batch_size = data['img'][0].size(0)
        if show or out_dir:
            assert batch_size == 1, 'Only support batch_size=1 when testing.'
            img_tensor = data['img'][0]
            img_meta = data['img_metas'][0].data[0][0]
            img = tensor2imgs(img_tensor, **img_meta['img_norm_cfg'])[0]

            h, w, _ = img_meta['img_shape']
            img_show = img[:h, :w, :]

            ori_h, ori_w = img_meta['ori_shape'][:-1]
            img_show = mmcv.imresize(img_show, (ori_w, ori_h))

            if out_dir:
                out_file = osp.join(out_dir, img_meta['ori_filename'])
            else:
                out_file = None

            model.module.show_result(
                img_show,
                result,
                show=show,
                out_file=out_file,
                score_thr=show_score_thr)

            # Whether a video needs to be generated from images.
            # frame_id == 0 means the model starts processing a new video,
            # therefore we can write the previous video.
            # There are two corner cases.
            # Case 1: prev_img_meta == None means there is no previous video.
            # Case 2: i == len(dataset) - 1 means processing the last video.
            need_write_video = (
                prev_img_meta is not None and img_meta['frame_id'] == 0
                or i == len(dataset) - 1)
            if out_dir and need_write_video:
                prev_img_prefix, prev_img_name = prev_img_meta[
                    'ori_filename'].rsplit('/', 1)
                prev_img_idx, prev_img_type = prev_img_name.split('.')
                prev_filename_tmpl = '{:0' + str(
                    len(prev_img_idx)) + 'd}.' + prev_img_type
                prev_img_dirs = f'{out_dir}/{prev_img_prefix}'
                prev_img_names = sorted(os.listdir(prev_img_dirs))
                prev_start_frame_id = int(prev_img_names[0].split('.')[0])
                prev_end_frame_id = int(prev_img_names[-1].split('.')[0])

                mmcv.frames2video(
                    prev_img_dirs,
                    f'{prev_img_dirs}/out_video.mp4',
                    fps=fps,
                    fourcc='mp4v',
                    filename_tmpl=prev_filename_tmpl,
                    start=prev_start_frame_id,
                    end=prev_end_frame_id,
                    show_progress=False)

            prev_img_meta = img_meta

        for key in result:
            if 'mask' in key:
                result[key] = encode_mask_results(result[key])

        for k, v in result.items():
            results[k].append(v)

        for _ in range(batch_size):
            prog_bar.update()

    return results
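# --- Usage sketch (not part of the original code) ---
# The video-writing step above boils down to a single mmcv.frames2video call.
# A hedged, self-contained example with hypothetical paths and frame
# numbering (the directory must already contain the numbered frames):
import mmcv

frame_dir = 'out/MOT17-02'            # e.g. 000000.jpg, 000001.jpg, ...
mmcv.frames2video(
    frame_dir,
    f'{frame_dir}/out_video.mp4',
    fps=3,
    fourcc='mp4v',
    filename_tmpl='{:06d}.jpg',       # must match the on-disk frame names
    start=0,
    end=10,
    show_progress=False)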
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result, tuple) and len(result) == 2:
            # Mask R-CNN
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        elif isinstance(result, tuple) and len(result) == 3:
            # Mask R-CNN + Offset
            bbox_results, mask_results, offset_results = result
            if mask_results is not None:
                encoded_mask_results = encode_mask_results(mask_results)
                result = bbox_results, encoded_mask_results, offset_results
            else:
                # only pred offset
                result = bbox_results, offset_results
        elif isinstance(result, tuple) and len(result) == 4:
            # Mask R-CNN + Offset + Height
            bbox_results, mask_results, offset_results, height_results = \
                result
            encoded_mask_results = encode_mask_results(mask_results)
            result = (bbox_results, encoded_mask_results, offset_results,
                      height_results)
        results.append(result)

        batch_size = len(data['img_metas'][0].data)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    efficient_test=False,
                    opacity=0.5):
    """Test with single GPU.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (utils.data.Dataloader): Pytorch data loader.
        show (bool): Whether show results during inference. Default: False.
        out_dir (str, optional): If specified, the results will be dumped
            into the directory to save output results.
        efficient_test (bool): Whether save the results as local numpy files
            to save CPU memory during evaluation. Default: False.
        opacity(float): Opacity of painted segmentation map.
            Default 0.5. Must be in (0, 1] range.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file,
                    opacity=opacity)

        if isinstance(result, list):
            if efficient_test:
                result = [np2tmp(_) for _ in result]
            results.extend(result)
        else:
            if efficient_test:
                result = np2tmp(result)
            results.append(result)

        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def single_gpu_test(model,
                    data_loader,
                    bbox_head=None,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(
                return_loss=False,
                rescale=True,
                show=show,
                out_dir=out_dir,
                **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        if bbox_head.type == 'LSHead':
            if bbox_head.task == 'bbox':
                extremes = result.pop(-1)
                result = result[0]
            elif bbox_head.task == 'segm':
                bbox_results, poly_results = result
                img_metas = data['img_metas'][0].data[0]
                ori_h, ori_w = img_metas[0]['ori_shape'][:-1]
                encoded_poly_results = encode_poly_results(
                    poly_results, ori_h, ori_w)
                result = bbox_results, encoded_poly_results
        elif isinstance(result, tuple):
            # encode mask results
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        results.append(result)

        batch_size = len(data['img_metas'][0].data)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    PALETTE = getattr(dataset, 'PALETTE', None)
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result[j],
                    bbox_color=PALETTE,
                    text_color=PALETTE,
                    mask_color=PALETTE,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        # This logic is only used in panoptic segmentation test.
        elif isinstance(result[0], dict) and 'ins_results' in result[0]:
            for j in range(len(result)):
                bbox_results, mask_results = result[j]['ins_results']
                result[j]['ins_results'] = (
                    bbox_results, encode_mask_results(mask_results))

        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
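# --- Usage sketch (not part of the original code) ---
# encode_mask_results() ultimately RLE-encodes binary masks the way COCO
# does. The equivalent pycocotools call, shown on a dummy mask:
import numpy as np
import pycocotools.mask as maskUtils

im_mask = np.zeros((100, 120), dtype=np.uint8)
im_mask[20:60, 30:90] = 1
# encode() wants a Fortran-ordered (H, W, N) uint8 array; take the first
# (and only) RLE from the returned list.
rle = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]
decoded = maskUtils.decode(rle)
assert (decoded == im_mask).all()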
def show_result_ins(img,
                    result,
                    score_thr=0.2,
                    sort_by_density=False,
                    out_file=None):
    """Visualize the instance segmentation results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The instance segmentation result.
        score_thr (float): The threshold to visualize the masks.
        sort_by_density (bool): Sort the masks by their density.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If `out_file` is not specified, the color mask
            image is returned, otherwise None is returned.
    """
    img = mmcv.imread(img)
    img_show = img.copy()
    h, w, _ = img.shape
    mask = np.zeros_like(img_show)
    if not result or result == [None]:
        return mask
    cur_result = result[0]
    seg_label = cur_result[0]
    seg_label = seg_label.cpu().numpy().astype(np.uint8)
    cate_label = cur_result[1]
    cate_label = cate_label.cpu().numpy()
    score = cur_result[2].cpu().numpy()

    vis_inds = score > score_thr
    seg_label = seg_label[vis_inds]
    num_mask = seg_label.shape[0]
    cate_label = cate_label[vis_inds]
    cate_score = score[vis_inds]

    if sort_by_density:
        mask_density = []
        for idx in range(num_mask):
            cur_mask = seg_label[idx, :, :]
            cur_mask = mmcv.imresize(cur_mask, (w, h))
            cur_mask = (cur_mask > 0.5).astype(np.int32)
            mask_density.append(cur_mask.sum())
        orders = np.argsort(mask_density)
        seg_label = seg_label[orders]
        cate_label = cate_label[orders]
        cate_score = cate_score[orders]

    np.random.seed(42)
    color_masks = [
        np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        for _ in range(num_mask)
    ]
    for idx in range(num_mask):
        idx = -(idx + 1)
        cur_cate = cate_label[idx]
        if cur_cate == 0:
            cur_mask = seg_label[idx, :, :]
            cur_mask = mmcv.imresize(cur_mask, (w, h))
            cur_mask = (cur_mask > 0.5).astype(np.uint8)
            if cur_mask.sum() == 0:
                continue
            color_mask = color_masks[idx]
            cur_mask_bool = cur_mask.astype(bool)
            mask[cur_mask_bool] = color_mask
            cur_score = cate_score[idx]

    if out_file is None:
        return mask
    else:
        mmcv.imwrite(mask, out_file)
def get_seg_masks(self,
                  mask_pred,
                  det_bboxes,
                  det_labels,
                  rcnn_test_cfg,
                  ori_shape,
                  img_shape,
                  scale_factor,
                  rescale,
                  return_rect=False):
    """Get segmentation masks from mask_pred and bboxes.

    Args:
        mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
            For single-scale testing, mask_pred is the direct output of
            model, whose type is Tensor, while for multi-scale testing,
            it will be converted to numpy array outside of this method.
        det_bboxes (Tensor): shape (n, 4/5)
        det_labels (Tensor): shape (n, )
        rcnn_test_cfg (dict): rcnn testing config
        ori_shape: original image size

    Returns:
        list[list]: encoded masks
    """
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.sigmoid().cpu().detach().numpy()
    assert isinstance(mask_pred, np.ndarray)
    # when enabling mixed precision training, mask_pred may be float16
    # numpy array
    mask_pred = mask_pred.astype(np.float32)

    if return_rect:
        rects = []
    else:
        cls_segms = [[] for _ in range(self.num_classes - 1)]
    bboxes = det_bboxes.cpu().detach().numpy()[:, :4]
    labels = det_labels.cpu().detach().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h, img_w = img_shape[:2]
        scale_factor = 1.0

    for i in range(bboxes.shape[0]):
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        if not self.class_agnostic:
            mask_pred_ = mask_pred[i, label, :, :]
        else:
            mask_pred_ = mask_pred[i, 0, :, :]
        im_mask = np.zeros((img_h, img_w), dtype=np.uint8)

        bbox_mask = mmcv.imresize(mask_pred_, (w, h))
        bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
            np.uint8)
        try:
            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
        except Exception:
            print(bbox, img_h, img_w)
            exit()

        if return_rect:
            # fit a rotated rectangle to the mask pixels; np.where yields
            # (row, col) int64, so cast for OpenCV and flip back to (x, y)
            cnt = np.stack(np.where(im_mask == 1)).T.astype(np.int32)
            rect = cv2.boxPoints(cv2.minAreaRect(cnt))
            rect = np.array(rect)[:, ::-1].reshape(-1)
            rects.append(rect)
        else:
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms[label - 1].append(rle)

    if return_rect:
        return rects
    return cls_segms
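# --- Usage sketch (not part of the original code) ---
# The return_rect branch converts a binary mask to a rotated rectangle via
# cv2.minAreaRect + cv2.boxPoints. Demo on a synthetic tilted blob; note the
# (row, col) -> (x, y) flip done with [:, ::-1], mirroring the code above.
import cv2
import numpy as np

im_mask = np.zeros((80, 80), dtype=np.uint8)
cv2.ellipse(im_mask, (40, 40), (25, 10), 30, 0, 360, 1, -1)

cnt = np.stack(np.where(im_mask == 1)).T.astype(np.float32)  # (N, 2), (y, x)
rect = cv2.boxPoints(cv2.minAreaRect(cnt))                   # 4 corners
rect = np.array(rect)[:, ::-1].reshape(-1)                   # back to x1,y1,...
print(rect)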
def extract_frame(vid_item, dev_id=0):
    """Generate optical flow using dense flow.

    Args:
        vid_item (list): Video item containing video full path,
            video (short) path, video id.
        dev_id (int): Device id.

    Returns:
        bool: Whether generate optical flow successfully.
    """
    full_path, vid_path, vid_id, method, task = vid_item
    if '/' in vid_path:
        act_name = osp.basename(osp.dirname(vid_path))
        out_full_path = osp.join(args.out_dir, act_name)
    else:
        out_full_path = args.out_dir

    if task == 'rgb':
        if args.use_opencv:
            # Unlike denseflow, OpenCV will not create a sub directory
            # with the video name, so do it manually.
            video_name = osp.splitext(osp.basename(vid_path))[0]
            out_full_path = osp.join(out_full_path, video_name)

            vr = mmcv.VideoReader(full_path)
            for i in range(len(vr)):
                if vr[i] is not None:
                    frame = vr[i]  # decode once and reuse
                    h, w, _ = np.shape(frame)  # frames are (H, W, C)
                    if args.new_short == 0:
                        out_img = mmcv.imresize(
                            frame, (args.new_width, args.new_height))
                    else:
                        # resize the short side to new_short
                        if min(h, w) == w:
                            new_w = args.new_short
                            new_h = int((new_w / w) * h)
                        else:
                            new_h = args.new_short
                            new_w = int((new_h / h) * w)
                        # mmcv.imresize takes the target size as (w, h)
                        out_img = mmcv.imresize(frame, (new_w, new_h))
                    mmcv.imwrite(out_img,
                                 f'{out_full_path}/img_{i + 1:05d}.jpg')
                else:
                    warnings.warn(
                        'Length inconsistent! '
                        f'Early stop with {i + 1} out of {len(vr)} frames.')
                    break
        else:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -nw={args.new_width} -nh={args.new_height} -v')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -ns={args.new_short} -v')
            os.system(cmd)
    elif task == 'flow':
        if args.new_short == 0:
            cmd = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -nw={args.new_width} -nh={args.new_height} -v')
        else:
            cmd = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -ns={args.new_short} -v')
        os.system(cmd)
    else:
        if args.new_short == 0:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -nw={args.new_width} -nh={args.new_height} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -nw={args.new_width} -nh={args.new_height} -v')
        else:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -ns={args.new_short} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -ns={args.new_short} -v')
        os.system(cmd_rgb)
        os.system(cmd_flow)

    print(f'{task} {vid_id} {vid_path} {method} done')
    sys.stdout.flush()
    return True
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    eval_results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                # drop any extra channels and convert to BGR for display
                if img.shape[-1] > 3:
                    img_show = img[:h, :w, :3][:, :, ::-1]
                else:
                    img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result[j][:2],
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            if len(result[0]) == 3:
                result = [(bbox_results, encode_mask_results(mask_results),
                           poly_points)
                          for bbox_results, mask_results, poly_points
                          in result]
            else:
                result = [(bbox_results, encode_mask_results(mask_results))
                          for bbox_results, mask_results in result]
        results.extend(result)
        eval_results.extend([res[:2] for res in result])

        for _ in range(batch_size):
            prog_bar.update()
    return results, eval_results
def __call__(self,
             img_group,
             scale,
             crop_history=None,
             flip=False,
             rotate=None,
             keep_ratio=True,
             dropout_prob=None,
             div_255=False,
             transpose=True,
             stack=True):
    # 1. rescale
    if keep_ratio:
        tuple_list = [
            mmcv.imrescale(img, scale, return_scale=True)
            for img in img_group
        ]
        img_group, scale_factors = list(zip(*tuple_list))
        scale_factor = scale_factors[0]
    else:
        tuple_list = [
            mmcv.imresize(img, scale, return_scale=True)
            for img in img_group
        ]
        img_group, w_scales, h_scales = list(zip(*tuple_list))
        scale_factor = np.array(
            [w_scales[0], h_scales[0], w_scales[0], h_scales[0]],
            dtype=np.float32)

    # 2. rotate
    if rotate is not None:
        img_group = [mmcv.imrotate(img, rotate) for img in img_group]

    # 3. crop (if necessary)
    if crop_history is not None:
        self.op_crop = GroupCrop(crop_history)
    if self.op_crop is not None:
        img_group, crop_quadruple = self.op_crop(img_group)
    else:
        crop_quadruple = None
    img_shape = img_group[0].shape

    # 4. flip
    if flip:
        img_group = [mmcv.imflip(img) for img in img_group]

    # 5a. extra augmentation
    if self.extra_augm is not None:
        img_group = self.extra_augm(img_group)

    # 5b. coarse dropout
    if self.dropout_scale is not None and dropout_prob is not None \
            and dropout_prob > 0.0:
        dropout_mask = self._coarse_dropout_mask(img_group[0].shape,
                                                 dropout_prob,
                                                 self.dropout_scale)
        img_group = [img * dropout_mask for img in img_group]

    # 6a. div_255
    if div_255:
        img_group = [
            mmcv.imnormalize(img, 0, 255, False) for img in img_group
        ]

    # 6b. normalize
    if self.mean is not None and self.std is not None:
        img_group = [
            mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
            for img in img_group
        ]
    elif self.to_rgb:
        img_group = [mmcv.bgr2rgb(img) for img in img_group]

    # 7. pad
    if self.size_divisor is not None:
        img_group = [
            mmcv.impad_to_multiple(img, self.size_divisor)
            for img in img_group
        ]
        pad_shape = img_group[0].shape
    else:
        pad_shape = img_shape

    # 8. transpose
    if transpose:
        img_group = [img.transpose((2, 0, 1)) for img in img_group]

    # 9. stack into numpy.array
    if stack:
        img_group = np.stack(img_group, axis=0)

    return img_group, img_shape, pad_shape, scale_factor, crop_quadruple
def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
                  ori_shape, scale_factor, rescale):
    """Get segmentation masks from mask_pred and bboxes.

    Args:
        mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
            For single-scale testing, mask_pred is the direct output of
            model, whose type is Tensor, while for multi-scale testing,
            it will be converted to numpy array outside of this method.
        det_bboxes (Tensor): shape (n, 4/5)
        det_labels (Tensor): shape (n, )
        rcnn_test_cfg (dict): rcnn testing config
        ori_shape: original image size
        scale_factor: scale factor for the image
        rescale (bool): whether masks are mapped back to the original
            image size

    Returns:
        list[list]: encoded masks
    """
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.sigmoid().cpu().numpy()
    assert isinstance(mask_pred, np.ndarray)
    # when enabling mixed precision training, mask_pred may be float16
    # numpy array
    mask_pred = mask_pred.astype(np.float32)

    cls_segms = [[] for _ in range(self.num_classes - 1)]
    bboxes = det_bboxes.cpu().numpy()[:, :4]
    labels = det_labels.cpu().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
        img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
        scale_factor = 1.0

    for i in range(bboxes.shape[0]):
        if not isinstance(scale_factor, (float, np.ndarray)):
            scale_factor = scale_factor.cpu().numpy()
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        if not self.class_agnostic:
            mask_pred_ = mask_pred[i, label, :, :]
        else:
            mask_pred_ = mask_pred[i, 0, :, :]

        bbox_mask = mmcv.imresize(mask_pred_, (w, h))
        bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
            np.uint8)
        if rcnn_test_cfg.get('crop_mask', False):
            im_mask = bbox_mask
        else:
            im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask

        if rcnn_test_cfg.get('rle_mask_encode', True):
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms[label - 1].append(rle)
        else:
            cls_segms[label - 1].append(im_mask)

    return cls_segms
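The core of the method above is a resize-threshold-paste step; here it is isolated as a standalone sketch with illustrative shapes and a 0.5 threshold standing in for rcnn_test_cfg.mask_thr_binary.

import mmcv
import numpy as np

mask_prob = np.random.rand(28, 28).astype(np.float32)  # stand-in for one ROI's mask probabilities
bbox = np.array([40, 30, 120, 90], dtype=np.int32)     # x1, y1, x2, y2 in image coordinates
img_h, img_w = 200, 320

w = max(bbox[2] - bbox[0] + 1, 1)
h = max(bbox[3] - bbox[1] + 1, 1)
# resize the low-res map to the box size, binarize, paste into a full canvas
bbox_mask = (mmcv.imresize(mask_prob, (w, h)) > 0.5).astype(np.uint8)
im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask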
def prepare_test_img(self, idx):
    """Prepare an image for testing (multi-scale and flipping)."""
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix[:-11], img_info['filename']))
    # corruption
    if self.corruption is not None:
        img = corrupt(img,
                      severity=self.corruption_severity,
                      corruption_name=self.corruption)
    # load proposals if necessary
    if self.proposals is not None:
        proposal = self.proposals[idx][:self.num_max_proposals]
        if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposal.shape))
    else:
        proposal = None

    # get img_refer from the first frame
    first_frame_idx = img_info["first_frame"]
    refer_info = self.img_infos[first_frame_idx]
    refer_ann = self.get_ann_info(first_frame_idx)
    img_refer = mmcv.imread(
        osp.join(self.img_prefix[:-11], refer_info['filename']))
    # crop the bbox
    img_refer = torch.squeeze(
        torch.Tensor(mmcv.imcrop(img_refer, refer_ann["bboxes"])))
    # resize to refer_scale
    img_refer = torch.Tensor(
        mmcv.imresize(np.float32(img_refer),
                      self.refer_scale,
                      return_scale=False)).permute(2, 0, 1)

    def prepare_single(img, scale, flip, proposal=None):
        _img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip, keep_ratio=self.resize_keep_ratio)
        _img = to_tensor(_img)
        _img_meta = dict(ori_shape=(img_info['height'], img_info['width'], 3),
                         img_shape=img_shape,
                         pad_shape=pad_shape,
                         scale_factor=scale_factor,
                         flip=flip)
        if proposal is not None:
            if proposal.shape[1] == 5:
                score = proposal[:, 4, None]
                proposal = proposal[:, :4]
            else:
                score = None
            _proposal = self.bbox_transform(proposal, img_shape, scale_factor,
                                            flip)
            _proposal = np.hstack([_proposal, score
                                   ]) if score is not None else _proposal
            _proposal = to_tensor(_proposal)
        else:
            _proposal = None
        return _img, _img_meta, _proposal

    imgs = []
    img_metas = []
    img_refers = []
    proposals = []
    for scale in self.img_scales:
        _img, _img_meta, _proposal = prepare_single(img, scale, False,
                                                    proposal)
        imgs.append(_img)
        img_metas.append(DC(_img_meta, cpu_only=True))
        img_refers.append(DC(to_tensor(img_refer), stack=True))
        proposals.append(_proposal)
        if self.flip_ratio > 0:
            _img, _img_meta, _proposal = prepare_single(img, scale, True,
                                                        proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            img_refers.append(DC(to_tensor(img_refer), stack=True))
            proposals.append(_proposal)
    data = dict(img=imgs, img_meta=img_metas, img_refer=img_refers)
    if self.proposals is not None:
        data['proposals'] = proposals
    return data
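The reference-branch preparation above reduces to a crop-then-resize; a small sketch, assuming a hypothetical image path, a single (x1, y1, x2, y2) box, and an illustrative (127, 127) value for refer_scale.

import mmcv
import numpy as np
import torch

img_refer = mmcv.imread('first_frame.jpg')        # hypothetical path
refer_bbox = np.array([10, 20, 110, 140])         # x1, y1, x2, y2
patch = mmcv.imcrop(img_refer, refer_bbox)        # crop the reference box
patch = mmcv.imresize(np.float32(patch), (127, 127))
patch = torch.from_numpy(patch).permute(2, 0, 1)  # HWC -> CHW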
def main():
    args = parse_args()

    frame_paths, original_frames = frame_extraction(args.video)
    num_frame = len(frame_paths)
    h, w, _ = original_frames[0].shape

    # resize frames so the short side is 256
    new_w, new_h = mmcv.rescale_size((w, h), (256, np.inf))
    frames = [mmcv.imresize(img, (new_w, new_h)) for img in original_frames]
    w_ratio, h_ratio = new_w / w, new_h / h

    # Get clip_len, frame_interval and calculate the center index of each clip
    config = mmcv.Config.fromfile(args.config)
    config.merge_from_dict(args.cfg_options)
    val_pipeline = config.data.val.pipeline

    sampler = [x for x in val_pipeline if x['type'] == 'SampleAVAFrames'][0]
    clip_len, frame_interval = sampler['clip_len'], sampler['frame_interval']
    window_size = clip_len * frame_interval
    assert clip_len % 2 == 0, 'We would like to have an even clip_len'
    # Note that timestamps are 1-based here
    timestamps = np.arange(window_size // 2, num_frame + 1 - window_size // 2,
                           args.predict_stepsize)

    # Load label_map
    label_map = load_label_map(args.label_map)
    try:
        if config['data']['train']['custom_classes'] is not None:
            label_map = {
                idx + 1: label_map[cls]
                for idx, cls in enumerate(
                    config['data']['train']['custom_classes'])
            }
    except KeyError:
        pass

    # Get human detection results
    center_frames = [frame_paths[ind - 1] for ind in timestamps]
    human_detections = detection_inference(args, center_frames)
    for i in range(len(human_detections)):
        det = human_detections[i]
        det[:, 0:4:2] *= w_ratio
        det[:, 1:4:2] *= h_ratio
        human_detections[i] = torch.from_numpy(det[:, :4]).to(args.device)

    # Get img_norm_cfg
    img_norm_cfg = config['img_norm_cfg']
    if 'to_rgb' not in img_norm_cfg and 'to_bgr' in img_norm_cfg:
        to_bgr = img_norm_cfg.pop('to_bgr')
        img_norm_cfg['to_rgb'] = to_bgr
    img_norm_cfg['mean'] = np.array(img_norm_cfg['mean'])
    img_norm_cfg['std'] = np.array(img_norm_cfg['std'])

    # Build the spatiotemporal detection model
    try:
        # In our spatiotemporal detection demo, different actions should have
        # the same number of bboxes.
        config['model']['test_cfg']['rcnn']['action_thr'] = 0.0
    except KeyError:
        pass
    config.model.backbone.pretrained = None
    model = build_detector(config.model, test_cfg=config.get('test_cfg'))
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    model.to(args.device)
    model.eval()

    predictions = []

    print('Performing SpatioTemporal Action Detection for each clip')
    assert len(timestamps) == len(human_detections)
    prog_bar = mmcv.ProgressBar(len(timestamps))
    for timestamp, proposal in zip(timestamps, human_detections):
        if proposal.shape[0] == 0:
            predictions.append(None)
            continue

        start_frame = timestamp - (clip_len // 2 - 1) * frame_interval
        frame_inds = start_frame + np.arange(0, window_size, frame_interval)
        frame_inds = list(frame_inds - 1)
        imgs = [frames[ind].astype(np.float32) for ind in frame_inds]
        _ = [mmcv.imnormalize_(img, **img_norm_cfg) for img in imgs]
        # THWC -> CTHW -> 1CTHW
        input_array = np.stack(imgs).transpose((3, 0, 1, 2))[np.newaxis]
        input_tensor = torch.from_numpy(input_array).to(args.device)

        with torch.no_grad():
            result = model(
                return_loss=False,
                img=[input_tensor],
                img_metas=[[dict(img_shape=(new_h, new_w))]],
                proposals=[[proposal]])
            result = result[0]
            # one (initially empty) prediction list per proposal
            prediction = [[] for _ in range(proposal.shape[0])]
            # perform action score thresholding
            for i in range(len(result)):
                if i + 1 not in label_map:
                    continue
                for j in range(proposal.shape[0]):
                    if result[i][j, 4] > args.action_score_thr:
                        prediction[j].append(
                            (label_map[i + 1], result[i][j, 4]))
            predictions.append(prediction)
        prog_bar.update()

    results = []
    for human_detection, prediction in zip(human_detections, predictions):
        results.append(pack_result(human_detection, prediction, new_h, new_w))

    def dense_timestamps(timestamps, n):
        """Make it nx frames."""
        old_frame_interval = (timestamps[1] - timestamps[0])
        start = timestamps[0] - old_frame_interval / n * (n - 1) / 2
        new_frame_inds = np.arange(
            len(timestamps) * n) * old_frame_interval / n + start
        return new_frame_inds.astype(np.int64)

    dense_n = int(args.predict_stepsize / args.output_stepsize)
    frames = [
        cv2.imread(frame_paths[i - 1])
        for i in dense_timestamps(timestamps, dense_n)
    ]
    print('Performing visualization')
    vis_frames = visualize(frames, results)
    vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames],
                                fps=args.output_fps)
    vid.write_videofile(args.out_filename)

    tmp_frame_dir = osp.dirname(frame_paths[0])
    shutil.rmtree(tmp_frame_dir)
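A worked example of the clip-sampling arithmetic in the loop above, assuming clip_len=8 and frame_interval=8 (so window_size=64): for a given 1-based center timestamp, the sampled 0-based frame indices cover the window symmetrically around it.

import numpy as np

clip_len, frame_interval = 8, 8
window_size = clip_len * frame_interval  # 64
timestamp = 100                          # 1-based center frame

start_frame = timestamp - (clip_len // 2 - 1) * frame_interval  # 76
frame_inds = start_frame + np.arange(0, window_size, frame_interval)
frame_inds = frame_inds - 1              # convert to 0-based
print(frame_inds)                        # [ 75  83  91  99 107 115 123 131]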
def prepare_train_img(self, idx):
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix[:-11], img_info['filename']))
    # corruption
    if self.corruption is not None:
        img = corrupt(img,
                      severity=self.corruption_severity,
                      corruption_name=self.corruption)
    # load proposals if necessary
    if self.proposals is not None:
        proposals = self.proposals[idx][:self.num_max_proposals]
        # TODO: Handle empty proposals properly. Currently images with
        # no proposals are just ignored, but they can be used for
        # training in concept.
        if len(proposals) == 0:
            return None
        if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposals.shape))
        if proposals.shape[1] == 5:
            scores = proposals[:, 4, None]
            proposals = proposals[:, :4]
        else:
            scores = None

    ann = self.get_ann_info(idx)
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']
    if self.with_crowd:
        gt_bboxes_ignore = ann['bboxes_ignore']

    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0 and self.skip_img_without_anno:
        warnings.warn('Skip the image "%s" that has no valid gt bbox' %
                      osp.join(self.img_prefix, img_info['filename']))
        return None

    # apply transforms
    flip = True if np.random.rand() < self.flip_ratio else False
    # randomly sample a scale
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
    img = img.copy()

    # get img_refer from the first frame
    first_frame_idx = img_info["first_frame"]
    refer_info = self.img_infos[first_frame_idx]
    refer_ann = self.get_ann_info(first_frame_idx)
    img_refer = mmcv.imread(
        osp.join(self.img_prefix[:-11], refer_info['filename']))
    # crop the bbox
    img_refer = torch.squeeze(
        torch.Tensor(mmcv.imcrop(img_refer, refer_ann["bboxes"])))
    # resize to refer_scale
    img_refer = torch.Tensor(
        mmcv.imresize(np.float32(img_refer),
                      self.refer_scale,
                      return_scale=False)).permute(2, 0, 1)

    if self.with_seg:
        gt_seg = mmcv.imread(osp.join(
            self.seg_prefix, img_info['filename'].replace('jpg', 'png')),
            flag='unchanged')
        gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
        gt_seg = mmcv.imrescale(gt_seg,
                                self.seg_scale_factor,
                                interpolation='nearest')
        gt_seg = gt_seg[None, ...]
    if self.proposals is not None:
        proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                        flip)
        proposals = np.hstack([proposals, scores
                               ]) if scores is not None else proposals
    gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip)
    if self.with_crowd:
        gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                               scale_factor, flip)
    if self.with_mask:
        gt_masks = self.mask_transform(ann['masks'], pad_shape, scale_factor,
                                       flip)

    ori_shape = (img_info['height'], img_info['width'], 3)
    img_meta = dict(ori_shape=ori_shape,
                    img_shape=img_shape,
                    pad_shape=pad_shape,
                    scale_factor=scale_factor,
                    flip=flip)

    data = dict(img=DC(to_tensor(img), stack=True),
                img_meta=DC(img_meta, cpu_only=True),
                gt_bboxes=DC(to_tensor(gt_bboxes)),
                img_refer=DC(to_tensor(img_refer), stack=True))
    if self.with_label:
        data['gt_labels'] = DC(to_tensor(gt_labels))
    if self.with_crowd:
        data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
    if self.with_mask:
        data['gt_masks'] = DC(gt_masks, cpu_only=True)

    # -------------------- offline ray label generation --------------------
    self.center_sample = True
    self.use_mask_center = True
    self.radius = 1.5
    featmap_sizes = self.get_featmap_size(pad_shape)
    # featmap_sizes: [[32, 32], [16, 16], [8, 8]]
    num_levels = len(self.strides)
    all_level_points = self.get_points(featmap_sizes)
    # level 0 points: torch.Size([1024, 2])
    # level 1 points: torch.Size([256, 2])
    # level 2 points: torch.Size([64, 2])
    self.num_points_per_level = [i.size()[0] for i in all_level_points]

    expanded_regress_ranges = [
        all_level_points[i].new_tensor(
            self.regress_ranges[i])[None].expand_as(all_level_points[i])
        for i in range(num_levels)
    ]
    concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)
    concat_points = torch.cat(all_level_points, 0)
    gt_masks = gt_masks[:len(gt_bboxes)]
    gt_bboxes = torch.Tensor(gt_bboxes)
    gt_labels = torch.Tensor(gt_labels)

    _labels, _bbox_targets, _mask_targets = self.polar_target_single(
        gt_bboxes, gt_masks, gt_labels, concat_points, concat_regress_ranges,
        self.num_polar)

    data['_gt_labels'] = DC(_labels)
    data['_gt_bboxes'] = DC(_bbox_targets)
    data['_gt_masks'] = DC(_mask_targets)
    # -------------------- offline ray label generation --------------------

    return data
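A hypothetical smoke test for the method above: `dataset` is assumed to be an instance of this dataset class; every value in the returned dict is a DataContainer, so the underlying tensors are reached through `.data`.

sample = dataset.prepare_train_img(0)
if sample is not None:  # None is returned for images without valid gt boxes
    print(sample['img'].data.shape)         # (C, H, W) image tensor
    print(sample['gt_bboxes'].data.shape)   # (num_gts, 4)
    print(sample['_gt_labels'].data.shape)  # per-point labels from polar_target_single
    print(sample['_gt_masks'].data.shape)   # per-point polar ray targets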
def single_gpu_test_processed_rect_img(model, data_loader, show=False,
                                       out_dir=None, show_score_thr=0.3):
    print('clw: using single_gpu_test_processed_rect_img()')
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        # The inputs are rectangular crops of the original image; shift the
        # detected boxes back to full-image coordinates using the crop origin
        # encoded in the filename suffix ('..._<x_left>_<y_up>.jpg').
        img_name = data['img_metas'][0].data[0][0]['ori_filename']
        crop_origin = img_name[:-4].split('_')[-2:]
        x_rect_left = int(crop_origin[0])
        y_rect_up = int(crop_origin[1])
        for cls_idx in range(len(result[0])):
            bboxes = result[0][cls_idx][:, :4]  # (n, 4)
            if bboxes.size == 0:
                continue
            shifted = []
            for xyxy in bboxes:
                x1 = xyxy[0] + x_rect_left
                y1 = xyxy[1] + y_rect_up
                x2 = xyxy[2] + x_rect_left
                y2 = xyxy[3] + y_rect_up
                shifted.append(np.array((x1, y1, x2, y2)))
            result[0][cls_idx][:, :4] = np.array(shifted)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(img_show,
                                         result[j],
                                         show=show,
                                         out_file=out_file,
                                         score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
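The filename-driven offset correction above, isolated as a sketch; the '..._<x_left>_<y_up>.jpg' naming scheme is assumed from how the function parses ori_filename, and the detection array is illustrative.

import numpy as np

img_name = 'patch_100_200.jpg'                 # hypothetical crop filename
x_left, y_up = map(int, img_name[:-4].split('_')[-2:])
dets = np.array([[10., 20., 50., 60., 0.9]])   # x1, y1, x2, y2, score
dets[:, [0, 2]] += x_left                      # shift x coordinates
dets[:, [1, 3]] += y_up                        # shift y coordinates
print(dets)                                    # [[110. 220. 150. 260.   0.9]]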
def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
                  ori_shape, scale_factor, rescale):
    """Get segmentation masks from mask_pred and bboxes.

    Args:
        mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
            For single-scale testing, mask_pred is the direct output of
            model, whose type is Tensor, while for multi-scale testing,
            it will be converted to numpy array outside of this method.
        det_bboxes (Tensor): shape (n, 4/5)
        det_labels (Tensor): shape (n, )
        rcnn_test_cfg (dict): rcnn testing config
        ori_shape: original image size

    Returns:
        list[list]: encoded masks
    """
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.sigmoid().cpu().numpy()
    assert isinstance(mask_pred, np.ndarray)

    # No mask is predicted for the background class, so this is 80 lists
    # for COCO.
    cls_segms = [[] for _ in range(self.num_classes - 1)]
    bboxes = det_bboxes.cpu().numpy()[:, :4]
    labels = det_labels.cpu().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
        img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
        scale_factor = 1.0

    # iterate over every detected object
    for i in range(bboxes.shape[0]):
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        # Although masks are generated for all classes, the one kept is
        # chosen by the detection label, i.e. by the classification score
        # of the detection stage, which is not always accurate.
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        if not self.class_agnostic:
            # A mask is predicted per class; take the 28x28 map of the
            # current class for further processing.
            mask_pred_ = mask_pred[i, label, :, :]
        else:
            # interface for class-agnostic (foreground/background) masks
            mask_pred_ = mask_pred[i, 0, :, :]

        # create a canvas of the original image size
        im_mask = np.zeros((img_h, img_w), dtype=np.uint8)

        # Interpolate the 28x28 map to the bbox size, then binarize it with
        # the configured threshold.
        bbox_mask = mmcv.imresize(mask_pred_, (w, h))
        bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
            np.uint8)
        # Paste the binarized mask into the canvas; only the box region is
        # written, so im_mask is a pixel mask of the original image size,
        # 1 on segmented pixels and 0 everywhere else.
        im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
        rle = mask_util.encode(
            np.array(im_mask[:, :, np.newaxis], order='F'))[0]
        cls_segms[label - 1].append(rle)

    return cls_segms
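A minimal round-trip sketch of the RLE encoding used in the final step, via pycocotools; the tiny mask is illustrative.

import numpy as np
import pycocotools.mask as mask_util

im_mask = np.zeros((4, 4), dtype=np.uint8)
im_mask[1:3, 1:3] = 1
# encode() expects Fortran-ordered uint8 arrays of shape (h, w, n)
rle = mask_util.encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]
print(mask_util.area(rle))                   # 4
assert (mask_util.decode(rle) == im_mask).all()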