def generate_rpn_on_dataset(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    multi_gpu=False,
    gpu_id=0
):
    """Run RPN proposal generation on a dataset and evaluate the proposals."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        _boxes, _scores, _ids, rpn_file = multi_gpu_generate_rpn_on_dataset(
            weights_file, dataset_name, _proposal_file_ignored, num_images,
            output_dir
        )
    else:
        # Processes entire dataset range by default
        _boxes, _scores, _ids, rpn_file = generate_rpn_on_range(
            weights_file,
            dataset_name,
            _proposal_file_ignored,
            output_dir,
            gpu_id=gpu_id
        )
    test_timer.toc()
    logger.info(
        'Total inference time: {:.3f}s'.format(test_timer.average_time)
    )
    return evaluate_proposal_file(dataset, rpn_file, output_dir)
def test_net_on_dataset(
    weights_file,
    dataset_name,
    proposal_file,
    output_dir,
    multi_gpu=False,
    gpu_id=0
):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file, num_images, output_dir
        )
    else:
        all_boxes, all_segms, all_keyps = test_net(
            weights_file, dataset_name, proposal_file, output_dir,
            gpu_id=gpu_id
        )
    test_timer.toc()
    logger.info(
        'Total inference time: {:.3f}s'.format(test_timer.average_time)
    )
    results = task_evaluation.evaluate_all(
        dataset, all_boxes, all_segms, all_keyps, output_dir
    )
    return results
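# Illustrative driver sketch for the two dataset-level entry points above
# (not part of this file). It assumes a Detectron-style global cfg has
# already been loaded and finalized elsewhere; the dataset name and the
# weights/output paths passed in are placeholders.
def _example_run_evaluation(weights_file, output_dir):
    # Detection/mask/keypoint evaluation on a whole dataset. proposal_file is
    # None here on the assumption that the model computes its own proposals
    # (no precomputed proposals file).
    results = test_net_on_dataset(
        weights_file,
        'coco_2014_minival',  # placeholder dataset name
        None,
        output_dir,
        multi_gpu=False,
    )
    # For an RPN-only model, generate and score proposals instead:
    # generate_rpn_on_dataset(weights_file, 'coco_2014_minival', None, output_dir)
    return results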
def generate_proposals_on_roidb(
    model,
    roidb,
    start_ind=None,
    end_ind=None,
    total_num_images=None,
    gpu_id=0,
):
    """Generate RPN proposals for all images in a roidb."""
    _t = Timer()
    num_images = len(roidb)
    roidb_boxes = [[] for _ in range(num_images)]
    roidb_scores = [[] for _ in range(num_images)]
    roidb_ids = [[] for _ in range(num_images)]
    if start_ind is None:
        start_ind = 0
        end_ind = num_images
        total_num_images = num_images
    for i in range(num_images):
        roidb_ids[i] = roidb[i]['id']
        im = cv2.imread(roidb[i]['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            _t.tic()
            roidb_boxes[i], roidb_scores[i] = im_proposals(model, im)
            _t.toc()
        if i % 10 == 0:
            ave_time = _t.average_time
            eta_seconds = ave_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                ('rpn_generate: range [{:d}, {:d}] of {:d}: '
                 '{:d}/{:d} {:.3f}s (eta: {})').format(
                    start_ind + 1, end_ind, total_num_images,
                    start_ind + i + 1, start_ind + num_images, ave_time, eta
                )
            )
    return roidb_boxes, roidb_scores, roidb_ids
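# Illustrative sketch of driving generate_proposals_on_roidb() over one shard
# of a roidb (e.g. one process per GPU), using the start/end/total arguments
# so the progress logging above reports global indices. `model` is assumed to
# be an already-initialized RPN model; building it is outside this snippet.
def _example_generate_on_shard(model, roidb, shard_i, num_shards, gpu_id=0):
    """Run proposal generation on the shard_i-th contiguous slice of roidb."""
    total = len(roidb)
    per_shard = (total + num_shards - 1) // num_shards  # ceil division
    start = shard_i * per_shard
    end = min(start + per_shard, total)
    return generate_proposals_on_roidb(
        model,
        roidb[start:end],
        start_ind=start,
        end_ind=end,
        total_num_images=total,
        gpu_id=gpu_id,
    )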
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(np.round(self.smoothed_mb_qsize.GetMedianValue())),
            mem=int(np.ceil(mem_usage / 1024 / 1024)),
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
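# Illustrative training-loop sketch showing how TrainingStats is typically
# driven (not part of this class). It assumes a Caffe2-based Detectron setup
# in which `model` is a detection model helper whose net already exists in
# the workspace; the lr_policy module path and UpdateWorkspaceLr helper are
# assumptions about the surrounding codebase and may differ.
def _example_training_loop(model, start_iter=0):
    from caffe2.python import workspace  # Caffe2 runtime
    from detectron.utils import lr_policy  # assumed module path

    training_stats = TrainingStats(model)
    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        training_stats.IterTic()
        # Step the learning-rate schedule and push the new lr into the workspace
        lr = model.UpdateWorkspaceLr(
            cur_iter, lr_policy.get_lr_at_iter(cur_iter)
        )
        workspace.RunNet(model.net.Proto().name)
        training_stats.IterToc()
        training_stats.UpdateIterStats()
        training_stats.LogIterStats(cur_iter, lr)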
class JsonDataset(object):
    """A class representing a COCO json dataset."""

    def __init__(self, name):
        assert dataset_catalog.contains(name), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(dataset_catalog.get_im_dir(name)), \
            'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name))
        assert os.path.exists(dataset_catalog.get_ann_fn(name)), \
            'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name))
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = dataset_catalog.get_im_dir(name)
        self.image_prefix = dataset_catalog.get_im_prefix(name)
        self.COCO = COCO(dataset_catalog.get_ann_fn(name))
        self.debug_timer = Timer()
        # Set up dataset classes
        category_ids = self.COCO.getCatIds()
        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
        self.category_to_id_map = dict(zip(categories, category_ids))
        self.classes = ['__background__'] + categories
        self.num_classes = len(self.classes)
        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(self.COCO.getCatIds())
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()

    def get_roidb(
        self,
        gt=False,
        proposal_file=None,
        min_proposal_size=2,
        proposal_limit=-1,
        crowd_filter_thresh=0
    ):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        image_ids = self.COCO.getImgIds()
        image_ids.sort()
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
        for entry in roidb:
            self._prep_roidb_entry(entry)
        if gt:
            # Include ground-truth object annotations
            self.debug_timer.tic()
            for entry in roidb:
                self._add_gt_annotations(entry)
            logger.debug(
                '_add_gt_annotations took {:.3f}s'.format(
                    self.debug_timer.toc(average=False)
                )
            )
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(
                roidb, proposal_file, min_proposal_size, proposal_limit,
                crowd_filter_thresh
            )
            logger.debug(
                '_add_proposals_from_file took {:.3f}s'.format(
                    self.debug_timer.toc(average=False)
                )
            )
        _add_class_assignments(roidb)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry."""
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        im_path = os.path.join(
            self.image_directory, self.image_prefix + entry['file_name']
        )
        assert os.path.exists(im_path), 'Image \'{}\' not found'.format(im_path)
        entry['image'] = im_path
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        # Empty placeholders
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32)
        )
        entry['is_crowd'] = np.empty((0), dtype=np.bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the
        # index in the list of rois that satisfy
        # np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty(
                (0, 3, self.num_keypoints), dtype=np.int32
            )
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license', 'file_name']:
            if k in entry:
                del entry[k]

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros(
            (num_valid_objs), dtype=entry['gt_classes'].dtype
        )
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
        entry['has_visible_keypoints'] = im_has_visible_keypoints
    def _add_proposals_from_file(
        self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh
    ):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'rb') as f:  # binary mode for pickle data
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(
                boxes, entry['height'], entry['width']
            )
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information."""
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return

        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints)))
            )
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'
            }

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        num_keypoints = len(obj['keypoints']) // 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps
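# Illustrative usage sketch for JsonDataset (not part of this class). The
# dataset name is a placeholder for any name registered in dataset_catalog,
# and 0.7 is just an example crowd-filter threshold. With gt=True the
# returned entries carry the ground-truth fields populated above.
def _example_build_training_roidb(dataset_name='coco_2014_train'):
    ds = JsonDataset(dataset_name)
    roidb = ds.get_roidb(gt=True, crowd_filter_thresh=0.7)
    # Each entry now has 'boxes', 'gt_classes', 'gt_overlaps', 'is_crowd',
    # 'segms', and (for the person category) 'gt_keypoints'.
    return roidb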