def evaluate_detections(self, detections, **kwargs):
    """Cache raw detections to disk, then delegate scoring to the
    dataset-specific ``_evaluate_detections`` implementation."""
    fname = '{}_{}.pkl'.format(self._name, 'detections')
    path = os.path.join(self._root_path, 'cache', fname)
    logger.info('saving cache {}'.format(path))
    with open(path, 'wb') as fid:
        pickle.dump(detections, fid, pickle.HIGHEST_PROTOCOL)
    self._evaluate_detections(detections, **kwargs)
def __init__(self, image_set, root_path, devkit_path):
    """
    fill basic information to initialize imdb
    :param image_set: 2007_trainval, 2007_test, etc
    :param root_path: 'data', will write 'cache'
    :param devkit_path: 'data/VOCdevkit', load data and write results
    """
    super(PascalVOC, self).__init__('voc_' + image_set, root_path)
    year, image_set = image_set.split('_')
    self._config = {'comp_id': 'comp4', 'use_diff': False, 'min_size': 2}
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))

    # all dataset files live under data/VOCdevkit/VOC<year>
    voc_dir = os.path.join(devkit_path, 'VOC' + year)
    self._image_index_file = os.path.join(voc_dir, 'ImageSets', 'Main',
                                          image_set + '.txt')
    self._image_file_tmpl = os.path.join(voc_dir, 'JPEGImages', '{}.jpg')
    self._image_anno_tmpl = os.path.join(voc_dir, 'Annotations', '{}.xml')

    # results
    result_folder = os.path.join(devkit_path, 'results', 'VOC' + year, 'Main')
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    self._result_file_tmpl = os.path.join(result_folder,
                                          'comp4_det_' + image_set + '_{}.txt')

    # get roidb
    self._roidb = self._get_cached('roidb', self._load_gt_roidb)
    logger.info('%s num_images %d' % (self.name, self.num_images))
def __init__(self, image_set, root_path, devkit_path):
    """
    fill basic information to initialize imdb
    :param image_set: 2007_trainval, 2007_test, etc
    :param root_path: 'data', will write 'cache'
    :param devkit_path: 'data/VOCdevkit', load data and write results
    """
    super(PascalVOC, self).__init__('voc_' + image_set, root_path)
    # split e.g. '2007_trainval' into year='2007', image_set='trainval'
    year, image_set = image_set.split('_')
    self._config = {'comp_id': 'comp4', 'use_diff': False, 'min_size': 2}
    # map class name -> contiguous index (background included)
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
    self._image_index_file = os.path.join(devkit_path, 'VOC' + year, 'ImageSets', 'Main', image_set + '.txt')
    self._image_file_tmpl = os.path.join(devkit_path, 'VOC' + year, 'JPEGImages', '{}.jpg')
    self._image_anno_tmpl = os.path.join(devkit_path, 'VOC' + year, 'Annotations', '{}.xml')
    # results
    result_folder = os.path.join(devkit_path, 'results', 'VOC' + year, 'Main')
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    # per-class result file, e.g. comp4_det_test_{cls}.txt
    self._result_file_tmpl = os.path.join(result_folder, 'comp4_det_' + image_set + '_{}.txt')
    # get roidb (ground-truth annotations, pickled under root_path/cache)
    self._roidb = self._get_cached('roidb', self._load_gt_roidb)
    logger.info('%s num_images %d' % (self.name, self.num_images))
def filter_roidb(self):
    """Remove images without usable rois"""
    before = len(self._roidb)
    kept = []
    for rec in self._roidb:
        if len(rec['gt_classes']):
            kept.append(rec)
    self._roidb = kept
    logger.info('filter roidb: {} -> {}'.format(before, len(kept)))
def test_net(sym, imdb, args):
    """Run single-image detection over every image of ``imdb`` and evaluate.

    :param sym: network symbol producing (rois, scores, bbox_deltas) outputs
    :param imdb: dataset object exposing roidb / num_images / num_classes
    :param args: parsed CLI args (gpu, image sizes, pixel stats, thresholds)
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))
    # setup context
    ctx = mx.gpu(args.gpu)
    # load testing data
    test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side,
                           max_size=args.img_long_side, mean=args.img_pixel_means,
                           std=args.img_pixel_stds)
    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)
    # produce shape max possible (bind once at the largest image size)
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None
    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)
    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)
    # all detections are collected into:
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]
    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas = mod.get_outputs()
            # NOTE(review): rois[:, 0] is presumably the batch index the
            # proposal layer prepends; dropped here — confirm in im_detect.
            rois = rois[:, 1:]
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]
            det = im_detect(rois, scores, bbox_deltas, im_info,
                            bbox_stds=args.rcnn_bbox_stds,
                            nms_thresh=args.rcnn_nms_thresh,
                            conf_thresh=args.rcnn_conf_thresh)
            # det layout (from the indexing below): col 0 = class id,
            # col 1 = score, last 4 cols = box coordinates
            for j in range(1, imdb.num_classes):
                indexes = np.where(det[:, 0] == j)[0]
                all_boxes[j][i] = np.concatenate((det[:, -4:], det[:, [1]]), axis=-1)[indexes, :]
            pbar.update(data_batch.data[0].shape[0])
    # evaluate model
    imdb.evaluate_detections(all_boxes)
def _do_python_eval(self, all_boxes, use_07_metric):
    """Evaluate detections with the PASCAL VOC protocol and log per-class AP.

    :param all_boxes: all_boxes[cls_ind][im_ind] is an (N, 5) array of
        detections (x1, y1, x2, y2, score) in 0-based image coordinates
        (or an empty list when the image has no detections for the class)
    :param use_07_metric: if True, use the 11-point VOC2007 AP metric
    """
    aps = []
    ap_classes = []  # class names aligned with aps (background excluded)
    for cls_ind, cls in enumerate(self.classes):
        if cls == '__background__':
            continue
        # class_anno is a dict [image_index, [bbox, difficult, det]]
        class_anno = {}
        npos = 0
        for roi_rec in self.roidb:
            index = roi_rec['index']
            objects = [obj for obj in roi_rec['objs'] if obj['name'] == cls]
            bbox = np.array([x['bbox'] for x in objects])
            # BUGFIX: np.bool was removed in NumPy 1.24; use builtin bool
            difficult = np.array([x['difficult'] for x in objects]).astype(bool)
            det = [False] * len(objects)  # stand for detected
            npos = npos + sum(~difficult)
            class_anno[index] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

        # bbox is 2d array of all detections, corresponding to each image_id
        image_ids = []
        bbox = []
        confidence = []
        for im_ind, dets in enumerate(all_boxes[cls_ind]):
            # robustness: entries may be the empty-list placeholder
            if len(dets) == 0:
                continue
            for k in range(dets.shape[0]):
                image_ids.append(self.roidb[im_ind]['index'])
                # VOC evaluation expects 1-based pixel coordinates
                bbox.append([dets[k, 0] + 1, dets[k, 1] + 1,
                             dets[k, 2] + 1, dets[k, 3] + 1])
                confidence.append(dets[k, -1])
        bbox = np.array(bbox)
        confidence = np.array(confidence)

        rec, prec, ap = self.voc_eval(class_anno, npos, image_ids, bbox,
                                      confidence, ovthresh=0.5,
                                      use_07_metric=use_07_metric)
        aps.append(ap)
        ap_classes.append(cls)

    # BUGFIX: the original zipped aps (background excluded) against
    # self.classes (background included), mislabelling every AP by one class.
    for cls, ap in zip(ap_classes, aps):
        logger.info('AP for {} = {:.4f}'.format(cls, ap))
    logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))
def _print_detection_metrics(self, coco_eval):
    """Log mean and per-class AP averaged over IoU thresholds 0.5–0.95."""
    IoU_lo_thresh = 0.5
    IoU_hi_thresh = 0.95

    def _get_thr_ind(coco_eval, thr):
        # locate the iouThrs entry equal to thr (tolerance 1e-5)
        hits = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                        (coco_eval.params.iouThrs < thr + 1e-5))[0]
        ind = hits[0]
        iou_thr = coco_eval.params.iouThrs[ind]
        assert np.isclose(iou_thr, thr)
        return ind

    ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
    ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)

    # precision has dims (iou, recall, cls, area range, max dets)
    # area range index 0: all area ranges
    # max dets index 2: 100 per image
    precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
    ap_default = np.mean(precision[precision > -1])
    logger.info('~~~~ Mean and per-category AP @ IoU=%.2f,%.2f] ~~~~' % (IoU_lo_thresh, IoU_hi_thresh))
    logger.info('%-15s %5.1f' % ('all', 100 * ap_default))

    for cls_ind, cls in enumerate(self.classes):
        if cls == '__background__':
            continue
        # minus 1 because of __background__
        cls_precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
        ap = np.mean(cls_precision[cls_precision > -1])
        logger.info('%-15s %5.1f' % (cls, 100 * ap))

    logger.info('~~~~ Summary metrics ~~~~')
    coco_eval.summarize()
def append_flipped_images(self):
    """Only flip boxes coordinates, images will be flipped when loading into network"""
    logger.info('%s append flipped images to roidb' % self._name)
    flipped = []
    for rec in self._roidb:
        boxes = rec['boxes'].copy()
        width = rec['width']
        x1 = boxes[:, 0].copy()
        x2 = boxes[:, 2].copy()
        # mirror the x-range; y-coordinates are untouched
        boxes[:, 0] = width - x2 - 1
        boxes[:, 2] = width - x1 - 1
        assert (boxes[:, 2] >= boxes[:, 0]).all()
        new_rec = rec.copy()
        new_rec['boxes'] = boxes
        new_rec['flipped'] = True
        flipped.append(new_rec)
    self._roidb.extend(flipped)
def _write_pascal_results(self, all_boxes):
    """Write one VOCdevkit-format result file per foreground class."""
    for cls_ind, cls in enumerate(self.classes):
        if cls == '__background__':
            continue
        logger.info('Writing %s VOC results file' % cls)
        filename = self._result_file_tmpl.format(cls)
        with open(filename, 'wt') as f:
            for im_ind, roi_rec in enumerate(self.roidb):
                dets = all_boxes[cls_ind][im_ind]
                if len(dets) == 0:
                    continue
                index = roi_rec['index']
                # the VOCdevkit expects 1-based indices
                for k in range(dets.shape[0]):
                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                        index, dets[k, -1],
                        dets[k, 0] + 1, dets[k, 1] + 1,
                        dets[k, 2] + 1, dets[k, 3] + 1))
def __init__(self, image_set, root_path, data_path):
    """
    fill basic information to initialize imdb
    :param image_set: train2017, val2017
    :param root_path: 'data', will write 'cache'
    :param data_path: 'data/coco', load data and write results
    """
    super(coco, self).__init__('coco_' + image_set, root_path)
    # example: annotations/instances_train2017.json
    self._anno_file = os.path.join(data_path, 'annotations', 'instances_' + image_set + '.json')
    # example train2017/000000119993.jpg (file name supplied by caller)
    self._image_file_tmpl = os.path.join(data_path, image_set, '{}')
    # example detections_val2017_results.json
    self._result_file = os.path.join(data_path, 'detections_{}_results.json'.format(image_set))
    # get roidb (pickled under root_path/cache after the first run)
    self._roidb = self._get_cached('roidb', self._load_gt_roidb)
    logger.info('%s num_images %d' % (self.name, self.num_images))
def _write_coco_results(self, _coco, detections):
    """
    example results
    [{"image_id": 42, "category_id": 18, "bbox": [258.15,41.29,348.26,243.78], "score": 0.236}, ...]
    """
    # map our class names onto COCO category ids
    cat_ids = _coco.getCatIds()
    cat_names = [cat['name'] for cat in _coco.loadCats(cat_ids)]
    class_to_coco_ind = dict(zip(cat_names, cat_ids))

    results = []
    for cls_ind, cls in enumerate(self.classes):
        if cls == '__background__':
            continue
        logger.info('collecting %s results (%d/%d)' % (cls, cls_ind, self.num_classes - 1))
        coco_cat_id = class_to_coco_ind[cls]
        results.extend(self._coco_results_one_category(detections[cls_ind], coco_cat_id))

    logger.info('writing results json to %s' % self._result_file)
    with open(self._result_file, 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)
def __init__(self, image_set, root_path, data_path):
    """
    fill basic information to initialize imdb
    :param image_set: train2017, val2017
    :param root_path: 'data', will write 'cache'
    :param data_path: 'data/coco', load data and write results
    """
    super(coco, self).__init__('coco_' + image_set, root_path)
    # example: annotations/instances_train2017.json
    self._anno_file = os.path.join(data_path, 'annotations', 'instances_' + image_set + '.json')
    # example train2017/000000119993.jpg
    self._image_file_tmpl = os.path.join(data_path, image_set, '{}')
    # example detections_val2017_results.json
    self._result_file = os.path.join(data_path, 'detections_{}_results.json'.format(image_set))
    # class list: '__background__' followed by one class name per line of
    # annotations/classes.txt
    self.classes = ['__background__']
    self._classes_file = os.path.join(data_path, 'annotations', 'classes.txt')
    # BUGFIX: use a context manager so the file handle is always closed,
    # and strip only the trailing newline instead of blindly dropping the
    # last character (which corrupted the final class name whenever the
    # file had no trailing newline). Blank lines are skipped rather than
    # appended as empty class names.
    with open(self._classes_file) as f:
        for line in f:
            name = line.rstrip('\n')
            if name:
                self.classes.append(name)
    # get roidb
    self._roidb = self._get_cached('roidb', self._load_gt_roidb)
    logger.info('%s num_images %d' % (self.name, self.num_images))
def _do_python_eval(self, all_boxes, use_07_metric):
    """Run PASCAL VOC evaluation over ``all_boxes`` and log per-class AP.

    :param all_boxes: all_boxes[cls_ind][im_ind] is an (N, 5) detection
        array (x1, y1, x2, y2, score), or an empty-list placeholder
    :param use_07_metric: if True, use the 11-point VOC2007 AP metric
    """
    aps = []
    evaluated_classes = []  # names aligned with aps (no '__background__')
    for cls_ind, cls in enumerate(self.classes):
        if cls == '__background__':
            continue
        # class_anno is a dict [image_index, [bbox, difficult, det]]
        class_anno = {}
        npos = 0
        for roi_rec in self.roidb:
            index = roi_rec['index']
            objects = [obj for obj in roi_rec['objs'] if obj['name'] == cls]
            bbox = np.array([x['bbox'] for x in objects])
            # BUGFIX: np.bool alias was removed in NumPy 1.24 -> use bool
            difficult = np.array([x['difficult'] for x in objects]).astype(bool)
            det = [False] * len(objects)  # stand for detected
            npos = npos + sum(~difficult)
            class_anno[index] = {'bbox': bbox, 'difficult': difficult, 'det': det}

        # bbox is 2d array of all detections, corresponding to each image_id
        image_ids = []
        bbox = []
        confidence = []
        for im_ind, dets in enumerate(all_boxes[cls_ind]):
            if len(dets) == 0:  # robustness: skip empty-list placeholders
                continue
            for k in range(dets.shape[0]):
                image_ids.append(self.roidb[im_ind]['index'])
                # VOC uses 1-based pixel coordinates
                bbox.append([dets[k, 0] + 1, dets[k, 1] + 1,
                             dets[k, 2] + 1, dets[k, 3] + 1])
                confidence.append(dets[k, -1])
        bbox = np.array(bbox)
        confidence = np.array(confidence)

        rec, prec, ap = self.voc_eval(class_anno, npos, image_ids, bbox,
                                      confidence, ovthresh=0.5,
                                      use_07_metric=use_07_metric)
        aps.append(ap)
        evaluated_classes.append(cls)

    # BUGFIX: zip against the evaluated class names; the original zipped
    # against self.classes (which still contains '__background__'),
    # shifting every reported AP onto the wrong class.
    for cls, ap in zip(evaluated_classes, aps):
        logger.info('AP for {} = {:.4f}'.format(cls, ap))
    logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))
def _get_cached(self, cache_item, fn):
    """Return ``cache_item`` from the pickle cache, computing it with
    ``fn`` and persisting the result on a cache miss.

    :param cache_item: short name of the cached object, e.g. 'roidb'
    :param fn: zero-argument callable producing the value on a miss
    :return: the cached or freshly computed value
    """
    cache_dir = os.path.join(self._root_path, 'cache')
    cache_path = os.path.join(cache_dir, '{}_{}.pkl'.format(self._name, cache_item))
    if os.path.exists(cache_path):
        logger.info('loading cache {}'.format(cache_path))
        with open(cache_path, 'rb') as fid:
            cached = pickle.load(fid)
        return cached
    logger.info('computing cache {}'.format(cache_path))
    cached = fn()
    logger.info('saving cache {}'.format(cache_path))
    # BUGFIX: create the cache directory on first use; the original raised
    # FileNotFoundError when '<root>/cache' did not exist yet.
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    with open(cache_path, 'wb') as fid:
        pickle.dump(cached, fid, pickle.HIGHEST_PROTOCOL)
    return cached
def test_net(sym, imdb, args):
    """Run Mask R-CNN style inference (boxes + masks + rois) over ``imdb``
    and pass the collected results to ``imdb.evaluate_mask``.

    :param sym: network symbol with outputs (rois, scores, bbox_deltas, mask_prob)
    :param imdb: dataset object exposing roidb / num_images / num_classes
    :param args: parsed CLI args (gpu, image sizes, pixel stats, thresholds)
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))
    # setup context
    ctx = mx.gpu(args.gpu)
    # load testing data
    test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side,
                           max_size=args.img_long_side, mean=args.img_pixel_means,
                           std=args.img_pixel_stds)
    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)
    # produce shape max possible (bind once at the largest image size)
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None
    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)
    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)
    # all detections are collected into:
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score); masks and rois are collected in parallel
    results_list = []
    all_boxes = [[[] for _ in range(imdb.num_images)] for _ in range(imdb.num_classes)]
    all_masks = [[[] for _ in range(imdb.num_images)] for _ in range(imdb.num_classes)]
    all_rois = [[[] for _ in range(imdb.num_images)] for _ in range(imdb.num_classes)]
    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas, mask_prob = mod.get_outputs()
            # NOTE(review): rois[:, 0] is presumably a batch index column;
            # dropped before post-processing — confirm in im_detect.
            rois = rois[:, 1:]
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]
            det, masks, rois_out = im_detect(rois, scores, bbox_deltas, mask_prob, im_info,
                                             bbox_stds=args.rcnn_bbox_stds,
                                             nms_thresh=args.rcnn_nms_thresh,
                                             conf_thresh=args.rcnn_conf_thresh)
            # det layout (from the indexing below): col 0 = class id,
            # col 1 = score, last 4 cols = box coordinates
            for j in range(1, imdb.num_classes):
                indexes = np.where(det[:, 0] == j)[0]
                all_boxes[j][i] = np.concatenate((det[:, -4:], det[:, [1]]), axis=-1)[indexes, :]
                all_masks[j][i] = masks[indexes]
                all_rois[j][i] = rois_out[indexes]
            # per-image lists, indexed by class (slot 0 = background placeholder)
            boxes_this_image = [[]] + [all_boxes[cls_ind][i] for cls_ind in range(1, imdb.num_classes)]
            masks_this_image = [[]] + [all_masks[cls_ind][i] for cls_ind in range(1, imdb.num_classes)]
            rois_this_image = [[]] + [all_rois[cls_ind][i] for cls_ind in range(1, imdb.num_classes)]
            results_list.append({'image': '{}.png'.format(i),
                                 'im_info': im_info.asnumpy(),
                                 'boxes': boxes_this_image,
                                 'masks': masks_this_image,
                                 'rois': rois_this_image})
            pbar.update(data_batch.data[0].shape[0])
    # evaluate model
    results_pack = {'all_boxes': all_boxes,
                    'all_masks': all_masks,
                    'results_list': results_list}
    imdb.evaluate_mask(results_pack)
def test_net(sym, imdb, args, config): logger.addHandler( logging.FileHandler("{0}/{1}".format(args.prefix, 'test.log'))) # print config logger.info('called with args\n{}'.format(pprint.pformat(vars(args)))) # setup context ctx = mx.gpu(args.gpu) # load testing data test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side, max_size=args.img_long_side, mean=config.transform['img_pixel_means'], std=config.transform['img_pixel_stds']) # load params arg_params, aux_params = load_param(args.params, ctx=ctx) # produce shape max possible data_names = ['data', 'im_info'] label_names = None data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)), ('im_info', (1, 3))] label_shapes = None # check shapes check_shape(sym, data_shapes, arg_params, aux_params) # create and bind module mod = Module(sym, data_names, label_names, context=ctx) mod.bind(data_shapes, label_shapes, for_training=False) mod.init_params(arg_params=arg_params, aux_params=aux_params) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(imdb.num_images)] for _ in range(imdb.num_classes)] # start detection with tqdm(total=imdb.num_images) as pbar: for i, data_batch in enumerate(test_data): # forward im_info = data_batch.data[1][0] mod.forward(data_batch) rois, scores, bbox_deltas = mod.get_outputs() rois = rois[:, 1:] scores = scores[0] bbox_deltas = bbox_deltas[0] det = im_detect(rois, scores, bbox_deltas, im_info, bbox_stds=args.rcnn_bbox_stds, nms_thresh=args.rcnn_nms_thresh, conf_thresh=args.rcnn_conf_thresh) for j in range(1, imdb.num_classes): indexes = np.where(det[:, 0] == j)[0] all_boxes[j][i] = np.concatenate((det[:, -4:], det[:, [1]]), axis=-1)[indexes, :] pbar.update(data_batch.data[0].shape[0]) # evaluate model imdb.evaluate_detections(all_boxes)
def train(sym, roidb):
    '''
    User function: Start training

    Args:
        sym (mxnet model): Mxnet model returned from set_network() function
        roidb (dataloader): Dataloader returned from set_model() function

    Returns:
        None
    '''
    # setup multi-gpu; all hyper-parameters come from the module-level
    # system_dict. Falls back to CPU when no gpu ids are configured.
    if(len(system_dict["gpus"]) == 0):
        ctx = [mx.cpu(0)];
    else:
        ctx = [mx.gpu(int(i)) for i in system_dict["gpus"]]
    batch_size = system_dict["rcnn_batch_size"] * len(ctx)

    # load training data: anchors are generated/sampled against the RPN
    # feature map symbol
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    ag = AnchorGenerator(feat_stride=system_dict["rpn_feat_stride"],
                         anchor_scales=system_dict["rpn_anchor_scales"],
                         anchor_ratios=system_dict["rpn_anchor_ratios"])
    asp = AnchorSampler(allowed_border=system_dict["rpn_allowed_border"],
                        batch_rois=system_dict["rpn_batch_rois"],
                        fg_fraction=system_dict["rpn_fg_fraction"],
                        fg_overlap=system_dict["rpn_fg_overlap"],
                        bg_overlap=system_dict["rpn_bg_overlap"])
    train_data = AnchorLoader(roidb, batch_size, system_dict["img_short_side"],
                              system_dict["img_long_side"],
                              system_dict["img_pixel_means"],
                              system_dict["img_pixel_stds"],
                              feat_sym, ag, asp, shuffle=True)

    # produce shape max possible (bind at the largest image size)
    _, out_shape, _ = feat_sym.infer_shape(data=(1, 3, system_dict["img_long_side"], system_dict["img_long_side"]))
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(system_dict["rpn_anchor_scales"]) * len(system_dict["rpn_anchor_ratios"])
    data_names = ['data', 'im_info', 'gt_boxes']
    label_names = ['label', 'bbox_target', 'bbox_weight']
    data_shapes = [('data', (batch_size, 3, system_dict["img_long_side"], system_dict["img_long_side"])),
                   ('im_info', (batch_size, 3)),
                   ('gt_boxes', (batch_size, 100, 5))]
    # NOTE(review): 'label' folds the anchor count into the height axis
    # (rpn_num_anchors * feat_height) — confirm against AnchorLoader layout.
    label_shapes = [('label', (batch_size, 1, rpn_num_anchors * feat_height, feat_width)),
                    ('bbox_target', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width)),
                    ('bbox_weight', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width))]

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(sym, data_shapes + label_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params: resume from a checkpoint, or start from
    # the pretrained backbone and initialize the new detection heads
    if system_dict["resume"]:
        arg_params, aux_params = load_param(system_dict["resume"])
    else:
        arg_params, aux_params = load_param(system_dict["pretrained"])
        arg_params, aux_params = initialize_frcnn(sym, data_shapes, arg_params, aux_params)

    # check parameter shapes
    check_shape(sym, data_shapes + label_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, system_dict["net_fixed_params"])
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric: RPN and RCNN accuracy / log-loss / smooth-L1 metrics combined
    rpn_eval_metric = RPNAccMetric()
    rpn_cls_metric = RPNLogLossMetric()
    rpn_bbox_metric = RPNL1LossMetric()
    eval_metric = RCNNAccMetric()
    cls_metric = RCNNLogLossMetric()
    bbox_metric = RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(batch_size, frequent=system_dict["log_interval"], auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(system_dict["save_prefix"])

    # learning schedule: step-decay by lr_factor at the configured epochs,
    # adjusted for the resume epoch
    base_lr = system_dict["lr"]
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in system_dict["lr_decay_epoch"].split(',')]
    lr_epoch_diff = [epoch - system_dict["start_epoch"] for epoch in lr_epoch if epoch > system_dict["start_epoch"]]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {'momentum': 0.9, 'wd': 0.0005, 'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # train
    mod = Module(sym, data_names=data_names, label_names=label_names,
                 logger=logger, context=ctx, work_load_list=None,
                 fixed_param_names=fixed_param_names)
    mod.fit(train_data, eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore='device', optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=system_dict["start_epoch"],
            num_epoch=system_dict["epochs"])
def train_net(sym, roidb, args):
    """Train a Faster R-CNN model end-to-end.

    :param sym: full training symbol (RPN + RCNN heads)
    :param roidb: list of roi records with ground-truth annotations
    :param args: parsed CLI args (gpus, batch size, schedule, paths, ...)
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))
    # setup multi-gpu; empty --gpus means CPU training
    ctx = [mx.cpu()] if not args.gpus else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    batch_size = args.rcnn_batch_size * len(ctx)

    # load training data: anchors generated/sampled against the RPN feature map
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    ag = AnchorGenerator(feat_stride=args.rpn_feat_stride,
                         anchor_scales=args.rpn_anchor_scales,
                         anchor_ratios=args.rpn_anchor_ratios)
    asp = AnchorSampler(allowed_border=args.rpn_allowed_border,
                        batch_rois=args.rpn_batch_rois,
                        fg_fraction=args.rpn_fg_fraction,
                        fg_overlap=args.rpn_fg_overlap,
                        bg_overlap=args.rpn_bg_overlap)
    train_data = AnchorLoader(roidb, batch_size, args.img_short_side,
                              args.img_long_side, args.img_pixel_means,
                              args.img_pixel_stds, feat_sym, ag, asp,
                              shuffle=True)

    # produce shape max possible (bind at the largest image size)
    _, out_shape, _ = feat_sym.infer_shape(data=(1, 3, args.img_long_side, args.img_long_side))
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(args.rpn_anchor_scales) * len(args.rpn_anchor_ratios)
    data_names = ['data', 'im_info', 'gt_boxes']
    label_names = ['label', 'bbox_target', 'bbox_weight']
    data_shapes = [('data', (batch_size, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (batch_size, 3)),
                   ('gt_boxes', (batch_size, 100, 5))]
    # NOTE(review): 'label' folds the anchor count into the height axis
    # (rpn_num_anchors * feat_height) — confirm against AnchorLoader layout.
    label_shapes = [('label', (batch_size, 1, rpn_num_anchors * feat_height, feat_width)),
                    ('bbox_target', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width)),
                    ('bbox_weight', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width))]

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(
        sym, data_shapes + label_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params: resume a checkpoint, or start from the
    # pretrained backbone and initialize the new detection heads
    if args.resume:
        arg_params, aux_params = load_param(args.resume)
    else:
        arg_params, aux_params = load_param(args.pretrained)
        arg_params, aux_params = initialize_frcnn(sym, data_shapes, arg_params, aux_params)

    # check parameter shapes
    check_shape(sym, data_shapes + label_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, args.net_fixed_params)
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric: RPN and RCNN accuracy / log-loss / smooth-L1 combined
    rpn_eval_metric = RPNAccMetric()
    rpn_cls_metric = RPNLogLossMetric()
    rpn_bbox_metric = RPNL1LossMetric()
    eval_metric = RCNNAccMetric()
    cls_metric = RCNNLogLossMetric()
    bbox_metric = RCNNL1LossMetric()
    eval_metrics = mx.gluon.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(batch_size,
                                                 frequent=args.log_interval,
                                                 auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(args.save_prefix)

    # learning schedule: step-decay by lr_factor at the configured epochs,
    # adjusted for the resume epoch
    base_lr = args.lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in args.lr_decay_epoch.split(',')]
    lr_epoch_diff = [
        epoch - args.start_epoch for epoch in lr_epoch
        if epoch > args.start_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod = Module(sym, data_names=data_names, label_names=label_names,
                 logger=logger, context=ctx, work_load_list=None,
                 fixed_param_names=fixed_param_names)
    mod.fit(train_data, eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore='device', optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=args.start_epoch,
            num_epoch=args.epochs)
def filter_roidb(self):
    """Remove images without usable rois"""
    num_before = len(self._roidb)
    self._roidb = [rec for rec in self._roidb if len(rec['gt_classes']) > 0]
    num_after = len(self._roidb)
    logger.info('filter roidb: {} -> {}'.format(num_before, num_after))
def train_net(sym, roidb, args):
    """Train a Faster R-CNN model end-to-end (GPU-only variant).

    :param sym: full training symbol (RPN + RCNN heads)
    :param roidb: list of roi records with ground-truth annotations
    :param args: parsed CLI args (gpus, batch size, schedule, paths, ...)
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))
    # setup multi-gpu (no CPU fallback in this variant)
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    batch_size = args.rcnn_batch_size * len(ctx)

    # load training data: anchors generated/sampled against the RPN feature map
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    ag = AnchorGenerator(feat_stride=args.rpn_feat_stride,
                         anchor_scales=args.rpn_anchor_scales,
                         anchor_ratios=args.rpn_anchor_ratios)
    asp = AnchorSampler(allowed_border=args.rpn_allowed_border,
                        batch_rois=args.rpn_batch_rois,
                        fg_fraction=args.rpn_fg_fraction,
                        fg_overlap=args.rpn_fg_overlap,
                        bg_overlap=args.rpn_bg_overlap)
    train_data = AnchorLoader(roidb, batch_size, args.img_short_side,
                              args.img_long_side, args.img_pixel_means,
                              args.img_pixel_stds, feat_sym, ag, asp,
                              shuffle=True)

    # produce shape max possible (bind at the largest image size)
    _, out_shape, _ = feat_sym.infer_shape(data=(1, 3, args.img_long_side, args.img_long_side))
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(args.rpn_anchor_scales) * len(args.rpn_anchor_ratios)
    data_names = ['data', 'im_info', 'gt_boxes']
    label_names = ['label', 'bbox_target', 'bbox_weight']
    data_shapes = [('data', (batch_size, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (batch_size, 3)),
                   ('gt_boxes', (batch_size, 100, 5))]
    # NOTE(review): 'label' folds the anchor count into the height axis
    # (rpn_num_anchors * feat_height) — confirm against AnchorLoader layout.
    label_shapes = [('label', (batch_size, 1, rpn_num_anchors * feat_height, feat_width)),
                    ('bbox_target', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width)),
                    ('bbox_weight', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width))]

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(sym, data_shapes + label_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params: resume a checkpoint, or start from the
    # pretrained backbone and initialize the new detection heads
    if args.resume:
        arg_params, aux_params = load_param(args.resume)
    else:
        arg_params, aux_params = load_param(args.pretrained)
        arg_params, aux_params = initialize_frcnn(sym, data_shapes, arg_params, aux_params)

    # check parameter shapes
    check_shape(sym, data_shapes + label_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, args.net_fixed_params)
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric: RPN and RCNN accuracy / log-loss / smooth-L1 combined
    rpn_eval_metric = RPNAccMetric()
    rpn_cls_metric = RPNLogLossMetric()
    rpn_bbox_metric = RPNL1LossMetric()
    eval_metric = RCNNAccMetric()
    cls_metric = RCNNLogLossMetric()
    bbox_metric = RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(batch_size, frequent=args.log_interval, auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(args.save_prefix)

    # learning schedule: step-decay by lr_factor at the configured epochs,
    # adjusted for the resume epoch
    base_lr = args.lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in args.lr_decay_epoch.split(',')]
    lr_epoch_diff = [epoch - args.start_epoch for epoch in lr_epoch if epoch > args.start_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {'momentum': 0.9, 'wd': 0.0005, 'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # train
    mod = Module(sym, data_names=data_names, label_names=label_names,
                 logger=logger, context=ctx, work_load_list=None,
                 fixed_param_names=fixed_param_names)
    mod.fit(train_data, eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore='device', optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=args.start_epoch,
            num_epoch=args.epochs)