def init_modules(self):
    self.feature_extractor = feature_extractors.build(
        self.feature_extractor_config)
    self.rpn_model = detectors.build(self.rpn_config)
    if self.pooling_mode == 'align':
        self.rcnn_pooling = ROIAlign(
            (self.pooling_size, self.pooling_size), 1.0 / 16.0, 2)

    # note that the roi extractor is shared across stages but the heads are not
    in_channels = 2048
    self.rcnn_cls_preds = nn.ModuleList([
        nn.Linear(in_channels, self.n_classes) for _ in range(self.num_stages)
    ])

    # a fresh bbox head per stage: repeating one nn.Linear would make every
    # stage share the same weights, contradicting the note above
    bbox_out_channels = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_preds = nn.ModuleList([
        nn.Linear(in_channels, bbox_out_channels)
        for _ in range(self.num_stages)
    ])
    self.rcnn_orient_preds = nn.ModuleList(
        [nn.Linear(in_channels, 4) for _ in range(self.num_stages)])
    self.rcnn_dim_preds = nn.ModuleList(
        [nn.Linear(in_channels, 3) for _ in range(self.num_stages)])

    # loss modules
    # if self.use_focal_loss:
    #     self.rcnn_cls_loss = FocalLoss(self.n_classes)
    # else:
    self.rcnn_cls_loss = nn.CrossEntropyLoss(reduction='none')
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduction='none')
    self.rcnn_orient_loss = OrientationLoss()
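# Why the bbox heads above are built inside the comprehension: a minimal
# standalone sketch showing that repeating one nn.Linear shares its weights
# across all slots, while constructing a new nn.Linear per stage gives each
# stage independent parameters.
import torch.nn as nn

shared = nn.ModuleList([nn.Linear(8, 4)] * 3)
assert shared[0].weight is shared[1].weight  # one weight tensor, three slots

independent = nn.ModuleList([nn.Linear(8, 4) for _ in range(3)])
assert independent[0].weight is not independent[1].weight  # per-stage weights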
def init_modules(self):
    self.feature_extractor = feature_extractors.build(
        self.feature_extractor_config)
    self.rpn_model = detectors.build(self.rpn_config)
    if self.pooling_mode == 'align':
        self.rcnn_pooling = ROIAlign(
            (self.pooling_size, self.pooling_size), 1.0 / 16.0, 2)

    # note that the roi extractor is shared across stages but the heads are not
    # self.rcnn_cls_preds = nn.ModuleList(
    #     [nn.Linear(2048, self.n_classes) for _ in range(self.num_stages)])
    in_channels = 2048

    # construct one branch per attribute of the instance
    branches = {}
    for attr in self.instance_info:
        # output width of this attribute's head
        col = self.instance_info[attr]
        branches[attr] = nn.ModuleList([
            nn.Linear(in_channels, col) for _ in range(self.num_stages)
        ])
    self.branches = nn.ModuleDict(branches)

    # if self.class_agnostic:
    #     rcnn_bbox_pred = nn.Linear(in_channels, 4)
    # else:
    #     rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)
    # self.rcnn_bbox_preds = nn.ModuleList(
    #     [rcnn_bbox_pred for _ in range(self.num_stages)])

    # loss modules
    # if self.use_focal_loss:
    #     self.rcnn_cls_loss = FocalLoss(self.n_classes)
    # else:
    self.rcnn_cls_loss = nn.CrossEntropyLoss(reduction='none')
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduction='none')
def init_modules(self):
    self.feature_extractor = feature_extractors.build(
        self.feature_extractor_config)
    self.rpn_model = detectors.build(self.rpn_config)

    in_channels = 1024
    self.rcnn_cls_preds = nn.ModuleList([
        nn.Linear(in_channels, self.n_classes)
        for _ in range(self.num_stages)
    ])
    # a fresh bbox head per stage: repeating one nn.Linear would make every
    # stage share the same weights
    bbox_out_channels = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_preds = nn.ModuleList([
        nn.Linear(in_channels, bbox_out_channels)
        for _ in range(self.num_stages)
    ])

    # loss modules
    if self.use_focal_loss:
        self.rcnn_cls_loss = FocalLoss(self.n_classes, gamma=2, alpha=0.75)
    else:
        self.rcnn_cls_loss = nn.CrossEntropyLoss(reduction='none')
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduction='none')

    # TODO: add feature-scale adaptive roi pooling
    self.rcnn_pooling = AdaptiveROIAlign(
        (self.pooling_size, self.pooling_size), 2)
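# AdaptiveROIAlign is a project-specific op; as a rough reference for the
# pooling it performs, the torchvision equivalent (an assumed stand-in, not
# this repo's implementation) looks like:
import torch
from torchvision.ops import roi_align

feat = torch.randn(1, 1024, 38, 50)  # e.g. a stride-16 feature map
rois = torch.tensor([[0., 32., 32., 160., 128.]])  # (batch_idx, x1, y1, x2, y2)
pooled = roi_align(
    feat, rois, output_size=(7, 7), spatial_scale=1.0 / 16.0, sampling_ratio=2)
print(pooled.shape)  # torch.Size([1, 1024, 7, 7])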
def init_modules(self):
    self.feature_extractor = feature_extractors.build(
        self.feature_extractor_config)
    self.rpn_model = detectors.build(self.rpn_config)

    # self.rcnn_pooling = ROIAlign(
    #     (self.pooling_size, self.pooling_size), 1.0 / 16.0, 2)
    self.rcnn_pooling = AdaptiveROIAlign(
        (self.pooling_size, self.pooling_size), 2)

    # construct one branch per attribute of the instance
    branches = {}
    for attr in self.instance_info:
        num_channels = self.instance_info[attr].num_channels
        branches[attr] = nn.ModuleList([
            nn.Linear(self.in_channels, num_channels)
            for _ in range(self.num_stages)
        ])
    self.branches = nn.ModuleDict(branches)

    self.rcnn_cls_loss = nn.CrossEntropyLoss(reduction='none')
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduction='none')
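# A sketch of how the per-attribute branches built above might be consumed in
# a forward pass; the AttrInfo layout and the stage indexing are assumptions
# based on this snippet, not the repo's actual forward code.
import torch
import torch.nn as nn


class AttrInfo(object):
    # hypothetical stand-in for the entries of self.instance_info
    def __init__(self, num_channels):
        self.num_channels = num_channels


instance_info = {'cls': AttrInfo(4), 'bbox': AttrInfo(16), 'dim': AttrInfo(3)}
num_stages, in_channels = 3, 2048

branches = nn.ModuleDict({
    attr: nn.ModuleList([
        nn.Linear(in_channels, info.num_channels) for _ in range(num_stages)
    ])
    for attr, info in instance_info.items()
})

pooled_feat = torch.randn(128, in_channels)  # one row per RoI
stage = 0
outputs = {attr: heads[stage](pooled_feat) for attr, heads in branches.items()}
# {'cls': (128, 4), 'bbox': (128, 16), 'dim': (128, 3)}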
def test(config, logger):
    eval_config = config['eval_config']
    model_config = config['model_config']
    data_config = config['eval_data_config']

    np.random.seed(eval_config['rng_seed'])

    logger.info('Using config:')
    pprint.pprint({
        'model_config': model_config,
        'data_config': data_config,
        'eval_config': eval_config
    })

    eval_out = eval_config['eval_out']
    if not os.path.exists(eval_out):
        logger.info('creating eval out directory {}'.format(eval_out))
        os.makedirs(eval_out)
    else:
        logger.warning('dir {} already exists!'.format(eval_out))

    # restore from random init or from a checkpoint
    restore = True

    # two ways to load the model:
    # 1. from any directory: only a config and a model path are needed
    # 2. from the training directory
    if args.model is not None:
        # args.model should be a path to a model file
        checkpoint_name = os.path.basename(args.model)
        input_dir = os.path.dirname(args.model)
    elif args.checkpoint is not None:
        checkpoint_name = 'detector_{}.pth'.format(args.checkpoint)
        assert args.load_dir is not None, \
            'please choose a directory to load the checkpoint from'
        eval_config['load_dir'] = args.load_dir
        input_dir = os.path.join(eval_config['load_dir'],
                                 model_config['type'],
                                 data_config['dataset_config']['type'])
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from {}'.
                format(input_dir))
    else:
        restore = False

    # log how the model is initialized
    if restore:
        logger.info('restore from checkpoint')
    else:
        logger.info('use pytorch default initialization')

    # model
    model = detectors.build(model_config)
    model.eval()

    if restore:
        saver = Saver(input_dir)
        saver.load({'model': model}, checkpoint_name)

    if args.cuda:
        model = model.cuda()

    dataloader = dataloaders.make_data_loader(data_config, training=False)

    tester = Tester(eval_config)
    tester.test(dataloader, model, logger)
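# test() reads args.model, args.checkpoint, args.load_dir and args.cuda from
# module scope; a minimal argparse setup consistent with those attributes
# (a sketch, not the repo's actual entry point):
import argparse

parser = argparse.ArgumentParser(description='evaluate a trained detector')
# path to a model file; takes priority over --checkpoint
parser.add_argument('--model', type=str, default=None)
# checkpoint id, resolved to detector_{id}.pth under --load_dir
parser.add_argument('--checkpoint', type=str, default=None)
parser.add_argument('--load_dir', type=str, default=None)
parser.add_argument('--cuda', action='store_true')
args = parser.parse_args()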
def train(config, logger):
    data_config = config['data_config']
    model_config = config['model_config']
    train_config = config['train_config']

    # build model
    model = detectors.build(model_config)
    model.train()

    # move to gpus before building the optimizer
    if train_config['mGPUs']:
        model = common.MyParallel(model)
    if train_config['cuda']:
        model = model.cuda()

    # build optimizer and scheduler
    optimizer = optimizers.build(train_config['optimizer_config'], model)
    # force the lr to change before building the scheduler
    if train_config['lr']:
        common.change_lr(optimizer, train_config['lr'])
    scheduler = schedulers.build(train_config['scheduler_config'], optimizer)

    # components for logging and saving (saver and summary writer)
    output_dir = os.path.join(train_config['output_path'],
                              model_config['type'],
                              data_config['dataset_config']['type'])
    saver = Saver(output_dir)

    # resume
    if train_config['resume']:
        checkpoint_path = 'detector_{}.pth'.format(train_config['checkpoint'])
        logger.info('resume from checkpoint {}'.format(checkpoint_path))
        params_dict = {
            'model': model,
            'optimizer': optimizer,
            'scheduler': scheduler,
            'start_iters': None
        }
        saver.load(params_dict, checkpoint_path)
        train_config['start_iters'] = params_dict['start_iters']
    else:
        train_config['start_iters'] = 1

    # build the dataloader after resuming (or not)
    dataloader = dataloaders.make_data_loader(data_config)

    # optionally initialize from a model file
    if train_config['model']:
        model_path = train_config['model']
        assert os.path.isabs(model_path)
        logger.info('initialize model from {}'.format(model_path))
        params_dict = {'model': model}
        saver.load(params_dict, model_path)

    summary_path = os.path.join(output_dir, 'summary')
    logger.info('setup summary_dir: {}'.format(summary_path))
    summary_writer = SummaryWriter(summary_path)
    os.chmod(summary_path, 0o777)

    logger.info('setup trainer')
    trainer = Trainer(train_config, logger)
    trainer.train(dataloader, model, optimizer, scheduler, saver,
                  summary_writer)
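# common.change_lr forces a new learning rate onto an already-built optimizer;
# with a plain torch optimizer, an assumed stand-in for that helper is simply:
import torch


def change_lr(optimizer, lr):
    # overwrite the lr of every param group in place
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
change_lr(optimizer, 0.001)
assert all(group['lr'] == 0.001 for group in optimizer.param_groups)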
import copy


def test_build_model():
    # target assigner/sampler settings reused verbatim by the RPN and both
    # RCNN cascade stages; deep-copied below to avoid aliasing one dict
    stage_target_config = {
        "target_assigner_config": {
            "type": "faster_rcnn",
            "similarity_calc_config": {
                "type": "center"
            },
            "fg_thresh": 0.3,
            "bg_thresh": 0.3,
            "coder_config": {
                "type": "center",
                "bbox_normalize_targets_precomputed": False
            },
            "matcher_config": {
                "type": "bipartitle"
            }
        },
        "sampler_config": {
            "type": "balanced",
            "fg_fraction": 0.5
        },
        "analyzer_config": {}
    }

    model_config = {
        'type': 'faster_rcnn',
        'num_stages': 2,
        'classes': ['Car', 'Truck'],
        'class_agnostic': False,
        'pooling_size': 7,
        'pooling_mode': 'roi_align',
        'use_focal_loss': True,
        'truncated': True,
        'batch_size': 1,
        "feature_extractor_config": {
            "type": "resnet",
            "pretrained_models_dir": "./data/pretrained_model",
            "net_arch": "res50",
            "separate_feat": False,
            "use_cascade": True,
            "class_agnostic": True,
            "classes": ["bg", "Car"],
            "img_channels": 3,
            "pretrained_model": "",
            "pretrained": True
        },
        "rpn_config": {
            "type": "rpn",
            "use_iou": False,
            "use_focal_loss": True,
            "anchor_generator_config": {
                "type": "default",
                "anchor_offset": [0, 0],
                "anchor_stride": [16, 16],
                "aspect_ratios": [0.5, 0.8, 1],
                "base_anchor_size": 16,
                "scales": [2, 4, 8, 16]
            },
            "din": 1024,
            "min_size": 16,
            "nms_thresh": 0.7,
            "post_nms_topN": 1000,
            "pre_nms_topN": 12000,
            "rpn_batch_size": 1024,
            "num_reg_samples": 1024,
            "num_cls_samples": 512,
            "sampler_config": {
                "type": "balanced",
                "fg_fraction": 0.25
            },
            "target_generator_config": [copy.deepcopy(stage_target_config)],
            "use_score": False
        },
        # one entry per cascade stage (num_stages = 2)
        "target_generator_config": [
            copy.deepcopy(stage_target_config),
            copy.deepcopy(stage_target_config)
        ],
    }
    model = detectors.build(model_config)
    print(model)
def inference(self, im, p2):
    """
    Args:
        im: shape(N, 3, H, W)
    Returns:
        dets: list with one entry per class; each entry is an array whose
            rows are (xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry)
    """
    config = self.config
    args = self.args
    eval_config = config['eval_config']
    model_config = config['model_config']
    data_config = config['eval_data_config']

    np.random.seed(eval_config['rng_seed'])

    self.logger.info('Using config:')
    pprint.pprint({
        'model_config': model_config,
        'data_config': data_config,
        'eval_config': eval_config
    })

    eval_out = eval_config['eval_out']
    if not os.path.exists(eval_out):
        self.logger.info('creating eval out directory {}'.format(eval_out))
        os.makedirs(eval_out)
    else:
        self.logger.warning('dir {} already exists!'.format(eval_out))

    # restore from random init or from a checkpoint
    restore = True

    # two ways to load the model:
    # 1. from any directory: only a config and a model path are needed
    # 2. from the training directory
    if args.model is not None:
        # args.model should be a path to a model file
        checkpoint_name = os.path.basename(args.model)
        input_dir = os.path.dirname(args.model)
    elif args.checkpoint is not None:
        checkpoint_name = 'detector_{}.pth'.format(args.checkpoint)
        assert args.load_dir is not None, \
            'please choose a directory to load the checkpoint from'
        eval_config['load_dir'] = args.load_dir
        input_dir = os.path.join(eval_config['load_dir'],
                                 model_config['type'], data_config['name'])
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from {}'.
                format(input_dir))
    else:
        restore = False

    # log how the model is initialized
    if restore:
        self.logger.info('restore from checkpoint')
    else:
        self.logger.info('use pytorch default initialization')

    # model
    model = detectors.build(model_config)
    model.eval()

    if restore:
        saver = Saver(input_dir)
        saver.load({'model': model}, checkpoint_name)

    model = model.cuda()

    self.logger.info('Start testing')

    data = self.preprocess(im, p2)
    data = self.to_batch(data)
    data = common.to_cuda(data)

    with torch.no_grad():
        prediction = model(data)

    # initialize dets for each class (index 0 is the background slot)
    dets = [[]]

    scores = prediction[constants.KEY_CLASSES]
    boxes_2d = prediction[constants.KEY_BOXES_2D]
    dims = prediction[constants.KEY_DIMS]
    orients = prediction[constants.KEY_ORIENTS_V2]
    p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

    # keep only each RoI's max class score, zero out the rest
    batch_size = scores.shape[0]
    scores = scores.view(-1, self.n_classes)
    new_scores = torch.zeros_like(scores)
    _, scores_argmax = scores.max(dim=-1)
    row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
    new_scores[row, scores_argmax] = scores[row, scores_argmax]
    scores = new_scores.view(batch_size, -1, self.n_classes)

    boxes_2d_per_img = boxes_2d[0]
    scores_per_img = scores[0]
    dims_per_img = dims[0]
    orients_per_img = orients[0]
    p2_per_img = p2[0]

    for class_ind in range(1, self.n_classes):
        # threshold on the class score
        inds = torch.nonzero(
            scores_per_img[:, class_ind] > self.thresh).view(-1)
        threshed_scores_per_img = scores_per_img[inds, class_ind]
        if inds.numel() > 0:
            threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
            threshed_dims_per_img = dims_per_img[inds]
            threshed_orients_per_img = orients_per_img[inds]
            threshed_dets_per_img = torch.cat([
                threshed_boxes_2d_per_img,
                threshed_scores_per_img.unsqueeze(-1),
                threshed_dims_per_img,
                threshed_orients_per_img.unsqueeze(-1)
            ], dim=-1)

            # sort by score
            _, order = torch.sort(threshed_scores_per_img, 0, True)
            threshed_dets_per_img = threshed_dets_per_img[order]

            # nms
            keep = nms(threshed_dets_per_img[:, :4],
                       threshed_dets_per_img[:, 4],
                       self.nms).view(-1).long()
            nms_dets_per_img = threshed_dets_per_img[keep].detach().cpu(
            ).numpy()

            # recover the 3d location from dims, 2d box and orientation
            location = geometry_utils.calc_location(
                nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                nms_dets_per_img[:, 8], p2_per_img.cpu().numpy())

            nms_dets_per_img = np.concatenate([
                nms_dets_per_img[:, :5], nms_dets_per_img[:, 5:8], location,
                nms_dets_per_img[:, -1:]
            ], axis=-1)

            dets.append(nms_dets_per_img)
        else:
            dets.append([])

    # columns: xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry
    return dets
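# The score post-processing in inference() keeps, for each RoI, only the score
# of its argmax class and zeroes the rest, so a box can only be emitted under
# its most confident class. The same trick isolated as a standalone snippet:
import torch

scores = torch.rand(5, 3)  # (num_rois, n_classes)
new_scores = torch.zeros_like(scores)
_, scores_argmax = scores.max(dim=-1)
row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
new_scores[row, scores_argmax] = scores[row, scores_argmax]
# each row now has at most one non-zero entry: its winning class score
assert ((new_scores != 0).sum(dim=-1) <= 1).all()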