def train(self, data_loader, model, optimizer, scheduler, saver,
          summary_writer):
    """Run the training loop until ``self.num_iters`` is exceeded.

    Progress is counted in samples, not batches: the loader index is
    multiplied by the batch size before it is compared against
    ``self.num_iters``, ``self.disp_interval`` and
    ``self.checkpoint_interval``.

    Args:
        data_loader: iterable of batches; must expose
            ``batch_sampler.batch_size``.
        model: callable returning ``(prediction, loss_dict, stats)`` for
            one batch.
        optimizer: optimizer, stepped once per batch.
        scheduler: learning-rate scheduler, stepped with the sample count.
        saver: object whose ``save(params_dict, name)`` stores checkpoints.
        summary_writer: object exposing ``add_scalar_dict(dict, step)``.

    Raises:
        AssertionError: if ``self.checkpoint_interval`` or
            ``self.disp_interval`` is not a multiple of the batch size.
    """
    batch_size = data_loader.batch_sampler.batch_size
    self.logger.info('Start training')
    self.logger.info('batch size: {}'.format(batch_size))

    # The sample counter only ever takes multiples of batch_size, so both
    # intervals must be multiples too or their triggers would never fire.
    assert self.checkpoint_interval % batch_size == 0, \
        'checkpoint_interval({}) cannot be mod by batch_size({})'.format(
            self.checkpoint_interval, batch_size)
    assert self.disp_interval % batch_size == 0, \
        'disp_interval({}) cannot be mod by batch_size({})'.format(
            self.disp_interval, batch_size)

    # batch counter starts from 1 (or from the resumed position)
    first_batch_index = max(1, self.start_iters // batch_size)

    for batch_index, data in enumerate(data_loader, first_batch_index):
        # convert the batch counter into the number of samples seen
        step = batch_index * batch_size
        if step > self.num_iters:
            self.logger.info('iteration is done')
            break

        tic = time.time()

        data = common.to_cuda(data)

        # the model computes its own losses during the forward pass
        _prediction, loss_dict, stats = model(data)

        # reduce every loss term to a scalar, store it back for logging
        # and accumulate the total that is back-propagated
        total_loss = 0
        for loss_name in list(loss_dict):
            reduced = loss_dict[loss_name].mean()
            loss_dict[loss_name] = reduced
            total_loss = total_loss + reduced

        optimizer.zero_grad()
        total_loss.backward()

        # clip gradients before the weights are updated
        nn.utils.clip_grad_norm_(model.parameters(), self.clip_gradient)
        optimizer.step()

        # the learning rate is adjusted by samples seen, not by epochs
        scheduler.step(step)

        self.stats.update_stats(stats)

        if step % self.disp_interval == 0:
            elapsed = time.time() - tic
            self.logger.info(
                '[iter {}] time cost: {:.4f}, loss: {:.4f}, lr: {:.2e}'.
                format(step, elapsed, total_loss, scheduler.get_lr()[0]))

            self.logger.info(self.stats)
            self.logger.info(common.loss_dict_to_str(loss_dict))

            # losses and metrics go to the summary writer
            loss_dict['total_loss'] = total_loss
            summary_writer.add_scalar_dict(loss_dict, step)
            summary_writer.add_scalar_dict(self.stats.get_summary_dict(),
                                           step)
            self.stats.clear_stats()

        if step % self.checkpoint_interval == 0:
            checkpoint_name = 'detector_{}.pth'.format(step)
            # 'start_iters' points at the next step so that a resumed run
            # does not repeat the one that was just finished
            saver.save({
                'start_iters': step + batch_size,
                'model': model,
                'optimizer': optimizer,
                'scheduler': scheduler
            }, checkpoint_name)
            self.logger.info('checkpoint {} saved'.format(checkpoint_name))
def test_corners_3d(self, dataloader, model, logger):
    """Run the model over ``dataloader`` and render the predicted corners.

    For every image, each anchor keeps only the score of its best class,
    detections are thresholded with ``self.thresh`` and filtered with NMS
    per class, and the surviving boxes and corners are passed to
    ``self.visualizer.render_image_corners_2d``.

    Args:
        dataloader: iterable of batches; ``len(dataloader)`` is used for
            the progress display.
        model: detector returning ``(prediction, _, _)`` for a batch.
        logger: unused; messages go through ``self.logger``.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    # Start the clock here: initialising it with 0 made the first reported
    # duration equal to the number of seconds since the Unix epoch.
    end_time = time.time()

    for step, data in enumerate(dataloader):
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]

        with torch.no_grad():
            prediction, _, _ = model(data)

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        corners_2d = prediction[constants.KEY_CORNERS_2D]
        p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

        # keep, for every row, only the score of its arg-max class and
        # zero out the others so a box is reported for one class at most
        batch_size = scores.shape[0]
        scores = scores.view(-1, self.n_classes)
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        # size of the last axis decides below whether the corners are
        # rendered as 3D (== 3) or as 2D points
        num_cols = corners_2d.shape[-1]

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]
            corners_2d_per_img = corners_2d[batch_ind]
            p2_per_img = p2[batch_ind]

            # index 0 is an empty placeholder (the class loop starts at 1)
            # NOTE(review): the (0, 8, num_cols) shape assumes 8 corners
            # per box - confirm against the model output.
            dets = [np.zeros((0, 8, num_cols), dtype=np.float32)]
            dets_2d = [np.zeros((0, 4), dtype=np.float32)]

            for class_ind in range(1, self.n_classes):
                # cls thresh
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > self.thresh).view(-1)
                if inds.numel() == 0:
                    dets.append(
                        np.zeros((0, 8, num_cols), dtype=np.float32))
                    dets_2d.append(np.zeros((0, 4)))
                    continue

                threshed_scores_per_img = scores_per_img[inds, class_ind]
                threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                threshed_corners_2d_per_img = corners_2d_per_img[inds]

                # concat boxes and scores
                threshed_dets_per_img = torch.cat(
                    [
                        threshed_boxes_2d_per_img,
                        threshed_scores_per_img.unsqueeze(-1),
                    ],
                    dim=-1)

                # sort by scores (descending), corners in the same order
                _, order = torch.sort(threshed_scores_per_img, 0, True)
                threshed_dets_per_img = threshed_dets_per_img[order]
                threshed_corners_2d_per_img = threshed_corners_2d_per_img[
                    order]

                # nms
                keep = nms(threshed_dets_per_img[:, :4],
                           threshed_dets_per_img[:, 4],
                           self.nms).view(-1).long()

                nms_dets_per_img = threshed_dets_per_img[keep].detach(
                ).cpu().numpy()
                nms_corners_2d_per_img = threshed_corners_2d_per_img[
                    keep].detach().cpu().numpy()

                dets.append(nms_corners_2d_per_img)
                dets_2d.append(nms_dets_per_img[:, :4])

            corners = np.concatenate(dets, axis=0)
            boxes_to_render = np.concatenate(dets_2d, axis=0)

            # Use dedicated names here: re-binding ``corners_2d`` would
            # overwrite the batch-level prediction tensor that the next
            # image of the batch still has to index.
            corners_2d_to_render = None
            corners_3d_to_render = None
            if num_cols == 3:
                corners_3d_to_render = corners
            else:
                corners_2d_to_render = corners

            # render the image that belongs to this batch element, not
            # always the first one of the batch
            self.visualizer.render_image_corners_2d(
                image_path[batch_ind],
                boxes_2d=boxes_to_render,
                corners_2d=corners_2d_to_render,
                corners_3d=corners_3d_to_render,
                p2=p2_per_img.cpu().numpy())

            duration_time = time.time() - end_time
            sys.stdout.write('\r{}/{},duration: {}'.format(
                step + 1, num_samples, duration_time))
            sys.stdout.flush()

            end_time = time.time()
def test_3d(self, dataloader, model, logger):
    """Run the model over ``dataloader`` and save per-image 3D detections.

    For every image, each anchor keeps only the score of its best class,
    detections are thresholded with ``self.thresh`` and filtered with NMS
    per class, a location is computed with
    ``geometry_utils.calc_location`` and the result is written through
    ``self.save_mono_3d_dets``.

    Args:
        dataloader: iterable of batches; ``len(dataloader)`` is used for
            the progress display.
        model: detector returning ``(prediction, _, _)`` for a batch.
        logger: unused; messages go through ``self.logger``.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    # Start the clock here: initialising it with 0 made the first reported
    # duration equal to the number of seconds since the Unix epoch.
    end_time = time.time()

    for step, data in enumerate(dataloader):
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]

        with torch.no_grad():
            prediction, _, _ = model(data)

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        dims = prediction[constants.KEY_DIMS]
        orients = prediction[constants.KEY_ORIENTS_V2]
        p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

        # keep, for every row, only the score of its arg-max class and
        # zero out the others so a box is reported for one class at most
        batch_size = scores.shape[0]
        scores = scores.view(-1, self.n_classes)
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]
            dims_per_img = dims[batch_ind]
            orients_per_img = orients[batch_ind]
            p2_per_img = p2[batch_ind]

            # Fresh list for every image (index 0 is an empty placeholder,
            # the class loop starts at 1). Creating it once per batch made
            # each image's saved result also contain the detections of the
            # images before it in the same batch.
            dets = [[]]

            for class_ind in range(1, self.n_classes):
                # cls thresh
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > self.thresh).view(-1)
                if inds.numel() == 0:
                    dets.append([])
                    continue

                threshed_scores_per_img = scores_per_img[inds, class_ind]
                threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                threshed_dims_per_img = dims_per_img[inds]
                threshed_orients_per_img = orients_per_img[inds]

                # columns: box [:4], score [4], dims [5:8], orient [8]
                threshed_dets_per_img = torch.cat([
                    threshed_boxes_2d_per_img,
                    threshed_scores_per_img.unsqueeze(-1),
                    threshed_dims_per_img,
                    threshed_orients_per_img.unsqueeze(-1)
                ], dim=-1)

                # sort by scores (descending)
                _, order = torch.sort(threshed_scores_per_img, 0, True)
                threshed_dets_per_img = threshed_dets_per_img[order]

                # nms
                keep = nms(threshed_dets_per_img[:, :4],
                           threshed_dets_per_img[:, 4],
                           self.nms).view(-1).long()

                nms_dets_per_img = threshed_dets_per_img[keep].detach(
                ).cpu().numpy()

                # calculate location from dims, box + score and orient
                location = geometry_utils.calc_location(
                    nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                    nms_dets_per_img[:, 8], p2_per_img.cpu().numpy())

                # final layout: box + score, dims, location, orient
                nms_dets_per_img = np.concatenate([
                    nms_dets_per_img[:, :5], nms_dets_per_img[:, 5:8],
                    location, nms_dets_per_img[:, -1:]
                ], axis=-1)

                dets.append(nms_dets_per_img)

            duration_time = time.time() - end_time
            label_path = self._generate_label_path(image_path[batch_ind])
            self.save_mono_3d_dets(dets, label_path)
            sys.stdout.write('\r{}/{},duration: {}'.format(
                step + 1, num_samples, duration_time))
            sys.stdout.flush()

            end_time = time.time()
def test_super_nms(self, dataloader, model, logger):
    """Run the model over ``dataloader`` and save 2D detections per image.

    Scores above a fixed 0.01 threshold are kept per class, sorted and
    filtered with ``box_ops.super_nms``; the result is written through
    ``self.save_dets``.

    Args:
        dataloader: iterable of batches; ``len(dataloader)`` is used for
            the progress display.
        model: detector; its return value is used directly as the
            prediction dict (no tuple unpacking in this variant).
        logger: unused; messages go through ``self.logger``.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    # Start the clock here: initialising it with 0 made the first reported
    # duration equal to the number of seconds since the Unix epoch.
    end_time = time.time()

    for step, data in enumerate(dataloader):
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]

        with torch.no_grad():
            prediction = model(data)

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]

        batch_size = scores.shape[0]

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]

            # Fresh list for every image (index 0 is an empty placeholder,
            # the class loop starts at 1). Creating it once per batch made
            # each image's saved result also contain the detections of the
            # images before it in the same batch.
            dets = [[]]

            for class_ind in range(1, self.n_classes):
                # cls thresh
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > 0.01).view(-1)
                if inds.numel() == 0:
                    dets.append([])
                    continue

                threshed_scores_per_img = scores_per_img[inds, class_ind]
                threshed_boxes_2d_per_img = boxes_2d_per_img[inds]

                # concat boxes and scores
                threshed_dets_per_img = torch.cat([
                    threshed_boxes_2d_per_img,
                    threshed_scores_per_img.unsqueeze(-1),
                ], dim=-1)

                # sort by scores (descending)
                _, order = torch.sort(threshed_scores_per_img, 0, True)
                threshed_dets_per_img = threshed_dets_per_img[order]

                # super nms works on the boxes only
                keep = box_ops.super_nms(threshed_dets_per_img[:, :4],
                                         0.8,
                                         nms_num=3,
                                         loop_time=2)

                nms_dets_per_img = threshed_dets_per_img[keep].detach(
                ).cpu().numpy()

                dets.append(nms_dets_per_img)

            duration_time = time.time() - end_time
            label_path = self._generate_label_path(image_path[batch_ind])
            self.save_dets(dets, label_path)
            sys.stdout.write('\r{}/{},duration: {}'.format(
                step + 1, num_samples, duration_time))
            sys.stdout.flush()

            end_time = time.time()
def inference(self, im, p2):
    """Build the detector, run it on one input and decode 3D detections.

    Every call reads ``self.config`` / ``self.args``, builds the model,
    optionally restores a checkpoint and runs a single forward pass. Only
    the first element of the batch is decoded.

    Args:
        im: input image, handed to ``self.preprocess`` unchanged.
            NOTE(review): documented upstream as shape (N, 3, H, W);
            nothing here checks it.
        p2: calibration input, handed to ``self.preprocess`` unchanged.

    Returns:
        list: entry 0 is an empty list; entries 1 .. n_classes - 1 hold,
        per class, either an empty list or a numpy array with one row per
        detection laid out as
        xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry.

    Raises:
        AssertionError: if ``args.checkpoint`` is given without
            ``args.load_dir``.
        Exception: if the checkpoint directory does not exist.
    """
    args = self.args
    eval_cfg = self.config['eval_config']
    model_cfg = self.config['model_config']
    data_cfg = self.config['eval_data_config']

    np.random.seed(eval_cfg['rng_seed'])

    self.logger.info('Using config:')
    pprint.pprint({
        'model_config': model_cfg,
        'data_config': data_cfg,
        'eval_config': eval_cfg
    })

    out_dir = eval_cfg['eval_out']
    if os.path.exists(out_dir):
        self.logger.warning('dir {} exist already!'.format(out_dir))
    else:
        self.logger.info('creat eval out directory {}'.format(out_dir))
        os.makedirs(out_dir)

    # Two ways to locate the weights:
    #   1. an explicit path to a model file (only config + path needed)
    #   2. a checkpoint number inside a training directory
    # With neither, the freshly initialised model is used as it is.
    checkpoint_name = None
    input_dir = None
    if args.model is not None:
        input_dir, checkpoint_name = os.path.split(args.model)
    elif args.checkpoint is not None:
        checkpoint_name = 'detector_{}.pth'.format(args.checkpoint)
        assert args.load_dir is not None, 'please choose a directory to load checkpoint'
        eval_cfg['load_dir'] = args.load_dir
        input_dir = os.path.join(eval_cfg['load_dir'], model_cfg['type'],
                                 data_cfg['name'])
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from {}'.
                format(input_dir))
    restore = checkpoint_name is not None

    self.logger.info("restore from checkpoint"
                     if restore else "use pytorch default initialization")

    # model
    model = detectors.build(model_cfg)
    model.eval()
    if restore:
        Saver(input_dir).load({'model': model}, checkpoint_name)
    model = model.cuda()

    self.logger.info('Start testing')

    batch = common.to_cuda(self.to_batch(self.preprocess(im, p2)))

    with torch.no_grad():
        prediction = model(batch)

    cls_scores = prediction[constants.KEY_CLASSES]
    all_boxes = prediction[constants.KEY_BOXES_2D]
    all_dims = prediction[constants.KEY_DIMS]
    all_orients = prediction[constants.KEY_ORIENTS_V2]
    calib = batch[constants.KEY_STEREO_CALIB_P2_ORIG]

    # keep, for every row, only the score of its arg-max class and zero
    # out the others so a box is reported for one class at most
    batch_size = cls_scores.shape[0]
    flat_scores = cls_scores.view(-1, self.n_classes)
    best_only = torch.zeros_like(flat_scores)
    _, best_class = flat_scores.max(dim=-1)
    rows = torch.arange(0, best_class.numel()).type_as(best_class)
    best_only[rows, best_class] = flat_scores[rows, best_class]
    cls_scores = best_only.view(batch_size, -1, self.n_classes)

    # only the first element of the batch is decoded
    img_boxes = all_boxes[0]
    img_scores = cls_scores[0]
    img_dims = all_dims[0]
    img_orients = all_orients[0]
    img_calib = calib[0]

    # index 0 is an empty placeholder; the class loop starts at 1
    dets = [[]]
    for class_ind in range(1, self.n_classes):
        # cls thresh
        selected = torch.nonzero(
            img_scores[:, class_ind] > self.thresh).view(-1)
        if selected.numel() == 0:
            dets.append([])
            continue

        class_scores = img_scores[selected, class_ind]

        # columns: box [:4], score [4], dims [5:8], orient [8]
        class_dets = torch.cat([
            img_boxes[selected],
            class_scores.unsqueeze(-1),
            img_dims[selected],
            img_orients[selected].unsqueeze(-1)
        ], dim=-1)

        # sort by scores (descending)
        _, order = torch.sort(class_scores, 0, True)
        class_dets = class_dets[order]

        # nms
        keep = nms(class_dets[:, :4], class_dets[:, 4],
                   self.nms).view(-1).long()
        kept = class_dets[keep].detach().cpu().numpy()

        # calculate location from dims, box + score and orient
        location = geometry_utils.calc_location(
            kept[:, 5:8], kept[:, :5], kept[:, 8],
            img_calib.cpu().numpy())

        dets.append(
            np.concatenate(
                [kept[:, :5], kept[:, 5:8], location, kept[:, -1:]],
                axis=-1))

    # xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry
    return dets