Ejemplo n.º 1
0
    def train(self, data_loader, model, optimizer, scheduler, saver,
              summary_writer):
        """Run the sample-counted training loop over ``data_loader``.

        The iteration counter advances by the loader's batch size for every
        batch, so ``self.num_iters``, ``self.disp_interval`` and
        ``self.checkpoint_interval`` are all compared against a sample count.

        Args:
            data_loader: iterable of batches; its
                ``batch_sampler.batch_size`` is read once up front.
            model: callable returning ``(prediction, loss_dict, stats)`` for a
                batch.
            optimizer: object providing ``zero_grad()`` and ``step()``.
            scheduler: stepped with the current sample count; ``get_lr()`` is
                queried for logging.
            saver: object providing ``save(params_dict, checkpoint_name)``.
            summary_writer: object providing ``add_scalar_dict(dict, step)``.
        """
        samples_per_batch = data_loader.batch_sampler.batch_size
        self.logger.info('Start training')
        self.logger.info('batch size: {}'.format(samples_per_batch))

        # Both intervals are tested with ``step % interval == 0`` below, and
        # ``step`` only ever takes multiples of the batch size.
        assert self.checkpoint_interval % samples_per_batch == 0, \
            'checkpoint_interval({}) cannot be mod by batch_size({})'.format(
                self.checkpoint_interval, samples_per_batch)
        assert self.disp_interval % samples_per_batch == 0, \
            'disp_interval({}) cannot be mod by batch_size({})'.format(
                self.disp_interval, samples_per_batch)

        # Batch indices are 1-based; resume from the stored sample count.
        first_batch_index = max(1, self.start_iters // samples_per_batch)
        for batch_index, data in enumerate(data_loader, first_batch_index):
            # Convert the batch index into the number of samples seen.
            step = batch_index * samples_per_batch
            if step > self.num_iters:
                self.logger.info('iteration is done')
                break

            tic = time.time()
            data = common.to_cuda(data)

            # The model returns its own losses alongside the prediction.
            prediction, loss_dict, stats = model(data)

            # Reduce every loss term to a scalar, sum them up, and keep the
            # reduced values in the dict for logging.
            loss = 0
            for loss_key in list(loss_dict):
                raw_loss = loss_dict[loss_key]
                loss += raw_loss.mean()
                loss_dict[loss_key] = raw_loss.mean()

            optimizer.zero_grad()
            loss.backward()

            nn.utils.clip_grad_norm_(model.parameters(), self.clip_gradient)
            optimizer.step()

            # The learning-rate schedule is driven by the sample count.
            scheduler.step(step)

            self.stats.update_stats(stats)

            if step % self.disp_interval == 0:
                elapsed = time.time() - tic
                current_lr = scheduler.get_lr()[0]
                self.logger.info(
                    '[iter {}] time cost: {:.4f}, loss: {:.4f}, lr: {:.2e}'.
                    format(step, elapsed, loss, current_lr))

                self.logger.info(self.stats)
                self.logger.info(common.loss_dict_to_str(loss_dict))

                # Scalars: individual losses plus their total, then metrics.
                loss_dict['total_loss'] = loss
                summary_writer.add_scalar_dict(loss_dict, step)
                summary_writer.add_scalar_dict(self.stats.get_summary_dict(),
                                               step)
                self.stats.clear_stats()

            if step % self.checkpoint_interval == 0:
                checkpoint_name = 'detector_{}.pth'.format(step)
                # ``start_iters`` points at the batch that follows this one.
                saver.save({
                    'start_iters': step + samples_per_batch,
                    'model': model,
                    'optimizer': optimizer,
                    'scheduler': scheduler
                }, checkpoint_name)
                self.logger.info('checkpoint {} saved'.format(checkpoint_name))
Ejemplo n.º 2
0
    def test_corners_3d(self, dataloader, model, logger):
        """Run detection over ``dataloader`` and render the predicted corners.

        For every image of every batch, the per-class scores are thresholded
        with ``self.thresh``, the survivors are sorted by score and filtered
        by ``nms``, and the kept boxes and corners are passed to
        ``self.visualizer.render_image_corners_2d``.

        Args:
            dataloader: iterable of batches; ``len(dataloader)`` feeds the
                progress display.
            model: detector returning a 3-tuple whose first element is the
                prediction dict.
            logger: unused; messages go to ``self.logger``.
        """
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        # Start the clock now so the first reported duration is an elapsed
        # time rather than the seconds since the epoch.
        end_time = time.time()

        for step, data in enumerate(dataloader):
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            # Kept under its own name: the per-image loop below builds
            # separate 2D/3D render arguments and must not clobber it.
            corners_pred = prediction[constants.KEY_CORNERS_2D]
            p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

            # Zero every score except each row's maximum, so a proposal can
            # only pass the threshold for its best-scoring class.
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            # Size of the last corner dimension; identical for every image.
            num_cols = corners_pred.shape[-1]

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                corners_per_img = corners_pred[batch_ind]
                p2_per_img = p2[batch_ind]

                # Index 0 is an empty placeholder; the class loop starts at 1.
                dets = [np.zeros((0, 8, num_cols), dtype=np.float32)]
                dets_2d = [np.zeros((0, 4), dtype=np.float32)]

                for class_ind in range(1, self.n_classes):
                    # cls thresh
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        threshed_corners_per_img = corners_per_img[inds]
                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat(
                            [
                                threshed_boxes_2d_per_img,
                                threshed_scores_per_img.unsqueeze(-1),
                            ],
                            dim=-1)

                        # sort by scores (descending), corners in lockstep
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]
                        threshed_corners_per_img = threshed_corners_per_img[
                            order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()
                        nms_corners_per_img = threshed_corners_per_img[
                            keep].detach().cpu().numpy()

                        dets.append(nms_corners_per_img)
                        dets_2d.append(nms_dets_per_img[:, :4])
                    else:
                        dets.append(
                            np.zeros((0, 8, num_cols), dtype=np.float32))
                        # float32 like the placeholder above, so the
                        # concatenation below is not promoted to float64.
                        dets_2d.append(np.zeros((0, 4), dtype=np.float32))

                corners = np.concatenate(dets, axis=0)
                boxes_2d_np = np.concatenate(dets_2d, axis=0)
                # A last dimension of 3 is rendered as 3D corners, anything
                # else as 2D corners.
                render_corners_2d = None
                render_corners_3d = None
                if num_cols == 3:
                    render_corners_3d = corners
                else:
                    render_corners_2d = corners

                # Use this image's own path (the sibling test methods index
                # image_path by batch_ind as well).
                self.visualizer.render_image_corners_2d(
                    image_path[batch_ind],
                    boxes_2d=boxes_2d_np,
                    corners_2d=render_corners_2d,
                    corners_3d=render_corners_3d,
                    p2=p2_per_img.cpu().numpy())

                duration_time = time.time() - end_time
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
Ejemplo n.º 3
0
    def test_3d(self, dataloader, model, logger):
        """Run mono-3D detection over ``dataloader`` and save per-image labels.

        For every image, the per-class scores are thresholded with
        ``self.thresh``, the survivors are sorted by score and filtered by
        ``nms``, a location is computed by ``geometry_utils.calc_location``,
        and the result is written with ``self.save_mono_3d_dets``.

        Args:
            dataloader: iterable of batches; ``len(dataloader)`` feeds the
                progress display.
            model: detector returning a 3-tuple whose first element is the
                prediction dict.
            logger: unused; messages go to ``self.logger``.
        """
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        # Start the clock now so the first reported duration is an elapsed
        # time rather than the seconds since the epoch.
        end_time = time.time()

        for step, data in enumerate(dataloader):
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            dims = prediction[constants.KEY_DIMS]
            orients = prediction[constants.KEY_ORIENTS_V2]
            p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

            # Zero every score except each row's maximum, so a proposal can
            # only pass the threshold for its best-scoring class.
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            for batch_ind in range(batch_size):
                # Fresh per-image container (index 0 is an empty placeholder;
                # the class loop starts at 1). Creating it here keeps one
                # image's detections out of the next image's label file.
                dets = [[]]

                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                dims_per_img = dims[batch_ind]
                orients_per_img = orients[batch_ind]
                p2_per_img = p2[batch_ind]
                for class_ind in range(1, self.n_classes):
                    # cls thresh
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        threshed_dims_per_img = dims_per_img[inds]
                        threshed_orients_per_img = orients_per_img[inds]
                        # Row layout used by the slicing below:
                        # [:4] box, [4] score, [5:8] dims, [8] orientation.
                        threshed_dets_per_img = torch.cat(
                            [
                                threshed_boxes_2d_per_img,
                                threshed_scores_per_img.unsqueeze(-1),
                                threshed_dims_per_img,
                                threshed_orients_per_img.unsqueeze(-1)
                            ],
                            dim=-1)

                        # sort by scores (descending)
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()

                        # calculate location
                        location = geometry_utils.calc_location(
                            nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                            nms_dets_per_img[:, 8],
                            p2_per_img.cpu().numpy())
                        # Final row: box + score, dims, location, orientation.
                        nms_dets_per_img = np.concatenate(
                            [
                                nms_dets_per_img[:, :5],
                                nms_dets_per_img[:, 5:8], location,
                                nms_dets_per_img[:, -1:]
                            ],
                            axis=-1)

                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_mono_3d_dets(dets, label_path)
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
Ejemplo n.º 4
0
    def test_super_nms(self, dataloader, model, logger):
        """Run 2D detection with ``box_ops.super_nms`` and save per-image labels.

        For every image, the per-class scores above a fixed 0.01 threshold are
        sorted by score, filtered with ``box_ops.super_nms`` and written with
        ``self.save_dets``.

        Args:
            dataloader: iterable of batches; ``len(dataloader)`` feeds the
                progress display.
            model: detector whose call result is used directly as the
                prediction dict.
            logger: unused; messages go to ``self.logger``.
        """
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        # Start the clock now so the first reported duration is an elapsed
        # time rather than the seconds since the epoch.
        end_time = time.time()

        for step, data in enumerate(dataloader):
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction = model(data)

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]

            batch_size = scores.shape[0]

            for batch_ind in range(batch_size):
                # Fresh per-image container (index 0 is an empty placeholder;
                # the class loop starts at 1). Creating it here keeps one
                # image's detections out of the next image's label file.
                dets = [[]]

                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                for class_ind in range(1, self.n_classes):
                    # cls thresh (fixed value here, not self.thresh)
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > 0.01).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat(
                            [
                                threshed_boxes_2d_per_img,
                                threshed_scores_per_img.unsqueeze(-1),
                            ],
                            dim=-1)

                        # sort by scores (descending)
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]

                        # super_nms takes only the boxes, not the scores.
                        keep = box_ops.super_nms(threshed_dets_per_img[:, :4],
                                                 0.8,
                                                 nms_num=3,
                                                 loop_time=2)
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()

                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_dets(dets, label_path)
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
Ejemplo n.º 5
0
    def inference(self, im, p2):
        """Build the detector, run one forward pass and return detections.

        The model is created from ``self.config['model_config']`` and
        optionally restored from ``args.model`` (a checkpoint file path) or
        from ``args.checkpoint`` together with ``args.load_dir``. Only the
        first element of the batch is post-processed.

        Args:
            im: input handed to ``self.preprocess``; the previous docstring
                gave shape (N, 3, H, W) -- TODO confirm.
            p2: second argument of ``self.preprocess``; presumably the camera
                projection matrix -- verify against the caller.

        Returns:
            A list indexed by class. Entry 0 is an empty placeholder; each
            entry for classes ``1 .. n_classes - 1`` is either an empty list
            or a numpy array with one row per kept detection, laid out
            (per the original author's note) as
            xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry.
        """
        cfg = self.config
        cli_args = self.args
        eval_config = cfg['eval_config']
        model_config = cfg['model_config']
        data_config = cfg['eval_data_config']

        np.random.seed(eval_config['rng_seed'])

        self.logger.info('Using config:')
        pprint.pprint({
            'model_config': model_config,
            'data_config': data_config,
            'eval_config': eval_config
        })

        out_dir = eval_config['eval_out']
        if os.path.exists(out_dir):
            self.logger.warning('dir {} exist already!'.format(out_dir))
        else:
            self.logger.info('creat eval out directory {}'.format(out_dir))
            os.makedirs(out_dir)

        # Weights come from one of two places:
        #   1. an explicit checkpoint file path (args.model), or
        #   2. a numbered checkpoint inside a training directory.
        # With neither, the freshly built model keeps its initial weights.
        restore = True
        if cli_args.model is not None:
            input_dir, checkpoint_name = os.path.split(cli_args.model)
        elif cli_args.checkpoint is not None:
            checkpoint_name = 'detector_{}.pth'.format(cli_args.checkpoint)
            assert cli_args.load_dir is not None, 'please choose a directory to load checkpoint'
            eval_config['load_dir'] = cli_args.load_dir
            input_dir = os.path.join(eval_config['load_dir'],
                                     model_config['type'], data_config['name'])
            if not os.path.exists(input_dir):
                raise Exception(
                    'There is no input directory for loading network from {}'.
                    format(input_dir))
        else:
            restore = False

        self.logger.info("restore from checkpoint"
                         if restore else "use pytorch default initialization")

        model = detectors.build(model_config)
        model.eval()

        if restore:
            Saver(input_dir).load({'model': model}, checkpoint_name)

        model = model.cuda()

        self.logger.info('Start testing')
        batch = common.to_cuda(self.to_batch(self.preprocess(im, p2)))

        with torch.no_grad():
            prediction = model(batch)

        all_scores = prediction[constants.KEY_CLASSES]
        all_boxes = prediction[constants.KEY_BOXES_2D]
        all_dims = prediction[constants.KEY_DIMS]
        all_orients = prediction[constants.KEY_ORIENTS_V2]
        calib = batch[constants.KEY_STEREO_CALIB_P2_ORIG]

        # Zero every score except each row's maximum, so a proposal can only
        # pass the threshold for its best-scoring class.
        num_images = all_scores.shape[0]
        flat_scores = all_scores.view(-1, self.n_classes)
        best_class = flat_scores.max(dim=-1)[1]
        rows = torch.arange(0, best_class.numel()).type_as(best_class)
        masked_scores = torch.zeros_like(flat_scores)
        masked_scores[rows, best_class] = flat_scores[rows, best_class]
        all_scores = masked_scores.view(num_images, -1, self.n_classes)

        # Only the first image of the batch is handled.
        img_boxes = all_boxes[0]
        img_scores = all_scores[0]
        img_dims = all_dims[0]
        img_orients = all_orients[0]
        img_calib = calib[0]

        # Index 0 is an empty placeholder; the class loop starts at 1.
        dets = [[]]
        for class_ind in range(1, self.n_classes):
            above_thresh = img_scores[:, class_ind] > self.thresh
            inds = torch.nonzero(above_thresh).view(-1)
            cls_scores = img_scores[inds, class_ind]
            if inds.numel() == 0:
                dets.append([])
                continue

            # Row layout: [:4] box, [4] score, [5:8] dims, [8] orientation.
            candidates = torch.cat(
                [
                    img_boxes[inds],
                    cls_scores.unsqueeze(-1),
                    img_dims[inds],
                    img_orients[inds].unsqueeze(-1),
                ],
                dim=-1)

            # Highest score first, then suppress overlapping boxes.
            order = torch.sort(cls_scores, dim=0, descending=True)[1]
            candidates = candidates[order]
            keep = nms(candidates[:, :4], candidates[:, 4],
                       self.nms).view(-1).long()
            kept = candidates[keep].detach().cpu().numpy()

            location = geometry_utils.calc_location(
                kept[:, 5:8], kept[:, :5], kept[:, 8],
                img_calib.cpu().numpy())

            # Final row: box + score, dims, location, orientation.
            dets.append(
                np.concatenate(
                    [kept[:, :5], kept[:, 5:8], location, kept[:, -1:]],
                    axis=-1))

        return dets