Example #1
    def __init__(self, args):
        self.batch_size = cfg.TRAIN.CONFIG.BATCH_SIZE
        self.gpu_num = cfg.TRAIN.CONFIG.GPU_NUM
        self.num_workers = cfg.DATA_LOADER.NUM_THREADS
        self.log_dir = cfg.MODEL.PATH.EVALUATION_DIR
        self.is_training = False

        self.cls_thresh = float(args.cls_threshold)
        self.eval_interval_secs = args.eval_interval_secs
        self.restore_model_path = args.restore_model_path

        # save dir: derived from the checkpoint path (overrides the cfg
        # default set above)
        ckpt_idx = args.restore_model_path.find('/ckpt')
        self.log_dir = args.restore_model_path[:ckpt_idx]
        self.logger = create_logger(os.path.join(self.log_dir, 'log_eval.txt'))
        self.logger.info(str(args) + '\n')
        self.result_dir = os.path.join(self.log_dir, 'eval')
        self.logger.info('**** Saving Evaluation results to the path %s ****' %
                         self.result_dir)

        # dataset
        dataset_func = choose_dataset()
        self.dataset = dataset_func('loading',
                                    split=args.split,
                                    img_list=args.img_list,
                                    is_training=self.is_training)
        self.dataloader = DataLoader(self.dataset,
                                     batch_size=self.batch_size * self.gpu_num,
                                     shuffle=False,
                                     num_workers=self.num_workers,
                                     worker_init_fn=my_worker_init_fn,
                                     collate_fn=self.dataset.load_batch)

        self.logger.info('**** Dataset length is %d ****' % len(self.dataset))
        self.val_size = len(self.dataset)

        # model
        self.model_func = choose_model()
        self.model = self.model_func(self.batch_size, self.is_training)
        self.model = self.model.cuda()

        # tensorboard
        self.tb_log = SummaryWriter(
            log_dir=os.path.join(self.result_dir, 'tensorboard'))
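
Example #1 only shows the constructor. A minimal sketch of instantiating it,
assuming the surrounding repo is importable and using Evaluator as a
hypothetical name for the enclosing class:

from argparse import Namespace

# Hypothetical invocation; the attribute names match the ones read in
# __init__ above, the concrete values are placeholders.
args = Namespace(cls_threshold=0.3,
                 eval_interval_secs=300,
                 restore_model_path='log/kitti/ckpt/checkpoint.pth',
                 split='training',
                 img_list='val')
evaluator = Evaluator(args)  # assumed class name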
Example #2
    def __init__(self, args):
        self.batch_size = cfg.TRAIN.CONFIG.BATCH_SIZE
        self.gpu_num = cfg.TRAIN.CONFIG.GPU_NUM
        self.num_workers = cfg.DATA_LOADER.NUM_THREADS
        self.log_dir = cfg.MODEL.PATH.CHECKPOINT_DIR
        self.max_iteration = cfg.TRAIN.CONFIG.MAX_ITERATIONS
        self.total_epochs = cfg.TRAIN.CONFIG.TOTAL_EPOCHS
        self.checkpoint_interval = cfg.TRAIN.CONFIG.CHECKPOINT_INTERVAL
        self.summary_interval = cfg.TRAIN.CONFIG.SUMMARY_INTERVAL
        self.trainable_param_prefix = cfg.TRAIN.CONFIG.TRAIN_PARAM_PREFIX
        self.trainable_loss_prefix = cfg.TRAIN.CONFIG.TRAIN_LOSS_PREFIX
        if args.output_dir is not None:
            self.log_dir = args.output_dir

        self.restore_model_path = args.restore_model_path
        self.is_training = True

        # gpu_num
        self.gpu_num = min(self.gpu_num, torch.cuda.device_count())

        # save dir
        datetime_str = str(datetime.datetime.now()).replace(' ', '_')
        self.log_dir = os.path.join(self.log_dir, datetime_str)
        os.makedirs(self.log_dir, exist_ok=True)
        self.logger = create_logger(os.path.join(self.log_dir, 'log_train.txt'))
        self.logger.info(str(args) + '\n')
        self.logger.info('**** Saving models to the path %s ****' % self.log_dir)
        self.logger.info('**** Saving configure file in %s ****' % self.log_dir)
        os.system('cp "%s" "%s"' % (args.cfg, self.log_dir))
        self.ckpt_dir = os.path.join(self.log_dir, 'ckpt')
        os.mkdir(self.ckpt_dir)

        # dataset
        dataset_func = choose_dataset()
        self.dataset = dataset_func('loading',
                                    split=args.split,
                                    img_list=args.img_list,
                                    is_training=self.is_training)
        self.dataloader = DataLoader(self.dataset,
                                     batch_size=self.batch_size * self.gpu_num,
                                     shuffle=True,
                                     num_workers=self.num_workers,
                                     worker_init_fn=my_worker_init_fn,
                                     collate_fn=self.dataset.load_batch)
        self.logger.info('**** Dataset length is %d ****' % len(self.dataset))

        # models
        self.model_func = choose_model()
        self.model = self.model_func(self.batch_size, self.is_training)
        self.model = self.model.cuda()

        # tensorboard
        self.tb_log = SummaryWriter(log_dir=os.path.join(self.log_dir, 'tensorboard'))

        # optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=cfg.SOLVER.BASE_LR)
        self.lr_scheduler = LRScheduler(self.optimizer)

        # load from checkpoint
        start_epoch = it = 0
        if args.restore_model_path is not None:
            it, start_epoch = self.model.load_params_with_optimizer(args.restore_model_path, to_cpu=False,
                                                                    optimizer=self.optimizer,
                                                                    logger=self.logger)
        self.start_epoch = start_epoch
        self.it = it

        if self.gpu_num > 1:
            self.logger.info("Use %d GPUs!" % self.gpu_num)
            self.model = torch.nn.DataParallel(self.model)

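The snippet stops before the loop that consumes this setup. A minimal sketch
of the epoch loop it implies, where the model forward returning a scalar loss
and the LRScheduler.step(it) signature are both assumptions rather than the
repo's actual code:

    def train(self):
        for epoch in range(self.start_epoch, self.total_epochs):
            for batch in self.dataloader:
                self.optimizer.zero_grad()
                loss = self.model(batch)         # assumed: forward returns the total loss
                loss.backward()
                self.optimizer.step()
                self.lr_scheduler.step(self.it)  # assumed scheduler signature
                self.it += 1
                if self.it % self.summary_interval == 0:
                    self.tb_log.add_scalar('train/total_loss', loss.item(), self.it)
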
Example #3
import torch
from torch.utils.data import DataLoader

# cfg_from_file, choose_dataset, choose_model, my_worker_init_fn and
# TargetAssigner all come from the surrounding repo.
cfg_from_file("../configs/kitti/3dssd/3dssd.yaml")
dataset_func = choose_dataset()
dataset = dataset_func('loading',
                       split="training",
                       img_list="train",
                       is_training=True)
dataloader = DataLoader(dataset,
                        batch_size=2,
                        shuffle=True,
                        num_workers=6,
                        worker_init_fn=my_worker_init_fn,
                        collate_fn=dataset.load_batch)
assigner = TargetAssigner(0)
model_func = choose_model()
model = model_func(1, is_training=True).cuda()
if __name__ == '__main__':
    for batch_idx, batch_data_label in enumerate(dataloader):
        # move every tensor in the batch onto the GPU
        for key, value in batch_data_label.items():
            if isinstance(value, torch.Tensor):
                batch_data_label[key] = value.cuda()
        # print the shape of each array-like entry in the batch
        for k, v in batch_data_label.items():
            if hasattr(v, 'shape'):
                print(k, v.shape)

        #returned_list = assigner.assign(batch_data_label['point_cloud_pl'][..., :3],
        #                                torch.unsqueeze((batch_data_label['point_cloud_pl'][..., :3]), dim=2),
        #                                batch_data_label['label_boxes_3d_pl'],
        #                                batch_data_label['label_classes_pl'],
        #                                batch_data_label['angle_cls_pl'],
        #                                batch_data_label['angle_res_pl'])
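
Every DataLoader above passes my_worker_init_fn, which the snippets never
define. A common implementation of such a hook (an assumption, not
necessarily this repo's) reseeds NumPy per worker so that forked workers do
not produce identical random augmentations:

import numpy as np

def my_worker_init_fn(worker_id):
    # Each forked DataLoader worker inherits the parent's NumPy RNG state;
    # offsetting the seed by worker_id de-correlates their random streams.
    np.random.seed(np.random.get_state()[1][0] + worker_id)
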
Example #4
    def __init__(self, args):
        self.batch_size = cfg.TRAIN.CONFIG.BATCH_SIZE
        self.gpu_num = cfg.TRAIN.CONFIG.GPU_NUM
        self.num_workers = cfg.DATA_LOADER.NUM_THREADS 
        self.log_dir = cfg.MODEL.PATH.CHECKPOINT_DIR
        self.max_iteration = cfg.TRAIN.CONFIG.MAX_ITERATIONS
        self.checkpoint_interval = cfg.TRAIN.CONFIG.CHECKPOINT_INTERVAL
        self.summary_interval = cfg.TRAIN.CONFIG.SUMMARY_INTERVAL
        self.trainable_param_prefix = cfg.TRAIN.CONFIG.TRAIN_PARAM_PREFIX        
        self.trainable_loss_prefix = cfg.TRAIN.CONFIG.TRAIN_LOSS_PREFIX

        self.restore_model_path = args.restore_model_path
        self.is_training = True

        # gpu_num (_get_available_gpu_num appears to return the list of
        # visible GPU ids, despite its name)
        self.gpu_num = min(self.gpu_num, len(self._get_available_gpu_num()))

        # save dir
        datetime_str = str(datetime.datetime.now()).replace(' ', '_')
        self.log_dir = os.path.join(self.log_dir, datetime_str)
        os.makedirs(self.log_dir, exist_ok=True)
        self.log_file = open(os.path.join(self.log_dir, 'log_train.txt'), 'w')
        self.log_file.write(str(args) + '\n')
        self._log_string('**** Saving models to the path %s ****' % self.log_dir)
        self._log_string('**** Saving configure file in %s ****' % self.log_dir)
        os.system('cp "%s" "%s"' % (args.cfg, self.log_dir))

        # dataset
        dataset_func = choose_dataset()
        self.dataset = dataset_func('loading',
                                    split=args.split,
                                    img_list=args.img_list,
                                    is_training=self.is_training,
                                    workers_num=self.num_workers)
        self.dataset_iter = self.dataset.load_batch(self.batch_size * self.gpu_num)
        self._log_string('**** Dataset length is %d ****' % len(self.dataset))

        # optimizer
        with tf.device('/cpu:0'):
            self.global_step = tf.train.get_or_create_global_step()
            self.bn_decay = get_bn_decay(self.global_step)
            self.learning_rate = get_learning_rate(self.global_step)
            if cfg.SOLVER.TYPE == 'SGD':
                self.optimizer = tf.train.MomentumOptimizer(self.learning_rate,
                                                            momentum=cfg.SOLVER.MOMENTUM)
            elif cfg.SOLVER.TYPE == 'Adam':
                self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
            else:
                raise ValueError('Unsupported solver type: %s' % cfg.SOLVER.TYPE)

        # models
        self.model_func = choose_model()
        (self.model_list, self.tower_grads, self.total_loss_gpu,
         self.losses_list, self.params, self.extra_update_ops) = self._build_model_list()
        tf.summary.scalar('total_loss', self.total_loss_gpu)

        # feeddict
        self.feeddict_producer = FeedDictCreater(self.dataset_iter, self.model_list, self.batch_size)

        with tf.device('/gpu:0'):
            self.grads = average_gradients(self.tower_grads)
            self.update_op = [self.optimizer.apply_gradients(
                zip(self.grads, self.params), global_step=self.global_step)]
        self.update_op.extend(self.extra_update_ops)
        self.train_op = tf.group(*self.update_op)
        
        # tensorflow training ops 
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1, allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            device_count={
                "GPU": self.gpu_num,
            },
            allow_soft_placement=True,
        )
        self.sess = tf.Session(config=config)

        self.saver = tf.train.Saver()
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(os.path.join(self.log_dir, 'train'), self.sess.graph)
   
        # initialize model
        self._initialize_model()
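
Example #4 relies on average_gradients to merge the per-tower gradients
before apply_gradients. Its body is not shown, but since the caller zips the
result with self.params, a sketch consistent with that usage (modeled on the
classic TF1 multi-tower pattern, not taken from this repo) is:

import tensorflow as tf

def average_gradients(tower_grads):
    # tower_grads holds one list of (grad, var) pairs per GPU tower, as
    # produced by optimizer.compute_gradients. Only the averaged gradients
    # are returned, because the caller zips them with self.params itself.
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars = ((grad_gpu0, var), (grad_gpu1, var), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        averaged.append(tf.reduce_mean(tf.concat(grads, axis=0), axis=0))
    return averaged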