Example #1
    def report(self):

        if self.train_params.VERBOSE_MODE:
            print("[Training Parameters Overview] ------------------------------------------------------------------------")
            self.train_params.report()

            print("[Optimizer Overview] ----------------------------------------------------------------------------------")
            if self.optimizer is not None:
                print("[%s] Start learning rate: %f" % (type(self.optimizer), dl_util.get_learning_rate(self.optimizer)))
Example #2
    def report(self):

        self.print_protected_god_animal()

        print("[Training Parameters Overview] ------------------------------------------------------------------------")
        self.train_params.report()

        print("[Optimizer Overview] ----------------------------------------------------------------------------------")
        if self.optimizer is not None:
            print("[%s] Start learning rate: %f" % (type(self.optimizer), dl_util.get_learning_rate(self.optimizer)))
Example #3
    return feature_map_loss


""" Do Training --------------------------------------------------------------------------------------------------------
"""
time_start = datetime.datetime.now()
itr = 0
for epoch in range(0, train_params.MAX_EPOCHS):
    try:

        # Iterate over the training batches
        for train_batch_idx, train_dict in tqdm(
                enumerate(train_loader),
                total=len(train_loader),
                desc='Train epoch = %d, lr=%f' % (epoch, dl_util.get_learning_rate(optimizer)),
                ncols=100,
                leave=False):
            itr += 1

            # Switch to train mode
            net.train()

            # zero the parameter gradients
            optimizer.zero_grad()

            # Pre-process the variables
            # Generate 3 pyramid levels, randomly selecting 2000 samples at each level
            I_a, d_a, sel_a_indices, K, I_b, q_gt, t_gt, se3_gt, T_gt = ba_tracknet_preprocess(
                train_dict, 3, 2000)
            I_a = I_a.cuda()
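The excerpt stops mid-step, right after the first tensor is moved to the GPU. For orientation only, here is a hypothetical sketch of how such a step typically finishes; none of these calls appear in the excerpt, and compute_loss and the net(...) signature are placeholders:

            # The remaining tensors would be moved to the GPU the same way, e.g.
            # d_a, I_b, se3_gt = d_a.cuda(), I_b.cuda(), se3_gt.cuda()

            # Forward pass, loss, backward pass, parameter update
            loss = compute_loss(net(I_a), se3_gt)   # placeholder loss call
            loss.backward()
            optimizer.step()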
Example #4
    def train_loop(self, train_data: 'dataset.Dataset or dataloader.DataLoader', valid_data=None):

        # prepare the dataloader if the input parameter is an instance of Dataset
        if isinstance(train_data, dataset.Dataset):
            train_loader = dataloader.DataLoader(train_data,
                                                 batch_size=self.train_params.LOADER_BATCH_SIZE,
                                                 shuffle=self.train_params.LOADER_SHUFFLE,
                                                 pin_memory=self.train_params.LOADER_PIN_MEM,
                                                 num_workers=self.train_params.LOADER_NUM_THREADS,
                                                 drop_last=False)
            if self.train_params.VERBOSE_MODE:
                print("[Dataset Overview] ----------------------------------------------------------------------------------------")
                print("Train set: %d items" % (len(train_data)))

        elif isinstance(train_data, dataloader.DataLoader):
            train_loader = train_data
        else:
            raise TypeError('train_data must be a dataset.Dataset or a dataloader.DataLoader')

        if valid_data is not None and isinstance(valid_data, dataset.Dataset):
            valid_loader = dataloader.DataLoader(valid_data,
                                                 batch_size=self.train_params.LOADER_VALID_BATCH_SIZE,
                                                 shuffle=self.train_params.LOADER_SHUFFLE,
                                                 pin_memory=self.train_params.LOADER_PIN_MEM,
                                                 num_workers=self.train_params.LOADER_NUM_THREADS,
                                                 drop_last=False)
            if self.train_params.VERBOSE_MODE:
                print("Validation set: %d items" % (len(valid_data)))

        elif valid_data is not None and isinstance(valid_data, dataloader.DataLoader):
            valid_loader = valid_data
        else:
            valid_loader = None

        # prepare the training process
        self._prepare_train_loop()

        epoch, itr = 0, 0
        self.train_start_time = datetime.datetime.now()
        print('[Running] -----------------------------------------------------------------------------------------')

        try:
            for epoch in range(0, self.train_params.MAX_EPOCHS):

                if self.train_params.TQDM_PROGRESS:
                    progress = tqdm(total=len(train_loader), ncols=100, leave=False)

                for train_batch_idx, train_sample in enumerate(train_loader):

                    itr += 1
                    if self.train_params.TQDM_PROGRESS:
                        progress.update(1)
                        progress.set_description('[Train] epoch = %d, lr=%f' %
                                                 (epoch, dl_util.get_learning_rate(self.optimizer)))

                    # prepare feeding the samples
                    if self.model is not None:
                        self.model.train()
                    self.optimizer.zero_grad()

                    # update optimizer
                    self._optimizer_update()

                    # forward and backward
                    log_dict = self._train_feed(train_sample, epoch, itr)

                    # optimize the parameters
                    self.optimizer.step()

                    # log the training information
                    if log_dict is not None and self.logger is not None and self.check_log_step(itr):
                        log_dict['Iteration'] = itr + 1
                        log_dict['Epoch'] = epoch
                        log_dict['Event'] = 'Training'
                        self.logger.log(log_dict)

                    # save the training checkpoints every 'checkpoint_steps'
                    if self.check_checkpoint_step(itr):
                        self.save_checkpoint(epoch, itr)

                    # do validation
                    if self.check_valid_step(itr) and valid_loader is not None:

                        if self.train_params.TQDM_PROGRESS:
                            progress.set_description('[Valid]')

                        with torch.no_grad():
                            valid_log_dict = self._valid_loop(valid_loader, epoch, itr)
                            torch.cuda.empty_cache()

                        # log the validation
                        if valid_log_dict is not None and self.logger is not None:
                            valid_log_dict['Iteration'] = itr + 1
                            valid_log_dict['Epoch'] = epoch
                            valid_log_dict['Event'] = 'Validating'
                            self.logger.log(valid_log_dict)

                # save the checkpoint
                self.save_checkpoint(epoch, itr)

                if self.train_params.TQDM_PROGRESS:
                    progress.close()

        except Exception as e:
            import traceback
            print(traceback.format_exc())

            print('[Exception]: ' + str(e))
            self.save_checkpoint(epoch, itr)
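Example #4 accepts either a Dataset or a ready-made DataLoader. A minimal usage sketch follows; Trainer, MyDataset, and the constructor arguments are placeholders, not names taken from the example:

trainer = Trainer(model=net, optimizer=optim, train_params=params, logger=logger)

# Either pass datasets and let train_loop build the loaders from train_params ...
trainer.train_loop(MyDataset('train'), valid_data=MyDataset('valid'))

# ... or build a DataLoader yourself for full control over batching.
train_loader = dataloader.DataLoader(MyDataset('train'), batch_size=16, shuffle=True)
trainer.train_loop(train_loader)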
Example #5
    def train_loop(self, train_loader, valid_loader=None):

        # prepare the training process (e.g. adding more dict keys)
        self._prepare_train()

        epoch, itr = 0, 0
        self.train_start_time = datetime.datetime.now()
        print('[Running] -----------------------------------------------------------------------------------------')

        try:
            for epoch in range(0, self.train_params.MAX_EPOCHS):
                progress = tqdm(total=len(train_loader), ncols=100, leave=False)

                for train_batch_idx, train_sample in enumerate(train_loader):

                    itr += 1
                    progress.update(1)
                    progress.set_description('[Train] epoch = %d, lr=%f' %
                                             (epoch, dl_util.get_learning_rate(self.optimizer)))

                    # prepare feeding the samples
                    self.model.train()
                    self.optimizer.zero_grad()

                    # update optimizer
                    self._optimizer_update()

                    # forward and backward
                    log_dict = self._train_feed(train_sample, epoch, itr)

                    # optimize the parameters
                    self.optimizer.step()

                    # log the training information
                    if log_dict is not None:
                        log_dict['Iteration'] = itr + 1
                        log_dict['Epoch'] = epoch
                        log_dict['Event'] = 'Training'
                        self.logger.log(log_dict)

                    # save the training checkpoints every 'checkpoint_steps'
                    if self.check_checkpoint_step(itr):
                        self.save_checkpoint(epoch, itr)

                    # do validation
                    if self.check_valid_step(itr) and valid_loader is not None:
                        progress.set_description('[Valid]')

                        valid_log_dict = self._valid_loop(valid_loader, epoch, itr)

                        # log the validation
                        if valid_log_dict is not None:
                            valid_log_dict['Iteration'] = itr + 1
                            valid_log_dict['Epoch'] = epoch
                            valid_log_dict['Event'] = 'Validating'
                            self.logger.log(valid_log_dict)

                # save the checkpoint
                self.save_checkpoint(epoch, itr)
                progress.close()

        except Exception as e:
            import traceback
            print(traceback.format_exc())

            print('[Exception]: ' + str(e))
            self.save_checkpoint(epoch, itr)
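Both loop variants gate checkpointing and validation on check_checkpoint_step and check_valid_step (and Example #4 additionally on check_log_step), whose bodies are not shown here. A plausible minimal implementation is a simple modulo test; the interval parameter names CHECKPOINT_STEPS and VALID_STEPS are assumptions, and check_log_step would follow the same pattern:

    def check_checkpoint_step(self, itr):
        # Save a checkpoint every CHECKPOINT_STEPS iterations (assumed name).
        return itr % self.train_params.CHECKPOINT_STEPS == 0

    def check_valid_step(self, itr):
        # Run validation every VALID_STEPS iterations (assumed name).
        return itr % self.train_params.VALID_STEPS == 0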