Example #1
    def train_one_epoch(self, specializing=False, cosine_decay=False):
        """
        One epoch training function
        :return:
        """
        if specializing:
            tqdm_batch = tqdm.tqdm(
                self.sub_data_loader.binary_train_loader,
                total=self.sub_data_loader.binary_train_iterations,
                desc="Epoch-{}-".format(self.current_epoch))
        else:
            tqdm_batch = tqdm.tqdm(self.data_loader.train_loader,
                                   total=self.data_loader.train_iterations,
                                   desc="Epoch-{}-".format(self.current_epoch))

        self.model.train()

        epoch_loss = AverageMeter()
        top1_acc = AverageMeter()
        top5_acc = AverageMeter()

        current_batch = 0
        for i, (x, y) in enumerate(tqdm_batch):
            if self.cuda:
                x, y = x.cuda(non_blocking=self.config.async_loading), y.cuda(
                    non_blocking=self.config.async_loading)

            self.optimizer.zero_grad()
            if cosine_decay:
                self.adjust_learning_rate(self.optimizer, self.current_epoch,
                                          i, self.data_loader.train_iterations)

            pred = self.model(x)
            cur_loss = self.loss_fn(pred, y)

            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during training...')

            cur_loss.backward()
            self.optimizer.step()

            if specializing:
                top1 = cls_accuracy(pred.data, y.data)
                top1_acc.update(top1[0].item(), x.size(0))
            else:
                top1, top5 = cls_accuracy(pred.data, y.data, topk=(1, 5))
                top1_acc.update(top1.item(), x.size(0))
                top5_acc.update(top5.item(), x.size(0))

            epoch_loss.update(cur_loss.item())

            self.current_iteration += 1
            current_batch += 1

        self.lr_list.append(self.optimizer.param_groups[0]['lr'])
        tqdm_batch.close()

        print("Training at epoch-" + str(self.current_epoch) + " | " +
              "loss: " + str(epoch_loss.val) + "\tTop1 Acc: " +
              str(top1_acc.val))
Example #2
    def validate(self, specializing=False):
        """
        One epoch validation
        :return:
        """
        if specializing:
            tqdm_batch = tqdm.tqdm(
                self.sub_data_loader.binary_valid_loader,
                total=self.sub_data_loader.binary_valid_iterations,
                desc="Epoch-{}-".format(self.current_epoch))
        else:
            tqdm_batch = tqdm.tqdm(self.data_loader.valid_loader,
                                   total=self.data_loader.valid_iterations,
                                   desc="Valiation at -{}-".format(
                                       self.current_epoch))

        self.model.eval()

        epoch_loss = AverageMeter()
        top1_acc = AverageMeter()
        top5_acc = AverageMeter()

        for x, y in tqdm_batch:
            if self.cuda:
                x, y = x.cuda(non_blocking=self.config.async_loading), y.cuda(
                    non_blocking=self.config.async_loading)

            # model
            pred = self.model(x)
            # loss
            cur_loss = self.loss_fn(pred, y)
            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during validation...')

            if specializing:
                top1 = cls_accuracy(pred.data, y.data)
                top1_acc.update(top1[0].item(), x.size(0))
            else:
                top1, top5 = cls_accuracy(pred.data, y.data, topk=(1, 5))
                top1_acc.update(top1.item(), x.size(0))
                top5_acc.update(top5.item(), x.size(0))

            epoch_loss.update(cur_loss.item())

        self.logger.info("Validation results at epoch-" +
                         str(self.current_epoch) + " | " + "loss: " +
                         str(epoch_loss.avg) + "\tTop1 Acc: " +
                         str(top1_acc.val))

        tqdm_batch.close()

        return top1_acc.avg
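
Each of these examples calls a cls_accuracy helper that the page does not show. Below is a minimal sketch, modeled on the well-known top-k accuracy helper from the PyTorch ImageNet example; with the default topk=(1,) it returns a one-element list, which matches the top1[0].item() indexing in the specializing branches above:

    def cls_accuracy(output, target, topk=(1,)):
        """Top-k accuracy, in percent, of logits `output` against labels `target`."""
        maxk = max(topk)
        batch_size = target.size(0)

        # indices of the maxk highest logits per sample, transposed to (maxk, batch)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res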
Example #3
    def train_one_epoch(self):
        """
        One epoch training function
        :return:
        """

        tqdm_batch = tqdm.tqdm(self.data_loader.train_loader,
                               total=self.data_loader.train_iterations,
                               desc="Epoch-{}-".format(self.current_epoch))

        self.train()

        epoch_loss = AverageMeter()
        top1_acc = AverageMeter()
        top5_acc = AverageMeter()

        current_batch = 0
        for i, (x, y) in enumerate(tqdm_batch):
            if self.cuda:
                x, y = x.cuda(non_blocking=self.config.async_loading), y.cuda(
                    non_blocking=self.config.async_loading)

            self.optimizer.zero_grad()
            # self.adjust_learning_rate(self.optimizer, self.current_epoch, i, self.data_loader.train_iterations)

            pred = self(x)
            cur_loss = self.loss_fn(pred, y)

            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during training...')

            cur_loss.backward()
            self.optimizer.step()

            top1, top5 = cls_accuracy(pred.data, y.data, topk=(1, 5))
            top1_acc.update(top1.item(), x.size(0))
            top5_acc.update(top5.item(), x.size(0))

            epoch_loss.update(cur_loss.item())

            self.current_iteration += 1
            current_batch += 1

        tqdm_batch.close()

        print("Training at epoch-" + str(self.current_epoch) + " | " +
              "loss: " + str(epoch_loss.val) + "\tTop1 Acc: " +
              str(top1_acc.val))
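
The AverageMeter used throughout is likewise left to the reader. A minimal sketch follows; the important detail is the difference between .val (the last value passed to update) and .avg (the running, count-weighted average), which is why the epoch summaries above report .avg:

    class AverageMeter(object):
        """Tracks the most recent value and a running weighted average."""

        def __init__(self):
            self.val = 0.0
            self.sum = 0.0
            self.count = 0
            self.avg = 0.0

        def update(self, val, n=1):
            self.val = val
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count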
Example #4
    def _validate(self, config):
        """
        One epoch validation
        :return:
        """
        self.data_loader = Cifar100DataLoader(self.config)
        self.loss_fn = nn.CrossEntropyLoss()
        self.loss_fn = self.loss_fn.to(self.device)
        tqdm_batch = tqdm.tqdm(self.data_loader.valid_loader,
                               total=self.data_loader.valid_iterations,
                               desc="Valiation at -{}-".format(
                                   self.current_epoch))

        self.eval()

        epoch_loss = AverageMeter()
        top1_acc = AverageMeter()
        top5_acc = AverageMeter()

        for x, y in tqdm_batch:
            if self.cuda:
                x, y = x.cuda(non_blocking=self.config.async_loading), y.cuda(
                    non_blocking=self.config.async_loading)

            # model
            pred = self(x)
            # loss
            cur_loss = self.loss_fn(pred, y)
            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during validation...')

            top1, top5 = cls_accuracy(pred.data, y.data, topk=(1, 5))
            top1_acc.update(top1.item(), x.size(0))
            top5_acc.update(top5.item(), x.size(0))

            epoch_loss.update(cur_loss.item())

        print("Validation results at epoch-" + str(self.current_epoch) +
              " | " + "loss: " + str(epoch_loss.avg) + "\tTop1 Acc: " +
              str(top1_acc.val))

        tqdm_batch.close()

        return top1_acc.avg
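
Example #4 builds its Cifar100DataLoader inline, but none of the examples show one. They rely on only four attributes: the train_loader/valid_loader iterables and the train_iterations/valid_iterations counts. Here is a hedged sketch of a compatible wrapper (the class name and config fields are assumptions, not the original implementation):

    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms

    class SimpleCifar100Loader:
        """Hypothetical stand-in exposing the loader interface the examples expect."""

        def __init__(self, config):
            tfm = transforms.ToTensor()
            train_set = datasets.CIFAR100(config.data_dir, train=True,
                                          download=True, transform=tfm)
            valid_set = datasets.CIFAR100(config.data_dir, train=False,
                                          download=True, transform=tfm)
            self.train_loader = DataLoader(train_set, batch_size=config.batch_size,
                                           shuffle=True, pin_memory=True)
            self.valid_loader = DataLoader(valid_set, batch_size=config.batch_size,
                                           pin_memory=True)
            self.train_iterations = len(self.train_loader)
            self.valid_iterations = len(self.valid_loader)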
Example #5
    def validate(self):
        """
        One epoch validation
        :return:
        """
        tqdm_batch = tqdm(self.data_loader.valid_loader,
                          total=self.data_loader.valid_iterations,
                          desc="Valiation at -{}-".format(self.current_epoch))

        # set the model in evaluation mode
        self.model.eval()

        epoch_loss = AverageMeter()
        top1_acc = AverageMeter()
        top5_acc = AverageMeter()

        for x, y in tqdm_batch:
            if self.cuda:
                x, y = x.cuda(non_blocking=self.config.async_loading), y.cuda(
                    non_blocking=self.config.async_loading)

            x, y = Variable(x), Variable(y)
            # model
            pred = self.model(x)
            # loss
            cur_loss = self.loss(pred, y)
            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during validation...')

            top1, top5 = cls_accuracy(pred.data, y.data, topk=(1, 5))
            epoch_loss.update(cur_loss.item())
            top1_acc.update(top1.item(), x.size(0))
            top5_acc.update(top5.item(), x.size(0))

        self.logger.info("Validation results at epoch-" +
                         str(self.current_epoch) + " | " + "loss: " +
                         str(epoch_loss.avg) + "- Top1 Acc: " +
                         str(top1_acc.val) + "- Top5 Acc: " +
                         str(top5_acc.val))

        tqdm_batch.close()

        return top1_acc.avg
Example #6
    def validate(self):
        """
        One epoch validation
        :return:
        """
        self.data_loader = fashion_mnist_dataloader(BATCH_SIZE=128)

        tqdm_batch = tqdm.tqdm(self.data_loader.valid_loader, total=self.data_loader.valid_iterations,
                               desc="Valiation at -{}-".format(self.current_epoch))

        self.eval()

        epoch_loss = AverageMeter()
        top1_acc = AverageMeter()
        top5_acc = AverageMeter()

        for x, y in tqdm_batch:
            if self.cuda:
                x, y = x.cuda(non_blocking=True), y.cuda(non_blocking=True)

            # model
            pred = self(x)
            # loss
            cur_loss = self.loss_fn(pred, y)
            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during validation...')

            top1, top5 = cls_accuracy(pred.data, y.data, topk=(1, 5))
            top1_acc.update(top1.item(), x.size(0))
            top5_acc.update(top5.item(), x.size(0))

            epoch_loss.update(cur_loss.item())

        print("Validation results at epoch-" + str(self.current_epoch) + " | " + "loss: " +
              str(epoch_loss.avg) + "\tTop1 Acc: " + str(top1_acc.val))

        tqdm_batch.close()

        return top1_acc.avg
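
Example #7 below is unusual in that it threads a training-progress scalar into the forward pass (pred = self.model(x, progress)), a pattern used, for instance, to anneal stochastic regularization over the course of training. A toy sketch of a model with that calling convention (entirely hypothetical, shown only to illustrate the interface):

    import torch.nn as nn
    import torch.nn.functional as F

    class ProgressAwareNet(nn.Module):
        """Toy model whose dropout rate decays as training progress goes 0 -> 1."""

        def __init__(self, in_features=512, num_classes=10, max_drop=0.5):
            super().__init__()
            self.fc = nn.Linear(in_features, num_classes)
            self.max_drop = max_drop

        def forward(self, x, progress=0.0):
            # heavier regularization early in training, lighter near the end
            x = F.dropout(x, p=self.max_drop * (1.0 - progress),
                          training=self.training)
            return self.fc(x)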
Example #7
    def train_one_epoch(self):
        """
        One epoch training function
        """
        # Initialize tqdm
        tqdm_batch = tqdm(self.data_loader.train_loader,
                          total=self.data_loader.train_iterations,
                          desc="Epoch-{}-".format(self.current_epoch))
        # Set the model to be in training mode
        self.model.train()
        # Initialize your average meters
        epoch_loss = AverageMeter()
        top1_acc = AverageMeter()
        top5_acc = AverageMeter()

        current_batch = 0
        for x, y in tqdm_batch:
            if self.cuda:
                x, y = x.cuda(non_blocking=self.config.async_loading), y.cuda(
                    non_blocking=self.config.async_loading)

            # current iteration over total iterations
            progress = float(
                self.current_epoch * self.data_loader.train_iterations +
                current_batch) / (self.config.max_epoch *
                                  self.data_loader.train_iterations)
            # progress = float(self.current_iteration) / (self.config.max_epoch * self.data_loader.train_iterations)
            x, y = Variable(x), Variable(y)
            lr = adjust_learning_rate(self.optimizer,
                                      self.current_epoch,
                                      self.config,
                                      batch=current_batch,
                                      nBatch=self.data_loader.train_iterations)
            # model
            pred = self.model(x, progress)
            # loss
            cur_loss = self.loss(pred, y)
            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during training...')
            # optimizer
            self.optimizer.zero_grad()
            cur_loss.backward()
            self.optimizer.step()

            top1, top5 = cls_accuracy(pred.data, y.data, topk=(1, 5))

            epoch_loss.update(cur_loss.item())
            top1_acc.update(top1.item(), x.size(0))
            top5_acc.update(top5.item(), x.size(0))

            self.current_iteration += 1
            current_batch += 1

            self.summary_writer.add_scalar("epoch/loss", epoch_loss.val,
                                           self.current_iteration)
            self.summary_writer.add_scalar("epoch/accuracy", top1_acc.val,
                                           self.current_iteration)
        tqdm_batch.close()

        self.logger.info("Training at epoch-" + str(self.current_epoch) +
                         " | " + "loss: " + str(epoch_loss.val) +
                         "- Top1 Acc: " + str(top1_acc.val) + "- Top5 Acc: " +
                         str(top5_acc.val))
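
Example #7 delegates its schedule to an adjust_learning_rate helper with a batch/nBatch signature. Here is a minimal cosine-annealing sketch with that signature; the config.learning_rate and config.max_epoch field names are assumptions:

    import math

    def adjust_learning_rate(optimizer, epoch, config, batch=0, nBatch=None):
        """Cosine-anneal the learning rate over config.max_epoch epochs (sketch)."""
        t_total = config.max_epoch * nBatch
        t_cur = epoch * nBatch + batch
        lr = 0.5 * config.learning_rate * (1 + math.cos(math.pi * t_cur / t_total))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return lr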
Example #8
    def train_neural_network(self):
        print_training = "Training CONV:  valid_idx:{}, test_idx{} batch_norm:{}, keep_prob:{}".format(
            self.valid_idx, self.test_idx, self.batch_norm, self.keep_prob)
        print(print_training)
        logging.debug(print_training)
        self.session.run(tf.global_variables_initializer())
        best_validation_accuracy = 0
        last_improvement = 0

        start_time = time.time()
        idx = 0
        epochs = 0
        for i in range(self.num_iterations):
            # Batch Training
            j = self.get_last_batch_index(self.num_examples, idx,
                                          self.batch_size)
            x_batch, y_batch = self.train_x[idx:j, :], self.train_y[idx:j, :]
            # TODO simplify batch processing
            if j == self.num_examples:
                epochs += 1
                idx = 0
                is_epoch = True
            else:
                is_epoch = False
                idx = j

            summary, train_loss, train_y_pred_cls, _ = self.session.run(
                [self.merged, self.cost, self.y_pred_cls, self.optimizer],
                feed_dict={
                    self.x: x_batch,
                    self.y: y_batch,
                    self.is_training: True
                })

            train_cls_true = metrics.convert_labels_to_cls(y_batch)
            train_correct = (train_y_pred_cls == train_cls_true)
            train_acc, _ = metrics.cls_accuracy(train_correct)
            self.train_cost.append(train_loss)
            self.train_acc.append(train_acc)
            self.train_writer.add_summary(summary, i)

            # Calculate the accuracy
            valid_correct, _, valid_cost = self.predict_cls(
                images=self.valid_x,
                labels=self.valid_y,
                cls_true=metrics.convert_labels_to_cls(self.valid_y))
            validation_acc, _ = metrics.cls_accuracy(valid_correct)
            self.validation_acc.append(validation_acc)
            self.validation_cost.append(valid_cost)

            if is_epoch or (i == (self.num_iterations - 1)):

                if validation_acc > best_validation_accuracy:
                    # Save the best-performing variables of the TensorFlow graph to file.
                    self.saver.save(sess=self.session,
                                    save_path=self.save_path)
                    # update best validation accuracy
                    best_validation_accuracy = validation_acc
                    last_improvement = i
                    improved_str = '*'
                else:
                    improved_str = ''

                print_opt = "Epoch: {}, Training Loss:{}, Acc: {}, " \
                            " Validation Loss:{}, Acc:{} {}".format(epochs, train_loss, train_acc, valid_cost,
                                                                    validation_acc, improved_str)
                print(print_opt)
                logging.debug(print_opt)
            if i - last_improvement > self.require_improvement:
                print_impro = "No improvement found in a while, stopping optimization."
                print(print_impro)
                logging.debug(print_impro)
                # Break out from the for-loop.
                break
        # Ending time.
        end_time = time.time()
        time_dif = end_time - start_time
        print_time = "Time usage: " + str(
            timedelta(seconds=int(round(time_dif))))
        print(print_time)
        logging.debug(print_time)
        return last_improvement, epochs
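
Example #9 below depends on a to_one_hot helper that is not shown. A minimal sketch matching the call to_one_hot(target.cpu(), num_class=...):

    import torch

    def to_one_hot(labels, num_class):
        """Convert a 1-D tensor of class indices to an (N, num_class) one-hot matrix."""
        one_hot = torch.zeros(labels.size(0), num_class)
        one_hot.scatter_(1, labels.view(-1, 1).long(), 1.0)
        return one_hot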
Example #9
    def train_one_epoch(self):
        """
        One epoch of training
        :return:
        """
        self.model.train()
        # self.scheduler.step()

        for batch_idx, (data,
                        target) in enumerate(self.data_loader.train_loader):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            # output = self.model(data)

            target_one_hot = to_one_hot(
                target.cpu(),
                num_class=self.config.train_classes).to(self.device)

            # label smoothing (disabled)
            # target_one_hot = (1.0 - 0.1) * target_one_hot + 0.1 / 100
            # target_float_hot = to_float_hot(target.cpu(), num_class=self.config.train_classes).to(self.device)

            # loss = self.loss(output, target)

            inputs_var, labels_var = data, target

            class_label = torch.Tensor(
                np.array(range(self.config.train_classes)))
            center_labels_var = torch.autograd.Variable(
                class_label.to(torch.long)).cuda()

            fvec, feature, class_weight = self.model(inputs_var)

            if self.q.full():

                self.q.get()
                self.q.put(class_weight.cpu())

                a = list(self.q.queue)

                # average the last five class-weight snapshots in the queue
                temp = ((a[0] + a[1] + a[2] + a[3] + a[4]) / 5)
                class_weight = 0.2 * class_weight + temp.to(self.device)

            else:
                self.q.put(class_weight.cpu())

            # one-hot vector
            labels_var_one_hot = target_one_hot
            # inter-class distance
            fvec = fvec - 4 * labels_var_one_hot.cuda()
            # intra-class distance
            loss_1 = self.loss(fvec, labels_var)

            origin_class_weight = class_weight

            batch_center = self.Center(feature, target,
                                       self.config.train_classes, class_weight)
            # batch_center = F.relu(batch_center)
            batch_center = F.normalize(batch_center, p=2, dim=1)

            # if self.current_epoch < 13:

            #     linear_beta = (13 - self.current_epoch) / 13

            #     norm_beta = scipy.stats.norm(0, 1).pdf(self.current_epoch/12/2)

            #     beta = 1

            #     class_weight = torch.div(class_weight + beta * batch_center, 2)

            class_weight = torch.div(class_weight + batch_center, 2)

            # class_weight = torch.div(class_weight - batch_center, 2)

            class_weight = F.normalize(class_weight)

            center_loss = self.loss(
                torch.mm(class_weight, torch.t(class_weight)),
                center_labels_var)

            triplet_loss = self.triplet(feature, target, class_weight)

            triplet_origin_loss = self.triplet(feature, target,
                                               origin_class_weight)

            loss = 0.5 * center_loss + loss_1 + 0.1 * triplet_loss
            # loss = 0.5 * center_loss + 0.1 * triplet_loss

            if self.config.loss_mode == '100':
                loss = triplet_origin_loss
            if self.config.loss_mode == '101':
                loss = triplet_origin_loss + loss_1
            if self.config.loss_mode == '110':
                loss = triplet_loss
            if self.config.loss_mode == '011':
                loss = loss_1
            if self.config.loss_mode == '111':
                loss = 0.5 * center_loss + loss_1 + 0.1 * triplet_loss

            prec1, prec5 = cls_accuracy(fvec, target, topk=(1, 5))

            self.epoch_loss.update(loss.item())
            self.top1.update(prec1.item())
            self.top5.update(prec5.item())

            # loss.backward()
            loss.backward(retain_graph=True)

            self.optimizer.step()

            if batch_idx % self.config.log_interval == 0:

                # self.logger.info(f'center_loss:{center_loss}\t loss_1:{loss_1}\t triplet_loss:{triplet_loss}')

                self.logger.info(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\tPrec@5 {top5.val:.3f} ({top5.avg:.3f}) \tlr {lr}'
                    .format(self.current_epoch,
                            batch_idx * len(data),
                            len(self.data_loader.train_loader.dataset),
                            100. * batch_idx /
                            len(self.data_loader.train_loader),
                            loss=self.epoch_loss,
                            top1=self.top1,
                            top5=self.top5,
                            lr=self.optimizer.param_groups[0]['lr']))
            self.current_iteration += 1
            if np.isnan(float(loss.item())):
                raise ValueError('Loss is nan during training...')

            self.summary_writer.add_scalar("batch/loss", self.epoch_loss.avg,
                                           self.current_iteration)
            self.summary_writer.add_scalar("batch/top1", self.top1.avg,
                                           self.current_iteration)
            self.summary_writer.add_scalar("batch/top5", self.top5.avg,
                                           self.current_iteration)
            self.summary_writer.add_scalar(
                "batch/lr", self.optimizer.param_groups[0]['lr'],
                self.current_iteration)
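
All of these snippets are methods of a larger agent/trainer object. A typical driver loop tying a train/validate pair together might look like the following (a sketch; the agent and config objects and the checkpoint path are assumptions, not part of the examples):

    import torch

    best_acc = 0.0
    for epoch in range(config.max_epoch):
        agent.current_epoch = epoch
        agent.train_one_epoch()
        acc = agent.validate()
        if acc > best_acc:
            # keep only the best-performing weights
            best_acc = acc
            torch.save(agent.model.state_dict(), "best_checkpoint.pth")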