Example #1
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)

            out, acc = vgg(img, label)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)

            # backward() runs the backward pass of the network
            avg_loss.backward()
            optimizer.minimize(avg_loss)

            # clear the parameter gradients so the next iteration trains correctly
            vgg.clear_gradients()

            all_train_iter = all_train_iter + train_parameters['train_batch_size']
            all_train_iters.append(all_train_iter)
            all_train_costs.append(avg_loss.numpy()[0])  # record the batch-mean loss
            all_train_accs.append(acc.numpy()[0])

            if batch_id % 1 == 0:  # a modulus of 1 logs every batch
                print(
                    "Loss at epoch {} step {}: {}, acc: {}".format(epoch_num, batch_id, avg_loss.numpy(), acc.numpy()))

    draw_train_process("training", all_train_iters, all_train_costs, all_train_accs, "training cost", "training acc")
    draw_process("training loss", "red", all_train_iters, all_train_costs, "training loss")
    draw_process("training acc", "green", all_train_iters, all_train_accs, "training acc")

    # save the model parameters (written to vgg.pdparams)
    fluid.save_dygraph(vgg.state_dict(), "vgg")
    print("Final loss: {}".format(avg_loss.numpy()))
Example #2
        # Backprop and optimize
        loss = loss_fn(class_logits, x_class.cuda())

        # DARC1 regularizer (optional); enable by uncommenting the term below
        darc1_loss = 0  # 1e-3 * torch.max(torch.sum(torch.abs(class_logits), dim=0))
        loss = darc1_loss + loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print("Epoch[{}/{}], Step [{}/{}], CE Loss: {:.4f}".format(
                epoch + 1, num_epochs, i + 1, len(train_data_loader),
                loss.item()))

    with torch.no_grad():
        total = 0.
        correct = 0.
        for tx, tx_class in test_data_loader:
            tx = tx.cuda()  # optionally flatten first: .view(-1, img_size)
            tclass_logits = model(tx)
            _, mostprob_result = torch.max(tclass_logits, dim=1)
            total += tx.size(0)
            correct += (mostprob_result == tx_class.cuda()).sum().item()
        print("%d/%d correct (%.2f %%)" %
              (correct, total, 100 * correct / total))

torch.save(model.state_dict(), f'models/SVHN_{sys.argv[1]}net.pth')
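
This snippet likewise omits its setup. A minimal sketch of the assumed context follows; build_net, num_epochs, and the optimizer choice are hypothetical stand-ins for objects the original defines elsewhere (sys.argv[1] selects the network variant, as the final save line suggests).

import sys
import torch
import torch.nn as nn

model = build_net(sys.argv[1]).cuda()  # assumed model factory
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(num_epochs):
    for i, (x, x_class) in enumerate(train_data_loader):
        class_logits = model(x.cuda())
        # ... body of the fragment above ...
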
Example #3
class Solver(object):

    DEFAULTS = {}

    def __init__(self, version, data_loader, config, output_txt):
        """
        Initializes a Solver object
        """

        # data loader
        self.__dict__.update(Solver.DEFAULTS, **config)
        self.version = version
        self.data_loader = data_loader
        self.output_txt = output_txt

        self.build_model()

        # start with a pre-trained model
        if self.pretrained_model:
            self.load_pretrained_model()

    def build_model(self):
        """
        Instantiates the model, loss criterion, and optimizer
        """

        # instantiate model
        self.model = VGGNet(self.config, self.use_batch_norm,
                            self.input_channels, self.class_count,
                            self.init_weights)

        # instantiate loss criterion
        self.criterion = nn.CrossEntropyLoss()

        # instantiate optimizer
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.lr,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay)

        # print network
        self.print_network(self.model, 'VGGNet')

        # use gpu if enabled
        if torch.cuda.is_available() and self.use_gpu:
            self.model.cuda()
            self.criterion.cuda()

    def print_network(self, model, name):
        """
        Prints the structure of the network and the total number of parameters
        """
        num_params = 0
        for p in model.parameters():
            num_params += p.numel()
        write_print(self.output_txt, name)
        write_print(self.output_txt, str(model))
        write_print(self.output_txt,
                    'The number of parameters: {}'.format(num_params))

    def load_pretrained_model(self):
        """
        Loads a pre-trained model from a .pth file
        """
        self.model.load_state_dict(
            torch.load(
                os.path.join(self.model_save_path,
                             '{}.pth'.format(self.pretrained_model))))
        write_print(self.output_txt,
                    'loaded trained model {}'.format(self.pretrained_model))

    def print_loss_log(self, start_time, iters_per_epoch, e, i, loss):
        """
        Prints the loss and elapsed time for each epoch
        """
        total_iter = self.num_epochs * iters_per_epoch
        cur_iter = e * iters_per_epoch + i

        elapsed = time.time() - start_time
        total_time = (total_iter - cur_iter) * elapsed / (cur_iter + 1)
        epoch_time = (iters_per_epoch - i) * elapsed / (cur_iter + 1)

        epoch_time = str(datetime.timedelta(seconds=epoch_time))
        total_time = str(datetime.timedelta(seconds=total_time))
        elapsed = str(datetime.timedelta(seconds=elapsed))

        log = "Elapsed {}/{} -- {}, Epoch [{}/{}], Iter [{}/{}], " \
              "loss: {:.4f}".format(elapsed,
                                    epoch_time,
                                    total_time,
                                    e + 1,
                                    self.num_epochs,
                                    i + 1,
                                    iters_per_epoch,
                                    loss)

        write_print(self.output_txt, log)

    def save_model(self, e):
        """
        Saves the model parameters at the end of epoch e
        """
        path = os.path.join(self.model_save_path,
                            '{}/{}.pth'.format(self.version, e + 1))

        torch.save(self.model.state_dict(), path)

    def model_step(self, images, labels):
        """
        Performs a single training step: forward pass, loss computation,
        backpropagation, and parameter update
        """

        # set model in training mode
        self.model.train()

        # empty the gradients of the model through the optimizer
        self.optimizer.zero_grad()

        # forward pass
        output = self.model(images)

        # compute loss
        loss = self.criterion(output, labels.squeeze())

        # compute gradients using back propagation
        loss.backward()

        # update parameters
        self.optimizer.step()

        # return loss
        return loss

    def train(self):
        """
        Training process
        """
        self.losses = []
        self.top_1_acc = []
        self.top_5_acc = []

        iters_per_epoch = len(self.data_loader)

        # resume from a pre-trained model if one exists
        if self.pretrained_model:
            start = int(self.pretrained_model.split('/')[-1])
        else:
            start = 0

        # start training
        start_time = time.time()
        for e in range(start, self.num_epochs):
            for i, (images, labels) in enumerate(tqdm(self.data_loader)):
                images = to_var(images, self.use_gpu)
                labels = to_var(torch.LongTensor(labels), self.use_gpu)

                loss = self.model_step(images, labels)

            # print out loss log
            if (e + 1) % self.loss_log_step == 0:
                self.print_loss_log(start_time, iters_per_epoch, e, i, loss)
                self.losses.append((e, loss))

            # save model
            if (e + 1) % self.model_save_step == 0:
                self.save_model(e)

            # evaluate on train dataset
            # if (e + 1) % self.train_eval_step == 0:
            #     top_1_acc, top_5_acc = self.train_evaluate(e)
            #     self.top_1_acc.append((e, top_1_acc))
            #     self.top_5_acc.append((e, top_5_acc))

        # print losses
        write_print(self.output_txt, '\n--Losses--')
        for e, loss in self.losses:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(loss))

        # print top_1_acc
        write_print(self.output_txt, '\n--Top 1 accuracy--')
        for e, acc in self.top_1_acc:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(acc))

        # print top_5_acc
        write_print(self.output_txt, '\n--Top 5 accuracy--')
        for e, acc in self.top_5_acc:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(acc))

    def eval(self, data_loader):
        """
        Returns the number of correct top-1 and top-5 predictions and the
        total number of instances
        """

        # set the model to eval mode
        self.model.eval()

        top_1_correct = 0
        top_5_correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in data_loader:

                images = to_var(images, self.use_gpu)
                labels = to_var(torch.LongTensor(labels), self.use_gpu)

                output = self.model(images)
                total += labels.size()[0]

                # top 1
                # get the max for each instance in the batch
                _, top_1_output = torch.max(output.data, dim=1)

                top_1_correct += torch.sum(
                    torch.eq(labels.squeeze(), top_1_output))

                # top 5
                _, top_5_output = torch.topk(output.data, k=5, dim=1)
                for i, label in enumerate(labels):
                    if label in top_5_output[i]:
                        top_5_correct += 1
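                # note: a vectorized alternative to the per-row loop above
                # (a sketch, not in the original):
                #   top_5_correct += (top_5_output == labels.view(-1, 1)) \
                #       .any(dim=1).sum().item()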

        return top_1_correct.item(), top_5_correct, total

    def train_evaluate(self, e):
        """
        Evaluates the performance of the model using the train dataset
        """
        top_1_correct, top_5_correct, total = self.eval(self.data_loader)
        log = "Epoch [{}/{}]--top_1_acc: {:.4f}--top_5_acc: {:.4f}".format(
            e + 1, self.num_epochs, top_1_correct / total,
            top_5_correct / total)
        write_print(self.output_txt, log)
        return top_1_correct / total, top_5_correct / total

    def test(self):
        """
        Evaluates the performance of the model using the test dataset
        """
        top_1_correct, top_5_correct, total = self.eval(self.data_loader)
        log = "top_1_acc: {:.4f}--top_5_acc: {:.4f}".format(
            top_1_correct / total, top_5_correct / total)
        write_print(self.output_txt, log)
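
A hypothetical driver for the Solver class above; the config keys mirror the attributes the class reads via self.__dict__.update, and the values shown are placeholders, not defaults from the original.

config = {
    'lr': 1e-2, 'momentum': 0.9, 'weight_decay': 5e-4,
    'num_epochs': 100, 'use_gpu': True, 'use_batch_norm': True,
    'input_channels': 3, 'class_count': 10, 'init_weights': True,
    'config': 'D',  # VGG layer configuration passed through to VGGNet
    'pretrained_model': None, 'model_save_path': 'weights',
    'model_save_step': 10, 'loss_log_step': 1, 'train_eval_step': 10,
}
solver = Solver('vgg16_run1', train_loader, config, 'train_log.txt')
solver.train()
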
Example #4
                    if parameter.grad is not None:
                        avg_grad = torch.mean(parameter.grad)
                        print('\tavg_grad for {} = {:.6f}'.format(
                            name, avg_grad))
                        tbwriter.add_scalar('avg_grad/{}'.format(name),
                                            avg_grad.item(), total_steps)
                        tbwriter.add_histogram('grad/{}'.format(name),
                                               parameter.grad.cpu().numpy(),
                                               total_steps)
                    if parameter.data is not None:
                        avg_weight = torch.mean(parameter.data)
                        print('\tavg_weight for {} = {:.6f}'.format(
                            name, avg_weight))
                        tbwriter.add_scalar('avg_weight/{}'.format(name),
                                            avg_weight.item(), total_steps)
                        tbwriter.add_histogram('weight/{}'.format(name),
                                               parameter.data.cpu().numpy(),
                                               total_steps)
                    print()

        total_steps += 1

    # save a checkpoint at the end of each epoch
    cpt_path = os.path.join(CPT_DIR, 'checkpoint_e{}.pkl'.format(epoch + 1))
    torch.save(
        {
            'epoch': epoch,
            'model': vggnet.state_dict(),
            'optimizer': optimizer.state_dict(),
            'seed': seed,
            'total_steps': total_steps,
        }, cpt_path)
tbwriter.close()
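
This fragment is the tail of a per-parameter logging block inside a training loop. A sketch of the assumed surrounding structure follows; the writer setup, loop variables, and log_every interval are hypothetical.

import torch
from torch.utils.tensorboard import SummaryWriter

tbwriter = SummaryWriter(log_dir='tb_logs')
total_steps = 0
for epoch in range(num_epochs):
    for images, classes in dataloader:
        optimizer.zero_grad()
        loss = criterion(vggnet(images), classes)
        loss.backward()
        optimizer.step()

        # every log_every steps, walk the named parameters and log gradient
        # and weight statistics; the fragment above is the body of that walk
        # and increments total_steps itself
        if total_steps % log_every == 0:
            for name, parameter in vggnet.named_parameters():
                ...  # fragment body goes here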