from multiprocessing import cpu_count

import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid

# data_processor supplies train_parameters and train_reader
from data_processor import train_parameters
from data_processor import *
from model import VGGNet

'''
Model training
'''
# To train on GPU 0 instead, use:
# with fluid.dygraph.guard(place=fluid.CUDAPlace(0)):
with fluid.dygraph.guard():
    print(train_parameters['class_dim'])
    print(train_parameters['label_dict'])
    vgg = VGGNet()
    optimizer = fluid.optimizer.AdamOptimizer(learning_rate=train_parameters['learning_strategy']['lr'],
                                              parameter_list=vgg.parameters())
    for epoch_num in range(train_parameters['num_epochs']):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data]).astype('int64')
            y_data = y_data[:, np.newaxis]

            # convert the NumPy arrays into variables that DyGraph accepts
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)

            out, acc = vgg(img, label)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)

            # backward() runs the backward pass and computes the gradients
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            # clear gradients so they do not accumulate across batches
            vgg.clear_gradients()
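
The loop above assumes data_processor exports a train_reader that yields batches of (image, label) pairs. A minimal sketch of how such a reader could be built in Paddle 1.x style; train_list and load_image are hypothetical stand-ins for the project's own file list and image decoder:

import paddle

def custom_reader(file_list):
    # yield one (image_array, label) sample at a time
    def reader():
        for path, label in file_list:
            yield load_image(path), int(label)  # load_image is hypothetical
    return reader

# shuffle within a 1024-sample buffer, then group samples into batches
train_reader = paddle.batch(
    paddle.reader.shuffle(custom_reader(train_list), buf_size=1024),
    batch_size=32)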
Example #2
import sys

import torch
import torch.nn as nn

# VGGNet, MobileNet, CifarClassifier, the datasets, and the hyperparameters
# (batch_size, learning_rate, num_epochs) are defined in the truncated preamble
train_data_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                batch_size=batch_size,
                                                shuffle=True)
test_data_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                               batch_size=batch_size,
                                               shuffle=False)

if sys.argv[1] == 'vgg':
    model = VGGNet()
elif sys.argv[1] == 'mobile':
    model = MobileNet()
elif sys.argv[1] == 'custom':
    model = CifarClassifier()
else:
    raise ValueError(f'Unknown network type {sys.argv[1]}')
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    for i, (x, x_class) in enumerate(train_data_loader):
        # Forward pass
        x = x.cuda()  #.view(-1, img_size)
        class_logits = model(x)

        # Backprop and optimize
        loss = loss_fn(class_logits, x_class.cuda())

        # DARC1 regularizer (optional); uncomment the term below to enable it
        darc1_loss = 0  # 1e-3 * torch.max(torch.sum(torch.abs(class_logits), dim=0))
        loss = darc1_loss + loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
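
After training, a typical evaluation pass over test_data_loader might look like the following sketch (standard PyTorch, not part of the original excerpt):

# measure top-1 accuracy on the held-out test set
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for x, x_class in test_data_loader:
        preds = model(x.cuda()).argmax(dim=1)
        correct += (preds == x_class.cuda()).sum().item()
        total += x_class.size(0)
print('Test accuracy: {:.4f}'.format(correct / total))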
Example #3
import datetime
import os
import time

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# write_print, to_var, and VGGNet are project-local helpers imported elsewhere


class Solver(object):

    DEFAULTS = {}

    def __init__(self, version, data_loader, config, output_txt):
        """
        Initializes a Solver object
        """

        # data loader
        self.__dict__.update(Solver.DEFAULTS, **config)
        self.version = version
        self.data_loader = data_loader
        self.output_txt = output_txt

        self.build_model()

        # start with a pre-trained model
        if self.pretrained_model:
            self.load_pretrained_model()

    def build_model(self):
        """
        Instantiates the model, loss criterion, and optimizer
        """

        # instantiate model
        self.model = VGGNet(self.config, self.use_batch_norm,
                            self.input_channels, self.class_count,
                            self.init_weights)

        # instantiate loss criterion
        self.criterion = nn.CrossEntropyLoss()

        # instantiate optimizer
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.lr,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay)

        # print network
        self.print_network(self.model, 'VGGNet')

        # use gpu if enabled
        if torch.cuda.is_available() and self.use_gpu:
            self.model.cuda()
            self.criterion.cuda()

    def print_network(self, model, name):
        """
        Prints the structure of the network and the total number of parameters
        """
        num_params = 0
        for p in model.parameters():
            num_params += p.numel()
        write_print(self.output_txt, name)
        write_print(self.output_txt, str(model))
        write_print(self.output_txt,
                    'The number of parameters: {}'.format(num_params))

    def load_pretrained_model(self):
        """
        Loads a pre-trained model from a .pth file
        """
        self.model.load_state_dict(
            torch.load(
                os.path.join(self.model_save_path,
                             '{}.pth'.format(self.pretrained_model))))
        write_print(self.output_txt,
                    'loaded trained model {}'.format(self.pretrained_model))

    def print_loss_log(self, start_time, iters_per_epoch, e, i, loss):
        """
        Prints the loss and elapsed time for each epoch
        """
        total_iter = self.num_epochs * iters_per_epoch
        cur_iter = e * iters_per_epoch + i

        elapsed = time.time() - start_time
        total_time = (total_iter - cur_iter) * elapsed / (cur_iter + 1)
        epoch_time = (iters_per_epoch - i) * elapsed / (cur_iter + 1)

        epoch_time = str(datetime.timedelta(seconds=epoch_time))
        total_time = str(datetime.timedelta(seconds=total_time))
        elapsed = str(datetime.timedelta(seconds=elapsed))

        log = "Elapsed {}/{} -- {}, Epoch [{}/{}], Iter [{}/{}], " \
              "loss: {:.4f}".format(elapsed,
                                    epoch_time,
                                    total_time,
                                    e + 1,
                                    self.num_epochs,
                                    i + 1,
                                    iters_per_epoch,
                                    loss)

        write_print(self.output_txt, log)

    def save_model(self, e):
        """
        Saves a model checkpoint for epoch e
        """
        path = os.path.join(self.model_save_path,
                            '{}/{}.pth'.format(self.version, e + 1))

        torch.save(self.model.state_dict(), path)

    def model_step(self, images, labels):
        """
        A step for each iteration
        """

        # set model in training mode
        self.model.train()

        # empty the gradients of the model through the optimizer
        self.optimizer.zero_grad()

        # forward pass
        output = self.model(images)

        # compute loss
        loss = self.criterion(output, labels.squeeze())

        # compute gradients using back propagation
        loss.backward()

        # update parameters
        self.optimizer.step()

        # return loss
        return loss

    def train(self):
        """
        Training process
        """
        self.losses = []
        self.top_1_acc = []
        self.top_5_acc = []

        iters_per_epoch = len(self.data_loader)

        # resume from a pre-trained model if one exists
        if self.pretrained_model:
            start = int(self.pretrained_model.split('/')[-1])
        else:
            start = 0

        # start training
        start_time = time.time()
        for e in range(start, self.num_epochs):
            for i, (images, labels) in enumerate(tqdm(self.data_loader)):
                images = to_var(images, self.use_gpu)
                labels = to_var(torch.LongTensor(labels), self.use_gpu)

                loss = self.model_step(images, labels)

            # print out loss log
            if (e + 1) % self.loss_log_step == 0:
                self.print_loss_log(start_time, iters_per_epoch, e, i, loss)
                self.losses.append((e, loss))

            # save model
            if (e + 1) % self.model_save_step == 0:
                self.save_model(e)

            # evaluate on train dataset
            # if (e + 1) % self.train_eval_step == 0:
            #     top_1_acc, top_5_acc = self.train_evaluate(e)
            #     self.top_1_acc.append((e, top_1_acc))
            #     self.top_5_acc.append((e, top_5_acc))

        # print losses
        write_print(self.output_txt, '\n--Losses--')
        for e, loss in self.losses:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(loss))

        # print top_1_acc
        write_print(self.output_txt, '\n--Top 1 accuracy--')
        for e, acc in self.top_1_acc:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(acc))

        # print top_5_acc
        write_print(self.output_txt, '\n--Top 5 accuracy--')
        for e, acc in self.top_5_acc:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(acc))

    def eval(self, data_loader):
        """
        Returns the counts of correct top-1 and top-5 predictions
        and the total number of samples
        """

        # set the model to eval mode
        self.model.eval()

        top_1_correct = 0
        top_5_correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in data_loader:

                images = to_var(images, self.use_gpu)
                labels = to_var(torch.LongTensor(labels), self.use_gpu)

                output = self.model(images)
                total += labels.size()[0]

                # top 1
                # get the max for each instance in the batch
                _, top_1_output = torch.max(output.data, dim=1)

                top_1_correct += torch.sum(
                    torch.eq(labels.squeeze(), top_1_output))

                # top 5
                _, top_5_output = torch.topk(output.data, k=5, dim=1)
                for i, label in enumerate(labels):
                    if label in top_5_output[i]:
                        top_5_correct += 1

        return top_1_correct.item(), top_5_correct, total

    def train_evaluate(self, e):
        """
        Evaluates the performance of the model using the train dataset
        """
        top_1_correct, top_5_correct, total = self.eval(self.data_loader)
        log = "Epoch [{}/{}]--top_1_acc: {:.4f}--top_5_acc: {:.4f}".format(
            e + 1, self.num_epochs, top_1_correct / total,
            top_5_correct / total)
        write_print(self.output_txt, log)
        return top_1_correct / total, top_5_correct / total

    def test(self):
        """
        Evaluates the performance of the model using the test dataset
        """
        top_1_correct, top_5_correct, total = self.eval(self.data_loader)
        log = "top_1_acc: {:.4f}--top_5_acc: {:.4f}".format(
            top_1_correct / total, top_5_correct / total)
        write_print(self.output_txt, log)
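
A minimal usage sketch for the Solver class; train_loader is a hypothetical torch DataLoader, and the config keys are assumptions inferred from the attributes the class reads (the real project likely builds config from an argument parser):

config = {
    'lr': 0.01, 'momentum': 0.9, 'weight_decay': 5e-4,
    'num_epochs': 50, 'loss_log_step': 1, 'model_save_step': 10,
    'use_gpu': True, 'use_batch_norm': True, 'init_weights': True,
    'input_channels': 3, 'class_count': 10,
    'config': 'D',  # 'D' = VGG16 layer layout (assumed)
    'pretrained_model': None, 'model_save_path': './checkpoints',
}
solver = Solver(version='vgg16_run1', data_loader=train_loader,
                config=config, output_txt='train_log.txt')
solver.train()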
Example #4
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from tensorboardX import SummaryWriter

# VGG_CONFS, MODEL_TYPE, IMAGE_DIM, NUM_CLASSES, device, DEVICE_IDS, dataset,
# BATCH_SIZE, LR_INIT, MOMENTUM, and LOG_DIR are defined in the truncated preamble
vggnet = VGGNet(VGG_CONFS[MODEL_TYPE], dim=IMAGE_DIM,
                num_classes=NUM_CLASSES).to(device)
vggnet = torch.nn.parallel.DataParallel(vggnet, device_ids=DEVICE_IDS)
print(vggnet)
print('VGGNet created')

dataloader = data.DataLoader(dataset,
                             shuffle=True,
                             pin_memory=True,
                             drop_last=True,
                             num_workers=4,
                             batch_size=BATCH_SIZE)
print('Dataloader created')

# create optimizer
optimizer = optim.SGD(params=vggnet.parameters(),
                      lr=LR_INIT,
                      weight_decay=0.00005,
                      momentum=MOMENTUM)
print('Optimizer created')

# multiply LR by 1 / 10 after every 20 epochs
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
print('LR Scheduler created')

tbwriter = SummaryWriter(log_dir=LOG_DIR)
print('TensorboardX summary writer created')

# criterion defined
criterion = nn.CrossEntropyLoss()
print('Criterion defined')
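
Example #4 stops after setup; a training loop wiring these pieces together might look like the following sketch (NUM_EPOCHS is an assumed constant and the TensorBoard tag name is illustrative):

total_steps = 0
for epoch in range(NUM_EPOCHS):
    for imgs, classes in dataloader:
        imgs, classes = imgs.to(device), classes.to(device)

        # forward pass and loss
        loss = criterion(vggnet(imgs), classes)

        # backprop and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # log the training loss to TensorBoard every 10 steps
        if total_steps % 10 == 0:
            tbwriter.add_scalar('loss', loss.item(), total_steps)
        total_steps += 1

    # decay the learning rate on the 20-epoch schedule
    lr_scheduler.step()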