Example #1
def train():
    train_dataloader, val_dataloader = loadData()
    pretrained_params = torch.load('VGG_pretrained.pth')
    model = VGG()
    # strict=False loads only those pretrained parameters whose names (and shapes)
    # match the new model; parameters that do not match, or are missing, are skipped.
    # Assumes 'VGG_pretrained.pth' stores a state dict; if it stores a whole model
    # object, pass pretrained_params.state_dict() instead.
    model.load_state_dict(pretrained_params, strict=False)
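    # Note: load_state_dict returns a named tuple whose missing_keys and
    # unexpected_keys fields show exactly which parameters were skipped, e.g.:
    #   result = model.load_state_dict(pretrained_params, strict=False)
    #   print(result.missing_keys, result.unexpected_keys)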

    if torch.cuda.is_available():
        model.cuda()

    # When fine-tuning, freeze the parameters of the chosen layers (and pass
    # only the trainable parameters to the optimizer below):
    # for p in model.XXlayers.parameters():
    #     p.requires_grad = False

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    best_acc = 0

    for epoch in range(epochs):
        epoch_loss = 0
        steps = 0
        for i, data in enumerate(train_dataloader):
            inputs, labels = data
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            model.train()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()  # loss.data[0] is deprecated for 0-dim tensors
            steps += 1
        print('epoch:%d loss:%.3f' % (epoch + 1, epoch_loss / steps))
        if epoch % 5 == 0:
            val_acc = evaluate(model, val_dataloader)
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model, 'best_VGG.pkl')
                torch.save(model.state_dict(), 'best_VGG_params.pkl')
            print('val acc: {:.3f}'.format(val_acc))

    print('Finished Training')
    torch.save(model, 'VGG.pkl')
    torch.save(model.state_dict(), 'VGG_params.pkl')
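This example calls an evaluate(model, val_dataloader) helper that is not shown. A minimal sketch of what it might look like, assuming a classifier whose outputs are per-class logits:

import torch

def evaluate(model, dataloader):
    """Return classification accuracy of `model` over `dataloader`."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model(inputs)
            preds = outputs.argmax(dim=1)  # class with the highest logit
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total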
Example #2
def train(train_loader, valid_loader, writer):

    model = VGG(batch_size=cf.batch_size)
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=cf.learning_rate)

    for e in range(cf.epoch):
        batch_loss = 0
        for batch_x, batch_y in train_loader:
            y_predict = model(batch_x)
            y_one_hot = to_convert_one_hot(y_predict)  # helper defined elsewhere in this project

            loss = criterion(y_one_hot, batch_y.float())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss = loss.item()
            batch_loss += loss
            print(f'batch loss: {loss:.3f}')

        # average over the number of batches, not the batch size
        num_batches = len(train_loader)
        print(f'Epoch #{e}: --- Training loss: {batch_loss/num_batches:.3f}')
        writer.log_training(batch_loss / num_batches, e)

        save_path = './models/chkpt-%d.pt' % (e + 1)
        torch.save(
            {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': e
            }, save_path)
        print("Saved checkpoint to: %s" % save_path)
Example #3
    # iterate through all the features for the chosen layers
    for gen_feature, orig_feature, style_feature in zip(
            generated_features, original_img_features, style_features):
        # batch_size will just be 1
        batch_size, channel, height, width = gen_feature.shape
        original_loss += torch.mean((gen_feature - orig_feature)**2)
        # Compute Gram Matrix of generated
        G = gen_feature.view(channel, height * width).mm(
            gen_feature.view(channel, height * width).t())
        # Compute Gram Matrix of Style
        A = style_feature.view(channel, height * width).mm(
            style_feature.view(channel, height * width).t())
        style_loss += torch.mean((G - A)**2)

    total_loss = alpha * original_loss + beta * style_loss
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if step % 200 == 0:
        print(total_loss)
        # detach before converting to PIL (the generated image requires grad)
        temp = generated.detach().cpu().squeeze(0)
        to_pil_image = transforms.ToPILImage()
        results = to_pil_image(temp)
        #img = Image.show(temp)
        results.save("generated" + str(step) + ".png")

torch.save(model.state_dict(),
           "/home/abhrant/Neural_Style_Transfer_Web_App/nst_weight.pt")
Example #4
        num_batches += 1

        if (count % 100 == 0):

            # compute stats for the full training set
            total_loss = running_loss / num_batches
            total_error = running_error / num_batches
            elapsed = (time.time() - start) / 60

            if int(elapsed) % 10 == 0:
                lost.append(total_loss)
                errors.append(total_error)
                torch.save(
                    {
                        'model': net.state_dict(),
                        'epoch': epoch,
                        'step': count,
                        'lr': my_lr,
                        'lost': lost,
                        'error': errors
                    }, "../../models/base/vgg_base_model.pth")

            print('epoch=', epoch, '\t step=', count, '/', step, '\t time=',
                  elapsed, 'min', '\t lr=', my_lr, '\t loss=', total_loss,
                  '\t error=', total_error * 100, 'percent')
            #eval_on_test_set()
            print(' ')

    my_lr = my_lr / 2
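Halving my_lr only updates the Python variable; for the change to take effect it has to be pushed into the optimizer as well. A short sketch, assuming the snippet's optimizer object is named optimizer:

# Apply the new learning rate to every parameter group in place.
my_lr = my_lr / 2
for param_group in optimizer.param_groups:
    param_group['lr'] = my_lr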
Example #5
def train(config):

    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    device = torch.device(config.device)
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    if config.use_gpu:
        torch.cuda.manual_seed_all(config.seed)

    # Dataset
    infection_train = Infection_Dataset('train')
    infection_test = Infection_Dataset('test')
    infection_val = Infection_Dataset('val')

    covid_train = Covid_Dataset('train')
    covid_test = Covid_Dataset('test')
    covid_val = Covid_Dataset('val')

    # Dataloader from dataset
    infection_train_loader = DataLoader(infection_train,
                                        batch_size=config.batch_size,
                                        shuffle=True)
    infection_test_loader = DataLoader(infection_test,
                                       batch_size=config.batch_size,
                                       shuffle=True)
    infection_val_loader = DataLoader(infection_val,
                                      batch_size=config.batch_size,
                                      shuffle=True)

    covid_train_loader = DataLoader(covid_train,
                                    batch_size=config.batch_size,
                                    shuffle=True)
    covid_test_loader = DataLoader(covid_test,
                                   batch_size=config.batch_size,
                                   shuffle=True)
    covid_val_loader = DataLoader(covid_val,
                                  batch_size=config.batch_size,
                                  shuffle=True)

    # L2 regularization parameter
    l2_lambda = 0.001

    # Instantiate model, criterion and optimizer
    model = VGG()
    if config.use_gpu:
        model.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=config.lr)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)
    best_acc = -0.1
    best_epoch = -1
    start_time = time.time()

    # Train the model that classifies normal and infection images
    print("***** Training the first classifier *****")
    for epoch in range(config.epochs):
        total_loss = 0.0
        model.train()
        for images_data, target_labels in tqdm(infection_train_loader):
            # images_data: [batch_size, 1, 150, 150]
            # target_labels: [batch_size, 2]
            if config.use_gpu:
                images_data = images_data.cuda()
                target_labels = target_labels.cuda()
            optimizer.zero_grad()
            predicted_labels = model(images_data)
            loss = criterion(predicted_labels, target_labels)

            # L2 regularization: accumulate the penalty over all parameters,
            # then add it to the loss once (adding it inside the loop would
            # count it multiple times)
            l2_reg = torch.tensor(0.)
            if config.use_gpu:
                l2_reg = l2_reg.cuda()
            for param in model.parameters():
                l2_reg += torch.norm(param)
            loss = loss + l2_lambda * l2_reg
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Evaluate the performance and save the model parameters each epoch
        train_acc, train_loss = evaluate(infection_train_loader, model)
        val_acc, val_loss = evaluate(infection_val_loader, model)
        torch.save(model.state_dict(),
                   './checkpoints/' + str(epoch) + '_params_infection.pth')

        # Save the best performing model parameters based on validation accuracy
        if val_acc > best_acc:
            best_acc = val_acc
            best_epoch = epoch
            torch.save(model.state_dict(),
                       './checkpoints/' + 'best_params_infection.pth')
        print(
            f"{now()} Epoch{epoch}: train_loss: {train_loss}, val_loss: {val_loss}, train_acc: {train_acc}, val_acc: {val_acc}"
        )
        lr_scheduler.step()

        # Record loss and accuracies for learning curve plots
        fieldnames = [
            'epoch', 'train_loss', 'val_loss', 'train_acc', 'val_acc'
        ]
        out_dict = {
            'epoch': epoch,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'train_acc': train_acc,
            'val_acc': val_acc
        }
        with open('./outputs/infection.csv', 'a') as out_f:
            writer = DictWriter(out_f, fieldnames=fieldnames)
            if epoch == 0:
                writer.writeheader()
            writer.writerow(out_dict)

    end_time = time.time()
    print("*" * 20)
    print(
        f"{now()} finished; epoch {best_epoch} best_acc: {best_acc}, time/epoch: {(end_time-start_time)/config.epochs}"
    )
    print()

    # Instantiate another model
    model_covid = VGG()
    if config.use_gpu:
        model_covid.to(device)
    optimizer_covid = optim.AdamW(model_covid.parameters(), lr=config.lr)
    lr_scheduler_covid = optim.lr_scheduler.StepLR(optimizer_covid,
                                                   step_size=3,
                                                   gamma=0.7)
    best_acc_covid = -0.1
    best_epoch_covid = -1
    start_time_covid = time.time()

    # Train another model that classifies covid and non-covid images
    print("***** Training the second classifier *****")
    for epoch_covid in range(config.epochs):
        total_loss_covid = 0.0
        model_covid.train()
        for images_data, target_labels in tqdm(covid_train_loader):
            # images_data: [batch_size, 1, 150, 150]
            # target_labels: [batch_size, 2]
            if config.use_gpu:
                images_data = images_data.cuda()
                target_labels = target_labels.cuda()
            optimizer_covid.zero_grad()
            predicted_labels = model_covid(images_data)
            loss = criterion(predicted_labels, target_labels)

            # L2 regularization: add the accumulated penalty to the loss once,
            # outside the parameter loop
            l2_reg = torch.tensor(0.)
            if config.use_gpu:
                l2_reg = l2_reg.cuda()
            for param in model_covid.parameters():
                l2_reg += torch.norm(param)
            loss = loss + l2_lambda * l2_reg
            loss.backward()
            optimizer_covid.step()
            total_loss_covid += loss.item()

        # Evaluate the performance and save the model parameters each epoch
        train_acc_covid, train_loss_covid = evaluate(covid_train_loader,
                                                     model_covid)
        val_acc_covid, val_loss_covid = evaluate(covid_val_loader,
                                                 model_covid)
        torch.save(model_covid.state_dict(),
                   './checkpoints/' + str(epoch_covid) + '_params_covid.pth')

        # Save the best performing model parameters based on validation accuracy
        if val_acc_covid > best_acc_covid:
            best_acc_covid = val_acc_covid
            best_epoch_covid = epoch_covid
            torch.save(model_covid.state_dict(),
                       './checkpoints/' + 'best_params_covid.pth')
        print(
            f"{now()} epoch {epoch_covid}: train_loss: {train_loss_covid}, val_loss: {val_loss_covid}, train_acc: {train_acc_covid}, val_acc_covid: {val_acc_covid}"
        )
        lr_scheduler_covid.step()

        # Record loss and accuracies for learning curve plots
        fieldnames = [
            'epoch', 'train_loss', 'val_loss', 'train_acc', 'val_acc'
        ]
        out_dict = {
            'epoch': epoch_covid,
            'train_loss': train_loss_covid,
            'val_loss': val_loss_covid,
            'train_acc': train_acc_covid,
            'val_acc': val_acc_covid
        }
        # write the covid results to their own file rather than infection.csv
        with open('./outputs/covid.csv', 'a') as out_f:
            writer = DictWriter(out_f, fieldnames=fieldnames)
            if epoch_covid == 0:
                writer.writeheader()
            writer.writerow(out_dict)
    end_time = time.time()
    print("*" * 20)
    print(
        f"{now()} finished; epoch {best_epoch_covid} best_acc_covid: {best_acc_covid}, time/epoch: {(end_time-start_time)/config.epochs}"
    )
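A side note on this example: AdamW already applies decoupled weight decay internally, so the manual penalty loop above could be dropped in favor of the optimizer's own weight_decay argument. It is not an exact equivalent (the manual term sums unsquared parameter norms), but it is the standard way to get the same regularizing effect:

import torch.optim as optim

# Decoupled weight decay built into AdamW; no manual penalty loop needed.
# The 0.001 mirrors the l2_lambda used above.
optimizer = optim.AdamW(model.parameters(), lr=config.lr, weight_decay=0.001)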
Example #6
            target = target.cuda()

            output = net(images)
            loss = loss_func(output, target)

            opt.zero_grad()
            loss.backward()
            opt.step()
            losses.append(loss.item())

            print("Epoch: %4d, step: %4d, loss: %.3f" % (epoch, i, loss.item()))

        mean_loss = np.mean(losses)
        lr_decay.step()

        writer.add_scalar('train/loss', mean_loss, epoch)
        writer.add_scalar('learning rate', opt.param_groups[0]['lr'], epoch)
        print("=============== Mean loss in epoch %4d is: %.3f ===============" % (epoch, mean_loss))

        if epoch % 5 == 0:
            precision, recall, accuracy = valid_accuracy(valid_loader, net)
            print("=============== Epoch %4d - precision: %.3f, recall: %.3f, accuracy: %.3f ==============="
                  % (epoch, precision, recall, accuracy))
            writer.add_scalars('validation', {'precision': precision,
                                              'recall': recall,
                                              'accuracy': accuracy}, epoch)
            torch.save({'epoch': epoch, 'lr': opt.param_groups[0]['lr'], 'state_dict': net.state_dict()},
                       "./modules/vgg-%d-%.3f.pth.tar" % (epoch, mean_loss))

    writer.close()
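The valid_accuracy helper used above is not shown. A minimal sketch matching its call signature, assuming binary classification with class-index targets where class 1 is the positive class:

import torch

def valid_accuracy(valid_loader, net):
    """Return (precision, recall, accuracy) of `net` over `valid_loader`."""
    net.eval()
    tp = fp = fn = correct = total = 0
    with torch.no_grad():
        for images, target in valid_loader:
            images, target = images.cuda(), target.cuda()
            preds = net(images).argmax(dim=1)
            tp += ((preds == 1) & (target == 1)).sum().item()
            fp += ((preds == 1) & (target == 0)).sum().item()
            fn += ((preds == 0) & (target == 1)).sum().item()
            correct += (preds == target).sum().item()
            total += target.size(0)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    return precision, recall, correct / total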