Example #1
import torch
from torchvision.models import vgg19
from my_snip.clock import TrainClock, AvgMeter

# net, args and epoch_num are defined earlier in the original script
net.cuda()
# TODO: weight initialization?

optimizer = torch.optim.SGD(net.parameters(),
                            lr=0.001,
                            momentum=0.9,
                            weight_decay=args.weight_decay)
#optimizer = torch.optim.Adam(net.parameters(), lr = 0.001, weight_decay=args.weight_decay)
vgg_perceptual_loss = vgg19(pretrained=True)

vgg_perceptual_loss.cuda()
vgg_perceptual_loss.eval()
for p in vgg_perceptual_loss.parameters():
    p.requires_grad = False
clock = TrainClock()
clock.epoch = args.start_epoch
epoch_loss = AvgMeter('loss')
p0_loss = AvgMeter('conv_1_loss')
p1_loss = AvgMeter('conv_2_loss')
p2_loss = AvgMeter('conv_3_loss')
p3_loss = AvgMeter('conv_4_loss')
p4_loss = AvgMeter('conv_5_loss')
p5_loss = AvgMeter('conv_55_loss')
data_time_m = AvgMeter('data time')
batch_time_m = AvgMeter('train time')

for e_ in range(epoch_num):

    # torch.cuda.empty_cache()
    net.train()
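
The meters above (conv_1 … conv_5) suggest a per-layer VGG perceptual loss. As a rough sketch of how such per-layer terms can be computed (an illustration only, not the original training loop; the layer indices are assumptions):

import torch
import torch.nn.functional as F

def per_layer_perceptual_losses(vgg_features, output, target,
                                layer_ids=(1, 6, 11, 20, 29)):
    # vgg_features: vgg19(pretrained=True).features, frozen and in eval mode.
    # layer_ids picks the activations to compare; these indices are an
    # assumption, not taken from the original code.
    losses = []
    x, y = output, target
    for i, layer in enumerate(vgg_features):
        x = layer(x)
        with torch.no_grad():
            y = layer(y)
        if i in layer_ids:
            losses.append(F.l1_loss(x, y))
    return losses

Each element of the returned list could then feed one of the AvgMeter objects above.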
Example #2
import argparse
import os

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision.models import vgg19
from tensorboardX import SummaryWriter

# network, critic_network, save_args, save_checkpoint, config, get_dataloaders,
# TrainClock, train, test_on_benchmark_two_stage and evaluate_on_val_two_stage
# are project-local helpers from the surrounding code base.


def main():

    parser = argparse.ArgumentParser()

    parser.add_argument('--epochs',
                        default=200,
                        type=int,
                        help='epoch number(Default:200)')
    parser.add_argument('--start_epoch',
                        default=0,
                        type=int,
                        help='start epoch number')
    parser.add_argument('-b',
                        '--batch_size',
                        default=6,
                        type=int,
                        help='mini-batch size(Default:6)')
    parser.add_argument('--lr',
                        '--learning_rate',
                        default=1e-3,
                        type=float,
                        help='initial learning rate(Default:1e-3)')
    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        help='The path for checkpoint file')
    parser.add_argument('--exp',
                        type=str,
                        default='test',
                        help='The name of this exp')

    parser.add_argument('--content',
                        type=float,
                        default=10.0,
                        help='the weight of content loss(Default:10.0)')
    parser.add_argument('--tv',
                        type=float,
                        default=3e-3,
                        help='the weight of TV loss(Default:0.003)')
    parser.add_argument('--adv',
                        type=float,
                        default=3.00,
                        help='the weight of adv loss(Default:3.0)')

    parser.add_argument('--first_stage',
                        type=str,
                        default='./exps/2_baseline/checkpoint.pth.tar',
                        help='first stage model')

    args = parser.parse_args()
    base_dir = './twostageExps/'
    exp_dir = os.path.join(base_dir, args.exp)
    base_results_dir = os.path.join(exp_dir, 'results/')
    best_metric = 0
    # os.makedirs creates base_dir and exp_dir along the way
    os.makedirs(base_results_dir, exist_ok=True)

    save_args(args, exp_dir)

    global AdLossWeight
    global TvLossWeight
    global ContentLossWeight
    AdLossWeight = args.adv
    TvLossWeight = args.tv
    ContentLossWeight = args.content
    log_dir = os.path.join('./twostageLogs', args.exp)

    writer = SummaryWriter(log_dir)

    first_stage = network()
    generator = network()
    critic = critic_network()
    optimizer_G = optim.Adam(generator.parameters(), args.lr)
    optimizer_C = optim.Adam(critic.parameters(), args.lr)
    scheduler_C = ReduceLROnPlateau(optimizer_C,
                                    'min',
                                    factor=0.2,
                                    patience=10,
                                    verbose=True)
    scheduler_G = ReduceLROnPlateau(optimizer_G,
                                    'min',
                                    factor=0.2,
                                    patience=10,
                                    verbose=True)

    if args.resume is not None:
        assert os.path.exists(args.resume), 'model does not exist!'
        print('=> loading checkpoint {}'.format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        generator.load_state_dict(checkpoint['generator'])
        critic.load_state_dict(checkpoint['critic'])
        #best_metric = checkpoint['best_metric']
        optimizer_G.load_state_dict(checkpoint['optimizer_G'])
        optimizer_C.load_state_dict(checkpoint['optimizer_C'])

        print('=> loaded checkpoint {} - epoch:{} - best_metric:{}'.format(
            args.resume, args.start_epoch, best_metric))
    else:
        print('No checkpoint. A new beginning')

    if args.first_stage is not None:
        assert os.path.exists(
            args.first_stage), 'first stage model does not exist!'
        print('=> loading first stage model {}'.format(args.first_stage))
        first_stage_checkpoint = torch.load(args.first_stage)
        first_stage.load_state_dict(first_stage_checkpoint['generator'])

        for p in first_stage.parameters():
            p.requires_grad = False

    # pretrained features are needed for a meaningful perceptual loss
    vgg_for_perceptual_loss = vgg19(pretrained=True)
    for p in vgg_for_perceptual_loss.parameters():
        p.requires_grad = False

    generator.cuda()
    critic.cuda()
    vgg_for_perceptual_loss.cuda()
    vgg_for_perceptual_loss.eval()
    first_stage.cuda()
    first_stage.eval()
    clock = TrainClock()
    clock.epoch = args.start_epoch
    data_dir = config.data_dir
    train_loader = get_dataloaders(os.path.join(data_dir, 'train.json'),
                                   batch_size=args.batch_size,
                                   shuffle=True)
    valid_loader = get_dataloaders(os.path.join(config.data_dir, 'val.json'),
                                   batch_size=args.batch_size,
                                   shuffle=True)
    print('Begin training')

    for epoch in range(args.start_epoch, args.epochs):

        results_dir = os.path.join(base_results_dir, '{}'.format(epoch))
        if not os.path.exists(results_dir):
            os.mkdir(results_dir)

        train(first_stage, generator, critic, optimizer_G, optimizer_C,
              train_loader, vgg_for_perceptual_loss, clock, writer, 2)

        save_checkpoint(
            {
                'epoch': clock.epoch,
                'generator': generator.state_dict(),
                'critic': critic.state_dict(),
                'optimizer_G': optimizer_G.state_dict(),
                'optimizer_C': optimizer_C.state_dict(),
            },
            is_best=True,
            prefix=exp_dir)
        torch.cuda.empty_cache()
        test_on_benchmark_two_stage(first_stage, generator, results_dir)
        torch.cuda.empty_cache()
        CriticRealLoss, ContentLoss = evaluate_on_val_two_stage(
            first_stage, generator, critic, valid_loader,
            vgg_for_perceptual_loss, clock, writer,
            os.path.join(exp_dir, 'valresults.txt'))
        scheduler_C.step(CriticRealLoss)
        scheduler_G.step(ContentLoss)
        torch.cuda.empty_cache()
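
save_checkpoint is a project-local helper. A minimal sketch consistent with how it is called above, assuming the common PyTorch convention of writing checkpoint.pth.tar and copying it to model_best.pth.tar (assumed behavior, not the project's actual code):

import os
import shutil
import torch

def save_checkpoint(state, is_best, prefix):
    # Always write the latest state; keep a separate copy of the best one.
    path = os.path.join(prefix, 'checkpoint.pth.tar')
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(prefix, 'model_best.pth.tar'))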
Example #3

import numpy as np
from torch.autograd import Variable
from DataSet import Dataset
import my_snip.metrics as metrics
from my_snip.clock import TrainClock, AvgMeter
from my_snip.config import MultiStageLearningRatePolicy
import torch.nn as nn
import torch.optim as optim

from TorchDataset import TorchDataset
from torch.utils.data import DataLoader
import time

from tensorboardX import SummaryWriter

clock = TrainClock()

clock.epoch = 21
base_learning_rate = 0.01
base_batch_size = 16
ratio = 1

weight_decay = 1e-4
gpu_ids = [0, 1, 2, 3]
gpu_id = gpu_ids[0]  # assumption: gpu_id is used below but never defined in the snippet
num_workers = 8
num_workers *= len(gpu_ids)

minibatch_size = base_batch_size * len(gpu_ids) * ratio
learning_rate = base_learning_rate * ratio
# SEResNeXt, init_trained_weights, model_path, num_epoch and
# adjust_learning_rate come from the surrounding project.
net = SEResNeXt(101, num_class=80)
init_trained_weights(net, model_path)

net.cuda(gpu_id)


criterion = nn.CrossEntropyLoss().cuda(gpu_id)

optimizer = optim.SGD(net.parameters(),
                      lr=0.01,
                      momentum=0.9,
                      weight_decay=1e-4)
#optimizer = optim.Adam(net.parameters(), lr = 0.01, weight_decay = 1e-4)
clock = TrainClock()

ud_loss_m = AvgMeter('ud_loss')
accuracy_m = AvgMeter('top-1-accuracy')
top3_accuracy_m = AvgMeter('top-3-accuracy')
data_time_m = AvgMeter('Reading Batch Data')
batch_time_m = AvgMeter('Batch time')

for epoch_i in range(num_epoch):

    net.train()
    print('Epoch {} starts'.format(epoch_i))
    clock.tock()
    epoch_time = time.time()

    adjust_learning_rate(optimizer, clock.epoch)
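
adjust_learning_rate is also project-local; given the MultiStageLearningRatePolicy import above, it is presumably a stepwise schedule. A hypothetical sketch (the stage boundaries and rates are assumptions, not the project's values):

def adjust_learning_rate(optimizer, epoch):
    # Stepwise decay: (until_epoch, lr) stages; values are placeholders.
    stages = [(30, 0.01), (60, 0.001), (90, 0.0001)]
    lr = stages[-1][1]
    for until_epoch, stage_lr in stages:
        if epoch < until_epoch:
            lr = stage_lr
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr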