net.cuda() ## init ?? optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=args.weight_decay) #optimizer = torch.optim.Adam(net.parameters(), lr = 0.001, weight_decay=args.weight_decay) vgg_perceptual_loss = vgg19(pretrained=True) vgg_perceptual_loss.cuda() vgg_perceptual_loss.eval() for p in vgg_perceptual_loss.parameters(): p.requires_grad = False clock = TrainClock() clock.epoch = args.start_epoch epoch_loss = AvgMeter('loss') p0_loss = AvgMeter('conv_1_loss') p1_loss = AvgMeter('conv_2_loss') p2_loss = AvgMeter('conv_3_loss') p3_loss = AvgMeter('conv_4_loss') p4_loss = AvgMeter('conv_5_loss') p5_loss = AvgMeter('conv_55_loss') data_time_m = AvgMeter('data time') batch_time_m = AvgMeter('train time') for e_ in range(epoch_num): # torch.cuda.empty_cache() net.train()
def main():
    """Train stage two of a two-stage adversarial model.

    Parses CLI arguments, builds the (frozen) first-stage network, the
    generator/critic pair with their Adam optimizers and LR schedulers,
    optionally resumes from a checkpoint, then runs the train /
    checkpoint / benchmark / validation loop for the requested epochs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', default=200, type=int, help='epoch number(Default:200)')
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch number')
    parser.add_argument('-b', '--batch_size', default=6, type=int, help='mini-batch size(Default:6)')
    parser.add_argument('--lr', '--learning_rate', default=1e-3, type=float, help='initial learning rate(Default:1e-3)')
    parser.add_argument('--resume', type=str, default=None, help='The path for checkpoint file')
    parser.add_argument('--exp', type=str, default='test', help='The name of this exp')
    parser.add_argument('--content', type=float, default=10.0, help='the weight of content loss(Default:10.0)')
    parser.add_argument('--tv', type=float, default=3e-3, help='the weight of TV loss(Default:0.003)')
    parser.add_argument('--adv', type=float, default=3.00, help='the weight of adv loss(Default:1.0)')
    parser.add_argument('--first_stage', type=str, default='./exps/2_baseline/checkpoint.pth.tar', help='first stage model')
    args = parser.parse_args()

    base_dir = './twostageExps/'
    exp_dir = os.path.join(base_dir, args.exp)
    base_results_dir = os.path.join(exp_dir, 'results/')
    best_metric = 0
    # makedirs(exist_ok=True) creates the whole chain and replaces the three
    # race-prone exists()/mkdir() pairs.
    os.makedirs(base_results_dir, exist_ok=True)
    save_args(args, exp_dir)

    # Loss weights are consumed as globals by the loss functions elsewhere
    # in this file.
    global AdLossWeight
    global TvLossWeight
    global ContentLossWeight
    AdLossWeight = args.adv
    TvLossWeight = args.tv
    ContentLossWeight = args.content

    log_dir = os.path.join('./twostageLogs', args.exp)
    writer = SummaryWriter(log_dir)

    first_stage = network()
    generator = network()
    critic = critic_network()
    optimizer_G = optim.Adam(generator.parameters(), args.lr)
    optimizer_C = optim.Adam(critic.parameters(), args.lr)
    # BUG FIX: the schedulers were cross-wired (scheduler_C wrapped
    # optimizer_G and scheduler_G wrapped optimizer_C), so each validation
    # loss throttled the *other* network's learning rate.
    scheduler_G = ReduceLROnPlateau(optimizer_G, 'min', factor=0.2, patience=10, verbose=True)
    scheduler_C = ReduceLROnPlateau(optimizer_C, 'min', factor=0.2, patience=10, verbose=True)

    if args.resume is not None:
        assert os.path.exists(args.resume), 'model does not exist!'
        print('=> loading checkpoint {}'.format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        generator.load_state_dict(checkpoint['generator'])
        critic.load_state_dict(checkpoint['critic'])
        #best_metric = checkpoint['best_metric']
        # BUG FIX: previously the optimizer *objects* were replaced by the
        # pickled checkpoint entries, detaching them from the schedulers
        # built above. Restore their state in place instead. Older
        # checkpoints pickled whole Optimizer objects; accept both formats.
        opt_g_state = checkpoint['optimizer_G']
        opt_c_state = checkpoint['optimizer_C']
        if isinstance(opt_g_state, optim.Optimizer):
            opt_g_state = opt_g_state.state_dict()
        if isinstance(opt_c_state, optim.Optimizer):
            opt_c_state = opt_c_state.state_dict()
        optimizer_G.load_state_dict(opt_g_state)
        optimizer_C.load_state_dict(opt_c_state)
        print('=> loaded checkpoint {} - epoch:{} - best_metric:{}'.format(
            args.resume, args.start_epoch, best_metric))
    else:
        print('No checkpoint. A new begining')

    if args.first_stage is not None:
        assert os.path.exists(
            args.first_stage), 'first stage model does not exist!'
        print('=> loading first stage model {}'.format(args.first_stage))
        first_stage_checkpoint = torch.load(args.first_stage)
        first_stage.load_state_dict(first_stage_checkpoint['generator'])
        # Stage one is a fixed front-end: never update its weights.
        for p in first_stage.parameters():
            p.requires_grad = False

    # BUG FIX: vgg19() was instantiated without pretrained weights, so the
    # perceptual loss compared features of a randomly initialised network
    # (the sibling script in this file uses pretrained=True).
    vgg_for_perceptual_loss = vgg19(pretrained=True)
    for p in vgg_for_perceptual_loss.parameters():
        p.requires_grad = False

    generator.cuda()
    critic.cuda()
    vgg_for_perceptual_loss.cuda()
    vgg_for_perceptual_loss.eval()
    first_stage.cuda()
    first_stage.eval()

    clock = TrainClock()
    clock.epoch = args.start_epoch

    data_dir = config.data_dir
    train_loader = get_dataloaders(os.path.join(data_dir, 'train.json'),
                                   batch_size=args.batch_size,
                                   shuffle=True)
    valid_loader = get_dataloaders(os.path.join(config.data_dir, 'val.json'),
                                   batch_size=args.batch_size,
                                   shuffle=True)

    print('Begin training')
    for epoch in range(args.start_epoch, args.epochs):
        # One sub-directory of benchmark outputs per epoch.
        results_dir = os.path.join(base_results_dir, '{}'.format(epoch))
        os.makedirs(results_dir, exist_ok=True)

        train(first_stage, generator, critic, optimizer_G, optimizer_C,
              train_loader, vgg_for_perceptual_loss, clock, writer, 2)
        # Persist state_dicts (not whole objects) so checkpoints stay
        # loadable across code changes; matches the resume path above.
        save_checkpoint(
            {
                'epoch': clock.epoch,
                'generator': generator.state_dict(),
                'critic': critic.state_dict(),
                'optimizer_G': optimizer_G.state_dict(),
                'optimizer_C': optimizer_C.state_dict(),
            },
            is_best=True,
            prefix=exp_dir)

        torch.cuda.empty_cache()
        test_on_benchmark_two_stage(first_stage, generator, results_dir)
        torch.cuda.empty_cache()
        CriticRealLoss, ContentLoss = evaluate_on_val_two_stage(
            first_stage, generator, critic, valid_loader,
            vgg_for_perceptual_loss, clock, writer,
            os.path.join(exp_dir, 'valresults.txt'))
        # Each scheduler now steps on the loss its own optimizer minimises.
        scheduler_C.step(CriticRealLoss)
        scheduler_G.step(ContentLoss)
        torch.cuda.empty_cache()
# --- Imports and module-level hyperparameters for a multi-GPU training
# --- script (fragment). NOTE(review): the misspelled names
# --- `base_learing_rate` / `learing_rate` are kept as-is because other
# --- parts of the file may reference them — confirm before renaming.
import numpy as np
from torch.autograd import Variable
from DataSet import Dataset
import my_snip.metrics as metrics
from my_snip.clock import TrainClock, AvgMeter
from my_snip.config import MultiStageLearningRatePolicy
import torch.nn as nn
import torch.optim as optim
from TorchDataset import TorchDataset
from torch.utils.data import DataLoader
import time
from tensorboardX import SummaryWriter

clock = TrainClock()
clock.epoch = 21  # presumably resuming from epoch 21 — verify against checkpoint

# In[25]:

# Base values are per-16-sample-batch; `ratio` scales batch size and LR
# together (linear scaling rule).
base_learing_rate = 0.01
base_batch_size = 16
ratio = 1
weight_decay = 1e-4
gpu_ids = [0, 1, 2, 3]
num_workers = 8
num_workers *= len(gpu_ids)  # loader workers scale with the GPU count
minibatch_size = 16 * len(gpu_ids) * ratio
learing_rate = base_learing_rate * ratio
# --- Training setup for an 80-class SE-ResNeXt classifier (fragment; the
# --- epoch loop continues beyond this chunk). Assumes `SEResNeXt`,
# --- `init_trained_weights`, `model_path`, `gpu_id`, `num_epoch` and
# --- `adjust_learning_rate` are defined earlier in the file.
net = SEResNeXt(101, num_class=80)
init_trained_weights(net, model_path)  # warm-start from a pretrained model
net.cuda(gpu_id)

# In[22]:

criterion = nn.CrossEntropyLoss().cuda(gpu_id)
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
#optimizer = optim.Adam(net.parameters(), lr = 0.01, weight_decay = 1e-4)
clock = TrainClock()
# Running averages reset/reported per epoch.
ud_loss_m = AvgMeter('ud_loss')
accuracy_m = AvgMeter('top-1-accuracy')
top3_accuracy_m = AvgMeter('top-3-accuracy')
data_time_m = AvgMeter('Reading Batch Data')
batch_time_m = AvgMeter('Batch time')
# Epoch loop — body continues past this chunk of the file.
for epoch_i in range(num_epoch):
    net.train()
    print('Epoch {} starts'.format(epoch_i))
    clock.tock()
    epoch_time = time.time()
    adjust_learning_rate(optimizer, clock.epoch)