def main():
    """Train SegNet on Cityscapes, optionally resuming from a snapshot.

    Relies on module-level configuration (``train_args``, ``val_args``,
    ``ckpt_path``, ``exp_name``, ``num_classes``, ``ignored_label``,
    ``train_record``) and the ``train`` / ``validate`` helpers defined
    elsewhere in this file.
    """
    net = SegNet(num_classes=num_classes).cuda()
    if len(train_args['snapshot']) == 0:
        curr_epoch = 0
    else:
        # print() as a function so the script runs under Python 2 and 3.
        print('training resumes from ' + train_args['snapshot'])
        net.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name, train_args['snapshot'])))
        # The fixed indices below assume a snapshot filename of the form
        # 'epoch_<E>_..._<val_loss>_..._..._<mean_iu>_...' produced by the
        # checkpoint saver -- TODO(review): confirm against the saving code.
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1])
        train_record['best_val_loss'] = float(split_snapshot[3])
        train_record['corr_mean_iu'] = float(split_snapshot[6])
        train_record['corr_epoch'] = curr_epoch
    net.train()

    # ImageNet mean/std: used to normalize inputs and to undo that
    # normalization when restoring images for visualization.
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    train_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.RandomCrop(train_args['input_size']),
        simul_transforms.RandomHorizontallyFlip()
    ])
    val_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.CenterCrop(train_args['input_size'])
    ])
    img_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = standard_transforms.Compose([
        expanded_transforms.MaskToTensor(),
        expanded_transforms.ChangeLabel(ignored_label, num_classes - 1)
    ])
    restore_transform = standard_transforms.Compose([
        expanded_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage()
    ])

    train_set = CityScapes('train', simul_transform=train_simul_transform,
                           transform=img_transform,
                           target_transform=target_transform)
    train_loader = DataLoader(train_set, batch_size=train_args['batch_size'],
                              num_workers=16, shuffle=True)
    val_set = CityScapes('val', simul_transform=val_simul_transform,
                         transform=img_transform,
                         target_transform=target_transform)
    val_loader = DataLoader(val_set, batch_size=val_args['batch_size'],
                            num_workers=16, shuffle=False)

    # The remapped ignored label occupies the last class slot; a zero
    # weight keeps it out of the loss.
    weight = torch.ones(num_classes)
    weight[num_classes - 1] = 0
    criterion = CrossEntropyLoss2d(weight).cuda()

    # Partition parameters in a single pass instead of iterating
    # net.named_parameters() four times.  Biases get double the learning
    # rate and no weight decay; 'dec' layers (presumably the freshly
    # initialized decoder -- TODO confirm) get the new-layer LR, everything
    # else the pretrained LR.
    new_bias, new_weight, old_bias, old_weight = [], [], [], []
    for name, param in net.named_parameters():
        is_bias = name[-4:] == 'bias'
        if 'dec' in name:
            (new_bias if is_bias else new_weight).append(param)
        else:
            (old_bias if is_bias else old_weight).append(param)
    # don't use weight_decay for bias
    optimizer = optim.SGD([
        {'params': new_bias, 'lr': 2 * train_args['new_lr']},
        {'params': new_weight, 'lr': train_args['new_lr'],
         'weight_decay': train_args['weight_decay']},
        {'params': old_bias, 'lr': 2 * train_args['pretrained_lr']},
        {'params': old_weight, 'lr': train_args['pretrained_lr'],
         'weight_decay': train_args['weight_decay']}
    ], momentum=0.9, nesterov=True)

    if len(train_args['snapshot']) > 0:
        optimizer.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name,
                             'opt_' + train_args['snapshot'])))
        # Loading optimizer state restores the LRs saved in the snapshot;
        # overwrite them with the currently configured values.
        optimizer.param_groups[0]['lr'] = 2 * train_args['new_lr']
        optimizer.param_groups[1]['lr'] = train_args['new_lr']
        optimizer.param_groups[2]['lr'] = 2 * train_args['pretrained_lr']
        optimizer.param_groups[3]['lr'] = train_args['pretrained_lr']

    if not os.path.exists(ckpt_path):
        os.mkdir(ckpt_path)
    if not os.path.exists(os.path.join(ckpt_path, exp_name)):
        os.mkdir(os.path.join(ckpt_path, exp_name))

    for epoch in range(curr_epoch, train_args['epoch_num']):
        train(train_loader, net, criterion, optimizer, epoch)
        validate(val_loader, net, criterion, optimizer, epoch,
                 restore_transform)
def main():
    """Train SegNet on Pascal VOC, validating after every epoch.

    Uses module-level configuration (``num_classes``, ``ignored_label``,
    ``train_path``, ``val_path``, ``ckpt_path``) and the ``train`` /
    ``validate`` / ``adjust_lr`` helpers defined elsewhere in this file.
    """
    batch_size_train = 8
    batch_size_val = 8
    total_epochs = 200
    log_interval = 50
    learning_rate = 1e-4

    net = SegNet(pretrained=True, num_classes=num_classes).cuda()
    start_epoch = 0
    net.train()

    # ImageNet statistics shared by the normalize / de-normalize transforms.
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    # Joint (image, mask) augmentations for training.
    joint_train = SimultaneousCompose([
        SimultaneousRandomHorizontallyFlip(),
        SimultaneousRandomScale((0.9, 1.1)),
        SimultaneousRandomCrop((300, 500))
    ])
    image_train = transforms.Compose([
        RandomGaussianBlur(),
        transforms.ToTensor(),
        transforms.Normalize(*mean_std)
    ])
    # Validation only gets a deterministic resize.
    joint_val = SimultaneousCompose([SimultaneousScale((300, 500))])
    image_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(*mean_std)
    ])
    # Undoes the normalization so validation images can be visualized.
    restore = transforms.Compose([
        DeNormalize(*mean_std),
        transforms.ToPILImage()
    ])

    train_loader = DataLoader(
        VOC(train_path, simultaneous_transform=joint_train,
            transform=image_train, target_transform=MaskToTensor()),
        batch_size=batch_size_train, num_workers=8, shuffle=True)
    val_loader = DataLoader(
        VOC(val_path, simultaneous_transform=joint_val,
            transform=image_val, target_transform=MaskToTensor()),
        batch_size=batch_size_val, num_workers=8)

    criterion = CrossEntropyLoss2d(ignored_label=ignored_label)

    # Biases are kept out of weight decay.
    biases = [p for n, p in net.named_parameters() if n[-4:] == 'bias']
    weights = [p for n, p in net.named_parameters() if n[-4:] != 'bias']
    optimizer = optim.SGD(
        [{'params': biases}, {'params': weights, 'weight_decay': 5e-4}],
        lr=learning_rate, momentum=0.9, nesterov=True)

    if not os.path.exists(ckpt_path):
        os.mkdir(ckpt_path)

    best = [1e9, -1, -1]  # [best_val_loss, best_mean_iu, best_epoch]
    for epoch in range(start_epoch, total_epochs):
        train(train_loader, net, criterion, optimizer, epoch, log_interval)
        # Decay the learning rate by 3x every 20 epochs.
        if (epoch + 1) % 20 == 0:
            learning_rate /= 3
            adjust_lr(optimizer, learning_rate)
        validate(epoch, val_loader, net, criterion, restore, best)