def main():
    """Train a U-Net on the Waldo dataset and write sample outputs.

    Parses command-line arguments, loads the optional core and U-Net
    configuration files, builds the train/val loaders and the model,
    optionally resumes from a checkpoint, runs the train/validate loop and
    finally calls ``sample`` on the validation loader.

    Side effects: sets the module-level ``args`` and ``best_loss``, writes
    checkpoints through ``save_checkpoint`` and creates ``<args.dir>/imgs``.
    Exits the process if a configuration path is given but does not exist.
    """
    global args, best_loss
    args = parser.parse_args()

    if args.tensorboard:
        from tensorboard_logger import configure
        print("Using tensorboard")
        configure("%s" % (args.dir))

    # loading core configuration
    c_config = CoreConfig()
    if args.core_config == '':
        # An empty path means "keep the defaults"; it must not fall through
        # to the existence check below, which would abort the run.
        print('No core config file given, using default core configuration')
    elif not os.path.exists(args.core_config):
        sys.exit('Cannot find the config file: {}'.format(args.core_config))
    else:
        c_config.read(args.core_config)
        print('Using core configuration from {}'.format(args.core_config))

    # loading Unet configuration
    u_config = UnetConfig()
    if args.unet_config == '':
        print('No unet config file given, using default unet configuration')
    elif not os.path.exists(args.unet_config):
        sys.exit('Cannot find the unet configuration file: {}'.format(
            args.unet_config))
    else:
        # need train_image_size for validation
        u_config.read(args.unet_config, args.train_image_size)
        print('Using unet configuration from {}'.format(args.unet_config))

    offset_list = c_config.offsets
    print("offsets are: {}".format(offset_list))

    # model configurations from core config
    num_classes = c_config.num_classes
    num_colors = c_config.num_colors
    num_offsets = len(c_config.offsets)
    # model configurations from unet config
    start_filters = u_config.start_filters
    up_mode = u_config.up_mode
    merge_mode = u_config.merge_mode
    depth = u_config.depth

    train_data = args.train_dir + '/train'
    val_data = args.train_dir + '/val'

    trainset = WaldoDataset(train_data, c_config, args.train_image_size)
    trainloader = torch.utils.data.DataLoader(
        trainset, num_workers=4, batch_size=args.batch_size, shuffle=True)

    valset = WaldoDataset(val_data, c_config, args.train_image_size)
    valloader = torch.utils.data.DataLoader(
        valset, num_workers=4, batch_size=args.batch_size)

    NUM_TRAIN = len(trainset)
    NUM_VAL = len(valset)
    NUM_ALL = NUM_TRAIN + NUM_VAL
    print('Total samples: {0} \n'
          'Using {1} samples for training, '
          '{2} samples for validation'.format(NUM_ALL, NUM_TRAIN, NUM_VAL))

    # create model
    model = UNet(num_classes, num_offsets,
                 in_channels=num_colors, depth=depth,
                 start_filts=start_filters,
                 up_mode=up_mode,
                 merge_mode=merge_mode).cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # The training loop used to store the best loss only under
            # 'best_prec1'; accept either key so such checkpoints still load.
            if 'best_loss' in checkpoint:
                best_loss = checkpoint['best_loss']
            else:
                best_loss = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        Train(trainloader, model, optimizer, epoch)
        val_loss = Validate(valloader, model, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                # 'best_loss' is what the resume path reads; 'best_prec1' is
                # kept so existing consumers of the old key keep working.
                'best_loss': best_loss,
                'best_prec1': best_loss,
            }, is_best)
    print('Best validation loss: ', best_loss)

    # visualize some example outputs
    outdir = '{}/imgs'.format(args.dir)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    sample(model, valloader, outdir, c_config)
def main():
    """Train a U-Net with class and offset outputs, then dump example images.

    Parses command-line arguments, builds the cached train/val/test tensors
    if any of them is missing, trains and validates for the configured
    number of epochs, and finally saves one training batch together with
    the model's predictions for it under ``imgs/``.

    Side effects: sets the module-level ``args`` and ``best_loss``, may write
    ``args.train_data`` / ``args.val_data`` / ``args.test_data``, writes
    checkpoints through ``save_checkpoint`` and PNG files under ``imgs/``.
    """
    global args, best_loss
    args = parser.parse_args()

    if args.tensorboard:
        print("Using tensorboard")
        configure("exp/%s" % (args.name))

    # Rebuild the cached splits unless all three files are present. The
    # validation file must be checked too, otherwise a missing val split
    # is only noticed when the dataset tries to load it.
    if not (os.path.exists(args.train_data)
            and os.path.exists(args.val_data)
            and os.path.exists(args.test_data)):
        train, val, test = DataProcess(args.train_path, args.test_path,
                                       0.9, args.img_channels)
        torch.save(train, args.train_data)
        torch.save(val, args.val_data)
        torch.save(test, args.test_data)

    s_trans = tsf.Compose([
        tsf.ToPILImage(),
        tsf.Resize((args.img_height, args.img_width)),
        tsf.ToTensor(),
    ])

    offset_list = [(1, 1), (0, -2)]

    # split the training set into training set and validation set
    trainset = Dataset(args.train_data, s_trans, offset_list,
                       args.num_classes, args.img_height, args.img_width)
    trainloader = torch.utils.data.DataLoader(
        trainset, num_workers=1, batch_size=args.batch_size)

    valset = Dataset(args.val_data, s_trans, offset_list,
                     args.num_classes, args.img_height, args.img_width)
    valloader = torch.utils.data.DataLoader(
        valset, num_workers=1, batch_size=args.batch_size)

    NUM_TRAIN = len(trainset)
    NUM_VAL = len(valset)
    NUM_ALL = NUM_TRAIN + NUM_VAL
    print('Total samples: {0} \n'
          'Using {1} samples for training, '
          '{2} samples for validation'.format(NUM_ALL, NUM_TRAIN, NUM_VAL))

    # create model
    model = UNet(args.num_classes, len(offset_list),
                 in_channels=3, depth=args.depth).cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # The training loop used to store the best loss only under
            # 'best_prec1'; accept either key so such checkpoints still load.
            if 'best_loss' in checkpoint:
                best_loss = checkpoint['best_loss']
            else:
                best_loss = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        Train(trainloader, model, optimizer, epoch)
        val_loss = Validate(valloader, model, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                # 'best_loss' is what the resume path reads; 'best_prec1' is
                # kept so existing consumers of the old key keep working.
                'best_loss': best_loss,
                'best_prec1': best_loss,
            }, is_best)
    print('Best validation loss: ', best_loss)

    # Visualize some predicted masks on training data to get a better
    # intuition about the performance. Comment it if not necessary.
    if not os.path.exists('imgs'):
        # save_image does not create the target directory.
        os.makedirs('imgs')

    datailer = iter(trainloader)
    # next(it) works on both Python 2 and 3; it.next() is Python 2 only.
    img, classification, bound = next(datailer)
    torchvision.utils.save_image(img, 'imgs/raw.png')
    for i in range(len(offset_list)):
        torchvision.utils.save_image(bound[:, i:i + 1, :, :],
                                     'imgs/bound_{}.png'.format(i))
    for i in range(args.num_classes):
        torchvision.utils.save_image(classification[:, i:i + 1, :, :],
                                     'imgs/class_{}.png'.format(i))

    img = torch.autograd.Variable(img).cuda()
    predictions = model(img)
    predictions = predictions.data
    # The first num_classes channels are treated as class outputs and the
    # remaining channels as the per-offset outputs.
    class_pred = predictions[:, :args.num_classes, :, :]
    bound_pred = predictions[:, args.num_classes:, :, :]
    for i in range(len(offset_list)):
        torchvision.utils.save_image(bound_pred[:, i:i + 1, :, :],
                                     'imgs/bound_pred{}.png'.format(i))
    for i in range(args.num_classes):
        torchvision.utils.save_image(class_pred[:, i:i + 1, :, :],
                                     'imgs/class_pred{}.png'.format(i))
def main():
    """Train a single-output U-Net on image/mask pairs and evaluate it.

    Parses command-line arguments, builds the cached train/val/test tensors
    if any of them is missing, trains and validates for the configured
    number of epochs, saves one training batch with its thresholded
    prediction, then reloads the best checkpoint and runs ``Test`` on the
    test loader.

    Side effects: sets the module-level ``args`` and ``best_loss``, may write
    ``args.train_data`` / ``args.val_data`` / ``args.test_data``, writes
    checkpoints through ``save_checkpoint`` and ``raw.png`` / ``mask.png`` /
    ``predicted.png`` in the working directory.
    """
    global args, best_loss
    args = parser.parse_args()

    if args.tensorboard:
        print("Using tensorboard")
        configure("exp/%s" % (args.name))

    # Rebuild the cached splits unless all three files are present. The
    # validation file must be checked too, otherwise a missing val split
    # is only noticed when the dataset tries to load it.
    if not (os.path.exists(args.train_data)
            and os.path.exists(args.val_data)
            and os.path.exists(args.test_data)):
        train, val, test = DataProcess(args.train_path, args.test_path,
                                       0.9, args.img_channels)
        t.save(train, args.train_data)
        t.save(val, args.val_data)
        t.save(test, args.test_data)

    # Transform for input images.
    s_trans = tsf.Compose([
        tsf.ToPILImage(),
        tsf.Resize((args.img_height, args.img_width)),
        tsf.ToTensor(),
    ])
    # Transform for masks: nearest-neighbour resizing, so no interpolated
    # in-between values are introduced.
    t_trans = tsf.Compose([
        tsf.ToPILImage(),
        tsf.Resize((args.img_height, args.img_width),
                   interpolation=PIL.Image.NEAREST),
        tsf.ToTensor(),
    ])

    # split the training set into training set and validation set
    trainset = TrainDataset(args.train_data, s_trans, t_trans)
    trainloader = t.utils.data.DataLoader(
        trainset, num_workers=1, batch_size=args.batch_size, shuffle=True)

    valset = TrainDataset(args.val_data, s_trans, t_trans)
    valloader = t.utils.data.DataLoader(
        valset, num_workers=1, batch_size=args.batch_size)

    NUM_TRAIN = len(trainset)
    NUM_VAL = len(valset)
    NUM_ALL = NUM_TRAIN + NUM_VAL
    print('Total samples: {0} \n'
          'Using {1} samples for training, '
          '{2} samples for validation'.format(NUM_ALL, NUM_TRAIN, NUM_VAL))

    testset = TestDataset(args.test_data, s_trans)
    testloader = t.utils.data.DataLoader(
        testset, num_workers=1, batch_size=1)

    # create model
    model = UNet(1, in_channels=3, depth=args.depth).cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = t.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # Restore into the global best_loss (not a stray local), and
            # accept the 'best_prec1' key the training loop used to write.
            if 'best_loss' in checkpoint:
                best_loss = checkpoint['best_loss']
            else:
                best_loss = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define optimizer
    optimizer = t.optim.Adam(model.parameters(), lr=1e-3)

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        Train(trainloader, model, optimizer, epoch)
        val_loss = Validate(valloader, model, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                # 'best_loss' is what the resume path reads; 'best_prec1' is
                # kept so existing consumers of the old key keep working.
                'best_loss': best_loss,
                'best_prec1': best_loss,
            }, is_best)
    print('Best validation loss: ', best_loss)

    # Visualize some predicted masks on training data to get a better
    # intuition about the performance. Comment it if not necessary.
    datailer = iter(trainloader)
    # next(it) works on both Python 2 and 3; it.next() is Python 2 only.
    img, mask = next(datailer)
    torchvision.utils.save_image(img, 'raw.png')
    torchvision.utils.save_image(mask, 'mask.png')
    img = t.autograd.Variable(img).cuda()
    img_pred = model(img)
    img_pred = img_pred.data
    # Cast the thresholded mask to float: save_image does float arithmetic
    # on its input, which is not reliable for byte/bool tensors.
    torchvision.utils.save_image((img_pred > 0.5).float(), 'predicted.png')

    # Load the best model and evaluate on test set
    checkpoint = t.load('exp/%s/' % (args.name) + 'model_best.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    Test(testloader, model)
init_util.print_network(model)
# model = nn.DataParallel(model, device_ids=[0])  # multi-GPU

# Every artefact of this run (log, latest and best weights) lives here.
output_dir = './output/{}'.format(args.save)
log = logger.Logger(output_dir)

best = [0, np.inf]  # [epoch, validation loss] of the best model so far
trigger = 0  # early-stop counter
for epoch in range(1, args.epochs + 1):
    common.adjust_learning_rate(optimizer, epoch, args)
    train_log = train(model, train_loader)
    val_log = val(model, val_loader)
    log.update(epoch, train_log, val_log)

    # Save checkpoint.
    state = {
        'net': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    torch.save(state, os.path.join(output_dir, 'latest_model.pth'))

    trigger += 1
    current_val_loss = val_log['Val Loss']
    if current_val_loss < best[1]:
        print('Saving best model')
        torch.save(state, os.path.join(output_dir, 'best_model.pth'))
        best[0] = epoch
        best[1] = current_val_loss
# Training script: builds the steel-defect dataset, trains `model` for
# `epoch` epochs and writes one weights file per epoch under `weights/`.
transform = transforms.Compose([
    utils.transforms.RandomMirror(),
    utils.transforms.ToTensor(),
    utils.transforms.Downsize(2),
])
dataset = utils.datasets.SteelDefectDataset(
    csv_file='train.csv',
    root_dir='data/severstal-steel-defect-detection',
    transform=transform)
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)
criterion = utils.loss.SegmentMSELoss()

# torch.save does not create missing directories.
os.makedirs('weights', exist_ok=True)

for e in range(1, epoch + 1):
    print('Epoch {}:'.format(e))
    total_loss = 0.0
    # `batch` counts processed batches (1-based) so that total_loss / batch
    # is the true running mean; it stays 0 if the loader is empty.
    batch = 0
    for batch, data in tqdm(enumerate(train_loader, start=1),
                            total=len(train_loader), leave=False):
        optimizer.zero_grad()
        imgs, cs, targets = data['img'], data['c'], data['target']
        imgs = imgs.to(device)
        targets = targets.to(device)
        out = model(imgs)
        loss = criterion(out, cs, targets)
        loss.backward()
        # .item() yields a Python float, so the running sum is a plain
        # number rather than a tensor.
        total_loss += loss.item()
        optimizer.step()
        if batch == 500:
            # Early progress report: mean loss over the first 500 batches.
            print(total_loss / batch)
    # max(..., 1) avoids a division by zero when no batch was processed.
    print('Loss: {:.3f}'.format(total_loss / max(batch, 1)))
    torch.save(model.state_dict(),
               os.path.join('weights', 'Unet_e{}.pth'.format(e)))