def train(opt):
    """Single-process training entry point.

    Builds the train/val dataloaders, instantiates the discriminator,
    generator and segmentation networks, restores optional checkpoints,
    sets up per-network Adam optimizers with MultiStepLR schedules, and
    hands everything to ``Trainer``.
    """
    # Defects are synthesized on the fly by DefectAdder, which yields an
    # (image, mask) pair — hence the list-aware ToTensorList/NormalizeList
    # instead of the plain torchvision transforms.
    train_tf = tv.transforms.Compose([
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        DefectAdder(mode=opt.defect_mode,
                    defect_shape=('line', ),
                    normal_only=True),
        ToTensorList(),
        NormalizeList(opt.mean, opt.std),
    ])
    train_set = tv.datasets.ImageFolder(opt.data_path, transform=train_tf)
    train_dataloader = DataLoader(train_set,
                                  batch_size=opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  drop_last=True)

    val_dataloader = None
    if opt.validate:
        # Same preprocessing as training, but DefectAdder keeps its default
        # behaviour (normal_only not forced) for evaluation samples.
        val_tf = tv.transforms.Compose([
            tv.transforms.Resize(opt.image_size),
            tv.transforms.CenterCrop(opt.image_size),
            DefectAdder(mode=opt.defect_mode, defect_shape=('line', )),
            ToTensorList(),
            NormalizeList(opt.mean, opt.std),
        ])
        val_set = tv.datasets.ImageFolder(opt.val_path, transform=val_tf)
        val_dataloader = DataLoader(val_set,
                                    batch_size=opt.batch_size,
                                    shuffle=True,
                                    num_workers=opt.num_workers,
                                    drop_last=True)

    # Deserialize checkpoints onto CPU; .cuda() below moves the models.
    map_location = lambda storage, loc: storage

    netd = Discriminator(opt)
    netg = Generater(opt)
    nets = FCN32s(n_class=2, input_channels=6)

    if opt.use_gpu:
        netd.cuda()
        netg.cuda()
        nets.cuda()

    if opt.netd_path:
        print('loading checkpoint for discriminator...')
        state = torch.load(opt.netd_path, map_location=map_location)['net']
        netd.load_state_dict(modify_checkpoint(netd, state), strict=False)
    if opt.netg_path:
        print('loading checkpoint for generator...')
        state = torch.load(opt.netg_path, map_location=map_location)['net']
        netg.load_state_dict(modify_checkpoint(netg, state), strict=False)

    optimizer_g = optim.Adam(netg.parameters(), opt.lrg,
                             betas=(opt.beta1, 0.999))
    optimizer_d = optim.Adam(netd.parameters(), opt.lrd,
                             betas=(opt.beta1, 0.999))
    optimizer_s = optim.Adam(nets.parameters(), opt.lrs,
                             betas=(opt.beta1, 0.999))

    scheduler_g = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_g, milestones=opt.steps, gamma=0.1)
    scheduler_d = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_d, milestones=opt.steps, gamma=0.1)
    scheduler_s = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_s, milestones=opt.steps, gamma=0.1)

    trainer = Trainer(opt, [netd, netg, nets],
                      [optimizer_d, optimizer_g, optimizer_s],
                      [scheduler_d, scheduler_g, scheduler_s],
                      train_dataloader, val_dataloader)
    trainer.train()
def distributed_train(gpu, opt):
    """Per-process entry point for multi-node / multi-GPU (DDP) training.

    Args:
        gpu: local GPU index on this node; this process binds to it.
        opt: config namespace. Uses opt.nr (node rank), opt.gpus (GPUs per
             node) and opt.nodes (node count) for process-group topology,
             plus the same data/model/optimizer settings as ``train``.
    """
    # Global rank of this process and total process count across all nodes.
    rank = opt.nr * opt.gpus + gpu
    world_size = opt.gpus * opt.nodes
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=world_size,
                            rank=rank)
    torch.cuda.set_device(gpu)

    transforms = tv.transforms.Compose([
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        DefectAdder(mode=opt.defect_mode,
                    defect_shape=('line', ),
                    normal_only=True),
        ToTensorList(),
        NormalizeList(opt.mean, opt.std),
    ])
    dataset = tv.datasets.ImageFolder(opt.data_path, transform=transforms)
    # The sampler shards (and shuffles) the dataset per rank, so the
    # DataLoader itself must use shuffle=False.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset, num_replicas=world_size, rank=rank)
    train_dataloader = DataLoader(dataset,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers,
                                  drop_last=True,
                                  sampler=train_sampler)

    if opt.validate:
        val_transforms = tv.transforms.Compose([
            tv.transforms.Resize(opt.image_size),
            tv.transforms.CenterCrop(opt.image_size),
            DefectAdder(mode=opt.defect_mode, defect_shape=('line', )),
            ToTensorList(),
            NormalizeList(opt.mean, opt.std),
        ])
        val_dataset = tv.datasets.ImageFolder(opt.val_path,
                                              transform=val_transforms)
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_dataset, num_replicas=world_size, rank=rank)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=opt.batch_size,
                                    shuffle=False,
                                    num_workers=opt.num_workers,
                                    drop_last=True,
                                    sampler=val_sampler)
    else:
        val_dataloader = None

    # Deserialize checkpoints onto CPU; models are moved to the GPU below.
    map_location = lambda storage, loc: storage

    netd = Discriminator(opt)
    netg = Generater(opt)
    nets = FCN32s(n_class=2, input_channels=6)
    if opt.use_gpu:
        netd.cuda(gpu)
        netg.cuda(gpu)
        nets.cuda(gpu)
        netd = nn.parallel.DistributedDataParallel(netd, device_ids=[gpu])
        netg = nn.parallel.DistributedDataParallel(netg, device_ids=[gpu])
        nets = nn.parallel.DistributedDataParallel(nets, device_ids=[gpu])

    # NOTE(review): checkpoints are loaded AFTER DDP wrapping, so state-dict
    # keys need the 'module.' prefix — presumably modify_checkpoint remaps
    # them; confirm against its implementation.
    if opt.netd_path:
        print('loading checkpoint for discriminator...')
        checkpoint = modify_checkpoint(
            netd,
            torch.load(opt.netd_path, map_location=map_location)['net'])
        netd.load_state_dict(checkpoint, strict=False)
    if opt.netg_path:
        print('loading checkpoint for generator...')
        checkpoint = modify_checkpoint(
            netg,
            torch.load(opt.netg_path, map_location=map_location)['net'])
        netg.load_state_dict(checkpoint, strict=False)

    optimizer_g = optim.Adam(netg.parameters(), opt.lrg,
                             betas=(opt.beta1, 0.999))
    optimizer_d = optim.Adam(netd.parameters(), opt.lrd,
                             betas=(opt.beta1, 0.999))
    optimizer_s = optim.Adam(nets.parameters(), opt.lrs,
                             betas=(opt.beta1, 0.999))
    scheduler_g = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_g, milestones=opt.steps, gamma=0.1)
    scheduler_d = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_d, milestones=opt.steps, gamma=0.1)
    scheduler_s = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_s, milestones=opt.steps, gamma=0.1)

    # Removed dead code: a BCELoss/MSELoss pair and true/fake label tensors
    # were built (and moved to CUDA) here but never passed to Trainer —
    # they only wasted GPU memory. Trainer owns its own losses.
    trainer = Trainer(opt, [netd, netg, nets],
                      [optimizer_d, optimizer_g, optimizer_s],
                      [scheduler_d, scheduler_g, scheduler_s],
                      train_dataloader, val_dataloader)
    trainer.train()