def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank is the global rank among all processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    # ----------------------------------------------------------------------------------------
    # Create model(s) and send to device(s)
    # ----------------------------------------------------------------------------------------
    net = model.ResUNet(3, False).float()
    net.load_state_dict(torch.load('ResUNet.pt'))

    if args.distributed:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            # Divide the batch size and workers across the GPUs on this node
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu])
        else:
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        net.cuda(args.gpu)
    else:
        # Single-node, multi-GPU fallback: DistributedDataParallel requires an
        # initialized process group, so use DataParallel here instead
        net = torch.nn.DataParallel(net).cuda()

    # ----------------------------------------------------------------------------------------
    # Define dataset path and data splits
    # ----------------------------------------------------------------------------------------
    # Input_Data = scipy.io.loadmat("\Path\To\Inputs.mat")
    # Output_Data = scipy.io.loadmat("\Path\To\Outputs.mat")
    # Input = Input_Data['data']
    # Output = Output_Data['data']
    Input = utilities.load_obj(f'{args.path_to_data}/inputs')     # Input_Data['Inputs']
    Output = utilities.load_obj(f'{args.path_to_data}/outputs')   # Output_Data['Outputs']

    # ----------------------------------------------------------------------------------------
    # Create datasets (with augmentation) and dataloaders
    # ----------------------------------------------------------------------------------------
    Raman_Dataset_Test = dataset.RamanDataset(Input, Output, batch_size=args.batch_size,
                                              spectrum_len=args.spectrum_len)

    test_loader = DataLoader(Raman_Dataset_Test, batch_size=args.batch_size, shuffle=False,
                             num_workers=0, pin_memory=True)

    # ----------------------------------------------------------------------------------------
    # Evaluate
    # ----------------------------------------------------------------------------------------
    MSE_NN, MSE_SG = evaluate(test_loader, net, args)
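# evaluate() is called above but not defined in this excerpt. Below is a minimal
# sketch of what it might look like, assuming the dataloader yields (noisy, clean)
# spectrum pairs shaped (batch, 1, spectrum_len) and that the second returned value
# is the MSE of a Savitzky-Golay smoothing baseline (scipy.signal.savgol_filter).
# The function name, batch format, window length, and polynomial order are
# illustrative assumptions, not values taken from the repository.
import numpy as np
import scipy.signal
import torch
import torch.nn as nn


def evaluate_sketch(dataloader, net, args):
    criterion_MSE = nn.MSELoss().cuda(args.gpu)
    net.eval()

    mse_nn, mse_sg = [], []
    with torch.no_grad():
        for x, y in dataloader:
            x = x.float().cuda(args.gpu, non_blocking=True)
            y = y.float().cuda(args.gpu, non_blocking=True)

            # Neural-network denoising
            mse_nn.append(criterion_MSE(net(x), y).item())

            # Savitzky-Golay smoothing baseline applied to the noisy input
            x_np = x.squeeze(1).cpu().numpy()
            y_sg = scipy.signal.savgol_filter(x_np, window_length=9, polyorder=3, axis=-1)
            y_sg = torch.from_numpy(y_sg).unsqueeze(1).float().cuda(args.gpu)
            mse_sg.append(criterion_MSE(y_sg, y).item())

    return float(np.mean(mse_nn)), float(np.mean(mse_sg))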
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank is the global rank among all processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    # ----------------------------------------------------------------------------------------
    # Create model(s) and send to device(s)
    # ----------------------------------------------------------------------------------------
    net = model.ResUNet(3, args.batch_norm).float()

    if args.distributed:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            # Divide the batch size and workers across the GPUs on this node
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu])
        else:
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        net.cuda(args.gpu)
    else:
        # Single-node, multi-GPU fallback: DistributedDataParallel requires an
        # initialized process group, so use DataParallel here instead
        net = torch.nn.DataParallel(net).cuda()

    # ----------------------------------------------------------------------------------------
    # Define dataset path and data splits
    # ----------------------------------------------------------------------------------------
    Input_Data = scipy.io.loadmat(r"\Path\To\Inputs.mat")
    Output_Data = scipy.io.loadmat(r"\Path\To\Outputs.mat")

    Input = Input_Data['Inputs']
    Output = Output_Data['Outputs']

    spectra_num = len(Input)

    train_split = round(0.9 * spectra_num)
    val_split = round(0.1 * spectra_num)

    input_train = Input[:train_split]
    input_val = Input[train_split:train_split + val_split]

    output_train = Output[:train_split]
    output_val = Output[train_split:train_split + val_split]

    # ----------------------------------------------------------------------------------------
    # Create datasets (with augmentation) and dataloaders
    # ----------------------------------------------------------------------------------------
    Raman_Dataset_Train = dataset.RamanDataset(input_train, output_train, batch_size=args.batch_size,
                                               spectrum_len=args.spectrum_len, spectrum_shift=0.1,
                                               spectrum_window=False, horizontal_flip=False, mixup=True)
    Raman_Dataset_Val = dataset.RamanDataset(input_val, output_val, batch_size=args.batch_size,
                                             spectrum_len=args.spectrum_len)

    train_loader = DataLoader(Raman_Dataset_Train, batch_size=args.batch_size, shuffle=False,
                              num_workers=0, pin_memory=True)
    val_loader = DataLoader(Raman_Dataset_Val, batch_size=args.batch_size, shuffle=False,
                            num_workers=0, pin_memory=True)

    # ----------------------------------------------------------------------------------------
    # Define criterion(s), optimizer(s), and scheduler(s)
    # ----------------------------------------------------------------------------------------
    criterion = nn.L1Loss().cuda(args.gpu)
    criterion_MSE = nn.MSELoss().cuda(args.gpu)

    if args.optimizer == "sgd":
        optimizer = optim.SGD(net.parameters(), lr=args.lr)
    elif args.optimizer == "adamW":
        optimizer = optim.AdamW(net.parameters(), lr=args.lr)
    else:  # Adam
        optimizer = optim.Adam(net.parameters(), lr=args.lr)

    if args.scheduler == "decay-lr":
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.2)
    elif args.scheduler == "multiplicative-lr":
        lmbda = lambda epoch: 0.985
        scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda)
    elif args.scheduler == "cyclic-lr":
        scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=args.base_lr, max_lr=args.lr,
                                                mode='triangular2', cycle_momentum=False)
    elif args.scheduler == "one-cycle-lr":
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=args.lr,
                                                  steps_per_epoch=len(train_loader),
                                                  epochs=args.epochs, cycle_momentum=False)
    else:  # constant-lr
        scheduler = None

    print('Started Training')
    print('Training Details:')
    print('Network: {}'.format(args.network))
    print('Epochs: {}'.format(args.epochs))
    print('Batch Size: {}'.format(args.batch_size))
    print('Optimizer: {}'.format(args.optimizer))
    print('Scheduler: {}'.format(args.scheduler))
    print('Learning Rate: {}'.format(args.lr))
    print('Spectrum Length: {}'.format(args.spectrum_len))

    DATE = datetime.datetime.now().strftime("%Y_%m_%d")

    log_dir = "runs/{}_{}_{}_{}".format(DATE, args.optimizer, args.scheduler, args.network)
    models_dir = "{}_{}_{}_{}.pt".format(DATE, args.optimizer, args.scheduler, args.network)

    writer = SummaryWriter(log_dir=log_dir)

    for epoch in range(args.epochs):
        train_loss = train(train_loader, net, optimizer, scheduler, criterion, criterion_MSE, epoch, args)
        val_loss = validate(val_loader, net, criterion_MSE, args)

        # Epoch-wise schedulers are stepped here; batch-wise schedulers
        # (cyclic-lr / one-cycle-lr) are stepped inside train()
        if args.scheduler == "decay-lr" or args.scheduler == "multiplicative-lr":
            scheduler.step()

        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch)

        torch.save(net.state_dict(), models_dir)

    print('Finished Training')
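# train() and validate() are referenced in the epoch loop above but are not defined
# in this excerpt. The sketch below shows one plausible shape for them, assuming the
# dataloaders yield (noisy, clean) spectrum pairs and that batch-wise schedulers
# (cyclic-lr / one-cycle-lr) advance once per optimizer step, while epoch-wise
# schedulers are stepped in the epoch loop. Function names, batch format, and the
# returned metric (mean MSE per epoch) are assumptions, not repository code.
import torch


def train_sketch(dataloader, net, optimizer, scheduler, criterion, criterion_MSE, epoch, args):
    net.train()
    running_mse = 0.0

    for x, y in dataloader:
        x = x.float().cuda(args.gpu, non_blocking=True)
        y = y.float().cuda(args.gpu, non_blocking=True)

        optimizer.zero_grad()
        y_hat = net(x)
        loss = criterion(y_hat, y)  # L1 training loss
        loss.backward()
        optimizer.step()

        # Batch-wise learning-rate schedules advance once per optimizer step
        if args.scheduler in ("cyclic-lr", "one-cycle-lr"):
            scheduler.step()

        running_mse += criterion_MSE(y_hat, y).item()

    return running_mse / len(dataloader)


def validate_sketch(dataloader, net, criterion_MSE, args):
    net.eval()
    running_mse = 0.0
    with torch.no_grad():
        for x, y in dataloader:
            x = x.float().cuda(args.gpu, non_blocking=True)
            y = y.float().cuda(args.gpu, non_blocking=True)
            running_mse += criterion_MSE(net(x), y).item()
    return running_mse / len(dataloader)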
def load_model_keras():
    # Build the Keras ResUNet and load its pretrained weights.
    # `m` (the model module), `image_size`, and `model_path` are expected to be
    # defined in the surrounding script.
    model = m.ResUNet(image_size)
    model.load_weights(model_path)
    return model
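# Example usage of the Keras loader above: run inference on a batch of spectra.
# `noisy_spectra.npy` is a hypothetical input file, and the array shape expected
# by the Keras ResUNet may differ from the one assumed here.
import numpy as np

keras_net = load_model_keras()
noisy_spectra = np.load('noisy_spectra.npy')          # hypothetical input array
denoised_spectra = keras_net.predict(noisy_spectra)   # batched forward pass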