def main():
    data_set = ImageFolder(root='anime-faces', transform=as_array)
    val_ratio = 0.1
    val_size = int(len(data_set) * val_ratio)
    train_size = len(data_set) - val_size
    train_set, val_set = random_split(data_set, [train_size, val_size])
    confs = [
        ('data/anime_faces/train.lmdb', train_set),
        ('data/anime_faces/val.lmdb', val_set),
    ]
    for path, data_set in confs:
        convert_data_set(path, data_set)
def load_data(test_split, seed, batch_size):
    """Load the sonar data and return train/test DataLoaders."""
    sonar_dataset = SonarDataset('./sonar.all-data')
    # Compute the sizes for the split
    dataset_size = len(sonar_dataset)
    test_size = int(test_split * dataset_size)
    train_size = dataset_size - test_size
    train_dataset, test_dataset = random_split(
        sonar_dataset, [train_size, test_size],
        generator=torch.Generator().manual_seed(seed))
    # Pass the Subset objects themselves; `train_dataset.dataset` would hand
    # the *full* dataset to both loaders.
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=True)
    return train_loader, test_loader
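# The seeded generator above is what makes the split reproducible. A minimal,
# self-contained sketch of that property (a random TensorDataset stands in for
# SonarDataset here, and the sizes are illustrative assumptions):
import torch
from torch.utils.data import TensorDataset, random_split

toy = TensorDataset(torch.randn(208, 60), torch.randint(0, 2, (208,)))
split_a = random_split(toy, [166, 42], generator=torch.Generator().manual_seed(42))
split_b = random_split(toy, [166, 42], generator=torch.Generator().manual_seed(42))
assert split_a[0].indices == split_b[0].indices  # identical partitions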
    return inputs_array, targets_array


inputs_array, targets_arrays = dataframe_to_arrays(dataframe)

# pylint: disable=E1101
inputs = torch.from_numpy(inputs_array)
targets = torch.from_numpy(targets_arrays)
inputs, targets = inputs.float(), targets.float()
# pylint: enable=E1101
targets = targets.squeeze(dim=1)

dataset = TensorDataset(inputs, targets)

val_percent = 0.15
val_size = int(num_rows * val_percent)
train_size = num_rows - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

batch_size = 32
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size, shuffle=True)

input_size = len(input_cols)
output_size = len(output_cols)


class InsuranceModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
def _split_dataset(self, dataset, rate):
    dataset_len = len(dataset)
    val_len = int(rate * dataset_len)
    train_len = dataset_len - val_len
    return random_split(dataset, (train_len, val_len))
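# Quick stand-alone illustration (toy dataset, not part of the original class):
# flooring the validation length and giving train the remainder guarantees the
# two lengths sum to len(dataset), which random_split requires.
import torch
from torch.utils.data import TensorDataset, random_split

toy = TensorDataset(torch.arange(101).float())
val_len = int(0.2 * len(toy))        # 20
train_len = len(toy) - val_len       # 81
train_set, val_set = random_split(toy, (train_len, val_len))
print(len(train_set), len(val_set))  # 81 20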
train_coefficients = hyperparam_conf['train_coefficients']

# %% mnist config
dataset_config = conf['mnist_config']
max_rate = dataset_config['max_rate']
use_transform = dataset_config['use_transform']

# %% transform config
if use_transform:
    rand_transform = get_rand_transform(conf['transform'])
else:
    rand_transform = None

# load mnist training dataset
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True,
                                transform=rand_transform)
mnist_trainset, mnist_devset = random_split(
    mnist_trainset, [50000, 10000],
    generator=torch.Generator().manual_seed(42))

# load mnist test dataset
mnist_testset = datasets.MNIST(root='./data', train=False, download=True,
                               transform=None)

# acc file name
acc_file_name = experiment_name + '_' + conf['acc_file_name']


# %% define model
class mysnn(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.length = length
        self.batch_size = batch_size
        self.train_coefficients = train_coefficients
def main(args): torch.manual_seed(222) torch.cuda.manual_seed_all(222) np.random.seed(222) print(args) # config = [] device = torch.device('cuda') model = None if args.arch == "UNet": model = UNet(args).to(device) else: raise("architectures other than Unet hasn't been added!!") # update_lrs = nn.Parameter(args.update_lr*torch.ones(self.update_step, len(self.net.vars)), requires_grad=True) model.optimizer = optim.Adam(model.parameters(), lr=args.lr, eps=1e-7, amsgrad=True, weight_decay=args.weight_decay) model.lr_scheduler = optim.lr_scheduler.ExponentialLR(model.optimizer, args.exp_decay) tmp = filter(lambda x: x.requires_grad, model.parameters()) num = sum(map(lambda x: np.prod(x.shape), tmp)) print(model) #for name, param in model.named_parameters(): # print(name, param.size()) print('Total trainable tensors:', num, flush=True) SUMMARY_INTERVAL=5 TEST_PRINT_INTERVAL=SUMMARY_INTERVAL*5 ITER_SAVE_INTERVAL=300 EPOCH_SAVE_INTERVAL=5 model_path = args.model_saving_path + args.model_name + "_batch_size_" + str(args.batch_size) + "_lr_" + str(args.lr) if not os.path.isdir(model_path): os.mkdir(model_path) ds = SimulationDataset(args.data_folder, total_sample_number = args.total_sample_number) means = [1e-3, 1e-3]#ds.means; #[Hy_meanR, Hy_meanI, Ex_meanR, Ex_meanI, Ez_meanR, Ez_meanI]; print("means: ", means); torch.manual_seed(42) train_ds, test_ds = random_split(ds, [int(0.9*len(ds)), len(ds) - int(0.9*len(ds))]) #print("total training samples: %d, total test samples: %d" % (len(train_ds), len(test_ds)), flush=True) train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, num_workers=0) test_loader = DataLoader(test_ds, batch_size=args.batch_size, shuffle=True, num_workers=0) train_mean = 0 test_mean = 0 # first get the mean-absolute-field value: for sample_batched in train_loader: train_mean += torch.mean(torch.abs(sample_batched["field"])) for sample_batched in test_loader: test_mean += torch.mean(torch.abs(sample_batched["field"])) train_mean /= len(train_loader) test_mean /= len(test_loader) print("total training samples: %d, total test samples: %d, train_abs_mean: %f, test_abs_mean: %f" % (len(train_ds), len(test_ds), train_mean, test_mean), flush=True) # for visualizing the graph: #writer = SummaryWriter('runs/'+args.model_name) #test_input = None #for sample in train_loader: # test_input = sample['structure'] # break #writer.add_graph(model, test_input.to(device)) #writer.close() df = pd.DataFrame(columns=['epoch','train_loss', 'train_phys_reg', 'test_loss', 'test_phys_reg']) train_loss_history = [] train_phys_reg_history = [] test_loss_history = [] test_phys_reg_history = [] start_epoch=0 if (args.continue_train): print("Restoring weights from ", model_path+"/last_model.pt", flush=True) checkpoint = torch.load(model_path+"/last_model.pt") start_epoch=checkpoint['epoch'] model = checkpoint['model'] model.lr_scheduler = checkpoint['lr_scheduler'] model.optimizer = checkpoint['optimizer'] df = read_csv(model_path + '/'+'df.csv') scaler = GradScaler() best_loss = 1e4 last_epoch_data_loss = 1.0 last_epoch_physical_loss = 1.0 for step in range(start_epoch, args.epoch): print("epoch: ", step, flush=True) reg_norm = regConstScheduler(step, args, last_epoch_data_loss, last_epoch_physical_loss); # training for sample_batched in train_loader: model.optimizer.zero_grad() x_batch_train, y_batch_train = sample_batched['structure'].to(device), sample_batched['field'].to(device) with autocast(): logits = model(x_batch_train, bn_training=True) #calculate the loss using the ground 
truth loss = model.loss_fn(logits, y_batch_train) logits = logits[:,:,1:-1, :] # print("loss: ", loss, flush=True) # Calculate physical residue pattern = (x_batch_train*(n_Si - n_air) + n_air)**2; # rescale the 0/1 pattern into dielectric constant pattern = torch.cat((torch.ones([pattern.shape[0], 1, 1, 256], dtype = torch.float32, device = device)*n_sub**2, pattern), dim=2); #fields = logits; # predicted fields [Hy_R, Hy_I, Ex_R, Ex_I, Ez_R, Ez_I] fields = torch.cat((y_batch_train[:, :, 0:1, :], logits, y_batch_train[:, :, -1:, :]), dim=2); FD_Hy = H_to_H(-fields[:, 0]*means[0], -fields[:, 1]*means[1], dL, omega, pattern); #phys_regR = 10*model.loss_fn(FD_Hy[:, 0]/means[0], logits[:, 0])/reg_norm; #phys_regI = 10*model.loss_fn(FD_Hy[:, 1]/means[1], logits[:, 1])/reg_norm; phys_regR = model.loss_fn(FD_Hy[:, 0]/means[0], logits[:, 0])*reg_norm; phys_regI = model.loss_fn(FD_Hy[:, 1]/means[1], logits[:, 1])*reg_norm; loss += phys_regR + phys_regI; scaler.scale(loss).backward() scaler.step(model.optimizer) scaler.update() #loss.backward() #model.optimizer.step() #Save the weights at the end of each epoch checkpoint = { 'epoch': step, 'model': model, 'optimizer': model.optimizer, 'lr_scheduler': model.lr_scheduler } torch.save(checkpoint, model_path+"/last_model.pt") # evaluation train_loss = 0 train_phys_reg = 0 for sample_batched in train_loader: x_batch_train, y_batch_train = sample_batched['structure'].to(device), sample_batched['field'].to(device) with torch.no_grad(): logits = model(x_batch_train, bn_training=False) loss = model.loss_fn(logits, y_batch_train) logits = logits[:, :, 1:-1, :] # Calculate physical residue pattern = (x_batch_train*(n_Si - n_air) + n_air)**2; # rescale the 0/1 pattern into dielectric constant pattern = torch.cat((torch.ones([pattern.shape[0], 1, 1, 256], dtype = torch.float32, device = device)*n_sub**2, pattern), dim=2); #fields = logits; # predicted fields [Hy_R, Hy_I, Ex_R, Ex_I, Ez_R, Ez_I] fields = torch.cat((y_batch_train[:, :, 0:1, :], logits, y_batch_train[:, :, -1:, :]), dim=2); FD_Hy = H_to_H(-fields[:, 0]*means[0], -fields[:, 1]*means[1], dL, omega, pattern); #phys_regR = 10*model.loss_fn(FD_Hy[:, 0]/means[0], fields[:, 0, 1:-1, :])/reg_norm; #phys_regI = 10*model.loss_fn(FD_Hy[:, 1]/means[1], fields[:, 1, 1:-1, :])/reg_norm; phys_regR = model.loss_fn(FD_Hy[:, 0]/means[0], fields[:, 0, 1:-1, :])*reg_norm; phys_regI = model.loss_fn(FD_Hy[:, 1]/means[1], fields[:, 1, 1:-1, :])*reg_norm; #loss = loss + phys_reg1 + phys_reg2 + phys_reg3; train_loss += loss train_phys_reg += 0.5*(phys_regR + phys_regI); train_loss /= len(train_loader) train_phys_reg /= len(train_loader) test_loss = 0 test_phys_reg = 0 for sample_batched in test_loader: x_batch_test, y_batch_test = sample_batched['structure'].to(device), sample_batched['field'].to(device) with torch.no_grad(): logits = model(x_batch_test, bn_training=False) loss = model.loss_fn(logits, y_batch_test) logits = logits[:, :, 1:-1, :] # Calculate physical residue pattern = (x_batch_test*(n_Si - n_air) + n_air)**2; # rescale the 0/1 pattern into dielectric constant pattern = torch.cat((torch.ones([pattern.shape[0], 1, 1, 256], dtype = torch.float32, device = device)*n_sub**2, pattern), dim=2); #fields = logits; # predicted fields [Hy_R, Hy_I, Ex_R, Ex_I, Ez_R, Ez_I] fields = torch.cat((y_batch_test[:, :, 0:1, :], logits, y_batch_test[:, :, -1:, :]), dim=2); FD_Hy = H_to_H(-fields[:, 0]*means[0], -fields[:, 1]*means[1], dL, omega, pattern); #phys_regR = 10*model.loss_fn(FD_Hy[:, 0]/means[0], fields[:, 0, 1:-1, 
:])/reg_norm; #phys_regI = 10*model.loss_fn(FD_Hy[:, 1]/means[1], fields[:, 1, 1:-1, :])/reg_norm; phys_regR = model.loss_fn(FD_Hy[:, 0]/means[0], fields[:, 0, 1:-1, :]); phys_regI = model.loss_fn(FD_Hy[:, 1]/means[1], fields[:, 1, 1:-1, :]); test_loss += loss test_phys_reg += 0.5*(phys_regR + phys_regI); test_loss /= len(test_loader) test_phys_reg /= len(test_loader) last_epoch_data_loss = test_loss last_epoch_physical_loss = test_phys_reg.detach().clone() test_phys_reg *= reg_norm print('train loss: %.5f, train phys reg: %.5f, test loss: %.5f, test phys reg: %.5f, last_physical_loss: %.5f' % (train_loss, train_phys_reg, test_loss, test_phys_reg, last_epoch_physical_loss), flush=True) model.lr_scheduler.step() df = df.append({'epoch': step+1, 'lr': str(model.lr_scheduler.get_last_lr()), 'train_loss': train_loss.item(), 'train_phys_reg': train_phys_reg.item(), 'test_loss': test_loss.item(), 'test_phys_reg': test_phys_reg.item(), }, ignore_index=True) df.to_csv(model_path + '/'+'df.csv',index=False) if(test_loss<best_loss): best_loss = test_loss checkpoint = { 'epoch': step, 'model': model, 'optimizer': model.optimizer, 'lr_scheduler': model.lr_scheduler } torch.save(checkpoint, model_path+"/best_model.pt")
def main(args): data_dir = args.data_dir figure_path = args.figure_dir model_path = args.model_dir file_name = "data.hdf5" # Set skip_training to False if the model has to be trained, to True if the model has to be loaded. skip_training = False # Set the torch device device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Device = {}".format(device)) # Initialize parameters parameters = Params_cross( subject_n=args.sub, hand=args.hand, batch_size=args.batch_size, valid_batch_size=args.batch_size_valid, test_batch_size=args.batch_size_test, epochs=args.epochs, lr=args.learning_rate, wd=args.weight_decay, patience=args.patience, device=device, y_measure=args.y_measure, desc=args.desc, ) # Import data and generate train-, valid- and test-set # Set if generate with RPS values or not (check network architecture used later) print("Testing: {} ".format(parameters.desc)) dataset = MEG_Cross_Dataset_no_bp( data_dir, file_name, parameters.subject_n, mode="train", y_measure=parameters.y_measure, ) test_dataset = MEG_Cross_Dataset_no_bp( data_dir, file_name, parameters.subject_n, mode="test", y_measure=parameters.y_measure, ) # split the dataset in train, test and valid sets. train_len, valid_len = len_split_cross(len(dataset)) # train_dataset, valid_test, test_dataset = random_split(dataset, [train_len, valid_len, test_len], # generator=torch.Generator().manual_seed(42)) train_dataset, valid_dataset = random_split( dataset, [train_len, valid_len] ) print( "Train dataset len {}, valid dataset len {}, test dataset len {}".format( len(train_dataset), len(valid_dataset), len(test_dataset) ) ) # Initialize the dataloaders trainloader = DataLoader( train_dataset, batch_size=parameters.batch_size, shuffle=True, num_workers=4, ) validloader = DataLoader( valid_dataset, batch_size=parameters.valid_batch_size, shuffle=True, num_workers=4, ) testloader = DataLoader( test_dataset, batch_size=parameters.test_batch_size, shuffle=False, num_workers=4, ) # Initialize network with torch.no_grad(): sample, y = iter(trainloader).next() n_times = sample.shape[-1] net = MNet(n_times) print(net) # Training loop or model loading if not skip_training: print("Begin training....") # Check the optimizer before running (different from model to model) # optimizer = Adam(net.parameters(), lr=parameters.lr, weight_decay=5e-4) optimizer = SGD(net.parameters(), lr=parameters.lr, weight_decay=5e-4) scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=15) print("scheduler : ", scheduler) loss_function = torch.nn.MSELoss() start_time = timer.time() net, train_loss, valid_loss = train( net, trainloader, validloader, optimizer, scheduler, loss_function, parameters.device, parameters.epochs, parameters.patience, parameters.hand, model_path, ) train_time = timer.time() - start_time print("Training done in {:.4f}".format(train_time)) # visualize the loss as the network trained fig = plt.figure(figsize=(10, 4)) plt.plot( range(1, len(train_loss) + 1), train_loss, label="Training Loss" ) plt.plot( range(1, len(valid_loss) + 1), valid_loss, label="Validation Loss" ) # find position of lowest validation loss minposs = valid_loss.index(min(valid_loss)) + 1 plt.axvline( minposs, linestyle="--", color="r", label="Early Stopping Checkpoint", ) plt.xlabel("epochs") plt.ylabel("loss") # plt.ylim(0, 0.5) # consistent scale # plt.xlim(0, len(train_loss)+1) # consistent scale plt.grid(True) plt.legend() plt.tight_layout() plt.show() image1 = fig plt.savefig(os.path.join(figure_path, "loss_plot.pdf")) if not 
skip_training: # Save the trained model save_pytorch_model(net, model_path, "model.pth") else: # Load the model (properly select the model architecture) net = MNet() net = load_pytorch_model( net, os.path.join(model_path, "model.pth"), parameters.device ) # Evaluation print("Evaluation...") net.eval() y_pred = [] y = [] y_pred_valid = [] y_valid = [] with torch.no_grad(): for data, labels in testloader: data, labels = ( data.to(parameters.device), labels.to(parameters.device), ) y.extend(list(labels[:, parameters.hand])) y_pred.extend((list(net(data)))) for data, labels in validloader: data, labels = ( data.to(parameters.device), labels.to(parameters.device), ) y_valid.extend(list(labels[:, parameters.hand])) y_pred_valid.extend((list(net(data)))) # Calculate Evaluation measures print("Evaluation measures") mse = mean_squared_error(y, y_pred) rmse = mean_squared_error(y, y_pred, squared=False) mae = mean_absolute_error(y, y_pred) r2 = r2_score(y, y_pred) rmse_valid = mean_squared_error(y_valid, y_pred_valid, squared=False) r2_valid = r2_score(y_valid, y_pred_valid) valid_loss_last = min(valid_loss) print("Test set ") print("mean squared error {}".format(mse)) print("root mean squared error {}".format(rmse)) print("mean absolute error {}".format(mae)) print("r2 score {}".format(r2)) print("Validation set") print("root mean squared error valid {}".format(rmse_valid)) print("r2 score valid {}".format(r2_valid)) print("last value of the validation loss:".format(valid_loss_last)) # plot y_new against the true value focus on 100 timepoints fig, ax = plt.subplots(1, 1, figsize=[10, 4]) times = np.arange(200) ax.plot(times, y_pred[0:200], color="b", label="Predicted") ax.plot(times, y[0:200], color="r", label="True") ax.set_xlabel("Times") ax.set_ylabel("{}".format(parameters.y_measure)) ax.set_title( "Sub {}, hand {}, {} prediction".format( str(parameters.subject_n), "sx" if parameters.hand == 0 else "dx", parameters.y_measure, ) ) plt.legend() plt.savefig(os.path.join(figure_path, "Times_prediction_focus.pdf")) plt.show() # plot y_new against the true value fig, ax = plt.subplots(1, 1, figsize=[10, 4]) times = np.arange(len(y_pred)) ax.plot(times, y_pred, color="b", label="Predicted") ax.plot(times, y, color="r", label="True") ax.set_xlabel("Times") ax.set_ylabel("{}".format(parameters.y_measure)) ax.set_title( "Sub {}, hand {}, {} prediction".format( str(parameters.subject_n), "sx" if parameters.hand == 0 else "dx", parameters.y_measure, ) ) plt.legend() plt.savefig(os.path.join(figure_path, "Times_prediction.pdf")) plt.show() # scatterplot y predicted against the true value fig, ax = plt.subplots(1, 1, figsize=[10, 4]) ax.scatter(np.array(y), np.array(y_pred), color="b", label="Predicted") ax.set_xlabel("True") ax.set_ylabel("Predicted") # plt.legend() plt.savefig(os.path.join(figure_path, "Scatter.pdf")) plt.show() # scatterplot y predicted against the true value fig, ax = plt.subplots(1, 1, figsize=[10, 4]) ax.scatter( np.array(y_valid), np.array(y_pred_valid), color="b", label="Predicted" ) ax.set_xlabel("True") ax.set_ylabel("Predicted") # plt.legend() plt.savefig(os.path.join(figure_path, "Scatter_valid.pdf")) plt.show() # log the model and parameters using mlflow tracker with mlflow.start_run(experiment_id=args.experiment) as run: for key, value in vars(parameters).items(): mlflow.log_param(key, value) mlflow.log_param("Time", train_time) mlflow.log_metric("MSE", mse) mlflow.log_metric("RMSE", rmse) mlflow.log_metric("MAE", mae) mlflow.log_metric("R2", r2) mlflow.log_metric("RMSE_Valid", 
rmse_valid) mlflow.log_metric("R2_Valid", r2_valid) mlflow.log_metric("Valid_loss", valid_loss_last) mlflow.log_artifact(os.path.join(figure_path, "Times_prediction.pdf")) mlflow.log_artifact( os.path.join(figure_path, "Times_prediction_focus.pdf") ) mlflow.log_artifact(os.path.join(figure_path, "loss_plot.pdf")) mlflow.log_artifact(os.path.join(figure_path, "Scatter.pdf")) mlflow.log_artifact(os.path.join(figure_path, "Scatter_valid.pdf")) mlflow.pytorch.log_model(net, "models")
import copy

from dataset import MyDataset
from torch.utils import data
from torch.autograd import Variable

use_gpu = torch.cuda.is_available()
num_gpu = list(range(torch.cuda.device_count()))

#trans = transforms.Compose([
#    transforms.ToTensor(),
#])

whole_set = MyDataset()
length = len(whole_set)
train_size = 2
train_size, validate_size = train_size, len(whole_set) - train_size
train_set, validate_set = data.random_split(whole_set, [train_size, validate_size])
image_datasets = {'train': train_set, 'val': validate_set}

batch_size = 2
train_loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = data.DataLoader(validate_set, batch_size=1, shuffle=True)

model = unet_2d()

# pixel accuracy and mIOU list
pixel_acc_list = []
mIOU_list = []

#batch_size = 4
#dataloaders = data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0),

## parameters for Solver-Adam in this example
batch_size = 6
# lr = 1e-4  # achieved best results
step_size = 100  # Won't work when epochs <= 100
dataset = TUDataset("/home/anoopkumar/Brain/HGP-SL/data", name=args.dataset,
                    use_node_attr=False)
args.num_classes = dataset.num_classes
args.num_features = dataset.num_features
print(args)
print("len of the dataset ######## ", len(dataset))

num_training = int(len(dataset) * 0.7)
num_val = int(len(dataset) * 0.1)
num_test = len(dataset) - (num_training + num_val)
training_set, validation_set, test_set = random_split(
    dataset, [num_training, num_val, num_test])

train_loader = DataLoader(training_set, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(validation_set, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

model = Model(args).to(args.device)
model.load_state_dict(torch.load("pre_trained"))
print("model loaded ################")
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
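# If the 70/10/20-style split needs to be reproducible across runs, random_split
# accepts a seeded generator. A minimal self-contained sketch (a toy TensorDataset
# stands in for the TUDataset; the seed value is an illustrative assumption):
import torch
from torch.utils.data import TensorDataset, random_split

toy = TensorDataset(torch.randn(1000, 8))
num_training = int(len(toy) * 0.7)
num_val = int(len(toy) * 0.1)
num_test = len(toy) - (num_training + num_val)
training_set, validation_set, test_set = random_split(
    toy, [num_training, num_val, num_test],
    generator=torch.Generator().manual_seed(777))
print(len(training_set), len(validation_set), len(test_set))  # 700 100 200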
domain = args.domains[i]
test_dset = all_dsets[i]
# Override the domain IDs
k = 0
for j in range(len(all_dsets)):
    if j != i:
        all_dsets[j].set_domain_id(k)
        k += 1
test_dset.set_domain_id(k)
# For test
#all_dsets = [all_dsets[0], all_dsets[2]]

# Split the data
if args.indices_dir is None:
    subsets = [
        random_split(all_dsets[j], [train_sizes[j], val_sizes[j]])
        for j in range(len(all_dsets)) if j != i
    ]
else:
    # load the indices
    dset_choices = [
        all_dsets[j] for j in range(len(all_dsets)) if j != i
    ]
    subset_indices = defaultdict(lambda: [[], []])
    with open(f'{args.indices_dir}/train_idx_{domain}.txt') as f, \
            open(f'{args.indices_dir}/val_idx_{domain}.txt') as g:
        for l in f:
            vals = l.strip().split(',')
            subset_indices[int(vals[0])][0].append(int(vals[1]))
        for l in g:
            vals = l.strip().split(',')
from torchvision.models import resnet152
from preprocess import TrainingDataset, labels

input_size = 224
batch_size = 16
num_epochs = 25
num_classes = torch.unique(labels).shape[0]

dataset = TrainingDataset(input_size)
len_train_set = int(len(dataset) * 0.7)
len_val_set = len(dataset) - len_train_set
train_set, val_set = random_split(dataset, [len_train_set, len_val_set])
datasets = {'train': train_set, 'val': val_set}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# for some reason the default_collate function causes a problem.
def collate(batch):
    images = torch.stack([x for x, _ in batch], 0)
    labels = torch.stack([k for _, k in batch], 0)
    return images, labels


def train(use_pretrained=False):
    loaders = {
doc_correspnd_info_dict = {}  # dictionary for mapping each document's symbolic id back to its document name and info
n_doc = []
for unit in n_Entdics_Reldics:
    doc_correspnd_info_dict[unit[0]] = unit[1:]
    n_doc.append([unit[0]])
n_doc = torch.LongTensor(n_doc).to(device)
# pdb.set_trace()

dataset = D.TensorDataset(n_doc, word_input, attention_mask, y_span_size_1,
                          y_span_size_2, y_span_size_3, y_span_size_4)
train_size = int(0.8 * len(word_input))
devel_size = int(0.1 * len(word_input))
test_size = len(word_input) - train_size - devel_size
train_dataset, devel_dataset, test_dataset = D.random_split(
    dataset, [train_size, devel_size, test_size])

train_loader = D.DataLoader(
    train_dataset,
    batch_size=int(config.get('main', 'BATCH_SIZE_TRAIN')),
    shuffle=strtobool(config.get('main', 'BATCH_SHUFFLE_TRAIN')))
devel_loader = D.DataLoader(
    devel_dataset,
    batch_size=int(config.get('main', 'BATCH_SIZE_DEVEL')),
    shuffle=strtobool(config.get('main', 'BATCH_SHUFFLE_DEVEL')))
test_loader = D.DataLoader(
    test_dataset,
    batch_size=int(config.get('main', 'BATCH_SIZE_TEST')),
    shuffle=strtobool(config.get('main', 'BATCH_SHUFFLE_TEST')))
print('finish')
# %% md
## Import v-Dem dataset

# %%
x, c, y, concept_names = load_vDem('../data')

dataset_xc = TensorDataset(x, c)
dataset_cy = TensorDataset(c, y)

train_size = int(len(dataset_cy) * 0.5)
val_size = (len(dataset_cy) - train_size) // 2
test_size = len(dataset_cy) - train_size - val_size
train_data, val_data, test_data = random_split(
    dataset_cy, [train_size, val_size, test_size])
train_loader = DataLoader(train_data, batch_size=train_size)
val_loader = DataLoader(val_data, batch_size=val_size)
test_loader = DataLoader(test_data, batch_size=test_size)

n_concepts = next(iter(train_loader))[0].shape[1]
n_classes = 2

print(concept_names)
print(n_concepts)
print(n_classes)

# %% md
## 10-fold cross-validation with explainer network
def main():
    args = parse_args()
    print(vars(args))
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    if args.model_name == 'c3d':
        model = c3d.C3D(with_classifier=False)
    elif args.model_name == 'r3d':
        model = r3d.R3DNet((1, 1, 1, 1), with_classifier=False)
    elif args.model_name == 'r21d':
        model = r21d.R2Plus1DNet((1, 1, 1, 1), with_classifier=False)
    print(args.model_name)
    model = sscn.SSCN_OneClip(base_network=model, with_classifier=True, num_classes=4)
    if ckpt:
        weight = load_pretrained_weights(ckpt)
        model.load_state_dict(weight, strict=True)

    # train
    train_dataset = PredictDataset(params['dataset'], mode="train", args=args)
    if params['data'] == 'kinetics-400':
        val_dataset = PredictDataset(params['dataset'], mode='val', args=args)
    elif params['data'] == 'UCF-101':
        val_size = 800
        train_dataset, val_dataset = random_split(
            train_dataset, (len(train_dataset) - val_size, val_size))
    elif params['data'] == 'hmdb':
        val_size = 400
        train_dataset, val_dataset = random_split(
            train_dataset, (len(train_dataset) - val_size, val_size))

    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'],
                              shuffle=True, num_workers=params['num_workers'],
                              drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'],
                            shuffle=True, num_workers=params['num_workers'],
                            drop_last=True)

    if multi_gpu == 1:
        model = nn.DataParallel(model)
    model = model.cuda()

    criterion_CE = nn.CrossEntropyLoss().cuda()
    criterion_MSE = Motion_MSEloss().cuda()

    model_params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'fc8' in key:
                print(key)
                model_params += [{'params': [value], 'lr': 10 * learning_rate}]
            else:
                model_params += [{'params': [value], 'lr': learning_rate}]
    optimizer = optim.SGD(model_params, momentum=params['momentum'],
                          weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', min_lr=1e-7,
                                                     patience=50, factor=0.1)

    save_path = params['save_path_base'] + "train_predict_{}_".format(args.exp_name) + params['data']
    model_save_dir = os.path.join(save_path, time.strftime('%m-%d-%H-%M'))
    writer = SummaryWriter(model_save_dir)
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)

    prev_best_val_loss = 100
    prev_best_loss_model_path = None
    for epoch in tqdm(range(start_epoch, start_epoch + train_epoch)):
        train(train_loader, model, criterion_MSE, criterion_CE, optimizer, epoch,
              writer, root_path=model_save_dir)
        val_loss = validation(val_loader, model, criterion_MSE, criterion_CE,
                              optimizer, epoch)
        if val_loss < prev_best_val_loss:
            model_path = os.path.join(model_save_dir, 'best_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)
            prev_best_val_loss = val_loss
            if prev_best_loss_model_path:
                os.remove(prev_best_loss_model_path)
            prev_best_loss_model_path = model_path
        scheduler.step(val_loss)

        if epoch % 20 == 0:
            checkpoints = os.path.join(model_save_dir, 'model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), checkpoints)
            print("save_to:", checkpoints)
def train_test_split(self, train_ratio):
    train_len = int(len(self) * train_ratio)
    test_len = len(self) - train_len
    self.train_ds, self.test_ds = random_split(self, (train_len, test_len))
    return self.train_ds, self.test_ds
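# A minimal self-contained sketch of a Dataset exposing such a method; the class,
# sizes, and data below are illustrative assumptions, not the original code.
import torch
from torch.utils.data import Dataset, random_split


class ToySelfSplittingDataset(Dataset):
    """Toy dataset that can split itself into train/test Subsets."""

    def __init__(self, n=100):
        self.data = torch.randn(n, 4)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def train_test_split(self, train_ratio):
        train_len = int(len(self) * train_ratio)
        test_len = len(self) - train_len
        self.train_ds, self.test_ds = random_split(self, (train_len, test_len))
        return self.train_ds, self.test_ds


train_ds, test_ds = ToySelfSplittingDataset().train_test_split(0.8)
print(len(train_ds), len(test_ds))  # 80 20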
def main(args): torch.manual_seed(222) torch.cuda.manual_seed_all(222) np.random.seed(222) print(args) # config = [] device = torch.device('cuda') model = None if args.arch == "UNet": model = UNet(args).to(device) else: raise ("architectures other than Unet hasn't been added!!") # update_lrs = nn.Parameter(args.update_lr*torch.ones(self.update_step, len(self.net.vars)), requires_grad=True) model.optimizer = optim.Adam(model.parameters(), lr=args.lr, eps=1e-7, amsgrad=True, weight_decay=args.weight_decay) model.lr_scheduler = optim.lr_scheduler.ExponentialLR( model.optimizer, args.exp_decay) tmp = filter(lambda x: x.requires_grad, model.parameters()) num = sum(map(lambda x: np.prod(x.shape), tmp)) print(model) #for name, param in model.named_parameters(): # print(name, param.size()) print('Total trainable tensors:', num, flush=True) SUMMARY_INTERVAL = 5 TEST_PRINT_INTERVAL = SUMMARY_INTERVAL * 5 ITER_SAVE_INTERVAL = 300 EPOCH_SAVE_INTERVAL = 5 model_path = args.model_saving_path + args.model_name + "_batch_size_" + str( args.batch_size) + "_lr_" + str(args.lr) if not os.path.isdir(model_path): os.mkdir(model_path) ds = SimulationDataset(args.data_folder, total_sample_number=args.total_sample_number) torch.manual_seed(42) train_ds, test_ds = random_split( ds, [int(0.9 * len(ds)), len(ds) - int(0.9 * len(ds))]) print("total training samples: %d, total test samples: %d" % (len(train_ds), len(test_ds)), flush=True) train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, num_workers=0) test_loader = DataLoader(test_ds, batch_size=args.batch_size, shuffle=True, num_workers=0) # for visualizing the graph: #writer = SummaryWriter('runs/'+args.model_name) #test_input = None #for sample in train_loader: # test_input = sample['structure'] # break #writer.add_graph(model, test_input.to(device)) #writer.close() df = pd.DataFrame(columns=[ 'epoch', 'train_loss', 'train_phys_reg', 'test_loss', 'test_phys_reg' ]) train_loss_history = [] train_phys_reg_history = [] test_loss_history = [] test_phys_reg_history = [] start_epoch = 0 if (args.continue_train): print("Restoring weights from ", model_path + "/last_model.pt", flush=True) checkpoint = torch.load(model_path + "/last_model.pt") start_epoch = checkpoint['epoch'] model = checkpoint['model'] model.lr_scheduler = checkpoint['lr_scheduler'] model.optimizer = checkpoint['optimizer'] df = read_csv(model_path + '/' + 'df.csv') best_loss = 1e4 for step in range(start_epoch, args.epoch): print("epoch: ", step, flush=True) # training for sample_batched in train_loader: model.optimizer.zero_grad() x_batch_train, y_batch_train = sample_batched['structure'].to( device), sample_batched['field'].to(device) logits = model(x_batch_train, bn_training=True) #calculate the loss using the ground truth loss = model.loss_fn(logits, y_batch_train) # print("loss: ", loss, flush=True) loss.backward() model.optimizer.step() #Save the weights at the end of each epoch checkpoint = { 'epoch': step, 'model': model, 'optimizer': model.optimizer, 'lr_scheduler': model.lr_scheduler } torch.save(checkpoint, model_path + "/last_model.pt") # evaluation train_loss = 0 for sample_batched in train_loader: x_batch_train, y_batch_train = sample_batched['structure'].to( device), sample_batched['field'].to(device) with torch.no_grad(): logits = model(x_batch_train, bn_training=False) loss = model.loss_fn(logits, y_batch_train) train_loss += loss train_loss /= len(train_loader) test_loss = 0 for sample_batched in test_loader: x_batch_test, y_batch_test = 
sample_batched['structure'].to( device), sample_batched['field'].to(device) with torch.no_grad(): logits = model(x_batch_test, bn_training=False) loss = model.loss_fn(logits, y_batch_test) test_loss += loss test_loss /= len(test_loader) print('train loss: %.5f, test loss: %.5f' % (train_loss, test_loss), flush=True) model.lr_scheduler.step() df = df.append( { 'epoch': step + 1, 'lr': str(model.lr_scheduler.get_last_lr()), 'train_loss': train_loss, 'phys_reg': 0, 'test_loss': test_loss, 'test_phys_reg': 0 }, ignore_index=True) df.to_csv(model_path + '/' + 'df.csv', index=False) if (test_loss < best_loss): best_loss = test_loss checkpoint = { 'epoch': step, 'model': model, 'optimizer': model.optimizer, 'lr_scheduler': model.lr_scheduler } torch.save(checkpoint, model_path + "/best_model.pt")
                          dtype=torch.int64)
print(tensor_data.shape, tensor_data.shape[0], tensor_data.shape[1],
      tensor_data.shape[2])
tensor_data = torch.reshape(
    tensor_data,
    (tensor_data.shape[0], 1, tensor_data.shape[1], tensor_data.shape[2]))
print(tensor_data.shape)
# tensor_data = tensor_data.to(dtype=torch.long)
tensor_label = torch.tensor(np.array(data["Recommended IND"]), device=device,
                            dtype=torch.int64)

dataset = torch.utils.data.TensorDataset(tensor_data, tensor_label)
train_data, test_data = random_split(
    dataset,
    [round(0.8 * tensor_data.shape[0]), round(0.2 * tensor_data.shape[0])],
    generator=torch.Generator().manual_seed(42))
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(test_data, batch_size=100, shuffle=True)

net = Net()
net = net.to(device)

# weights = [1.0, 0.225]
# class_weights = torch.FloatTensor(weights)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(3):  # loop over the dataset multiple times
def load_data(data_dir, batch_size=32, resize=64, train_ratio=.8, Display=False): """Load dataset from specified path. Returns dataset and dataloader dictionaries with transforms set below. data_dir: String or Path object that specifies directory. Play it safe and use absolute path batch_size: Batch size of dataloader, 32 by default NOTE: Depending on your dataset folder names may have to change dictionary item names accordingly. i.e 'valid' to 'test' or even 'val'. """ # Incorporate path checking and try catch in the future # Assumes order [Training, Testing] dataset_folders = { 'train': '2D_Images_Training', 'test': '2D_Images_Testing' } if Path.exists(Path(data_dir)): data_transforms = { 'train': transforms.Compose([ transforms.RandomResizedCrop(resize), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]), 'test': transforms.Compose([ transforms.RandomResizedCrop(resize), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]), } image_datasets = { x[0]: datasets.ImageFolder(Path(data_dir, x[1]), data_transforms[x[0]]) for x in dataset_folders.items() } # Get size of train and split based on param train_ratio training_init_size = len(image_datasets['train']) train_size = int(train_ratio * training_init_size) val_size = training_init_size - train_size # Random Split and resulting grab indices train, val = random_split(image_datasets['train'], [train_size, val_size]) train_indices = train.indices val_indices = val.indices if Display: print(f'Initial Training Dataset size: {training_init_size}') print(f'Train/Val Ratio: {train_ratio}/{1-train_ratio}') print(f'New Training Dataset size: {train_size}') # Create the dataloaders and store dataset_sizes train_dataloader = DataLoader( image_datasets['train'], batch_size=batch_size, sampler=SubsetRandomSampler(train_indices)) val_dataloader = DataLoader(image_datasets['train'], batch_size=batch_size, sampler=SubsetRandomSampler(val_indices)) test_dataloader = DataLoader(image_datasets['test'], batch_size=batch_size, shuffle=True, num_workers=4) dataloaders = { 'train': train_dataloader, 'val': val_dataloader, 'test': test_dataloader } dataset_sizes = { 'train': train_size, 'val': val_size, 'test': len(image_datasets['test']) } log = [x for x in dataloaders] print(f"Successfully Loaded built {log}") print( f"Root folders {dataloaders['train'].dataset.root} and {dataloaders['test'].dataset.root} " ) return image_datasets, dataloaders, dataset_sizes else: print(f"{data_dir} is a faulty path")
def get_model(gd, save_dir='./checkpoints', version=1, continue_training=False, use_divnorm=0): ''' get a shared generalized quadratic model given a PixelDataset (see Datasets.py) continue_training will load the best model and continue training from there. Useful for refining a model after stim correction version (default: 1) - set to a new number if you want to train a completely new model ''' from V1FreeViewingCode.models.basic import sGQM from pytorch_lightning import Trainer, seed_everything from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from pytorch_lightning.loggers import TestTubeLogger from pathlib import Path save_dir = Path(save_dir) # get train/validation set n_val = np.floor(len(gd)/5).astype(int) n_train = (len(gd)-n_val).astype(int) gd_train, gd_val = random_split(gd, lengths=[n_train, n_val]) # build dataloaders bs = 1000 train_dl = DataLoader(gd_train, batch_size=bs) valid_dl = DataLoader(gd_val, batch_size=bs) D_in = gd.NX*gd.NY*gd.num_lags if use_divnorm==2: gqm0 = sQDN(input_dim=D_in, n_hidden=gd.NC*2, output_dim=gd.NC, learning_rate=.001,betas=[.9,.999], weight_decay=1e-0, normalization=2, relu = True, filternorm = 0, optimizer='AdamW', ei_split=gd.NC) elif use_divnorm==1: gqm0 = sDN(input_dim=D_in, n_hidden=gd.NC*2, output_dim=gd.NC, learning_rate=.001,betas=[.9,.999], weight_decay=1e-0, optimizer='AdamW') else: gqm0 = sGQM(input_dim=D_in, n_hidden=gd.NC*2, output_dim=gd.NC, learning_rate=.001,betas=[.9,.999], weight_decay=1e-0, normalization=2, relu = True, filternorm = 1, optimizer='AdamW', ei_split=gd.NC) early_stop_callback = EarlyStopping(monitor='val_loss', min_delta=0.0) checkpoint_callback = ModelCheckpoint(monitor='val_loss') logger = TestTubeLogger( save_dir=save_dir, name=gd.id, version=version # fixed to one to ensure checkpoint load ) ckpt_folder = save_dir / sessid / 'version_{}'.format(version) / 'checkpoints' best_epoch = find_best_epoch(ckpt_folder) if best_epoch is None: trainer = Trainer(gpus=1, callbacks=[early_stop_callback], checkpoint_callback=checkpoint_callback, logger=logger, deterministic=False, progress_bar_refresh_rate=20, max_epochs=1000, auto_lr_find=False) seed_everything(42) # trainer.tune(gqm0, train_dl, valid_dl) # find learning rate trainer.fit(gqm0, train_dl, valid_dl) else: if use_divnorm==2: gqm0 = sQDN.load_from_checkpoint(str(ckpt_folder / 'epoch={}.ckpt'.format(best_epoch))) elif use_divnorm==1: gqm0 = sDN.load_from_checkpoint(str(ckpt_folder / 'epoch={}.ckpt'.format(best_epoch))) else: gqm0 = sGQM.load_from_checkpoint(str(ckpt_folder / 'epoch={}.ckpt'.format(best_epoch))) if continue_training: trainer = Trainer(gpus=1, callbacks=[early_stop_callback], checkpoint_callback=checkpoint_callback, logger=logger, deterministic=False, progress_bar_refresh_rate=20, max_epochs=1000, auto_lr_find=False, resume_from_checkpoint=str(ckpt_folder / 'epoch={}.ckpt'.format(best_epoch)) ) seed_everything(42) trainer.fit(gqm0, train_dl, valid_dl) return gqm0
data_y6 = normal_label(data_y6)

train_x = np.append(data_x1, data_x6, axis=0)
train_y = np.append(data_y1, data_y6, axis=0)
test_x = data_x4
test_y = data_y4

train_x = torch.from_numpy(train_x)
train_y = torch.from_numpy(train_y)
test_x = torch.from_numpy(test_x)
test_y = torch.from_numpy(test_y)

train_dataset = Data.TensorDataset(train_x, train_y)
all_num = train_x.shape[0]
train_num = int(all_num * 0.8)
train_data, val_data = Data.random_split(train_dataset,
                                         [train_num, all_num - train_num])
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_loader = Data.DataLoader(
    dataset=val_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

test_dataset = Data.TensorDataset(test_x, test_y)
test_loader = Data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
all_datasets = datasets.ImageFolder('./drive/My Drive/特別研究/mnist_sampled',
                                    transform=ImageTransform(mean, std))
test_datasets = MNIST(root='./drive/My Drive/特別研究', train=False,
                      transform=transforms.ToTensor(), download=True)

random.seed(2)
numpy.random.seed(2)
torch.manual_seed(2)
torch.cuda.manual_seed(2)
torch.backends.cudnn.deterministic = True

# Split into train and val
train_dataset, val_dataset = data.random_split(all_datasets, [500, 100])

# Create the DataLoaders
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = data.DataLoader(test_datasets, batch_size=batch_size, shuffle=False)


# Build the network
class ConvNet(nn.Module):
def imshow(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))


masks = pd.read_csv(os.path.join(ds_path, 'train_ship_segmentations.csv')).fillna(-1)
print(masks.head())
airimg = AirbusDS(path, masks)
# total images in set
print(airimg.len)

train_len = int(0.7 * airimg.len)
valid_len = airimg.len - train_len
train, valid = D.random_split(airimg, lengths=[train_len, valid_len])
loader = D.DataLoader(train, batch_size=24, shuffle=True, num_workers=0)

# get some images
dataiter = iter(loader)
images, masks = next(dataiter)

# show images
plt.figure(figsize=(16, 16))
plt.subplot(211)
imshow(torchvision.utils.make_grid(images))
plt.subplot(212)
imshow(torchvision.utils.make_grid(masks))
plt.show()
def get_splits(self, n_test=0.33):
    # determine sizes
    test_size = round(n_test * len(self.X))
    train_size = len(self.X) - test_size
    # calculate the split
    return random_split(self, [train_size, test_size])
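# A minimal sketch of consuming the two Subsets this returns with DataLoaders;
# the TensorDataset below is an illustrative stand-in for the dataset that owns
# get_splits(), and the batch sizes are assumptions.
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split

ds = TensorDataset(torch.randn(150, 4), torch.randint(0, 2, (150,)))
test_size = round(0.33 * len(ds))
train, test = random_split(ds, [len(ds) - test_size, test_size])

train_dl = DataLoader(train, batch_size=32, shuffle=True)
test_dl = DataLoader(test, batch_size=1024, shuffle=False)
xb, yb = next(iter(train_dl))
print(xb.shape, yb.shape)  # torch.Size([32, 4]) torch.Size([32])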
val_tfms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_means, channel_stds)
])
mask_tfms = transforms.Compose([transforms.ToTensor()])

dataset = ImgDataSet(img_dir=DIR_IMG, img_fnames=img_names,
                     img_transform=train_tfms, mask_dir=DIR_MASK,
                     mask_fnames=mask_names, mask_transform=mask_tfms)
train_size = int(0.85 * len(dataset))
valid_size = len(dataset) - train_size
train_dataset, valid_dataset = random_split(dataset, [train_size, valid_size])

train_loader = DataLoader(train_dataset, args.batch_size, shuffle=False,
                          pin_memory=torch.cuda.is_available(),
                          num_workers=args.num_workers)
valid_loader = DataLoader(valid_dataset, args.batch_size, shuffle=False,
                          pin_memory=torch.cuda.is_available(),
                          num_workers=args.num_workers)

model.cuda()

train(train_loader, model, criterion, optimizer, validate, args)
def train_val_split(dataset: Dataset, val_size: float = 0.1):
    L = len(dataset)
    train_size = int(L * (1 - val_size))
    val_size = L - train_size
    return random_split(dataset, [train_size, val_size],
                        generator=torch.Generator().manual_seed(42))
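# Illustrative usage sketch (toy data): because the generator seed is fixed inside
# the helper, two calls on the same dataset produce identical partitions.
import torch
from torch.utils.data import TensorDataset

toy = TensorDataset(torch.randn(50, 3))
train_a, val_a = train_val_split(toy, val_size=0.2)
train_b, val_b = train_val_split(toy, val_size=0.2)
assert train_a.indices == train_b.indices
print(len(train_a), len(val_a))  # 40 10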
if cfg.use_gpu and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# generate the volleyball dataset object
random_seed = 137  # set the seed
random.seed(random_seed)
full_dataset = volleyballDataset.VolleyballDatasetNew(cfg.dataPath, cfg.imageSize,
                                                      frameList=list(range(17)),
                                                      mode=cfg.dataset_mode,
                                                      seq_num=cfg.seq_len)
# get the object information (object categories count)
cfg.actions_num, cfg.activities_num, cfg.orientations_num = full_dataset.classCount()

# divide the whole dataset into train and test
full_dataset_len = len(full_dataset)
if cfg.split_mode == 3:
    train_len = int(full_dataset_len * cfg.dataset_splitrate)
    test_len = full_dataset_len - train_len
    trainDataset, testDataset = data.random_split(full_dataset, [train_len, test_len])
elif cfg.split_mode == 2:
    indices = full_dataset.output_allFrame()
    random.shuffle(indices)
    split = int(cfg.dataset_splitrate * len(indices))
    train_indices = indices[:split]
    test_indices = indices[split:]
    trainDataset = volleyballDataset.VolleyballDatasetNew(cfg.dataPath, cfg.imageSize,
                                                          frameList=list(range(17)),
                                                          mode=cfg.dataset_mode,
                                                          seq_num=cfg.seq_len)
    trainDataset.set_allFrame(train_indices)
    testDataset = volleyballDataset.VolleyballDatasetNew(cfg.dataPath, cfg.imageSize,
                                                         frameList=list(range(17)),
                                                         mode=cfg.dataset_mode,
                                                         seq_num=cfg.seq_len)
    testDataset.set_allFrame(test_indices)
elif cfg.split_mode == 4:
    trainDataset = volleyballDataset.VolleyballDatasetS(cfg.dataPath, cfg.imageSize,
                                                        cfg.train_seqs, mode=1)
    testDataset = volleyballDataset.VolleyballDatasetS(cfg.dataPath, cfg.imageSize,
                                                       cfg.test_seqs, mode=0)
img_tensor, label = dataset[0]
print(img_tensor.shape, label)

# %%
img_tensor

# %%
print(img_tensor[0, 10:15, 10:15])

# %%
print(torch.max(img_tensor), torch.min(img_tensor))

# %%
plt.imshow(img_tensor[0, 10:15, 10:15], cmap='gray')

# %%
train_ds, val_ds = random_split(dataset, [50000, 10000])
len(train_ds), len(val_ds)

# %%
batch_size = 128
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)

# %%
input_size = 28 * 28
num_classes = 10

# %%
model = nn.Linear(input_size, num_classes)
print(model.weight.shape)
model.weight

# %%
def main():
    print("#####################################################################")
    print("Step1 Training Phase")
    print("#####################################################################")

    p = parameter.Parameter()
    datasets_save_dir = p.datasets_path
    model_save_dir = p.model_path
    split = p.datasets_split
    batch_size = p.batch_size
    learning_late = p.learning_late
    num_layer = p.num_layer
    epochs = p.epochs

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("CUDA is available:", torch.cuda.is_available())

    print("\nLoading Datasets.....")
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    # Define the name of the saved model
    now = datetime.datetime.now()
    model_path = model_save_dir + "/model_layer" + str(num_layer) + "_" + \
        now.strftime('%Y%m%d_%H%M%S') + ".pt"

    tensor_speech = torch.load(datasets_save_dir + "/tensor_speech")
    tensor_addnoise = torch.load(datasets_save_dir + "/tensor_addnoise")
    mydataset = utils.TensorDataset(tensor_speech, tensor_addnoise)

    # random_split expects integer lengths that sum to the dataset size, so round
    # the first two splits down and give the remainder to the training split.
    data_num = tensor_speech.shape[0]
    test_size = int(data_num * split[0])
    val_size = int(data_num * split[1])
    train_size = data_num - test_size - val_size
    test_dataset, val_dataset, train_dataset = utils.random_split(
        mydataset, [test_size, val_size, train_size])

    train_loader = utils.DataLoader(train_dataset, batch_size=batch_size,
                                    num_workers=os.cpu_count(), pin_memory=True,
                                    shuffle=True)
    val_loader = utils.DataLoader(val_dataset, batch_size=batch_size,
                                  num_workers=os.cpu_count(), pin_memory=True,
                                  shuffle=True)
    test_loader = utils.DataLoader(test_dataset, batch_size=batch_size,
                                   num_workers=os.cpu_count(), pin_memory=True,
                                   shuffle=True)

    # model
    feat = tensor_addnoise.shape[1]
    sequence = tensor_addnoise.shape[2]
    model = mm.Net(sequence, feat, num_layer).to(device)

    # loss/optimizer
    criterion = nn.L1Loss().to(device)
    # criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_late)

    print("#####################################################################")
    print(" Start Training..")
    print("#####################################################################")

    train_loss_list = []
    test_loss_list = []
    for epoch in tqdm(range(1, epochs + 1), desc='[Training..]'):
        # Training
        model.train()  # switch to training mode
        train_loss = 0
        for batch_idx, (speech, addnoise) in enumerate(train_loader):
            # fetch the batch
            speech, addnoise = speech.to(device), addnoise.to(device)
            optimizer.zero_grad()
            # forward pass
            mask = model(addnoise)   # the model estimates the mask itself
            h_hat = mask * addnoise  # apply the mask to the noisy speech to enhance the target speech
            # compute the loss and backpropagate
            loss = criterion(h_hat, speech)  # enhanced speech vs. clean label
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader.dataset)
        train_loss_list.append(train_loss)

        # Eval
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for speech, addnoise in val_loader:
                speech, addnoise = speech.to(device), addnoise.to(device)
                mask = model(addnoise)
                h_hat = mask * addnoise
                test_loss += criterion(h_hat, speech).item()  # sum up batch loss
        # normalize by the validation set, which is what was iterated above
        test_loss /= len(val_loader.dataset)
        test_loss_list.append(test_loss)

        tqdm.write('\nTrain set: Average loss: {:.6f}\nTest set: Average loss: {:.6f}'
                   .format(train_loss, test_loss))

        if epoch == 1:
            best_loss = test_loss
            torch.save(model.state_dict(), model_path)
        else:
            if best_loss > test_loss:
                torch.save(model.state_dict(), model_path)
                best_loss = test_loss

        if epoch % 10 == 0:  # periodic save every 10 epochs
            epoch_model_path = model_save_dir + "/model_layer" + str(num_layer) + \
                "_" + now.strftime('%Y%m%d_%H%M%S') + "_Epoch" + str(epoch) + ".pt"
            torch.save(model.state_dict(), epoch_model_path)

    fig, ax = plt.subplots()
    ax.plot(train_loss_list, linewidth=2, color="red", label="Train Loss")
    ax.plot(test_loss_list, linewidth=2, color="blue", label="Test Loss")
    ax.legend(loc='upper right')
    fig.tight_layout()
    fig.savefig(model_save_dir + "/model_layer" + str(num_layer) + "_" +
                now.strftime('%Y%m%d_%H%M%S') + ".png")
# Hyperparameters Defined
#-------------------------------
batch_size = 128
#input_size = 1000
#hidden_size = 120
#num_classes = 8
num_epochs = 8
#batch_size = 64
learning_rate = 1e-3
#----------------------------------------------------------------------------

all_data = datasets.ImageFolder(root='./natural_images')
train_data_len = int(len(all_data) * 0.8)
valid_data_len = int((len(all_data) - train_data_len) / 2)
test_data_len = len(all_data) - train_data_len - valid_data_len
train_data, val_data, test_data = random_split(
    all_data, [train_data_len, valid_data_len, test_data_len])
train_data.dataset.transform = image_transforms['train']
val_data.dataset.transform = image_transforms['val']
test_data.dataset.transform = image_transforms['test']
print(len(train_data), len(val_data), len(test_data))

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

trainiter = iter(train_loader)
images, labels = next(trainiter)
print(images.shape, labels.shape)
#-----------------------------------------------------------------------------------------------
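# Note: the three Subsets returned by random_split share one underlying ImageFolder,
# so the three `.dataset.transform` assignments above overwrite each other and every
# split ends up with the 'test' transform. A minimal sketch of the usual workaround,
# a per-split wrapper (the class name and usage are illustrative, not original code):
from torch.utils.data import Dataset


class TransformedSubset(Dataset):
    """Wrap a Subset so each split can carry its own transform."""

    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, idx):
        img, label = self.subset[idx]
        if self.transform is not None:
            img = self.transform(img)
        return img, label


# Hypothetical usage with the splits created above:
# train_data = TransformedSubset(train_data, image_transforms['train'])
# val_data = TransformedSubset(val_data, image_transforms['val'])
# test_data = TransformedSubset(test_data, image_transforms['test'])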
def train_net(net, discriminator1, discriminator2, upsample, device, epochs=8, batch_size=1, lr=0.001, val_percent=0.1, save_cp=True, img_scale=1): ########################## # creating syn dataset ########################## dataset = BasicDataset(config.SOURCE_RGB_DIR_PATH, config.SOURCE_MASKS_DIR_PATH, extend_dataset=True, num_images=config.ITERATIONS, scale=img_scale, apply_imgaug=config.APPLY_IMAGE_AUG, take_center_crop=config.TAKE_CENTER_CROP, crop_h=config.CROP_H, crop_w=config.CROP_W, mask_suffix=config.SOURCE_GT_MASK_SUFFIX) source_train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True) source_train_iterator = enumerate(source_train_loader) ########################## # creating real dataset ########################## dataset = BasicDataset(config.TARGET_RGB_DIR_PATH, config.TARGET_MASKS_DIR_PATH, extend_dataset=True, num_images=int(config.ITERATIONS + config.NUM_VAL), scale=img_scale, apply_imgaug=config.APPLY_IMAGE_AUG, take_center_crop=config.TAKE_CENTER_CROP, crop_h=config.CROP_H, crop_w=config.CROP_W, mask_suffix=config.TARGET_GT_MASK_SUFFIX) n_val = config.NUM_VAL n_train = len(dataset) - n_val train, val = random_split(dataset, [n_train, n_val]) logging.info(f'Real - Train dataset has {n_train} examples') logging.info(f'Real - Val dataset has {n_val} examples') target_train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True) target_val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True, drop_last=True) target_train_iterator = enumerate(target_train_loader) ########################## ########################## writer = SummaryWriter(comment=f'_ADV_{config.EXPERIMENT}') global_step = 0 logging.info(f'''Starting training: Model: {config.MODEL_SELECTION} Epochs: {epochs} Batch size: {batch_size} Learning rate: {lr} Checkpoints: {save_cp} Training size: {n_train} Val size: {n_val} Images scaling: {img_scale} Crop images: {config.TAKE_CENTER_CROP} Image Size: {config.CROP_H}, {config.CROP_W} Apply imgaug: {config.APPLY_IMAGE_AUG} Device: {device.type} ''') ################## # SEGMENTATION ################## if config.MODEL_SELECTION == 'og_deeplab': optimizer = optim.RMSprop(net.optim_parameters(lr=lr), lr=lr, weight_decay=1e-8, momentum=0.9) else: optimizer = optim.RMSprop(net.parameters(), lr=lr, weight_decay=1e-8, momentum=0.9) if config.NUM_CLASSES > 1: criterion = nn.CrossEntropyLoss() else: criterion = nn.BCEWithLogitsLoss() ################## # DISCRIMINATOR ################## optimizer_discriminator1 = optim.Adam(discriminator1.parameters(), lr=lr, betas=(0.9, 0.99)) optimizer_discriminator2 = optim.Adam(discriminator2.parameters(), lr=lr, betas=(0.9, 0.99)) if config.GAN == 'Vanilla': bce_loss = torch.nn.BCEWithLogitsLoss() elif config.GAN == 'LS': bce_loss = torch.nn.MSELoss() else: raise NotImplementedError ################## # TODO: LR scheduler ################## # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min' if config.NUM_CLASSES > 1 else 'max', patience=2) # scheduler_discriminator1 = optim.lr_scheduler.ReduceLROnPlateau(optimizer_discriminator1, 'min' if config.NUM_CLASSES > 1 else 'max', patience=2) # scheduler_discriminator2 = optim.lr_scheduler.ReduceLROnPlateau(optimizer_discriminator1, 'min' if config.NUM_CLASSES > 1 else 'max', patience=2) def lr_poly(base_lr, iter, max_iter, power): return base_lr * ((1 - float(iter) / max_iter)**(power)) def adjust_learning_rate(optimizer, i_iter, lr=config.LR): lr = 
lr_poly(lr, i_iter, config.ITERATIONS, config.POWER)
    optimizer.param_groups[0]['lr'] = lr
    if len(optimizer.param_groups) > 1:
        optimizer.param_groups[1]['lr'] = lr * 10


def adjust_learning_rate_D(optimizer, i_iter, lr=config.LR):
    # lr_poly: polynomial decay helper (see the sketch after this script)
    lr = lr_poly(lr, i_iter, config.ITERATIONS, config.POWER)
    optimizer.param_groups[0]['lr'] = lr
    if len(optimizer.param_groups) > 1:
        optimizer.param_groups[1]['lr'] = lr * 10


##################
net.train()
discriminator1.train()
discriminator2.train()

best_Fwb = -np.inf

# labels for adversarial training
target_label = 0
source_label = 1

if save_cp:
    try:
        os.mkdir(config.CHECKPOINT_DIR_PATH)
        logging.info('Created checkpoint directory')
    except OSError:
        pass

with tqdm(total=config.ITERATIONS, desc=f'Iterations {config.ITERATIONS}', unit='img') as pbar:
    while global_step < config.ITERATIONS:
        seg_loss = 0
        seg_adv_loss = 0
        dis_loss1 = 0
        dis_loss2 = 0

        optimizer.zero_grad()
        optimizer_discriminator1.zero_grad()
        optimizer_discriminator2.zero_grad()

        adjust_learning_rate(optimizer, global_step)
        adjust_learning_rate_D(optimizer_discriminator1, global_step)
        adjust_learning_rate_D(optimizer_discriminator2, global_step)

        ##########################
        # Seg w./ Source
        ##########################
        _, batch = next(source_train_iterator)
        imgs = batch['image']
        true_masks = batch['mask']
        assert imgs.shape[1] == config.NUM_CHANNELS, \
            f'Network has been defined with {config.NUM_CHANNELS} input channels, ' \
            f'but loaded images have {imgs.shape[1]} channels. Please check that ' \
            'the images are loaded correctly.'

        imgs = imgs.to(device=device, dtype=torch.float32)
        mask_type = torch.float32 if config.NUM_CLASSES == 1 else torch.long
        true_masks = true_masks.to(device=device, dtype=mask_type)

        if config.MULTI_PRED:
            ###################################
            # multi
            ###################################
            masks_pred1_source, masks_pred2_source = net(imgs)
            loss = criterion(masks_pred1_source, true_masks.squeeze(1)) + \
                config.LAMBDA_SEG * criterion(masks_pred2_source, true_masks.squeeze(1))
            seg_loss += loss.item()
            loss.backward()
            nn.utils.clip_grad_value_(net.parameters(), 0.1)

        writer.add_scalar('Loss/train', seg_loss, global_step)

        ##########################
        # Seg w./ Target
        ##########################
        # don't accumulate grads in D
        for param in discriminator1.parameters():
            param.requires_grad = False
        for param in discriminator2.parameters():
            param.requires_grad = False

        ###############
        _, batch = next(target_train_iterator)
        imgs = batch['image']
        imgs = imgs.to(device=device, dtype=torch.float32)
        # true_masks = batch['mask']
        # true_masks = true_masks.to(device=device, dtype=mask_type)

        if config.MULTI_PRED:
            ###################################
            # multi
            ###################################
            masks_pred1_target, masks_pred2_target = net(imgs)
            discriminator_out1_target = discriminator1(F.softmax(masks_pred1_target, dim=1))
            discriminator_out2_target = discriminator2(F.softmax(masks_pred2_target, dim=1))
            discriminator_adv1_fill = torch.full_like(discriminator_out1_target, source_label)
            discriminator_adv2_fill = torch.full_like(discriminator_out2_target, source_label)
            loss = config.ADV_SEG1 * bce_loss(discriminator_out1_target, discriminator_adv1_fill) + \
                config.ADV_SEG2 * bce_loss(discriminator_out2_target, discriminator_adv2_fill)
            seg_adv_loss += loss.item()
            loss.backward()

        writer.add_scalar('Adv_Loss/Adv', seg_adv_loss, global_step)

        #############################
        # DISCRIMINATOR w/ Target
        #############################
        # now accumulate grads in D
        for param in discriminator1.parameters():
            param.requires_grad = True
        for param in discriminator2.parameters():
            param.requires_grad = True

        masks_pred1_target = masks_pred1_target.detach()
        masks_pred2_target = masks_pred2_target.detach()

        discriminator_out1_target = discriminator1(F.softmax(masks_pred1_target, dim=1))
        discriminator_out2_target = discriminator2(F.softmax(masks_pred2_target, dim=1))
        discriminator_fill1_target = torch.full_like(discriminator_out1_target, target_label)
        discriminator_fill2_target = torch.full_like(discriminator_out2_target, target_label)

        loss_D1 = bce_loss(discriminator_out1_target, discriminator_fill1_target) / 2
        loss_D2 = bce_loss(discriminator_out2_target, discriminator_fill2_target) / 2
        dis_loss1 += loss_D1.item()
        dis_loss2 += loss_D2.item()
        loss_D1.backward()
        loss_D2.backward()

        #############################
        # DISCRIMINATOR w/ Source
        #############################
        masks_pred1_source = masks_pred1_source.detach()
        masks_pred2_source = masks_pred2_source.detach()

        discriminator_out1_source = discriminator1(F.softmax(masks_pred1_source, dim=1))
        discriminator_out2_source = discriminator2(F.softmax(masks_pred2_source, dim=1))
        discriminator_fill1_source = torch.full_like(discriminator_out1_source, source_label)
        discriminator_fill2_source = torch.full_like(discriminator_out2_source, source_label)

        loss_D1 = bce_loss(discriminator_out1_source, discriminator_fill1_source) / 2
        loss_D2 = bce_loss(discriminator_out2_source, discriminator_fill2_source) / 2
        dis_loss1 += loss_D1.item()
        dis_loss2 += loss_D2.item()
        loss_D1.backward()
        loss_D2.backward()

        writer.add_scalar('Adv_Loss/Discriminator_1', dis_loss1, global_step)
        writer.add_scalar('Adv_Loss/Discriminator_2', dis_loss2, global_step)

        ##########################
        optimizer.step()
        optimizer_discriminator1.step()
        optimizer_discriminator2.step()

        pbar.set_postfix(**{
            'loss ': seg_loss,
            'adv_loss ': seg_adv_loss,
            'dis_loss1 ': dis_loss1,
            'dis_loss2 ': dis_loss2
        })

        global_step += 1
        pbar.update(imgs.shape[0])

        if global_step % (config.NUM_IMAGES_PER_EPOCH // (1 * batch_size)) == 0:
            # segmentation
            for tag, value in net.named_parameters():
                tag = tag.replace('.', '/')
                if value.grad is None:
                    print('Seg_Layer: ', tag.split('/'))
                    writer.add_histogram('weights/' + tag, value.data.cpu().numpy(), global_step)
                else:
                    writer.add_histogram('weights/' + tag, value.data.cpu().numpy(), global_step)
                    writer.add_histogram('grads/' + tag, value.grad.data.cpu().numpy(), global_step)

            # discriminator 1
            for tag, value in discriminator1.named_parameters():
                tag = tag.replace('.', '/')
                if value.grad is None:
                    print('Dis1_Layer: ', tag.split('/'))
                    writer.add_histogram('dis_weights1/' + tag, value.data.cpu().numpy(), global_step)
                else:
                    writer.add_histogram('dis_weights1/' + tag, value.data.cpu().numpy(), global_step)
                    writer.add_histogram('dis_grads1/' + tag, value.grad.data.cpu().numpy(), global_step)

            # discriminator 2
            for tag, value in discriminator2.named_parameters():
                tag = tag.replace('.', '/')
                if value.grad is None:
                    print('Dis2_Layer: ', tag.split('/'))
                    writer.add_histogram('dis_weights2/' + tag, value.data.cpu().numpy(), global_step)
                else:
                    writer.add_histogram('dis_weights2/' + tag, value.data.cpu().numpy(), global_step)
                    writer.add_histogram('dis_grads2/' + tag, value.grad.data.cpu().numpy(), global_step)

            # weighted fwb score
            val_loss, Fwb = eval_net(net, upsample, target_val_loader, writer, global_step, device)
            writer.add_scalar('Weighted-Fb/Current-Fwb', Fwb, global_step)

            # scheduler.step(val_loss)
            # scheduler_discriminator1.step(val_loss)
            # scheduler_discriminator2.step(val_loss)

            writer.add_scalar('learning_rate/seg', optimizer.param_groups[0]['lr'], global_step)
            writer.add_scalar('learning_rate/dis1', optimizer_discriminator1.param_groups[0]['lr'], global_step)
            writer.add_scalar('learning_rate/dis2', optimizer_discriminator2.param_groups[0]['lr'], global_step)

            if config.NUM_CLASSES > 1:
                writer.add_scalar('Loss/test', val_loss, global_step)
            else:
                logging.info('Validation Dice Coeff: {}'.format(Fwb))
                writer.add_scalar('Dice/test', Fwb, global_step)

            if Fwb > best_Fwb and save_cp:
                best_Fwb = Fwb
                writer.add_scalar('Weighted-Fb/Best-Fwb', best_Fwb, global_step)
                torch.save(net.state_dict(), config.BEST_MODEL_SAVE_PATH)
                torch.save(discriminator1.state_dict(), config.BEST_DIS1_SAVE_PATH)
                torch.save(discriminator2.state_dict(), config.BEST_DIS2_SAVE_PATH)
                logging.info('Best Model Saved with Fwb: {:.5}!'.format(best_Fwb))

if save_cp:
    torch.save(net.state_dict(), config.MODEL_SAVE_PATH + "Best_Seg_{:.5}.pth".format(best_Fwb))
    torch.save(discriminator1.state_dict(), config.MODEL_SAVE_PATH + "Best_Dis1_{:.5}.pth".format(best_Fwb))
    torch.save(discriminator2.state_dict(), config.MODEL_SAVE_PATH + "Best_Dis2_{:.5}.pth".format(best_Fwb))
    logging.info('Final Model Saved with Fwb: {:.5}!'.format(best_Fwb))

writer.close()
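# The learning-rate adjusters in the script above call a helper named lr_poly that
# is not shown in this excerpt. A minimal sketch of the standard polynomial decay
# it presumably implements (the exact form used here is an assumption):
def lr_poly(base_lr, i_iter, max_iter, power):
    """Decay base_lr towards 0 over max_iter iterations with the given power."""
    return base_lr * ((1 - float(i_iter) / max_iter) ** power)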
# Loading files
train_x = np.loadtxt(sys.argv[1])
train_y = np.loadtxt(sys.argv[2])
test_x = np.loadtxt(sys.argv[3])

# Normalize train and test values
train_x = normalize(train_x)
test_x = normalize(test_x)

data_set = DataSet(train_x, train_y)
test_set = DataSet(test_x, np.zeros(5000))

# Split data set into 80% train and 20% validation
train_size = round(len(data_set) * 0.8)
validation_size = len(data_set) - train_size
train_set, validation_set = data.random_split(data_set, [train_size, validation_size])

# Train, validation and test set loaders
train_loader = data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# Model D
model = NeuralNetD(IMAGE_SIZE)

# Optimizer
optimizer_SGD = t.optim.SGD(model.parameters(), lr=0.1)

# model's predictions
y_hats = []
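# The snippet above relies on a normalize() helper that is not shown. A minimal
# sketch, assuming per-feature min-max scaling to [0, 1]; the original helper may
# differ (e.g. z-score normalization or simply dividing pixel values by 255):
def normalize(x):
    x_min = x.min(axis=0)
    x_max = x.max(axis=0)
    # small epsilon keeps constant columns from dividing by zero
    return (x - x_min) / (x_max - x_min + 1e-8)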
def train_model_for_data(self, train_x, train_y, epochs, config, valid=0.1,
                         use_class_weights=True, checkpoint_prefix=None, early_stopping=False):
    vocab_size = train_x.max() + 1

    class_weights = {}
    if train_y.ndim == 1:
        num_outputs = 1
        # 1-dim array of 0 and 1
        one_proportion = float(train_y.sum()) / len(train_y)
        one_weight = 0.5 / one_proportion
        zero_weight = 0.5 / (1. - one_proportion)
        class_weights[0] = zero_weight
        class_weights[1] = one_weight
    elif train_y.shape[1] == 1:
        num_outputs = 1
    else:
        num_outputs = train_y.shape[1]

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = self.get_model(train_x.shape, vocab_size, num_outputs, config).to(device)

    batch_size = config['batch_size']
    loss_fn = config['loss_fn'].to(device)
    opt = config['opt_fn'](model) if 'opt_fn' in config else self.get_default_optimizer(model)

    # num_batches = train_x.shape[0] // batch_size
    tensor_dataset = TensorDataset(torch.LongTensor(train_x), torch.FloatTensor(train_y))
    train_size = int((1 - valid) * len(tensor_dataset))
    valid_size = len(tensor_dataset) - train_size
    train_dataset, dev_dataset = random_split(tensor_dataset, [train_size, valid_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    best_val_loss = -1
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for i_batch, sample_batched in enumerate(train_loader):
            model.zero_grad()
            batch_x = sample_batched[0].to(device)
            batch_y = sample_batched[1].to(device)
            pred_y = model(batch_x)
            loss = loss_fn(pred_y, batch_y)
            loss.backward()
            opt.step()
            epoch_loss += loss.item()

        model.eval()
        with torch.no_grad():
            pred_y = model(dev_dataset[:][0].to(device))
            val_loss = loss_fn(pred_y, dev_dataset[:][1].to(device))

        if val_loss < best_val_loss or best_val_loss < 0:
            best_val_loss = val_loss
            outdir = tempfile.gettempdir()
            if checkpoint_prefix is not None:
                torch.save(model, os.path.join(outdir, '%s_best_model.pt' % (checkpoint_prefix,)))
            else:
                torch.save(model, os.path.join(outdir, 'best_model.pt'))

        print('Epoch %d: Training loss=%f, validation loss=%f' % (epoch, epoch_loss, val_loss.item()))

    if best_val_loss > 0:
        if checkpoint_prefix is not None:
            best_model = torch.load(os.path.join(outdir, '%s_best_model.pt' % (checkpoint_prefix,)))
        else:
            best_model = torch.load(os.path.join(outdir, 'best_model.pt'))
    else:
        raise Exception('No good models found!')

    return best_model, None
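# The validation pass above indexes the random_split Subset as dev_dataset[:],
# which returns the whole split as a tuple of stacked tensors. A small,
# self-contained illustration of that idiom (toy tensors only):
import torch
from torch.utils.data import TensorDataset, random_split

toy_ds = TensorDataset(torch.arange(10).long(), torch.arange(10).float())
train_part, dev_part = random_split(toy_ds, [8, 2])
dev_x, dev_y = dev_part[:]         # two tensors, each of shape (2,)
print(dev_x.shape, dev_y.shape)    # torch.Size([2]) torch.Size([2])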
def train_net(net, device, epochs=1, batch_size=16, lr=0.001, val_percent=0.1,
              save_cp=True, img_scale=0.5, start_saving=0, reduction_loss='mean'):
    dataset = Dataset_CTtoPET(dir_CT, dir_PET)
    n_val = int(len(dataset) * val_percent)    # 2685
    n_train = len(dataset) - n_val             # 24168
    train, val = random_split(dataset, [n_train, n_val])
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True,
                              num_workers=8, pin_memory=True)
    val_loader = DataLoader(val, batch_size=batch_size, shuffle=False,
                            num_workers=8, pin_memory=True, drop_last=True)

    writer = SummaryWriter(comment=f'LR_{lr}_BS_{batch_size}_SCALE_{img_scale}')
    global_step = 0

    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate:   {lr}
        Training size:   {n_train}
        Validation size: {n_val}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images scaling:  {img_scale}
        reduction_loss:  {reduction_loss}
    ''')

    optimizer = optim.RMSprop(net.parameters(), lr=lr, weight_decay=1e-8, momentum=0.9)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min' if net.n_classes > 1 else 'max', patience=2)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=6)
    criterion1 = nn.MSELoss(reduction=reduction_loss)

    ma_loss = 0.0003

    for epoch in range(epochs):
        net.train()

        ############## Training ##############
        epoch_loss_sum = 0
        with tqdm(total=n_train, desc=f'Epoch {epoch + 1}/{epochs}', unit='img') as pbar:
            for batch in train_loader:
                CTs = batch['CT']    # [16, 1, 512, 512]
                PETs = batch['PET']
                assert CTs.shape[1] == net.n_channels, \
                    f'Network has been defined with {net.n_channels} input channels, ' \
                    f'but loaded images have {CTs.shape[1]} channels. Please check that ' \
                    'the images are loaded correctly.'

                CTs = CTs.to(device=device, dtype=torch.float32)
                PET_type = torch.float32
                PETs = PETs.to(device=device, dtype=PET_type)

                PETs_pred = net(CTs)

                # Loss
                loss1 = criterion1(PETs_pred, PETs) / batch_size
                epoch_loss_sum += loss1.item()
                loss2 = 1 - ms_ssim(PETs_pred, PETs, data_range=1.0, size_average=True)
                ma_loss = 0.999 * ma_loss + 0.001 * loss1.item()
                loss = W1 * loss1 + W2 * loss2
                pbar.set_postfix(**{'loss (batch)': ma_loss})

                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_value_(net.parameters(), 0.1)
                optimizer.step()

                pbar.update(CTs.shape[0])
                global_step += 1

            writer.add_scalar('Loss/training', epoch_loss_sum / len(train_loader), global_step)
            print('epoch_loss:', epoch_loss_sum / len(train_loader))
            print()

            # Batch saving on training
            # writer.add_images('images', CTs, global_step)
            # writer.add_images('PETs/true_train', PETs, global_step)
            # writer.add_images('PETs/pred_train', PETs_pred, global_step)

            ############## Validation ##############
            for tag, value in net.named_parameters():
                tag = tag.replace('.', '/')
                writer.add_histogram('weights/' + tag, value.data.cpu().numpy(), global_step)
                writer.add_histogram('grads/' + tag, value.grad.data.cpu().numpy(), global_step)

            val_score, PET_true_val, PET_pred_val = eval_net(net, val_loader, device, reduction_loss)
            scheduler.step(val_score)

            # Batch saving on validation
            writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], global_step)
            writer.add_scalar('Loss/validation', val_score, global_step)
            writer.add_images('PETs/true_validation', PET_true_val, global_step)
            writer.add_images('PETs/pred_validation', PET_pred_val, global_step)
            logging.info('Validation mse_loss: {}'.format(val_score))

        ############## Saving checkpoint ##############
        if save_cp and epoch % 5 == 0:
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(net.state_dict(), dir_checkpoint + f'CP_epoch{start_saving + epoch + 1}.pth')
            logging.info(f'Checkpoint {start_saving + epoch + 1} saved !')

    writer.close()
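# train_net above mixes a per-batch MSE term with an MS-SSIM term using the
# module-level weights W1 and W2, which are defined outside this excerpt. A
# minimal sketch of that objective, assuming ms_ssim comes from the
# pytorch_msssim package and using illustrative weights only:
import torch.nn as nn
from pytorch_msssim import ms_ssim

W1, W2 = 0.85, 0.15   # example values; the real weights are not shown here

def mse_msssim_loss(pred, target, reduction='mean'):
    mse = nn.MSELoss(reduction=reduction)(pred, target)
    # 1 - MS-SSIM turns the similarity score (1 = identical) into a loss term;
    # data_range=1.0 assumes inputs scaled to [0, 1]
    ssim_term = 1 - ms_ssim(pred, target, data_range=1.0, size_average=True)
    return W1 * mse + W2 * ssim_term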