def save(self, fold):
    epochs_completed_string = str(self.epochs_completed)
    fold_string = functions.string(fold)
    file_name = "train_" + epochs_completed_string + fold_string
    train_path = os.path.join(S.run_path, file_name)  # directory labelled with epochs_completed
    try:
        os.mkdir(train_path)
    except OSError as error:
        print(error)

    PATH_save = os.path.join(train_path, "model.pt")
    PATH_opt_save = os.path.join(train_path, "opt.pt")
    PATH_scaler_save = os.path.join(train_path, "scaler.pt")
    PATH_val_loss_save = os.path.join(train_path, "val_loss.pt")
    PATH_epochs_completed_save = os.path.join(train_path, "epochs_completed.pt")

    torch.save(self.model.state_dict(), PATH_save)
    torch.save(self.optimizer.state_dict(), PATH_opt_save)
    # save one sigma file per landmark
    for k in S.landmarks:
        PATH_sigma_save = os.path.join(train_path, "sigma_%1.0f.pt" % k)
        torch.save({'sigma': S.sigmas[k]}, PATH_sigma_save)
    torch.save(self.scaler.state_dict(), PATH_scaler_save)
    torch.save({'best_val_loss': self.best_loss}, PATH_val_loss_save)
    torch.save({'epochs_completed': self.epochs_completed}, PATH_epochs_completed_save)
def save(self, fold):
    epochs_completed_string = str(self.epochs_completed)  # trained
    fold_string = functions.string(fold)
    file_name = "train_" + epochs_completed_string + fold_string
    train_path = os.path.join(S.run_path, file_name)  # directory labelled with epochs_completed
    try:
        os.mkdir(train_path)
    except OSError as error:
        print(error)

    PATH_save = os.path.join(train_path, "model.pt")
    PATH_opt_save = os.path.join(train_path, "opt.pt")
    # PATH_sigma_save = os.path.join(train_path, "sigma.pt")
    PATH_scaler_save = os.path.join(train_path, "scaler.pt")
    PATH_val_loss_save = os.path.join(train_path, "val_loss.pt")
    PATH_epochs_completed_save = os.path.join(train_path, "epochs_completed.pt")

    torch.save(self.model_load.state_dict(), PATH_save)
    torch.save(self.optimizer_load.state_dict(), PATH_opt_save)
    # save one sigma file per landmark
    for k in S.landmarks:
        PATH_sigma_save = os.path.join(train_path, "sigma_%1.0f.pt" % k)
        torch.save({'sigma': S.sigmas[k]}, PATH_sigma_save)
    torch.save(self.scaler_load.state_dict(), PATH_scaler_save)
    torch.save({'best_val_loss': self.best_loss}, PATH_val_loss_save)  # trained
    torch.save({'epochs_completed': self.epochs_completed}, PATH_epochs_completed_save)  # trained
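# Illustrative sketch (hypothetical helper, not part of the original class): reading back the
# per-landmark sigma files written by save() above, assuming the same "sigma_<landmark>.pt"
# naming convention and that S.landmarks is unchanged between saving and loading.
def load_sigmas_sketch(train_path):
    sigmas = {}
    for k in S.landmarks:
        # each file stores a dict of the form {'sigma': <tensor>}
        sigmas[k] = torch.load(os.path.join(train_path, "sigma_%1.0f.pt" % k))['sigma']
    return sigmas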
def train_model(model, scaler, optimizer, scheduler, alpha, reg, gamma, sigmas,
                num_epochs, best_loss, epochs_completed, fold):
    best_model_wts = copy.deepcopy(model.state_dict())
    fold = functions.string(fold)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        since = time.time()

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                print('Learning rates (overall and for sigmas)')
                for param_group in optimizer.param_groups:
                    print("LR", param_group['lr'])
                model.train()  # set model to training mode
            else:
                print('')
                print('Testing on val set')
                print('')
                model.eval()  # set model to evaluate mode

            metrics_total = defaultdict(float)
            metrics_landmarks = defaultdict(float)
            for i in S.landmarks:
                # e.g. metrics_landmarks[3]['loss'] is the loss for landmark 3
                metrics_landmarks[i] = defaultdict(float)

            imgs_in_set = 0  # counts the total number of images in the train/val/test set
            # iters_to_acc relates to a (currently disabled) gradient-accumulation scheme:
            # e.g. with 12 accumulation iterations and batch size 3, optimizer.step() would run
            # every 36 images, and the loss for any leftover batches at the end of the epoch
            # would need to be scaled by the leftover count rather than by iters_to_acc.
            iters_to_acc = math.floor(data_loaders.batch_acc_batches / S.batch_acc_steps)

            for i, batch in enumerate(data_loaders.dataloaders[phase]):
                inputs = batch['image']
                idx = batch['idx']
                # target_coords is a dictionary indexed as [landmark]['x'][batch_id]
                target_coords = batch['coords']
                inputs = inputs.float().to(S.device)
                patients = batch['patient']

                # forward pass; track history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    with torch.cuda.amp.autocast(enabled=S.use_amp):
                        outputs = model(inputs)
                        # masks are converted to heatmaps inside the loss function
                        # (this allows sigma optimisation)
                        loss = loss_func.calc_loss_gauss(
                            model, inputs, outputs, target_coords, idx,
                            metrics_landmarks, alpha, reg, gamma, imgs_in_set, sigmas)

                # backward + optimise only in the training phase
                if phase == 'train':
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                    scheduler.step()
                    optimizer.zero_grad()

                # statistics
                imgs_in_set += inputs.size(0)

            print('Images in set')
            print(imgs_in_set)
            print('')
            print('Summary on %s dataset' % phase)
            print('')
            # values added to the metrics dicts are summed over all images in the set;
            # print_metrics divides by the number of images, so all reported values are means
            functions.print_metrics(metrics_landmarks, imgs_in_set, phase)

            print('Sigmas are')
            for l in S.landmarks:
                print(sigmas[l])

            # metrics_landmarks[l]['loss'] is divided by imgs_in_set below,
            # so the epoch loss is an average per image
            epoch_loss = 0
            for l in S.landmarks:
                epoch_loss += metrics_landmarks[l]['loss']  # total loss, i.e. each batch loss summed
                # add loss per landmark to tensorboard
                S.writer.add_scalar('%s loss for landmark %1.0f' % (phase, l),
                                    metrics_landmarks[l]['loss'] / imgs_in_set,
                                    epochs_completed + epoch + 1)
                if phase == 'train':
                    S.writer.add_scalar('sigma for landmark %1.0f' % l,
                                        sigmas[l][0].item(),
                                        epochs_completed + epoch + 1)
            epoch_loss /= imgs_in_set
            S.writer.add_scalar('total epoch loss for phase %s' % phase,
                                epoch_loss, epochs_completed + epoch + 1)

            # deep copy the model; note that validation is done WITHOUT the sliding window
            if phase == 'val' and epoch_loss < best_loss:
                print("\n")
                print("------ deep copy best model ------ ")
                best_loss = epoch_loss
                print('  best val loss: {:4f}'.format(best_loss))
                print('\n')
                best_model_wts = copy.deepcopy(model.state_dict())
                S.epoch_deep_saved = epochs_completed + epoch + 1
                name_of_file = os.path.join(S.run_path, "epoch_saved_%s.txt" % fold)
                txt_file = open(name_of_file, "a")
                L = ['epoch saved %1.0f' % S.epoch_deep_saved, '\n']
                txt_file.writelines(L)
                txt_file.close()

        # report epoch timing and a rough estimate of the finish time
        time_elapsed = time.time() - since
        finish_time = time.ctime(time_elapsed * (num_epochs - epoch) + time.time())
        print('\n')
        print('Epoch time: ' + '{:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        print('Estimated finish time (till end of epoch batch): ', finish_time)
        print('\n')

    # number of epochs completed in total
    epochs_completed_total = epochs_completed + epoch + 1

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, best_loss, epochs_completed_total
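# Illustrative sketch (hypothetical driver, not part of the original module): how train_model
# above would typically be invoked for one cross-validation fold. The optimiser, scheduler and
# hyper-parameter values here are assumptions for illustration, not the settings used in the
# original runs.
def run_fold_sketch(model, fold, sigmas, num_epochs=50):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)            # assumed optimiser
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    scaler = torch.cuda.amp.GradScaler(enabled=S.use_amp)
    model, best_loss, epochs_completed_total = train_model(
        model, scaler, optimizer, scheduler,
        alpha=1.0, reg=0.0, gamma=100.0, sigmas=sigmas,                   # assumed hyper-parameters
        num_epochs=num_epochs, best_loss=float('inf'),
        epochs_completed=0, fold=fold)
    return model, best_loss, epochs_completed_total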
def performance_metrics_line(model, sigmas, gamma, epochs_completed, fold):
    # record the test ids for this fold so they can be printed out at the end
    S.k_fold_ids.append(data_loaders.test_set_ids)

    # create directory for this evaluation
    epochs_completed_string = str(epochs_completed)
    file_name = "eval_" + epochs_completed_string + functions.string(fold)
    eval_path = os.path.join(S.run_path, file_name)  # directory labelled with epochs_completed
    try:
        os.mkdir(eval_path)
    except OSError as error:
        print(error)

    keys = ('clicker_1', 'clicker_2', 'mean')
    p2p_landmarks = {}
    outliers_landmarks = {}
    x_axis_err, x_axis_err_mm = {}, {}
    y_axis_err, y_axis_err_mm = {}, {}
    z_axis_err, z_axis_err_mm = {}, {}
    for i in keys:
        p2p_landmarks[i] = {}
        outliers_landmarks[i] = {}
        x_axis_err[i], x_axis_err_mm[i] = {}, {}
        y_axis_err[i], y_axis_err_mm[i] = {}, {}
        z_axis_err[i], z_axis_err_mm[i] = {}, {}
    for i in keys:
        for l in S.landmarks:
            p2p_landmarks[i][l] = np.empty((0), float)
            outliers_landmarks[i][l] = np.empty((0), float)
            x_axis_err[i][l] = np.empty((0), float)
            x_axis_err_mm[i][l] = np.empty((0), float)
            y_axis_err[i][l] = np.empty((0), float)
            y_axis_err_mm[i][l] = np.empty((0), float)
            z_axis_err[i][l] = np.empty((0), float)
            z_axis_err_mm[i][l] = np.empty((0), float)

    # load in struc_coord (the clicked ground-truth coordinates)
    if S.rts == False:
        struc_coord_clicker_1 = functions.load_obj_pickle(S.root, 'coords_' + 'Oli')
        struc_coord_clicker_2 = functions.load_obj_pickle(S.root, 'coords_' + 'Aaron')
    elif S.rts == True:
        struc_coord_clicker_1 = functions.load_obj_pickle(S.root, 'coords_' + 'Oli_test_set')
        struc_coord_clicker_2 = functions.load_obj_pickle(S.root, 'coords_' + 'Aaron_test_set')
    struc_coord_mean = functions.mean_from_clickers(struc_coord_clicker_1, struc_coord_clicker_2)
    struc_coord = {}
    struc_coord['clicker_1'] = struc_coord_clicker_1
    struc_coord['clicker_2'] = struc_coord_clicker_2
    struc_coord['mean'] = struc_coord_mean

    # initialise max values as 0 for all patients (sliding-window bookkeeping)
    # patients needs to be e.g. ['0003.npy', '0004.npy', ...]
    patients = data_loaders.test_set_ids
    val_max_list = {}
    coord_list = {}
    pat_index = {}
    for p in patients:
        val_max_list[p] = {}
        coord_list[p] = {}
        pat_index[p] = {}
        for l in S.landmarks:
            val_max_list[p][l] = 0
            coord_list[p][l] = {'x': 0, 'y': 0, 'z': 0}
            pat_index[p][l] = 0

    for slide_index in range(S.sliding_points):
        for batch in data_loaders.dataloaders['test']:
            image = batch['image'].to(S.device)
            patient = batch['patient']
            pred = model(image)
            for l in S.landmarks:  # cycle over all landmarks
                for i in range(image.size()[0]):  # batch size
                    dimension = 3
                    height_guess = (gamma * (2 * np.pi)**(-dimension / 2)
                                    * sigmas[l].item()**(-dimension))
                    if S.pred_max == True:
                        pred_coords_max = functions.pred_max(pred, l, S.landmarks)[0]
                        val_max = functions.pred_max(pred, l, S.landmarks)[1]
                    else:
                        # change to gauss fit (note: val_max is only assigned on the pred_max branch)
                        pred_coords_max = functions.gauss_max(
                            pred, l, height_guess, sigmas[l].item(),
                            S.in_x, S.in_y, S.in_z, S.landmarks)
                    # if the max value is the greatest seen for this patient,
                    # save the predicted coord for this landmark
                    if val_max[i] > val_max_list[patient[i]][l]:
                        val_max_list[patient[i]][l] = val_max[i]  # update max val
                        coord_list[patient[i]][l]['x'] = pred_coords_max[i][0]
                        coord_list[patient[i]][l]['y'] = pred_coords_max[i][1]
                        coord_list[patient[i]][l]['z'] = pred_coords_max[i][2]
                        pat_index[patient[i]][l] = slide_index
        S.slide_index += 1
    S.slide_index = 0

    # final locations dict
    final_loc = {}
    for p in patients:
        final_loc[p] = {}
        for l in S.landmarks:
            final_loc[p][l] = {'x': 0, 'y': 0, 'z': 0}

    for p in patients:
        for l in S.landmarks:  # cycle over all landmarks
            pred_max_x = coord_list[p][l]['x']
            pred_max_y = coord_list[p][l]['y']
            pred_max_z = coord_list[p][l]['z']
            # convert the prediction to a location in the original image
            pred_max_x, pred_max_y, pred_max_z = functions.aug_to_orig(
                pred_max_x, pred_max_y, pred_max_z, S.downsample_user, p, pat_index[p][l])
            # store the final location
            final_loc[p][l]['x'] = pred_max_x
            final_loc[p][l]['y'] = pred_max_y
            final_loc[p][l]['z'] = pred_max_z
            for k in keys:  # clicker_1, clicker_2 and mean
                struc_loc = struc_coord[k][p]
                if struc_loc[l]['present'] == True:
                    structure_max_x = struc_loc[l]['x']
                    structure_max_y = struc_loc[l]['y']
                    structure_max_z = struc_loc[l]['z']
                    # point_to_point_mm takes in the original structure location
                    img_landmark_point_to_point = functions.point_to_point_mm(
                        structure_max_x, structure_max_y, structure_max_z,
                        pred_max_x, pred_max_y, pred_max_z, p)
                    p2p_landmarks[k][l] = np.append(
                        p2p_landmarks[k][l], img_landmark_point_to_point.cpu())
                    x_p2p, x_p2p_mm, y_p2p, y_p2p_mm, z_p2p, z_p2p_mm = functions.axis_p2p_err(
                        structure_max_x, structure_max_y, structure_max_z,
                        pred_max_x, pred_max_y, pred_max_z, p)
                    x_axis_err[k][l] = np.append(x_axis_err[k][l], x_p2p.cpu())
                    x_axis_err_mm[k][l] = np.append(x_axis_err_mm[k][l], x_p2p_mm.cpu())
                    y_axis_err[k][l] = np.append(y_axis_err[k][l], y_p2p.cpu())
                    y_axis_err_mm[k][l] = np.append(y_axis_err_mm[k][l], y_p2p_mm.cpu())
                    z_axis_err[k][l] = np.append(z_axis_err[k][l], z_p2p.cpu())
                    z_axis_err_mm[k][l] = np.append(z_axis_err_mm[k][l], z_p2p_mm.cpu())
                    # a point-to-point error > 20 mm counts as an outlier
                    if img_landmark_point_to_point > 20:
                        outliers_landmarks[k][l] = np.append(outliers_landmarks[k][l], 1)
                    # print 2D slice
                    print('2D slice for landmark %1.0f' % l)
                    # print_2D_slice_line(l, pred_max_x, pred_max_y, pred_max_z, struc_coord, eval_path, p)

    for k in keys:
        print('\n')
        print('Results summary for clicker %s' % k)
        print('---------------')
        latex_line = []
        csv_line = []
        if S.rts == True:
            name_of_file = os.path.join(eval_path, "results_rts_line_new_%s.txt" % k)
        elif S.rts == False:
            name_of_file = os.path.join(eval_path, "results_line_new_%s.txt" % k)
        txt_file = open(name_of_file, "a")
        for l in S.landmarks:
            print('\n')
            print('Landmark %1.0f' % l)
            mean = np.mean(p2p_landmarks[k][l])
            std_mean = np.std(p2p_landmarks[k][l], ddof=1) * (len(p2p_landmarks[k][l]))**-0.5
            median = np.median(p2p_landmarks[k][l])
            upper_perc = np.percentile(p2p_landmarks[k][l], 75)
            lower_perc = np.percentile(p2p_landmarks[k][l], 25)
            error_min = np.amin(p2p_landmarks[k][l])
            error_max = np.amax(p2p_landmarks[k][l])
            outliers_perc = outliers_landmarks[k][l].sum() / len(p2p_landmarks[k][l]) * 100
            mean_x_err = np.mean(x_axis_err[k][l])
            mean_x_err_mm = np.mean(x_axis_err_mm[k][l])
            mean_y_err = np.mean(y_axis_err[k][l])
            mean_y_err_mm = np.mean(y_axis_err_mm[k][l])
            mean_z_err = np.mean(z_axis_err[k][l])
            mean_z_err_mm = np.mean(z_axis_err_mm[k][l])
            print(' mean point to point error is ' + str(mean) + ' +/- ' + str(std_mean))
            print(' median point to point error is ' + str(median))
            print(' 75th percentile is: ' + str(upper_perc))
            print(' 25th percentile is: ' + str(lower_perc))
            print(' minimum point to point error is: ' + str(error_min))
            print(' maximum point to point error is: ' + str(error_max))
            print(' mean error in x axis is: ' + str(mean_x_err) + ' (' + str(mean_x_err_mm) + ' mm)')
            print(' mean error in y axis is: ' + str(mean_y_err) + ' (' + str(mean_y_err_mm) + ' mm)')
            print(' mean error in z axis is: ' + str(mean_z_err) + ' (' + str(mean_z_err_mm) + ' mm)')
            print(' percentage of images which were outliers is ' + str(outliers_perc) + '%')
            print(' sigma is ' + str(sigmas[l]))
            print(' trained for ' + str(epochs_completed) + ' epochs')
            print(' pred max used = %s' % S.pred_max)
            print('\n')
            L = [
                '\n', 'Landmark %1.0f' % l, '\n',
                ' mean point to point error is ' + str(mean) + ' +/- ' + str(std_mean), '\n',
                ' median point to point error is ' + str(median), '\n',
                ' 75th percentile is: ' + str(upper_perc), '\n',
                ' 25th percentile is: ' + str(lower_perc), '\n',
                ' minimum point to point error is: ' + str(error_min), '\n',
                ' maximum point to point error is: ' + str(error_max), '\n',
                ' percentage of images which were outliers is ' + str(outliers_perc) + '%', '\n',
                ' mean error in x axis is: ' + str(mean_x_err) + ' (' + str(mean_x_err_mm) + ' mm)', '\n',
                ' mean error in y axis is: ' + str(mean_y_err) + ' (' + str(mean_y_err_mm) + ' mm)', '\n',
                ' mean error in z axis is: ' + str(mean_z_err) + ' (' + str(mean_z_err_mm) + ' mm)', '\n',
                ' sigma is ' + str(sigmas[l]), '\n',
                ' pred max used = ' + str(S.pred_max), '\n',
                ' trained for ' + str(epochs_completed) + ' epochs\n'
            ]
            txt_file.writelines(L)
            # write in latex table format
            latex_line_temp = [' & ' + str(round(mean, 1)) + '$\pm$' + str(round(std_mean, 1))]
            latex_line = latex_line + latex_line_temp
            # write in csv format so per-fold results are easy to combine
            csv_line_temp = [str(round(mean, 1)) + ',' + str(round(std_mean, 1)) + ',']
            csv_line = csv_line + csv_line_temp
            # append to the summary csv file
            csv_name = os.path.join(S.save_data_path, 'results_summary.csv')
            with open(csv_name, 'a', newline='') as file:
                writer = csv.writer(file)
                sigma_string = str(sigmas[l])
                writer.writerow([
                    '%s' % S.run_folder,
                    '%s' % epochs_completed_string,
                    'Landmark %1.0f' % l,
                    str(mean), str(std_mean), str(median),
                    str(outliers_perc) + '%',
                    sigma_string.replace("\n", " "),
                    time.strftime("%Y%m%d-%H%M%S"),
                    'pred max used = %s' % S.pred_max
                ])
        # write in latex/csv form
        txt_file.writelines(latex_line)
        txt_file.writelines(['\n'])
        txt_file.writelines(csv_line)
        txt_file.close()

    print('final locations')
    print(final_loc)
    functions.save_obj_pickle(final_loc, eval_path, 'final_coords')
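# Illustrative note (restating a formula already used above, not new functionality): the
# height_guess in performance_metrics_line is the peak value of an isotropic 3D Gaussian
# heatmap scaled by gamma, h = gamma * (2*pi)^(-3/2) * sigma^(-3). A minimal standalone
# version of that calculation, assuming only numpy:
def gaussian_peak_height(gamma, sigma, dimension=3):
    # peak of a normalised isotropic Gaussian with standard deviation sigma, scaled by gamma
    return gamma * (2 * np.pi) ** (-dimension / 2) * sigma ** (-dimension)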
# paths
import os

import settings as S
from useful_functs import functions

train_folder_name = "train_" + S.epoch_load + functions.string(S.fold_load)
run_folder_name = os.path.join(S.save_data_path, S.run_folder)
epoch_load = os.path.join(run_folder_name, train_folder_name)

# paths to load a previously saved checkpoint
PATH_load = os.path.join(epoch_load, "model.pt")
PATH_opt_load = os.path.join(epoch_load, "opt.pt")
PATH_scaler_load = os.path.join(epoch_load, "scaler.pt")
PATH_val_loss_load = os.path.join(epoch_load, "val_loss.pt")
PATH_epochs_completed_load = os.path.join(epoch_load, "epochs_completed.pt")
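# Illustrative sketch (hypothetical usage, not part of this module): the paths above are
# intended to be consumed elsewhere roughly as follows, assuming `model`, `optimizer` and
# `scaler` have already been constructed with matching architectures.
#
#   import torch
#   model.load_state_dict(torch.load(PATH_load))
#   optimizer.load_state_dict(torch.load(PATH_opt_load))
#   scaler.load_state_dict(torch.load(PATH_scaler_load))
#   best_loss = torch.load(PATH_val_loss_load)['best_val_loss']
#   epochs_completed = torch.load(PATH_epochs_completed_load)['epochs_completed']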