def run_postprocessing(self):
    """Post-process every predicted segmentation, then remove the predictions.

    The NIfTI files found under ``predictions/`` are paired by position with
    the sub-directories of ``self.input_dir``; each pair is passed to
    ``self.remove_small_objects`` together with ``self.input_dir``.
    Afterwards the ``predictions/`` folder is deleted.
    """
    print('Post-processing of segmented images...')
    print('Clipping images... ')

    prediction_paths = util.get_nifti_filepaths('predictions/')
    case_names = util.get_sub_dirs(self.input_dir)

    for idx, prediction_path in enumerate(tqdm(prediction_paths)):
        # Predictions and input sub-directories are matched by index.
        self.remove_small_objects(prediction_path, case_names[idx],
                                  self.input_dir)

    # The folder was created by miscnn and is only an intermediate result
    shutil.rmtree('predictions/')
def run(self):
    """Cluster every segmentation below ``self.input_dir`` with k-means.

    For each path returned by
    ``util.get_paths_from_tree(self.input_dir, 'segmentation')`` this method

    1. loads the image and turns the segmentation into a coordinate matrix
       with ``get_coordinates_from_segmentation``,
    2. runs ``self.num_iters`` k-means iterations with ``self.num_clusters``
       centroids, initialised from data points sampled with a fixed seed,
    3. writes the assignment into a label map (cluster ``k`` gets label
       ``k + 1``; untouched voxels stay 0) and stores it through
       ``self.save_clustered_image``.

    Raises:
        ZeroDivisionError: if ``self.num_iters`` is 0 (per-iteration timing).
    """
    # Get filenames
    filenames = util.get_paths_from_tree(self.input_dir, 'segmentation')

    print('\n\nRunning clustering on samples...\n\n')

    # Loop over images
    for i, filename in enumerate(tqdm(filenames)):
        img = nib.load(filename)
        # ``img.get_data()`` is deprecated and removed in current nibabel;
        # ``np.asanyarray(img.dataobj)`` is its documented replacement and
        # returns the same array.
        img_data = np.asanyarray(img.dataobj)
        hd = img.header
        M = img.affine[:3, :3]
        iM = np.linalg.inv(M)
        abc = img.affine[:3, 3]

        # Get coordinate data of image
        print('\n\nGet coordinate matrix from segmentation.. ')
        data = get_coordinates_from_segmentation(img_data, M, iM, abc)

        # Set random seed for reproducibility
        random.seed(0)

        # Initialise centroids from randomly chosen data points.
        # Indexing with a list copies, so updating centroids leaves data
        # untouched.
        centroids = data[random.sample(range(data.shape[0]),
                                       self.num_clusters)]

        # Index of the centroid assigned to each data point
        assigned_centroids = np.zeros(len(data), dtype=np.int32)

        # SSE after each iteration
        sse_list = []

        # Start clustering time
        tic = time.time()

        # Main loop
        print('\nStart clustering image...')
        for _ in tqdm(range(self.num_iters), leave=False):
            # Get closest centroid for each data point
            assigned_centroids = get_closest_centroid(
                data[:, None, :], centroids[None, :, :])

            # Update every centroid. Axis 0 of ``centroids`` enumerates the
            # clusters; axis 1 holds the coordinate dimensions.
            for c in range(centroids.shape[0]):
                cluster_members = data[assigned_centroids == c]
                if len(cluster_members) == 0:
                    # An empty cluster keeps its previous centroid; the mean
                    # of an empty slice would turn it into NaN.
                    continue
                centroids[c] = cluster_members.mean(axis=0)

            # Compute SSE
            sse = compute_sse(data.squeeze(), centroids.squeeze(),
                              assigned_centroids)
            sse_list.append(sse)

        print('\tFinished clustering image.')

        # End clustering time
        toc = time.time()

        print("Image clustering-time: " + str(round(toc - tic, 2)))
        print("Image clustering-time per iteration: " +
              str(round(toc - tic, 2) / self.num_iters))

        print('\nSaving clustered image to nifti label-map... \n')

        # Start saving time
        tic = time.time()

        # Save clustering to nifti label-map
        label_map = np.zeros_like(img_data)

        # Group the coordinates by their assigned centroid
        merged = defaultdict(list)
        for a, b in zip(assigned_centroids, data):
            merged[a].append(b)

        for key, cords in merged.items():
            # Labels start at 1 so that 0 stays free for unlabelled voxels
            cluster = key + 1
            for cord in cords:
                # Map the coordinate back to voxel indices
                ijk = invf(cord[0], cord[1], cord[2], iM, abc)
                label_map[int(ijk[0]), int(ijk[1]), int(ijk[2])] = cluster

        # End saving time
        toc = time.time()

        print("Time spent saving image to labelmap: " +
              str(round(toc - tic, 2)))

        # Segmentation paths and input sub-directories are matched by index
        folder_name = util.get_sub_dirs(self.input_dir)[i]
        self.save_clustered_image(img, label_map, hd, folder_name)
        print('File saved.')
def evaluate(model, config, experiment, validation_directory,
             file_identifier=''):
    """Evaluate ``model`` on a labelled validation set and an unlabelled set.

    Every sub-directory of ``validation_directory`` is treated as one class;
    the sorted directory names define the class indices. Each image is
    predicted, a confusion table is built, per-class metrics are derived and
    logged, and misclassified files are recorded. Afterwards every file in
    ``config['evaluation']['path']`` is predicted and logged with its
    prediction time. The written result files are attached to ``experiment``.

    Args:
        model: object exposing ``predict(batch)``.
        config: mapping providing ``dataset.number_of_classes``,
            ``image_processing.image_width/image_height/image_channels`` and
            ``evaluation.path``.
        experiment: object exposing ``add_artifact(path)``.
        validation_directory: directory with one sub-directory per class.
        file_identifier: prefix used for all result file names.

    Returns:
        dict with the mean over classes of ``f1``, ``rec``, ``acc``,
        ``prec``, ``spec`` and ``mcc``.
    """
    # get number of classes in model
    number_of_classes = config['dataset']['number_of_classes']

    # image dimensions
    image_width = config['image_processing']['image_width']
    image_height = config['image_processing']['image_height']
    image_channels = config['image_processing']['image_channels']

    # get class directory names from validation directory
    class_names = get_sub_dirs(validation_directory)
    class_names.sort()

    # lookup tables between class name and class index
    label_index = {name: index for index, name in enumerate(class_names)}
    index_label = dict(enumerate(class_names))

    # confusion[predicted][actual]: rows are predictions, columns are truth
    confusion = np.zeros((number_of_classes, number_of_classes))

    # class name -> {file name -> details of the wrong prediction}
    misclassified = {}

    for class_name in class_names:
        print(f'Starting {class_name}')

        class_dir = os.path.join(validation_directory, class_name)
        true_index = label_index[class_name]

        for file_name in os.listdir(class_dir):
            image = _load_evaluation_image(
                os.path.join(class_dir, file_name),
                image_width, image_height, image_channels)

            with tf.get_default_graph().as_default():
                predictions = model.predict(image)[0]
            prediction = np.argmax(predictions)

            # the ground truth is the name of the class directory
            if prediction != true_index:
                misclassified.setdefault(class_name, {})[file_name] = {
                    'prediction': index_label[prediction],
                    'predictions': {
                        index_label[class_index]: pred
                        for class_index, pred in enumerate(predictions)
                    }
                }

            # update confusion table
            confusion[prediction][true_index] += 1

    # Rows are predictions, so a row sum counts everything predicted as a
    # class (TP + FP) and a column sum counts everything that truly belongs
    # to a class (TP + FN).
    TP = np.diag(confusion)
    FP = confusion.sum(axis=1) - TP
    FN = confusion.sum(axis=0) - TP
    TN = confusion.sum() - (FP + FN + TP)

    print(f"True Positives: { TP }")
    print(f"True Negatives: { TN }")
    print(f"False Positives: { FP }")
    print(f"False Negatives: { FN }")

    # calculate metrics based on FP, FN, TP and TN
    f1 = np.nan_to_num(f1score(TP, TN, FP, FN))
    rec = np.nan_to_num(recall(TP, TN, FP, FN))
    acc = np.nan_to_num(accuracy(TP, TN, FP, FN))
    prec = np.nan_to_num(precision(TP, TN, FP, FN))
    spec = np.nan_to_num(specificity(TP, TN, FP, FN))
    mcc = np.nan_to_num(matthews_correlation_coefficient(TP, TN, FP, FN))

    # bundle metrics into dictionary
    metrics = {
        'FP': FP,
        'FN': FN,
        'TP': TP,
        'TN': TN,
        'f1': f1,
        'rec': rec,
        'acc': acc,
        'prec': prec,
        'spec': spec,
        'mcc': mcc
    }

    summary_file = f'{file_identifier}_split_evaluation_summary.txt'

    # save misclassified images to file together with class
    for class_name in misclassified:
        log_misclassifications(
            f'{file_identifier}_class_misclassifications.txt',
            misclassified[class_name], class_name, index_label)

    # write class legend to results file
    log_class_legend(summary_file, class_names)

    # write confusion table to results file
    log_confusion_table(summary_file, confusion)

    # write model summary to results file
    log_model_results(summary_file, metrics, file_identifier)

    # write summaries for each class
    for class_name in class_names:
        class_index = label_index[class_name]

        # every metric is indexed per class, so pick this class's entry
        class_metrics = {
            key: value[class_index] for key, value in metrics.items()
        }

        log_class_results(f'{file_identifier}_class_results.txt',
                          class_metrics, class_name, class_index)

    evaluation_path = config['evaluation']['path']

    print("starting test validation...")

    for file_name in os.listdir(evaluation_path):
        image = _load_evaluation_image(
            os.path.join(evaluation_path, file_name),
            image_width, image_height, image_channels)

        with tf.get_default_graph().as_default():
            # only the forward pass is timed
            start_time = time.time()
            prediction = model.predict(image)[0]
            prediction_time = time.time() - start_time

        prediction_index = np.argmax(prediction)
        prediction_label = index_label[prediction_index]

        log_file_evaluation(
            f'{file_identifier}_test_evaluation_results.txt', file_name,
            prediction_label, prediction[prediction_index], prediction_time)

    # add evaluation files to experiment
    # NOTE(review): presumably the log_* helpers write into ../tmp - confirm
    experiment.add_artifact(
        f'../tmp/{file_identifier}_split_evaluation_summary.txt')
    experiment.add_artifact(
        f'../tmp/{file_identifier}_class_misclassifications.txt')
    experiment.add_artifact(f'../tmp/{file_identifier}_class_results.txt')
    experiment.add_artifact(
        f'../tmp/{file_identifier}_test_evaluation_results.txt')

    # return evaluation metrics averaged over all classes
    return {
        'f1': np.mean(f1),
        'rec': np.mean(rec),
        'acc': np.mean(acc),
        'prec': np.mean(prec),
        'spec': np.mean(spec),
        'mcc': np.mean(mcc)
    }


def _load_evaluation_image(path, image_width, image_height, image_channels):
    """Read ``path`` as RGB and return it as a scaled single-image batch.

    The image is resized to ``(image_width, image_height, image_channels)``,
    reshaped to a batch of one and divided by 255.
    """
    image = imread(path, mode='RGB')
    image = imresize(image, (image_width, image_height, image_channels))
    image = image.reshape(1, image_width, image_height, image_channels)
    return np.true_divide(image, 255.)
def run(self, batch_size):
    """Extract one feature per cluster for every image in ``self.input_dir``.

    For each image the method samples grid patches from the image and its
    cluster label map, feeds the image patches through ``self.encoder`` and
    keeps, per cluster label, the largest standard deviation of the encoder
    output over all patches assigned to that cluster. A patch is assigned
    either by its centre voxel (``cluster_selection == 'center'``) or by the
    most frequent label in the patch (``'highest_share'``); patches assigned
    to label 0 are ignored. The per-image features are written with
    ``util.save_features`` and the temporary patch directory is removed.

    Args:
        batch_size: batch size used for the data generators and the encoder.

    Raises:
        ValueError: if ``self.cluster_selection`` is not ``'center'`` or
            ``'highest_share'``.
    """
    # Fail early with a clear message; otherwise the patch loop would hit an
    # unbound ``cluster`` variable.
    if self.cluster_selection not in ('center', 'highest_share'):
        raise ValueError(
            "cluster_selection must be 'center' or 'highest_share', got "
            + repr(self.cluster_selection))

    # Get filenames
    img_filenames = util.get_paths_from_tree(self.input_dir, 'imaging')
    clus_filenames = util.get_paths_from_tree(self.input_dir, 'cluster')
    folder_names = util.get_sub_dirs(self.input_dir)

    # A leading patch dimension of 1 means 2D patches
    is_2d = self.patch_size[0] == 1

    # Centre index of a patch; it does not depend on the image
    if is_2d:
        x_center = int(self.patch_size[1] / 2 - 1)
        y_center = int(self.patch_size[2] / 2 - 1)
        z_center = 0
    else:
        x_center = int(self.patch_size[0] / 2 - 1)
        y_center = int(self.patch_size[1] / 2 - 1)
        z_center = int(self.patch_size[2] / 2 - 1)

    # Number of spatial axes kept when the channel axis is appended
    spatial_dims = 2 if is_2d else 3

    print('\n\nRunning feature extraction on samples.. \n\n')

    for i in tqdm(range(len(img_filenames))):
        # Create out dir for patches
        save_dir = util.create_fe_patch_dir(self.patch_size)

        # Extract patches from this image; the sampler expects lists
        img_ids, label_ids = extract_patches.patch_sampler(
            img_filenames=[img_filenames[i]],
            labelmap_filenames=[clus_filenames[i]],
            patch_size=self.patch_size,
            out_dir=save_dir,
            sampler_type='grid',
            voxel_spacing=self.resample,
            patch_overlap=self.patch_overlap,
            save_patches=True,
            inference=True)

        # Save info
        info = {
            'patch_size': self.patch_size,
            'cluster_selection': self.cluster_selection,
            'num_clusters': self.num_clusters,
            'patch_overlap': self.patch_overlap
        }
        util.save_dict(info, self.out_dir, 'info.csv')

        # Create generators
        image_generator = DataGenerator(img_ids,
                                        data_dir=save_dir,
                                        shuffle=False,
                                        batch_size=batch_size)
        cluster_generator = DataGenerator(label_ids,
                                          data_dir=save_dir,
                                          shuffle=False,
                                          batch_size=batch_size)

        # Load all data from the generators. Concatenating along the batch
        # axis also works when the last batch is smaller than the others,
        # where stacking with np.array would produce a ragged array.
        image_data = np.concatenate([x[0] for x in image_generator], axis=0)
        cluster_data = np.concatenate([x[0] for x in cluster_generator],
                                      axis=0)

        # Append the channel axis: (patches, *spatial, 1)
        image_data = image_data.reshape(
            image_data.shape[:spatial_dims + 1] + (1,))
        cluster_data = cluster_data.reshape(
            cluster_data.shape[:spatial_dims + 1] + (1,))

        print(image_data.shape)

        # Make predictions
        pred = self.encoder.predict(image_data,
                                    verbose=1,
                                    batch_size=batch_size)

        # Start feature extraction time
        tic = time.time()

        # Largest std seen so far for each cluster label 1..num_clusters
        feature = dict.fromkeys(range(1, self.num_clusters + 1), 0)

        print('\nStart extracting features from image...')
        for j in range(len(image_data)):
            if self.cluster_selection == 'center':
                # Find matching cluster based on center voxel
                cluster = int(cluster_data[j, x_center, y_center, z_center])
            else:
                # Find the matching cluster based on highest share
                cluster_patch = cluster_data[j].astype(int)
                cluster = np.argmax(np.bincount(cluster_patch.flat))

            # Keep the maximum std per cluster; label 0 is skipped
            if cluster != 0:
                std = np.std(pred[j])
                max_std = feature.get(cluster)
                if std > max_std:
                    feature[cluster] = std

        # End feature extraction time
        toc = time.time()
        print("Image feature-extraction-time: " + str(round(toc - tic, 2)))

        # Save features to disk
        util.save_features(list(feature.values()), folder_names[i],
                           self.out_dir)

        # Delete the patches of this image
        shutil.rmtree(save_dir)