def TestCombinedKmeans(train_data_matrix, train_labels_matrix, test_data,
                       test_labels):
    """Segment a test slice with per-subject k-means and majority voting.

    For every training subject the cluster centres are optimised on that
    subject's normalized data (100 iterations, learning rate 0.1), the
    test data are labelled with those centres, and the per-subject
    predictions are fused by taking the per-pixel mode.

    Input:
        train_data_matrix    array indexed as [pixel, feature, subject]
        train_labels_matrix  array indexed as [pixel, subject]
        test_data            array indexed as [pixel, feature]; must have
                             as many pixels as the training data
        test_labels          reference labels for the test pixels
    Output:
        predicted_mask       fused labels reshaped to 240 x 240
        err                  util.classification_error of the fused labels
        dice                 util.dice_multiclass of the fused labels
    """
    im_size = [240, 240]
    num_subjects = train_data_matrix.shape[2]

    # One column of predicted test labels per training subject.
    pred_labels_kmeans = np.empty([train_data_matrix.shape[0], num_subjects])

    for i in range(num_subjects):
        # Normalize into fresh names: writing the result back into
        # test_data would feed already-normalized data into the
        # normalization of every following subject.
        train_norm, test_norm = seg.normalize_data(
            train_data_matrix[:, :, i], test_data)

        # Optimise the cluster centres (100 iterations, learning rate 0.1).
        _, _, w_final = prj.kmeans_no_plot(
            train_norm, train_labels_matrix[:, i], 100, 0.1)

        # Label the test data with this subject's cluster centres.
        pred_labels_kmeans[:, i] = prj.predicted_kmeans_test(
            w_final, test_norm)

    # Decision fusion: per-pixel majority vote over the subjects.
    predicted_labels_kmeans_final = scipy.stats.mode(
        pred_labels_kmeans, axis=1)[0].flatten()

    err = util.classification_error(test_labels,
                                    predicted_labels_kmeans_final)
    dice = util.dice_multiclass(test_labels, predicted_labels_kmeans_final)

    predicted_mask = predicted_labels_kmeans_final.reshape(
        im_size[0], im_size[1])
    return predicted_mask, err, dice
def feature_curve(use_random=False):
    """Plot the 3-NN test error as a function of the number of features.

    Datasets (1, 1, 'brain') and (2, 1, 'brain') serve as training and
    test data. For every feature count the classifier is trained five
    times on 10 randomly drawn training samples, using only the leading
    columns of the normalized data.

    Input:
        use_random  when True, the training features are replaced by
                    standard-normal noise of the same shape
    """
    train_data, train_labels, train_feature_labels = util.create_dataset(
        1, 1, 'brain')
    test_data, test_labels, test_feature_labels = util.create_dataset(
        2, 1, 'brain')

    if use_random:
        train_data = np.random.randn(train_data.shape[0],
                                     train_data.shape[1])

    train_data, test_data = seg.normalize_data(train_data, test_data)

    # Experiment settings
    feature_sizes = np.arange(train_data.shape[1]) + 1
    train_size = 10
    k = 3
    num_iter = 5

    # Error tables, one row per feature count and one column per repeat
    table_shape = [len(feature_sizes), num_iter]
    test_error = np.full(table_shape, np.nan)
    train_error = np.full(table_shape, np.nan)

    for row, n_features in enumerate(feature_sizes):
        for rep in range(num_iter):
            print('feature size = {}, iter = {}'.format(n_features, rep))
            start_time = timeit.default_timer()

            # Draw the training subset (with replacement)
            ix = np.random.randint(len(train_data), size=train_size)
            subset_train_data = train_data[ix, :]
            subset_train_labels = train_labels[ix, :]

            # Fit on the leading n_features columns only
            subset_features = subset_train_data[:, :n_features]
            neigh = KNeighborsClassifier(n_neighbors=k)
            neigh.fit(subset_features, subset_train_labels.ravel())

            predicted_test_labels = neigh.predict(test_data[:, :n_features])
            predicted_train_labels = neigh.predict(subset_features)

            test_error[row, rep] = util.classification_error(
                test_labels, predicted_test_labels)
            train_error[row, rep] = util.classification_error(
                subset_train_labels, predicted_train_labels)

            # Timing is measured but not reported
            elapsed = timeit.default_timer() - start_time

    # Show the mean test error with its standard deviation as error bars
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(111)
    p1 = ax1.errorbar(feature_sizes,
                      np.mean(test_error, 1),
                      yerr=np.std(test_error, 1),
                      label='Test error')
    ax1.set_xlabel('Number of features')
    ax1.set_ylabel('Error')
    ax1.grid()
    ax1.legend()
def normalized_stats_test():
    """Print the per-feature mean and std of the normalized feature set.

    The features returned by scatter_data_test are extended with the
    coordinate feature of image 1_1_t1.tif before normalization.
    """
    features, _ = scatter_data_test(showFigs=False)
    image = plt.imread('../data/dataset_brains/1_1_t1.tif')
    coord_feature, _ = seg.extract_coordinate_feature(image)

    combined = np.concatenate((features, coord_feature), axis=1)
    normalized, _ = seg.normalize_data(combined)

    # First the column means, then the column standard deviations
    for statistic in (np.mean, np.std):
        print(statistic(normalized, axis=0))
def knn_curve():
    """Plot mean error and dice of seg.knn_classifier for several k.

    Datasets (1, 1, 'brain') and (2, 1, 'brain') serve as training and
    test data. For each k the classifier is run three times on 100
    randomly drawn training samples.
    """
    train_data, train_labels, train_feature_labels = util.create_dataset(
        1, 1, 'brain')
    test_data, test_labels, test_feature_labels = util.create_dataset(
        2, 1, 'brain')
    train_data, test_data = seg.normalize_data(train_data, test_data)

    # Experiment settings
    num_iter = 3
    train_size = 100
    k = np.array([1, 3, 5, 9, 15, 25, 100])

    # Result tables, one row per k and one column per repeat
    table_shape = [len(k), num_iter]
    test_error = np.full(table_shape, np.nan)
    dice = np.full(table_shape, np.nan)

    for row, n_neighbors in enumerate(k):
        for rep in range(num_iter):
            print('k = {}, iter = {}'.format(n_neighbors, rep))

            # Draw the training subset (with replacement)
            ix = np.random.randint(len(train_data), size=train_size)
            subset_train_data = train_data[ix, :]
            subset_train_labels = train_labels[ix, :]

            predicted_test_labels = seg.knn_classifier(
                subset_train_data, subset_train_labels, test_data,
                n_neighbors)

            test_error[row, rep] = util.classification_error(
                test_labels, predicted_test_labels)
            dice[row, rep] = util.dice_overlap(test_labels,
                                               predicted_test_labels)

    # Both curves share one axis: error in red, dice in black
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(111)
    p1 = ax1.plot(k, np.mean(test_error, 1), 'r', label='error')
    p2 = ax1.plot(k, np.mean(dice, 1), 'k', label='dice')
    ax1.set_xlabel('k')
    ax1.set_ylabel('error')
    ax1.grid()
    ax1.legend()
def prepare_input(subject_fd, config):
    """Build the network input tensor for one subject folder.

    Args:
        subject_fd: path of the subject folder; its base name is passed
            to get_subject_tensor as the subject name.
        config: mapping that provides 'inference_shape', the shape the
            subject data is resized to.

    Returns:
        A tuple (subject_tensor, original_affine, foreground): the
        resized, normalized data with a new leading axis, followed by
        the affine and foreground returned by get_subject_tensor.
    """
    name = os.path.basename(subject_fd)
    image_mris, original_affine, foreground = get_subject_tensor(
        subject_fd, name)

    # The affine of the resized volume is not needed by the caller.
    resized, _ = resize_modal_image(image_mris,
                                    tuple(config['inference_shape']))

    # Add the leading axis after normalization.
    subject_tensor = np.expand_dims(normalize_data(resized), axis=0)
    return subject_tensor, original_affine, foreground
def kmeans_clustering_test():
    """Cluster the normalized features of image 1_1_t1.tif and show them.

    The features returned by scatter_data_test are extended with the
    coordinate feature of the image, normalized and passed to
    seg.kmeans_clustering; the labels are displayed in image shape.
    """
    features, _ = scatter_data_test(showFigs=False)
    image = plt.imread('../data/dataset_brains/1_1_t1.tif')
    coord_feature, _ = seg.extract_coordinate_feature(image)

    combined = np.concatenate((features, coord_feature), axis=1)
    test_data, _ = seg.normalize_data(combined)

    # Bring the flat label vector back to the image layout for display
    label_image = seg.kmeans_clustering(test_data).reshape(image.shape)
    plt.imshow(label_image)
def kmeans_clustering_test():
    """Run seg.kmeans_clustering on the normalized features of 1_1_t1.tif.

    The features returned by scatter_data_test are extended with the
    coordinate feature of the image and normalized before clustering.

    Output:
        kmeans_cost  whatever seg.kmeans_clustering returns for the
                     normalized data (NOTE(review): the name suggests a
                     cost value - confirm against seg.kmeans_clustering)
    """
    X, Y = scatter_data_test(showFigs=False)
    I = plt.imread('../data/dataset_brains/1_1_t1.tif')
    c, coord_im = seg.extract_coordinate_feature(I)
    X_data = np.concatenate((X, c), axis=1)

    # The normalization must be live code: the clustering call below
    # reads normalized_Xdata.
    normalized_Xdata, _ = seg.normalize_data(X_data)
    kmeans_cost = seg.kmeans_clustering(normalized_Xdata)
    return kmeans_cost
def normalized_stats_test():
    """Print a small report with mean and std of every normalized feature.

    The features returned by scatter_data_test are extended with the
    coordinate feature of image 1_1_t1.tif before normalization; the
    statistics are rounded to three decimals.
    """
    features, _ = scatter_data_test(showFigs=False)
    image = plt.imread('../data/dataset_brains/1_1_t1.tif')
    coord_feature, _ = seg.extract_coordinate_feature(image)

    combined = np.concatenate((features, coord_feature), axis=1)
    norm_data = seg.normalize_data(combined)[0]

    print("=========== Feature Stats Test ===========\n")
    # Number of features equals the length of the first sample row
    n_features = norm_data[0, :].size
    for col in range(n_features):
        column = norm_data[:, col]
        print("Feature ", col + 1,
              "\nMean: \t", round(np.mean(column), 3),
              "\nStd: \t", round(np.std(column), 3), "\n")
    print("=================== END ==================")
def normalized_stats_test():
    """Print mean and standard deviation of every normalized feature.

    The features returned by scatter_data_test are extended with the
    coordinate feature of image 1_1_t1.tif before normalization. One
    line is printed per feature column, whatever the number of columns.
    """
    X, Y = scatter_data_test(showFigs=False)
    I = plt.imread('../data/dataset_brains/1_1_t1.tif')
    c, coord_im = seg.extract_coordinate_feature(I)
    X_data = np.concatenate((X, c), axis=1)

    train_data, _ = seg.normalize_data(X_data)
    norm_feature_mean = np.mean(train_data, 0)
    norm_feature_dev = np.std(train_data, 0)

    # Iterate over the actual feature count instead of a fixed 6, which
    # raised IndexError for fewer features and skipped any beyond six.
    for i in range(train_data.shape[1]):
        print("Feature {} has the following properties. The mean is: {:.2f} and the standard deviation is: {:.2f}".format(
            i + 1, norm_feature_mean.item(i), norm_feature_dev.item(i)))
def normalized_stats_test():
    """Print the mean and std vectors of the normalized feature set.

    The features returned by scatter_data_test are extended with the
    coordinate feature of image 1_1_t1.tif before normalization.
    """
    features, _ = scatter_data_test(showFigs=False)
    image = plt.imread('../data/dataset_brains/1_1_t1.tif')
    coord_feature, _ = seg.extract_coordinate_feature(image)
    combined = np.concatenate((features, coord_feature), axis=1)

    # seg.normalize_data returns two values; only the first is needed
    normdata = seg.normalize_data(combined)[0]

    column_means = np.mean(normdata, axis=0)
    column_stds = np.std(normdata, axis=0)
    print("Mean is" + str(column_means))
    print("Std is" + str(column_stds))
def normalized_stats_test():
    """Print, plot and summarize the normalized feature set.

    The features returned by scatter_data_test are extended with the
    coordinate feature of image 1_1_t1.tif and normalized. The
    normalized data are printed, features 0 and 1 are passed to
    util.scatter_data, and the per-feature means and standard
    deviations are printed last.
    """
    features, labels = scatter_data_test(showFigs=False)
    image = plt.imread('../data/dataset_brains/1_1_t1.tif')
    coord_feature, _ = seg.extract_coordinate_feature(image)
    combined = np.concatenate((features, coord_feature), axis=1)

    n_data, _ = seg.normalize_data(combined)
    print(n_data)

    # Each row of the transpose is one feature column
    means = np.array([np.mean(column) for column in n_data.T], dtype=float)
    stds = np.array([np.std(column) for column in n_data.T], dtype=float)

    util.scatter_data(n_data, labels, 0, 1)
    print(means)
    print(stds)
train_data, train_labels, train_feature_labels = util.create_dataset( sub, train_slice, task) all_data_matrix[:, :, i] = train_data train_labels_matrix[:, i] = train_labels.flatten() #select certain data: train_data_matrix = all_data_matrix[:, :, train_subjects] train_data_matrix = train_data_matrix[:, features, :] test_data = test_data[:, features] #Combined K-means pred_labels_kmeans = np.empty( [train_data_matrix.shape[0], train_data_matrix.shape[2]]) print(train_data.shape) for i in range(train_data_matrix.shape[2]): train_data, test_data = seg.normalize_data(train_data_matrix[:, :, i], test_data) _, _, w_final = prj.kmeans(train_data, train_labels_matrix[:, i], num_iter, mu) temp_pred = prj.predicted_kmeans_test(w_final, test_data) print("Possible classes are: {}".format(np.unique(temp_pred))) tempdice = util.dice_multiclass(test_labels, temp_pred) temperr = util.classification_error(test_labels, temp_pred) print('Err {:.4f}, dice {:.4f}'.format(temperr, tempdice)) pred_labels_kmeans[:, i] = temp_pred #decision fusion predicted_labels_kmeans_final = scipy.stats.mode(pred_labels_kmeans,
def _pca_reduced_features(data, n_features=None):
    """Normalize data and keep its leading PCA-ranked feature columns.

    When n_features is None, the count is the number of entries of the
    fraction_variance returned by seg.mypca that lie below 0.95, plus one.
    """
    data_norm, _ = seg.normalize_data(data)
    _, _, _, fraction_variance, ix = seg.mypca(data_norm)
    if n_features is None:
        n_features = int(np.sum(fraction_variance < 0.95)) + 1
    return data_norm[:, ix][:, :n_features]


def _annotate_result(ax, err, dice, title):
    """Write the error/dice summary and a title on a result subplot."""
    ax.set_xlabel('Err {:.4f}, dice {:.4f}'.format(err, dice))
    ax.set_title(title)


def segmentation_demo():
    """Demonstrate atlas, k-NN and custom segmentation on five subjects.

    First a single-atlas prediction (labels of subject 1) is evaluated
    against subject 2 and shown. Then, leaving out one subject at a
    time, the combined-atlas, combined k-NN (k=10) and
    segmentation_mymethod predictions are evaluated with
    util.classification_error and util.dice_multiclass and plotted in
    one figure per test subject.
    """
    # Data name specification
    train_subject = 1
    test_subject = 2
    train_slice = 1
    test_slice = 1
    task = 'tissue'

    # Load data
    train_data, train_labels, train_feature_labels = util.create_dataset(
        train_subject, train_slice, task)
    test_data, test_labels, test_feature_labels = util.create_dataset(
        test_subject, test_slice, task)

    # Normalize and keep the PCA-ranked features; the resulting shape
    # fixes the layout of the data matrix further down.
    train_norm = _pca_reduced_features(train_data)

    # Single atlas: the predicted labels are simply the training labels
    predicted_labels = seg.segmentation_atlas(None, train_labels, None)
    err = util.classification_error(test_labels, predicted_labels)
    dice = util.dice_multiclass(test_labels, predicted_labels)

    # Display results
    true_mask = test_labels.reshape(240, 240)
    predicted_mask = predicted_labels.reshape(240, 240)
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(111)
    ax1.imshow(true_mask, 'gray')
    ax1.imshow(predicted_mask, 'viridis', alpha=0.5)
    print('Subject {}, slice {}.\nErr {}, dice {}'.format(
        test_subject, test_slice, err, dice))

    # COMPARE METHODS
    num_images = 5
    num_methods = 3
    im_size = [240, 240]

    # One row per test subject, one column per method
    all_errors = np.full([num_images, num_methods], np.nan)
    all_dice = np.full([num_images, num_methods], np.nan)

    all_subjects = np.arange(num_images)
    train_slice = 1
    task = 'tissue'

    # PCA-reduced data for the atlas / k-NN methods
    num_pca_features = train_norm.shape[1]
    all_data_matrix = np.empty(
        [train_norm.shape[0], num_pca_features, num_images])
    all_labels_matrix = np.empty([train_labels.size, num_images])
    # The k-means based method receives every (unreduced) feature, so
    # this matrix is sized from the raw data, not from the PCA result.
    all_data_matrix_kmeans = np.empty(
        [train_data.shape[0], train_data.shape[1], num_images])
    all_labels_matrix_kmeans = np.empty([train_labels.size, num_images])

    # Load every dataset once and fill both representations
    print('Loading data for ' + str(num_images) + ' subjects...')
    for i in all_subjects:
        sub = i + 1
        subject_data, subject_labels, _ = util.create_dataset(
            sub, train_slice, task)

        # Keep the same number of PCA-ranked columns for every subject
        # so that the slice always fits the preallocated matrix.
        all_data_matrix[:, :, i] = _pca_reduced_features(
            subject_data, num_pca_features)
        all_labels_matrix[:, i] = subject_labels.flatten()

        subject_norm, _ = seg.normalize_data(subject_data)
        all_data_matrix_kmeans[:, :, i] = subject_norm
        all_labels_matrix_kmeans[:, i] = subject_labels.flatten()

    print('Finished loading data.\nStarting segmentation...')

    # Go through each subject, taking the i-th subject as the test
    for i in np.arange(num_images):
        sub = i + 1

        # Training subjects are all subjects except the test subject
        train_subjects = np.delete(all_subjects, i)

        train_data_matrix = all_data_matrix[:, :, train_subjects]
        train_labels_matrix = all_labels_matrix[:, train_subjects]
        test_data = all_data_matrix[:, :, i]
        test_labels = all_labels_matrix[:, i]
        test_shape_1 = test_labels.reshape(im_size[0], im_size[1])

        fig = plt.figure(figsize=(15, 5))

        # Combined atlas
        predicted_labels = seg.segmentation_combined_atlas(
            train_labels_matrix)
        all_errors[i, 0] = util.classification_error(test_labels,
                                                     predicted_labels)
        all_dice[i, 0] = util.dice_multiclass(test_labels, predicted_labels)

        predicted_mask_1 = predicted_labels.reshape(im_size[0], im_size[1])
        ax1 = fig.add_subplot(151)
        ax1.imshow(test_shape_1, 'gray')
        ax1.imshow(predicted_mask_1, 'viridis', alpha=0.5)
        _annotate_result(ax1, all_errors[i, 0], all_dice[i, 0],
                         'Subject {}: Combined atlas'.format(sub))

        # Combined k-NN
        predicted_labels = seg.segmentation_combined_knn(
            train_data_matrix, train_labels_matrix, test_data, k=10)
        all_errors[i, 1] = util.classification_error(test_labels,
                                                     predicted_labels)
        all_dice[i, 1] = util.dice_multiclass(test_labels, predicted_labels)

        predicted_mask_2 = predicted_labels.reshape(im_size[0], im_size[1])
        ax2 = fig.add_subplot(152)
        ax2.imshow(test_shape_1, 'gray')
        ax2.imshow(predicted_mask_2, 'viridis', alpha=0.5)
        _annotate_result(ax2, all_errors[i, 1], all_dice[i, 1],
                         'Subject {}: Combined k-NN'.format(sub))

        # Own method, fed with the unreduced normalized data
        train_data_matrix_kmeans = all_data_matrix_kmeans[:, :,
                                                          train_subjects]
        train_labels_matrix_kmeans = all_labels_matrix_kmeans[:,
                                                              train_subjects]
        test_data_kmeans = all_data_matrix_kmeans[:, :, i]
        # NOTE(review): task is passed as the fourth positional argument;
        # confirm that the segmentation_mymethod in scope expects the task
        # there (a signature with num_iter in that position would receive
        # the string 'tissue').
        predicted_labels = segmentation_mymethod(train_data_matrix_kmeans,
                                                 train_labels_matrix_kmeans,
                                                 test_data_kmeans, task)
        all_errors[i, 2] = util.classification_error(test_labels,
                                                     predicted_labels)
        all_dice[i, 2] = util.dice_multiclass(test_labels, predicted_labels)

        # Overlay, prediction only, and reference only
        predicted_mask_3 = predicted_labels.reshape(im_size[0], im_size[1])
        own_title = 'Subject {}: My method'.format(sub)

        ax3 = fig.add_subplot(153)
        ax3.imshow(test_shape_1, 'gray')
        ax3.imshow(predicted_mask_3, 'viridis', alpha=0.5)
        _annotate_result(ax3, all_errors[i, 2], all_dice[i, 2], own_title)

        ax4 = fig.add_subplot(154)
        ax4.imshow(predicted_mask_3, 'viridis')
        _annotate_result(ax4, all_errors[i, 2], all_dice[i, 2], own_title)

        ax5 = fig.add_subplot(155)
        ax5.imshow(test_shape_1, 'gray')
        _annotate_result(ax5, all_errors[i, 2], all_dice[i, 2], own_title)
def kmeans_clustering(test_data, K=2):
    """Label test_data with k-means, ordering clusters by intensity.

    The data are normalized, K cluster centres are initialised from
    randomly drawn samples and refined with 100 gradient-descent steps
    (learning rate 0.01) on cost_kmeans. Every sample is then assigned
    to its nearest centre, and the clusters are renumbered by the
    ascending value of the first feature of their centre.

    Input:
        test_data         num_test x p matrix with features for the test data
        K                 number of clusters (2 by default)
    Output:
        predicted_labels  vector of length num_test with cluster ranks
                          0 .. K-1 stored as floats
    """
    X_norm, _ = seg.normalize_data(test_data)
    N, M = X_norm.shape

    # The cost, the initial centres and the final assignment must all
    # use the same (normalized) data.
    def fun(w):
        return cost_kmeans(X_norm, w)

    mu = 0.01        # learning rate
    num_iter = 100   # number of gradient steps

    # Initialise the K cluster centres from randomly chosen samples
    idx = np.random.randint(N, size=K)
    w_initial = X_norm[idx, :]

    # Reshape centres to a column vector (needed by ngradient)
    w_vector = w_initial.reshape(K * M, 1)

    for _ in range(num_iter):
        # Gradient descent on the k-means cost
        change = mu * util.ngradient(fun, w_vector)
        w_vector = w_vector - change[:, np.newaxis]

    # Reshape back to one row per cluster centre
    w_final = w_vector.reshape(K, M)

    # Index of the nearest centre for every sample
    D = scipy.spatial.distance.cdist(X_norm, w_final, metric='euclidean')
    min_index = np.argmin(D, axis=1)

    # Rank the clusters by the first feature (intensity) of their centre
    sorted_order = np.argsort(w_final[:, 0], axis=0)

    predicted_labels = np.full(min_index.shape, np.nan)
    for rank, cluster in enumerate(sorted_order):
        predicted_labels[min_index == cluster] = rank

    return predicted_labels
def learning_curve():
    """Plot 1-NN test and train error against the training-set size.

    Datasets (1, 1, 'brain') and (2, 1, 'brain') serve as training and
    test data. For each of 15 logarithmically spaced training sizes the
    classifier is fitted three times on a random subset; error and dice
    are computed on the test data and on the complete training data.
    Only the errors are plotted, test error on the upper axis and train
    error on the lower one.
    """
    train_data, train_labels, _ = util.create_dataset(1, 1, 'brain')
    test_data, test_labels, _ = util.create_dataset(2, 1, 'brain')
    train_data, test_data = seg.normalize_data(train_data, test_data)

    # Experiment settings
    train_sizes = np.logspace(0.1, 3.0, num=15).astype(int)
    k = 1
    num_iter = 3  # number of repeats per training size

    # Result tables, one row per training size and one column per repeat
    table_shape = [len(train_sizes), num_iter]
    test_error = np.full(table_shape, np.nan)
    test_dice = np.full(table_shape, np.nan)
    train_error = np.full(table_shape, np.nan)
    train_dice = np.full(table_shape, np.nan)

    # The evaluation functions are fed boolean labels
    test_labels = test_labels.astype(bool)
    train_labels_bool = train_labels.astype(bool)

    for row, n_samples in enumerate(train_sizes):
        for rep in range(num_iter):
            print('train_size = {}, iter = {}'.format(n_samples, rep))

            # Draw the training subset (with replacement)
            ix = np.random.randint(len(train_data), size=n_samples)
            subset_train_data = train_data[ix, :]
            subset_train_labels = train_labels[ix, :]

            neigh = KNeighborsClassifier(n_neighbors=k)
            neigh.fit(subset_train_data, subset_train_labels.ravel())

            # Evaluate on the test data
            predicted_test_labels = neigh.predict(test_data).astype(bool)
            test_error[row, rep] = util.classification_error(
                test_labels, predicted_test_labels)
            test_dice[row, rep] = util.dice_overlap(test_labels,
                                                    predicted_test_labels)

            # Evaluate on the complete training data
            predicted_train_labels = neigh.predict(train_data).astype(bool)
            train_error[row, rep] = util.classification_error(
                train_labels_bool, predicted_train_labels)
            train_dice[row, rep] = util.dice_overlap(train_labels_bool,
                                                     predicted_train_labels)

    # Two stacked axes, each spanning the full figure width
    fig = plt.figure(figsize=(8, 8))
    gs = fig.add_gridspec(2, 2)
    ax1 = fig.add_subplot(gs[0, :])
    ax2 = fig.add_subplot(gs[1, :])

    x = np.log(train_sizes)
    ticks = list(x)
    tick_lbls = [str(size) for size in train_sizes]

    curves = (
        (ax1, test_error, 'Test error'),
        (ax2, train_error, 'Train error'),
    )
    for axis, errors, label in curves:
        axis.errorbar(x, np.mean(errors, 1), yerr=np.std(errors, 1),
                      label=label)
        axis.set_xlabel('Number of training samples (k)')
        axis.set_ylabel('error')
        axis.set_xticks(ticks)
        axis.set_xticklabels(tick_lbls)
        axis.grid()
        axis.legend()
def segmentation_mymethod(train_data, train_labels, test_data, num_iter=100, mu=0.1):
    """Segment the test data by fusing k-means, atlas and k-NN votes.

    Features 1 and 4 are selected. For every training subject, k-means
    cluster centres are optimised on the normalized subject data and
    used to label the normalized test data; the per-subject results are
    fused by majority voting. That vote is stacked with the predictions
    of seg.segmentation_combined_atlas (combining='mode') and
    seg.segmentation_combined_knn (k=1), and the final label is the
    majority over the three methods.

    Input:
        train_data        array indexed as [pixel, feature, subject]
        train_labels      array indexed as [pixel, subject]
        test_data         array indexed as [pixel, feature]
        num_iter          iteration count passed to kmeans_no_plot
        mu                learning rate passed to kmeans_no_plot
    Output:
        predicted_labels  first output of scipy.stats.mode taken over
                          the three method predictions for each pixel
    """
    # Features used by every sub-method; change this if needed
    features = [1, 4]
    train_data_matrix = train_data[:, features, :]
    test_data_matrix = test_data[:, features]

    # One column of k-means predictions per training subject
    num_subjects = train_data_matrix.shape[2]
    pred_labels_kmeans = np.empty([train_data_matrix.shape[0], num_subjects])

    for i in range(num_subjects):
        # Normalization is only needed here; the combined k-NN and
        # combined atlas functions handle their own input.
        subject_norm, test_norm = seg.normalize_data(
            train_data_matrix[:, :, i], test_data_matrix)

        # kmeans_no_plot is the variant without start/end plots; the
        # training labels are only needed for plotting.
        _, _, w_final = kmeans_no_plot(subject_norm, train_labels[:, i],
                                       num_iter, mu)

        pred_labels_kmeans[:, i] = predicted_kmeans_test(w_final, test_norm)

    # Majority vote over the subjects
    kmeans_vote = scipy.stats.mode(pred_labels_kmeans, axis=1)[0].flatten()

    # Predictions of the two other sub-methods
    _, pred_labels_cat = seg.segmentation_combined_atlas(train_labels,
                                                         combining='mode')
    _, pred_labels_cnn = seg.segmentation_combined_knn(
        train_data_matrix, train_labels, test_data_matrix, 1)

    # One column per sub-method, then a majority vote across the columns
    concat_labels = np.vstack(
        (kmeans_vote, pred_labels_cat.T, pred_labels_cnn.T)).T
    predicted_labels = scipy.stats.mode(concat_labels, axis=1)[0]
    return predicted_labels
all_subjects = np.arange(num_images) train_slice = 1 task = 'tissue' all_data_matrix = np.empty([train_data.shape[0],train_data.shape[1],num_images]) all_labels_matrix = np.empty([train_labels.size,num_images], dtype=bool) for i in all_subjects: sub = i+1 train_data, train_labels, train_feature_labels = util.create_dataset(sub,train_slice,task) all_data_matrix[:,:,i] = train_data all_labels_matrix[:,i] = train_labels.flatten() #Select data with certain features and normalize it features = [1,4] train_data,_ = seg.normalize_data(train_data[:, features]) test_data ,_= seg.normalize_data(test_data[:,features]) all_data_matrix, _ = seg.normalize_data(all_data_matrix[:, features, :]) #predicted_train = seg.kmeans_clustering(train_data, K=4) if (task == 'tissue'): k = 4 else: k = 2 kmeans_cost, train_predicted, w_final = prj.kmeans(train_data, train_labels k, mu = 0.1, num_iter = 5) dice = util.dice_multiclass(train_labels, train_predicted) error = util.classification_error(train_labels, train_predicted)
def learning_curve():
    """Plot the 1-NN test error against the training-set size.

    Training and test data are two sets of 1000 samples from
    seg.generate_gaussian_data. For each training size the classifier
    is fitted three times on a random subset; test error and dice are
    computed, and the mean test error is plotted with its standard
    deviation as error bars.
    """
    train_data, train_labels = seg.generate_gaussian_data(1000)
    test_data, test_labels = seg.generate_gaussian_data(1000)
    train_data, test_data = seg.normalize_data(train_data, test_data)

    # Experiment settings
    train_sizes = np.array([1, 3, 10, 30, 100, 300])
    k = 1
    num_iter = 3  # number of repeats per training size

    # Result tables, one row per training size and one column per repeat
    table_shape = [len(train_sizes), num_iter]
    test_error = np.full(table_shape, np.nan)
    test_dice = np.full(table_shape, np.nan)

    # TODO: Store errors for training data

    # The evaluation functions are fed boolean labels
    test_labels = test_labels.astype(bool)

    for row, n_samples in enumerate(train_sizes):
        for rep in range(num_iter):
            print('train_size = {}, iter = {}'.format(n_samples, rep))

            # Draw the training subset (with replacement)
            ix = np.random.randint(len(train_data), size=n_samples)
            subset_train_data = train_data[ix, :]
            subset_train_labels = train_labels[ix, :]

            neigh = KNeighborsClassifier(n_neighbors=k)
            neigh.fit(subset_train_data, subset_train_labels.ravel())

            predicted_test_labels = neigh.predict(test_data).astype(bool)
            test_error[row, rep] = util.classification_error(
                test_labels, predicted_test_labels)
            test_dice[row, rep] = util.dice_overlap(test_labels,
                                                    predicted_test_labels)

            # TODO: Predict training labels and evaluate

    # Training sizes are placed on a logarithmic axis
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(111)
    x = np.log(train_sizes)
    p1 = ax1.errorbar(x,
                      np.mean(test_error, 1),
                      yerr=np.std(test_error, 1),
                      label='Test error')

    # TODO: Plot training size

    ax1.set_xlabel('Number of training samples (k)')
    ax1.set_ylabel('error')
    ax1.set_xticks(list(x))
    ax1.set_xticklabels([str(size) for size in train_sizes])
    ax1.grid()
    ax1.legend()
def kmeans_clustering_test():
    """Print the seg.kmeans_clustering labels of normalized 1_1_t1.tif.

    The image array itself is passed to seg.normalize_data and the
    normalized result is clustered.
    """
    image = plt.imread('../data/dataset_brains/1_1_t1.tif')
    normalized = seg.normalize_data(image)[0]
    # The labels are printed; they are not displayed as an image
    print(seg.kmeans_clustering(normalized))