def test_unstructured_ward_tree():
    """
    Check the node count of a ward tree built without connectivity.

    The tree is built on the 100 columns of a seeded random (50, 100)
    matrix; the number of merges plus the number of leaves must equal
    ``2 * 100 - 1``.
    """
    np.random.seed(0)
    data = np.random.randn(50, 100)
    # The node count reported by ward_tree is not used: the expected
    # total is recomputed from the data shape instead.
    merges, _, leaf_count = ward_tree(data.T)
    expected_nodes = 2 * data.shape[1] - 1
    assert len(merges) + leaf_count == expected_nodes
def test_height_ward_tree():
    """
    Check the node count of a ward tree built on a 10 x 10 grid graph.

    NOTE(review): despite its name, this test does not inspect merge
    heights. It only asserts that the number of merges plus the number
    of leaves equals ``2 * n_features - 1``.
    """
    np.random.seed(0)
    # Builtin ``bool`` rather than the ``np.bool`` alias, which recent
    # NumPy releases no longer provide. The mask is only used for its
    # shape.
    mask = np.ones([10, 10], dtype=bool)
    X = np.random.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    # The second return value is ignored; the expected node total is
    # recomputed from the data shape below.
    children, _, n_leaves = ward_tree(X.T, connectivity)
    n_nodes = 2 * X.shape[1] - 1
    assert len(children) + n_leaves == n_nodes
def test_structured_ward_tree():
    """
    Check that we obtain the correct solution for structured ward tree.

    The tree is built on the 100 columns of a seeded random (50, 100)
    matrix, constrained by the connectivity graph of a 10 x 10 grid.
    The number of merges plus the number of leaves must equal
    ``2 * 100 - 1``.
    """
    np.random.seed(0)
    # Builtin ``bool`` rather than the ``np.bool`` alias, which recent
    # NumPy releases no longer provide. The mask is only used for its
    # shape.
    mask = np.ones([10, 10], dtype=bool)
    X = np.random.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    # The second return value is ignored; the expected node total is
    # recomputed from the data shape below.
    children, _, n_leaves = ward_tree(X.T, connectivity)
    n_nodes = 2 * X.shape[1] - 1
    assert len(children) + n_leaves == n_nodes
def test_scikit_vs_scipy():
    """
    Test scikit ward with full connectivity (i.e. unstructured) vs scipy.

    For five random data sets, both trees are cut into ``k`` clusters
    with ``_hc_cut`` and the two labellings are compared with
    ``assess_same_labelling``.

    NOTE(review): the random draws are not seeded here, so the data
    differ from run to run.
    """
    from scipy.sparse import lil_matrix
    n, p, k = 10, 5, 3
    # All-ones matrix: every sample is connected to every other one.
    connectivity = lil_matrix(np.ones((n, n)))
    for _ in range(5):
        # Small noise plus a per-row offset of -4 * row index, then
        # each row is centred on its own mean.
        X = .1 * np.random.normal(size=(n, p))
        X -= 4 * np.arange(n)[:, np.newaxis]
        X -= X.mean(axis=1)[:, np.newaxis]
        out = hierarchy.ward(X)
        # The first two columns of the scipy linkage hold the merged
        # node ids. Builtin ``int`` replaces the ``np.int`` alias, which
        # recent NumPy releases no longer provide.
        children_ = out[:, :2].astype(int)
        children, _, n_leaves = ward_tree(X, connectivity)
        cut = _hc_cut(k, children, n_leaves)
        cut_ = _hc_cut(k, children_, n_leaves)
        assess_same_labelling(cut, cut_)
def fit(self, X, y, n_iterations=25, verbose=0):
    """
    Fits Hierarchical CLustering.

    A ward tree is built on the features of X, using the connectivity
    stored in ``self.A``. Starting from the tree roots, the current
    parcellation is repeatedly split; at each iteration the candidate
    returned by ``select_best_parcellation`` is kept together with its
    score. The parcellation with the highest score is finally used to
    fit ``self.clf``.

    Parameters
    ----------
    X : ndarray of shape = (n_samples, n_features)
    y : ndarray of shape = (n_samples)
    n_iterations : int
        number of iterations = max number of parcel we want
    verbose : int, optional
        if non-zero, progress messages are printed

    Returns
    -------
    tab : ndarray
        a list of labels of shape (n_features)
        two features have the same label if they are in the same
        parcel. The smaller label correspond to the smaller parcel

    Notes
    -----
    Sets ``self.scores`` (one score per completed iteration),
    ``self.tab`` and, when the fitted classifier exposes ``coef_``,
    ``self.coef_``.
    """
    # Computing the ward tree
    children, n_components, n_leaves = ward_tree(
        X.T, connectivity=self.A, n_components=self.n_components_A)
    # Converting children from numpy array to list (faster)
    children = children.tolist()
    # Computing the parcel_based_signal for each parcel
    avg_signals = average_signals(X, children, n_leaves)

    # The first parcellation is the list of the tree roots. It is never
    # scored, so parcellations always holds one more entry than
    # self.scores.
    parcellation = tree_roots(children, n_components, n_leaves)
    parcellations = [parcellation]  # List of the best parcellations
    self.scores = []
    if verbose:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\n# First parcellation (=tree roots) : %s" % parcellations)

    for i in range(1, n_iterations + 1):
        if verbose:
            print("# Iteration %d" % i)
        # Computing all the parcellations obtainable by splitting a
        # parcel of the current parcellation
        iteration_parcellations = split_parcellation(
            parcellation, children, n_leaves)
        if len(iteration_parcellations) == 0:
            # No parcellation can be split any further. This notice is
            # printed regardless of the verbose flag.
            print(" UserWARNING : n_iterations is too big :")
            print(" Ending function at iteration %d." % i)
            break
        # Selecting the best parcellation for current iteration
        parcellation, score = select_best_parcellation(
            iteration_parcellations, self.clf, avg_signals, y,
            self.n_jobs, verbose)
        parcellations.append(parcellation)
        self.scores.append(score)

    # We select the best parcellation of those "pre-selected".
    if self.scores:  # Otherwise, max is not defined
        # parcellations[0] is the unscored root parcellation, so the
        # parcellation scored by self.scores[k] sits at index k + 1.
        best = self.scores.index(max(self.scores))
        parcellation = parcellations[best + 1]

    # Sorting the parcellation, so the smaller label correspond
    # to the smaller parcel
    parcellation.sort()
    # Computing the corresponding labels array
    self.tab = parcellation_to_label(parcellation, children, n_leaves)
    self.clf.fit(avg_signals[:, parcellation], y)
    if hasattr(self.clf, 'coef_'):
        self.coef_ = self.clf.coef_
    return self.tab
def fit(self, X, y, n_iterations=25, verbose=0):
    """
    Fits Hierarchical CLustering.

    A ward tree is built on the features of X, using the connectivity
    stored in ``self.A``. Starting from the tree roots, the current
    parcellation is repeatedly split; at each iteration the candidate
    returned by ``select_best_parcellation`` is kept together with its
    score. The parcellation with the highest score is finally used to
    fit ``self.clf``.

    Parameters
    ----------
    X : ndarray of shape = (n_samples, n_features)
    y : ndarray of shape = (n_samples)
    n_iterations : int
        number of iterations = max number of parcel we want
    verbose : int, optional
        if non-zero, progress messages are printed

    Returns
    -------
    tab : ndarray
        a list of labels of shape (n_features)
        two features have the same label if they are in the same
        parcel. The smaller label correspond to the smaller parcel

    Notes
    -----
    Sets ``self.scores`` (one score per completed iteration),
    ``self.tab`` and, when the fitted classifier exposes ``coef_``,
    ``self.coef_``.
    """
    # Computing the ward tree
    children, n_components, n_leaves = ward_tree(
        X.T, connectivity=self.A, n_components=self.n_components_A)
    # Converting children from numpy array to list (faster)
    children = children.tolist()
    # Computing the parcel_based_signal for each parcel
    avg_signals = average_signals(X, children, n_leaves)

    # The first parcellation is the list of the tree roots. It is never
    # scored, so parcellations always holds one more entry than
    # self.scores.
    parcellation = tree_roots(children, n_components, n_leaves)
    parcellations = [parcellation]  # List of the best parcellations
    self.scores = []
    if verbose:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\n# First parcellation (=tree roots) : %s" % parcellations)

    for i in range(1, n_iterations + 1):
        if verbose:
            print("# Iteration %d" % i)
        # Computing all the parcellations obtainable by splitting a
        # parcel of the current parcellation
        iteration_parcellations = split_parcellation(
            parcellation, children, n_leaves)
        if len(iteration_parcellations) == 0:
            # No parcellation can be split any further. This notice is
            # printed regardless of the verbose flag.
            print(" UserWARNING : n_iterations is too big :")
            print(" Ending function at iteration %d." % i)
            break
        # Selecting the best parcellation for current iteration
        parcellation, score = select_best_parcellation(
            iteration_parcellations, self.clf, avg_signals, y,
            self.n_jobs, verbose)
        parcellations.append(parcellation)
        self.scores.append(score)

    # We select the best parcellation of those "pre-selected".
    if self.scores:  # Otherwise, max is not defined
        # parcellations[0] is the unscored root parcellation, so the
        # parcellation scored by self.scores[k] sits at index k + 1.
        best = self.scores.index(max(self.scores))
        parcellation = parcellations[best + 1]

    # Sorting the parcellation, so the smaller label correspond
    # to the smaller parcel
    parcellation.sort()
    # Computing the corresponding labels array
    self.tab = parcellation_to_label(parcellation, children, n_leaves)
    self.clf.fit(avg_signals[:, parcellation], y)
    if hasattr(self.clf, 'coef_'):
        self.coef_ = self.clf.coef_
    return self.tab
# Module-level fixtures shared by the test functions below.
# NOTE(review): size, roi_size and n_samples are defined earlier in the
# file, outside this excerpt.

# Flat vector of zeros with size**2 entries.
X = np.zeros(size**2)
# X2 keeps referring to the full vector: X is rebound (not mutated)
# by the masking step below.
X2 = X

# Generating two convex parts: a roi_size x roi_size square in the
# first rows/columns and another one in the last rows/columns.
mask = np.zeros((size, size), dtype=bool)
mask[0:roi_size, 0:roi_size] = True
mask[-roi_size:, -roi_size:] = True
mask = mask.reshape(size**2)
# Keep only the entries selected by the mask.
X = X[mask]

# making n_samples
# Broadcasting against an (n_samples, 1) column yields n_samples rows:
# X2 stays all zeros, while row i of X is constantly equal to i.
X2 = X2 + np.zeros((n_samples, 1))
X = X + np.arange(n_samples).reshape((n_samples, 1))
Y = np.arange(n_samples)

# Generating the connectivity grids and ward trees
# First tree: masked grid, two components requested.
A = grid_to_graph(n_x=size, n_y=size, mask=mask)
children, n_components, n_leaves = ward_tree(X.T, connectivity=A,
                                             n_components=2)
children = children.tolist()
# Second tree: full (unmasked) grid, one component requested.
A2 = grid_to_graph(n_x=size, n_y=size)
children2, n_components2, n_leaves2 = ward_tree(X2.T, connectivity=A2,
                                                n_components=1)
children2 = children2.tolist()


###############################################################################
# Test functions

def test_tree_roots():
    """
    Tests that the function returns the right roots.
    """
# Module-level fixtures shared by the test functions below.
# NOTE(review): size and n_samples are defined earlier in the file,
# outside this excerpt.

# Side length of each square region selected by the mask.
roi_size = 2
# Flat vector of zeros with size**2 entries.
X = np.zeros(size**2)
# X2 keeps referring to the full vector: X is rebound (not mutated)
# by the masking step below.
X2 = X

# Generating two convex parts: a roi_size x roi_size square in the
# first rows/columns and another one in the last rows/columns.
mask = np.zeros((size, size), dtype=bool)
mask[0:roi_size, 0:roi_size] = True
mask[-roi_size:, -roi_size:] = True
mask = mask.reshape(size**2)
# Keep only the entries selected by the mask.
X = X[mask]

# making n_samples
# Broadcasting against an (n_samples, 1) column yields n_samples rows:
# X2 stays all zeros, while row i of X is constantly equal to i.
X2 = X2 + np.zeros((n_samples, 1))
X = X + np.arange(n_samples).reshape((n_samples, 1))
Y = np.arange(n_samples)

# Generating the connectivity grids and ward trees
# First tree: masked grid, two components requested.
A = grid_to_graph(n_x=size, n_y=size, mask=mask)
children, n_components, n_leaves = ward_tree(X.T, connectivity=A,
                                             n_components=2)
children = children.tolist()
# Second tree: full (unmasked) grid, one component requested.
A2 = grid_to_graph(n_x=size, n_y=size)
children2, n_components2, n_leaves2 = ward_tree(X2.T, connectivity=A2,
                                                n_components=1)
children2 = children2.tolist()


###############################################################################
# Test functions

def test_tree_roots():
    """
    Tests that the function returns the right roots.
    """
    # Roots of the first (masked, two-component) tree.
    roots1 = supervised_clustering.tree_roots(children, n_components, n_leaves)