def get_augmented_gower_selection(D):
    """
    Do a spectral sign split with neighbor joining fallback.

    The first choice is to return indices corresponding to
    positive elements of the dominant eigenvector of the gower matrix.
    If this defines a degenerate bipartition (fewer than two indices
    on either side), then neighbor joining is used as a fallback.
    @param D: a distance matrix
    @return: the set of selected indices
    @raise ValueError: if D has fewer than four rows
    """
    n = len(D)
    if n < 4:
        raise ValueError('expected a distance matrix with at least four rows')
    # get the gower matrix
    G = MatrixUtil.double_centered(numpy.array(D))
    # get the eigendecomposition; the transpose lets us iterate
    # over one eigenvector per eigenvalue
    # (assumes linalg.eigh returns eigenvectors as columns)
    eigenvalues, eigenvector_transposes = linalg.eigh(G)
    eigenvectors = eigenvector_transposes.T
    # Pick the eigenvector whose eigenvalue has the largest magnitude.
    # Rank by magnitude only: comparing (magnitude, vector) tuples would
    # fall through to comparing the vectors themselves when two magnitudes
    # tie, which raises ValueError for arrays.
    # On a tie the first maximal pair is used.
    _, dominant_vector = max(
            zip(eigenvalues, eigenvectors),
            key=lambda pair: abs(pair[0]))
    # get the bipartition defined by the dominant eigenvector
    selection = set(i for i, x in enumerate(dominant_vector) if x > 0)
    complement = set(range(n)) - selection
    # if the bipartition is degenerate then resort to neighbor joining
    if min(len(selection), len(complement)) < 2:
        selection = set(NeighborJoining.get_neighbors(D))
    return selection
def get_augmented_gower_selection(D):
    """
    Do a spectral sign split with neighbor joining fallback.

    The first choice is to return indices corresponding to
    positive elements of the dominant eigenvector of the gower matrix.
    If this defines a degenerate bipartition (fewer than two indices
    on either side), then neighbor joining is used as a fallback.
    @param D: a distance matrix
    @return: the set of selected indices
    @raise ValueError: if D has fewer than four rows
    """
    n = len(D)
    if n < 4:
        raise ValueError('expected a distance matrix with at least four rows')
    # get the gower matrix
    G = MatrixUtil.double_centered(numpy.array(D))
    # get the eigendecomposition; the transpose lets us iterate
    # over one eigenvector per eigenvalue
    # (assumes linalg.eigh returns eigenvectors as columns)
    eigenvalues, eigenvector_transposes = linalg.eigh(G)
    eigenvectors = eigenvector_transposes.T
    # Pick the eigenvector whose eigenvalue has the largest magnitude.
    # Rank by magnitude only: comparing (magnitude, vector) tuples would
    # fall through to comparing the vectors themselves when two magnitudes
    # tie, which raises ValueError for arrays.
    # On a tie the first maximal pair is used.
    _, dominant_vector = max(
            zip(eigenvalues, eigenvectors),
            key=lambda pair: abs(pair[0]))
    # get the bipartition defined by the dominant eigenvector
    selection = set(i for i, x in enumerate(dominant_vector) if x > 0)
    complement = set(range(n)) - selection
    # if the bipartition is degenerate then resort to neighbor joining
    if min(len(selection), len(complement)) < 2:
        selection = set(NeighborJoining.get_neighbors(D))
    return selection
def _get_any_selection(self, distance_matrix):
    """
    Select indices by delegating to neighbor joining.
    @param distance_matrix: a numpy or row major distance matrix
    @return: a set of selected indices representing one of the two parts
    of the bipartition
    """
    # collect whatever indices neighbor joining reports into a set
    neighbor_indices = NeighborJoining.get_neighbors(distance_matrix)
    selection = set(neighbor_indices)
    return selection