def test_sparse_scikit_vs_scipy():
    # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy:
    # for every registered tree builder, the children array must equal the
    # one derived from scipy's linkage matrix, and cutting both trees with
    # _hc_cut must give equivalent labellings.
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage in _TREE_BUILDERS:
        for _repeat in range(5):
            X = .1 * rng.normal(size=(n, p))
            X -= 4. * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            # The first two columns of scipy's linkage matrix are the ids of
            # the merged nodes. Use the builtin ``int``: the ``np.int`` alias
            # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
            children_ = out[:, :2].astype(int, copy=False)
            children, _, n_leaves, _ = _TREE_BUILDERS[linkage](X, connectivity)

            # Sort the order of child nodes per row for consistency
            children.sort(axis=1)
            assert_array_equal(children, children_, 'linkage tree differs'
                                                    ' from scipy impl for'
                                                    ' linkage: ' + linkage)

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut: passing n_leaves + 1 in place of k
    # must raise a ValueError. Uses the tree left over from the last iteration.
    with pytest.raises(ValueError):
        _hc_cut(n_leaves + 1, children, n_leaves)
def _build_tree(self, X):
    """Build the merge tree for ``X`` and store the result on ``self``.

    Looks up the tree builder registered for ``self.linkage`` in
    ``_TREE_BUILDERS``, runs it through ``memory.cache`` and unpacks the
    result into ``self.children_``, ``self.n_leaves_`` and
    ``self.distances_``.

    Parameters
    ----------
    X : array-like
        Data forwarded unchanged as the first argument of the tree builder.
        NOTE(review): presumably of shape (n_samples, n_features) -- confirm
        against the tree builders.

    Raises
    ------
    ValueError
        If ``self.linkage`` is ``"ward"`` while ``self.affinity`` is not
        ``"euclidean"``, or if ``self.linkage`` is not a key of
        ``_TREE_BUILDERS``.
    """
    memory = check_memory(self.memory)

    if self.linkage == "ward" and self.affinity != "euclidean":
        # The two literals are concatenated, so the separating space must be
        # explicit (the message used to read "...can only workwith...").
        raise ValueError("{} was provided as affinity. Ward can only work "
                         "with Euclidean distances.".format(self.affinity))
    if self.linkage not in _TREE_BUILDERS:
        # Format a plain list so the message does not show "dict_keys(...)".
        raise ValueError(
            "Unknown linkage type {}. Valid options are {}".format(
                self.linkage, list(_TREE_BUILDERS)))
    tree_builder = _TREE_BUILDERS[self.linkage]

    # Construct the tree. The 'ward' builder is called without the
    # linkage/affinity keywords; every other builder receives both.
    kwargs = {}
    if self.linkage != 'ward':
        kwargs['linkage'] = self.linkage
        kwargs['affinity'] = self.affinity
    out = memory.cache(tree_builder)(X, n_clusters=None,
                                     return_distance=True, **kwargs)

    # Scikit-learn's tree_builder returns a tuple (children,
    # n_connected_components, n_leaves, parent, distances)
    self.children_, _, self.n_leaves_, _, self.distances_ = out