def discretize(segs, n_class, n_sample, rand):
    """
    Convert a set of segmentations into a set of discrete labels.

    Every segmentation is first encoded as a binary vector: for a random
    sample of pixel pairs, the entry is 1 when both pixels carry the same
    segment id and 0 otherwise. The codes are centred, projected with
    ``robust_pca`` and the sign pattern of the leading components is turned
    into an integer label.

    :param segs: segmentations; reshaped internally to (n, w * w) where
        ``w = segs[0].shape[0]``
    :param n_class: number of classes (clusters) for binary splits
    :param n_sample: number of samples (pixel pairs) for clustering
        structured labels
    :param rand: random number generator; must provide ``permutation(k)``
        and is forwarded to ``robust_pca``
    :return: tuple ``(lbls, seg)``. ``lbls`` is an int32 array with one label
        per segmentation, renumbered to consecutive integers starting at 0.
        ``seg`` is the segmentation closest to the mean code, with shape
        (1, w, w).
        NOTE(review): when all codes are identical, every label is 1 (not 0)
        and ``seg`` is the first segmentation; kept as in the original,
        confirm callers expect this.
    """
    w = segs[0].shape[0]
    n_pix = w * w
    segs = segs.reshape((segs.shape[0], n_pix))
    n = segs.shape[0]

    # All pixel pairs (first < second) of a w x w patch. triu_indices yields
    # them in row-major order, the same order as enumerating all w**4 index
    # pairs and keeping those with second > first, so the random sample drawn
    # below is unchanged.
    ids1, ids2 = N.triu_indices(n_pix, k=1)

    # compute n binary codes zs of length n_sample
    n_sample = min(n_sample, ids1.shape[0])
    kp = rand.permutation(ids1.shape[0])[:n_sample]
    ids1 = ids1[kp]
    ids2 = ids2[kp]

    # One fancy-indexing comparison replaces the per-segmentation loop.
    zs = (segs[:, ids1] == segs[:, ids2]).astype(N.float64)
    zs -= N.mean(zs, axis=0)
    # Drop pairs whose code is constant across segmentations (all-zero
    # columns after centring).
    zs = zs[:, N.any(zs, axis=0)]

    if N.count_nonzero(zs) == 0:
        # Degenerate case: no pair distinguishes the segmentations.
        lbls = N.ones(n, dtype=N.int32)
        segs = segs[0]
    else:
        # find most representative segs (closest to mean)
        ind = N.argmin(N.sum(zs * zs, axis=1))
        segs = segs[ind]

        # discretize zs by discretizing pca dimensions; d is also bounded by
        # the number of columns that survived the filter above, which can be
        # smaller than n_sample.
        d = min(5, n_sample, zs.shape[1], int(floor(log(n_class, 2))))
        zs = robust_pca(zs, d, rand=rand)[0]

        # Bit i of the raw code is set when component i is negative.
        n_bits = min(d, zs.shape[1])
        weights = 2 ** N.arange(n_bits)
        codes = (zs[:, :n_bits] < 0).astype(N.int32).dot(weights)

        # Renumber the raw codes to consecutive labels 0..k-1.
        lbls = N.unique(codes, return_inverse=True)[1].astype(N.int32)

    return lbls, segs.reshape((-1, w, w))