def testElect(self):
    """Check that mpi.elect() is consistent and that one president exists."""
    elected = mpi.elect()
    # The elected value has to be smaller than the number of processes.
    self.assertLess(elected, mpi.SIZE)

    # Gather the value seen by every process; they must all be identical.
    gathered = mpi.COMM.allgather(elected)
    distinct_values = set(gathered)
    self.assertEqual(len(distinct_values), 1)

    # Reducing the is_president() flags over all processes must give one.
    president_count = mpi.COMM.allreduce(mpi.is_president())
    self.assertEqual(president_count, 1)
def omp_n_maximize(X, labels, val, k):
    """Maximization of omp_n, with the given labels and vals.

    Note that X is the local data hosted in each MPI node.

    An activation matrix A is built batch by batch (row i holds val[i] at
    column(s) labels[i], zeros elsewhere). The local products A^T A and
    A^T X are summed over all nodes with Allreduce, and the centroids are
    the solution of the regularized system (A^T A + 1e-8 I) C = A^T X.
    Centroids that no sample activates are re-initialized from a randomly
    picked data row, and every centroid is finally scaled to unit L2 norm.

    Parameters
    ----------
    X: array, shape (n_local_samples, dim)
        Local data. May have zero rows on some nodes.
    labels: indexable by sample index
        Index (or indices) of the centroid(s) active for each sample.
    val: indexable by sample index
        Activation value(s) matching labels.
    k: int
        Number of centroids.

    Returns
    -------
    centroids: array, shape (k, dim)
        The updated, row-normalized centroids.
    """
    num_local = X.shape[0]
    dim = X.shape[1]
    # Local contributions to the gram matrix of the activations (A^T A)
    # and to the activation/data cross product (A^T X).
    AtA_local = np.zeros((k, k))
    AtX_local = np.zeros((k, dim))
    A = None
    for start in range(0, num_local, _MINIBATCH):
        end = min(start + _MINIBATCH, num_local)
        batchsize = end - start
        if A is None:
            # The first batch is the largest one, so the buffer allocated
            # here can be reused for every following batch.
            A = np.zeros((batchsize, k))
        else:
            A[:] = 0
        for i in range(batchsize):
            A[i, labels[start + i]] = val[start + i]
        # Only the first `batchsize` rows belong to the current batch; use
        # the same slice for both products.
        A_batch = A[:batchsize]
        AtA_local += mathutil.dot(A_batch.T, A_batch)
        AtX_local += mathutil.dot(A_batch.T, X[start:end])
    AtA = np.empty_like(AtA_local)
    AtX = np.empty_like(AtX_local)
    mpi.COMM.Allreduce(AtA_local, AtA)
    mpi.COMM.Allreduce(AtX_local, AtX)
    # Remember which centroids received no activation at all; this has to
    # be tested before the regularization term touches the diagonal.
    isempty = np.diag(AtA) == 0
    # add a regularization term (a stride of k + 1 walks the diagonal)
    AtA.flat[:: k + 1] += 1e-8
    centroids = np.ascontiguousarray(np.linalg.solve(AtA, AtX))
    # let's deal with inactive guys
    for i in np.flatnonzero(isempty):
        # randomly restart one. A node without local data cannot sample
        # (np.random.randint(0) raises ValueError) but must still take part
        # in the collective Bcast below, otherwise the other nodes block.
        if num_local > 0:
            centroids[i] = X[np.random.randint(num_local)]
        # NOTE(review): assumes mpi.elect() returns the same root rank on
        # every node; if that root holds no data, the solved row is what
        # gets broadcast.
        mpi.COMM.Bcast(centroids[i], root=mpi.elect())
    # eps keeps the division finite for all-zero centroids
    scale = np.sqrt((centroids ** 2).sum(1)) + np.finfo(np.float64).eps
    centroids /= scale[:, np.newaxis]
    return centroids
def _m_step(X, z, k):
    """M step of the K-means EM algorithm

    Computation of cluster centers/means. Per-cluster sums and counts of
    the local samples are combined over all MPI nodes with Allreduce before
    the means are taken. A cluster without any member is re-initialized
    from a randomly picked data row.

    Parameters
    ----------
    X: array, shape (n_samples, n_features)
        Local data hosted on this node. May have zero rows.

    z: array, shape (n_samples)
        Current assignment

    k: int
        Number of desired clusters

    Returns
    -------
    centers: array, shape (k, n_features)
        The resulting centers
    """
    num_local = X.shape[0]
    dim = X.shape[1]
    centers_local = np.zeros((k, dim))
    counts_local = np.zeros(k, dtype=int)
    centers = np.zeros((k, dim))
    counts = np.zeros(k, dtype=int)
    # Local per-cluster sums and member counts.
    for q in range(k):
        center_mask = np.flatnonzero(z == q)
        counts_local[q] = len(center_mask)
        if counts_local[q] > 0:
            centers_local[q] = X[center_mask].sum(axis=0)
    # Global sums and counts, identical on every node afterwards.
    mpi.COMM.Allreduce(counts_local, counts)
    mpi.COMM.Allreduce(centers_local, centers)
    for q in range(k):
        if counts[q] == 0:
            # Restart the empty cluster from a random sample. A node without
            # local data cannot sample (np.random.randint(0) raises
            # ValueError) but must still join the collective Bcast below,
            # otherwise the other nodes block.
            if num_local > 0:
                centers[q] = X[np.random.randint(num_local)]
            # NOTE(review): assumes mpi.elect() returns the same root rank
            # on every node; if that root holds no data, the all-zero sum
            # is what gets broadcast.
            mpi.COMM.Bcast(centers[q], root=mpi.elect())
            # The broadcast row is a single sample, so divide it by one.
            counts[q] = 1
    centers /= counts.reshape((centers.shape[0], 1))
    return centers
def _m_step(X, z, k):
    """M step of the K-means EM algorithm

    Computation of cluster centers/means. Per-cluster sums and counts of
    the local samples are combined over all MPI nodes with Allreduce before
    the means are taken. A cluster without any member is re-initialized
    from a randomly picked data row.

    Parameters
    ----------
    X: array, shape (n_samples, n_features)
        Local data hosted on this node. May have zero rows.

    z: array, shape (n_samples)
        Current assignment

    k: int
        Number of desired clusters

    Returns
    -------
    centers: array, shape (k, n_features)
        The resulting centers
    """
    num_local = X.shape[0]
    dim = X.shape[1]
    centers_local = np.zeros((k, dim))
    counts_local = np.zeros(k, dtype=int)
    centers = np.zeros((k, dim))
    counts = np.zeros(k, dtype=int)
    # Local per-cluster sums and member counts.
    for q in range(k):
        center_mask = np.flatnonzero(z == q)
        counts_local[q] = len(center_mask)
        if counts_local[q] > 0:
            centers_local[q] = X[center_mask].sum(axis=0)
    # Global sums and counts, identical on every node afterwards.
    mpi.COMM.Allreduce(counts_local, counts)
    mpi.COMM.Allreduce(centers_local, centers)
    for q in range(k):
        if counts[q] == 0:
            # Restart the empty cluster from a random sample. A node without
            # local data cannot sample (np.random.randint(0) raises
            # ValueError) but must still join the collective Bcast below,
            # otherwise the other nodes block.
            if num_local > 0:
                centers[q] = X[np.random.randint(num_local)]
            # NOTE(review): assumes mpi.elect() returns the same root rank
            # on every node; if that root holds no data, the all-zero sum
            # is what gets broadcast.
            mpi.COMM.Bcast(centers[q], root=mpi.elect())
            # The broadcast row is a single sample, so divide it by one.
            counts[q] = 1
    centers /= counts.reshape((centers.shape[0], 1))
    return centers
def omp_n_maximize(X, labels, val, k):
    '''Maximization of omp_n, with the given labels and vals.

    Note that X is the local data hosted in each MPI node.

    An activation matrix A is built batch by batch (row i holds val[i] at
    column(s) labels[i], zeros elsewhere). The local products A^T A and
    A^T X are summed over all nodes with Allreduce, and the centroids are
    the solution of the regularized system (A^T A + 1e-8 I) C = A^T X.
    Centroids that no sample activates are re-initialized from a randomly
    picked data row, and every centroid is finally scaled to unit L2 norm.

    Parameters
    ----------
    X: array, shape (n_local_samples, dim)
        Local data. May have zero rows on some nodes.
    labels: indexable by sample index
        Index (or indices) of the centroid(s) active for each sample.
    val: indexable by sample index
        Activation value(s) matching labels.
    k: int
        Number of centroids.

    Returns
    -------
    centroids: array, shape (k, dim)
        The updated, row-normalized centroids.
    '''
    num_local = X.shape[0]
    dim = X.shape[1]
    # Local contributions to the gram matrix of the activations (A^T A)
    # and to the activation/data cross product (A^T X).
    AtA_local = np.zeros((k, k))
    AtX_local = np.zeros((k, dim))
    A = None
    for start in range(0, num_local, _MINIBATCH):
        end = min(start + _MINIBATCH, num_local)
        batchsize = end - start
        if A is None:
            # The first batch is the largest one, so the buffer allocated
            # here can be reused for every following batch.
            A = np.zeros((batchsize, k))
        else:
            A[:] = 0
        for i in range(batchsize):
            A[i, labels[start + i]] = val[start + i]
        # Only the first `batchsize` rows belong to the current batch; use
        # the same slice for both products.
        A_batch = A[:batchsize]
        AtA_local += mathutil.dot(A_batch.T, A_batch)
        AtX_local += mathutil.dot(A_batch.T, X[start:end])
    AtA = np.empty_like(AtA_local)
    AtX = np.empty_like(AtX_local)
    mpi.COMM.Allreduce(AtA_local, AtA)
    mpi.COMM.Allreduce(AtX_local, AtX)
    # Remember which centroids received no activation at all; this has to
    # be tested before the regularization term touches the diagonal.
    isempty = (np.diag(AtA) == 0)
    # add a regularization term (a stride of k + 1 walks the diagonal)
    AtA.flat[::k + 1] += 1e-8
    centroids = np.ascontiguousarray(np.linalg.solve(AtA, AtX))
    # let's deal with inactive guys
    for i in np.flatnonzero(isempty):
        # randomly restart one. A node without local data cannot sample
        # (np.random.randint(0) raises ValueError) but must still take part
        # in the collective Bcast below, otherwise the other nodes block.
        if num_local > 0:
            centroids[i] = X[np.random.randint(num_local)]
        # NOTE(review): assumes mpi.elect() returns the same root rank on
        # every node; if that root holds no data, the solved row is what
        # gets broadcast.
        mpi.COMM.Bcast(centroids[i], root=mpi.elect())
    # eps keeps the division finite for all-zero centroids
    scale = np.sqrt((centroids**2).sum(1)) + np.finfo(np.float64).eps
    centroids /= scale[:, np.newaxis]
    return centroids