Example #1
# excerpt from a unittest.TestCase; mpi is the project's MPI wrapper module
def testElect(self):
    # every node must agree on a single valid president rank
    result = mpi.elect()
    self.assertLess(result, mpi.SIZE)
    all_results = mpi.COMM.allgather(result)
    self.assertEqual(len(set(all_results)), 1)
    # exactly one node should identify itself as the president
    num_presidents = mpi.COMM.allreduce(mpi.is_president())
    self.assertEqual(num_presidents, 1)
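The test only exercises the contract of mpi.elect(): every node returns the same valid rank, and exactly one node answers is_president() affirmatively. A minimal sketch of an election satisfying that contract, assuming an mpi4py-style communicator (the COMM, RANK, and SIZE names mirror the module under test, but this implementation is an assumption, not the project's actual code):

import random
from mpi4py import MPI

COMM = MPI.COMM_WORLD
RANK = COMM.Get_rank()
SIZE = COMM.Get_size()
_president = None

def elect():
    """Elect a president; every node returns the same rank."""
    global _president
    # rank 0 draws a candidate rank and broadcasts it, so the result
    # is identical on every node
    _president = COMM.bcast(random.randint(0, SIZE - 1), root=0)
    return _president

def is_president():
    return RANK == _president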
Example #3
import numpy as np

# mpi, mathutil, and the _MINIBATCH constant come from the hosting
# project's helper modules (not shown in this excerpt)
def omp_n_maximize(X, labels, val, k):
    """Maximization step of OMP-n, given the assignment labels and values.

    Note that X is the local data hosted on each MPI node.
    """
    dim = X.shape[1]
    # AtA accumulates the Gram matrix of the activations and AtX the
    # activation-data cross term; both are reduced over all nodes below
    AtA_local = np.zeros((k, k))
    AtX_local = np.zeros((k, dim))

    A = None
    for start in range(0, X.shape[0], _MINIBATCH):
        end = min(start + _MINIBATCH, X.shape[0])
        batchsize = end - start
        # reuse one activation buffer across minibatches; rows past
        # batchsize stay zero, so they do not affect the products below
        if A is None:
            A = np.zeros((batchsize, k))
        else:
            A[:] = 0
        # scatter each sample's activation values into its label columns
        for i in range(batchsize):
            A[i, labels[start + i]] = val[start + i]
        AtA_local += mathutil.dot(A.T, A)
        AtX_local += mathutil.dot(A[:batchsize].T, X[start:end])
    AtA = np.empty_like(AtA_local)
    AtX = np.empty_like(AtX_local)
    # sum the local statistics over all MPI nodes
    mpi.COMM.Allreduce(AtA_local, AtA)
    mpi.COMM.Allreduce(AtX_local, AtX)
    # note which centroids received no activations, then add a small
    # diagonal regularization term to keep the solve well conditioned
    isempty = np.diag(AtA) == 0
    AtA.flat[::k + 1] += 1e-8
    centroids = np.ascontiguousarray(np.linalg.solve(AtA, AtX))
    # restart any centroid that received no activations
    for i in range(k):
        if isempty[i]:
            # pick a random local sample; broadcasting from the elected
            # node keeps the restarted centroid identical on all nodes
            centroids[i] = X[np.random.randint(X.shape[0])]
            mpi.COMM.Bcast(centroids[i], root=mpi.elect())
    # L2-normalize each centroid, guarding against division by zero
    scale = np.sqrt((centroids ** 2).sum(1)) + np.finfo(np.float64).eps
    centroids /= scale[:, np.newaxis]
    return centroids
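The two Allreduce calls make this a distributed normal-equations solve: with the (implicit) activation matrix A, the new centroids minimize ||A C - X||_F^2 over the data from all nodes. A single-process sketch of the same update, assuming for simplicity one active centroid per sample and substituting plain np.dot for the project's mathutil.dot:

import numpy as np

def omp_maximize_single(X, labels, val, k):
    # dense activation matrix A (n_samples x k), one nonzero per row
    n = X.shape[0]
    A = np.zeros((n, k))
    A[np.arange(n), labels] = val
    # normal equations (A^T A) C = A^T X, with the same tiny diagonal
    # regularization as the distributed version
    AtA = np.dot(A.T, A)
    AtA.flat[::k + 1] += 1e-8
    centroids = np.linalg.solve(AtA, np.dot(A.T, X))
    # L2-normalize the rows
    scale = np.sqrt((centroids ** 2).sum(1)) + np.finfo(np.float64).eps
    return centroids / scale[:, np.newaxis]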
Example #4
import numpy as np

# mpi is the hosting project's MPI helper module (not shown here)
def _m_step(X, z, k):
    """M step of the K-means EM algorithm

    Computation of cluster centers/means

    Parameters
    ----------
    X: array, shape (n_samples, n_features)

    z: array, shape (n_samples)
        Current assignment

    k: int
        Number of desired clusters

    Returns
    -------
    centers: array, shape (k, n_features)
        The resulting centers
    """
    dim = X.shape[1]
    centers_local = np.zeros((k, dim))
    counts_local = np.zeros(k, dtype=int)
    centers = np.zeros((k, dim))
    counts = np.zeros(k, dtype=int)
    # accumulate per-cluster counts and coordinate sums on the local data
    for q in range(k):
        center_mask = np.flatnonzero(z == q)
        counts_local[q] = len(center_mask)
        if counts_local[q] > 0:
            centers_local[q] = X[center_mask].sum(axis=0)
    # sum the per-node statistics across all MPI nodes
    mpi.COMM.Allreduce(counts_local, counts)
    mpi.COMM.Allreduce(centers_local, centers)
    for q in range(k):
        if counts[q] == 0:
            # empty cluster: restart from a random local sample and
            # broadcast the elected node's draw so all nodes agree
            centers[q] = X[np.random.randint(X.shape[0])]
            mpi.COMM.Bcast(centers[q], root=mpi.elect())
            counts[q] = 1
    # divide the summed coordinates by the counts to get the means
    centers /= counts.reshape((centers.shape[0], 1))
    return centers
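_m_step consumes the assignment vector z produced by a preceding E step. Since every node holds the globally reduced centers after the Allreduce, the E step needs no communication; a minimal sketch (illustrative, not part of the original example):

import numpy as np

def _e_step(X, centers):
    """Assign each local sample to its nearest center."""
    # argmin of ||x - c||^2; the ||x||^2 term is constant per sample,
    # so it can be dropped from the comparison
    dists = np.dot(X, centers.T) * -2 + (centers ** 2).sum(axis=1)
    return dists.argmin(axis=1)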