Beispiel #1
0
 def test_process_centroids_errors_propegated(self):
     """Check that an error from ``_process_centroids`` escapes ``kmeans``.

     ``_process_centroids`` is patched to raise. The assertion matches on
     the mock's message so that an unrelated failure inside ``kmeans``
     cannot satisfy the test by accident.
     """
     obs = self.dist_obs_1_feature
     k = self.k
     with mock.patch(
             'mpids.MPIscipy.cluster._kmeans._process_centroids',
             side_effect=Exception('Mock Execption')):
         # Matching the message ties the failure to the patched helper
         # instead of accepting any Exception.
         with self.assertRaisesRegex(Exception, 'Mock Execption'):
             mpi_scipy_cluster.kmeans(obs, k)
Beispiel #2
0
 def test_kmeans_calls_process_observations(self):
     """Check that ``kmeans`` calls ``_process_observations(obs, self.comm)``."""
     observations = self.dist_obs_1_feature
     num_clusters = self.k
     # Run the real helper once so the patched stand-in can hand back a
     # genuine result for the rest of ``kmeans`` to work with.
     processed_obs, num_features, labels = \
         _process_observations(observations, self.comm)
     canned_result = (processed_obs, num_features, labels)
     patch_target = 'mpids.MPIscipy.cluster._kmeans._process_observations'
     with mock.patch(patch_target,
                     return_value=canned_result) as patched_helper:
         mpi_scipy_cluster.kmeans(observations, num_clusters)
     patched_helper.assert_called_with(observations, self.comm)
Beispiel #3
0
    def test_kmeans_produces_same_results_as_scipy_kmeans2_for_3_features_no_seed(
            self):
        """Compare distributed ``kmeans`` with scipy ``kmeans2`` (3 features).

        Both implementations receive ``self.k``; the scipy call runs on
        ``self.obs_3_features`` and the distributed call on
        ``self.dist_obs_3_features``. Labels and centroids are compared with
        the class's private comparison helpers, then the returned types and
        leading dimensions are checked.
        """
        scipy_centroids, scipy_labels = \
            scipy_cluster.kmeans2(self.obs_3_features, self.k, iter=1000)
        mpids_centroids, mpids_labels = \
            mpi_scipy_cluster.kmeans(self.dist_obs_3_features, self.k)

        #Check results
        self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
        self.assertTrue(
            self.__compare_centroids(scipy_centroids, mpids_centroids))
        #Check returned data types
        self.assertIsInstance(mpids_centroids, Replicated)
        self.assertIsInstance(mpids_labels, Replicated)
        #Check number of returned elements (assertEqual reports both values
        #on failure, unlike assertTrue(a == b))
        self.assertEqual(mpids_centroids.globalshape[0], self.k)
        self.assertEqual(
            mpids_labels.globalshape[0], self.obs_3_features.shape[0])
Beispiel #4
0
    def test_kmeans_produces_same_results_as_scipy_kmeans2_for_2_features_with_numpy_seed(
            self):
        """Compare distributed ``kmeans`` with scipy ``kmeans2`` (2 features).

        A 2x2 NumPy array is passed as ``k`` to both implementations.
        Labels and centroids are compared with the class's private
        comparison helpers, then the returned types and leading dimensions
        are checked.
        """
        k = np.array([[-1, -1], [1, 1]])
        scipy_centroids, scipy_labels = \
            scipy_cluster.kmeans2(self.obs_2_features, k, iter=1000)
        mpids_centroids, mpids_labels = \
            mpi_scipy_cluster.kmeans(self.dist_obs_2_features, k)

        #Check results
        self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
        self.assertTrue(
            self.__compare_centroids(scipy_centroids, mpids_centroids))
        #Check returned data types
        self.assertIsInstance(mpids_centroids, Replicated)
        self.assertIsInstance(mpids_labels, Replicated)
        #Check number of returned elements (assertEqual reports both values
        #on failure, unlike assertTrue(a == b))
        self.assertEqual(mpids_centroids.globalshape[0], len(k))
        self.assertEqual(
            mpids_labels.globalshape[0], self.obs_2_features.shape[0])
Beispiel #5
0
    def test_kmeans_produces_same_results_as_scipy_kmeans2_for_1_feature_with_Block_distributed_seed(
            self):
        """Compare distributed ``kmeans`` with scipy ``kmeans2`` (1 feature).

        scipy receives the NumPy array ``k``; the distributed call receives
        the same values wrapped with ``mpi_np.array(k, dist='b')``. Labels
        and centroids are compared with the class's private comparison
        helpers, then the returned types and leading dimensions are checked.
        """
        k = np.array([-1, 1])
        k_mpi_np = mpi_np.array(k, dist='b')
        scipy_centroids, scipy_labels = \
            scipy_cluster.kmeans2(self.obs_1_feature, k, iter=1000)
        mpids_centroids, mpids_labels = \
            mpi_scipy_cluster.kmeans(self.dist_obs_1_feature, k_mpi_np)

        #Check results
        self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
        self.assertTrue(
            self.__compare_centroids(scipy_centroids, mpids_centroids))
        #Check returned data types
        self.assertIsInstance(mpids_centroids, Replicated)
        self.assertIsInstance(mpids_labels, Replicated)
        #Check number of returned elements (assertEqual reports both values
        #on failure, unlike assertTrue(a == b))
        self.assertEqual(mpids_centroids.globalshape[0], len(k))
        self.assertEqual(
            mpids_labels.globalshape[0], self.obs_1_feature.shape[0])
Beispiel #6
0
 def test_process_observations_errors_propegated(self):
     """Check that an error from ``_process_observations`` escapes ``kmeans``.

     ``_process_observations`` is patched to raise and ``kmeans`` is called
     with ``None`` arguments. The assertion matches on the mock's message
     so that an error caused merely by the ``None`` inputs cannot satisfy
     the test.
     """
     with mock.patch(
             'mpids.MPIscipy.cluster._kmeans._process_observations',
             side_effect=Exception('Mock Execption')):
         # Matching the message ties the failure to the patched helper
         # instead of accepting any Exception.
         with self.assertRaisesRegex(Exception, 'Mock Execption'):
             mpi_scipy_cluster.kmeans(None, None)
from mpi4py import MPI
import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_cluster
from operations import gen_blobs, measure_time

if __name__ == '__main__':
    # Benchmark driver: times mpi_cluster.kmeans on block-distributed blob
    # data and prints one CSV line per run from rank 0.
    #
    # Usage: <script> RUNS OBS_POWER
    #   RUNS      - number of timed repetitions
    #   OBS_POWER - each process contributes 2**OBS_POWER observations
    #
    # NOTE(review): `sys` and `np` are used below but their imports are not
    # visible in this part of the file - confirm they are imported.
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()

    runs = int(sys.argv[1])
    obs_power = int(sys.argv[2])

    local_size = 2**obs_power
    num_obs = n_procs * local_size
    k = 2
    features = 2
    observations, labels = gen_blobs(num_obs, features, k)
    mpi_obs = mpi_np.array(observations, dist='b', dtype=np.float64)

    for _ in range(runs):
        # Line all ranks up so every process starts timing together.
        comm.Barrier()
        start = measure_time()
        centroids, labels = mpi_cluster.kmeans(mpi_obs, k)
        elapsed = measure_time() - start
        # reduce() returns the combined value on the root rank (None on the
        # others); it must be captured, otherwise rank 0 would report only
        # its own local time rather than the slowest rank's.
        max_elapsed = comm.reduce(elapsed, op=MPI.MAX, root=0)
        if rank == 0:
            print("mpi_scipy,%d,%d,%d,%d,%.9f" %
                  (n_procs, local_size, features, k, max_elapsed))
        del centroids, labels
Beispiel #8
0
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Simulated 1D observation vector: num_points uniform samples drawn from
    # each of the two intervals listed in `centers`.
    k = 2
    num_points = 10
    centers = [[-1, -0.75], [1, 1.25]]
    x0 = np.random.uniform(centers[0][0], centers[0][1], size=num_points)
    x1 = np.random.uniform(centers[1][0], centers[1][1], size=num_points)
    # Both halves are already float64, so joining them keeps that dtype.
    np_1D_obs_features = np.concatenate((x0, x1))

    # Distribute observations among MPI processes
    mpi_np_1D_obs_features = mpi_np.array(np_1D_obs_features, dist='b')

    # K-Means on the distributed input; the keyword arguments spell out
    # what the original example describes as the default values.
    centroids, labels = mpi_scipy_cluster.kmeans(
        mpi_np_1D_obs_features, k, thresh=1e-5, comm=comm)

    # K-Means on the plain, non-distributed NumPy input
    centroids_2, labels_2 = mpi_scipy_cluster.kmeans(np_1D_obs_features, k)

    # Distributed and non-distributed inputs must generate the same result
    assert np.allclose(centroids, centroids_2)
    assert np.allclose(labels, labels_2)

    if rank == 0:
        report = (
            ('Observations: {}\n', np_1D_obs_features),
            ('Computed Centroids: {}\n', centroids),
            ('Computed Labels: {}\n', labels),
        )
        for template, value in report:
            print(template.format(value))