def test_process_centroids_errors_propagated(self):
    obs = self.dist_obs_1_feature
    k = self.k
    with mock.patch(
            'mpids.MPIscipy.cluster._kmeans._process_centroids',
            side_effect=Exception('Mock Exception')) as mock_proc_cents:
        with self.assertRaises(Exception):
            mpi_scipy_cluster.kmeans(obs, k)

def test_kmeans_calls_process_observations(self):
    obs = self.dist_obs_1_feature
    k = self.k
    processed_obs, num_features, labels = \
        _process_observations(obs, self.comm)
    with mock.patch(
            'mpids.MPIscipy.cluster._kmeans._process_observations',
            return_value=(processed_obs, num_features, labels)) as mock_proc_obs:
        mpi_scipy_cluster.kmeans(obs, k)
        mock_proc_obs.assert_called_with(obs, self.comm)

def test_kmeans_produces_same_results_as_scipy_kmeans2_for_3_features_no_seed(
        self):
    scipy_centroids, scipy_labels = \
        scipy_cluster.kmeans2(self.obs_3_features, self.k, iter=1000)
    mpids_centroids, mpids_labels = \
        mpi_scipy_cluster.kmeans(self.dist_obs_3_features, self.k)

    #Check results
    self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
    self.assertTrue(
        self.__compare_centroids(scipy_centroids, mpids_centroids))
    #Check returned data types
    self.assertTrue(isinstance(mpids_centroids, Replicated))
    self.assertTrue(isinstance(mpids_labels, Replicated))
    #Check number of returned elements
    self.assertTrue(mpids_centroids.globalshape[0] == self.k)
    self.assertTrue(
        mpids_labels.globalshape[0] == self.obs_3_features.shape[0])

def test_kmeans_produces_same_results_as_scipy_kmeans2_for_2_features_with_numpy_seed(
        self):
    k = np.array([[-1, -1], [1, 1]])
    scipy_centroids, scipy_labels = \
        scipy_cluster.kmeans2(self.obs_2_features, k, iter=1000)
    mpids_centroids, mpids_labels = \
        mpi_scipy_cluster.kmeans(self.dist_obs_2_features, k)

    #Check results
    self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
    self.assertTrue(
        self.__compare_centroids(scipy_centroids, mpids_centroids))
    #Check returned data types
    self.assertTrue(isinstance(mpids_centroids, Replicated))
    self.assertTrue(isinstance(mpids_labels, Replicated))
    #Check number of returned elements
    self.assertTrue(mpids_centroids.globalshape[0] == len(k))
    self.assertTrue(
        mpids_labels.globalshape[0] == self.obs_2_features.shape[0])

def test_kmeans_produces_same_results_as_scipy_kmeans2_for_1_feature_with_Block_distributed_seed(
        self):
    k = np.array([-1, 1])
    k_mpi_np = mpi_np.array(k, dist='b')
    scipy_centroids, scipy_labels = \
        scipy_cluster.kmeans2(self.obs_1_feature, k, iter=1000)
    mpids_centroids, mpids_labels = \
        mpi_scipy_cluster.kmeans(self.dist_obs_1_feature, k_mpi_np)

    #Check results
    self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
    self.assertTrue(
        self.__compare_centroids(scipy_centroids, mpids_centroids))
    #Check returned data types
    self.assertTrue(isinstance(mpids_centroids, Replicated))
    self.assertTrue(isinstance(mpids_labels, Replicated))
    #Check number of returned elements
    self.assertTrue(mpids_centroids.globalshape[0] == len(k))
    self.assertTrue(
        mpids_labels.globalshape[0] == self.obs_1_feature.shape[0])

def test_process_observations_errors_propagated(self):
    with mock.patch(
            'mpids.MPIscipy.cluster._kmeans._process_observations',
            side_effect=Exception('Mock Exception')) as mock_proc_obs:
        with self.assertRaises(Exception):
            mpi_scipy_cluster.kmeans(None, None)

import sys

import numpy as np
from mpi4py import MPI

import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_cluster
from operations import gen_blobs, measure_time

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()

    #Benchmark parameters: number of timed runs and local problem size (2**obs_power)
    runs = int(sys.argv[1])
    obs_power = int(sys.argv[2])
    local_size = 2**obs_power
    num_obs = n_procs * local_size
    k = 2
    features = 2

    observations, labels = gen_blobs(num_obs, features, k)
    mpi_obs = mpi_np.array(observations, dist='b', dtype=np.float64)

    for _ in range(runs):
        comm.Barrier()
        time = measure_time()
        centroids, labels = mpi_cluster.kmeans(mpi_obs, k)
        time = measure_time() - time
        #Report the slowest rank's time for this run
        time = comm.reduce(time, op=MPI.MAX, root=0)
        if rank == 0:
            print("mpi_scipy,%d,%d,%d,%d,%.9f"
                  % (n_procs, local_size, features, k, time))
        del centroids, labels

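The benchmark above imports gen_blobs and measure_time from an operations helper module that is not shown in this section. A minimal stand-in, assuming gen_blobs wraps sklearn.datasets.make_blobs and measure_time wraps MPI.Wtime(), might look like the following sketch; it is not the module's actual implementation.

#Hypothetical stand-in for the `operations` helpers used by the benchmark;
#the real module is not shown here, so treat this as an illustrative sketch.
from mpi4py import MPI
from sklearn.datasets import make_blobs


def gen_blobs(num_obs, features, k):
    """Generate num_obs observations with the given number of features,
    drawn around k cluster centers, along with ground-truth labels."""
    observations, labels = make_blobs(n_samples=num_obs,
                                      n_features=features,
                                      centers=k,
                                      random_state=0)
    return observations, labels


def measure_time():
    """Return a wall-clock timestamp in seconds (MPI's portable timer)."""
    return MPI.Wtime()
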
import numpy as np
from mpi4py import MPI

import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_scipy_cluster

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

#Create simulated 1D observation vector
k, num_points, centers = 2, 10, [[-1, -0.75], [1, 1.25]]
x0 = np.random.uniform(centers[0][0], centers[0][1], size=(num_points))
x1 = np.random.uniform(centers[1][0], centers[1][1], size=(num_points))
np_1D_obs_features = np.array(x0.tolist() + x1.tolist(), dtype=np.float64)

#Distribute observations among MPI processes
mpi_np_1D_obs_features = mpi_np.array(np_1D_obs_features, dist='b')

#Compute K-Means Clustering Result
centroids, labels = mpi_scipy_cluster.kmeans(
    mpi_np_1D_obs_features,
    k,
    #Below are the default kwargs
    thresh=1e-5,
    comm=MPI.COMM_WORLD)

#Compute K-Means Clustering Result using Non-Distributed Input
centroids_2, labels_2 = mpi_scipy_cluster.kmeans(np_1D_obs_features, k)

#Check Distributed & Non-Distributed inputs generate the same result
assert np.allclose(centroids, centroids_2)
assert np.allclose(labels, labels_2)

if rank == 0:
    print('Observations: {}\n'.format(np_1D_obs_features))
    print('Computed Centroids: {}\n'.format(centroids))
    print('Computed Labels: {}\n'.format(labels))