Example #1
    def test_Replicated_matmul(self):
        mpi_array_a = mpi_np.array(self.np_array_a, dist='r')
        mpi_array_b = mpi_np.array(self.np_array_b, dist='r')

        #Check return type
        self.assertTrue(
            isinstance(mpi_np.matmul(self.np_array_a, self.np_array_b),
                       mpi_np.MPIArray))
        self.assertTrue(
            isinstance(mpi_np.matmul(mpi_array_a, mpi_array_b),
                       mpi_np.MPIArray))
        self.assertTrue(
            isinstance(mpi_np.matmul(mpi_array_a, self.np_array_b),
                       mpi_np.MPIArray))
        self.assertTrue(
            isinstance(mpi_np.matmul(self.np_array_a, mpi_array_b),
                       mpi_np.MPIArray))

        #Check result consistent with numpy
        self.assertTrue(np.alltrue(
            np.matmul(self.np_array_a, self.np_array_b) == \
            mpi_np.matmul(mpi_array_a, mpi_array_b)))
        self.assertTrue(np.alltrue(
            np.matmul(self.np_array_a, self.np_array_b) == \
            mpi_np.matmul(self.np_array_a, mpi_array_b)))
        self.assertTrue(np.alltrue(
            np.matmul(self.np_array_a, self.np_array_b) == \
            mpi_np.matmul(mpi_array_a, self.np_array_b)))
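A standalone sketch of the pattern exercised above, using only the API visible in this listing (mpi_np.array, mpi_np.matmul, the MPIArray type); run it under mpiexec, e.g. mpiexec -n 4 python script.py:

import numpy as np

import mpids.MPInumpy as mpi_np

np_a = np.arange(16).reshape(4, 4)
np_b = np.arange(16).reshape(4, 4)

#Replicated distribution: every rank holds a full copy of the operands
mpi_a = mpi_np.array(np_a, dist='r')
mpi_b = mpi_np.array(np_b, dist='r')

#Both MPIArray and mixed numpy/MPIArray operands are accepted
result = mpi_np.matmul(mpi_a, mpi_b)
mixed = mpi_np.matmul(np_a, mpi_b)
assert isinstance(result, mpi_np.MPIArray)
assert np.all(np.matmul(np_a, np_b) == result)
assert np.all(np.matmul(np_a, np_b) == mixed)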
Example #2
import gc
import numpy as np
import mpids.MPInumpy as mpi_np
#measure_time comes from the local operations helper module
from operations import measure_time

def creation(size, iters=10000):
    data = np.arange(size, dtype=np.float64).tolist()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    return time / iters
Example #3
import gc
import numpy as np
from mpi4py import MPI
import mpids.MPInumpy as mpi_np
#measure_time comes from the local operations helper module
from operations import measure_time

def creation(size, iters=10000, comm=MPI.COMM_WORLD):
    data = np.arange(size, dtype=np.float64).tolist()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    #Keep the max wall time across ranks; only root receives the result
    time = comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters if time is not None else None
Example #4
import numpy as np
from mpi4py import MPI
import mpids.MPInumpy as mpi_np
#measure_time comes from the local operations helper module
from operations import measure_time

def array(size, iters=10000, comm=MPI.COMM_WORLD):
    data = np.arange(size, dtype=np.float64).tolist()
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64, comm=comm, dist='b')
    time = measure_time() - time
    #Keep the max wall time across ranks; only root receives the result
    time = comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters if time is not None else None
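A hypothetical driver for the benchmark function above; after the internal reduce, only the root rank receives a non-None per-iteration time:

import sys
from mpi4py import MPI

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    size = int(sys.argv[1]) if len(sys.argv) > 1 else 1024
    avg_time = array(size, iters=100, comm=comm)
    if comm.Get_rank() == 0:
        print('block array creation, %d elements: %.9f s/iter' %
              (size, avg_time))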
Example #5
    def test_block_distribution_matmul(self):
        rank = self.comm.Get_rank()
        mpi_array_a = mpi_np.array(self.np_array_a, dist='b')
        mpi_array_b = mpi_np.array(self.np_array_b, dist='b')

        #Check result consistent with numpy
        self.assertTrue(np.alltrue(
            np.matmul(self.np_array_a, self.np_array_b)[rank] == \
            mpi_np.matmul(mpi_array_a, mpi_array_b)))
Example #6
    def setUp(self):
        self.comm = MPI.COMM_WORLD
        #Number of clusters
        self.k = 2
        self.seeded_centroids = np.arange(4).reshape(2, 2)
        self.seeded_num_centroids = self.seeded_centroids.shape[0]
        self.seeded_num_features = self.seeded_centroids.shape[-1]
        self.obs_1_feature = self.__create_1_feature_obs()
        self.obs_2_features = self.__create_2_feature_obs()
        self.obs_3_features = self.__create_3_feature_obs()
        self.dist_obs_1_feature = mpi_np.array(self.obs_1_feature, dist='b')
        self.dist_obs_2_features = mpi_np.array(self.obs_2_features, dist='b')
        self.dist_obs_3_features = mpi_np.array(self.obs_3_features, dist='b')
Example #7
    def test_under_partitioned_block_distribution_matmul(self):
        #Current version of the code will under-partition a 2x8 matrix.
        #Want to make sure the logic is sound with petsc4py.
        np_8x2_array = self.np_array_a.reshape(8, 2)
        np_2x8_array = self.np_array_b.reshape(2, 8)
        mpi_array_a = mpi_np.array(np_8x2_array, dist='b')
        mpi_array_b = mpi_np.array(np_2x8_array, dist='b')

        rank = self.comm.Get_rank()
        local_row_start = rank * 2
        local_row_stop = local_row_start + 2
        #Check result consistent with numpy
        self.assertTrue(np.alltrue(
            np.matmul(np_8x2_array, np_2x8_array)[local_row_start: local_row_stop] == \
            mpi_np.matmul(mpi_array_a, mpi_array_b)))
Example #8
def _process_observations(observations, comm):
    """ Helper method to distribute provided observations if necessary.

    Returns
    -------
    observations : Block Distributed MPIArray
        Block distributed version of the provided set of observations.
    num_features : int
        Number of features in each observation vector.
    labels : Block Distributed MPIArray
        Array of centroid indexes that classify a given observation to its
        closest cluster centroid.
    """
    if not isinstance(observations, Block):
        observations = mpi_np.array(observations, comm=comm, dist='b')

    if observations.globalndim > 2:
        raise ValueError('only 1/2-Dimensional observation ' +
                         'vectors/matrices supported.')

    num_observations = observations.globalshape[0]
    num_features = \
        observations.globalshape[1] if observations.globalndim == 2 else 1

    labels = mpi_np.zeros(num_observations,
                          dtype=np.int64,
                          comm=comm,
                          dist=observations.dist)

    return observations, num_features, labels
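A minimal usage sketch for the helper above, assuming it is called from inside the same module (where mpi_np and Block are already imported):

from mpi4py import MPI
import numpy as np

#Hypothetical call site: 100 observations with 2 features each
observations = np.arange(200, dtype=np.float64).reshape(100, 2)
obs, num_features, labels = _process_observations(observations,
                                                  MPI.COMM_WORLD)
assert num_features == 2
#labels is a zeroed, block distributed array with one entry per observation
assert labels.globalshape[0] == 100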
Example #9
    def setUp(self):
        parms = self.create_setUp_parms()
        self.comm = parms.get('comm')
        self.dist = parms.get('dist')
        self.data = parms.get('data')

        self.mpi_array = mpi_np.array(self.data,
                                      comm=self.comm,
                                      dist=self.dist)
Example #10
    def test_process_centroids_providing_Distributed_MPIArray(self):
        k = mpi_np.array(self.seeded_centroids, dist='b')
        num_features = self.seeded_num_features
        obs = self.dist_obs_2_features
        centroids, num_centroids, temp_centroids = \
            _process_centroids(k, num_features, obs, self.comm)
        self.assertTrue(isinstance(centroids, Replicated))
        self.assertTrue(isinstance(temp_centroids, Replicated))
        self.assertEqual(num_centroids, self.seeded_num_centroids)
        #Check seeded centroids returned
        self.assertTrue(np.alltrue(self.seeded_centroids == centroids))
Example #11
    def test_return_behavior_with_np_data_from_all_ranks(self):
        for root in range(self.size):
            np_data = None
            self.assertTrue(np_data is None)
            if self.rank == root:
                np_data = self.np_data
            mpi_np_array = mpi_np.array(np_data,
                                        comm=self.comm,
                                        root=root,
                                        dist=self.dist)
            self.assertTrue(isinstance(mpi_np_array, mpi_np.MPIArray))
            self.assertTrue(isinstance(mpi_np_array, self.dist_class))
            self.assertEqual(mpi_np_array.comm, self.comm)
            self.assertEqual(mpi_np_array.dist, self.dist)
Example #12
    def test_unsupported_distribution(self):
        data = np.arange(10)
        comm = MPI.COMM_WORLD
        with self.assertRaises(InvalidDistributionError):
            mpi_np.array(data, comm=comm, dist='bananas')
        #Test cases where dim of input data != dim of distribution
        with self.assertRaises(InvalidDistributionError):
            mpi_np.array(data, comm=comm, dist=('*', 'b'))
        with self.assertRaises(InvalidDistributionError):
            mpi_np.array(data, comm=comm, dist=('b', 'b'))
Example #13
    def setUp(self):
        parms = self.create_setUp_parms()
        self.comm = parms.get('comm')
        self.rank = parms.get('rank')
        self.comm_size = parms.get('comm_size')
        self.dist = parms.get('dist')
        self.data = parms.get('data')
        self.local_data = parms.get('local_data')
        self.comm_dims = parms.get('comm_dims')
        self.comm_coord = parms.get('comm_coord')
        self.local_to_global = parms.get('local_to_global')

        self.np_array = np.array(self.data)
        self.np_local_array = np.array(self.local_data)
        self.mpi_array = mpi_np.array(self.data,
                                      comm=self.comm,
                                      dist=self.dist)
Example #14
    def test_kmeans_produces_same_results_as_scipy_kmeans2_for_3_features_with_Block_distributed_seed(
            self):
        k = np.array([[-1, -1, -1], [1, 1, 1]])
        k_mpi_np = mpi_np.array(k, dist='b')
        scipy_centroids, scipy_labels = \
            scipy_cluster.kmeans2(self.obs_3_features, k, iter=1000)
        mpids_centroids, mpids_labels = \
            mpi_scipy_cluster.kmeans(self.dist_obs_3_features, k_mpi_np)

        #Check results
        self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
        self.assertTrue(
            self.__compare_centroids(scipy_centroids, mpids_centroids))
        #Check returned data types
        self.assertTrue(isinstance(mpids_centroids, Replicated))
        self.assertTrue(isinstance(mpids_labels, Replicated))
        #Check number of returned elements
        self.assertTrue(mpids_centroids.globalshape[0] == len(k))
        self.assertTrue(
            mpids_labels.globalshape[0] == self.obs_3_features.shape[0])
Example #15
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":

    #Capture default communicator, MPI process rank, and number of MPI processes
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    note = "Note: creation routines are using their default MPI related kwargs."
    note += "\nDefault kwargs:"
    note += " routine(..., comm=MPI.COMM_WORLD, root=0, dist='b')\n"
    print(note) if rank == 0 else None

    #Array, distributed array-like data
    print('From array(array_like_data) Routine') if rank == 0 else None
    array_like_data = list(range(size * 5))
    mpi_array = mpi_np.array(array_like_data)
    print('Local Array Result Rank {}: {}'.format(rank, mpi_array))
    print() if rank == 0 else None
Example #16
def __centroids_from_ndarray(k, num_features, observations, comm):
    #Duplicate ndarray on all processes
    return mpi_np.array(k, dtype=observations.dtype, comm=comm, dist='r')
Example #17
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_scipy_cluster

if __name__ == "__main__":

    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Create simulated 1D observation vector
    k, num_points, centers = 2, 10, [[-1, -0.75], [1, 1.25]]
    x0 = np.random.uniform(centers[0][0], centers[0][1], size=(num_points))
    x1 = np.random.uniform(centers[1][0], centers[1][1], size=(num_points))
    np_1D_obs_features = np.array(x0.tolist() + x1.tolist(), dtype=np.float64)

    #Distribute observations among MPI processes
    mpi_np_1D_obs_features = mpi_np.array(np_1D_obs_features, dist='b')

    #Compute K-Means Clustering Result
    centroids, labels = mpi_scipy_cluster.kmeans(
        mpi_np_1D_obs_features,
        k,
        #Below are the default kwargs
        thresh=1e-5,
        comm=MPI.COMM_WORLD)

    #Compute K-Means Clustering Result using Non-Distributed Input
    centroids_2, labels_2 = mpi_scipy_cluster.kmeans(np_1D_obs_features, k)

    #Check Distributed & Non-Distributed inputs generate the same result
    assert np.allclose(centroids, centroids_2)
    assert np.allclose(labels, labels_2)
Example #18
import sys

from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_cluster
from operations import gen_blobs, measure_time

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()

    runs = int(sys.argv[1])
    obs_power = int(sys.argv[2])

    local_size = 2**obs_power
    num_obs = n_procs * local_size
    k = 2
    features = 2
    observations, labels = gen_blobs(num_obs, features, k)
    mpi_obs = mpi_np.array(observations, dist='b', dtype=np.float64)

    for _ in range(runs):
        comm.Barrier()
        time = measure_time()
        centroids, labels = mpi_cluster.kmeans(mpi_obs, k)
        time = measure_time() - time
        #Keep the max wall time across ranks; only root receives it
        time = comm.reduce(time, op=MPI.MAX, root=0)
        if rank == 0:
            print("mpi_scipy,%d,%d,%d,%d,%.9f" %
                  (n_procs, local_size, features, k, time))
        del centroids, labels
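A hedged launch example for the benchmark above (argument order follows the two sys.argv reads in the script; the file name is hypothetical):

#Example launch (file name assumed):
#  mpiexec -n 4 python kmeans_benchmark.py 10 12
#-> runs=10 timing runs, obs_power=12, i.e. 2**12 = 4096 observations
#   per rank and 4 * 4096 = 16384 observations in total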
Example #19
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":

    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Array of 16 elements (values 0-15)
    data_1D = np.arange(16)

    #Block data distribution
    block_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='b')

    print('1D Global data:\n{}\n\n'.format(data_1D)) if rank == 0 else None
    comm.Barrier()
    print('1D Blocked Data Rank {}:\n{}'.format(rank, block_mpi_array_1D))

    #Replicated data distribution
    replicated_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='r')

    comm.Barrier()
    print('1D Replicated Data Rank {}:\n{}'\
        .format(rank, replicated_mpi_array_1D))
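A short follow-on check for the script above, sketched with the local/global size attributes demonstrated in Example #20 (assuming size is the local element count and globalsize the total):

comm.Barrier()
#Block: the local sizes across ranks sum to the global size
assert comm.allreduce(block_mpi_array_1D.size) == \
    block_mpi_array_1D.globalsize
#Replicated: every rank holds the full array locally
assert replicated_mpi_array_1D.size == replicated_mpi_array_1D.globalsize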
Example #20
from mpi4py import MPI
import platform

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":

    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Create array-like data with 8 elements, values 0-7
    data = list(range(8))
    #Create MPInumpy array, passing data and default communicator
    mpi_array = mpi_np.array(data, comm=comm)

    #Print mpi_array attributes for each MPI process
    output = '{} Rank {} MPIArray Attributes: \n'.format(platform.node(), rank)
    output += '\t mpi_array.base = {} \n'.format(mpi_array.base)
    output += '\t mpi_array.dtype = {} \n'.format(mpi_array.dtype)
    #Common distributed local and global properties
    output += '\t mpi_array.shape = {} \n'.format(mpi_array.shape)
    output += '\t mpi_array.globalshape = {} \n'.format(mpi_array.globalshape)
    output += '\t mpi_array.size = {} \n'.format(mpi_array.size)
    output += '\t mpi_array.globalsize = {} \n'.format(mpi_array.globalsize)
    output += '\t mpi_array.nbytes = {} \n'.format(mpi_array.nbytes)
    output += '\t mpi_array.globalnbytes = {} \n'.format(mpi_array.globalnbytes)
    output += '\t mpi_array.ndim = {} \n'.format(mpi_array.ndim)
    output += '\t mpi_array.globalndim = {} \n'.format(mpi_array.globalndim)
    #Unique properties to MPIArray
    output += '\t mpi_array.dist = {} \n'.format(mpi_array.dist)
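For orientation, a hypothetical attribute breakdown for one rank under the default block distribution, assuming four ranks and an even split of the 8 elements (the actual partitioning and formatting are determined by MPInumpy):

#Hypothetical attribute values on rank 1 of 4 (assumed even split):
#  mpi_array.shape = (2,)        local block holds 2 of the 8 elements
#  mpi_array.globalshape = (8,)  shape of the full distributed array
#  mpi_array.size = 2            local element count
#  mpi_array.globalsize = 8      global element count
#  mpi_array.dist = b            block distribution (the default)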