コード例 #1
0
ファイル: _kmeans.py プロジェクト: jrodgers01d/mpids
def _process_observations(observations, comm):
    """ Helper method to distribute provided observations if necessary.

    Parameters
    ----------
    observations : array_like or MPIArray
        Observation vectors/matrix to cluster; anything that is not
        already a Block distributed MPIArray is redistributed block-wise.
    comm : MPI communicator
        Communicator the distributed arrays are created over.

    Returns
    -------
    observations : Block Distributed MPIArray
        Provided observations in block distributed form.
    num_features : int
        Number of features in observation vector.
    labels : Block Distributed MPIArray
        Zero-initialized array of centroid indexes (one per observation)
        that will classify a given observation to its closest cluster
        centroid.

    Raises
    ------
    ValueError
        If the observations have more than 2 global dimensions.
    """
    #Anything that isn't already block distributed gets redistributed
    if not isinstance(observations, Block):
        observations = mpi_np.array(observations, comm=comm, dist='b')

    if observations.globalndim > 2:
        raise ValueError('only 1/2-Dimensional observation' +
                         'vector/matrices supported.')

    num_observations = observations.globalshape[0]
    #1-D input is treated as one feature per observation
    num_features = \
        observations.globalshape[1] if observations.globalndim == 2 else 1

    #One label slot per observation, distributed like the observations
    labels = mpi_np.zeros(num_observations,
                          dtype=np.int64,
                          comm=comm,
                          dist=observations.dist)

    return observations, num_features, labels
コード例 #2
0
def empty(size, iters=10000):
    """Benchmark mpi_np.empty; return the mean seconds per allocation."""
    start = measure_time()
    for _ in range(iters):
        mpi_np.empty(size, dtype=np.float64)
    elapsed = measure_time() - start
    #Collect garbage now so it is not charged to a later measurement
    gc.collect()
    return elapsed / iters
コード例 #3
0
def creation(size, iters=10000):
    """Benchmark mpi_np.array creation from a Python list; return mean
    seconds per call."""
    payload = np.arange(size, dtype=np.float64).tolist()
    start = measure_time()
    for _ in range(iters):
        mpi_np.array(payload, dtype=np.float64)
    elapsed = measure_time() - start
    #Collect garbage now so it is not charged to a later measurement
    gc.collect()
    return elapsed / iters
コード例 #4
0
def empty(size, iters=10000, comm=MPI.COMM_WORLD):
    """Benchmark mpi_np.empty across MPI ranks.

    Returns the mean seconds per call. On the root rank this reflects the
    slowest rank (MPI.MAX reduction); non-root ranks return their own
    local timing.
    """
    comm.Barrier()  # start all ranks together so timings are comparable
    time = measure_time()
    for _ in range(iters):
        mpi_np.empty(size, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    #comm.reduce returns the MAX only on root (None elsewhere); the
    #original discarded it, so every rank reported only its local time.
    reduced = comm.reduce(time, op=MPI.MAX, root=0)
    if reduced is not None:
        time = reduced
    return time / iters
コード例 #5
0
def arange(size, iters=10000, comm=MPI.COMM_WORLD):
    """Benchmark block distributed mpi_np.arange across MPI ranks.

    Returns the mean seconds per call. On the root rank this reflects the
    slowest rank (MPI.MAX reduction); non-root ranks return their own
    local timing.
    """
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.arange(size, dtype=np.float64, comm=comm, dist='b')
    time = measure_time() - time
    #comm.reduce returns the MAX only on root (None elsewhere); the
    #original discarded it, so every rank reported only its local time.
    reduced = comm.reduce(time, op=MPI.MAX, root=0)
    if reduced is not None:
        time = reduced
    return time / iters
コード例 #6
0
def creation(size, iters=10000, comm=MPI.COMM_WORLD):
    """Benchmark mpi_np.array creation from a Python list across ranks.

    Returns the mean seconds per call. On the root rank this reflects the
    slowest rank (MPI.MAX reduction); non-root ranks return their own
    local timing.
    """
    data = np.arange(size, dtype=np.float64).tolist()
    comm.Barrier()  # start all ranks together so timings are comparable
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    #comm.reduce returns the MAX only on root (None elsewhere); the
    #original discarded it, so every rank reported only its local time.
    reduced = comm.reduce(time, op=MPI.MAX, root=0)
    if reduced is not None:
        time = reduced
    return time / iters
コード例 #7
0
    def test_block_distribution_matmul(self):
        """Block distributed matmul matches this rank's row of the numpy
        result."""
        rank = self.comm.Get_rank()
        mpi_array_a = mpi_np.array(self.np_array_a, dist='b')
        mpi_array_b = mpi_np.array(self.np_array_b, dist='b')

        #Check result consistent with numpy
        #np.all replaces np.alltrue, which was removed in NumPy 2.0
        self.assertTrue(np.all(
            np.matmul(self.np_array_a, self.np_array_b)[rank] == \
            mpi_np.matmul(mpi_array_a, mpi_array_b)))
コード例 #8
0
    def test_Replicated_matmul(self):
        """Replicated matmul: MPIArray return type and numpy-equal results
        for every mix of numpy and MPIArray operands."""
        mpi_array_a = mpi_np.array(self.np_array_a, dist='r')
        mpi_array_b = mpi_np.array(self.np_array_b, dist='r')

        #Check return type
        self.assertTrue(
            isinstance(mpi_np.matmul(self.np_array_a, self.np_array_b),
                       mpi_np.MPIArray))
        self.assertTrue(
            isinstance(mpi_np.matmul(mpi_array_a, mpi_array_b),
                       mpi_np.MPIArray))
        self.assertTrue(
            isinstance(mpi_np.matmul(mpi_array_a, self.np_array_b),
                       mpi_np.MPIArray))
        self.assertTrue(
            isinstance(mpi_np.matmul(self.np_array_a, mpi_array_b),
                       mpi_np.MPIArray))

        #Check result consistent with numpy
        #np.all replaces np.alltrue, which was removed in NumPy 2.0
        self.assertTrue(np.all(
            np.matmul(self.np_array_a, self.np_array_b) == \
            mpi_np.matmul(mpi_array_a, mpi_array_b)))
        self.assertTrue(np.all(
            np.matmul(self.np_array_a, self.np_array_b) == \
            mpi_np.matmul(self.np_array_a, mpi_array_b)))
        self.assertTrue(np.all(
            np.matmul(self.np_array_a, self.np_array_b) == \
            mpi_np.matmul(mpi_array_a, self.np_array_b)))
コード例 #9
0
    def test_validate_shape_called(self):
        """zeros() must forward its shape argument to _validate_shape for
        both int and tuple shapes."""
        target = 'mpids.MPInumpy.array_creation._validate_shape'

        for shape in (1, (1, 2)):
            with mock.patch(target) as validate_mock:
                mpi_np.zeros(shape)
            validate_mock.assert_called_with(shape)
コード例 #10
0
 def setUp(self):
     """Create seeded centroids and 1/2/3-feature observation fixtures."""
     self.comm = MPI.COMM_WORLD
     #Number of clusters
     self.k = 2
     #Deterministic 2x2 seed: two centroids with two features each
     self.seeded_centroids = np.arange(4).reshape(2, 2)
     self.seeded_num_centroids = self.seeded_centroids.shape[0]
     self.seeded_num_features = self.seeded_centroids.shape[-1]
     #Local observation fixtures built by private helpers
     self.obs_1_feature = self.__create_1_feature_obs()
     self.obs_2_features = self.__create_2_feature_obs()
     self.obs_3_features = self.__create_3_feature_obs()
     #Block distributed copies of the same observations
     self.dist_obs_1_feature = mpi_np.array(self.obs_1_feature, dist='b')
     self.dist_obs_2_features = mpi_np.array(self.obs_2_features, dist='b')
     self.dist_obs_3_features = mpi_np.array(self.obs_3_features, dist='b')
コード例 #11
0
    def test_under_partitioned_block_distribution_matmul(self):
        """Matmul stays correct when the 2x8 operand is under partitioned."""
        #Current version of code will under partition a 2x8 matrix.
        #Want to make sure logic is sound with petsc4py.
        np_8x2_array = self.np_array_a.reshape(8, 2)
        np_2x8_array = self.np_array_b.reshape(2, 8)
        mpi_array_a = mpi_np.array(np_8x2_array, dist='b')
        mpi_array_b = mpi_np.array(np_2x8_array, dist='b')

        rank = self.comm.Get_rank()
        local_row_start = rank * 2
        local_row_stop = local_row_start + 2
        #Check result consistent with numpy
        #np.all replaces np.alltrue, which was removed in NumPy 2.0
        self.assertTrue(np.all(
            np.matmul(np_8x2_array, np_2x8_array)[local_row_start: local_row_stop] == \
            mpi_np.matmul(mpi_array_a, mpi_array_b)))
コード例 #12
0
    def test_process_observations_providing_mpi_np_array(self):
        """Both block ('b') and replicated ('r') MPIArray inputs come back
        from _process_observations block distributed."""
        for distribution in ('b', 'r'):
            mpi_np_observations = mpi_np.arange(8, dist=distribution)
            processed_obs, num_features, labels = \
                _process_observations(mpi_np_observations, self.comm)
            self.assertTrue(isinstance(processed_obs, Block))
            self.assertEqual(num_features, 1)
            self.assertTrue(isinstance(labels, Block))
コード例 #13
0
    def setUp(self):
        """Pull comm/dist/data parms and build the distributed fixture."""
        parms = self.create_setUp_parms()
        for key in ('comm', 'dist', 'data'):
            setattr(self, key, parms.get(key))

        self.mpi_array = mpi_np.array(self.data,
                                      comm=self.comm,
                                      dist=self.dist)
コード例 #14
0
 def test_process_centroids_providing_Distributed_MPIArray(self):
     """Block distributed centroid seeds are accepted and returned as
     Replicated MPIArrays with the seeded values intact."""
     k = mpi_np.array(self.seeded_centroids, dist='b')
     num_features = self.seeded_num_features
     obs = self.dist_obs_2_features
     centroids, num_centroids, temp_centroids = \
         _process_centroids(k, num_features, obs, self.comm)
     self.assertTrue(isinstance(centroids, Replicated))
     self.assertTrue(isinstance(temp_centroids, Replicated))
     self.assertEqual(num_centroids, self.seeded_num_centroids)
     #Check seeded centroids returned
     #np.all replaces np.alltrue, which was removed in NumPy 2.0
     self.assertTrue(np.all(self.seeded_centroids == centroids))
コード例 #15
0
ファイル: _kmeans.py プロジェクト: jrodgers01d/mpids
def __centroids_from_int(k, num_features, observations, comm):
    """Seed k replicated centroids from evenly spaced observations."""
    centroids = mpi_np.zeros((k, num_features),
                             dtype=observations.dtype,
                             comm=comm,
                             dist='r')
    #Pick initial centroids: every (num_observations // k)-th observation
    stride = observations.globalshape[0] // k
    for index in range(k):
        centroids[index] = observations[index * stride]

    return centroids
コード例 #16
0
 def test_return_behavior_with_np_data_from_all_ranks(self):
     """array() with data supplied only on the root rank still yields a
     properly distributed MPIArray on every rank."""
     for root in range(self.size):
         np_data = None
         self.assertIsNone(np_data)
         #Only the root rank supplies the payload; others pass None
         if self.rank == root:
             np_data = self.np_data
         result = mpi_np.array(np_data,
                               comm=self.comm,
                               root=root,
                               dist=self.dist)
         self.assertIsInstance(result, mpi_np.MPIArray)
         self.assertIsInstance(result, self.dist_class)
         self.assertEqual(result.comm, self.comm)
         self.assertEqual(result.dist, self.dist)
コード例 #17
0
 def test_return_behavior_from_all_ranks_with_tuple_shape(self):
     """empty() with a tuple shape supplied only on the root rank still
     yields a properly distributed MPIArray on every rank."""
     for root in range(self.size):
         shape = None
         self.assertIsNone(shape)
         #Only the root rank supplies the shape; others pass None
         if self.rank == root:
             shape = self.tuple_shape
         result = mpi_np.empty(shape,
                               comm=self.comm,
                               root=root,
                               dist=self.dist)
         self.assertIsInstance(result, mpi_np.MPIArray)
         self.assertIsInstance(result, self.dist_class)
         self.assertEqual(result.comm, self.comm)
         self.assertEqual(result.dist, self.dist)
コード例 #18
0
 def test_return_behavior_from_all_ranks_with_int_shape(self):
     """zeros() with an int shape supplied only on the root rank yields a
     zero-filled distributed MPIArray on every rank."""
     for root in range(self.size):
         shape = None
         self.assertTrue(shape is None)
         if self.rank == root:
             shape = self.int_shape
         mpi_np_zeros = mpi_np.zeros(shape,
                                     comm=self.comm,
                                     root=root,
                                     dist=self.dist)
         self.assertTrue(isinstance(mpi_np_zeros, mpi_np.MPIArray))
         self.assertTrue(isinstance(mpi_np_zeros, self.dist_class))
         self.assertEqual(mpi_np_zeros.comm, self.comm)
         self.assertEqual(mpi_np_zeros.dist, self.dist)
         #np.all replaces np.alltrue, which was removed in NumPy 2.0
         self.assertTrue(np.all(mpi_np_zeros == 0))
コード例 #19
0
 def test_unsupported_distribution(self):
     """Invalid dist specifications must raise InvalidDistributionError."""
     data = np.arange(10)
     comm = MPI.COMM_WORLD
     bad_distributions = ('bananas',
                          # Cases where dim input data != dim distribution
                          ('*', 'b'),
                          ('b', 'b'))
     for bad_dist in bad_distributions:
         with self.assertRaises(InvalidDistributionError):
             mpi_np.array(data, comm=comm, dist=bad_dist)
コード例 #20
0
 def test_return_behavior_from_all_ranks_float_stop(self):
     """arange() with a float stop supplied only on the root rank matches
     numpy's arange on every rank."""
     np_arange = np.arange(20.0)
     for root in range(self.size):
         stop = None
         self.assertTrue(stop is None)
         if self.rank == root:
             stop = 20.0
         mpi_np_arange = mpi_np.arange(stop,
                                       comm=self.comm,
                                       root=root,
                                       dist=self.dist)
         self.assertTrue(isinstance(mpi_np_arange, mpi_np.MPIArray))
         self.assertTrue(isinstance(mpi_np_arange, self.dist_class))
         self.assertEqual(mpi_np_arange.comm, self.comm)
         self.assertEqual(mpi_np_arange.dist, self.dist)
         #np.all replaces np.alltrue, which was removed in NumPy 2.0
         self.assertTrue(np.all(mpi_np_arange[:] == np_arange))
コード例 #21
0
    def setUp(self):
        """Load the parms dict and materialize numpy/MPIArray fixtures."""
        parms = self.create_setUp_parms()
        for key in ('comm', 'rank', 'comm_size', 'dist', 'data',
                    'local_data', 'comm_dims', 'comm_coord',
                    'local_to_global'):
            setattr(self, key, parms.get(key))

        self.np_array = np.array(self.data)
        self.np_local_array = np.array(self.local_data)
        self.mpi_array = mpi_np.array(self.data,
                                      comm=self.comm,
                                      dist=self.dist)
コード例 #22
0
    def test_kmeans_produces_same_results_as_scipy_kmeans2_for_3_features_with_Block_distributed_seed(
            self):
        """mpids kmeans seeded with a block distributed MPIArray must agree
        with scipy's kmeans2 on 3-feature observations."""
        seed = np.array([[-1, -1, -1], [1, 1, 1]])
        seed_mpi_np = mpi_np.array(seed, dist='b')
        scipy_centroids, scipy_labels = \
            scipy_cluster.kmeans2(self.obs_3_features, seed, iter=1000)
        mpids_centroids, mpids_labels = \
            mpi_scipy_cluster.kmeans(self.dist_obs_3_features, seed_mpi_np)

        #Check results
        self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
        self.assertTrue(
            self.__compare_centroids(scipy_centroids, mpids_centroids))
        #Check returned data types
        self.assertTrue(isinstance(mpids_centroids, Replicated))
        self.assertTrue(isinstance(mpids_labels, Replicated))
        #Check number of returned elements
        self.assertTrue(mpids_centroids.globalshape[0] == len(seed))
        self.assertTrue(
            mpids_labels.globalshape[0] == self.obs_3_features.shape[0])
コード例 #23
0
ファイル: _kmeans.py プロジェクト: jrodgers01d/mpids
def _process_centroids(k, num_features, observations, comm):
    """ Helper method to distribute provided k if necessary and resolve whether
        or not the input is seeded.

    Parameters
    ----------
    k : int, ndarray, Block, or Replicated
        Either the number of clusters (int) or seeded centroid locations
        (ndarray or MPIArray).
    num_features : int
        Number of features per observation vector.
    observations : Block Distributed MPIArray
        Observations being clustered; supplies the centroid dtype.
    comm : MPI communicator
        Communicator the replicated centroid arrays are created over.

    Returns
    -------
    centroids : Replicated MPIArray
        Array of cluster centroids generated from provided set of observations.
    num_centroids : int
        Number of centroids.
    temp_centroids : Replicated MPIArray
        Intermediate centroid locations prior to computing distributed result.

    Raises
    ------
    TypeError
        If k is not one of the supported types.
    ValueError
        If seeded centroids disagree with num_features.
    """
    def __unsupported_type(*args):
        #Fallback for the dispatch table below; *args absorbs the unused
        #(k, num_features, observations, comm) arguments.
        raise TypeError('only number of clusters(int) or ' + \
        'centroid seeds(ndarray or MPIArray) should be k.')

    #Dispatch on the concrete type of k; note this is an exact type match,
    #not an isinstance check, so subclasses fall through to the error.
    __process_centroid_map = {
        int: __centroids_from_int,
        np.ndarray: __centroids_from_ndarray,
        Block: __centroids_from_mpinp_block,
        Replicated: __centroids_from_mpinp_undist
    }
    centroids = \
        __process_centroid_map.get(type(k), __unsupported_type)(k,
                                                                num_features,
                                                                observations,
                                                                comm)

    num_centroids = centroids.shape[0]
    #NOTE(review): 1-D centroid arrays skip the feature-count check below;
    #confirm this is intended for single-feature seeds.
    if num_features != centroids.shape[-1] and centroids.ndim != 1:
        raise ValueError('expected {} '.format(num_features) + \
                         'number of features in seeded cluster centroids.')
    #Scratch buffer the kmeans iteration accumulates new centroids into
    temp_centroids = mpi_np.zeros((num_centroids, num_features),
                                  dtype=observations.dtype,
                                  comm=comm,
                                  dist='r')

    return centroids, num_centroids, temp_centroids
コード例 #24
0
 def test_validate_shape_errors_propegated(self):
     """Exceptions raised inside _validate_shape must surface to callers."""
     patch_target = 'mpids.MPInumpy.array_creation._validate_shape'
     with mock.patch(patch_target,
                     side_effect=Exception('Mock Execption')):
         with self.assertRaises(Exception):
             mpi_np.zeros(1)
コード例 #25
0
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":

    #Capture default communicator, MPI process rank, and number of MPI processes
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    note = "Note: creation routines are using their default MPI related kwargs."
    note += "\nDefault kwargs:"
    note += " routine(..., comm=MPI.COMM_WORLD, root=0, dist='b')\n"
    #Only the root rank prints shared headers
    if rank == 0:
        print(note)

    #Arange, evenly spaced values within specified interval
    if rank == 0:
        print('From arange(start, stop, step) Routine')
    mpi_arange = mpi_np.arange(size * 5)
    print('Local Arange Result Rank {}: {}'.format(rank, mpi_arange))
    if rank == 0:
        print()
コード例 #26
0
 def test_unsupported_functionality(self):
     """matmul must reject numpy's 'out' keyword with NotSupportedError."""
     #Use of 'out' field
     mpi_out = np.zeros(())
     with self.assertRaises(NotSupportedError):
         mpi_np.matmul(self.np_array_a, self.np_array_b, out=mpi_out)
コード例 #27
0
import mpids.MPInumpy as mpi_np
import numpy as np
from mpi4py import MPI
from operations import add, sub, mul, div

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()
    local_size = 2**16
    size = n_procs * local_size
    iters = 1000
    mpi_np_arr = mpi_np.arange(size, dtype=np.float64)

    #Run each elementwise benchmark, preserving the add/sub/mul/div order
    timings = [(label, op(mpi_np_arr, iters=iters))
               for label, op in (('add', add), ('sub', sub),
                                 ('mul', mul), ('div', div))]

    if rank == 0:
        for label, elapsed in timings:
            print("mpi_np,%s,%d,%d,%.9f" %
                  (label, n_procs, local_size, elapsed))
コード例 #28
0
from mpi4py import MPI
import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_cluster
from operations import gen_blobs, measure_time

if __name__ == '__main__':
    #sys and numpy are used below but never imported at the top of this
    #script; import them here so the benchmark actually runs.
    import sys

    import numpy as np

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()

    #Command line: <runs> <obs_power> (observations per rank = 2**obs_power)
    runs = int(sys.argv[1])
    obs_power = int(sys.argv[2])

    local_size = 2**obs_power
    num_obs = n_procs * local_size
    k = 2
    features = 2
    observations, labels = gen_blobs(num_obs, features, k)
    mpi_obs = mpi_np.array(observations, dist='b', dtype=np.float64)

    for _ in range(runs):
        comm.Barrier()
        time = measure_time()
        centroids, labels = mpi_cluster.kmeans(mpi_obs, k)
        time = measure_time() - time
        #Report the slowest rank's time: reduce returns the MAX on root
        #(None elsewhere); the original discarded this and printed the
        #root's local time instead.
        reduced = comm.reduce(time, op=MPI.MAX, root=0)
        if rank == 0:
            print("mpi_scipy,%d,%d,%d,%d,%.9f" %
                  (n_procs, local_size, features, k, reduced))
        del centroids, labels
コード例 #29
0
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":

    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Arrays  elements (values 0-15)
    data_1D = np.arange(16)

    #Block data distribution
    block_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='b')

    if rank == 0:
        print('1D Global data:\n{}\n\n'.format(data_1D))
    comm.Barrier()
    print('1D Blocked Data Rank {}:\n{}'.format(rank, block_mpi_array_1D))

    #Replicated data distribution
    replicated_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='r')

    comm.Barrier()
    print('1D Replicated Data Rank {}:\n{}'.format(
        rank, replicated_mpi_array_1D))
コード例 #30
0
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":

    #Capture default communicator, MPI process rank, and number of MPI processes
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    note = "Note: creation routines are using their default MPI related kwargs."
    note += "\nDefault kwargs:"
    note += " routine(..., comm=MPI.COMM_WORLD, root=0, dist='b')\n"
    #Only the root rank prints shared headers
    if rank == 0:
        print(note)

    #Array, distributed array-like data
    if rank == 0:
        print('From array(array_like_data) Routine')
    array_like_data = list(range(size * 5))
    mpi_array = mpi_np.array(array_like_data)
    print('Local Array Result Rank {}: {}'.format(rank, mpi_array))
    if rank == 0:
        print()